[red-knot] Add notebook support (#12338)

This commit is contained in:
Micha Reiser 2024-07-17 10:26:33 +02:00 committed by GitHub
parent fe04f2b09d
commit 0c72577b5d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 246 additions and 53 deletions

1
Cargo.lock generated
View File

@ -2088,6 +2088,7 @@ dependencies = [
"filetime", "filetime",
"ignore", "ignore",
"insta", "insta",
"ruff_notebook",
"ruff_python_ast", "ruff_python_ast",
"ruff_python_parser", "ruff_python_parser",
"ruff_source_file", "ruff_source_file",

View File

@ -10,7 +10,6 @@ use ruff_db::system::SystemPath;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use ruff_db::files::{system_path_to_file, File}; use ruff_db::files::{system_path_to_file, File};
use ruff_db::source::source_text;
use crate::db::Db; use crate::db::Db;
use crate::module_name::ModuleName; use crate::module_name::ModuleName;
@ -74,7 +73,10 @@ pub(crate) fn parse_typeshed_versions(
db: &dyn Db, db: &dyn Db,
versions_file: File, versions_file: File,
) -> Result<TypeshedVersions, TypeshedVersionsParseError> { ) -> Result<TypeshedVersions, TypeshedVersionsParseError> {
let file_content = source_text(db.upcast(), versions_file); // TODO: Handle IO errors
let file_content = versions_file
.read_to_string(db.upcast())
.unwrap_or_default();
file_content.parse() file_content.parse()
} }

View File

@ -11,6 +11,7 @@ repository = { workspace = true }
license = { workspace = true } license = { workspace = true }
[dependencies] [dependencies]
ruff_notebook = { workspace = true }
ruff_python_ast = { workspace = true } ruff_python_ast = { workspace = true }
ruff_python_parser = { workspace = true } ruff_python_parser = { workspace = true }
ruff_source_file = { workspace = true } ruff_source_file = { workspace = true }

View File

@ -187,10 +187,10 @@ impl File {
/// an empty string, which is the closest to the content that the file contains now. Returning /// an empty string, which is the closest to the content that the file contains now. Returning
/// an empty string shouldn't be a problem because the query will be re-executed as soon as the /// an empty string shouldn't be a problem because the query will be re-executed as soon as the
/// changes are applied to the database. /// changes are applied to the database.
pub(crate) fn read_to_string(&self, db: &dyn Db) -> String { pub fn read_to_string(&self, db: &dyn Db) -> crate::system::Result<String> {
let path = self.path(db); let path = self.path(db);
let result = match path { match path {
FilePath::System(system) => { FilePath::System(system) => {
// Add a dependency on the revision to ensure the operation gets re-executed when the file changes. // Add a dependency on the revision to ensure the operation gets re-executed when the file changes.
let _ = self.revision(db); let _ = self.revision(db);
@ -198,9 +198,7 @@ impl File {
db.system().read_to_string(system) db.system().read_to_string(system)
} }
FilePath::Vendored(vendored) => db.vendored().read_to_string(vendored), FilePath::Vendored(vendored) => db.vendored().read_to_string(vendored),
}; }
result.unwrap_or_default()
} }
/// Refreshes the file metadata by querying the file system if needed. /// Refreshes the file metadata by querying the file system if needed.
@ -274,7 +272,7 @@ mod tests {
assert_eq!(test.permissions(&db), Some(0o755)); assert_eq!(test.permissions(&db), Some(0o755));
assert_ne!(test.revision(&db), FileRevision::zero()); assert_ne!(test.revision(&db), FileRevision::zero());
assert_eq!(&test.read_to_string(&db), "print('Hello world')"); assert_eq!(&test.read_to_string(&db)?, "print('Hello world')");
Ok(()) Ok(())
} }
@ -304,7 +302,7 @@ mod tests {
} }
#[test] #[test]
fn stubbed_vendored_file() { fn stubbed_vendored_file() -> crate::system::Result<()> {
let mut db = TestDb::new(); let mut db = TestDb::new();
let mut vendored_builder = VendoredFileSystemBuilder::new(); let mut vendored_builder = VendoredFileSystemBuilder::new();
@ -318,7 +316,9 @@ mod tests {
assert_eq!(test.permissions(&db), Some(0o444)); assert_eq!(test.permissions(&db), Some(0o444));
assert_ne!(test.revision(&db), FileRevision::zero()); assert_ne!(test.revision(&db), FileRevision::zero());
assert_eq!(&test.read_to_string(&db), "def foo() -> str"); assert_eq!(&test.read_to_string(&db)?, "def foo() -> str");
Ok(())
} }
#[test] #[test]

View File

@ -1,47 +1,83 @@
use countme::Count;
use ruff_source_file::LineIndex;
use salsa::DebugWithDb;
use std::ops::Deref; use std::ops::Deref;
use std::sync::Arc; use std::sync::Arc;
use countme::Count;
use salsa::DebugWithDb;
use ruff_notebook::Notebook;
use ruff_python_ast::PySourceType;
use ruff_source_file::LineIndex;
use crate::files::File; use crate::files::File;
use crate::Db; use crate::Db;
/// Reads the content of file. /// Reads the source text of a python text file (must be valid UTF8) or notebook.
#[salsa::tracked] #[salsa::tracked]
pub fn source_text(db: &dyn Db, file: File) -> SourceText { pub fn source_text(db: &dyn Db, file: File) -> SourceText {
let _span = tracing::trace_span!("source_text", ?file).entered(); let _span = tracing::trace_span!("source_text", ?file).entered();
let content = file.read_to_string(db); if let Some(path) = file.path(db).as_system_path() {
if path.extension().is_some_and(|extension| {
PySourceType::try_from_extension(extension) == Some(PySourceType::Ipynb)
}) {
// TODO(micha): Proper error handling and emit a diagnostic. Tackle it together with `source_text`.
let notebook = db.system().read_to_notebook(path).unwrap_or_else(|error| {
tracing::error!("Failed to load notebook: {error}");
Notebook::empty()
});
return SourceText {
inner: Arc::new(SourceTextInner {
kind: SourceTextKind::Notebook(notebook),
count: Count::new(),
}),
};
}
};
let content = file.read_to_string(db).unwrap_or_else(|error| {
tracing::error!("Failed to load file: {error}");
String::default()
});
SourceText { SourceText {
inner: Arc::from(content), inner: Arc::new(SourceTextInner {
count: Count::new(), kind: SourceTextKind::Text(content),
count: Count::new(),
}),
} }
} }
/// Computes the [`LineIndex`] for `file`. /// The source text of a file containing python code.
#[salsa::tracked] ///
pub fn line_index(db: &dyn Db, file: File) -> LineIndex { /// The file containing the source text can either be a text file or a notebook.
let _span = tracing::trace_span!("line_index", file = ?file.debug(db)).entered();
let source = source_text(db, file);
LineIndex::from_source_text(&source)
}
/// The source text of a [`File`].
/// ///
/// Cheap cloneable in `O(1)`. /// Cheap cloneable in `O(1)`.
#[derive(Clone, Eq, PartialEq)] #[derive(Clone, Eq, PartialEq)]
pub struct SourceText { pub struct SourceText {
inner: Arc<str>, inner: Arc<SourceTextInner>,
count: Count<Self>,
} }
impl SourceText { impl SourceText {
/// Returns the python code as a `str`.
pub fn as_str(&self) -> &str { pub fn as_str(&self) -> &str {
&self.inner match &self.inner.kind {
SourceTextKind::Text(source) => source,
SourceTextKind::Notebook(notebook) => notebook.source_code(),
}
}
/// Returns the underlying notebook if this is a notebook file.
pub fn as_notebook(&self) -> Option<&Notebook> {
match &self.inner.kind {
SourceTextKind::Notebook(notebook) => Some(notebook),
SourceTextKind::Text(_) => None,
}
}
/// Returns `true` if this is a notebook source file.
pub fn is_notebook(&self) -> bool {
matches!(&self.inner.kind, SourceTextKind::Notebook(_))
} }
} }
@ -55,20 +91,54 @@ impl Deref for SourceText {
impl std::fmt::Debug for SourceText { impl std::fmt::Debug for SourceText {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("SourceText").field(&self.inner).finish() let mut dbg = f.debug_tuple("SourceText");
match &self.inner.kind {
SourceTextKind::Text(text) => {
dbg.field(text);
}
SourceTextKind::Notebook(notebook) => {
dbg.field(notebook);
}
}
dbg.finish()
} }
} }
#[derive(Eq, PartialEq)]
struct SourceTextInner {
count: Count<SourceText>,
kind: SourceTextKind,
}
#[derive(Eq, PartialEq)]
enum SourceTextKind {
Text(String),
Notebook(Notebook),
}
/// Computes the [`LineIndex`] for `file`.
#[salsa::tracked]
pub fn line_index(db: &dyn Db, file: File) -> LineIndex {
let _span = tracing::trace_span!("line_index", file = ?file.debug(db)).entered();
let source = source_text(db, file);
LineIndex::from_source_text(&source)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use salsa::EventKind; use salsa::EventKind;
use ruff_source_file::OneIndexed;
use ruff_text_size::TextSize;
use crate::files::system_path_to_file; use crate::files::system_path_to_file;
use crate::source::{line_index, source_text}; use crate::source::{line_index, source_text};
use crate::system::{DbWithTestSystem, SystemPath}; use crate::system::{DbWithTestSystem, SystemPath};
use crate::tests::TestDb; use crate::tests::TestDb;
use ruff_source_file::OneIndexed;
use ruff_text_size::TextSize;
#[test] #[test]
fn re_runs_query_when_file_revision_changes() -> crate::system::Result<()> { fn re_runs_query_when_file_revision_changes() -> crate::system::Result<()> {
@ -79,11 +149,11 @@ mod tests {
let file = system_path_to_file(&db, path).unwrap(); let file = system_path_to_file(&db, path).unwrap();
assert_eq!(&*source_text(&db, file), "x = 10"); assert_eq!(source_text(&db, file).as_str(), "x = 10");
db.write_file(path, "x = 20".to_string()).unwrap(); db.write_file(path, "x = 20".to_string()).unwrap();
assert_eq!(&*source_text(&db, file), "x = 20"); assert_eq!(source_text(&db, file).as_str(), "x = 20");
Ok(()) Ok(())
} }
@ -97,13 +167,13 @@ mod tests {
let file = system_path_to_file(&db, path).unwrap(); let file = system_path_to_file(&db, path).unwrap();
assert_eq!(&*source_text(&db, file), "x = 10"); assert_eq!(source_text(&db, file).as_str(), "x = 10");
// Change the file permission only // Change the file permission only
file.set_permissions(&mut db).to(Some(0o777)); file.set_permissions(&mut db).to(Some(0o777));
db.clear_salsa_events(); db.clear_salsa_events();
assert_eq!(&*source_text(&db, file), "x = 10"); assert_eq!(source_text(&db, file).as_str(), "x = 10");
let events = db.take_salsa_events(); let events = db.take_salsa_events();
@ -123,14 +193,54 @@ mod tests {
let file = system_path_to_file(&db, path).unwrap(); let file = system_path_to_file(&db, path).unwrap();
let index = line_index(&db, file); let index = line_index(&db, file);
let text = source_text(&db, file); let source = source_text(&db, file);
assert_eq!(index.line_count(), 2); assert_eq!(index.line_count(), 2);
assert_eq!( assert_eq!(
index.line_start(OneIndexed::from_zero_indexed(0), &text), index.line_start(OneIndexed::from_zero_indexed(0), source.as_str()),
TextSize::new(0) TextSize::new(0)
); );
Ok(()) Ok(())
} }
#[test]
fn notebook() -> crate::system::Result<()> {
let mut db = TestDb::new();
let path = SystemPath::new("test.ipynb");
db.write_file(
path,
r#"
{
"cells": [{"cell_type": "code", "source": ["x = 10"], "metadata": {}, "outputs": []}],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}"#,
)?;
let file = system_path_to_file(&db, path).unwrap();
let source = source_text(&db, file);
assert!(source.is_notebook());
assert_eq!(source.as_str(), "x = 10\n");
assert!(source.as_notebook().is_some());
Ok(())
}
} }

View File

@ -3,6 +3,7 @@ use std::fmt::Debug;
pub use memory_fs::MemoryFileSystem; pub use memory_fs::MemoryFileSystem;
#[cfg(feature = "os")] #[cfg(feature = "os")]
pub use os::OsSystem; pub use os::OsSystem;
use ruff_notebook::{Notebook, NotebookError};
pub use test::{DbWithTestSystem, TestSystem}; pub use test::{DbWithTestSystem, TestSystem};
use walk_directory::WalkDirectoryBuilder; use walk_directory::WalkDirectoryBuilder;
@ -40,6 +41,13 @@ pub trait System: Debug {
/// Reads the content of the file at `path` into a [`String`]. /// Reads the content of the file at `path` into a [`String`].
fn read_to_string(&self, path: &SystemPath) -> Result<String>; fn read_to_string(&self, path: &SystemPath) -> Result<String>;
/// Reads the content of the file at `path` as a Notebook.
///
/// This method optimizes for the case where the system holds a structured representation of a [`Notebook`],
/// allowing to skip the notebook deserialization. Systems that don't use a structured
/// representation fall-back to deserializing the notebook from a string.
fn read_to_notebook(&self, path: &SystemPath) -> std::result::Result<Notebook, NotebookError>;
/// Returns `true` if `path` exists. /// Returns `true` if `path` exists.
fn path_exists(&self, path: &SystemPath) -> bool { fn path_exists(&self, path: &SystemPath) -> bool {
self.path_metadata(path).is_ok() self.path_metadata(path).is_ok()

View File

@ -126,6 +126,14 @@ impl MemoryFileSystem {
read_to_string(self, path.as_ref()) read_to_string(self, path.as_ref())
} }
pub(crate) fn read_to_notebook(
&self,
path: impl AsRef<SystemPath>,
) -> std::result::Result<ruff_notebook::Notebook, ruff_notebook::NotebookError> {
let content = self.read_to_string(path)?;
ruff_notebook::Notebook::from_source_code(&content)
}
pub fn exists(&self, path: &SystemPath) -> bool { pub fn exists(&self, path: &SystemPath) -> bool {
let by_path = self.inner.by_path.read().unwrap(); let by_path = self.inner.by_path.read().unwrap();
let normalized = self.normalize_path(path); let normalized = self.normalize_path(path);

View File

@ -1,9 +1,13 @@
use std::sync::Arc;
use std::{any::Any, path::PathBuf};
use filetime::FileTime;
use ruff_notebook::{Notebook, NotebookError};
use crate::system::{ use crate::system::{
DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf, DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf,
}; };
use filetime::FileTime;
use std::sync::Arc;
use std::{any::Any, path::PathBuf};
use super::walk_directory::{ use super::walk_directory::{
self, DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, self, DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration,
@ -65,6 +69,10 @@ impl System for OsSystem {
std::fs::read_to_string(path.as_std_path()) std::fs::read_to_string(path.as_std_path())
} }
fn read_to_notebook(&self, path: &SystemPath) -> std::result::Result<Notebook, NotebookError> {
Notebook::from_path(path.as_std_path())
}
fn path_exists(&self, path: &SystemPath) -> bool { fn path_exists(&self, path: &SystemPath) -> bool {
path.as_std_path().exists() path.as_std_path().exists()
} }
@ -266,10 +274,12 @@ impl From<WalkState> for ignore::WalkState {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use tempfile::TempDir;
use crate::system::walk_directory::tests::DirectoryEntryToString; use crate::system::walk_directory::tests::DirectoryEntryToString;
use crate::system::DirectoryEntry; use crate::system::DirectoryEntry;
use tempfile::TempDir;
use super::*;
#[test] #[test]
fn read_directory() { fn read_directory() {

View File

@ -1,3 +1,5 @@
use ruff_notebook::{Notebook, NotebookError};
use crate::files::File; use crate::files::File;
use crate::system::{DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath}; use crate::system::{DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath};
use crate::Db; use crate::Db;
@ -50,14 +52,21 @@ impl System for TestSystem {
fn path_metadata(&self, path: &SystemPath) -> crate::system::Result<Metadata> { fn path_metadata(&self, path: &SystemPath) -> crate::system::Result<Metadata> {
match &self.inner { match &self.inner {
TestSystemInner::Stub(fs) => fs.metadata(path), TestSystemInner::Stub(fs) => fs.metadata(path),
TestSystemInner::System(fs) => fs.path_metadata(path), TestSystemInner::System(system) => system.path_metadata(path),
} }
} }
fn read_to_string(&self, path: &SystemPath) -> crate::system::Result<String> { fn read_to_string(&self, path: &SystemPath) -> crate::system::Result<String> {
match &self.inner { match &self.inner {
TestSystemInner::Stub(fs) => fs.read_to_string(path), TestSystemInner::Stub(fs) => fs.read_to_string(path),
TestSystemInner::System(fs) => fs.read_to_string(path), TestSystemInner::System(system) => system.read_to_string(path),
}
}
fn read_to_notebook(&self, path: &SystemPath) -> std::result::Result<Notebook, NotebookError> {
match &self.inner {
TestSystemInner::Stub(fs) => fs.read_to_notebook(path),
TestSystemInner::System(system) => system.read_to_notebook(path),
} }
} }

View File

@ -19,7 +19,7 @@ use ruff_text_size::TextSize;
use crate::cell::CellOffsets; use crate::cell::CellOffsets;
use crate::index::NotebookIndex; use crate::index::NotebookIndex;
use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue}; use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
use crate::RawNotebookMetadata; use crate::{schema, RawNotebookMetadata};
/// Run round-trip source code generation on a given Jupyter notebook file path. /// Run round-trip source code generation on a given Jupyter notebook file path.
pub fn round_trip(path: &Path) -> anyhow::Result<String> { pub fn round_trip(path: &Path) -> anyhow::Result<String> {
@ -52,7 +52,7 @@ pub enum NotebookError {
InvalidFormat(i64), InvalidFormat(i64),
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug)]
pub struct Notebook { pub struct Notebook {
/// Python source code of the notebook. /// Python source code of the notebook.
/// ///
@ -205,6 +205,28 @@ impl Notebook {
}) })
} }
/// Creates an empty notebook.
///
///
pub fn empty() -> Self {
Self::from_raw_notebook(
RawNotebook {
cells: vec![schema::Cell::Code(schema::CodeCell {
execution_count: None,
id: None,
metadata: serde_json::Value::default(),
outputs: vec![],
source: schema::SourceValue::String(String::default()),
})],
metadata: RawNotebookMetadata::default(),
nbformat: 4,
nbformat_minor: 5,
},
false,
)
.unwrap()
}
/// Update the cell offsets as per the given [`SourceMap`]. /// Update the cell offsets as per the given [`SourceMap`].
fn update_cell_offsets(&mut self, source_map: &SourceMap) { fn update_cell_offsets(&mut self, source_map: &SourceMap) {
// When there are multiple cells without any edits, the offsets of those // When there are multiple cells without any edits, the offsets of those
@ -412,6 +434,14 @@ impl Notebook {
} }
} }
impl PartialEq for Notebook {
fn eq(&self, other: &Self) -> bool {
self.trailing_newline == other.trailing_newline && self.raw == other.raw
}
}
impl Eq for Notebook {}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::path::Path; use std::path::Path;
@ -458,6 +488,13 @@ mod tests {
)); ));
} }
#[test]
fn empty_notebook() {
let notebook = Notebook::empty();
assert_eq!(notebook.source_code(), "\n");
}
#[test_case("markdown", false)] #[test_case("markdown", false)]
#[test_case("only_magic", true)] #[test_case("only_magic", true)]
#[test_case("code_and_magic", true)] #[test_case("code_and_magic", true)]

View File

@ -161,7 +161,7 @@ pub struct CodeCell {
/// Notebook root-level metadata. /// Notebook root-level metadata.
#[skip_serializing_none] #[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Default)]
pub struct RawNotebookMetadata { pub struct RawNotebookMetadata {
/// The author(s) of the notebook document /// The author(s) of the notebook document
pub authors: Option<Value>, pub authors: Option<Value>,

View File

@ -86,12 +86,19 @@ impl PySourceType {
/// ///
/// Falls back to `Python` if the extension is not recognized. /// Falls back to `Python` if the extension is not recognized.
pub fn from_extension(extension: &str) -> Self { pub fn from_extension(extension: &str) -> Self {
match extension { Self::try_from_extension(extension).unwrap_or_default()
}
/// Infers the source type from the file extension.
pub fn try_from_extension(extension: &str) -> Option<Self> {
let ty = match extension {
"py" => Self::Python, "py" => Self::Python,
"pyi" => Self::Stub, "pyi" => Self::Stub,
"ipynb" => Self::Ipynb, "ipynb" => Self::Ipynb,
_ => Self::Python, _ => return None,
} };
Some(ty)
} }
} }