diff --git a/Cargo.lock b/Cargo.lock index 7acfc376d7..bb814066ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2088,6 +2088,7 @@ dependencies = [ "filetime", "ignore", "insta", + "ruff_notebook", "ruff_python_ast", "ruff_python_parser", "ruff_source_file", diff --git a/crates/red_knot_module_resolver/src/typeshed/versions.rs b/crates/red_knot_module_resolver/src/typeshed/versions.rs index 3b5debd38f..9f0765b9fe 100644 --- a/crates/red_knot_module_resolver/src/typeshed/versions.rs +++ b/crates/red_knot_module_resolver/src/typeshed/versions.rs @@ -10,7 +10,6 @@ use ruff_db::system::SystemPath; use rustc_hash::FxHashMap; use ruff_db::files::{system_path_to_file, File}; -use ruff_db::source::source_text; use crate::db::Db; use crate::module_name::ModuleName; @@ -74,7 +73,10 @@ pub(crate) fn parse_typeshed_versions( db: &dyn Db, versions_file: File, ) -> Result { - let file_content = source_text(db.upcast(), versions_file); + // TODO: Handle IO errors + let file_content = versions_file + .read_to_string(db.upcast()) + .unwrap_or_default(); file_content.parse() } diff --git a/crates/ruff_db/Cargo.toml b/crates/ruff_db/Cargo.toml index bbaf27ace2..fcae2b2ab7 100644 --- a/crates/ruff_db/Cargo.toml +++ b/crates/ruff_db/Cargo.toml @@ -11,6 +11,7 @@ repository = { workspace = true } license = { workspace = true } [dependencies] +ruff_notebook = { workspace = true } ruff_python_ast = { workspace = true } ruff_python_parser = { workspace = true } ruff_source_file = { workspace = true } diff --git a/crates/ruff_db/src/files.rs b/crates/ruff_db/src/files.rs index 318909354e..f64c1c57bb 100644 --- a/crates/ruff_db/src/files.rs +++ b/crates/ruff_db/src/files.rs @@ -187,10 +187,10 @@ impl File { /// an empty string, which is the closest to the content that the file contains now. Returning /// an empty string shouldn't be a problem because the query will be re-executed as soon as the /// changes are applied to the database. - pub(crate) fn read_to_string(&self, db: &dyn Db) -> String { + pub fn read_to_string(&self, db: &dyn Db) -> crate::system::Result { let path = self.path(db); - let result = match path { + match path { FilePath::System(system) => { // Add a dependency on the revision to ensure the operation gets re-executed when the file changes. let _ = self.revision(db); @@ -198,9 +198,7 @@ impl File { db.system().read_to_string(system) } FilePath::Vendored(vendored) => db.vendored().read_to_string(vendored), - }; - - result.unwrap_or_default() + } } /// Refreshes the file metadata by querying the file system if needed. @@ -274,7 +272,7 @@ mod tests { assert_eq!(test.permissions(&db), Some(0o755)); assert_ne!(test.revision(&db), FileRevision::zero()); - assert_eq!(&test.read_to_string(&db), "print('Hello world')"); + assert_eq!(&test.read_to_string(&db)?, "print('Hello world')"); Ok(()) } @@ -304,7 +302,7 @@ mod tests { } #[test] - fn stubbed_vendored_file() { + fn stubbed_vendored_file() -> crate::system::Result<()> { let mut db = TestDb::new(); let mut vendored_builder = VendoredFileSystemBuilder::new(); @@ -318,7 +316,9 @@ mod tests { assert_eq!(test.permissions(&db), Some(0o444)); assert_ne!(test.revision(&db), FileRevision::zero()); - assert_eq!(&test.read_to_string(&db), "def foo() -> str"); + assert_eq!(&test.read_to_string(&db)?, "def foo() -> str"); + + Ok(()) } #[test] diff --git a/crates/ruff_db/src/source.rs b/crates/ruff_db/src/source.rs index 1ce69ff04e..f87cc6805c 100644 --- a/crates/ruff_db/src/source.rs +++ b/crates/ruff_db/src/source.rs @@ -1,47 +1,83 @@ -use countme::Count; -use ruff_source_file::LineIndex; -use salsa::DebugWithDb; use std::ops::Deref; use std::sync::Arc; +use countme::Count; +use salsa::DebugWithDb; + +use ruff_notebook::Notebook; +use ruff_python_ast::PySourceType; +use ruff_source_file::LineIndex; + use crate::files::File; use crate::Db; -/// Reads the content of file. +/// Reads the source text of a python text file (must be valid UTF8) or notebook. #[salsa::tracked] pub fn source_text(db: &dyn Db, file: File) -> SourceText { let _span = tracing::trace_span!("source_text", ?file).entered(); - let content = file.read_to_string(db); + if let Some(path) = file.path(db).as_system_path() { + if path.extension().is_some_and(|extension| { + PySourceType::try_from_extension(extension) == Some(PySourceType::Ipynb) + }) { + // TODO(micha): Proper error handling and emit a diagnostic. Tackle it together with `source_text`. + let notebook = db.system().read_to_notebook(path).unwrap_or_else(|error| { + tracing::error!("Failed to load notebook: {error}"); + Notebook::empty() + }); + + return SourceText { + inner: Arc::new(SourceTextInner { + kind: SourceTextKind::Notebook(notebook), + count: Count::new(), + }), + }; + } + }; + + let content = file.read_to_string(db).unwrap_or_else(|error| { + tracing::error!("Failed to load file: {error}"); + String::default() + }); SourceText { - inner: Arc::from(content), - count: Count::new(), + inner: Arc::new(SourceTextInner { + kind: SourceTextKind::Text(content), + count: Count::new(), + }), } } -/// Computes the [`LineIndex`] for `file`. -#[salsa::tracked] -pub fn line_index(db: &dyn Db, file: File) -> LineIndex { - let _span = tracing::trace_span!("line_index", file = ?file.debug(db)).entered(); - - let source = source_text(db, file); - - LineIndex::from_source_text(&source) -} - -/// The source text of a [`File`]. +/// The source text of a file containing python code. +/// +/// The file containing the source text can either be a text file or a notebook. /// /// Cheap cloneable in `O(1)`. #[derive(Clone, Eq, PartialEq)] pub struct SourceText { - inner: Arc, - count: Count, + inner: Arc, } impl SourceText { + /// Returns the python code as a `str`. pub fn as_str(&self) -> &str { - &self.inner + match &self.inner.kind { + SourceTextKind::Text(source) => source, + SourceTextKind::Notebook(notebook) => notebook.source_code(), + } + } + + /// Returns the underlying notebook if this is a notebook file. + pub fn as_notebook(&self) -> Option<&Notebook> { + match &self.inner.kind { + SourceTextKind::Notebook(notebook) => Some(notebook), + SourceTextKind::Text(_) => None, + } + } + + /// Returns `true` if this is a notebook source file. + pub fn is_notebook(&self) -> bool { + matches!(&self.inner.kind, SourceTextKind::Notebook(_)) } } @@ -55,20 +91,54 @@ impl Deref for SourceText { impl std::fmt::Debug for SourceText { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_tuple("SourceText").field(&self.inner).finish() + let mut dbg = f.debug_tuple("SourceText"); + + match &self.inner.kind { + SourceTextKind::Text(text) => { + dbg.field(text); + } + SourceTextKind::Notebook(notebook) => { + dbg.field(notebook); + } + } + + dbg.finish() } } +#[derive(Eq, PartialEq)] +struct SourceTextInner { + count: Count, + kind: SourceTextKind, +} + +#[derive(Eq, PartialEq)] +enum SourceTextKind { + Text(String), + Notebook(Notebook), +} + +/// Computes the [`LineIndex`] for `file`. +#[salsa::tracked] +pub fn line_index(db: &dyn Db, file: File) -> LineIndex { + let _span = tracing::trace_span!("line_index", file = ?file.debug(db)).entered(); + + let source = source_text(db, file); + + LineIndex::from_source_text(&source) +} + #[cfg(test)] mod tests { use salsa::EventKind; + use ruff_source_file::OneIndexed; + use ruff_text_size::TextSize; + use crate::files::system_path_to_file; use crate::source::{line_index, source_text}; use crate::system::{DbWithTestSystem, SystemPath}; use crate::tests::TestDb; - use ruff_source_file::OneIndexed; - use ruff_text_size::TextSize; #[test] fn re_runs_query_when_file_revision_changes() -> crate::system::Result<()> { @@ -79,11 +149,11 @@ mod tests { let file = system_path_to_file(&db, path).unwrap(); - assert_eq!(&*source_text(&db, file), "x = 10"); + assert_eq!(source_text(&db, file).as_str(), "x = 10"); db.write_file(path, "x = 20".to_string()).unwrap(); - assert_eq!(&*source_text(&db, file), "x = 20"); + assert_eq!(source_text(&db, file).as_str(), "x = 20"); Ok(()) } @@ -97,13 +167,13 @@ mod tests { let file = system_path_to_file(&db, path).unwrap(); - assert_eq!(&*source_text(&db, file), "x = 10"); + assert_eq!(source_text(&db, file).as_str(), "x = 10"); // Change the file permission only file.set_permissions(&mut db).to(Some(0o777)); db.clear_salsa_events(); - assert_eq!(&*source_text(&db, file), "x = 10"); + assert_eq!(source_text(&db, file).as_str(), "x = 10"); let events = db.take_salsa_events(); @@ -123,14 +193,54 @@ mod tests { let file = system_path_to_file(&db, path).unwrap(); let index = line_index(&db, file); - let text = source_text(&db, file); + let source = source_text(&db, file); assert_eq!(index.line_count(), 2); assert_eq!( - index.line_start(OneIndexed::from_zero_indexed(0), &text), + index.line_start(OneIndexed::from_zero_indexed(0), source.as_str()), TextSize::new(0) ); Ok(()) } + + #[test] + fn notebook() -> crate::system::Result<()> { + let mut db = TestDb::new(); + + let path = SystemPath::new("test.ipynb"); + db.write_file( + path, + r#" +{ + "cells": [{"cell_type": "code", "source": ["x = 10"], "metadata": {}, "outputs": []}], + "metadata": { + "kernelspec": { + "display_name": "Python (ruff)", + "language": "python", + "name": "ruff" + }, + "language_info": { + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}"#, + )?; + + let file = system_path_to_file(&db, path).unwrap(); + let source = source_text(&db, file); + + assert!(source.is_notebook()); + assert_eq!(source.as_str(), "x = 10\n"); + assert!(source.as_notebook().is_some()); + + Ok(()) + } } diff --git a/crates/ruff_db/src/system.rs b/crates/ruff_db/src/system.rs index 09c3a87768..b346ffb346 100644 --- a/crates/ruff_db/src/system.rs +++ b/crates/ruff_db/src/system.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; pub use memory_fs::MemoryFileSystem; #[cfg(feature = "os")] pub use os::OsSystem; +use ruff_notebook::{Notebook, NotebookError}; pub use test::{DbWithTestSystem, TestSystem}; use walk_directory::WalkDirectoryBuilder; @@ -40,6 +41,13 @@ pub trait System: Debug { /// Reads the content of the file at `path` into a [`String`]. fn read_to_string(&self, path: &SystemPath) -> Result; + /// Reads the content of the file at `path` as a Notebook. + /// + /// This method optimizes for the case where the system holds a structured representation of a [`Notebook`], + /// allowing to skip the notebook deserialization. Systems that don't use a structured + /// representation fall-back to deserializing the notebook from a string. + fn read_to_notebook(&self, path: &SystemPath) -> std::result::Result; + /// Returns `true` if `path` exists. fn path_exists(&self, path: &SystemPath) -> bool { self.path_metadata(path).is_ok() diff --git a/crates/ruff_db/src/system/memory_fs.rs b/crates/ruff_db/src/system/memory_fs.rs index 3c48690e8f..21fc8bad9e 100644 --- a/crates/ruff_db/src/system/memory_fs.rs +++ b/crates/ruff_db/src/system/memory_fs.rs @@ -126,6 +126,14 @@ impl MemoryFileSystem { read_to_string(self, path.as_ref()) } + pub(crate) fn read_to_notebook( + &self, + path: impl AsRef, + ) -> std::result::Result { + let content = self.read_to_string(path)?; + ruff_notebook::Notebook::from_source_code(&content) + } + pub fn exists(&self, path: &SystemPath) -> bool { let by_path = self.inner.by_path.read().unwrap(); let normalized = self.normalize_path(path); diff --git a/crates/ruff_db/src/system/os.rs b/crates/ruff_db/src/system/os.rs index 5f7882623c..b4bec35a07 100644 --- a/crates/ruff_db/src/system/os.rs +++ b/crates/ruff_db/src/system/os.rs @@ -1,9 +1,13 @@ +use std::sync::Arc; +use std::{any::Any, path::PathBuf}; + +use filetime::FileTime; + +use ruff_notebook::{Notebook, NotebookError}; + use crate::system::{ DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf, }; -use filetime::FileTime; -use std::sync::Arc; -use std::{any::Any, path::PathBuf}; use super::walk_directory::{ self, DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, @@ -65,6 +69,10 @@ impl System for OsSystem { std::fs::read_to_string(path.as_std_path()) } + fn read_to_notebook(&self, path: &SystemPath) -> std::result::Result { + Notebook::from_path(path.as_std_path()) + } + fn path_exists(&self, path: &SystemPath) -> bool { path.as_std_path().exists() } @@ -266,10 +274,12 @@ impl From for ignore::WalkState { #[cfg(test)] mod tests { - use super::*; + use tempfile::TempDir; + use crate::system::walk_directory::tests::DirectoryEntryToString; use crate::system::DirectoryEntry; - use tempfile::TempDir; + + use super::*; #[test] fn read_directory() { diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index 4435e55d6d..ca607b4ba9 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -1,3 +1,5 @@ +use ruff_notebook::{Notebook, NotebookError}; + use crate::files::File; use crate::system::{DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath}; use crate::Db; @@ -50,14 +52,21 @@ impl System for TestSystem { fn path_metadata(&self, path: &SystemPath) -> crate::system::Result { match &self.inner { TestSystemInner::Stub(fs) => fs.metadata(path), - TestSystemInner::System(fs) => fs.path_metadata(path), + TestSystemInner::System(system) => system.path_metadata(path), } } fn read_to_string(&self, path: &SystemPath) -> crate::system::Result { match &self.inner { TestSystemInner::Stub(fs) => fs.read_to_string(path), - TestSystemInner::System(fs) => fs.read_to_string(path), + TestSystemInner::System(system) => system.read_to_string(path), + } + } + + fn read_to_notebook(&self, path: &SystemPath) -> std::result::Result { + match &self.inner { + TestSystemInner::Stub(fs) => fs.read_to_notebook(path), + TestSystemInner::System(system) => system.read_to_notebook(path), } } diff --git a/crates/ruff_notebook/src/notebook.rs b/crates/ruff_notebook/src/notebook.rs index 99408908a9..97096a114a 100644 --- a/crates/ruff_notebook/src/notebook.rs +++ b/crates/ruff_notebook/src/notebook.rs @@ -19,7 +19,7 @@ use ruff_text_size::TextSize; use crate::cell::CellOffsets; use crate::index::NotebookIndex; use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue}; -use crate::RawNotebookMetadata; +use crate::{schema, RawNotebookMetadata}; /// Run round-trip source code generation on a given Jupyter notebook file path. pub fn round_trip(path: &Path) -> anyhow::Result { @@ -52,7 +52,7 @@ pub enum NotebookError { InvalidFormat(i64), } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug)] pub struct Notebook { /// Python source code of the notebook. /// @@ -205,6 +205,28 @@ impl Notebook { }) } + /// Creates an empty notebook. + /// + /// + pub fn empty() -> Self { + Self::from_raw_notebook( + RawNotebook { + cells: vec![schema::Cell::Code(schema::CodeCell { + execution_count: None, + id: None, + metadata: serde_json::Value::default(), + outputs: vec![], + source: schema::SourceValue::String(String::default()), + })], + metadata: RawNotebookMetadata::default(), + nbformat: 4, + nbformat_minor: 5, + }, + false, + ) + .unwrap() + } + /// Update the cell offsets as per the given [`SourceMap`]. fn update_cell_offsets(&mut self, source_map: &SourceMap) { // When there are multiple cells without any edits, the offsets of those @@ -412,6 +434,14 @@ impl Notebook { } } +impl PartialEq for Notebook { + fn eq(&self, other: &Self) -> bool { + self.trailing_newline == other.trailing_newline && self.raw == other.raw + } +} + +impl Eq for Notebook {} + #[cfg(test)] mod tests { use std::path::Path; @@ -458,6 +488,13 @@ mod tests { )); } + #[test] + fn empty_notebook() { + let notebook = Notebook::empty(); + + assert_eq!(notebook.source_code(), "\n"); + } + #[test_case("markdown", false)] #[test_case("only_magic", true)] #[test_case("code_and_magic", true)] diff --git a/crates/ruff_notebook/src/schema.rs b/crates/ruff_notebook/src/schema.rs index 63874def74..7699755b31 100644 --- a/crates/ruff_notebook/src/schema.rs +++ b/crates/ruff_notebook/src/schema.rs @@ -161,7 +161,7 @@ pub struct CodeCell { /// Notebook root-level metadata. #[skip_serializing_none] -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Default)] pub struct RawNotebookMetadata { /// The author(s) of the notebook document pub authors: Option, diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 47491ebd3b..205c7b98c7 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -86,12 +86,19 @@ impl PySourceType { /// /// Falls back to `Python` if the extension is not recognized. pub fn from_extension(extension: &str) -> Self { - match extension { + Self::try_from_extension(extension).unwrap_or_default() + } + + /// Infers the source type from the file extension. + pub fn try_from_extension(extension: &str) -> Option { + let ty = match extension { "py" => Self::Python, "pyi" => Self::Stub, "ipynb" => Self::Ipynb, - _ => Self::Python, - } + _ => return None, + }; + + Some(ty) } }