mirror of https://github.com/astral-sh/ruff
[red-knot] Add notebook support (#12338)
This commit is contained in:
parent
fe04f2b09d
commit
0c72577b5d
|
|
@ -2088,6 +2088,7 @@ dependencies = [
|
|||
"filetime",
|
||||
"ignore",
|
||||
"insta",
|
||||
"ruff_notebook",
|
||||
"ruff_python_ast",
|
||||
"ruff_python_parser",
|
||||
"ruff_source_file",
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ use ruff_db::system::SystemPath;
|
|||
use rustc_hash::FxHashMap;
|
||||
|
||||
use ruff_db::files::{system_path_to_file, File};
|
||||
use ruff_db::source::source_text;
|
||||
|
||||
use crate::db::Db;
|
||||
use crate::module_name::ModuleName;
|
||||
|
|
@ -74,7 +73,10 @@ pub(crate) fn parse_typeshed_versions(
|
|||
db: &dyn Db,
|
||||
versions_file: File,
|
||||
) -> Result<TypeshedVersions, TypeshedVersionsParseError> {
|
||||
let file_content = source_text(db.upcast(), versions_file);
|
||||
// TODO: Handle IO errors
|
||||
let file_content = versions_file
|
||||
.read_to_string(db.upcast())
|
||||
.unwrap_or_default();
|
||||
file_content.parse()
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ repository = { workspace = true }
|
|||
license = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
ruff_notebook = { workspace = true }
|
||||
ruff_python_ast = { workspace = true }
|
||||
ruff_python_parser = { workspace = true }
|
||||
ruff_source_file = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -187,10 +187,10 @@ impl File {
|
|||
/// an empty string, which is the closest to the content that the file contains now. Returning
|
||||
/// an empty string shouldn't be a problem because the query will be re-executed as soon as the
|
||||
/// changes are applied to the database.
|
||||
pub(crate) fn read_to_string(&self, db: &dyn Db) -> String {
|
||||
pub fn read_to_string(&self, db: &dyn Db) -> crate::system::Result<String> {
|
||||
let path = self.path(db);
|
||||
|
||||
let result = match path {
|
||||
match path {
|
||||
FilePath::System(system) => {
|
||||
// Add a dependency on the revision to ensure the operation gets re-executed when the file changes.
|
||||
let _ = self.revision(db);
|
||||
|
|
@ -198,9 +198,7 @@ impl File {
|
|||
db.system().read_to_string(system)
|
||||
}
|
||||
FilePath::Vendored(vendored) => db.vendored().read_to_string(vendored),
|
||||
};
|
||||
|
||||
result.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Refreshes the file metadata by querying the file system if needed.
|
||||
|
|
@ -274,7 +272,7 @@ mod tests {
|
|||
|
||||
assert_eq!(test.permissions(&db), Some(0o755));
|
||||
assert_ne!(test.revision(&db), FileRevision::zero());
|
||||
assert_eq!(&test.read_to_string(&db), "print('Hello world')");
|
||||
assert_eq!(&test.read_to_string(&db)?, "print('Hello world')");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -304,7 +302,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn stubbed_vendored_file() {
|
||||
fn stubbed_vendored_file() -> crate::system::Result<()> {
|
||||
let mut db = TestDb::new();
|
||||
|
||||
let mut vendored_builder = VendoredFileSystemBuilder::new();
|
||||
|
|
@ -318,7 +316,9 @@ mod tests {
|
|||
|
||||
assert_eq!(test.permissions(&db), Some(0o444));
|
||||
assert_ne!(test.revision(&db), FileRevision::zero());
|
||||
assert_eq!(&test.read_to_string(&db), "def foo() -> str");
|
||||
assert_eq!(&test.read_to_string(&db)?, "def foo() -> str");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,47 +1,83 @@
|
|||
use countme::Count;
|
||||
use ruff_source_file::LineIndex;
|
||||
use salsa::DebugWithDb;
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
use countme::Count;
|
||||
use salsa::DebugWithDb;
|
||||
|
||||
use ruff_notebook::Notebook;
|
||||
use ruff_python_ast::PySourceType;
|
||||
use ruff_source_file::LineIndex;
|
||||
|
||||
use crate::files::File;
|
||||
use crate::Db;
|
||||
|
||||
/// Reads the content of file.
|
||||
/// Reads the source text of a python text file (must be valid UTF8) or notebook.
|
||||
#[salsa::tracked]
|
||||
pub fn source_text(db: &dyn Db, file: File) -> SourceText {
|
||||
let _span = tracing::trace_span!("source_text", ?file).entered();
|
||||
|
||||
let content = file.read_to_string(db);
|
||||
if let Some(path) = file.path(db).as_system_path() {
|
||||
if path.extension().is_some_and(|extension| {
|
||||
PySourceType::try_from_extension(extension) == Some(PySourceType::Ipynb)
|
||||
}) {
|
||||
// TODO(micha): Proper error handling and emit a diagnostic. Tackle it together with `source_text`.
|
||||
let notebook = db.system().read_to_notebook(path).unwrap_or_else(|error| {
|
||||
tracing::error!("Failed to load notebook: {error}");
|
||||
Notebook::empty()
|
||||
});
|
||||
|
||||
return SourceText {
|
||||
inner: Arc::new(SourceTextInner {
|
||||
kind: SourceTextKind::Notebook(notebook),
|
||||
count: Count::new(),
|
||||
}),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let content = file.read_to_string(db).unwrap_or_else(|error| {
|
||||
tracing::error!("Failed to load file: {error}");
|
||||
String::default()
|
||||
});
|
||||
|
||||
SourceText {
|
||||
inner: Arc::from(content),
|
||||
count: Count::new(),
|
||||
inner: Arc::new(SourceTextInner {
|
||||
kind: SourceTextKind::Text(content),
|
||||
count: Count::new(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the [`LineIndex`] for `file`.
|
||||
#[salsa::tracked]
|
||||
pub fn line_index(db: &dyn Db, file: File) -> LineIndex {
|
||||
let _span = tracing::trace_span!("line_index", file = ?file.debug(db)).entered();
|
||||
|
||||
let source = source_text(db, file);
|
||||
|
||||
LineIndex::from_source_text(&source)
|
||||
}
|
||||
|
||||
/// The source text of a [`File`].
|
||||
/// The source text of a file containing python code.
|
||||
///
|
||||
/// The file containing the source text can either be a text file or a notebook.
|
||||
///
|
||||
/// Cheap cloneable in `O(1)`.
|
||||
#[derive(Clone, Eq, PartialEq)]
|
||||
pub struct SourceText {
|
||||
inner: Arc<str>,
|
||||
count: Count<Self>,
|
||||
inner: Arc<SourceTextInner>,
|
||||
}
|
||||
|
||||
impl SourceText {
|
||||
/// Returns the python code as a `str`.
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.inner
|
||||
match &self.inner.kind {
|
||||
SourceTextKind::Text(source) => source,
|
||||
SourceTextKind::Notebook(notebook) => notebook.source_code(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the underlying notebook if this is a notebook file.
|
||||
pub fn as_notebook(&self) -> Option<&Notebook> {
|
||||
match &self.inner.kind {
|
||||
SourceTextKind::Notebook(notebook) => Some(notebook),
|
||||
SourceTextKind::Text(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this is a notebook source file.
|
||||
pub fn is_notebook(&self) -> bool {
|
||||
matches!(&self.inner.kind, SourceTextKind::Notebook(_))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -55,20 +91,54 @@ impl Deref for SourceText {
|
|||
|
||||
impl std::fmt::Debug for SourceText {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_tuple("SourceText").field(&self.inner).finish()
|
||||
let mut dbg = f.debug_tuple("SourceText");
|
||||
|
||||
match &self.inner.kind {
|
||||
SourceTextKind::Text(text) => {
|
||||
dbg.field(text);
|
||||
}
|
||||
SourceTextKind::Notebook(notebook) => {
|
||||
dbg.field(notebook);
|
||||
}
|
||||
}
|
||||
|
||||
dbg.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
struct SourceTextInner {
|
||||
count: Count<SourceText>,
|
||||
kind: SourceTextKind,
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
enum SourceTextKind {
|
||||
Text(String),
|
||||
Notebook(Notebook),
|
||||
}
|
||||
|
||||
/// Computes the [`LineIndex`] for `file`.
|
||||
#[salsa::tracked]
|
||||
pub fn line_index(db: &dyn Db, file: File) -> LineIndex {
|
||||
let _span = tracing::trace_span!("line_index", file = ?file.debug(db)).entered();
|
||||
|
||||
let source = source_text(db, file);
|
||||
|
||||
LineIndex::from_source_text(&source)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use salsa::EventKind;
|
||||
|
||||
use ruff_source_file::OneIndexed;
|
||||
use ruff_text_size::TextSize;
|
||||
|
||||
use crate::files::system_path_to_file;
|
||||
use crate::source::{line_index, source_text};
|
||||
use crate::system::{DbWithTestSystem, SystemPath};
|
||||
use crate::tests::TestDb;
|
||||
use ruff_source_file::OneIndexed;
|
||||
use ruff_text_size::TextSize;
|
||||
|
||||
#[test]
|
||||
fn re_runs_query_when_file_revision_changes() -> crate::system::Result<()> {
|
||||
|
|
@ -79,11 +149,11 @@ mod tests {
|
|||
|
||||
let file = system_path_to_file(&db, path).unwrap();
|
||||
|
||||
assert_eq!(&*source_text(&db, file), "x = 10");
|
||||
assert_eq!(source_text(&db, file).as_str(), "x = 10");
|
||||
|
||||
db.write_file(path, "x = 20".to_string()).unwrap();
|
||||
|
||||
assert_eq!(&*source_text(&db, file), "x = 20");
|
||||
assert_eq!(source_text(&db, file).as_str(), "x = 20");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -97,13 +167,13 @@ mod tests {
|
|||
|
||||
let file = system_path_to_file(&db, path).unwrap();
|
||||
|
||||
assert_eq!(&*source_text(&db, file), "x = 10");
|
||||
assert_eq!(source_text(&db, file).as_str(), "x = 10");
|
||||
|
||||
// Change the file permission only
|
||||
file.set_permissions(&mut db).to(Some(0o777));
|
||||
|
||||
db.clear_salsa_events();
|
||||
assert_eq!(&*source_text(&db, file), "x = 10");
|
||||
assert_eq!(source_text(&db, file).as_str(), "x = 10");
|
||||
|
||||
let events = db.take_salsa_events();
|
||||
|
||||
|
|
@ -123,14 +193,54 @@ mod tests {
|
|||
|
||||
let file = system_path_to_file(&db, path).unwrap();
|
||||
let index = line_index(&db, file);
|
||||
let text = source_text(&db, file);
|
||||
let source = source_text(&db, file);
|
||||
|
||||
assert_eq!(index.line_count(), 2);
|
||||
assert_eq!(
|
||||
index.line_start(OneIndexed::from_zero_indexed(0), &text),
|
||||
index.line_start(OneIndexed::from_zero_indexed(0), source.as_str()),
|
||||
TextSize::new(0)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn notebook() -> crate::system::Result<()> {
|
||||
let mut db = TestDb::new();
|
||||
|
||||
let path = SystemPath::new("test.ipynb");
|
||||
db.write_file(
|
||||
path,
|
||||
r#"
|
||||
{
|
||||
"cells": [{"cell_type": "code", "source": ["x = 10"], "metadata": {}, "outputs": []}],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python (ruff)",
|
||||
"language": "python",
|
||||
"name": "ruff"
|
||||
},
|
||||
"language_info": {
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}"#,
|
||||
)?;
|
||||
|
||||
let file = system_path_to_file(&db, path).unwrap();
|
||||
let source = source_text(&db, file);
|
||||
|
||||
assert!(source.is_notebook());
|
||||
assert_eq!(source.as_str(), "x = 10\n");
|
||||
assert!(source.as_notebook().is_some());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use std::fmt::Debug;
|
|||
pub use memory_fs::MemoryFileSystem;
|
||||
#[cfg(feature = "os")]
|
||||
pub use os::OsSystem;
|
||||
use ruff_notebook::{Notebook, NotebookError};
|
||||
pub use test::{DbWithTestSystem, TestSystem};
|
||||
use walk_directory::WalkDirectoryBuilder;
|
||||
|
||||
|
|
@ -40,6 +41,13 @@ pub trait System: Debug {
|
|||
/// Reads the content of the file at `path` into a [`String`].
|
||||
fn read_to_string(&self, path: &SystemPath) -> Result<String>;
|
||||
|
||||
/// Reads the content of the file at `path` as a Notebook.
|
||||
///
|
||||
/// This method optimizes for the case where the system holds a structured representation of a [`Notebook`],
|
||||
/// allowing to skip the notebook deserialization. Systems that don't use a structured
|
||||
/// representation fall-back to deserializing the notebook from a string.
|
||||
fn read_to_notebook(&self, path: &SystemPath) -> std::result::Result<Notebook, NotebookError>;
|
||||
|
||||
/// Returns `true` if `path` exists.
|
||||
fn path_exists(&self, path: &SystemPath) -> bool {
|
||||
self.path_metadata(path).is_ok()
|
||||
|
|
|
|||
|
|
@ -126,6 +126,14 @@ impl MemoryFileSystem {
|
|||
read_to_string(self, path.as_ref())
|
||||
}
|
||||
|
||||
pub(crate) fn read_to_notebook(
|
||||
&self,
|
||||
path: impl AsRef<SystemPath>,
|
||||
) -> std::result::Result<ruff_notebook::Notebook, ruff_notebook::NotebookError> {
|
||||
let content = self.read_to_string(path)?;
|
||||
ruff_notebook::Notebook::from_source_code(&content)
|
||||
}
|
||||
|
||||
pub fn exists(&self, path: &SystemPath) -> bool {
|
||||
let by_path = self.inner.by_path.read().unwrap();
|
||||
let normalized = self.normalize_path(path);
|
||||
|
|
|
|||
|
|
@ -1,9 +1,13 @@
|
|||
use std::sync::Arc;
|
||||
use std::{any::Any, path::PathBuf};
|
||||
|
||||
use filetime::FileTime;
|
||||
|
||||
use ruff_notebook::{Notebook, NotebookError};
|
||||
|
||||
use crate::system::{
|
||||
DirectoryEntry, FileType, Metadata, Result, System, SystemPath, SystemPathBuf,
|
||||
};
|
||||
use filetime::FileTime;
|
||||
use std::sync::Arc;
|
||||
use std::{any::Any, path::PathBuf};
|
||||
|
||||
use super::walk_directory::{
|
||||
self, DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration,
|
||||
|
|
@ -65,6 +69,10 @@ impl System for OsSystem {
|
|||
std::fs::read_to_string(path.as_std_path())
|
||||
}
|
||||
|
||||
fn read_to_notebook(&self, path: &SystemPath) -> std::result::Result<Notebook, NotebookError> {
|
||||
Notebook::from_path(path.as_std_path())
|
||||
}
|
||||
|
||||
fn path_exists(&self, path: &SystemPath) -> bool {
|
||||
path.as_std_path().exists()
|
||||
}
|
||||
|
|
@ -266,10 +274,12 @@ impl From<WalkState> for ignore::WalkState {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::system::walk_directory::tests::DirectoryEntryToString;
|
||||
use crate::system::DirectoryEntry;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn read_directory() {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
use ruff_notebook::{Notebook, NotebookError};
|
||||
|
||||
use crate::files::File;
|
||||
use crate::system::{DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath};
|
||||
use crate::Db;
|
||||
|
|
@ -50,14 +52,21 @@ impl System for TestSystem {
|
|||
fn path_metadata(&self, path: &SystemPath) -> crate::system::Result<Metadata> {
|
||||
match &self.inner {
|
||||
TestSystemInner::Stub(fs) => fs.metadata(path),
|
||||
TestSystemInner::System(fs) => fs.path_metadata(path),
|
||||
TestSystemInner::System(system) => system.path_metadata(path),
|
||||
}
|
||||
}
|
||||
|
||||
fn read_to_string(&self, path: &SystemPath) -> crate::system::Result<String> {
|
||||
match &self.inner {
|
||||
TestSystemInner::Stub(fs) => fs.read_to_string(path),
|
||||
TestSystemInner::System(fs) => fs.read_to_string(path),
|
||||
TestSystemInner::System(system) => system.read_to_string(path),
|
||||
}
|
||||
}
|
||||
|
||||
fn read_to_notebook(&self, path: &SystemPath) -> std::result::Result<Notebook, NotebookError> {
|
||||
match &self.inner {
|
||||
TestSystemInner::Stub(fs) => fs.read_to_notebook(path),
|
||||
TestSystemInner::System(system) => system.read_to_notebook(path),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ use ruff_text_size::TextSize;
|
|||
use crate::cell::CellOffsets;
|
||||
use crate::index::NotebookIndex;
|
||||
use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
|
||||
use crate::RawNotebookMetadata;
|
||||
use crate::{schema, RawNotebookMetadata};
|
||||
|
||||
/// Run round-trip source code generation on a given Jupyter notebook file path.
|
||||
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
|
||||
|
|
@ -52,7 +52,7 @@ pub enum NotebookError {
|
|||
InvalidFormat(i64),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Notebook {
|
||||
/// Python source code of the notebook.
|
||||
///
|
||||
|
|
@ -205,6 +205,28 @@ impl Notebook {
|
|||
})
|
||||
}
|
||||
|
||||
/// Creates an empty notebook.
|
||||
///
|
||||
///
|
||||
pub fn empty() -> Self {
|
||||
Self::from_raw_notebook(
|
||||
RawNotebook {
|
||||
cells: vec![schema::Cell::Code(schema::CodeCell {
|
||||
execution_count: None,
|
||||
id: None,
|
||||
metadata: serde_json::Value::default(),
|
||||
outputs: vec![],
|
||||
source: schema::SourceValue::String(String::default()),
|
||||
})],
|
||||
metadata: RawNotebookMetadata::default(),
|
||||
nbformat: 4,
|
||||
nbformat_minor: 5,
|
||||
},
|
||||
false,
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Update the cell offsets as per the given [`SourceMap`].
|
||||
fn update_cell_offsets(&mut self, source_map: &SourceMap) {
|
||||
// When there are multiple cells without any edits, the offsets of those
|
||||
|
|
@ -412,6 +434,14 @@ impl Notebook {
|
|||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Notebook {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.trailing_newline == other.trailing_newline && self.raw == other.raw
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for Notebook {}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::Path;
|
||||
|
|
@ -458,6 +488,13 @@ mod tests {
|
|||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_notebook() {
|
||||
let notebook = Notebook::empty();
|
||||
|
||||
assert_eq!(notebook.source_code(), "\n");
|
||||
}
|
||||
|
||||
#[test_case("markdown", false)]
|
||||
#[test_case("only_magic", true)]
|
||||
#[test_case("code_and_magic", true)]
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ pub struct CodeCell {
|
|||
|
||||
/// Notebook root-level metadata.
|
||||
#[skip_serializing_none]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Default)]
|
||||
pub struct RawNotebookMetadata {
|
||||
/// The author(s) of the notebook document
|
||||
pub authors: Option<Value>,
|
||||
|
|
|
|||
|
|
@ -86,12 +86,19 @@ impl PySourceType {
|
|||
///
|
||||
/// Falls back to `Python` if the extension is not recognized.
|
||||
pub fn from_extension(extension: &str) -> Self {
|
||||
match extension {
|
||||
Self::try_from_extension(extension).unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Infers the source type from the file extension.
|
||||
pub fn try_from_extension(extension: &str) -> Option<Self> {
|
||||
let ty = match extension {
|
||||
"py" => Self::Python,
|
||||
"pyi" => Self::Stub,
|
||||
"ipynb" => Self::Ipynb,
|
||||
_ => Self::Python,
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
Some(ty)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue