From ff53db3d995669d04295c397d9b02641f53d0a3f Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 13 Aug 2024 22:09:56 +0530 Subject: [PATCH] Consider VS Code cell metadata to determine valid code cells (#12864) ## Summary This PR adds support for VS Code specific cell metadata to consider when collecting valid code cells. For context, Ruff only runs on valid code cells. These are the code cells that doesn't contain cell magics. Previously, Ruff only used the notebook's metadata to determine whether it's a Python notebook. But, in VS Code, a notebook's preferred language might be Python but it could still contain code cells for other languages. This can be determined with the `metadata.vscode.languageId` field. ### References: * https://code.visualstudio.com/docs/languages/identifiers * https://github.com/microsoft/vscode/blob/e6c009a3d4ee60f352212b978934f52c4689fbd9/extensions/ipynb/src/serializers.ts#L104-L107 * https://github.com/microsoft/vscode/blob/e6c009a3d4ee60f352212b978934f52c4689fbd9/extensions/ipynb/src/serializers.ts#L117-L122 This brings us one step closer to fixing #12281. ## Test Plan Add test cases for `is_valid_python_code_cell` and an integration test case which showcase running it end to end. The test notebook contains a JavaScript code cell and a Python code cell. --- crates/red_knot_server/src/edit/notebook.rs | 5 +- crates/ruff_linter/src/linter.rs | 17 +++++++ ...er__linter__tests__vscode_language_id.snap | 16 ++++++ .../cell/vscode_language_id_javascript.json | 13 +++++ .../cell/vscode_language_id_python.json | 13 +++++ .../fixtures/jupyter/vscode_language_id.ipynb | 51 +++++++++++++++++++ .../jupyter/vscode_language_id_expected.ipynb | 50 ++++++++++++++++++ crates/ruff_notebook/src/cell.rs | 21 ++++++-- crates/ruff_notebook/src/notebook.rs | 24 ++++++--- crates/ruff_notebook/src/schema.rs | 32 ++++++++++-- crates/ruff_server/src/edit/notebook.rs | 5 +- 11 files changed, 226 insertions(+), 21 deletions(-) create mode 100644 crates/ruff_linter/src/snapshots/ruff_linter__linter__tests__vscode_language_id.snap create mode 100644 crates/ruff_notebook/resources/test/fixtures/jupyter/cell/vscode_language_id_javascript.json create mode 100644 crates/ruff_notebook/resources/test/fixtures/jupyter/cell/vscode_language_id_python.json create mode 100644 crates/ruff_notebook/resources/test/fixtures/jupyter/vscode_language_id.ipynb create mode 100644 crates/ruff_notebook/resources/test/fixtures/jupyter/vscode_language_id_expected.ipynb diff --git a/crates/red_knot_server/src/edit/notebook.rs b/crates/red_knot_server/src/edit/notebook.rs index f13f1e6680..eb716972a9 100644 --- a/crates/red_knot_server/src/edit/notebook.rs +++ b/crates/red_knot_server/src/edit/notebook.rs @@ -1,5 +1,6 @@ use anyhow::Ok; use lsp_types::NotebookCellKind; +use ruff_notebook::CellMetadata; use rustc_hash::{FxBuildHasher, FxHashMap}; use crate::{PositionEncoding, TextDocument}; @@ -65,7 +66,7 @@ impl NotebookDocument { NotebookCellKind::Code => ruff_notebook::Cell::Code(ruff_notebook::CodeCell { execution_count: None, id: None, - metadata: serde_json::Value::Null, + metadata: CellMetadata::default(), outputs: vec![], source: ruff_notebook::SourceValue::String( cell.document.contents().to_string(), @@ -75,7 +76,7 @@ impl NotebookDocument { ruff_notebook::Cell::Markdown(ruff_notebook::MarkdownCell { attachments: None, id: None, - metadata: serde_json::Value::Null, + metadata: CellMetadata::default(), source: ruff_notebook::SourceValue::String( cell.document.contents().to_string(), ), diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs index 11017d3a74..ca7dc608f7 100644 --- a/crates/ruff_linter/src/linter.rs +++ b/crates/ruff_linter/src/linter.rs @@ -791,6 +791,23 @@ mod tests { Ok(()) } + #[test] + fn test_vscode_language_id() -> Result<()> { + let actual = notebook_path("vscode_language_id.ipynb"); + let expected = notebook_path("vscode_language_id_expected.ipynb"); + let TestedNotebook { + messages, + source_notebook, + .. + } = assert_notebook_path( + &actual, + expected, + &settings::LinterSettings::for_rule(Rule::UnusedImport), + )?; + assert_messages!(messages, actual, source_notebook); + Ok(()) + } + #[test_case(Path::new("before_fix.ipynb"), true; "trailing_newline")] #[test_case(Path::new("no_trailing_newline.ipynb"), false; "no_trailing_newline")] fn test_trailing_newline(path: &Path, trailing_newline: bool) -> Result<()> { diff --git a/crates/ruff_linter/src/snapshots/ruff_linter__linter__tests__vscode_language_id.snap b/crates/ruff_linter/src/snapshots/ruff_linter__linter__tests__vscode_language_id.snap new file mode 100644 index 0000000000..d89e58dc78 --- /dev/null +++ b/crates/ruff_linter/src/snapshots/ruff_linter__linter__tests__vscode_language_id.snap @@ -0,0 +1,16 @@ +--- +source: crates/ruff_linter/src/linter.rs +--- +vscode_language_id.ipynb:cell 3:1:8: F401 [*] `os` imported but unused + | +1 | import os + | ^^ F401 +2 | +3 | print("hello world") + | + = help: Remove unused import: `os` + +ℹ Safe fix +1 |-import os +2 1 | +3 2 | print("hello world") diff --git a/crates/ruff_notebook/resources/test/fixtures/jupyter/cell/vscode_language_id_javascript.json b/crates/ruff_notebook/resources/test/fixtures/jupyter/cell/vscode_language_id_javascript.json new file mode 100644 index 0000000000..9c84a13971 --- /dev/null +++ b/crates/ruff_notebook/resources/test/fixtures/jupyter/cell/vscode_language_id_javascript.json @@ -0,0 +1,13 @@ +{ + "execution_count": null, + "cell_type": "code", + "id": "1", + "metadata": { + "vscode": { + "languageId": "javascript" + } + }, + "outputs": [], + "source": [] +} + diff --git a/crates/ruff_notebook/resources/test/fixtures/jupyter/cell/vscode_language_id_python.json b/crates/ruff_notebook/resources/test/fixtures/jupyter/cell/vscode_language_id_python.json new file mode 100644 index 0000000000..c8abc15047 --- /dev/null +++ b/crates/ruff_notebook/resources/test/fixtures/jupyter/cell/vscode_language_id_python.json @@ -0,0 +1,13 @@ +{ + "execution_count": null, + "cell_type": "code", + "id": "1", + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [] +} + diff --git a/crates/ruff_notebook/resources/test/fixtures/jupyter/vscode_language_id.ipynb b/crates/ruff_notebook/resources/test/fixtures/jupyter/vscode_language_id.ipynb new file mode 100644 index 0000000000..a8a931ee66 --- /dev/null +++ b/crates/ruff_notebook/resources/test/fixtures/jupyter/vscode_language_id.ipynb @@ -0,0 +1,51 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VS Code `languageId`\n", + "\n", + "This is a test notebook for VS Code specific cell metadata.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true, + "source_hidden": true + }, + "vscode": { + "languageId": "javascript" + } + }, + "outputs": [], + "source": [ + "function add(x, y) {\n", + " return x + y;\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "print(\"hello world\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/crates/ruff_notebook/resources/test/fixtures/jupyter/vscode_language_id_expected.ipynb b/crates/ruff_notebook/resources/test/fixtures/jupyter/vscode_language_id_expected.ipynb new file mode 100644 index 0000000000..adeddbe094 --- /dev/null +++ b/crates/ruff_notebook/resources/test/fixtures/jupyter/vscode_language_id_expected.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VS Code `languageId`\n", + "\n", + "This is a test notebook for VS Code specific cell metadata.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true, + "source_hidden": true + }, + "vscode": { + "languageId": "javascript" + } + }, + "outputs": [], + "source": [ + "function add(x, y) {\n", + " return x + y;\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "print(\"hello world\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/crates/ruff_notebook/src/cell.rs b/crates/ruff_notebook/src/cell.rs index 196bd9c3d6..1d7985e4a3 100644 --- a/crates/ruff_notebook/src/cell.rs +++ b/crates/ruff_notebook/src/cell.rs @@ -6,6 +6,7 @@ use itertools::Itertools; use ruff_text_size::{TextRange, TextSize}; use crate::schema::{Cell, SourceValue}; +use crate::CellMetadata; impl fmt::Display for SourceValue { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -35,7 +36,7 @@ impl Cell { matches!(self, Cell::Code(_)) } - pub fn metadata(&self) -> &serde_json::Value { + pub fn metadata(&self) -> &CellMetadata { match self { Cell::Code(cell) => &cell.metadata, Cell::Markdown(cell) => &cell.metadata, @@ -54,11 +55,21 @@ impl Cell { /// Return `true` if it's a valid code cell. /// - /// A valid code cell is a cell where the cell type is [`Cell::Code`] and the - /// source doesn't contain a cell magic. - pub(crate) fn is_valid_code_cell(&self) -> bool { + /// A valid code cell is a cell where: + /// 1. The cell type is [`Cell::Code`] + /// 2. The source doesn't contain a cell magic + /// 3. If the language id is set, it should be `python` + pub(crate) fn is_valid_python_code_cell(&self) -> bool { let source = match self { - Cell::Code(cell) => &cell.source, + Cell::Code(cell) + if cell + .metadata + .vscode + .as_ref() + .map_or(true, |vscode| vscode.language_id == "python") => + { + &cell.source + } _ => return false, }; // Ignore cells containing cell magic as they act on the entire cell diff --git a/crates/ruff_notebook/src/notebook.rs b/crates/ruff_notebook/src/notebook.rs index 97096a114a..b2be9ebe6a 100644 --- a/crates/ruff_notebook/src/notebook.rs +++ b/crates/ruff_notebook/src/notebook.rs @@ -19,7 +19,7 @@ use ruff_text_size::TextSize; use crate::cell::CellOffsets; use crate::index::NotebookIndex; use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue}; -use crate::{schema, RawNotebookMetadata}; +use crate::{schema, CellMetadata, RawNotebookMetadata}; /// Run round-trip source code generation on a given Jupyter notebook file path. pub fn round_trip(path: &Path) -> anyhow::Result { @@ -131,7 +131,7 @@ impl Notebook { .cells .iter() .enumerate() - .filter(|(_, cell)| cell.is_valid_code_cell()) + .filter(|(_, cell)| cell.is_valid_python_code_cell()) .map(|(cell_index, _)| u32::try_from(cell_index).unwrap()) .collect::>(); @@ -205,16 +205,14 @@ impl Notebook { }) } - /// Creates an empty notebook. - /// - /// + /// Creates an empty notebook with a single code cell. pub fn empty() -> Self { Self::from_raw_notebook( RawNotebook { cells: vec![schema::Cell::Code(schema::CodeCell { execution_count: None, id: None, - metadata: serde_json::Value::default(), + metadata: CellMetadata::default(), outputs: vec![], source: schema::SourceValue::String(String::default()), })], @@ -507,7 +505,9 @@ mod tests { #[test_case("automagic_before_code", false)] #[test_case("automagic_after_code", true)] #[test_case("unicode_magic_gh9145", true)] - fn test_is_valid_code_cell(cell: &str, expected: bool) -> Result<()> { + #[test_case("vscode_language_id_python", true)] + #[test_case("vscode_language_id_javascript", false)] + fn test_is_valid_python_code_cell(cell: &str, expected: bool) -> Result<()> { /// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory. fn read_jupyter_cell(path: impl AsRef) -> Result { let path = notebook_path("cell").join(path); @@ -516,7 +516,7 @@ mod tests { } assert_eq!( - read_jupyter_cell(format!("{cell}.json"))?.is_valid_code_cell(), + read_jupyter_cell(format!("{cell}.json"))?.is_valid_python_code_cell(), expected ); Ok(()) @@ -596,4 +596,12 @@ print("after empty cells") ); Ok(()) } + + #[test] + fn round_trip() { + let path = notebook_path("vscode_language_id.ipynb"); + let expected = std::fs::read_to_string(&path).unwrap(); + let actual = super::round_trip(&path).unwrap(); + assert_eq!(actual, expected); + } } diff --git a/crates/ruff_notebook/src/schema.rs b/crates/ruff_notebook/src/schema.rs index 7699755b31..a33d041055 100644 --- a/crates/ruff_notebook/src/schema.rs +++ b/crates/ruff_notebook/src/schema.rs @@ -18,7 +18,7 @@ //! a code cell or not without looking at the `cell_type` property, which //! would require a custom serializer. -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -122,7 +122,7 @@ pub struct RawCell { /// pub id: Option, /// Cell-level metadata. - pub metadata: Value, + pub metadata: CellMetadata, pub source: SourceValue, } @@ -137,7 +137,7 @@ pub struct MarkdownCell { /// pub id: Option, /// Cell-level metadata. - pub metadata: Value, + pub metadata: CellMetadata, pub source: SourceValue, } @@ -153,12 +153,36 @@ pub struct CodeCell { #[serde(skip_serializing_if = "Option::is_none")] pub id: Option, /// Cell-level metadata. - pub metadata: Value, + pub metadata: CellMetadata, /// Execution, display, or stream outputs. pub outputs: Vec, pub source: SourceValue, } +/// Cell-level metadata. +#[skip_serializing_none] +#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)] +pub struct CellMetadata { + /// VS Code specific cell metadata. + /// + /// This is [`Some`] only if the cell's preferred language is different from the notebook's + /// preferred language. + /// + pub vscode: Option, + /// Catch-all for metadata that isn't required by Ruff. + #[serde(flatten)] + pub extra: HashMap, +} + +/// VS Code specific cell metadata. +/// +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct CodeCellMetadataVSCode { + /// + pub language_id: String, +} + /// Notebook root-level metadata. #[skip_serializing_none] #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Default)] diff --git a/crates/ruff_server/src/edit/notebook.rs b/crates/ruff_server/src/edit/notebook.rs index bc14dc785c..7e0c65e917 100644 --- a/crates/ruff_server/src/edit/notebook.rs +++ b/crates/ruff_server/src/edit/notebook.rs @@ -1,5 +1,6 @@ use anyhow::Ok; use lsp_types::NotebookCellKind; +use ruff_notebook::CellMetadata; use rustc_hash::{FxBuildHasher, FxHashMap}; use crate::{PositionEncoding, TextDocument}; @@ -65,7 +66,7 @@ impl NotebookDocument { NotebookCellKind::Code => ruff_notebook::Cell::Code(ruff_notebook::CodeCell { execution_count: None, id: None, - metadata: serde_json::Value::Null, + metadata: CellMetadata::default(), outputs: vec![], source: ruff_notebook::SourceValue::String( cell.document.contents().to_string(), @@ -75,7 +76,7 @@ impl NotebookDocument { ruff_notebook::Cell::Markdown(ruff_notebook::MarkdownCell { attachments: None, id: None, - metadata: serde_json::Value::Null, + metadata: CellMetadata::default(), source: ruff_notebook::SourceValue::String( cell.document.contents().to_string(), ),