Add jupyter notebook cell ids in 4.5+ if missing (#6853)

**Summary** See
https://github.com/astral-sh/ruff/issues/6834#issuecomment-1691202417

**Test Plan** Added a new notebook
This commit is contained in:
konsti 2023-08-25 10:34:42 +02:00 committed by GitHub
parent 1c66bb80b7
commit 0b6dab5e3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 92 additions and 9 deletions

24
Cargo.lock generated
View File

@ -876,8 +876,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
]
[[package]]
@ -2128,6 +2130,7 @@ dependencies = [
"typed-arena",
"unicode-width",
"unicode_names2",
"uuid",
"wsl",
]
@ -3346,9 +3349,26 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "uuid"
version = "1.4.0"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d023da39d1fde5a8a3fe1f3e01ca9632ada0a63e9797de55a879d6e2236277be"
checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d"
dependencies = [
"getrandom",
"rand",
"uuid-macro-internal",
"wasm-bindgen",
]
[[package]]
name = "uuid-macro-internal"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7e1ba1f333bd65ce3c9f27de592fcbc256dafe3af2717f56d7c87761fbaccf4"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.23",
]
[[package]]
name = "valuable"

View File

@ -50,6 +50,7 @@ tracing = "0.1.37"
tracing-indicatif = "0.3.4"
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
unicode-width = "0.1.10"
uuid = { version = "1.4.1", features = ["v4", "fast-rng", "macro-diagnostics", "js"] }
wsl = { version = "0.1.0" }
# v1.0.1

View File

@ -77,6 +77,7 @@ toml = { workspace = true }
typed-arena = { version = "2.0.2" }
unicode-width = { workspace = true }
unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }
uuid = { workspace = true, features = ["v4", "fast-rng", "macro-diagnostics", "js"] }
wsl = { version = "0.1.0" }
[dev-dependencies]

View File

@ -0,0 +1,37 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import os\n",
"\n",
"math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -9,6 +9,7 @@ use itertools::Itertools;
use once_cell::sync::OnceCell;
use serde::Serialize;
use serde_json::error::Category;
use uuid::Uuid;
use ruff_diagnostics::Diagnostic;
use ruff_python_parser::lexer::lex;
@ -156,7 +157,7 @@ impl Notebook {
TextRange::default(),
)
})?;
let raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) {
let mut raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) {
Ok(notebook) => notebook,
Err(err) => {
// Translate the error into a diagnostic
@ -262,6 +263,23 @@ impl Notebook {
cell_offsets.push(current_offset);
}
// Add cell ids to 4.5+ notebooks if they are missing
// https://github.com/astral-sh/ruff/issues/6834
// https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#required-field
if raw_notebook.nbformat == 4 && raw_notebook.nbformat_minor >= 5 {
for cell in &mut raw_notebook.cells {
let id = match cell {
Cell::Code(cell) => &mut cell.id,
Cell::Markdown(cell) => &mut cell.id,
Cell::Raw(cell) => &mut cell.id,
};
if id.is_none() {
// https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#questions
*id = Some(Uuid::new_v4().to_string());
}
}
}
Ok(Self {
raw: raw_notebook,
index: OnceCell::new(),
@ -662,21 +680,27 @@ print("after empty cells")
Ok(())
}
#[test]
fn test_no_cell_id() -> Result<()> {
let path = "no_cell_id.ipynb".to_string();
let source_notebook = read_jupyter_notebook(path.as_ref())?;
// Version <4.5, don't emit cell ids
#[test_case(Path::new("no_cell_id.ipynb"), false; "no_cell_id")]
// Version 4.5, cell ids are missing and need to be added
#[test_case(Path::new("add_missing_cell_id.ipynb"), true; "add_missing_cell_id")]
fn test_cell_id(path: &Path, has_id: bool) -> Result<()> {
let source_notebook = read_jupyter_notebook(path)?;
let source_kind = SourceKind::IpyNotebook(source_notebook);
let (_, transformed) = test_contents(
&source_kind,
path.as_ref(),
path,
&settings::Settings::for_rule(Rule::UnusedImport),
);
let linted_notebook = transformed.into_owned().expect_ipy_notebook();
let mut writer = Vec::new();
linted_notebook.write_inner(&mut writer)?;
let actual = String::from_utf8(writer)?;
assert!(!actual.contains(r#""id":"#));
if has_id {
assert!(actual.contains(r#""id": ""#));
} else {
assert!(!actual.contains(r#""id":"#));
}
Ok(())
}
}