From cd6c9371943db7d10fc5c449eccfb2700c485cd2 Mon Sep 17 00:00:00 2001 From: aditya pillai <29032680+pilleye@users.noreply.github.com> Date: Tue, 22 Oct 2024 03:42:40 -0400 Subject: [PATCH] [red-knot] Report line numbers in mdtest relative to the markdown file, not the test snippet (#13804) Co-authored-by: Alex Waygood Co-authored-by: Micha Reiser Co-authored-by: Carl Meyer --- crates/red_knot_test/src/lib.rs | 104 ++++++++++++++-------- crates/red_knot_test/src/parser.rs | 48 ++++++---- crates/ruff_python_trivia/src/cursor.rs | 19 ++++ crates/ruff_source_file/src/line_index.rs | 12 +++ 4 files changed, 130 insertions(+), 53 deletions(-) diff --git a/crates/red_knot_test/src/lib.rs b/crates/red_knot_test/src/lib.rs index 527278e1dd..cd49a98041 100644 --- a/crates/red_knot_test/src/lib.rs +++ b/crates/red_knot_test/src/lib.rs @@ -1,14 +1,13 @@ use colored::Colorize; use parser as test_parser; use red_knot_python_semantic::types::check_types; -use ruff_db::files::{system_path_to_file, Files}; +use ruff_db::files::{system_path_to_file, File, Files}; use ruff_db::parsed::parsed_module; use ruff_db::system::{DbWithTestSystem, SystemPathBuf}; -use std::collections::BTreeMap; +use ruff_source_file::LineIndex; +use ruff_text_size::TextSize; use std::path::Path; -type Failures = BTreeMap; - mod assertion; mod db; mod diagnostic; @@ -40,20 +39,24 @@ pub fn run(path: &Path, title: &str) { any_failures = true; println!("\n{}\n", test.name().bold().underline()); - for (path, by_line) in failures { - println!("{}", path.as_str().bold()); - for (line_number, failures) in by_line.iter() { + let md_index = LineIndex::from_source_text(&source); + + for test_failures in failures { + let backtick_line = md_index.line_index(test_failures.backtick_offset); + + for (relative_line_number, failures) in test_failures.by_line.iter() { for failure in failures { - let line_info = format!("line {line_number}:").cyan(); + let absolute_line_number = + backtick_line.checked_add(relative_line_number).unwrap(); + let line_info = format!("{title}:{absolute_line_number}").cyan(); println!(" {line_info} {failure}"); } } - println!(); } } } - println!("{}\n", "-".repeat(50)); + println!("\n{}\n", "-".repeat(50)); assert!(!any_failures, "Some tests failed."); } @@ -61,40 +64,69 @@ pub fn run(path: &Path, title: &str) { fn run_test(db: &mut db::Db, test: &parser::MarkdownTest) -> Result<(), Failures> { let workspace_root = db.workspace_root().to_path_buf(); - let mut system_paths = vec![]; + let test_files: Vec<_> = test + .files() + .map(|embedded| { + assert!( + matches!(embedded.lang, "py" | "pyi"), + "Non-Python files not supported yet." + ); + let full_path = workspace_root.join(embedded.path); + db.write_file(&full_path, embedded.code).unwrap(); + let file = system_path_to_file(db, full_path).unwrap(); - for file in test.files() { - assert!( - matches!(file.lang, "py" | "pyi"), - "Non-Python files not supported yet." - ); - let full_path = workspace_root.join(file.path); - db.write_file(&full_path, file.code).unwrap(); - system_paths.push(full_path); - } + TestFile { + file, + backtick_offset: embedded.md_offset, + } + }) + .collect(); - let mut failures = BTreeMap::default(); + let failures: Failures = test_files + .into_iter() + .filter_map(|test_file| { + let parsed = parsed_module(db, test_file.file); - for path in system_paths { - let file = system_path_to_file(db, path.clone()).unwrap(); - let parsed = parsed_module(db, file); + // TODO allow testing against code with syntax errors + assert!( + parsed.errors().is_empty(), + "Python syntax errors in {}, {}: {:?}", + test.name(), + test_file.file.path(db), + parsed.errors() + ); - // TODO allow testing against code with syntax errors - assert!( - parsed.errors().is_empty(), - "Python syntax errors in {}, {:?}: {:?}", - test.name(), - path, - parsed.errors() - ); + match matcher::match_file(db, test_file.file, check_types(db, test_file.file)) { + Ok(()) => None, + Err(line_failures) => Some(FileFailures { + backtick_offset: test_file.backtick_offset, + by_line: line_failures, + }), + } + }) + .collect(); - matcher::match_file(db, file, check_types(db, file)).unwrap_or_else(|line_failures| { - failures.insert(path, line_failures); - }); - } if failures.is_empty() { Ok(()) } else { Err(failures) } } + +type Failures = Vec; + +/// The failures for a single file in a test by line number. +struct FileFailures { + /// The offset of the backticks that starts the code block in the Markdown file + backtick_offset: TextSize, + /// The failures by lines in the code block. + by_line: matcher::FailuresByLine, +} + +/// File in a test. +struct TestFile { + file: File, + + // Offset of the backticks that starts the code block in the Markdown file + backtick_offset: TextSize, +} diff --git a/crates/red_knot_test/src/parser.rs b/crates/red_knot_test/src/parser.rs index fa9e39432c..8b1445020d 100644 --- a/crates/red_knot_test/src/parser.rs +++ b/crates/red_knot_test/src/parser.rs @@ -1,8 +1,12 @@ +use std::sync::LazyLock; + use memchr::memchr2; use regex::{Captures, Match, Regex}; -use ruff_index::{newtype_index, IndexVec}; use rustc_hash::{FxHashMap, FxHashSet}; -use std::sync::LazyLock; + +use ruff_index::{newtype_index, IndexVec}; +use ruff_python_trivia::Cursor; +use ruff_text_size::{TextLen, TextSize}; /// Parse the Markdown `source` as a test suite with given `title`. pub(crate) fn parse<'s>(title: &'s str, source: &'s str) -> anyhow::Result> { @@ -132,6 +136,9 @@ pub(crate) struct EmbeddedFile<'s> { pub(crate) path: &'s str, pub(crate) lang: &'s str, pub(crate) code: &'s str, + + /// The offset of the backticks beginning the code block within the markdown file + pub(crate) md_offset: TextSize, } /// Matches a sequence of `#` characters, followed by a title heading, followed by a newline. @@ -185,7 +192,9 @@ struct Parser<'s> { files: IndexVec>, /// The unparsed remainder of the Markdown source. - unparsed: &'s str, + cursor: Cursor<'s>, + + source_len: TextSize, /// Stack of ancestor sections. stack: SectionStack, @@ -205,7 +214,8 @@ impl<'s> Parser<'s> { Self { sections, files: IndexVec::default(), - unparsed: source, + cursor: Cursor::new(source), + source_len: source.text_len(), stack: SectionStack::new(root_section_id), current_section_files: None, } @@ -227,26 +237,23 @@ impl<'s> Parser<'s> { } fn parse_impl(&mut self) -> anyhow::Result<()> { - while let Some(position) = memchr2(b'`', b'#', self.unparsed.as_bytes()) { - let (before, after) = self.unparsed.split_at(position); - self.unparsed = after; + while let Some(position) = memchr2(b'`', b'#', self.cursor.as_bytes()) { + self.cursor.skip_bytes(position.saturating_sub(1)); // code blocks and headers must start on a new line. - if before.is_empty() || before.ends_with('\n') { - let c = after.as_bytes()[0] as char; - - match c { + if position == 0 || self.cursor.eat_char('\n') { + match self.cursor.first() { '#' => { - if let Some(find) = HEADER_RE.find(self.unparsed) { + if let Some(find) = HEADER_RE.find(self.cursor.as_str()) { self.parse_header(find.as_str())?; - self.unparsed = &self.unparsed[find.end()..]; + self.cursor.skip_bytes(find.len()); continue; } } '`' => { - if let Some(captures) = CODE_RE.captures(self.unparsed) { + if let Some(captures) = CODE_RE.captures(self.cursor.as_str()) { self.parse_code_block(&captures)?; - self.unparsed = &self.unparsed[captures.get(0).unwrap().end()..]; + self.cursor.skip_bytes(captures.get(0).unwrap().len()); continue; } } @@ -255,8 +262,8 @@ impl<'s> Parser<'s> { } // Skip to the end of the line - if let Some(position) = memchr::memchr(b'\n', self.unparsed.as_bytes()) { - self.unparsed = &self.unparsed[position + 1..]; + if let Some(position) = memchr::memchr(b'\n', self.cursor.as_bytes()) { + self.cursor.skip_bytes(position); } else { break; } @@ -336,6 +343,8 @@ impl<'s> Parser<'s> { .unwrap_or_default(), // CODE_RE can't match without matches for 'lang' and 'code'. code: captures.name("code").unwrap().into(), + + md_offset: self.offset(), }); if let Some(current_files) = &mut self.current_section_files { @@ -368,6 +377,11 @@ impl<'s> Parser<'s> { self.current_section_files = None; } } + + /// Retrieves the current offset of the cursor within the source code. + fn offset(&self) -> TextSize { + self.source_len - self.cursor.text_len() + } } #[cfg(test)] diff --git a/crates/ruff_python_trivia/src/cursor.rs b/crates/ruff_python_trivia/src/cursor.rs index 5c2e218ff5..c06d831565 100644 --- a/crates/ruff_python_trivia/src/cursor.rs +++ b/crates/ruff_python_trivia/src/cursor.rs @@ -26,6 +26,16 @@ impl<'a> Cursor<'a> { self.chars.clone() } + /// Returns the remaining input as byte slice. + pub fn as_bytes(&self) -> &'a [u8] { + self.as_str().as_bytes() + } + + /// Returns the remaining input as string slice. + pub fn as_str(&self) -> &'a str { + self.chars.as_str() + } + /// Peeks the next character from the input stream without consuming it. /// Returns [`EOF_CHAR`] if the file is at the end of the file. pub fn first(&self) -> char { @@ -110,4 +120,13 @@ impl<'a> Cursor<'a> { self.bump_back(); } } + + /// Skips the next `count` bytes. + /// + /// ## Panics + /// - If `count` is larger than the remaining bytes in the input stream. + /// - If `count` indexes into a multi-byte character. + pub fn skip_bytes(&mut self, count: usize) { + self.chars = self.chars.as_str()[count..].chars(); + } } diff --git a/crates/ruff_source_file/src/line_index.rs b/crates/ruff_source_file/src/line_index.rs index a66d6b2f99..e9d211ca5c 100644 --- a/crates/ruff_source_file/src/line_index.rs +++ b/crates/ruff_source_file/src/line_index.rs @@ -394,6 +394,18 @@ impl OneIndexed { None => Self::MIN, } } + + /// Checked addition. Returns `None` if overflow occurred. + #[must_use] + pub fn checked_add(self, rhs: Self) -> Option { + self.0.checked_add(rhs.0.get()).map(Self) + } + + /// Checked subtraction. Returns `None` if overflow occurred. + #[must_use] + pub fn checked_sub(self, rhs: Self) -> Option { + self.0.get().checked_sub(rhs.get()).and_then(Self::new) + } } impl fmt::Display for OneIndexed {