From cd6c9371943db7d10fc5c449eccfb2700c485cd2 Mon Sep 17 00:00:00 2001
From: aditya pillai <29032680+pilleye@users.noreply.github.com>
Date: Tue, 22 Oct 2024 03:42:40 -0400
Subject: [PATCH] [red-knot] Report line numbers in mdtest relative to the
 markdown file, not the test snippet (#13804)

Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
Co-authored-by: Micha Reiser <micha@reiser.io>
Co-authored-by: Carl Meyer <carl@oddbird.net>
---
 crates/red_knot_test/src/lib.rs           | 104 ++++++++++++++--------
 crates/red_knot_test/src/parser.rs        |  48 ++++++----
 crates/ruff_python_trivia/src/cursor.rs   |  19 ++++
 crates/ruff_source_file/src/line_index.rs |  12 +++
 4 files changed, 130 insertions(+), 53 deletions(-)

diff --git a/crates/red_knot_test/src/lib.rs b/crates/red_knot_test/src/lib.rs
index 527278e1dd..cd49a98041 100644
--- a/crates/red_knot_test/src/lib.rs
+++ b/crates/red_knot_test/src/lib.rs
@@ -1,14 +1,13 @@
 use colored::Colorize;
 use parser as test_parser;
 use red_knot_python_semantic::types::check_types;
-use ruff_db::files::{system_path_to_file, Files};
+use ruff_db::files::{system_path_to_file, File, Files};
 use ruff_db::parsed::parsed_module;
 use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
-use std::collections::BTreeMap;
+use ruff_source_file::LineIndex;
+use ruff_text_size::TextSize;
 use std::path::Path;
 
-type Failures = BTreeMap<SystemPathBuf, matcher::FailuresByLine>;
-
 mod assertion;
 mod db;
 mod diagnostic;
@@ -40,20 +39,24 @@ pub fn run(path: &Path, title: &str) {
             any_failures = true;
             println!("\n{}\n", test.name().bold().underline());
 
-            for (path, by_line) in failures {
-                println!("{}", path.as_str().bold());
-                for (line_number, failures) in by_line.iter() {
+            let md_index = LineIndex::from_source_text(&source);
+
+            for test_failures in failures {
+                let backtick_line = md_index.line_index(test_failures.backtick_offset);
+
+                for (relative_line_number, failures) in test_failures.by_line.iter() {
                     for failure in failures {
-                        let line_info = format!("line {line_number}:").cyan();
+                        let absolute_line_number =
+                            backtick_line.checked_add(relative_line_number).unwrap();
+                        let line_info = format!("{title}:{absolute_line_number}").cyan();
                         println!("    {line_info} {failure}");
                     }
                 }
-                println!();
             }
         }
     }
 
-    println!("{}\n", "-".repeat(50));
+    println!("\n{}\n", "-".repeat(50));
 
     assert!(!any_failures, "Some tests failed.");
 }
@@ -61,40 +64,69 @@ pub fn run(path: &Path, title: &str) {
 fn run_test(db: &mut db::Db, test: &parser::MarkdownTest) -> Result<(), Failures> {
     let workspace_root = db.workspace_root().to_path_buf();
 
-    let mut system_paths = vec![];
+    let test_files: Vec<_> = test
+        .files()
+        .map(|embedded| {
+            assert!(
+                matches!(embedded.lang, "py" | "pyi"),
+                "Non-Python files not supported yet."
+            );
+            let full_path = workspace_root.join(embedded.path);
+            db.write_file(&full_path, embedded.code).unwrap();
+            let file = system_path_to_file(db, full_path).unwrap();
 
-    for file in test.files() {
-        assert!(
-            matches!(file.lang, "py" | "pyi"),
-            "Non-Python files not supported yet."
-        );
-        let full_path = workspace_root.join(file.path);
-        db.write_file(&full_path, file.code).unwrap();
-        system_paths.push(full_path);
-    }
+            TestFile {
+                file,
+                backtick_offset: embedded.md_offset,
+            }
+        })
+        .collect();
 
-    let mut failures = BTreeMap::default();
+    let failures: Failures = test_files
+        .into_iter()
+        .filter_map(|test_file| {
+            let parsed = parsed_module(db, test_file.file);
 
-    for path in system_paths {
-        let file = system_path_to_file(db, path.clone()).unwrap();
-        let parsed = parsed_module(db, file);
+            // TODO allow testing against code with syntax errors
+            assert!(
+                parsed.errors().is_empty(),
+                "Python syntax errors in {}, {}: {:?}",
+                test.name(),
+                test_file.file.path(db),
+                parsed.errors()
+            );
 
-        // TODO allow testing against code with syntax errors
-        assert!(
-            parsed.errors().is_empty(),
-            "Python syntax errors in {}, {:?}: {:?}",
-            test.name(),
-            path,
-            parsed.errors()
-        );
+            match matcher::match_file(db, test_file.file, check_types(db, test_file.file)) {
+                Ok(()) => None,
+                Err(line_failures) => Some(FileFailures {
+                    backtick_offset: test_file.backtick_offset,
+                    by_line: line_failures,
+                }),
+            }
+        })
+        .collect();
 
-        matcher::match_file(db, file, check_types(db, file)).unwrap_or_else(|line_failures| {
-            failures.insert(path, line_failures);
-        });
-    }
     if failures.is_empty() {
         Ok(())
     } else {
         Err(failures)
     }
 }
+
+type Failures = Vec<FileFailures>;
+
+/// The failures for a single file in a test by line number.
+struct FileFailures {
+    /// The offset of the backticks that starts the code block in the Markdown file
+    backtick_offset: TextSize,
+    /// The failures by lines in the code block.
+    by_line: matcher::FailuresByLine,
+}
+
+/// File in a test.
+struct TestFile {
+    file: File,
+
+    // Offset of the backticks that starts the code block in the Markdown file
+    backtick_offset: TextSize,
+}
diff --git a/crates/red_knot_test/src/parser.rs b/crates/red_knot_test/src/parser.rs
index fa9e39432c..8b1445020d 100644
--- a/crates/red_knot_test/src/parser.rs
+++ b/crates/red_knot_test/src/parser.rs
@@ -1,8 +1,12 @@
+use std::sync::LazyLock;
+
 use memchr::memchr2;
 use regex::{Captures, Match, Regex};
-use ruff_index::{newtype_index, IndexVec};
 use rustc_hash::{FxHashMap, FxHashSet};
-use std::sync::LazyLock;
+
+use ruff_index::{newtype_index, IndexVec};
+use ruff_python_trivia::Cursor;
+use ruff_text_size::{TextLen, TextSize};
 
 /// Parse the Markdown `source` as a test suite with given `title`.
 pub(crate) fn parse<'s>(title: &'s str, source: &'s str) -> anyhow::Result<MarkdownTestSuite<'s>> {
@@ -132,6 +136,9 @@ pub(crate) struct EmbeddedFile<'s> {
     pub(crate) path: &'s str,
     pub(crate) lang: &'s str,
     pub(crate) code: &'s str,
+
+    /// The offset of the backticks beginning the code block within the markdown file
+    pub(crate) md_offset: TextSize,
 }
 
 /// Matches a sequence of `#` characters, followed by a title heading, followed by a newline.
@@ -185,7 +192,9 @@ struct Parser<'s> {
     files: IndexVec<EmbeddedFileId, EmbeddedFile<'s>>,
 
     /// The unparsed remainder of the Markdown source.
-    unparsed: &'s str,
+    cursor: Cursor<'s>,
+
+    source_len: TextSize,
 
     /// Stack of ancestor sections.
     stack: SectionStack,
@@ -205,7 +214,8 @@ impl<'s> Parser<'s> {
         Self {
             sections,
             files: IndexVec::default(),
-            unparsed: source,
+            cursor: Cursor::new(source),
+            source_len: source.text_len(),
             stack: SectionStack::new(root_section_id),
             current_section_files: None,
         }
@@ -227,26 +237,23 @@ impl<'s> Parser<'s> {
     }
 
     fn parse_impl(&mut self) -> anyhow::Result<()> {
-        while let Some(position) = memchr2(b'`', b'#', self.unparsed.as_bytes()) {
-            let (before, after) = self.unparsed.split_at(position);
-            self.unparsed = after;
+        while let Some(position) = memchr2(b'`', b'#', self.cursor.as_bytes()) {
+            self.cursor.skip_bytes(position.saturating_sub(1));
 
             // code blocks and headers must start on a new line.
-            if before.is_empty() || before.ends_with('\n') {
-                let c = after.as_bytes()[0] as char;
-
-                match c {
+            if position == 0 || self.cursor.eat_char('\n') {
+                match self.cursor.first() {
                     '#' => {
-                        if let Some(find) = HEADER_RE.find(self.unparsed) {
+                        if let Some(find) = HEADER_RE.find(self.cursor.as_str()) {
                             self.parse_header(find.as_str())?;
-                            self.unparsed = &self.unparsed[find.end()..];
+                            self.cursor.skip_bytes(find.len());
                             continue;
                         }
                     }
                     '`' => {
-                        if let Some(captures) = CODE_RE.captures(self.unparsed) {
+                        if let Some(captures) = CODE_RE.captures(self.cursor.as_str()) {
                             self.parse_code_block(&captures)?;
-                            self.unparsed = &self.unparsed[captures.get(0).unwrap().end()..];
+                            self.cursor.skip_bytes(captures.get(0).unwrap().len());
                             continue;
                         }
                     }
@@ -255,8 +262,8 @@ impl<'s> Parser<'s> {
             }
 
             // Skip to the end of the line
-            if let Some(position) = memchr::memchr(b'\n', self.unparsed.as_bytes()) {
-                self.unparsed = &self.unparsed[position + 1..];
+            if let Some(position) = memchr::memchr(b'\n', self.cursor.as_bytes()) {
+                self.cursor.skip_bytes(position);
             } else {
                 break;
             }
@@ -336,6 +343,8 @@ impl<'s> Parser<'s> {
                 .unwrap_or_default(),
             // CODE_RE can't match without matches for 'lang' and 'code'.
             code: captures.name("code").unwrap().into(),
+
+            md_offset: self.offset(),
         });
 
         if let Some(current_files) = &mut self.current_section_files {
@@ -368,6 +377,11 @@ impl<'s> Parser<'s> {
             self.current_section_files = None;
         }
     }
+
+    /// Retrieves the current offset of the cursor within the source code.
+    fn offset(&self) -> TextSize {
+        self.source_len - self.cursor.text_len()
+    }
 }
 
 #[cfg(test)]
diff --git a/crates/ruff_python_trivia/src/cursor.rs b/crates/ruff_python_trivia/src/cursor.rs
index 5c2e218ff5..c06d831565 100644
--- a/crates/ruff_python_trivia/src/cursor.rs
+++ b/crates/ruff_python_trivia/src/cursor.rs
@@ -26,6 +26,16 @@ impl<'a> Cursor<'a> {
         self.chars.clone()
     }
 
+    /// Returns the remaining input as byte slice.
+    pub fn as_bytes(&self) -> &'a [u8] {
+        self.as_str().as_bytes()
+    }
+
+    /// Returns the remaining input as string slice.
+    pub fn as_str(&self) -> &'a str {
+        self.chars.as_str()
+    }
+
     /// Peeks the next character from the input stream without consuming it.
     /// Returns [`EOF_CHAR`] if the file is at the end of the file.
     pub fn first(&self) -> char {
@@ -110,4 +120,13 @@ impl<'a> Cursor<'a> {
             self.bump_back();
         }
     }
+
+    /// Skips the next `count` bytes.
+    ///
+    /// ## Panics
+    ///  - If `count` is larger than the remaining bytes in the input stream.
+    ///  - If `count` indexes into a multi-byte character.
+    pub fn skip_bytes(&mut self, count: usize) {
+        self.chars = self.chars.as_str()[count..].chars();
+    }
 }
diff --git a/crates/ruff_source_file/src/line_index.rs b/crates/ruff_source_file/src/line_index.rs
index a66d6b2f99..e9d211ca5c 100644
--- a/crates/ruff_source_file/src/line_index.rs
+++ b/crates/ruff_source_file/src/line_index.rs
@@ -394,6 +394,18 @@ impl OneIndexed {
             None => Self::MIN,
         }
     }
+
+    /// Checked addition. Returns `None` if overflow occurred.
+    #[must_use]
+    pub fn checked_add(self, rhs: Self) -> Option<Self> {
+        self.0.checked_add(rhs.0.get()).map(Self)
+    }
+
+    /// Checked subtraction. Returns `None` if overflow occurred.
+    #[must_use]
+    pub fn checked_sub(self, rhs: Self) -> Option<Self> {
+        self.0.get().checked_sub(rhs.get()).and_then(Self::new)
+    }
 }
 
 impl fmt::Display for OneIndexed {