From 040071bbc5b21306f2759693ca6ee2c3fc5420bf Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Thu, 27 Feb 2025 13:25:31 +0000 Subject: [PATCH] [red-knot] Ignore surrounding whitespace when looking for `` directives in mdtests (#16380) --- crates/red_knot_test/src/parser.rs | 52 ++++++++++++++++++------- crates/ruff_python_trivia/src/cursor.rs | 17 +++++++- 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/crates/red_knot_test/src/parser.rs b/crates/red_knot_test/src/parser.rs index 3e709d95ee..442cb537a2 100644 --- a/crates/red_knot_test/src/parser.rs +++ b/crates/red_knot_test/src/parser.rs @@ -420,27 +420,26 @@ impl<'s> Parser<'s> { } fn parse_impl(&mut self) -> anyhow::Result<()> { - const SECTION_CONFIG_SNAPSHOT: &str = ""; + const SECTION_CONFIG_SNAPSHOT: &str = "snapshot-diagnostics"; const CODE_BLOCK_END: &[u8] = b"```"; + const HTML_COMMENT_END: &[u8] = b"-->"; while let Some(first) = self.cursor.bump() { match first { - '<' => { - self.explicit_path = None; - self.preceding_blank_lines = 0; - // If we want to support more comment directives, then we should - // probably just parse the directive generically first. But it's - // not clear if we'll want to add more, since comments are hidden - // from GitHub Markdown rendering. - if self - .cursor - .as_str() - .starts_with(&SECTION_CONFIG_SNAPSHOT[1..]) + '<' if self.cursor.eat_char3('!', '-', '-') => { + if let Some(position) = + memchr::memmem::find(self.cursor.as_bytes(), HTML_COMMENT_END) { - self.cursor.skip_bytes(SECTION_CONFIG_SNAPSHOT.len() - 1); - self.process_snapshot_diagnostics()?; + let html_comment = self.cursor.as_str()[..position].trim(); + if html_comment == SECTION_CONFIG_SNAPSHOT { + self.process_snapshot_diagnostics()?; + } + self.cursor.skip_bytes(position + HTML_COMMENT_END.len()); + } else { + bail!("Unterminated HTML comment."); } } + '#' => { self.explicit_path = None; self.preceding_blank_lines = 0; @@ -1729,6 +1728,31 @@ mod tests { ); } + #[test] + fn snapshot_diagnostic_directive_detection_ignores_whitespace() { + // A bit weird, but the fact that the parser rejects this indicates that + // we have correctly recognised both of these HTML comments as a directive to have + // snapshotting enabled for the file, which is what we want (even though they both + // contain different amounts of whitespace to the "standard" snapshot directive). + let source = dedent( + " + # Some header + + + + + ```py + x = 1 + ``` + ", + ); + let err = super::parse("file.md", &source).expect_err("Should fail to parse"); + assert_eq!( + err.to_string(), + "Section config to enable snapshotting diagnostics should appear at most once.", + ); + } + #[test] fn section_directive_must_appear_before_config() { let source = dedent( diff --git a/crates/ruff_python_trivia/src/cursor.rs b/crates/ruff_python_trivia/src/cursor.rs index 340b076d42..8ab9c37d41 100644 --- a/crates/ruff_python_trivia/src/cursor.rs +++ b/crates/ruff_python_trivia/src/cursor.rs @@ -95,7 +95,22 @@ impl<'a> Cursor<'a> { /// Eats the next two characters if they are `c1` and `c2`. Does not /// consume any input otherwise, even if the first character matches. pub fn eat_char2(&mut self, c1: char, c2: char) -> bool { - if self.first() == c1 && self.second() == c2 { + let mut chars = self.chars.clone(); + if chars.next() == Some(c1) && chars.next() == Some(c2) { + self.bump(); + self.bump(); + true + } else { + false + } + } + + /// Eats the next three characters if they are `c1`, `c2` and `c3` + /// Does not consume any input otherwise, even if the first character matches. + pub fn eat_char3(&mut self, c1: char, c2: char, c3: char) -> bool { + let mut chars = self.chars.clone(); + if chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3) { + self.bump(); self.bump(); self.bump(); true