Use datatest for parser tests

This commit is contained in:
Micha Reiser 2025-12-12 09:05:41 +01:00
parent f095e19c2c
commit bd4ee3a761
No known key found for this signature in database
3 changed files with 68 additions and 59 deletions

1
Cargo.lock generated
View File

@ -3398,6 +3398,7 @@ dependencies = [
"bitflags 2.10.0", "bitflags 2.10.0",
"bstr", "bstr",
"compact_str", "compact_str",
"datatest-stable",
"get-size2", "get-size2",
"insta", "insta",
"itertools 0.14.0", "itertools 0.14.0",

View File

@ -12,6 +12,10 @@ license = { workspace = true }
[lib] [lib]
[[test]]
name = "fixtures"
harness = false
[dependencies] [dependencies]
ruff_python_ast = { workspace = true, features = ["get-size"] } ruff_python_ast = { workspace = true, features = ["get-size"] }
ruff_python_trivia = { workspace = true } ruff_python_trivia = { workspace = true }
@ -34,6 +38,7 @@ ruff_python_ast = { workspace = true, features = ["serde"] }
ruff_source_file = { workspace = true } ruff_source_file = { workspace = true }
anyhow = { workspace = true } anyhow = { workspace = true }
datatest-stable = { workspace = true }
insta = { workspace = true, features = ["glob"] } insta = { workspace = true, features = ["glob"] }
itertools = { workspace = true } itertools = { workspace = true }
serde = { workspace = true } serde = { workspace = true }

View File

@ -1,9 +1,4 @@
use std::cell::RefCell; use datatest_stable::Utf8Path;
use std::cmp::Ordering;
use std::fmt::{Formatter, Write};
use std::fs;
use std::path::Path;
use itertools::Itertools; use itertools::Itertools;
use ruff_annotate_snippets::{Level, Renderer, Snippet}; use ruff_annotate_snippets::{Level, Renderer, Snippet};
use ruff_python_ast::token::{Token, Tokens}; use ruff_python_ast::token::{Token, Tokens};
@ -16,39 +11,53 @@ use ruff_python_parser::semantic_errors::{
use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, Parsed, parse_unchecked}; use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, Parsed, parse_unchecked};
use ruff_source_file::{LineIndex, OneIndexed, SourceCode}; use ruff_source_file::{LineIndex, OneIndexed, SourceCode};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use std::cell::RefCell;
use std::cmp::Ordering;
use std::fmt::{Formatter, Write};
#[test] #[expect(clippy::needless_pass_by_value, clippy::unnecessary_wraps)]
fn valid_syntax() { fn valid_syntax(path: &Utf8Path, content: String) -> datatest_stable::Result<()> {
insta::glob!("../resources", "valid/**/*.py", test_valid_syntax); test_valid_syntax(path, &content, "./resources/valid");
Ok(())
} }
#[test] #[expect(clippy::needless_pass_by_value, clippy::unnecessary_wraps)]
fn invalid_syntax() { fn invalid_syntax(path: &Utf8Path, content: String) -> datatest_stable::Result<()> {
insta::glob!("../resources", "invalid/**/*.py", test_invalid_syntax); test_invalid_syntax(path, &content, "./resources/invalid");
Ok(())
} }
#[test] #[expect(clippy::needless_pass_by_value, clippy::unnecessary_wraps)]
fn inline_ok() { fn inline_ok(path: &Utf8Path, content: String) -> datatest_stable::Result<()> {
insta::glob!("../resources/inline", "ok/**/*.py", test_valid_syntax); test_valid_syntax(path, &content, "./resources/inline/ok");
Ok(())
} }
#[test] #[expect(clippy::needless_pass_by_value, clippy::unnecessary_wraps)]
fn inline_err() { fn inline_err(path: &Utf8Path, content: String) -> datatest_stable::Result<()> {
insta::glob!("../resources/inline", "err/**/*.py", test_invalid_syntax); test_invalid_syntax(path, &content, "./resources/inline/err");
Ok(())
}
datatest_stable::harness! {
{ test = valid_syntax, root = "./resources/valid", pattern = r"\.pyi?$" },
{ test = inline_ok, root = "./resources/inline/ok", pattern = r"\.pyi?$" },
{ test = invalid_syntax, root = "./resources/invalid", pattern = r"\.pyi?$" },
{ test = inline_err, root="./resources/inline/err", pattern = r"\.pyi?$" }
} }
/// Asserts that the parser generates no syntax errors for a valid program. /// Asserts that the parser generates no syntax errors for a valid program.
/// Snapshots the AST. /// Snapshots the AST.
fn test_valid_syntax(input_path: &Path) { fn test_valid_syntax(input_path: &Utf8Path, source: &str, root: &str) {
let source = fs::read_to_string(input_path).expect("Expected test file to exist"); let test_name = input_path.strip_prefix(root).unwrap_or(input_path).as_str();
let options = extract_options(&source).unwrap_or_else(|| { let options = extract_options(source).unwrap_or_else(|| {
ParseOptions::from(Mode::Module).with_target_version(PythonVersion::latest_preview()) ParseOptions::from(Mode::Module).with_target_version(PythonVersion::latest_preview())
}); });
let parsed = parse_unchecked(&source, options.clone()); let parsed = parse_unchecked(source, options.clone());
if parsed.has_syntax_errors() { if parsed.has_syntax_errors() {
let line_index = LineIndex::from_source_text(&source); let line_index = LineIndex::from_source_text(source);
let source_code = SourceCode::new(&source, &line_index); let source_code = SourceCode::new(source, &line_index);
let mut message = "Expected no syntax errors for a valid program but the parser generated the following errors:\n".to_string(); let mut message = "Expected no syntax errors for a valid program but the parser generated the following errors:\n".to_string();
@ -81,8 +90,8 @@ fn test_valid_syntax(input_path: &Path) {
panic!("{input_path:?}: {message}"); panic!("{input_path:?}: {message}");
} }
validate_tokens(parsed.tokens(), source.text_len(), input_path); validate_tokens(parsed.tokens(), source.text_len());
validate_ast(&parsed, source.text_len(), input_path); validate_ast(&parsed, source.text_len());
let mut output = String::new(); let mut output = String::new();
writeln!(&mut output, "## AST").unwrap(); writeln!(&mut output, "## AST").unwrap();
@ -91,7 +100,7 @@ fn test_valid_syntax(input_path: &Path) {
let parsed = parsed.try_into_module().expect("Parsed with Mode::Module"); let parsed = parsed.try_into_module().expect("Parsed with Mode::Module");
let mut visitor = let mut visitor =
SemanticSyntaxCheckerVisitor::new(&source).with_python_version(options.target_version()); SemanticSyntaxCheckerVisitor::new(source).with_python_version(options.target_version());
for stmt in parsed.suite() { for stmt in parsed.suite() {
visitor.visit_stmt(stmt); visitor.visit_stmt(stmt);
@ -102,8 +111,8 @@ fn test_valid_syntax(input_path: &Path) {
if !semantic_syntax_errors.is_empty() { if !semantic_syntax_errors.is_empty() {
let mut message = "Expected no semantic syntax errors for a valid program:\n".to_string(); let mut message = "Expected no semantic syntax errors for a valid program:\n".to_string();
let line_index = LineIndex::from_source_text(&source); let line_index = LineIndex::from_source_text(source);
let source_code = SourceCode::new(&source, &line_index); let source_code = SourceCode::new(source, &line_index);
for error in semantic_syntax_errors { for error in semantic_syntax_errors {
writeln!( writeln!(
@ -125,6 +134,7 @@ fn test_valid_syntax(input_path: &Path) {
omit_expression => true, omit_expression => true,
input_file => input_path, input_file => input_path,
prepend_module_to_snapshot => false, prepend_module_to_snapshot => false,
snapshot_suffix => test_name
}, { }, {
insta::assert_snapshot!(output); insta::assert_snapshot!(output);
}); });
@ -132,22 +142,23 @@ fn test_valid_syntax(input_path: &Path) {
/// Assert that the parser generates at least one syntax error for the given input file. /// Assert that the parser generates at least one syntax error for the given input file.
/// Snapshots the AST and the error messages. /// Snapshots the AST and the error messages.
fn test_invalid_syntax(input_path: &Path) { fn test_invalid_syntax(input_path: &Utf8Path, source: &str, root: &str) {
let source = fs::read_to_string(input_path).expect("Expected test file to exist"); let test_name = input_path.strip_prefix(root).unwrap_or(input_path).as_str();
let options = extract_options(&source).unwrap_or_else(|| {
let options = extract_options(source).unwrap_or_else(|| {
ParseOptions::from(Mode::Module).with_target_version(PythonVersion::PY314) ParseOptions::from(Mode::Module).with_target_version(PythonVersion::PY314)
}); });
let parsed = parse_unchecked(&source, options.clone()); let parsed = parse_unchecked(source, options.clone());
validate_tokens(parsed.tokens(), source.text_len(), input_path); validate_tokens(parsed.tokens(), source.text_len());
validate_ast(&parsed, source.text_len(), input_path); validate_ast(&parsed, source.text_len());
let mut output = String::new(); let mut output = String::new();
writeln!(&mut output, "## AST").unwrap(); writeln!(&mut output, "## AST").unwrap();
writeln!(&mut output, "\n```\n{:#?}\n```", parsed.syntax()).unwrap(); writeln!(&mut output, "\n```\n{:#?}\n```", parsed.syntax()).unwrap();
let line_index = LineIndex::from_source_text(&source); let line_index = LineIndex::from_source_text(source);
let source_code = SourceCode::new(&source, &line_index); let source_code = SourceCode::new(source, &line_index);
if !parsed.errors().is_empty() { if !parsed.errors().is_empty() {
writeln!(&mut output, "## Errors\n").unwrap(); writeln!(&mut output, "## Errors\n").unwrap();
@ -186,7 +197,7 @@ fn test_invalid_syntax(input_path: &Path) {
let parsed = parsed.try_into_module().expect("Parsed with Mode::Module"); let parsed = parsed.try_into_module().expect("Parsed with Mode::Module");
let mut visitor = let mut visitor =
SemanticSyntaxCheckerVisitor::new(&source).with_python_version(options.target_version()); SemanticSyntaxCheckerVisitor::new(source).with_python_version(options.target_version());
for stmt in parsed.suite() { for stmt in parsed.suite() {
visitor.visit_stmt(stmt); visitor.visit_stmt(stmt);
@ -196,7 +207,7 @@ fn test_invalid_syntax(input_path: &Path) {
assert!( assert!(
parsed.has_syntax_errors() || !semantic_syntax_errors.is_empty(), parsed.has_syntax_errors() || !semantic_syntax_errors.is_empty(),
"{input_path:?}: Expected parser to generate at least one syntax error for a program containing syntax errors." "Expected parser to generate at least one syntax error for a program containing syntax errors."
); );
if !semantic_syntax_errors.is_empty() { if !semantic_syntax_errors.is_empty() {
@ -220,6 +231,7 @@ fn test_invalid_syntax(input_path: &Path) {
omit_expression => true, omit_expression => true,
input_file => input_path, input_file => input_path,
prepend_module_to_snapshot => false, prepend_module_to_snapshot => false,
snapshot_suffix => test_name
}, { }, {
insta::assert_snapshot!(output); insta::assert_snapshot!(output);
}); });
@ -372,26 +384,24 @@ impl std::fmt::Display for CodeFrame<'_> {
/// Verifies that: /// Verifies that:
/// * the ranges are strictly increasing when loop the tokens in insertion order /// * the ranges are strictly increasing when loop the tokens in insertion order
/// * all ranges are within the length of the source code /// * all ranges are within the length of the source code
fn validate_tokens(tokens: &[Token], source_length: TextSize, test_path: &Path) { fn validate_tokens(tokens: &[Token], source_length: TextSize) {
let mut previous: Option<&Token> = None; let mut previous: Option<&Token> = None;
for token in tokens { for token in tokens {
assert!( assert!(
token.end() <= source_length, token.end() <= source_length,
"{path}: Token range exceeds the source code length. Token: {token:#?}", "Token range exceeds the source code length. Token: {token:#?}",
path = test_path.display()
); );
if let Some(previous) = previous { if let Some(previous) = previous {
assert_eq!( assert_eq!(
previous.range().ordering(token.range()), previous.range().ordering(token.range()),
Ordering::Less, Ordering::Less,
"{path}: Token ranges are not in increasing order "Token ranges are not in increasing order
Previous token: {previous:#?} Previous token: {previous:#?}
Current token: {token:#?} Current token: {token:#?}
Tokens: {tokens:#?} Tokens: {tokens:#?}
", ",
path = test_path.display(),
); );
} }
@ -403,9 +413,9 @@ Tokens: {tokens:#?}
/// * the range of the parent node fully encloses all its child nodes /// * the range of the parent node fully encloses all its child nodes
/// * the ranges are strictly increasing when traversing the nodes in pre-order. /// * the ranges are strictly increasing when traversing the nodes in pre-order.
/// * all ranges are within the length of the source code. /// * all ranges are within the length of the source code.
fn validate_ast(parsed: &Parsed<Mod>, source_len: TextSize, test_path: &Path) { fn validate_ast(parsed: &Parsed<Mod>, source_len: TextSize) {
walk_module( walk_module(
&mut ValidateAstVisitor::new(parsed.tokens(), source_len, test_path), &mut ValidateAstVisitor::new(parsed.tokens(), source_len),
parsed.syntax(), parsed.syntax(),
); );
} }
@ -416,17 +426,15 @@ struct ValidateAstVisitor<'a> {
parents: Vec<AnyNodeRef<'a>>, parents: Vec<AnyNodeRef<'a>>,
previous: Option<AnyNodeRef<'a>>, previous: Option<AnyNodeRef<'a>>,
source_length: TextSize, source_length: TextSize,
test_path: &'a Path,
} }
impl<'a> ValidateAstVisitor<'a> { impl<'a> ValidateAstVisitor<'a> {
fn new(tokens: &'a Tokens, source_length: TextSize, test_path: &'a Path) -> Self { fn new(tokens: &'a Tokens, source_length: TextSize) -> Self {
Self { Self {
tokens: tokens.iter().peekable(), tokens: tokens.iter().peekable(),
parents: Vec::new(), parents: Vec::new(),
previous: None, previous: None,
source_length, source_length,
test_path,
} }
} }
} }
@ -444,8 +452,7 @@ impl ValidateAstVisitor<'_> {
// At this point, next_token.end() > node.start() // At this point, next_token.end() > node.start()
assert!( assert!(
next.start() >= node.start(), next.start() >= node.start(),
"{path}: The start of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}", "The start of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}",
path = self.test_path.display(),
root = self.parents.first() root = self.parents.first()
); );
} }
@ -464,8 +471,7 @@ impl ValidateAstVisitor<'_> {
// At this point, `next_token.end() > node.end()` // At this point, `next_token.end() > node.end()`
assert!( assert!(
next.start() >= node.end(), next.start() >= node.end(),
"{path}: The end of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}", "The end of the node falls within a token.\nNode: {node:#?}\n\nToken: {next:#?}\n\nRoot: {root:#?}",
path = self.test_path.display(),
root = self.parents.first() root = self.parents.first()
); );
} }
@ -476,16 +482,14 @@ impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> {
fn enter_node(&mut self, node: AnyNodeRef<'ast>) -> TraversalSignal { fn enter_node(&mut self, node: AnyNodeRef<'ast>) -> TraversalSignal {
assert!( assert!(
node.end() <= self.source_length, node.end() <= self.source_length,
"{path}: The range of the node exceeds the length of the source code. Node: {node:#?}", "The range of the node exceeds the length of the source code. Node: {node:#?}",
path = self.test_path.display()
); );
if let Some(previous) = self.previous { if let Some(previous) = self.previous {
assert_ne!( assert_ne!(
previous.range().ordering(node.range()), previous.range().ordering(node.range()),
Ordering::Greater, Ordering::Greater,
"{path}: The ranges of the nodes are not strictly increasing when traversing the AST in pre-order.\nPrevious node: {previous:#?}\n\nCurrent node: {node:#?}\n\nRoot: {root:#?}", "The ranges of the nodes are not strictly increasing when traversing the AST in pre-order.\nPrevious node: {previous:#?}\n\nCurrent node: {node:#?}\n\nRoot: {root:#?}",
path = self.test_path.display(),
root = self.parents.first() root = self.parents.first()
); );
} }
@ -493,8 +497,7 @@ impl<'ast> SourceOrderVisitor<'ast> for ValidateAstVisitor<'ast> {
if let Some(parent) = self.parents.last() { if let Some(parent) = self.parents.last() {
assert!( assert!(
parent.range().contains_range(node.range()), parent.range().contains_range(node.range()),
"{path}: The range of the parent node does not fully enclose the range of the child node.\nParent node: {parent:#?}\n\nChild node: {node:#?}\n\nRoot: {root:#?}", "The range of the parent node does not fully enclose the range of the child node.\nParent node: {parent:#?}\n\nChild node: {node:#?}\n\nRoot: {root:#?}",
path = self.test_path.display(),
root = self.parents.first() root = self.parents.first()
); );
} }