diff --git a/Cargo.lock b/Cargo.lock index 8ffd30015d..856f7c9fea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1942,17 +1942,20 @@ dependencies = [ "clap", "itertools", "libcst", + "log", "once_cell", "pretty_assertions", "regex", "ruff", "ruff_cli", "ruff_diagnostics", + "ruff_python_formatter", "ruff_textwrap", "rustpython-format", "rustpython-parser", "schemars", "serde_json", + "similar", "strum", "strum_macros", ] diff --git a/Cargo.toml b/Cargo.toml index 29941786c9..3051a7e693 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ schemars = { version = "0.8.12" } serde = { version = "1.0.152", features = ["derive"] } serde_json = { version = "1.0.93", features = ["preserve_order"] } shellexpand = { version = "3.0.0" } -similar = { version = "2.2.1" } +similar = { version = "2.2.1", features = ["inline"] } smallvec = { version = "1.10.0" } strum = { version = "0.24.1", features = ["strum_macros"] } strum_macros = { version = "0.24.3" } diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 306a689ae6..cfea318ddc 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -66,7 +66,7 @@ semver = { version = "1.0.16" } serde = { workspace = true } serde_json = { workspace = true } serde_with = { version = "3.0.0" } -similar = { workspace = true, features = ["inline"] } +similar = { workspace = true } shellexpand = { workspace = true } smallvec = { workspace = true } strum = { workspace = true } diff --git a/crates/ruff_cli/src/lib.rs b/crates/ruff_cli/src/lib.rs index 4307f74290..01b3e229e1 100644 --- a/crates/ruff_cli/src/lib.rs +++ b/crates/ruff_cli/src/lib.rs @@ -24,7 +24,7 @@ mod commands; mod diagnostics; mod panic; mod printer; -mod resolve; +pub mod resolve; #[derive(Copy, Clone)] pub enum ExitStatus { diff --git a/crates/ruff_cli/src/resolve.rs b/crates/ruff_cli/src/resolve.rs index 7952d70158..0bf5db6670 100644 --- a/crates/ruff_cli/src/resolve.rs +++ b/crates/ruff_cli/src/resolve.rs @@ -14,7 +14,7 @@ use crate::args::Overrides; /// Resolve the relevant settings strategy and defaults for the current /// invocation. -pub(crate) fn resolve( +pub fn resolve( isolated: bool, config: Option<&Path>, overrides: &Overrides, diff --git a/crates/ruff_dev/Cargo.toml b/crates/ruff_dev/Cargo.toml index 376bb09b8e..fa8ee5d7c5 100644 --- a/crates/ruff_dev/Cargo.toml +++ b/crates/ruff_dev/Cargo.toml @@ -14,12 +14,14 @@ license = { workspace = true } ruff = { path = "../ruff", features = ["schemars"] } ruff_cli = { path = "../ruff_cli" } ruff_diagnostics = { path = "../ruff_diagnostics" } +ruff_python_formatter = { path = "../ruff_python_formatter" } ruff_textwrap = { path = "../ruff_textwrap" } anyhow = { workspace = true } clap = { workspace = true } itertools = { workspace = true } libcst = { workspace = true } +log = { workspace = true } once_cell = { workspace = true } pretty_assertions = { version = "1.3.0" } regex = { workspace = true } @@ -27,5 +29,6 @@ rustpython-format = { workspace = true } rustpython-parser = { workspace = true } schemars = { workspace = true } serde_json = { workspace = true } +similar = { workspace = true } strum = { workspace = true } strum_macros = { workspace = true } diff --git a/crates/ruff_dev/src/check_formatter_stability.rs b/crates/ruff_dev/src/check_formatter_stability.rs new file mode 100644 index 0000000000..26b1d68902 --- /dev/null +++ b/crates/ruff_dev/src/check_formatter_stability.rs @@ -0,0 +1,277 @@ +//! We want to ensure that once formatted content stays the same when formatted again, which is +//! known as formatter stability or formatter idempotency, and that the formatter prints +//! syntactically valid code. As our test cases cover only a limited amount of code, this allows +//! checking entire repositories. +#![allow(clippy::print_stdout)] + +use anyhow::Context; +use clap::Parser; +use log::debug; +use ruff::resolver::python_files_in_path; +use ruff::settings::types::{FilePattern, FilePatternSet}; +use ruff_cli::args::CheckArgs; +use ruff_cli::resolve::resolve; +use ruff_python_formatter::format_module; +use similar::{ChangeTag, TextDiff}; +use std::io::Write; +use std::panic::catch_unwind; +use std::path::{Path, PathBuf}; +use std::process::ExitCode; +use std::time::Instant; +use std::{fs, io, iter}; + +/// Control the verbosity of the output +#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum, Default)] +pub(crate) enum Format { + /// Filenames only + Minimal, + /// Filenames and reduced diff + #[default] + Default, + /// Full diff and invalid code + Full, +} + +#[derive(clap::Args)] +pub(crate) struct Args { + /// Like `ruff check`'s files + pub(crate) files: Vec, + /// Control the verbosity of the output + #[arg(long, default_value_t, value_enum)] + pub(crate) format: Format, + /// Print only the first error and exit, `-x` is same as pytest + #[arg(long, short = 'x')] + pub(crate) exit_first_error: bool, +} + +/// Generate ourself a `try_parse_from` impl for `CheckArgs`. This is a strange way to use clap but +/// we want the same behaviour as `ruff_cli` and clap seems to lack a way to parse directly to +/// `Args` instead of a `Parser` +#[derive(Debug, clap::Parser)] +struct WrapperArgs { + #[clap(flatten)] + check_args: CheckArgs, +} + +pub(crate) fn main(args: &Args) -> anyhow::Result { + let start = Instant::now(); + + // Find files to check (or in this case, format twice). Adapted from ruff_cli + // First argument is ignored + let dummy = PathBuf::from("check"); + let check_args_input = iter::once(&dummy).chain(&args.files); + let check_args: CheckArgs = WrapperArgs::try_parse_from(check_args_input)?.check_args; + let (cli, overrides) = check_args.partition(); + let mut pyproject_config = resolve( + cli.isolated, + cli.config.as_deref(), + &overrides, + cli.stdin_filename.as_deref(), + )?; + // We don't want to format pyproject.toml + pyproject_config.settings.lib.include = FilePatternSet::try_from_vec(vec![ + FilePattern::Builtin("*.py"), + FilePattern::Builtin("*.pyi"), + ]) + .unwrap(); + let (paths, _resolver) = python_files_in_path(&cli.files, &pyproject_config, &overrides)?; + assert!(!paths.is_empty(), "no python files in {:?}", cli.files); + + let errors = paths + .into_iter() + .map(|dir_entry| { + // Doesn't make sense to recover here in this test script + let file = dir_entry + .expect("Iterating the files in the repository failed") + .into_path(); + // Handle panics (mostly in `debug_assert!`) + let result = match catch_unwind(|| check_file(&file)) { + Ok(result) => result, + Err(panic) => { + if let Ok(message) = panic.downcast::() { + Err(FormatterStabilityError::Panic { message: *message }) + } else { + Err(FormatterStabilityError::Panic { + // This should not happen, but it can + message: "(Panic didn't set a string message)".to_string(), + }) + } + } + }; + (result, file) + }) + // We only care about the errors + .filter_map(|(result, file)| match result { + Err(err) => Some((err, file)), + Ok(()) => None, + }); + + let mut any_errors = false; + + // Don't collect the iterator so we already see errors while it's still processing + for (error, file) in errors { + any_errors = true; + match error { + FormatterStabilityError::Unstable { + formatted, + reformatted, + } => { + println!("Unstable formatting {}", file.display()); + match args.format { + Format::Minimal => {} + Format::Default => { + diff_show_only_changes( + io::stdout().lock().by_ref(), + &formatted, + &reformatted, + )?; + } + Format::Full => { + let diff = TextDiff::from_lines(&formatted, &reformatted) + .unified_diff() + .header("Formatted once", "Formatted twice") + .to_string(); + println!( + r#"Reformatting the formatted code a second time resulted in formatting changes. +--- +{diff}--- + +Formatted once: +--- +{formatted}--- + +Formatted twice: +--- +{reformatted}---"#, + ); + } + } + } + FormatterStabilityError::InvalidSyntax { err, formatted } => { + println!( + "Formatter generated invalid syntax {}: {}", + file.display(), + err + ); + if args.format == Format::Full { + println!("---\n{formatted}\n---\n"); + } + } + FormatterStabilityError::Panic { message } => { + println!("Panic {}: {}", file.display(), message); + } + FormatterStabilityError::Other(err) => { + println!("Uncategorized error {}: {}", file.display(), err); + } + } + + if args.exit_first_error { + return Ok(ExitCode::FAILURE); + } + } + let duration = start.elapsed(); + println!( + "Formatting {} files twice took {:.2}s", + cli.files.len(), + duration.as_secs_f32() + ); + + if any_errors { + Ok(ExitCode::FAILURE) + } else { + Ok(ExitCode::SUCCESS) + } +} + +/// A compact diff that only shows a header and changes, but nothing unchanged. This makes viewing +/// multiple errors easier. +fn diff_show_only_changes( + writer: &mut impl Write, + formatted: &str, + reformatted: &str, +) -> io::Result<()> { + for changes in TextDiff::from_lines(formatted, reformatted) + .unified_diff() + .iter_hunks() + { + for (idx, change) in changes + .iter_changes() + .filter(|change| change.tag() != ChangeTag::Equal) + .enumerate() + { + if idx == 0 { + writeln!(writer, "{}", changes.header())?; + } + write!(writer, "{}", change.tag())?; + writer.write_all(change.value().as_bytes())?; + } + } + Ok(()) +} + +#[derive(Debug)] +enum FormatterStabilityError { + /// First and second pass of the formatter are different + Unstable { + formatted: String, + reformatted: String, + }, + /// The formatter printed invalid code + InvalidSyntax { + err: anyhow::Error, + formatted: String, + }, + /// From `catch_unwind` + Panic { + message: String, + }, + Other(anyhow::Error), +} + +impl From for FormatterStabilityError { + fn from(error: anyhow::Error) -> Self { + Self::Other(error) + } +} + +/// Run the formatter twice on the given file. Does not write back to the file +fn check_file(input_path: &Path) -> Result<(), FormatterStabilityError> { + let content = fs::read_to_string(input_path).context("Failed to read file")?; + let printed = match format_module(&content) { + Ok(printed) => printed, + Err(err) => { + return if err + .to_string() + .starts_with("Source contains syntax errors ") + { + debug!( + "Skipping {} with invalid first pass {}", + input_path.display(), + err + ); + Ok(()) + } else { + Err(err.into()) + }; + } + }; + let formatted = printed.as_code(); + + let reformatted = match format_module(formatted) { + Ok(reformatted) => reformatted, + Err(err) => { + return Err(FormatterStabilityError::InvalidSyntax { + err, + formatted: formatted.to_string(), + }); + } + }; + + if reformatted.as_code() != formatted { + return Err(FormatterStabilityError::Unstable { + formatted: formatted.to_string(), + reformatted: reformatted.into_code(), + }); + } + Ok(()) +} diff --git a/crates/ruff_dev/src/main.rs b/crates/ruff_dev/src/main.rs index dbdfb45466..0327211a20 100644 --- a/crates/ruff_dev/src/main.rs +++ b/crates/ruff_dev/src/main.rs @@ -6,7 +6,9 @@ use anyhow::Result; use clap::{Parser, Subcommand}; use ruff::logging::{set_up_logging, LogLevel}; use ruff_cli::check; +use std::process::ExitCode; +mod check_formatter_stability; mod generate_all; mod generate_cli_help; mod generate_docs; @@ -61,33 +63,40 @@ enum Command { #[clap(long, short = 'n')] repeat: usize, }, + /// Format a repository twice and ensure that it looks that the first and second formatting + /// look the same. Same arguments as `ruff check` + CheckFormatterStability(check_formatter_stability::Args), } -fn main() -> Result<()> { +fn main() -> Result { let args = Args::parse(); #[allow(clippy::print_stdout)] - match &args.command { - Command::GenerateAll(args) => generate_all::main(args)?, - Command::GenerateJSONSchema(args) => generate_json_schema::main(args)?, + match args.command { + Command::GenerateAll(args) => generate_all::main(&args)?, + Command::GenerateJSONSchema(args) => generate_json_schema::main(&args)?, Command::GenerateRulesTable => println!("{}", generate_rules_table::generate()), Command::GenerateOptions => println!("{}", generate_options::generate()), - Command::GenerateCliHelp(args) => generate_cli_help::main(args)?, - Command::GenerateDocs(args) => generate_docs::main(args)?, - Command::PrintAST(args) => print_ast::main(args)?, - Command::PrintCST(args) => print_cst::main(args)?, - Command::PrintTokens(args) => print_tokens::main(args)?, - Command::RoundTrip(args) => round_trip::main(args)?, + Command::GenerateCliHelp(args) => generate_cli_help::main(&args)?, + Command::GenerateDocs(args) => generate_docs::main(&args)?, + Command::PrintAST(args) => print_ast::main(&args)?, + Command::PrintCST(args) => print_cst::main(&args)?, + Command::PrintTokens(args) => print_tokens::main(&args)?, + Command::RoundTrip(args) => round_trip::main(&args)?, Command::Repeat { args, repeat, log_level_args, } => { - let log_level = LogLevel::from(log_level_args); + let log_level = LogLevel::from(&log_level_args); set_up_logging(&log_level)?; - for _ in 0..*repeat { + for _ in 0..repeat { check(args.clone(), log_level)?; } } + Command::CheckFormatterStability(args) => { + let exit_code = check_formatter_stability::main(&args)?; + return Ok(exit_code); + } } - Ok(()) + Ok(ExitCode::SUCCESS) }