diff --git a/Cargo.lock b/Cargo.lock index db8cdef966..2376d4aab3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -404,6 +404,7 @@ dependencies = [ "encode_unicode", "lazy_static", "libc", + "unicode-width", "windows-sys 0.45.0", ] @@ -949,6 +950,19 @@ dependencies = [ "hashbrown 0.14.0", ] +[[package]] +name = "indicatif" +version = "0.17.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ff8cc23a7393a397ed1d7f56e6365cba772aba9f9912ab968b03043c395d057" +dependencies = [ + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", +] + [[package]] name = "inotify" version = "0.9.6" @@ -1333,6 +1347,12 @@ dependencies = [ "libc", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "once_cell" version = "1.18.0" @@ -1557,6 +1577,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "portable-atomic" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "767eb9f07d4a5ebcb39bbf2d452058a93c011373abf6832e24194a1c3f004794" + [[package]] name = "predicates" version = "3.0.3" @@ -1959,6 +1985,8 @@ version = "0.0.0" dependencies = [ "anyhow", "clap", + "ignore", + "indicatif", "itertools", "libcst", "log", @@ -1969,6 +1997,7 @@ dependencies = [ "ruff", "ruff_cli", "ruff_diagnostics", + "ruff_formatter", "ruff_python_formatter", "ruff_python_stdlib", "ruff_textwrap", @@ -1979,6 +2008,7 @@ dependencies = [ "similar", "strum", "strum_macros", + "tempfile", ] [[package]] @@ -2071,6 +2101,7 @@ dependencies = [ "serde_json", "similar", "smallvec", + "thiserror", ] [[package]] @@ -2629,18 +2660,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.41" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c16a64ba9387ef3fdae4f9c1a7f07a0997fce91985c0336f1ddc1822b3b37802" +checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.41" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d14928354b01c4d6a4f0e549069adef399a284e7995c7ccca94e8a07a5346c59" +checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index d48fedbf29..1ca004af7b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ strum = { version = "0.24.1", features = ["strum_macros"] } strum_macros = { version = "0.24.3" } syn = { version = "2.0.15" } test-case = { version = "3.0.0" } +thiserror = { version = "1.0.43" } toml = { version = "0.7.2" } # v0.0.1 diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 7503cdddd5..ad1372686c 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -74,7 +74,7 @@ shellexpand = { workspace = true } smallvec = { workspace = true } strum = { workspace = true } strum_macros = { workspace = true } -thiserror = { version = "1.0.38" } +thiserror = { version = "1.0.43" } toml = { workspace = true } typed-arena = { version = "2.0.2" } unicode-width = { version = "0.1.10" } diff --git a/crates/ruff_cli/src/args.rs b/crates/ruff_cli/src/args.rs index 1050aafcbf..aa299cad60 100644 --- a/crates/ruff_cli/src/args.rs +++ b/crates/ruff_cli/src/args.rs @@ -74,7 +74,8 @@ pub enum Command { }, } -#[derive(Clone, Debug, clap::Args)] +// The `Parser` derive is for ruff_dev, for ruff_cli `Args` would be sufficient +#[derive(Clone, Debug, clap::Parser)] #[allow(clippy::struct_excessive_bools, clippy::module_name_repetitions)] pub struct CheckArgs { /// List of files or directories to check. diff --git a/crates/ruff_dev/Cargo.toml b/crates/ruff_dev/Cargo.toml index 07be9e5d72..36c9c42d59 100644 --- a/crates/ruff_dev/Cargo.toml +++ b/crates/ruff_dev/Cargo.toml @@ -14,12 +14,15 @@ license = { workspace = true } ruff = { path = "../ruff", features = ["schemars"] } ruff_cli = { path = "../ruff_cli" } ruff_diagnostics = { path = "../ruff_diagnostics" } +ruff_formatter = { path = "../ruff_formatter" } ruff_python_formatter = { path = "../ruff_python_formatter" } ruff_python_stdlib = { path = "../ruff_python_stdlib" } ruff_textwrap = { path = "../ruff_textwrap" } anyhow = { workspace = true } clap = { workspace = true } +ignore = { workspace = true } +indicatif = "0.17.5" itertools = { workspace = true } libcst = { workspace = true } log = { workspace = true } @@ -34,3 +37,4 @@ serde_json = { workspace = true } similar = { workspace = true } strum = { workspace = true } strum_macros = { workspace = true } +tempfile = "3.6.0" diff --git a/crates/ruff_dev/src/check_formatter_stability.rs b/crates/ruff_dev/src/check_formatter_stability.rs deleted file mode 100644 index b955a7245a..0000000000 --- a/crates/ruff_dev/src/check_formatter_stability.rs +++ /dev/null @@ -1,472 +0,0 @@ -//! We want to ensure that once formatted content stays the same when formatted again, which is -//! known as formatter stability or formatter idempotency, and that the formatter prints -//! syntactically valid code. As our test cases cover only a limited amount of code, this allows -//! checking entire repositories. - -use std::fmt::{Display, Formatter}; -use std::fs::File; -use std::io::Write; -use std::io::{stdout, BufWriter}; -use std::panic::catch_unwind; -use std::path::{Path, PathBuf}; -use std::process::ExitCode; -use std::sync::mpsc::channel; -use std::time::{Duration, Instant}; -use std::{fmt, fs, iter}; - -use anyhow::{bail, Context}; -use clap::Parser; -use log::debug; -use similar::{ChangeTag, TextDiff}; - -use ruff::resolver::python_files_in_path; -use ruff::settings::types::{FilePattern, FilePatternSet}; -use ruff_cli::args::CheckArgs; -use ruff_cli::resolve::resolve; -use ruff_python_formatter::{format_module, PyFormatOptions}; - -/// Control the verbosity of the output -#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum, Default)] -pub(crate) enum Format { - /// Filenames only - Minimal, - /// Filenames and reduced diff - #[default] - Default, - /// Full diff and invalid code - Full, -} - -#[derive(clap::Args)] -pub(crate) struct Args { - /// Like `ruff check`'s files - pub(crate) files: Vec, - /// Control the verbosity of the output - #[arg(long, default_value_t, value_enum)] - pub(crate) format: Format, - /// Print only the first error and exit, `-x` is same as pytest - #[arg(long, short = 'x')] - pub(crate) exit_first_error: bool, - /// Checks each project inside a directory - #[arg(long)] - pub(crate) multi_project: bool, - /// Write all errors to this file in addition to stdout - #[arg(long)] - pub(crate) error_file: Option, -} - -/// Generate ourself a `try_parse_from` impl for `CheckArgs`. This is a strange way to use clap but -/// we want the same behaviour as `ruff_cli` and clap seems to lack a way to parse directly to -/// `Args` instead of a `Parser` -#[derive(Debug, clap::Parser)] -struct WrapperArgs { - #[clap(flatten)] - check_args: CheckArgs, -} - -pub(crate) fn main(args: &Args) -> anyhow::Result { - let all_success = if args.multi_project { - check_multi_project(args) - } else { - let result = check_repo(args)?; - - #[allow(clippy::print_stdout)] - { - print!("{}", result.display(args.format)); - println!( - "Found {} stability errors in {} files in {:.2}s", - result.diagnostics.len(), - result.file_count, - result.duration.as_secs_f32(), - ); - } - - result.is_success() - }; - if all_success { - Ok(ExitCode::SUCCESS) - } else { - Ok(ExitCode::FAILURE) - } -} - -enum Message { - Start { - path: PathBuf, - }, - Failed { - path: PathBuf, - error: anyhow::Error, - }, - Finished { - path: PathBuf, - result: CheckRepoResult, - }, -} - -fn check_multi_project(args: &Args) -> bool { - let mut all_success = true; - let mut total_errors = 0; - let mut total_files = 0; - let start = Instant::now(); - - rayon::scope(|scope| { - let (sender, receiver) = channel(); - - for base_dir in &args.files { - for dir in base_dir.read_dir().unwrap() { - let path = dir.unwrap().path().clone(); - - let sender = sender.clone(); - - scope.spawn(move |_| { - sender.send(Message::Start { path: path.clone() }).unwrap(); - - match check_repo(&Args { - files: vec![path.clone()], - error_file: args.error_file.clone(), - ..*args - }) { - Ok(result) => sender.send(Message::Finished { result, path }), - Err(error) => sender.send(Message::Failed { error, path }), - } - .unwrap(); - }); - } - } - - scope.spawn(|_| { - let mut stdout = stdout().lock(); - let mut error_file = args.error_file.as_ref().map(|error_file| { - BufWriter::new(File::create(error_file).expect("Couldn't open error file")) - }); - - for message in receiver { - match message { - Message::Start { path } => { - writeln!(stdout, "Starting {}", path.display()).unwrap(); - } - Message::Finished { path, result } => { - total_errors += result.diagnostics.len(); - total_files += result.file_count; - - writeln!( - stdout, - "Finished {} with {} files in {:.2}s", - path.display(), - result.file_count, - result.duration.as_secs_f32(), - ) - .unwrap(); - write!(stdout, "{}", result.display(args.format)).unwrap(); - if let Some(error_file) = &mut error_file { - write!(error_file, "{}", result.display(args.format)).unwrap(); - } - all_success = all_success && result.is_success(); - } - Message::Failed { path, error } => { - writeln!(stdout, "Failed {}: {}", path.display(), error).unwrap(); - all_success = false; - } - } - } - }); - }); - - let duration = start.elapsed(); - - #[allow(clippy::print_stdout)] - { - println!( - "{total_errors} stability errors in {total_files} files in {}s", - duration.as_secs_f32() - ); - } - - all_success -} - -/// Returns whether the check was successful -fn check_repo(args: &Args) -> anyhow::Result { - let start = Instant::now(); - - // Find files to check (or in this case, format twice). Adapted from ruff_cli - // First argument is ignored - let dummy = PathBuf::from("check"); - let check_args_input = iter::once(&dummy).chain(&args.files); - let check_args: CheckArgs = WrapperArgs::try_parse_from(check_args_input)?.check_args; - let (cli, overrides) = check_args.partition(); - let mut pyproject_config = resolve( - cli.isolated, - cli.config.as_deref(), - &overrides, - cli.stdin_filename.as_deref(), - )?; - // We don't want to format pyproject.toml - pyproject_config.settings.lib.include = FilePatternSet::try_from_vec(vec![ - FilePattern::Builtin("*.py"), - FilePattern::Builtin("*.pyi"), - ]) - .unwrap(); - let (paths, _resolver) = python_files_in_path(&cli.files, &pyproject_config, &overrides)?; - - if paths.is_empty() { - bail!("no python files in {:?}", cli.files) - } - - let mut formatted_counter = 0; - let errors: Vec<_> = paths - .into_iter() - .map(|dir_entry| { - // Doesn't make sense to recover here in this test script - dir_entry.expect("Iterating the files in the repository failed") - }) - .filter(|dir_entry| { - // For some reason it does not filter in the beginning - dir_entry.file_name() != "pyproject.toml" - }) - .map(|dir_entry| { - let file = dir_entry.path().to_path_buf(); - formatted_counter += 1; - // Handle panics (mostly in `debug_assert!`) - let result = match catch_unwind(|| check_file(&file)) { - Ok(result) => result, - Err(panic) => { - if let Some(message) = panic.downcast_ref::() { - Err(FormatterStabilityError::Panic { - message: message.clone(), - }) - } else if let Some(&message) = panic.downcast_ref::<&str>() { - Err(FormatterStabilityError::Panic { - message: message.to_string(), - }) - } else { - Err(FormatterStabilityError::Panic { - // This should not happen, but it can - message: "(Panic didn't set a string message)".to_string(), - }) - } - } - }; - (result, file) - }) - // We only care about the errors - .filter_map(|(result, file)| match result { - Err(error) => Some(Diagnostic { file, error }), - Ok(()) => None, - }) - .collect(); - - let duration = start.elapsed(); - - Ok(CheckRepoResult { - duration, - file_count: formatted_counter, - diagnostics: errors, - }) -} - -/// A compact diff that only shows a header and changes, but nothing unchanged. This makes viewing -/// multiple errors easier. -fn diff_show_only_changes( - writer: &mut Formatter, - formatted: &str, - reformatted: &str, -) -> fmt::Result { - for changes in TextDiff::from_lines(formatted, reformatted) - .unified_diff() - .iter_hunks() - { - for (idx, change) in changes - .iter_changes() - .filter(|change| change.tag() != ChangeTag::Equal) - .enumerate() - { - if idx == 0 { - writeln!(writer, "{}", changes.header())?; - } - write!(writer, "{}", change.tag())?; - writer.write_str(change.value())?; - } - } - Ok(()) -} - -struct CheckRepoResult { - duration: Duration, - file_count: usize, - diagnostics: Vec, -} - -impl CheckRepoResult { - fn display(&self, format: Format) -> DisplayCheckRepoResult { - DisplayCheckRepoResult { - result: self, - format, - } - } - - fn is_success(&self) -> bool { - self.diagnostics.is_empty() - } -} - -struct DisplayCheckRepoResult<'a> { - result: &'a CheckRepoResult, - format: Format, -} - -impl Display for DisplayCheckRepoResult<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - for diagnostic in &self.result.diagnostics { - write!(f, "{}", diagnostic.display(self.format))?; - } - Ok(()) - } -} - -#[derive(Debug)] -struct Diagnostic { - file: PathBuf, - error: FormatterStabilityError, -} - -impl Diagnostic { - fn display(&self, format: Format) -> DisplayDiagnostic { - DisplayDiagnostic { - diagnostic: self, - format, - } - } -} - -struct DisplayDiagnostic<'a> { - format: Format, - diagnostic: &'a Diagnostic, -} - -impl Display for DisplayDiagnostic<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let Diagnostic { file, error } = &self.diagnostic; - - match error { - FormatterStabilityError::Unstable { - formatted, - reformatted, - } => { - writeln!(f, "Unstable formatting {}", file.display())?; - match self.format { - Format::Minimal => {} - Format::Default => { - diff_show_only_changes(f, formatted, reformatted)?; - } - Format::Full => { - let diff = TextDiff::from_lines(formatted.as_str(), reformatted.as_str()) - .unified_diff() - .header("Formatted once", "Formatted twice") - .to_string(); - writeln!( - f, - r#"Reformatting the formatted code a second time resulted in formatting changes. ---- -{diff}--- - -Formatted once: ---- -{formatted}--- - -Formatted twice: ---- -{reformatted}---\n"#, - )?; - } - } - } - FormatterStabilityError::InvalidSyntax { err, formatted } => { - writeln!( - f, - "Formatter generated invalid syntax {}: {}", - file.display(), - err - )?; - if self.format == Format::Full { - writeln!(f, "---\n{formatted}\n---\n")?; - } - } - FormatterStabilityError::Panic { message } => { - writeln!(f, "Panic {}: {}", file.display(), message)?; - } - FormatterStabilityError::Other(err) => { - writeln!(f, "Uncategorized error {}: {}", file.display(), err)?; - } - } - Ok(()) - } -} - -#[derive(Debug)] -enum FormatterStabilityError { - /// First and second pass of the formatter are different - Unstable { - formatted: String, - reformatted: String, - }, - /// The formatter printed invalid code - InvalidSyntax { - err: anyhow::Error, - formatted: String, - }, - /// From `catch_unwind` - Panic { - message: String, - }, - Other(anyhow::Error), -} - -impl From for FormatterStabilityError { - fn from(error: anyhow::Error) -> Self { - Self::Other(error) - } -} - -/// Run the formatter twice on the given file. Does not write back to the file -fn check_file(input_path: &Path) -> Result<(), FormatterStabilityError> { - let content = fs::read_to_string(input_path).context("Failed to read file")?; - let printed = match format_module(&content, PyFormatOptions::default()) { - Ok(printed) => printed, - Err(err) => { - return if err - .to_string() - .starts_with("Source contains syntax errors ") - { - debug!( - "Skipping {} with invalid first pass {}", - input_path.display(), - err - ); - Ok(()) - } else { - Err(err.into()) - }; - } - }; - let formatted = printed.as_code(); - - let reformatted = match format_module(formatted, PyFormatOptions::default()) { - Ok(reformatted) => reformatted, - Err(err) => { - return Err(FormatterStabilityError::InvalidSyntax { - err, - formatted: formatted.to_string(), - }); - } - }; - - if reformatted.as_code() != formatted { - return Err(FormatterStabilityError::Unstable { - formatted: formatted.to_string(), - reformatted: reformatted.into_code(), - }); - } - Ok(()) -} diff --git a/crates/ruff_dev/src/format_dev.rs b/crates/ruff_dev/src/format_dev.rs new file mode 100644 index 0000000000..7695d93e65 --- /dev/null +++ b/crates/ruff_dev/src/format_dev.rs @@ -0,0 +1,628 @@ +use anyhow::{bail, Context}; +use clap::{CommandFactory, FromArgMatches}; +use ignore::DirEntry; +use indicatif::ProgressBar; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use ruff::resolver::python_files_in_path; +use ruff::settings::types::{FilePattern, FilePatternSet}; +use ruff_cli::args::CheckArgs; +use ruff_cli::resolve::resolve; +use ruff_formatter::{FormatError, PrintError}; +use ruff_python_formatter::{format_module, FormatModuleError, PyFormatOptions}; +use similar::{ChangeTag, TextDiff}; +use std::fmt::{Display, Formatter}; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::ops::{Add, AddAssign}; +use std::panic::catch_unwind; +use std::path::{Path, PathBuf}; +use std::process::ExitCode; +use std::sync::mpsc::channel; +use std::time::{Duration, Instant}; +use std::{fmt, fs, io}; +use tempfile::NamedTempFile; + +/// Find files that ruff would check so we can format them. Adapted from `ruff_cli`. +fn ruff_check_paths(dirs: &[PathBuf]) -> anyhow::Result>> { + let args_matches = CheckArgs::command() + .no_binary_name(true) + .get_matches_from(dirs); + let check_args: CheckArgs = CheckArgs::from_arg_matches(&args_matches)?; + let (cli, overrides) = check_args.partition(); + let mut pyproject_config = resolve( + cli.isolated, + cli.config.as_deref(), + &overrides, + cli.stdin_filename.as_deref(), + )?; + // We don't want to format pyproject.toml + pyproject_config.settings.lib.include = FilePatternSet::try_from_vec(vec![ + FilePattern::Builtin("*.py"), + FilePattern::Builtin("*.pyi"), + ]) + .unwrap(); + let (paths, _resolver) = python_files_in_path(&cli.files, &pyproject_config, &overrides)?; + if paths.is_empty() { + bail!("no python files in {:?}", dirs) + } + Ok(paths) +} + +/// Collects statistics over the formatted files, currently only computes the Jaccard index +/// +/// The [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) can be defined as +/// ```text +/// J(A, B) = |A∩B| / (|A\B| + |B\A| + |A∩B|) +/// ``` +/// where in our case `A` is the black formatted input, `B` is the ruff formatted output and the +/// intersection are the lines in the diff that didn't change. We don't just track intersection and +/// union so we can make statistics about size changes. If the input is not black formatted, this +/// only becomes a measure for the changes made to the codebase during the initial formatting. +#[derive(Default, Debug, Copy, Clone)] +pub(crate) struct Statistics { + /// The size of `A\B`, the number of lines only in the input, which we assume to be black + /// formatted + black_input: u32, + /// The size of `B\A`, the number of lines only in the formatted output + ruff_output: u32, + /// The number of matching identical lines + intersection: u32, +} + +impl Statistics { + pub(crate) fn from_versions(black: &str, ruff: &str) -> Self { + if black == ruff { + let intersection = u32::try_from(black.lines().count()).unwrap(); + Self { + black_input: 0, + ruff_output: 0, + intersection, + } + } else { + let diff = TextDiff::from_lines(black, ruff); + let mut statistics = Self::default(); + for change in diff.iter_all_changes() { + match change.tag() { + ChangeTag::Delete => statistics.black_input += 1, + ChangeTag::Insert => statistics.ruff_output += 1, + ChangeTag::Equal => statistics.intersection += 1, + } + } + statistics + } + } + + #[allow(clippy::cast_precision_loss)] + pub(crate) fn jaccard_index(&self) -> f32 { + self.intersection as f32 / (self.black_input + self.ruff_output + self.intersection) as f32 + } +} + +impl Add for Statistics { + type Output = Statistics; + + fn add(self, rhs: Statistics) -> Self::Output { + Statistics { + black_input: self.black_input + rhs.black_input, + ruff_output: self.ruff_output + rhs.ruff_output, + intersection: self.intersection + rhs.intersection, + } + } +} + +impl AddAssign for Statistics { + fn add_assign(&mut self, rhs: Statistics) { + *self = *self + rhs; + } +} + +/// Control the verbosity of the output +#[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum, Default)] +pub(crate) enum Format { + /// Filenames only + Minimal, + /// Filenames and reduced diff + #[default] + Default, + /// Full diff and invalid code + Full, +} + +#[allow(clippy::struct_excessive_bools)] +#[derive(clap::Args)] +pub(crate) struct Args { + /// Like `ruff check`'s files. See `--multi-project` if you want to format an ecosystem + /// checkout. + pub(crate) files: Vec, + /// Check stability + /// + /// We want to ensure that once formatted content stays the same when formatted again, which is + /// known as formatter stability or formatter idempotency, and that the formatter prints + /// syntactically valid code. As our test cases cover only a limited amount of code, this allows + /// checking entire repositories. + #[arg(long)] + pub(crate) stability_check: bool, + /// Format the files. Without this flag, the python files are not modified + #[arg(long)] + pub(crate) write: bool, + /// Control the verbosity of the output + #[arg(long, default_value_t, value_enum)] + pub(crate) format: Format, + /// Print only the first error and exit, `-x` is same as pytest + #[arg(long, short = 'x')] + pub(crate) exit_first_error: bool, + /// Checks each project inside a directory, useful e.g. if you want to check all of the + /// ecosystem checkouts. + #[arg(long)] + pub(crate) multi_project: bool, + /// Write all errors to this file in addition to stdout. Only used in multi-project mode. + #[arg(long)] + pub(crate) error_file: Option, +} + +pub(crate) fn main(args: &Args) -> anyhow::Result { + let all_success = if args.multi_project { + format_dev_multi_project(args) + } else { + let result = format_dev_project(&args.files, args.stability_check, args.write, true)?; + + #[allow(clippy::print_stdout)] + { + print!("{}", result.display(args.format)); + println!( + "Found {} stability errors in {} files (jaccard index {:.3}) in {:.2}s", + result + .diagnostics + .iter() + .filter(|diagnostics| !diagnostics.error.is_success()) + .count(), + result.file_count, + result.statistics.jaccard_index(), + result.duration.as_secs_f32(), + ); + } + + !result.is_success() + }; + if all_success { + Ok(ExitCode::SUCCESS) + } else { + Ok(ExitCode::FAILURE) + } +} + +/// Each `path` is one of the `files` in `Args` +enum Message { + Start { + path: PathBuf, + }, + Failed { + path: PathBuf, + error: anyhow::Error, + }, + Finished { + path: PathBuf, + result: CheckRepoResult, + }, +} + +/// Checks a directory of projects +fn format_dev_multi_project(args: &Args) -> bool { + let mut all_success = true; + let mut total_errors = 0; + let mut total_files = 0; + let start = Instant::now(); + + rayon::scope(|scope| { + let (sender, receiver) = channel(); + + // Workers, to check is subdirectory in parallel + for base_dir in &args.files { + for dir in base_dir.read_dir().unwrap() { + let path = dir.unwrap().path().clone(); + + let sender = sender.clone(); + + scope.spawn(move |_| { + sender.send(Message::Start { path: path.clone() }).unwrap(); + + match format_dev_project( + &[path.clone()], + args.stability_check, + args.write, + false, + ) { + Ok(result) => sender.send(Message::Finished { result, path }), + Err(error) => sender.send(Message::Failed { error, path }), + } + .unwrap(); + }); + } + } + + // Main thread, writing to stdout + scope.spawn(|_| { + let mut error_file = args.error_file.as_ref().map(|error_file| { + BufWriter::new(File::create(error_file).expect("Couldn't open error file")) + }); + + let bar = ProgressBar::new(args.files.len() as u64); + for message in receiver { + match message { + Message::Start { path } => { + bar.println(path.display().to_string()); + } + Message::Finished { path, result } => { + total_errors += result.diagnostics.len(); + total_files += result.file_count; + + bar.println(format!( + "Finished {} with {} files (jaccard index {:.3}) in {:.2}s", + path.display(), + result.file_count, + result.statistics.jaccard_index(), + result.duration.as_secs_f32(), + )); + bar.println(result.display(args.format).to_string().trim_end()); + if let Some(error_file) = &mut error_file { + write!(error_file, "{}", result.display(args.format)).unwrap(); + } + all_success = all_success && !result.is_success(); + bar.inc(1); + } + Message::Failed { path, error } => { + bar.println(format!("Failed {}: {}", path.display(), error)); + all_success = false; + bar.inc(1); + } + } + } + bar.finish(); + }); + }); + + let duration = start.elapsed(); + + #[allow(clippy::print_stdout)] + { + println!( + "{total_errors} stability errors in {total_files} files in {}s", + duration.as_secs_f32() + ); + } + + all_success +} + +fn format_dev_project( + files: &[PathBuf], + stability_check: bool, + write: bool, + progress_bar: bool, +) -> anyhow::Result { + let start = Instant::now(); + + // Find files to check (or in this case, format twice). Adapted from ruff_cli + // First argument is ignored + let paths = ruff_check_paths(files)?; + + let bar = progress_bar.then(|| ProgressBar::new(paths.len() as u64)); + let result_iter = paths + .into_par_iter() + .map(|dir_entry| { + let dir_entry = match dir_entry.context("Iterating the files in the repository failed") + { + Ok(dir_entry) => dir_entry, + Err(err) => return Err(err), + }; + let file = dir_entry.path().to_path_buf(); + // For some reason it does not filter in the beginning + if dir_entry.file_name() == "pyproject.toml" { + return Ok((Ok(Statistics::default()), file)); + } + + let file = dir_entry.path().to_path_buf(); + // Handle panics (mostly in `debug_assert!`) + let result = match catch_unwind(|| format_dev_file(&file, stability_check, write)) { + Ok(result) => result, + Err(panic) => { + if let Some(message) = panic.downcast_ref::() { + Err(CheckFileError::Panic { + message: message.clone(), + }) + } else if let Some(&message) = panic.downcast_ref::<&str>() { + Err(CheckFileError::Panic { + message: message.to_string(), + }) + } else { + Err(CheckFileError::Panic { + // This should not happen, but it can + message: "(Panic didn't set a string message)".to_string(), + }) + } + } + }; + if let Some(bar) = &bar { + bar.inc(1); + } + Ok((result, file)) + }) + .collect::>>()?; + if let Some(bar) = bar { + bar.finish(); + } + + let mut statistics = Statistics::default(); + let mut formatted_counter = 0; + let mut diagnostics = Vec::new(); + for (result, file) in result_iter { + formatted_counter += 1; + match result { + Ok(statistics_file) => statistics += statistics_file, + Err(error) => diagnostics.push(Diagnostic { file, error }), + } + } + + let duration = start.elapsed(); + + Ok(CheckRepoResult { + duration, + file_count: formatted_counter, + diagnostics, + statistics, + }) +} + +/// A compact diff that only shows a header and changes, but nothing unchanged. This makes viewing +/// multiple errors easier. +fn diff_show_only_changes( + writer: &mut Formatter, + formatted: &str, + reformatted: &str, +) -> fmt::Result { + for changes in TextDiff::from_lines(formatted, reformatted) + .unified_diff() + .iter_hunks() + { + for (idx, change) in changes + .iter_changes() + .filter(|change| change.tag() != ChangeTag::Equal) + .enumerate() + { + if idx == 0 { + writeln!(writer, "{}", changes.header())?; + } + write!(writer, "{}", change.tag())?; + writer.write_str(change.value())?; + } + } + Ok(()) +} + +struct CheckRepoResult { + duration: Duration, + file_count: usize, + diagnostics: Vec, + statistics: Statistics, +} + +impl CheckRepoResult { + fn display(&self, format: Format) -> DisplayCheckRepoResult { + DisplayCheckRepoResult { + result: self, + format, + } + } + + /// We also emit diagnostics if the input file was already invalid or the were io errors. This + /// method helps to differentiate + fn is_success(&self) -> bool { + self.diagnostics + .iter() + .all(|diagnostic| diagnostic.error.is_success()) + } +} + +struct DisplayCheckRepoResult<'a> { + result: &'a CheckRepoResult, + format: Format, +} + +impl Display for DisplayCheckRepoResult<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + for diagnostic in &self.result.diagnostics { + write!(f, "{}", diagnostic.display(self.format))?; + } + Ok(()) + } +} + +#[derive(Debug)] +struct Diagnostic { + file: PathBuf, + error: CheckFileError, +} + +impl Diagnostic { + fn display(&self, format: Format) -> DisplayDiagnostic { + DisplayDiagnostic { + diagnostic: self, + format, + } + } +} + +struct DisplayDiagnostic<'a> { + format: Format, + diagnostic: &'a Diagnostic, +} + +impl Display for DisplayDiagnostic<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let Diagnostic { file, error } = &self.diagnostic; + + match error { + CheckFileError::Unstable { + formatted, + reformatted, + } => { + writeln!(f, "Unstable formatting {}", file.display())?; + match self.format { + Format::Minimal => {} + Format::Default => { + diff_show_only_changes(f, formatted, reformatted)?; + } + Format::Full => { + let diff = TextDiff::from_lines(formatted.as_str(), reformatted.as_str()) + .unified_diff() + .header("Formatted once", "Formatted twice") + .to_string(); + writeln!( + f, + r#"Reformatting the formatted code a second time resulted in formatting changes. +--- +{diff}--- + +Formatted once: +--- +{formatted}--- + +Formatted twice: +--- +{reformatted}---\n"#, + )?; + } + } + } + CheckFileError::Panic { message } => { + writeln!(f, "Panic {}: {}", file.display(), message)?; + } + CheckFileError::SyntaxErrorInInput(error) => { + writeln!(f, "Syntax error in {}: {}", file.display(), error)?; + } + CheckFileError::SyntaxErrorInOutput { formatted, error } => { + writeln!( + f, + "Formatter generated invalid syntax {}: {}", + file.display(), + error + )?; + if self.format == Format::Full { + writeln!(f, "---\n{formatted}\n---\n")?; + } + } + CheckFileError::FormatError(error) => { + writeln!(f, "Formatter error for {}: {}", file.display(), error)?; + } + CheckFileError::PrintError(error) => { + writeln!(f, "Printer error for {}: {}", file.display(), error)?; + } + CheckFileError::IoError(error) => { + writeln!(f, "Error reading {}: {}", file.display(), error)?; + } + } + Ok(()) + } +} + +#[derive(Debug)] +enum CheckFileError { + /// First and second pass of the formatter are different + Unstable { + formatted: String, + reformatted: String, + }, + /// The input file was already invalid (not a bug) + SyntaxErrorInInput(FormatModuleError), + /// The formatter introduced a syntax error + SyntaxErrorInOutput { + formatted: String, + error: FormatModuleError, + }, + /// The formatter failed (bug) + FormatError(FormatError), + /// The printer failed (bug) + PrintError(PrintError), + /// Failed to read the file, this sometimes happens e.g. with strange filenames (not a bug) + IoError(io::Error), + /// From `catch_unwind` + Panic { message: String }, +} + +impl CheckFileError { + /// Returns `false` if this is a formatter bug or `true` is if it is something outside of ruff + fn is_success(&self) -> bool { + match self { + CheckFileError::SyntaxErrorInInput(_) | CheckFileError::IoError(_) => true, + CheckFileError::Unstable { .. } + | CheckFileError::SyntaxErrorInOutput { .. } + | CheckFileError::FormatError(_) + | CheckFileError::PrintError(_) + | CheckFileError::Panic { .. } => false, + } + } +} + +impl From for CheckFileError { + fn from(value: io::Error) -> Self { + Self::IoError(value) + } +} + +fn format_dev_file( + input_path: &Path, + stability_check: bool, + write: bool, +) -> Result { + let content = fs::read_to_string(input_path)?; + let printed = match format_module(&content, PyFormatOptions::default()) { + Ok(printed) => printed, + Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => { + return Err(CheckFileError::SyntaxErrorInInput(err)); + } + Err(FormatModuleError::FormatError(err)) => { + return Err(CheckFileError::FormatError(err)); + } + Err(FormatModuleError::PrintError(err)) => { + return Err(CheckFileError::PrintError(err)); + } + }; + let formatted = printed.as_code(); + + if write && content != formatted { + // Simple atomic write. + // The file is in a directory so it must have a parent. Surprisingly `DirEntry` doesn't + // give us access without unwrap + let mut file = NamedTempFile::new_in(input_path.parent().unwrap())?; + file.write_all(formatted.as_bytes())?; + // "If a file exists at the target path, persist will atomically replace it." + file.persist(input_path).map_err(|error| error.error)?; + } + + if stability_check { + let reformatted = match format_module(formatted, PyFormatOptions::default()) { + Ok(reformatted) => reformatted, + Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => { + return Err(CheckFileError::SyntaxErrorInOutput { + formatted: formatted.to_string(), + error: err, + }); + } + Err(FormatModuleError::FormatError(err)) => { + return Err(CheckFileError::FormatError(err)); + } + Err(FormatModuleError::PrintError(err)) => { + return Err(CheckFileError::PrintError(err)); + } + }; + + if reformatted.as_code() != formatted { + return Err(CheckFileError::Unstable { + formatted: formatted.to_string(), + reformatted: reformatted.into_code(), + }); + } + } + + Ok(Statistics::from_versions(&content, formatted)) +} diff --git a/crates/ruff_dev/src/main.rs b/crates/ruff_dev/src/main.rs index 243fceb34b..c981d8c8a2 100644 --- a/crates/ruff_dev/src/main.rs +++ b/crates/ruff_dev/src/main.rs @@ -8,7 +8,7 @@ use ruff::logging::{set_up_logging, LogLevel}; use ruff_cli::check; use std::process::ExitCode; -mod check_formatter_stability; +mod format_dev; mod generate_all; mod generate_cli_help; mod generate_docs; @@ -63,9 +63,15 @@ enum Command { #[clap(long)] repeat: usize, }, - /// Format a repository twice and ensure that it looks that the first and second formatting - /// look the same. Same arguments as `ruff check` - CheckFormatterStability(check_formatter_stability::Args), + /// Several utils related to the formatter which can be run on one or more repositories. The + /// selected set of files in a repository is the same as for `ruff check`. + /// + /// * Check formatter stability: Format a repository twice and ensure that it looks that the + /// first and second formatting look the same. + /// * Format: Format the files in a repository to be able to check them with `git diff` + /// * Statistics: The subcommand the Jaccard index between the (assumed to be black formatted) + /// input and the ruff formatted output + FormatDev(format_dev::Args), } fn main() -> Result { @@ -93,8 +99,8 @@ fn main() -> Result { check(args.clone(), log_level)?; } } - Command::CheckFormatterStability(args) => { - let exit_code = check_formatter_stability::main(&args)?; + Command::FormatDev(args) => { + let exit_code = format_dev::main(&args)?; return Ok(exit_code); } } diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml index a096918fa3..c27d92a907 100644 --- a/crates/ruff_python_formatter/Cargo.toml +++ b/crates/ruff_python_formatter/Cargo.toml @@ -27,6 +27,7 @@ rustc-hash = { workspace = true } rustpython-parser = { workspace = true } serde = { workspace = true, optional = true } smallvec = { workspace = true } +thiserror = { workspace = true } [dev-dependencies] ruff_formatter = { path = "../ruff_formatter", features = ["serde"]} diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index 4932ca5da0..dcdf449795 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -3,17 +3,23 @@ use crate::comments::{ }; use crate::context::PyFormatContext; pub use crate::options::{MagicTrailingComma, PyFormatOptions, QuoteStyle}; -use anyhow::{anyhow, Context, Result}; -use ruff_formatter::prelude::*; -use ruff_formatter::{format, write}; +use ruff_formatter::format_element::tag; +use ruff_formatter::prelude::{ + dynamic_text, source_position, source_text_slice, text, ContainsNewlines, Formatter, Tag, +}; +use ruff_formatter::{ + format, normalize_newlines, write, Buffer, Format, FormatElement, FormatError, FormatResult, + PrintError, +}; use ruff_formatter::{Formatted, Printed, SourceCode}; use ruff_python_ast::node::{AnyNodeRef, AstNode, NodeKind}; use ruff_python_ast::source_code::{CommentRanges, CommentRangesBuilder, Locator}; use ruff_text_size::{TextLen, TextRange}; use rustpython_parser::ast::{Mod, Ranged}; -use rustpython_parser::lexer::lex; -use rustpython_parser::{parse_tokens, Mode}; +use rustpython_parser::lexer::{lex, LexicalError}; +use rustpython_parser::{parse_tokens, Mode, ParseError}; use std::borrow::Cow; +use thiserror::Error; pub(crate) mod builders; pub mod cli; @@ -84,16 +90,40 @@ where } } -pub fn format_module(contents: &str, options: PyFormatOptions) -> Result { +#[derive(Error, Debug)] +pub enum FormatModuleError { + #[error("source contains syntax errors (lexer error): {0:?}")] + LexError(LexicalError), + #[error("source contains syntax errors (parser error): {0:?}")] + ParseError(ParseError), + #[error(transparent)] + FormatError(#[from] FormatError), + #[error(transparent)] + PrintError(#[from] PrintError), +} + +impl From for FormatModuleError { + fn from(value: LexicalError) -> Self { + Self::LexError(value) + } +} + +impl From for FormatModuleError { + fn from(value: ParseError) -> Self { + Self::ParseError(value) + } +} + +pub fn format_module( + contents: &str, + options: PyFormatOptions, +) -> Result { // Tokenize once let mut tokens = Vec::new(); let mut comment_ranges = CommentRangesBuilder::default(); for result in lex(contents, Mode::Module) { - let (token, range) = match result { - Ok((token, range)) => (token, range), - Err(err) => return Err(anyhow!("Source contains syntax errors {err:?}")), - }; + let (token, range) = result?; comment_ranges.visit_token(&token, range); tokens.push(Ok((token, range))); @@ -102,14 +132,11 @@ pub fn format_module(contents: &str, options: PyFormatOptions) -> Result") - .with_context(|| "Syntax error in input")?; + let python_ast = parse_tokens(tokens, Mode::Module, "")?; let formatted = format_node(&python_ast, &comment_ranges, contents, options)?; - formatted - .print() - .with_context(|| "Failed to print the formatter IR") + Ok(formatted.print()?) } pub fn format_node<'a>(