diff --git a/.gitignore b/.gitignore index 5bfce3449a..9a12d51c7b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,7 @@ schemastore # `maturin develop` and ecosystem_all_check.sh .venv* # Formatter debugging (crates/ruff_python_formatter/README.md) -scratch.py +scratch.* # Created by `perf` (CONTRIBUTING.md) perf.data perf.data.old diff --git a/Cargo.lock b/Cargo.lock index f3ce5f85ea..b8ac08f55f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -963,6 +963,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "indoc" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4" + [[package]] name = "inotify" version = "0.9.6" @@ -1987,6 +1993,7 @@ dependencies = [ "clap", "ignore", "indicatif", + "indoc", "itertools", "libcst", "log", @@ -2004,11 +2011,13 @@ dependencies = [ "rustpython-format", "rustpython-parser", "schemars", + "serde", "serde_json", "similar", "strum", "strum_macros", "tempfile", + "toml", ] [[package]] @@ -2783,9 +2792,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "toml" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ebafdf5ad1220cb59e7d17cf4d2c72015297b75b19a10472f99b89225089240" +checksum = "c17e963a819c331dcacd7ab957d80bc2b9a9c1e71c804826d2f283dd65306542" dependencies = [ "serde", "serde_spanned", @@ -2804,9 +2813,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.19.11" +version = "0.19.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266f016b7f039eec8a1a80dfe6156b633d208b9fccca5e4db1d6775b0c4e34a7" +checksum = "5f8751d9c1b03c6500c387e96f81f815a4f8e72d142d2d4a9ffa6fedd51ddee7" dependencies = [ "indexmap 2.0.0", "serde", @@ -3339,9 +3348,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.4.7" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca0ace3845f0d96209f0375e6d367e3eb87eb65d27d445bdc9f1843a26f39448" +checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7" dependencies = [ "memchr", ] diff --git a/crates/ruff_dev/Cargo.toml b/crates/ruff_dev/Cargo.toml index 36c9c42d59..f356a44b61 100644 --- a/crates/ruff_dev/Cargo.toml +++ b/crates/ruff_dev/Cargo.toml @@ -28,13 +28,18 @@ libcst = { workspace = true } log = { workspace = true } once_cell = { workspace = true } pretty_assertions = { version = "1.3.0" } -regex = { workspace = true } rayon = "1.7.0" +regex = { workspace = true } rustpython-format = { workspace = true } rustpython-parser = { workspace = true } schemars = { workspace = true } +serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } similar = { workspace = true } strum = { workspace = true } strum_macros = { workspace = true } tempfile = "3.6.0" +toml = { workspace = true, features = ["parse"] } + +[dev-dependencies] +indoc = "2.0.3" diff --git a/crates/ruff_dev/src/format_dev.rs b/crates/ruff_dev/src/format_dev.rs index 9daf4554c5..3c02a12fe4 100644 --- a/crates/ruff_dev/src/format_dev.rs +++ b/crates/ruff_dev/src/format_dev.rs @@ -1,15 +1,18 @@ -use anyhow::{bail, Context}; +use anyhow::{bail, format_err, Context}; use clap::{CommandFactory, FromArgMatches}; use ignore::DirEntry; use indicatif::ProgressBar; - +use log::debug; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use ruff::resolver::python_files_in_path; use ruff::settings::types::{FilePattern, FilePatternSet}; use ruff_cli::args::CheckArgs; use ruff_cli::resolve::resolve; -use ruff_formatter::{FormatError, PrintError}; -use ruff_python_formatter::{format_module, FormatModuleError, PyFormatOptions}; +use ruff_formatter::{FormatError, LineWidth, PrintError}; +use ruff_python_formatter::{ + format_module, FormatModuleError, MagicTrailingComma, PyFormatOptions, +}; +use serde::Deserialize; use similar::{ChangeTag, TextDiff}; use std::fmt::{Display, Formatter}; use std::fs::File; @@ -225,6 +228,7 @@ enum Message { fn format_dev_multi_project(args: &Args) -> bool { let mut total_errors = 0; let mut total_files = 0; + let mut num_projects: usize = 0; let start = Instant::now(); rayon::scope(|scope| { @@ -233,6 +237,7 @@ fn format_dev_multi_project(args: &Args) -> bool { // Workers, to check is subdirectory in parallel for base_dir in &args.files { for dir in base_dir.read_dir().unwrap() { + num_projects += 1; let path = dir.unwrap().path().clone(); let sender = sender.clone(); @@ -255,36 +260,39 @@ fn format_dev_multi_project(args: &Args) -> bool { } // Main thread, writing to stdout + #[allow(clippy::print_stdout)] scope.spawn(|_| { let mut error_file = args.error_file.as_ref().map(|error_file| { BufWriter::new(File::create(error_file).expect("Couldn't open error file")) }); - let bar = ProgressBar::new(args.files.len() as u64); + let bar = ProgressBar::new(num_projects as u64); for message in receiver { match message { Message::Start { path } => { - bar.println(path.display().to_string()); + bar.suspend(|| println!("Starting {}", path.display())); } Message::Finished { path, result } => { total_errors += result.error_count(); total_files += result.file_count; - bar.println(format!( - "Finished {} with {} files (similarity index {:.3}) in {:.2}s", - path.display(), - result.file_count, - result.statistics.similarity_index(), - result.duration.as_secs_f32(), - )); - bar.println(result.display(args.format).to_string().trim_end()); + bar.suspend(|| { + println!( + "Finished {} with {} files (similarity index {:.3}) in {:.2}s", + path.display(), + result.file_count, + result.statistics.similarity_index(), + result.duration.as_secs_f32(), + ); + }); + bar.suspend(|| print!("{}", result.display(args.format))); if let Some(error_file) = &mut error_file { write!(error_file, "{}", result.display(args.format)).unwrap(); } bar.inc(1); } Message::Failed { path, error } => { - bar.println(format!("Failed {}: {}", path.display(), error)); + bar.suspend(|| println!("Failed {}: {}", path.display(), error)); bar.inc(1); } } @@ -314,6 +322,13 @@ fn format_dev_project( ) -> anyhow::Result { let start = Instant::now(); + // TODO(konstin): The assumptions between this script (one repo) and ruff (pass in a bunch of + // files) mismatch. + let options = BlackOptions::from_file(&files[0])?.to_py_format_options(); + debug!("Options for {}: {options:?}", files[0].display()); + + // TODO(konstin): black excludes + // Find files to check (or in this case, format twice). Adapted from ruff_cli // First argument is ignored let paths = ruff_check_paths(files)?; @@ -335,7 +350,9 @@ fn format_dev_project( let file = dir_entry.path().to_path_buf(); // Handle panics (mostly in `debug_assert!`) - let result = match catch_unwind(|| format_dev_file(&file, stability_check, write)) { + let result = match catch_unwind(|| { + format_dev_file(&file, stability_check, write, options.clone()) + }) { Ok(result) => result, Err(panic) => { if let Some(message) = panic.downcast_ref::() { @@ -600,11 +617,12 @@ fn format_dev_file( input_path: &Path, stability_check: bool, write: bool, + options: PyFormatOptions, ) -> Result { let content = fs::read_to_string(input_path)?; #[cfg(not(debug_assertions))] let start = Instant::now(); - let printed = match format_module(&content, PyFormatOptions::default()) { + let printed = match format_module(&content, options.clone()) { Ok(printed) => printed, Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => { return Err(CheckFileError::SyntaxErrorInInput(err)); @@ -631,7 +649,7 @@ fn format_dev_file( } if stability_check { - let reformatted = match format_module(formatted, PyFormatOptions::default()) { + let reformatted = match format_module(formatted, options) { Ok(reformatted) => reformatted, Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => { return Err(CheckFileError::SyntaxErrorInOutput { @@ -662,3 +680,131 @@ fn format_dev_file( Ok(Statistics::from_versions(&content, formatted)) } + +#[derive(Deserialize, Default)] +struct PyprojectToml { + tool: Option, +} + +#[derive(Deserialize, Default)] +struct PyprojectTomlTool { + black: Option, +} + +#[derive(Deserialize, Debug)] +#[serde(default)] +struct BlackOptions { + // Black actually allows both snake case and kebab case + #[serde(alias = "line-length")] + line_length: u16, + #[serde(alias = "skip-magic-trailing-comma")] + skip_magic_trailing_comma: bool, + #[allow(unused)] + #[serde(alias = "force-exclude")] + force_exclude: Option, +} + +impl Default for BlackOptions { + fn default() -> Self { + Self { + line_length: 88, + skip_magic_trailing_comma: false, + force_exclude: None, + } + } +} + +impl BlackOptions { + /// TODO(konstin): For the real version, fix printing of error chains and remove the path + /// argument + fn from_toml(toml: &str, path: &Path) -> anyhow::Result { + let pyproject_toml: PyprojectToml = toml::from_str(toml).map_err(|e| { + format_err!( + "Not a valid pyproject.toml toml file at {}: {e}", + path.display() + ) + })?; + let black_options = pyproject_toml + .tool + .unwrap_or_default() + .black + .unwrap_or_default(); + debug!( + "Found {}, setting black options: {:?}", + path.display(), + &black_options + ); + Ok(black_options) + } + + fn from_file(repo: &Path) -> anyhow::Result { + let path = repo.join("pyproject.toml"); + if !path.is_file() { + debug!( + "No pyproject.toml at {}, using black option defaults", + path.display() + ); + return Ok(Self::default()); + } + Self::from_toml(&fs::read_to_string(&path)?, repo) + } + + fn to_py_format_options(&self) -> PyFormatOptions { + let mut options = PyFormatOptions::default(); + options + .with_line_width( + LineWidth::try_from(self.line_length).expect("Invalid line length limit"), + ) + .with_magic_trailing_comma(if self.skip_magic_trailing_comma { + MagicTrailingComma::Ignore + } else { + MagicTrailingComma::Respect + }); + options + } +} + +#[cfg(test)] +mod tests { + use crate::format_dev::BlackOptions; + use indoc::indoc; + use ruff_formatter::{FormatOptions, LineWidth}; + use ruff_python_formatter::MagicTrailingComma; + use std::path::Path; + + #[test] + fn test_transformers() { + let toml = indoc! {" + [tool.black] + line-length = 119 + target-version = ['py37'] + "}; + let options = BlackOptions::from_toml(toml, Path::new("pyproject.toml")) + .unwrap() + .to_py_format_options(); + assert_eq!(options.line_width(), LineWidth::try_from(119).unwrap()); + assert!(matches!( + options.magic_trailing_comma(), + MagicTrailingComma::Respect + )); + } + + #[test] + fn test_typeshed() { + let toml = indoc! {r#" + [tool.black] + line_length = 130 + target_version = ["py310"] + skip_magic_trailing_comma = true + force-exclude = ".*_pb2.pyi" + "#}; + let options = BlackOptions::from_toml(toml, Path::new("pyproject.toml")) + .unwrap() + .to_py_format_options(); + assert_eq!(options.line_width(), LineWidth::try_from(130).unwrap()); + assert!(matches!( + options.magic_trailing_comma(), + MagicTrailingComma::Ignore + )); + } +} diff --git a/crates/ruff_python_formatter/README.md b/crates/ruff_python_formatter/README.md index 2484326805..6ef69b0dc7 100644 --- a/crates/ruff_python_formatter/README.md +++ b/crates/ruff_python_formatter/README.md @@ -271,7 +271,7 @@ It is also possible large number of repositories using ruff. This dataset is lar only do this occasionally: ```shell -curl https://raw.githubusercontent.com/akx/ruff-usage-aggregate/master/data/known-github-tomls.jsonl > github_search.jsonl +curl https://raw.githubusercontent.com/akx/ruff-usage-aggregate/master/data/known-github-tomls-clean.jsonl> github_search.jsonl python scripts/check_ecosystem.py --checkouts target/checkouts --projects github_search.jsonl -v $(which true) $(which true) cargo run --bin ruff_dev -- format-dev --stability-check --multi-project target/checkouts ```