diff --git a/Cargo.lock b/Cargo.lock index 75df37a918..f8fffa8662 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -894,6 +894,24 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "ignore" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "713f1b139373f96a2e0ce3ac931cd01ee973c3c5dd7c40c0c2efe96ad2b6751d" +dependencies = [ + "crossbeam-utils", + "globset", + "lazy_static", + "log", + "memchr", + "regex", + "same-file", + "thread_local", + "walkdir", + "winapi-util", +] + [[package]] name = "indexmap" version = "1.9.2" @@ -1849,6 +1867,7 @@ dependencies = [ "getrandom 0.2.8", "glob", "globset", + "ignore", "insta", "itertools", "libcst", @@ -2304,6 +2323,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +dependencies = [ + "once_cell", +] + [[package]] name = "time" version = "0.1.45" diff --git a/Cargo.toml b/Cargo.toml index 281fdd0d9a..b662a08616 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ fern = { version = "0.6.1" } filetime = { version = "0.2.17" } glob = { version = "0.3.0" } globset = { version = "0.4.9" } +ignore = { version = "0.4.18" } itertools = { version = "0.10.5" } libcst = { git = "https://github.com/charliermarsh/LibCST", rev = "f2f0b7a487a8725d161fe8b3ed73a6758b21e177" } log = { version = "0.4.17" } diff --git a/resources/test/project/.gitignore b/resources/test/project/.gitignore new file mode 100644 index 0000000000..01873749f8 --- /dev/null +++ b/resources/test/project/.gitignore @@ -0,0 +1 @@ +examples/generated diff --git a/resources/test/project/README.md b/resources/test/project/README.md index a44660e4f8..df9d689a67 100644 --- a/resources/test/project/README.md +++ b/resources/test/project/README.md @@ -9,33 +9,37 @@ Running from the repo root should pick up and enforce the appropriate settings f ``` ∴ cargo run resources/test/project/ -Found 5 error(s). +Found 7 error(s). +resources/test/project/examples/.dotfiles/script.py:1:8: F401 `os` imported but unused +resources/test/project/examples/.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used resources/test/project/examples/docs/docs/file.py:1:1: I001 Import block is un-sorted or un-formatted resources/test/project/examples/docs/docs/file.py:8:5: F841 Local variable `x` is assigned to but never used resources/test/project/src/file.py:1:8: F401 `os` imported but unused resources/test/project/src/file.py:5:5: F841 Local variable `x` is assigned to but never used resources/test/project/src/import_file.py:1:1: I001 Import block is un-sorted or un-formatted -3 potentially fixable with the --fix option. +4 potentially fixable with the --fix option. ``` Running from the project directory itself should exhibit the same behavior: ``` -∴ cd resources/test/project/ && cargo run . -Found 5 error(s). +∴ (cd resources/test/project/ && cargo run .) +Found 7 error(s). +examples/.dotfiles/script.py:1:8: F401 `os` imported but unused +examples/.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used examples/docs/docs/file.py:1:1: I001 Import block is un-sorted or un-formatted examples/docs/docs/file.py:8:5: F841 Local variable `x` is assigned to but never used src/file.py:1:8: F401 `os` imported but unused src/file.py:5:5: F841 Local variable `x` is assigned to but never used src/import_file.py:1:1: I001 Import block is un-sorted or un-formatted -3 potentially fixable with the --fix option. +4 potentially fixable with the --fix option. ``` Running from the sub-package directory should exhibit the same behavior, but omit the top-level files: ``` -∴ cd resources/test/project/examples/docs && cargo run . +∴ (cd resources/test/project/examples/docs && cargo run .) Found 2 error(s). docs/file.py:1:1: I001 Import block is un-sorted or un-formatted docs/file.py:8:5: F841 Local variable `x` is assigned to but never used @@ -46,8 +50,10 @@ docs/file.py:8:5: F841 Local variable `x` is assigned to but never used file paths from the current working directory: ``` -∴ cargo run -- --config=resources/test/project/pyproject.toml resources/test/project/ -Found 9 error(s). +∴ (cargo run -- --config=resources/test/project/pyproject.toml resources/test/project/) +Found 11 error(s). +resources/test/project/examples/.dotfiles/script.py:1:8: F401 `os` imported but unused +resources/test/project/examples/.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used resources/test/project/examples/docs/docs/concepts/file.py:1:8: F401 `os` imported but unused resources/test/project/examples/docs/docs/concepts/file.py:5:5: F841 Local variable `x` is assigned to but never used resources/test/project/examples/docs/docs/file.py:1:8: F401 `os` imported but unused @@ -57,15 +63,16 @@ resources/test/project/examples/docs/docs/file.py:8:5: F841 Local variable `x` i resources/test/project/src/file.py:1:8: F401 `os` imported but unused resources/test/project/src/file.py:5:5: F841 Local variable `x` is assigned to but never used resources/test/project/src/import_file.py:1:1: I001 Import block is un-sorted or un-formatted -6 potentially fixable with the --fix option. +7 potentially fixable with the --fix option. ``` Running from a parent directory should this "ignore" the `exclude` (hence, `concepts/file.py` gets included in the output): ``` -∴ cd resources/test/project/examples && cargo run -- --config=docs/pyproject.toml . -Found 3 error(s). +∴ (cd resources/test/project/examples && cargo run -- --config=docs/pyproject.toml .) +Found 4 error(s). +.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used docs/docs/concepts/file.py:5:5: F841 Local variable `x` is assigned to but never used docs/docs/file.py:1:1: I001 Import block is un-sorted or un-formatted docs/docs/file.py:8:5: F841 Local variable `x` is assigned to but never used diff --git a/resources/test/project/examples/.dotfiles/script.py b/resources/test/project/examples/.dotfiles/script.py new file mode 100755 index 0000000000..4a49d9883f --- /dev/null +++ b/resources/test/project/examples/.dotfiles/script.py @@ -0,0 +1,5 @@ +import os + + +def f(): + x = 1 diff --git a/src/commands.rs b/src/commands.rs index 0340d6d245..f7ab914577 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -3,6 +3,7 @@ use std::path::{Path, PathBuf}; use std::time::Instant; use anyhow::{bail, Result}; +use ignore::Error; use itertools::Itertools; use log::{debug, error}; #[cfg(not(target_family = "wasm"))] @@ -30,7 +31,7 @@ pub fn run( ) -> Result { // Collect all the files to check. let start = Instant::now(); - let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?; + let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?; let duration = start.elapsed(); debug!("Identified files to lint in: {:?}", duration); @@ -45,7 +46,11 @@ pub fn run( .map_err(|e| (Some(path.to_owned()), e.to_string())) } Err(e) => Err(( - e.path().map(Path::to_owned), + if let Error::WithPath { path, .. } = e { + Some(path.clone()) + } else { + None + }, e.io_error() .map_or_else(|| e.to_string(), io::Error::to_string), )), @@ -111,7 +116,7 @@ pub fn run_stdin( pub fn add_noqa(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result { // Collect all the files to check. let start = Instant::now(); - let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?; + let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?; let duration = start.elapsed(); debug!("Identified files to lint in: {:?}", duration); @@ -141,7 +146,7 @@ pub fn add_noqa(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) - pub fn autoformat(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result { // Collect all the files to format. let start = Instant::now(); - let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?; + let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?; let duration = start.elapsed(); debug!("Identified files to lint in: {:?}", duration); @@ -170,7 +175,7 @@ pub fn autoformat(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) /// Print the user-facing configuration settings. pub fn show_settings(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<()> { // Collect all files in the hierarchy. - let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?; + let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?; // Print the list of files. let Some(entry) = paths @@ -190,7 +195,7 @@ pub fn show_settings(files: &[PathBuf], strategy: &Strategy, overrides: &Overrid /// Show the list of files to be checked based on current settings. pub fn show_files(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<()> { // Collect all files in the hierarchy. - let (paths, _resolver) = resolver::resolve_python_files(files, strategy, overrides)?; + let (paths, _resolver) = resolver::python_files_in_path(files, strategy, overrides)?; // Print the list of files. for entry in paths diff --git a/src/resolver.rs b/src/resolver.rs index 54323ba131..8abcd51087 100644 --- a/src/resolver.rs +++ b/src/resolver.rs @@ -3,12 +3,13 @@ use std::collections::BTreeMap; use std::path::{Path, PathBuf}; +use std::sync::RwLock; -use anyhow::{bail, Result}; +use anyhow::{anyhow, bail, Result}; +use ignore::{DirEntry, WalkBuilder, WalkState}; use log::debug; use path_absolutize::path_dedot; use rustc_hash::FxHashSet; -use walkdir::{DirEntry, WalkDir}; use crate::cli::Overrides; use crate::fs; @@ -169,35 +170,20 @@ fn is_python_file(path: &Path) -> bool { .map_or(false, |ext| ext == "py" || ext == "pyi") } -/// Find all Python (`.py` and `.pyi` files) in a set of `PathBuf`s. -pub fn resolve_python_files( +/// Find all Python (`.py` and `.pyi` files) in a set of paths. +pub fn python_files_in_path( paths: &[PathBuf], strategy: &Strategy, overrides: &Overrides, -) -> Result<(Vec>, Resolver)> { - let mut files = Vec::new(); - let mut resolver = Resolver::default(); - for path in paths { - let (files_in_path, file_resolver) = python_files_in_path(path, strategy, overrides)?; - files.extend(files_in_path); - resolver.merge(file_resolver); - } - Ok((files, resolver)) -} - -/// Find all Python (`.py` and `.pyi` files) in a given `Path`. -fn python_files_in_path( - path: &Path, - strategy: &Strategy, - overrides: &Overrides, -) -> Result<(Vec>, Resolver)> { - let path = fs::normalize_path(path); +) -> Result<(Vec>, Resolver)> { + // Normalize every path (e.g., convert from relative to absolute). + let paths: Vec = paths.iter().map(|path| fs::normalize_path(path)).collect(); // Search for `pyproject.toml` files in all parent directories. let mut resolver = Resolver::default(); - for path in path.ancestors() { - if path.is_dir() { - let pyproject = path.join("pyproject.toml"); + for path in &paths { + for ancestor in path.ancestors() { + let pyproject = ancestor.join("pyproject.toml"); if pyproject.is_file() { let (root, settings) = resolve_scoped_settings(&pyproject, &Relativity::Parent, Some(overrides))?; @@ -206,57 +192,87 @@ fn python_files_in_path( } } - // Collect all Python files. - let files: Vec> = WalkDir::new(path) - .into_iter() - .filter_entry(|entry| { - // Search for the `pyproject.toml` file in this directory, before we visit any - // of its contents. - if entry.file_type().is_dir() { - let pyproject = entry.path().join("pyproject.toml"); - if pyproject.is_file() { - // TODO(charlie): Return a `Result` here. - let (root, settings) = - resolve_scoped_settings(&pyproject, &Relativity::Parent, Some(overrides)) - .unwrap(); - resolver.add(root, settings); - } - } + // Create the `WalkBuilder`. + let mut builder = WalkBuilder::new( + paths + .get(0) + .ok_or_else(|| anyhow!("Expected at least one path to search for Python files"))?, + ); + for path in &paths[1..] { + builder.add(path); + } + let walker = builder.hidden(false).build_parallel(); - let path = entry.path(); - let settings = resolver.resolve(path, strategy); - match fs::extract_path_names(path) { - Ok((file_path, file_basename)) => { - if !settings.exclude.is_empty() - && is_excluded(file_path, file_basename, &settings.exclude) - { - debug!("Ignored path via `exclude`: {:?}", path); - false - } else if !settings.extend_exclude.is_empty() - && is_excluded(file_path, file_basename, &settings.extend_exclude) - { - debug!("Ignored path via `extend-exclude`: {:?}", path); - false - } else { - true + // Run the `WalkParallel` to collect all Python files. + let error: std::sync::Mutex> = std::sync::Mutex::new(Ok(())); + let resolver: RwLock = RwLock::new(resolver); + let files: std::sync::Mutex>> = + std::sync::Mutex::new(vec![]); + walker.run(|| { + Box::new(|result| { + if let Ok(entry) = &result { + // Search for the `pyproject.toml` file in this directory, before we visit any + // of its contents. + if entry + .file_type() + .map_or(false, |file_type| file_type.is_dir()) + { + let pyproject = entry.path().join("pyproject.toml"); + if pyproject.is_file() { + match resolve_scoped_settings( + &pyproject, + &Relativity::Parent, + Some(overrides), + ) { + Ok((root, settings)) => resolver.write().unwrap().add(root, settings), + Err(err) => { + *error.lock().unwrap() = Err(err); + return WalkState::Quit; + } + } } } - Err(e) => { - debug!("Ignored path due to error in parsing: {:?}: {}", path, e); - true + + let path = entry.path(); + let resolver = resolver.read().unwrap(); + let settings = resolver.resolve(path, strategy); + match fs::extract_path_names(path) { + Ok((file_path, file_basename)) => { + if !settings.exclude.is_empty() + && is_excluded(file_path, file_basename, &settings.exclude) + { + debug!("Ignored path via `exclude`: {:?}", path); + return WalkState::Skip; + } else if !settings.extend_exclude.is_empty() + && is_excluded(file_path, file_basename, &settings.extend_exclude) + { + debug!("Ignored path via `extend-exclude`: {:?}", path); + return WalkState::Skip; + } + } + Err(e) => { + debug!("Ignored path due to error in parsing: {:?}: {}", path, e); + return WalkState::Skip; + } } } - }) - .filter(|entry| { - entry.as_ref().map_or(true, |entry| { - (entry.depth() == 0 || is_python_file(entry.path())) - && !entry.file_type().is_dir() - && !(entry.file_type().is_symlink() && entry.path().is_dir()) - }) - }) - .collect::>(); - Ok((files, resolver)) + if result.as_ref().map_or(true, |entry| { + (entry.depth() == 0 || is_python_file(entry.path())) + && !entry + .file_type() + .map_or(false, |file_type| file_type.is_dir()) + }) { + files.lock().unwrap().push(result); + } + + WalkState::Continue + }) + }); + + error.into_inner().unwrap()?; + + Ok((files.into_inner().unwrap(), resolver.into_inner().unwrap())) } #[cfg(test)]