diff --git a/resources/test/package/pyproject.toml b/resources/test/package/pyproject.toml new file mode 100644 index 0000000000..4335ce71c5 --- /dev/null +++ b/resources/test/package/pyproject.toml @@ -0,0 +1,2 @@ +[tool.ruff] +src = ["."] diff --git a/resources/test/project/src/__init__.py b/resources/test/package/src/package/__init__.py similarity index 100% rename from resources/test/project/src/__init__.py rename to resources/test/package/src/package/__init__.py diff --git a/resources/test/package/src/package/app.py b/resources/test/package/src/package/app.py new file mode 100644 index 0000000000..7696ab0a6f --- /dev/null +++ b/resources/test/package/src/package/app.py @@ -0,0 +1,4 @@ +from package.core import method + +if __name__ == "__main__": + method() diff --git a/resources/test/package/src/package/core.py b/resources/test/package/src/package/core.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/resources/test/project/project/__init__.py b/resources/test/project/project/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/resources/test/project/src/file.py b/resources/test/project/project/file.py similarity index 100% rename from resources/test/project/src/file.py rename to resources/test/project/project/file.py diff --git a/resources/test/project/src/import_file.py b/resources/test/project/project/import_file.py similarity index 100% rename from resources/test/project/src/import_file.py rename to resources/test/project/project/import_file.py diff --git a/src/check_imports.rs b/src/check_imports.rs index a0906ea191..363250ee3c 100644 --- a/src/check_imports.rs +++ b/src/check_imports.rs @@ -17,11 +17,14 @@ fn check_import_blocks( locator: &SourceCodeLocator, settings: &Settings, autofix: flags::Autofix, + package: Option<&Path>, ) -> Vec { let mut checks = vec![]; for block in tracker.into_iter() { if !block.imports.is_empty() { - if let Some(check) = isort::plugins::check_imports(&block, locator, settings, autofix) { + if let Some(check) = + isort::plugins::check_imports(&block, locator, settings, autofix, package) + { checks.push(check); } } @@ -36,10 +39,11 @@ pub fn check_imports( settings: &Settings, autofix: flags::Autofix, path: &Path, + package: Option<&Path>, ) -> Vec { let mut tracker = ImportTracker::new(locator, directives, path); for stmt in python_ast { tracker.visit_stmt(stmt); } - check_import_blocks(tracker, locator, settings, autofix) + check_import_blocks(tracker, locator, settings, autofix, package) } diff --git a/src/commands.rs b/src/commands.rs index ae2516a956..0c3d1be16a 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -17,10 +17,10 @@ use crate::cli::Overrides; use crate::iterators::par_iter; use crate::linter::{add_noqa_to_path, autoformat_path, lint_path, lint_stdin, Diagnostics}; use crate::message::Message; -use crate::resolver; use crate::resolver::{FileDiscovery, PyprojectDiscovery}; use crate::settings::flags; use crate::settings::types::SerializationFormat; +use crate::{packages, resolver}; /// Run the linter over a collection of files. pub fn run( @@ -31,21 +31,34 @@ pub fn run( cache: flags::Cache, autofix: fixer::Mode, ) -> Result { - // Collect all the files to check. + // Collect all the Python files to check. let start = Instant::now(); let (paths, resolver) = resolver::python_files_in_path(files, pyproject_strategy, file_strategy, overrides)?; let duration = start.elapsed(); debug!("Identified files to lint in: {:?}", duration); + // Discover the package root for each Python file. + let package_roots = packages::detect_package_roots( + &paths + .iter() + .flatten() + .map(ignore::DirEntry::path) + .collect::>(), + ); + let start = Instant::now(); let mut diagnostics: Diagnostics = par_iter(&paths) .map(|entry| { match entry { Ok(entry) => { let path = entry.path(); + let package = path + .parent() + .and_then(|parent| package_roots.get(parent)) + .and_then(|package| *package); let settings = resolver.resolve(path, pyproject_strategy); - lint_path(path, settings, cache, autofix) + lint_path(path, package, settings, cache, autofix) .map_err(|e| (Some(path.to_owned()), e.to_string())) } Err(e) => Err(( diff --git a/src/isort/categorize.rs b/src/isort/categorize.rs index d246579fba..627b6df42f 100644 --- a/src/isort/categorize.rs +++ b/src/isort/categorize.rs @@ -1,6 +1,8 @@ use std::collections::BTreeSet; use std::fs; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; + +use log::debug; use crate::python::sys::KNOWN_STANDARD_LIBRARY; @@ -13,45 +15,72 @@ pub enum ImportType { LocalFolder, } +#[derive(Debug)] +enum Reason<'a> { + NonZeroLevel, + KnownFirstParty, + KnownThirdParty, + ExtraStandardLibrary, + Future, + KnownStandardLibrary, + SamePackage, + SourceMatch(&'a Path), + NoMatch, +} + pub fn categorize( module_base: &str, level: Option<&usize>, src: &[PathBuf], + package: Option<&Path>, known_first_party: &BTreeSet, known_third_party: &BTreeSet, extra_standard_library: &BTreeSet, ) -> ImportType { - if level.map_or(false, |level| *level > 0) { - ImportType::LocalFolder - } else if known_first_party.contains(module_base) { - ImportType::FirstParty - } else if known_third_party.contains(module_base) { - ImportType::ThirdParty - } else if extra_standard_library.contains(module_base) { - ImportType::StandardLibrary - } else if module_base == "__future__" { - ImportType::Future - } else if KNOWN_STANDARD_LIBRARY.contains(module_base) { - ImportType::StandardLibrary - } else if find_local(src, module_base) { - ImportType::FirstParty - } else { - ImportType::ThirdParty - } + let (import_type, reason) = { + if level.map_or(false, |level| *level > 0) { + (ImportType::LocalFolder, Reason::NonZeroLevel) + } else if known_first_party.contains(module_base) { + (ImportType::FirstParty, Reason::KnownFirstParty) + } else if known_third_party.contains(module_base) { + (ImportType::ThirdParty, Reason::KnownThirdParty) + } else if extra_standard_library.contains(module_base) { + (ImportType::StandardLibrary, Reason::ExtraStandardLibrary) + } else if module_base == "__future__" { + (ImportType::Future, Reason::Future) + } else if KNOWN_STANDARD_LIBRARY.contains(module_base) { + (ImportType::StandardLibrary, Reason::KnownStandardLibrary) + } else if same_package(package, module_base) { + (ImportType::FirstParty, Reason::SamePackage) + } else if let Some(src) = match_sources(src, module_base) { + (ImportType::FirstParty, Reason::SourceMatch(src)) + } else { + (ImportType::ThirdParty, Reason::NoMatch) + } + }; + debug!( + "Categorized '{}' as {:?} ({:?})", + module_base, import_type, reason + ); + import_type } -fn find_local(paths: &[PathBuf], base: &str) -> bool { +fn same_package(package: Option<&Path>, module_base: &str) -> bool { + package.map_or(false, |package| package.ends_with(module_base)) +} + +fn match_sources<'a>(paths: &'a [PathBuf], base: &str) -> Option<&'a Path> { for path in paths { if let Ok(metadata) = fs::metadata(path.join(base)) { if metadata.is_dir() { - return true; + return Some(path); } } if let Ok(metadata) = fs::metadata(path.join(format!("{base}.py"))) { if metadata.is_file() { - return true; + return Some(path); } } } - false + None } diff --git a/src/isort/mod.rs b/src/isort/mod.rs index 1e727b2723..f2cd8f27b7 100644 --- a/src/isort/mod.rs +++ b/src/isort/mod.rs @@ -1,6 +1,6 @@ use std::cmp::Reverse; use std::collections::{BTreeMap, BTreeSet}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use itertools::Itertools; use ropey::RopeBuilder; @@ -294,6 +294,7 @@ fn normalize_imports(imports: Vec, combine_as_imports: bool) -> fn categorize_imports<'a>( block: ImportBlock<'a>, src: &[PathBuf], + package: Option<&Path>, known_first_party: &BTreeSet, known_third_party: &BTreeSet, extra_standard_library: &BTreeSet, @@ -305,6 +306,7 @@ fn categorize_imports<'a>( &alias.module_base(), None, src, + package, known_first_party, known_third_party, extra_standard_library, @@ -321,6 +323,7 @@ fn categorize_imports<'a>( &import_from.module_base(), import_from.level, src, + package, known_first_party, known_third_party, extra_standard_library, @@ -337,6 +340,7 @@ fn categorize_imports<'a>( &import_from.module_base(), import_from.level, src, + package, known_first_party, known_third_party, extra_standard_library, @@ -353,6 +357,7 @@ fn categorize_imports<'a>( &import_from.module_base(), import_from.level, src, + package, known_first_party, known_third_party, extra_standard_library, @@ -470,6 +475,7 @@ pub fn format_imports( comments: Vec, line_length: usize, src: &[PathBuf], + package: Option<&Path>, known_first_party: &BTreeSet, known_third_party: &BTreeSet, extra_standard_library: &BTreeSet, @@ -486,6 +492,7 @@ pub fn format_imports( let block_by_type = categorize_imports( block, src, + package, known_first_party, known_third_party, extra_standard_library, diff --git a/src/isort/plugins.rs b/src/isort/plugins.rs index acc0d3637d..2d9ddf6a36 100644 --- a/src/isort/plugins.rs +++ b/src/isort/plugins.rs @@ -1,3 +1,5 @@ +use std::path::Path; + use rustpython_ast::{Location, Stmt}; use textwrap::{dedent, indent}; @@ -36,6 +38,7 @@ pub fn check_imports( locator: &SourceCodeLocator, settings: &Settings, autofix: flags::Autofix, + package: Option<&Path>, ) -> Option { let indentation = locator.slice_source_code_range(&extract_indentation_range(&block.imports)); let indentation = leading_space(&indentation); @@ -71,6 +74,7 @@ pub fn check_imports( comments, settings.line_length - indentation.len(), &settings.src, + package, &settings.isort.known_first_party, &settings.isort.known_third_party, &settings.isort.extra_standard_library, diff --git a/src/lib.rs b/src/lib.rs index 33018ba41d..7627e78dd9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,6 +68,7 @@ pub mod logging; pub mod mccabe; pub mod message; mod noqa; +mod packages; mod pandas_vet; pub mod pep8_naming; pub mod printer; @@ -123,6 +124,7 @@ pub fn check(path: &Path, contents: &str, autofix: bool) -> Result> { // Generate checks. let checks = check_path( path, + packages::detect_package_root(path), contents, tokens, &locator, diff --git a/src/linter.rs b/src/linter.rs index cad4638e4d..fbfa457bcd 100644 --- a/src/linter.rs +++ b/src/linter.rs @@ -50,6 +50,7 @@ impl AddAssign for Diagnostics { #[allow(clippy::too_many_arguments)] pub(crate) fn check_path( path: &Path, + package: Option<&Path>, contents: &str, tokens: Vec, locator: &SourceCodeLocator, @@ -101,6 +102,7 @@ pub(crate) fn check_path( settings, autofix, path, + package, )); } } @@ -147,6 +149,7 @@ const MAX_ITERATIONS: usize = 100; /// Lint the source code at the given `Path`. pub fn lint_path( path: &Path, + package: Option<&Path>, settings: &Settings, cache: flags::Cache, autofix: fixer::Mode, @@ -163,7 +166,7 @@ pub fn lint_path( let contents = fs::read_file(path)?; // Lint the file. - let (contents, fixed, messages) = lint(contents, path, settings, autofix)?; + let (contents, fixed, messages) = lint(contents, path, package, settings, autofix)?; // Re-populate the cache. cache::set(path, &metadata, settings, autofix, &messages, cache); @@ -197,6 +200,7 @@ pub fn add_noqa_to_path(path: &Path, settings: &Settings) -> Result { // Generate checks, ignoring any existing `noqa` directives. let checks = check_path( path, + None, &contents, tokens, &locator, @@ -247,7 +251,7 @@ pub fn lint_stdin( let contents = stdin.to_string(); // Lint the file. - let (contents, fixed, messages) = lint(contents, path, settings, autofix)?; + let (contents, fixed, messages) = lint(contents, path, None, settings, autofix)?; // Write the fixed contents to stdout. if matches!(autofix, fixer::Mode::Apply) { @@ -260,6 +264,7 @@ pub fn lint_stdin( fn lint( mut contents: String, path: &Path, + package: Option<&Path>, settings: &Settings, autofix: fixer::Mode, ) -> Result<(String, usize, Vec)> { @@ -287,6 +292,7 @@ fn lint( // Generate checks. let checks = check_path( path, + package, &contents, tokens, &locator, @@ -343,6 +349,7 @@ pub fn test_path(path: &Path, settings: &Settings) -> Result> { ); check_path( path, + None, &contents, tokens, &locator, diff --git a/src/packages.rs b/src/packages.rs new file mode 100644 index 0000000000..86ad3e5bb8 --- /dev/null +++ b/src/packages.rs @@ -0,0 +1,157 @@ +//! Detect Python package roots and file associations. + +use std::path::Path; + +use rustc_hash::FxHashMap; + +// If we have a Python package layout like: +// - root/ +// - foo/ +// - __init__.py +// - bar.py +// - baz/ +// - __init__.py +// - qux.py +// +// Then today, if you run with defaults (`src = ["."]`) from `root`, we'll +// detect that `foo.bar`, `foo.baz`, and `foo.baz.qux` are first-party modules +// (since, if you're in `root`, you can see `foo`). +// +// However, we'd also like it to be the case that, even if you run this command +// from `foo`, we still consider `foo.baz.qux` to be first-party when linting +// `foo/bar.py`. More specifically, for each Python file, we should find the +// root of the current package. +// +// Thus, for each file, we iterate up its ancestors, returning the last +// directory containing an `__init__.py`. + +/// Return `true` if the directory at the given `Path` appears to be a Python +/// package. +pub fn is_package(path: &Path) -> bool { + path.join("__init__.py").is_file() +} + +/// Return the package root for the given Python file. +pub fn detect_package_root(path: &Path) -> Option<&Path> { + let mut current = None; + for parent in path.ancestors() { + if !is_package(parent) { + return current; + } + current = Some(parent); + } + current +} + +/// A wrapper around `is_package` to cache filesystem lookups. +fn is_package_with_cache<'a>( + path: &'a Path, + package_cache: &mut FxHashMap<&'a Path, bool>, +) -> bool { + *package_cache + .entry(path) + .or_insert_with(|| is_package(path)) +} + +/// A wrapper around `detect_package_root` to cache filesystem lookups. +fn detect_package_root_with_cache<'a>( + path: &'a Path, + package_cache: &mut FxHashMap<&'a Path, bool>, +) -> Option<&'a Path> { + let mut current = None; + for parent in path.ancestors() { + if !is_package_with_cache(parent, package_cache) { + return current; + } + current = Some(parent); + } + current +} + +/// Return a mapping from Python file to its package root. +pub fn detect_package_roots<'a>(files: &[&'a Path]) -> FxHashMap<&'a Path, Option<&'a Path>> { + // Pre-populate the module cache, since the list of files could (but isn't + // required to) contain some `__init__.py` files. + let mut package_cache: FxHashMap<&Path, bool> = FxHashMap::default(); + for file in files { + if file.ends_with("__init__.py") { + if let Some(parent) = file.parent() { + package_cache.insert(parent, true); + } + } + } + + // Search for the package root for each file. + let mut package_roots: FxHashMap<&Path, Option<&Path>> = FxHashMap::default(); + for file in files { + if let Some(package) = file.parent() { + if package_roots.contains_key(package) { + continue; + } + package_roots.insert( + package, + detect_package_root_with_cache(package, &mut package_cache), + ); + } + } + + package_roots +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use crate::packages::detect_package_root; + + #[test] + fn package_detection() { + assert_eq!( + detect_package_root( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("resources/test/package/src/package") + .as_path(), + ), + Some( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("resources/test/package/src/package") + .as_path() + ) + ); + + assert_eq!( + detect_package_root( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("resources/test/project/python_modules/core/core") + .as_path(), + ), + Some( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("resources/test/project/python_modules/core/core") + .as_path() + ) + ); + + assert_eq!( + detect_package_root( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("resources/test/project/examples/docs/docs/concepts") + .as_path(), + ), + Some( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("resources/test/project/examples/docs/docs") + .as_path() + ) + ); + + assert_eq!( + detect_package_root( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("setup.py") + .as_path(), + ), + None, + ); + } +} diff --git a/src/pandas_vet/mod.rs b/src/pandas_vet/mod.rs index db23c09ae1..9d2bb77ce9 100644 --- a/src/pandas_vet/mod.rs +++ b/src/pandas_vet/mod.rs @@ -28,6 +28,7 @@ mod tests { ); let mut checks = check_path( Path::new(""), + None, &contents, tokens, &locator, diff --git a/src/pyflakes/mod.rs b/src/pyflakes/mod.rs index 84d1036081..a05e2cd288 100644 --- a/src/pyflakes/mod.rs +++ b/src/pyflakes/mod.rs @@ -177,6 +177,7 @@ mod tests { ); let mut checks = check_path( Path::new(""), + None, &contents, tokens, &locator, diff --git a/src/resolver.rs b/src/resolver.rs index 2f67f67a96..cf68001622 100644 --- a/src/resolver.rs +++ b/src/resolver.rs @@ -62,11 +62,6 @@ pub struct Resolver { } impl Resolver { - /// Merge a `Resolver` into the current `Resolver`. - pub fn merge(&mut self, resolver: Resolver) { - self.settings.extend(resolver.settings); - } - /// Add a resolved `Settings` under a given `PathBuf` scope. pub fn add(&mut self, path: PathBuf, settings: Settings) { self.settings.insert(path, settings);