Auto-detect same-package imports (#1266)

This commit is contained in:
Charlie Marsh 2022-12-16 21:19:11 -05:00 committed by GitHub
parent 5f67ee93f7
commit ecf0dd05d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 261 additions and 35 deletions

View File

@ -0,0 +1,2 @@
[tool.ruff]
src = ["."]

View File

@ -0,0 +1,4 @@
from package.core import method
if __name__ == "__main__":
method()

View File

@ -17,11 +17,14 @@ fn check_import_blocks(
locator: &SourceCodeLocator,
settings: &Settings,
autofix: flags::Autofix,
package: Option<&Path>,
) -> Vec<Check> {
let mut checks = vec![];
for block in tracker.into_iter() {
if !block.imports.is_empty() {
if let Some(check) = isort::plugins::check_imports(&block, locator, settings, autofix) {
if let Some(check) =
isort::plugins::check_imports(&block, locator, settings, autofix, package)
{
checks.push(check);
}
}
@ -36,10 +39,11 @@ pub fn check_imports(
settings: &Settings,
autofix: flags::Autofix,
path: &Path,
package: Option<&Path>,
) -> Vec<Check> {
let mut tracker = ImportTracker::new(locator, directives, path);
for stmt in python_ast {
tracker.visit_stmt(stmt);
}
check_import_blocks(tracker, locator, settings, autofix)
check_import_blocks(tracker, locator, settings, autofix, package)
}

View File

@ -17,10 +17,10 @@ use crate::cli::Overrides;
use crate::iterators::par_iter;
use crate::linter::{add_noqa_to_path, autoformat_path, lint_path, lint_stdin, Diagnostics};
use crate::message::Message;
use crate::resolver;
use crate::resolver::{FileDiscovery, PyprojectDiscovery};
use crate::settings::flags;
use crate::settings::types::SerializationFormat;
use crate::{packages, resolver};
/// Run the linter over a collection of files.
pub fn run(
@ -31,21 +31,34 @@ pub fn run(
cache: flags::Cache,
autofix: fixer::Mode,
) -> Result<Diagnostics> {
// Collect all the files to check.
// Collect all the Python files to check.
let start = Instant::now();
let (paths, resolver) =
resolver::python_files_in_path(files, pyproject_strategy, file_strategy, overrides)?;
let duration = start.elapsed();
debug!("Identified files to lint in: {:?}", duration);
// Discover the package root for each Python file.
let package_roots = packages::detect_package_roots(
&paths
.iter()
.flatten()
.map(ignore::DirEntry::path)
.collect::<Vec<_>>(),
);
let start = Instant::now();
let mut diagnostics: Diagnostics = par_iter(&paths)
.map(|entry| {
match entry {
Ok(entry) => {
let path = entry.path();
let package = path
.parent()
.and_then(|parent| package_roots.get(parent))
.and_then(|package| *package);
let settings = resolver.resolve(path, pyproject_strategy);
lint_path(path, settings, cache, autofix)
lint_path(path, package, settings, cache, autofix)
.map_err(|e| (Some(path.to_owned()), e.to_string()))
}
Err(e) => Err((

View File

@ -1,6 +1,8 @@
use std::collections::BTreeSet;
use std::fs;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use log::debug;
use crate::python::sys::KNOWN_STANDARD_LIBRARY;
@ -13,45 +15,72 @@ pub enum ImportType {
LocalFolder,
}
#[derive(Debug)]
enum Reason<'a> {
NonZeroLevel,
KnownFirstParty,
KnownThirdParty,
ExtraStandardLibrary,
Future,
KnownStandardLibrary,
SamePackage,
SourceMatch(&'a Path),
NoMatch,
}
pub fn categorize(
module_base: &str,
level: Option<&usize>,
src: &[PathBuf],
package: Option<&Path>,
known_first_party: &BTreeSet<String>,
known_third_party: &BTreeSet<String>,
extra_standard_library: &BTreeSet<String>,
) -> ImportType {
let (import_type, reason) = {
if level.map_or(false, |level| *level > 0) {
ImportType::LocalFolder
(ImportType::LocalFolder, Reason::NonZeroLevel)
} else if known_first_party.contains(module_base) {
ImportType::FirstParty
(ImportType::FirstParty, Reason::KnownFirstParty)
} else if known_third_party.contains(module_base) {
ImportType::ThirdParty
(ImportType::ThirdParty, Reason::KnownThirdParty)
} else if extra_standard_library.contains(module_base) {
ImportType::StandardLibrary
(ImportType::StandardLibrary, Reason::ExtraStandardLibrary)
} else if module_base == "__future__" {
ImportType::Future
(ImportType::Future, Reason::Future)
} else if KNOWN_STANDARD_LIBRARY.contains(module_base) {
ImportType::StandardLibrary
} else if find_local(src, module_base) {
ImportType::FirstParty
(ImportType::StandardLibrary, Reason::KnownStandardLibrary)
} else if same_package(package, module_base) {
(ImportType::FirstParty, Reason::SamePackage)
} else if let Some(src) = match_sources(src, module_base) {
(ImportType::FirstParty, Reason::SourceMatch(src))
} else {
ImportType::ThirdParty
(ImportType::ThirdParty, Reason::NoMatch)
}
};
debug!(
"Categorized '{}' as {:?} ({:?})",
module_base, import_type, reason
);
import_type
}
fn find_local(paths: &[PathBuf], base: &str) -> bool {
fn same_package(package: Option<&Path>, module_base: &str) -> bool {
package.map_or(false, |package| package.ends_with(module_base))
}
fn match_sources<'a>(paths: &'a [PathBuf], base: &str) -> Option<&'a Path> {
for path in paths {
if let Ok(metadata) = fs::metadata(path.join(base)) {
if metadata.is_dir() {
return true;
return Some(path);
}
}
if let Ok(metadata) = fs::metadata(path.join(format!("{base}.py"))) {
if metadata.is_file() {
return true;
return Some(path);
}
}
}
false
None
}

View File

@ -1,6 +1,6 @@
use std::cmp::Reverse;
use std::collections::{BTreeMap, BTreeSet};
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use itertools::Itertools;
use ropey::RopeBuilder;
@ -294,6 +294,7 @@ fn normalize_imports(imports: Vec<AnnotatedImport>, combine_as_imports: bool) ->
fn categorize_imports<'a>(
block: ImportBlock<'a>,
src: &[PathBuf],
package: Option<&Path>,
known_first_party: &BTreeSet<String>,
known_third_party: &BTreeSet<String>,
extra_standard_library: &BTreeSet<String>,
@ -305,6 +306,7 @@ fn categorize_imports<'a>(
&alias.module_base(),
None,
src,
package,
known_first_party,
known_third_party,
extra_standard_library,
@ -321,6 +323,7 @@ fn categorize_imports<'a>(
&import_from.module_base(),
import_from.level,
src,
package,
known_first_party,
known_third_party,
extra_standard_library,
@ -337,6 +340,7 @@ fn categorize_imports<'a>(
&import_from.module_base(),
import_from.level,
src,
package,
known_first_party,
known_third_party,
extra_standard_library,
@ -353,6 +357,7 @@ fn categorize_imports<'a>(
&import_from.module_base(),
import_from.level,
src,
package,
known_first_party,
known_third_party,
extra_standard_library,
@ -470,6 +475,7 @@ pub fn format_imports(
comments: Vec<Comment>,
line_length: usize,
src: &[PathBuf],
package: Option<&Path>,
known_first_party: &BTreeSet<String>,
known_third_party: &BTreeSet<String>,
extra_standard_library: &BTreeSet<String>,
@ -486,6 +492,7 @@ pub fn format_imports(
let block_by_type = categorize_imports(
block,
src,
package,
known_first_party,
known_third_party,
extra_standard_library,

View File

@ -1,3 +1,5 @@
use std::path::Path;
use rustpython_ast::{Location, Stmt};
use textwrap::{dedent, indent};
@ -36,6 +38,7 @@ pub fn check_imports(
locator: &SourceCodeLocator,
settings: &Settings,
autofix: flags::Autofix,
package: Option<&Path>,
) -> Option<Check> {
let indentation = locator.slice_source_code_range(&extract_indentation_range(&block.imports));
let indentation = leading_space(&indentation);
@ -71,6 +74,7 @@ pub fn check_imports(
comments,
settings.line_length - indentation.len(),
&settings.src,
package,
&settings.isort.known_first_party,
&settings.isort.known_third_party,
&settings.isort.extra_standard_library,

View File

@ -68,6 +68,7 @@ pub mod logging;
pub mod mccabe;
pub mod message;
mod noqa;
mod packages;
mod pandas_vet;
pub mod pep8_naming;
pub mod printer;
@ -123,6 +124,7 @@ pub fn check(path: &Path, contents: &str, autofix: bool) -> Result<Vec<Check>> {
// Generate checks.
let checks = check_path(
path,
packages::detect_package_root(path),
contents,
tokens,
&locator,

View File

@ -50,6 +50,7 @@ impl AddAssign for Diagnostics {
#[allow(clippy::too_many_arguments)]
pub(crate) fn check_path(
path: &Path,
package: Option<&Path>,
contents: &str,
tokens: Vec<LexResult>,
locator: &SourceCodeLocator,
@ -101,6 +102,7 @@ pub(crate) fn check_path(
settings,
autofix,
path,
package,
));
}
}
@ -147,6 +149,7 @@ const MAX_ITERATIONS: usize = 100;
/// Lint the source code at the given `Path`.
pub fn lint_path(
path: &Path,
package: Option<&Path>,
settings: &Settings,
cache: flags::Cache,
autofix: fixer::Mode,
@ -163,7 +166,7 @@ pub fn lint_path(
let contents = fs::read_file(path)?;
// Lint the file.
let (contents, fixed, messages) = lint(contents, path, settings, autofix)?;
let (contents, fixed, messages) = lint(contents, path, package, settings, autofix)?;
// Re-populate the cache.
cache::set(path, &metadata, settings, autofix, &messages, cache);
@ -197,6 +200,7 @@ pub fn add_noqa_to_path(path: &Path, settings: &Settings) -> Result<usize> {
// Generate checks, ignoring any existing `noqa` directives.
let checks = check_path(
path,
None,
&contents,
tokens,
&locator,
@ -247,7 +251,7 @@ pub fn lint_stdin(
let contents = stdin.to_string();
// Lint the file.
let (contents, fixed, messages) = lint(contents, path, settings, autofix)?;
let (contents, fixed, messages) = lint(contents, path, None, settings, autofix)?;
// Write the fixed contents to stdout.
if matches!(autofix, fixer::Mode::Apply) {
@ -260,6 +264,7 @@ pub fn lint_stdin(
fn lint(
mut contents: String,
path: &Path,
package: Option<&Path>,
settings: &Settings,
autofix: fixer::Mode,
) -> Result<(String, usize, Vec<Message>)> {
@ -287,6 +292,7 @@ fn lint(
// Generate checks.
let checks = check_path(
path,
package,
&contents,
tokens,
&locator,
@ -343,6 +349,7 @@ pub fn test_path(path: &Path, settings: &Settings) -> Result<Vec<Check>> {
);
check_path(
path,
None,
&contents,
tokens,
&locator,

157
src/packages.rs Normal file
View File

@ -0,0 +1,157 @@
//! Detect Python package roots and file associations.
use std::path::Path;
use rustc_hash::FxHashMap;
// If we have a Python package layout like:
// - root/
// - foo/
// - __init__.py
// - bar.py
// - baz/
// - __init__.py
// - qux.py
//
// Then today, if you run with defaults (`src = ["."]`) from `root`, we'll
// detect that `foo.bar`, `foo.baz`, and `foo.baz.qux` are first-party modules
// (since, if you're in `root`, you can see `foo`).
//
// However, we'd also like it to be the case that, even if you run this command
// from `foo`, we still consider `foo.baz.qux` to be first-party when linting
// `foo/bar.py`. More specifically, for each Python file, we should find the
// root of the current package.
//
// Thus, for each file, we iterate up its ancestors, returning the last
// directory containing an `__init__.py`.
/// Return `true` if the directory at the given `Path` appears to be a Python
/// package.
pub fn is_package(path: &Path) -> bool {
path.join("__init__.py").is_file()
}
/// Return the package root for the given Python file.
pub fn detect_package_root(path: &Path) -> Option<&Path> {
let mut current = None;
for parent in path.ancestors() {
if !is_package(parent) {
return current;
}
current = Some(parent);
}
current
}
/// A wrapper around `is_package` to cache filesystem lookups.
fn is_package_with_cache<'a>(
path: &'a Path,
package_cache: &mut FxHashMap<&'a Path, bool>,
) -> bool {
*package_cache
.entry(path)
.or_insert_with(|| is_package(path))
}
/// A wrapper around `detect_package_root` to cache filesystem lookups.
fn detect_package_root_with_cache<'a>(
path: &'a Path,
package_cache: &mut FxHashMap<&'a Path, bool>,
) -> Option<&'a Path> {
let mut current = None;
for parent in path.ancestors() {
if !is_package_with_cache(parent, package_cache) {
return current;
}
current = Some(parent);
}
current
}
/// Return a mapping from Python file to its package root.
pub fn detect_package_roots<'a>(files: &[&'a Path]) -> FxHashMap<&'a Path, Option<&'a Path>> {
// Pre-populate the module cache, since the list of files could (but isn't
// required to) contain some `__init__.py` files.
let mut package_cache: FxHashMap<&Path, bool> = FxHashMap::default();
for file in files {
if file.ends_with("__init__.py") {
if let Some(parent) = file.parent() {
package_cache.insert(parent, true);
}
}
}
// Search for the package root for each file.
let mut package_roots: FxHashMap<&Path, Option<&Path>> = FxHashMap::default();
for file in files {
if let Some(package) = file.parent() {
if package_roots.contains_key(package) {
continue;
}
package_roots.insert(
package,
detect_package_root_with_cache(package, &mut package_cache),
);
}
}
package_roots
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use crate::packages::detect_package_root;
#[test]
fn package_detection() {
assert_eq!(
detect_package_root(
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("resources/test/package/src/package")
.as_path(),
),
Some(
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("resources/test/package/src/package")
.as_path()
)
);
assert_eq!(
detect_package_root(
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("resources/test/project/python_modules/core/core")
.as_path(),
),
Some(
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("resources/test/project/python_modules/core/core")
.as_path()
)
);
assert_eq!(
detect_package_root(
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("resources/test/project/examples/docs/docs/concepts")
.as_path(),
),
Some(
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("resources/test/project/examples/docs/docs")
.as_path()
)
);
assert_eq!(
detect_package_root(
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("setup.py")
.as_path(),
),
None,
);
}
}

View File

@ -28,6 +28,7 @@ mod tests {
);
let mut checks = check_path(
Path::new("<filename>"),
None,
&contents,
tokens,
&locator,

View File

@ -177,6 +177,7 @@ mod tests {
);
let mut checks = check_path(
Path::new("<filename>"),
None,
&contents,
tokens,
&locator,

View File

@ -62,11 +62,6 @@ pub struct Resolver {
}
impl Resolver {
/// Merge a `Resolver` into the current `Resolver`.
pub fn merge(&mut self, resolver: Resolver) {
self.settings.extend(resolver.settings);
}
/// Add a resolved `Settings` under a given `PathBuf` scope.
pub fn add(&mut self, path: PathBuf, settings: Settings) {
self.settings.insert(path, settings);