Sort imports by cached key (#7963)

## Summary

Refactor for isort implementation. Closes #7738.

I introduced a `NatOrdString` and a `NatOrdStr` type to have a naturally
ordered `String` and `&str`, and I pretty much went back to the original
implementation based on `module_key`, `member_key` and
`sorted_by_cached_key` from itertools. I tried my best to avoid
unnecessary allocations but it may have been clumsy in some places, so
feedback is appreciated! I also renamed the `Prefix` enum to
`MemberType` (and made some related adjustments) because I think this
fits more what it is, and it's closer to the wording found in the isort
documentation.

I think the result is nicer to work with, and it should make
implementing #1567 and the like easier :)

Of course, I am very much open to any and all remarks on what I did!

## Test Plan

I didn't add any test, I am relying on the existing tests since this is
just a refactor.
This commit is contained in:
bluthej 2023-10-30 05:37:33 +01:00 committed by GitHub
parent 1f2d4f3ee1
commit 5776ec1079
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 154 additions and 167 deletions

View File

@ -8,13 +8,14 @@ pub(crate) use categorize::categorize;
use categorize::categorize_imports;
pub use categorize::{ImportSection, ImportType};
use comments::Comment;
use itertools::Itertools;
use normalize::normalize_imports;
use order::order_imports;
use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_source_file::Locator;
use settings::Settings;
use sorting::cmp_either_import;
use sorting::module_key;
use types::EitherImport::{Import, ImportFrom};
use types::{AliasData, EitherImport, ImportBlock, TrailingComma};
@ -173,17 +174,28 @@ fn format_import_block(
let imports = order_imports(import_block, settings);
let imports = {
let mut imports = imports
.import
.into_iter()
.map(Import)
.chain(imports.import_from.into_iter().map(ImportFrom))
.collect::<Vec<EitherImport>>();
if settings.force_sort_within_sections {
imports.sort_by(|import1, import2| cmp_either_import(import1, import2, settings));
};
let imports = imports
.import
.into_iter()
.map(Import)
.chain(imports.import_from.into_iter().map(ImportFrom));
let imports: Vec<EitherImport> = if settings.force_sort_within_sections {
imports
.sorted_by_cached_key(|import| match import {
Import((alias, _)) => {
module_key(Some(alias.name), alias.asname, None, None, settings)
}
ImportFrom((import_from, _, _, aliases)) => module_key(
import_from.module,
None,
import_from.level,
aliases.first().map(|(alias, _)| (alias.name, alias.asname)),
settings,
),
})
.collect()
} else {
imports.collect()
};
// Add a blank line between every section.

View File

@ -1,9 +1,7 @@
use std::cmp::Ordering;
use itertools::Itertools;
use super::settings::Settings;
use super::sorting::{cmp_import_from, cmp_members, cmp_modules};
use super::sorting::{member_key, module_key};
use super::types::{AliasData, CommentSet, ImportBlock, ImportFromStatement, OrderedImportBlock};
pub(crate) fn order_imports<'a>(
@ -13,12 +11,11 @@ pub(crate) fn order_imports<'a>(
let mut ordered = OrderedImportBlock::default();
// Sort `Stmt::Import`.
ordered.import.extend(
block
.import
.into_iter()
.sorted_by(|(alias1, _), (alias2, _)| cmp_modules(alias1, alias2, settings)),
);
ordered
.import
.extend(block.import.into_iter().sorted_by_cached_key(|(alias, _)| {
module_key(Some(alias.name), alias.asname, None, None, settings)
}));
// Sort `Stmt::ImportFrom`.
ordered.import_from.extend(
@ -53,27 +50,22 @@ pub(crate) fn order_imports<'a>(
trailing_comma,
aliases
.into_iter()
.sorted_by(|(alias1, _), (alias2, _)| {
cmp_members(alias1, alias2, settings)
.sorted_by_cached_key(|(alias, _)| {
member_key(alias.name, alias.asname, settings)
})
.collect::<Vec<(AliasData, CommentSet)>>(),
)
},
)
.sorted_by(
|(import_from1, _, _, aliases1), (import_from2, _, _, aliases2)| {
cmp_import_from(import_from1, import_from2, settings).then_with(|| {
match (aliases1.first(), aliases2.first()) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some((alias1, _)), Some((alias2, _))) => {
cmp_members(alias1, alias2, settings)
}
}
})
},
),
.sorted_by_cached_key(|(import_from, _, _, aliases)| {
module_key(
import_from.module,
None,
import_from.level,
aliases.first().map(|(alias, _)| (alias.name, alias.asname)),
settings,
)
}),
);
ordered
}

View File

@ -1,171 +1,153 @@
/// See: <https://github.com/PyCQA/isort/blob/12cc5fbd67eebf92eb2213b03c07b138ae1fb448/isort/sorting.py#L13>
use std::cmp::Ordering;
use std::collections::BTreeSet;
use natord;
use std::cmp::Reverse;
use std::{borrow::Cow, cmp::Ordering};
use ruff_python_stdlib::str;
use super::settings::{RelativeImportsOrder, Settings};
use super::types::EitherImport::{Import, ImportFrom};
use super::types::{AliasData, EitherImport, ImportFromData, Importable};
#[derive(PartialOrd, Ord, PartialEq, Eq, Copy, Clone)]
pub(crate) enum Prefix {
Constants,
Classes,
Variables,
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Copy, Clone)]
pub(crate) enum MemberType {
Constant,
Class,
Variable,
}
fn prefix(name: &str, settings: &Settings) -> Prefix {
fn member_type(name: &str, settings: &Settings) -> MemberType {
if settings.constants.contains(name) {
// Ex) `CONSTANT`
Prefix::Constants
MemberType::Constant
} else if settings.classes.contains(name) {
// Ex) `CLASS`
Prefix::Classes
MemberType::Class
} else if settings.variables.contains(name) {
// Ex) `variable`
Prefix::Variables
MemberType::Variable
} else if name.len() > 1 && str::is_cased_uppercase(name) {
// Ex) `CONSTANT`
Prefix::Constants
MemberType::Constant
} else if name.chars().next().is_some_and(char::is_uppercase) {
// Ex) `Class`
Prefix::Classes
MemberType::Class
} else {
// Ex) `variable`
Prefix::Variables
MemberType::Variable
}
}
/// Compare two module names' by their `force-to-top`ness.
fn cmp_force_to_top(name1: &str, name2: &str, force_to_top: &BTreeSet<String>) -> Ordering {
let force_to_top1 = force_to_top.contains(name1);
let force_to_top2 = force_to_top.contains(name2);
force_to_top1.cmp(&force_to_top2).reverse()
}
#[derive(Eq, PartialEq, Debug)]
pub(crate) struct NatOrdStr<'a>(Cow<'a, str>);
/// Compare two top-level modules.
pub(crate) fn cmp_modules(alias1: &AliasData, alias2: &AliasData, settings: &Settings) -> Ordering {
cmp_force_to_top(alias1.name, alias2.name, &settings.force_to_top)
.then_with(|| {
if settings.case_sensitive {
natord::compare(alias1.name, alias2.name)
} else {
natord::compare_ignore_case(alias1.name, alias2.name)
.then_with(|| natord::compare(alias1.name, alias2.name))
}
})
.then_with(|| match (alias1.asname, alias2.asname) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some(asname1), Some(asname2)) => natord::compare(asname1, asname2),
})
}
/// Compare two member imports within `Stmt::ImportFrom` blocks.
pub(crate) fn cmp_members(alias1: &AliasData, alias2: &AliasData, settings: &Settings) -> Ordering {
match (alias1.name == "*", alias2.name == "*") {
(true, false) => Ordering::Less,
(false, true) => Ordering::Greater,
_ => {
if settings.order_by_type {
prefix(alias1.name, settings)
.cmp(&prefix(alias2.name, settings))
.then_with(|| cmp_modules(alias1, alias2, settings))
} else {
cmp_modules(alias1, alias2, settings)
}
}
impl Ord for NatOrdStr<'_> {
fn cmp(&self, other: &Self) -> Ordering {
natord::compare(&self.0, &other.0)
}
}
/// Compare two relative import levels.
pub(crate) fn cmp_levels(
level1: Option<u32>,
level2: Option<u32>,
relative_imports_order: RelativeImportsOrder,
) -> Ordering {
match (level1, level2) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some(level1), Some(level2)) => match relative_imports_order {
RelativeImportsOrder::ClosestToFurthest => level1.cmp(&level2),
RelativeImportsOrder::FurthestToClosest => level2.cmp(&level1),
},
impl PartialOrd for NatOrdStr<'_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
/// Compare two `Stmt::ImportFrom` blocks.
pub(crate) fn cmp_import_from(
import_from1: &ImportFromData,
import_from2: &ImportFromData,
impl<'a> From<&'a str> for NatOrdStr<'a> {
fn from(s: &'a str) -> Self {
NatOrdStr(Cow::Borrowed(s))
}
}
impl<'a> From<String> for NatOrdStr<'a> {
fn from(s: String) -> Self {
NatOrdStr(Cow::Owned(s))
}
}
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Copy, Clone)]
pub(crate) enum Distance {
Nearest(u32),
Furthest(Reverse<u32>),
}
type ModuleKey<'a> = (
Distance,
Option<bool>,
Option<NatOrdStr<'a>>,
Option<NatOrdStr<'a>>,
Option<NatOrdStr<'a>>,
Option<MemberKey<'a>>,
);
/// Returns a comparable key to capture the desired sorting order for an imported module (e.g.,
/// `foo` in `from foo import bar`).
pub(crate) fn module_key<'a>(
name: Option<&'a str>,
asname: Option<&'a str>,
level: Option<u32>,
first_alias: Option<(&'a str, Option<&'a str>)>,
settings: &Settings,
) -> Ordering {
cmp_levels(
import_from1.level,
import_from2.level,
settings.relative_imports_order,
) -> ModuleKey<'a> {
let distance = match settings.relative_imports_order {
RelativeImportsOrder::ClosestToFurthest => Distance::Nearest(level.unwrap_or_default()),
RelativeImportsOrder::FurthestToClosest => {
Distance::Furthest(Reverse(level.unwrap_or_default()))
}
};
let force_to_top = name.map(|name| !settings.force_to_top.contains(name)); // `false` < `true` so we get forced to top first
let maybe_lowercase_name = name
.and_then(|name| (!settings.case_sensitive).then_some(NatOrdStr(maybe_lowercase(name))));
let module_name = name.map(NatOrdStr::from);
let asname = asname.map(NatOrdStr::from);
let first_alias = first_alias.map(|(name, asname)| member_key(name, asname, settings));
(
distance,
force_to_top,
maybe_lowercase_name,
module_name,
asname,
first_alias,
)
.then_with(|| {
cmp_force_to_top(
&import_from1.module_name(),
&import_from2.module_name(),
&settings.force_to_top,
)
})
.then_with(|| match (&import_from1.module, import_from2.module) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some(module1), Some(module2)) => {
if settings.case_sensitive {
natord::compare(module1, module2)
} else {
natord::compare_ignore_case(module1, module2)
}
}
})
}
/// Compare an import to an import-from.
fn cmp_import_import_from(
import: &AliasData,
import_from: &ImportFromData,
type MemberKey<'a> = (
bool,
Option<MemberType>,
Option<NatOrdStr<'a>>,
NatOrdStr<'a>,
Option<NatOrdStr<'a>>,
);
/// Returns a comparable key to capture the desired sorting order for an imported member (e.g.,
/// `bar` in `from foo import bar`).
pub(crate) fn member_key<'a>(
name: &'a str,
asname: Option<&'a str>,
settings: &Settings,
) -> Ordering {
cmp_force_to_top(
import.name,
&import_from.module_name(),
&settings.force_to_top,
) -> MemberKey<'a> {
let not_star_import = name != "*"; // `false` < `true` so we get star imports first
let member_type = settings
.order_by_type
.then_some(member_type(name, settings));
let maybe_lowercase_name =
(!settings.case_sensitive).then_some(NatOrdStr(maybe_lowercase(name)));
let module_name = NatOrdStr::from(name);
let asname = asname.map(NatOrdStr::from);
(
not_star_import,
member_type,
maybe_lowercase_name,
module_name,
asname,
)
.then_with(|| {
if settings.case_sensitive {
natord::compare(import.name, import_from.module.unwrap_or_default())
} else {
natord::compare_ignore_case(import.name, import_from.module.unwrap_or_default())
}
})
}
/// Compare two [`EitherImport`] enums which may be [`Import`] or [`ImportFrom`]
/// structs.
pub(crate) fn cmp_either_import(
a: &EitherImport,
b: &EitherImport,
settings: &Settings,
) -> Ordering {
match (a, b) {
(Import((alias1, _)), Import((alias2, _))) => cmp_modules(alias1, alias2, settings),
(ImportFrom((import_from, ..)), Import((alias, _))) => {
cmp_import_import_from(alias, import_from, settings).reverse()
}
(Import((alias, _)), ImportFrom((import_from, ..))) => {
cmp_import_import_from(alias, import_from, settings)
}
(ImportFrom((import_from1, ..)), ImportFrom((import_from2, ..))) => {
cmp_import_from(import_from1, import_from2, settings)
}
/// Lowercase the given string, if it contains any uppercase characters.
fn maybe_lowercase(name: &str) -> Cow<'_, str> {
if name.chars().all(char::is_lowercase) {
Cow::Borrowed(name)
} else {
Cow::Owned(name.to_lowercase())
}
}

View File

@ -89,6 +89,7 @@ type ImportFrom<'a> = (
Vec<AliasDataWithComments<'a>>,
);
#[derive(Debug)]
pub(crate) enum EitherImport<'a> {
Import(Import<'a>),
ImportFrom(ImportFrom<'a>),