diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index ad77ea0336..83b40b1c8e 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -6,6 +6,8 @@ use std::borrow::Cow; use std::cmp::Ordering; +use itertools::Itertools; + use ruff_python_ast as ast; use ruff_python_codegen::Stylist; use ruff_python_parser::{TokenKind, Tokens}; @@ -314,6 +316,52 @@ impl<'a> SortClassification<'a> { } } +/// The complexity of the comments in a multiline sequence. +/// +/// A sequence like this has "simple" comments: it's unambiguous +/// which item each comment refers to, so there's no "risk" in sorting it: +/// +/// ```py +/// __all__ = [ +/// "foo", # comment1 +/// "bar", # comment2 +/// ] +/// ``` +/// +/// This sequence has complex comments: we can't safely autofix the sort here, +/// as the own-line comments might be used to create sections in `__all__`: +/// +/// ```py +/// __all__ = [ +/// # fooey things +/// "foo1", +/// "foo2", +/// # barey things +/// "bar1", +/// "foobar", +/// ] +/// ``` +/// +/// This sequence also has complex comments -- it's ambiguous which item +/// each comment should belong to: +/// +/// ```py +/// __all__ = [ +/// "foo1", "foo", "barfoo", # fooey things +/// "baz", bazz2", "fbaz", # barrey things +/// ] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub(super) enum CommentComplexity { + Simple, + Complex, +} + +impl CommentComplexity { + pub(super) const fn is_complex(self) -> bool { + matches!(self, CommentComplexity::Complex) + } +} + // An instance of this struct encapsulates an analysis /// of a multiline Python tuple/list that represents an /// `__all__`/`__slots__`/etc. definition or augmentation. @@ -328,6 +376,24 @@ impl<'a> MultilineStringSequenceValue<'a> { self.items.len() } + /// Determine the [`CommentComplexity`] of this multiline string sequence. + pub(super) fn comment_complexity(&self) -> CommentComplexity { + if self.items.iter().tuple_windows().any(|(first, second)| { + first.has_own_line_comments() + || first + .end_of_line_comments + .is_some_and(|end_line_comment| second.start() < end_line_comment.end()) + }) || self + .items + .last() + .is_some_and(StringSequenceItem::has_own_line_comments) + { + CommentComplexity::Complex + } else { + CommentComplexity::Simple + } + } + /// Analyse the source range for a multiline Python tuple/list that /// represents an `__all__`/`__slots__`/etc. definition or augmentation. /// Return `None` if the analysis fails for whatever reason. @@ -792,6 +858,10 @@ impl<'a> StringSequenceItem<'a> { fn with_no_comments(value: &'a str, element_range: TextRange) -> Self { Self::new(value, vec![], element_range, None) } + + fn has_own_line_comments(&self) -> bool { + !self.preceding_comment_ranges.is_empty() + } } impl Ranged for StringSequenceItem<'_> { diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs index b6d07ff585..55f7903fa9 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs @@ -1,4 +1,4 @@ -use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; +use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast as ast; use ruff_source_file::LineRanges; @@ -54,19 +54,40 @@ use crate::rules::ruff::rules::sequence_sorting::{ /// ``` /// /// ## Fix safety -/// This rule's fix is marked as always being safe, in that +/// This rule's fix is marked as unsafe if there are any comments that take up +/// a whole line by themselves inside the `__all__` definition, for example: +/// ```py +/// __all__ = [ +/// # eggy things +/// "duck_eggs", +/// "chicken_eggs", +/// # hammy things +/// "country_ham", +/// "parma_ham", +/// ] +/// ``` +/// +/// This is a common pattern used to delimit categories within a module's API, +/// but it would be out of the scope of this rule to attempt to maintain these +/// categories when alphabetically sorting the items of `__all__`. +/// +/// The fix is also marked as unsafe if there are more than two `__all__` items +/// on a single line and that line also has a trailing comment, since here it +/// is impossible to accurately gauge which item the comment should be moved +/// with when sorting `__all__`: +/// ```py +/// __all__ = [ +/// "a", "c", "e", # a comment +/// "b", "d", "f", # a second comment +/// ] +/// ``` +/// +/// Other than this, the rule's fix is marked as always being safe, in that /// it should very rarely alter the semantics of any Python code. /// However, note that (although it's rare) the value of `__all__` /// could be read by code elsewhere that depends on the exact /// iteration order of the items in `__all__`, in which case this /// rule's fix could theoretically cause breakage. -/// -/// Note also that for multiline `__all__` definitions -/// that include comments on their own line, it can be hard -/// to tell where the comments should be moved to when sorting -/// the contents of `__all__`. While this rule's fix will -/// never delete a comment, it might *sometimes* move a -/// comment to an unexpected location. #[violation] pub struct UnsortedDunderAll; @@ -208,37 +229,42 @@ fn create_fix( let locator = checker.locator(); let is_multiline = locator.contains_line_break(range); - let sorted_source_code = { - // The machinery in the `MultilineDunderAllValue` is actually - // sophisticated enough that it would work just as well for - // single-line `__all__` definitions, and we could reduce - // the number of lines of code in this file by doing that. - // Unfortunately, however, `MultilineDunderAllValue::from_source_range()` - // must process every token in an `__all__` definition as - // part of its analysis, and this is quite slow. For - // single-line `__all__` definitions, it's also unnecessary, - // as it's impossible to have comments in between the - // `__all__` elements if the `__all__` definition is all on - // a single line. Therefore, as an optimisation, we do the - // bare minimum of token-processing for single-line `__all__` - // definitions: - if is_multiline { - let value = MultilineStringSequenceValue::from_source_range( - range, - kind, - locator, - checker.tokens(), - string_items, - )?; - assert_eq!(value.len(), elts.len()); - value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()) + // The machinery in the `MultilineDunderAllValue` is actually + // sophisticated enough that it would work just as well for + // single-line `__all__` definitions, and we could reduce + // the number of lines of code in this file by doing that. + // Unfortunately, however, `MultilineDunderAllValue::from_source_range()` + // must process every token in an `__all__` definition as + // part of its analysis, and this is quite slow. For + // single-line `__all__` definitions, it's also unnecessary, + // as it's impossible to have comments in between the + // `__all__` elements if the `__all__` definition is all on + // a single line. Therefore, as an optimisation, we do the + // bare minimum of token-processing for single-line `__all__` + // definitions: + let (sorted_source_code, applicability) = if is_multiline { + let value = MultilineStringSequenceValue::from_source_range( + range, + kind, + locator, + checker.tokens(), + string_items, + )?; + assert_eq!(value.len(), elts.len()); + let applicability = if value.comment_complexity().is_complex() { + Applicability::Unsafe } else { - sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE) - } + Applicability::Safe + }; + let sorted_source = + value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist()); + (sorted_source, applicability) + } else { + let sorted_source = + sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE); + (sorted_source, Applicability::Safe) }; - Some(Fix::safe_edit(Edit::range_replacement( - sorted_source_code, - range, - ))) + let edit = Edit::range_replacement(sorted_source_code, range); + Some(Fix::applicable_edit(edit, applicability)) } diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs index b344e4f018..fe5e8f058f 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs @@ -11,8 +11,8 @@ use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; use crate::rules::ruff::rules::sequence_sorting::{ - sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind, - SortClassification, SortingStyle, + sort_single_line_elements_sequence, CommentComplexity, MultilineStringSequenceValue, + SequenceKind, SortClassification, SortingStyle, }; use crate::Locator; @@ -37,11 +37,44 @@ use crate::Locator; /// ``` /// /// ## Fix safety -/// This rule's fix is marked as unsafe whenever Ruff can detect that code -/// elsewhere in the same file reads the `__slots__` variable in some way. -/// This is because the order of the items in `__slots__` may have semantic -/// significance if the `__slots__` of a class is being iterated over, or -/// being assigned to another value. +/// This rule's fix is marked as unsafe in three situations. +/// +/// Firstly, the fix is unsafe if there are any comments that take up +/// a whole line by themselves inside the `__slots__` definition, for example: +/// ```py +/// class Foo: +/// __slots__ = [ +/// # eggy things +/// "duck_eggs", +/// "chicken_eggs", +/// # hammy things +/// "country_ham", +/// "parma_ham", +/// ] +/// ``` +/// +/// This is a common pattern used to delimit categories within a class's slots, +/// but it would be out of the scope of this rule to attempt to maintain these +/// categories when applying a natural sort to the items of `__slots__`. +/// +/// Secondly, the fix is also marked as unsafe if there are more than two +/// `__slots__` items on a single line and that line also has a trailing +/// comment, since here it is impossible to accurately gauge which item the +/// comment should be moved with when sorting `__slots__`: +/// ```py +/// class Bar: +/// __slots__ = [ +/// "a", "c", "e", # a comment +/// "b", "d", "f", # a second comment +/// ] +/// ``` +/// +/// Lastly, this rule's fix is marked as unsafe whenever Ruff can detect that +/// code elsewhere in the same file reads the `__slots__` variable in some way +/// and the `__slots__` variable is not assigned to a set. This is because the +/// order of the items in `__slots__` may have semantic significance if the +/// `__slots__` of a class is being iterated over, or being assigned to another +/// value. /// /// In the vast majority of other cases, this rule's fix is unlikely to /// cause breakage; as such, Ruff will otherwise mark this rule's fix as @@ -49,12 +82,6 @@ use crate::Locator; /// could still be read by code outside of the module in which the /// `__slots__` definition occurs, in which case this rule's fix could /// theoretically cause breakage. -/// -/// Additionally, note that for multiline `__slots__` definitions that -/// include comments on their own line, it can be hard to tell where the -/// comments should be moved to when sorting the contents of `__slots__`. -/// While this rule's fix will never delete a comment, it might *sometimes* -/// move a comment to an unexpected location. #[violation] pub struct UnsortedDunderSlots { class_name: ast::name::Name, @@ -122,15 +149,17 @@ pub(crate) fn sort_dunder_slots(checker: &Checker, binding: &Binding) -> Option< ); if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification { - if let Some(sorted_source_code) = display.generate_sorted_source_code(&items, checker) { + if let Some((sorted_source_code, comment_complexity)) = + display.generate_sorted_source_code(&items, checker) + { let edit = Edit::range_replacement(sorted_source_code, display.range()); - - let applicability = if display.kind.is_set_literal() || binding.is_unused() { - Applicability::Safe - } else { + let applicability = if comment_complexity.is_complex() + || (binding.is_used() && !display.kind.is_set_literal()) + { Applicability::Unsafe + } else { + Applicability::Safe }; - diagnostic.set_fix(Fix::applicable_edit(edit, applicability)); } } @@ -219,7 +248,11 @@ impl<'a> StringLiteralDisplay<'a> { Some(result) } - fn generate_sorted_source_code(&self, elements: &[&str], checker: &Checker) -> Option { + fn generate_sorted_source_code( + &self, + elements: &[&str], + checker: &Checker, + ) -> Option<(String, CommentComplexity)> { let locator = checker.locator(); let multiline_classification = if locator.contains_line_break(self.range()) { @@ -238,26 +271,31 @@ impl<'a> StringLiteralDisplay<'a> { elements, )?; assert_eq!(analyzed_sequence.len(), self.elts.len()); - Some(analyzed_sequence.into_sorted_source_code( + let comment_complexity = analyzed_sequence.comment_complexity(); + let sorted_code = analyzed_sequence.into_sorted_source_code( SORTING_STYLE, locator, checker.stylist(), - )) + ); + Some((sorted_code, comment_complexity)) } // Sorting multiline dicts is unsupported (DisplayKind::Dict { .. }, MultilineClassification::Multiline) => None, (DisplayKind::Sequence(sequence_kind), MultilineClassification::SingleLine) => { - Some(sort_single_line_elements_sequence( + let sorted_code = sort_single_line_elements_sequence( *sequence_kind, &self.elts, elements, locator, SORTING_STYLE, - )) + ); + Some((sorted_code, CommentComplexity::Simple)) + } + (DisplayKind::Dict { items }, MultilineClassification::SingleLine) => { + let sorted_code = + sort_single_line_elements_dict(&self.elts, elements, items, locator); + Some((sorted_code, CommentComplexity::Simple)) } - (DisplayKind::Dict { items }, MultilineClassification::SingleLine) => Some( - sort_single_line_elements_dict(&self.elts, elements, items, locator), - ), } } } diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF022_RUF022.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF022_RUF022.py.snap index c7d3bbf1a9..bcc2be47a9 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF022_RUF022.py.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF022_RUF022.py.snap @@ -1,6 +1,5 @@ --- source: crates/ruff_linter/src/rules/ruff/mod.rs -snapshot_kind: text --- RUF022.py:5:11: RUF022 [*] `__all__` is not sorted | @@ -267,7 +266,7 @@ RUF022.py:32:11: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 30 30 | #################################### 31 31 | 32 32 | __all__ = ( @@ -302,7 +301,7 @@ RUF022.py:40:11: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 38 38 | ) 39 39 | 40 40 | __all__ = [ @@ -423,7 +422,7 @@ RUF022.py:91:11: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 88 88 | ########################################## 89 89 | 90 90 | # comment0 @@ -464,7 +463,7 @@ RUF022.py:101:11: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 99 99 | # comment7 100 100 | 101 101 | __all__ = [ # comment0 @@ -591,7 +590,7 @@ RUF022.py:125:28: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 123 123 | "register_error", "lookup_error"] 124 124 | 125 125 | __all__: tuple[str, ...] = ( # a comment about the opening paren @@ -658,7 +657,7 @@ RUF022.py:145:16: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 143 143 | ) 144 144 | 145 145 | __all__.extend(( # comment0 @@ -690,7 +689,7 @@ RUF022.py:155:5: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 153 153 | __all__.extend( # comment0 154 154 | # comment1 155 155 | ( # comment2 @@ -723,7 +722,7 @@ RUF022.py:164:16: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 162 162 | ) # comment2 163 163 | 164 164 | __all__.extend([ # comment0 @@ -755,7 +754,7 @@ RUF022.py:174:5: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 172 172 | __all__.extend( # comment0 173 173 | # comment1 174 174 | [ # comment2 @@ -785,7 +784,7 @@ RUF022.py:183:11: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 180 180 | ] # comment4 181 181 | ) # comment2 182 182 | @@ -1014,7 +1013,7 @@ RUF022.py:243:11: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 241 241 | ] 242 242 | 243 243 | __all__ = ( @@ -1056,7 +1055,7 @@ RUF022.py:253:11: RUF022 [*] `__all__` is not sorted | = help: Apply an isort-style sorting to `__all__` -ℹ Safe fix +ℹ Unsafe fix 251 251 | ) 252 252 | 253 253 | __all__ = ( # comment about the opening paren diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap index 72437c6a2b..aee3f36be6 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF023_RUF023.py.snap @@ -1,6 +1,5 @@ --- source: crates/ruff_linter/src/rules/ruff/mod.rs -snapshot_kind: text --- RUF023.py:6:17: RUF023 [*] `Klass.__slots__` is not sorted | @@ -205,7 +204,7 @@ RUF023.py:33:17: RUF023 [*] `Klass3.__slots__` is not sorted | = help: Apply a natural sort to `Klass3.__slots__` -ℹ Safe fix +ℹ Unsafe fix 31 31 | 32 32 | class Klass3: 33 33 | __slots__ = ( @@ -240,7 +239,7 @@ RUF023.py:40:17: RUF023 [*] `Klass3.__slots__` is not sorted | = help: Apply a natural sort to `Klass3.__slots__` -ℹ Safe fix +ℹ Unsafe fix 38 38 | "a0" 39 39 | ) 40 40 | __slots__ = [ @@ -275,7 +274,7 @@ RUF023.py:54:17: RUF023 [*] `Klass4.__slots__` is not sorted | = help: Apply a natural sort to `Klass4.__slots__` -ℹ Safe fix +ℹ Unsafe fix 51 51 | 52 52 | class Klass4: 53 53 | # comment0 @@ -316,7 +315,7 @@ RUF023.py:64:17: RUF023 [*] `Klass4.__slots__` is not sorted | = help: Apply a natural sort to `Klass4.__slots__` -ℹ Safe fix +ℹ Unsafe fix 62 62 | # comment7 63 63 | 64 64 | __slots__ = [ # comment0 @@ -377,7 +376,7 @@ RUF023.py:75:17: RUF023 [*] `PurePath.__slots__` is not sorted | = help: Apply a natural sort to `PurePath.__slots__` -ℹ Safe fix +ℹ Unsafe fix 73 73 | # from cpython/Lib/pathlib/__init__.py 74 74 | class PurePath: 75 75 | __slots__ = ( @@ -460,7 +459,7 @@ RUF023.py:113:17: RUF023 [*] `ArgumentDescriptor.__slots__` is not sorted | = help: Apply a natural sort to `ArgumentDescriptor.__slots__` -ℹ Safe fix +ℹ Unsafe fix 111 111 | # From cpython/Lib/pickletools.py 112 112 | class ArgumentDescriptor(object): 113 113 | __slots__ = ( @@ -649,7 +648,7 @@ RUF023.py:181:17: RUF023 [*] `BezierBuilder4.__slots__` is not sorted | = help: Apply a natural sort to `BezierBuilder4.__slots__` -ℹ Safe fix +ℹ Unsafe fix 179 179 | 180 180 | class BezierBuilder4: 181 181 | __slots__ = (