Move `utf8-encoding-declaration` to token-based rules (#6110)

Closes #5979.
2023-07-26 18:42:37 -04:00 · 2023-07-26 18:42:37 -04:00 · 64a186272f
parent 8113615534
commit 64a186272f
9 changed files with 56 additions and 46 deletions
--- a/crates/ruff/resources/test/fixtures/pyupgrade/UP009_5.py
+++ b/crates/ruff/resources/test/fixtures/pyupgrade/UP009_5.py
@ -0,0 +1,4 @@
 print('Hello world')
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
--- a/crates/ruff/src/checkers/physical_lines.rs
+++ b/crates/ruff/src/checkers/physical_lines.rs
@ -13,7 +13,6 @@ use crate::rules::pycodestyle::rules::{
    tab_indentation, trailing_whitespace,
 };
 use crate::rules::pylint;
 use crate::rules::pyupgrade::rules::unnecessary_coding_comment;
 use crate::settings::Settings;
 pub(crate) fn check_physical_lines(
@ -28,7 +27,6 @@ pub(crate) fn check_physical_lines(
    let enforce_doc_line_too_long = settings.rules.enabled(Rule::DocLineTooLong);
    let enforce_line_too_long = settings.rules.enabled(Rule::LineTooLong);
    let enforce_no_newline_at_end_of_file = settings.rules.enabled(Rule::MissingNewlineAtEndOfFile);
    let enforce_unnecessary_coding_comment = settings.rules.enabled(Rule::UTF8EncodingDeclaration);
    let enforce_mixed_spaces_and_tabs = settings.rules.enabled(Rule::MixedSpacesAndTabs);
    let enforce_bidirectional_unicode = settings.rules.enabled(Rule::BidirectionalUnicode);
    let enforce_trailing_whitespace = settings.rules.enabled(Rule::TrailingWhitespace);
@ -37,27 +35,9 @@ pub(crate) fn check_physical_lines(
    let enforce_tab_indentation = settings.rules.enabled(Rule::TabIndentation);
    let enforce_copyright_notice = settings.rules.enabled(Rule::MissingCopyrightNotice);
    let fix_unnecessary_coding_comment = settings.rules.should_fix(Rule::UTF8EncodingDeclaration);
    let mut commented_lines_iter = indexer.comment_ranges().iter().peekable();
    let mut doc_lines_iter = doc_lines.iter().peekable();
-    for (index, line) in locator.contents().universal_newlines().enumerate() {
+    for line in locator.contents().universal_newlines() {
        while commented_lines_iter
            .next_if(|comment_range| line.range().contains_range(**comment_range))
            .is_some()
        {
            if enforce_unnecessary_coding_comment {
                if index < 2 {
                    if let Some(diagnostic) =
                        unnecessary_coding_comment(&line, fix_unnecessary_coding_comment)
                    {
                        diagnostics.push(diagnostic);
                    }
                }
            }
        }
        while doc_lines_iter
            .next_if(|doc_line_start| line.range().contains_inclusive(**doc_line_start))
            .is_some()
--- a/crates/ruff/src/checkers/tokens.rs
+++ b/crates/ruff/src/checkers/tokens.rs
@ -69,6 +69,10 @@ pub(crate) fn check_tokens(
        eradicate::rules::commented_out_code(&mut diagnostics, locator, indexer, settings);
    }
    if settings.rules.enabled(Rule::UTF8EncodingDeclaration) {
        pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, indexer, settings);
    }
    if settings.rules.enabled(Rule::InvalidEscapeSequence) {
        for (tok, range) in tokens.iter().flatten() {
            if tok.is_string() {
--- a/crates/ruff/src/registry.rs
+++ b/crates/ruff/src/registry.rs
@ -246,8 +246,7 @@ impl Rule {
            | Rule::MissingNewlineAtEndOfFile
            | Rule::MixedSpacesAndTabs
            | Rule::TabIndentation
-            | Rule::TrailingWhitespace
+            | Rule::TrailingWhitespace => LintSource::PhysicalLines,
            | Rule::UTF8EncodingDeclaration => LintSource::PhysicalLines,
            Rule::AmbiguousUnicodeCharacterComment
            | Rule::AmbiguousUnicodeCharacterDocstring
            | Rule::AmbiguousUnicodeCharacterString
@ -289,7 +288,8 @@ impl Rule {
            | Rule::SingleLineImplicitStringConcatenation
            | Rule::TrailingCommaOnBareTuple
            | Rule::TypeCommentInStub
-            | Rule::UselessSemicolon => LintSource::Tokens,
+            | Rule::UselessSemicolon
            | Rule::UTF8EncodingDeclaration => LintSource::Tokens,
            Rule::IOError => LintSource::Io,
            Rule::UnsortedImports | Rule::MissingRequiredImport => LintSource::Imports,
            Rule::ImplicitNamespacePackage | Rule::InvalidModuleName => LintSource::Filesystem,
--- a/crates/ruff/src/rules/pyupgrade/mod.rs
+++ b/crates/ruff/src/rules/pyupgrade/mod.rs
@ -68,6 +68,7 @@ mod tests {
    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_2.py"))]
    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_3.py"))]
    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_4.py"))]
    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_5.py"))]
    #[test_case(Rule::UnicodeKindPrefix, Path::new("UP025.py"))]
    #[test_case(Rule::UnnecessaryBuiltinImport, Path::new("UP029.py"))]
    #[test_case(Rule::UnnecessaryClassParentheses, Path::new("UP039.py"))]
--- a/crates/ruff/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs
+++ b/crates/ruff/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs
@ -3,7 +3,11 @@ use regex::Regex;
 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_source_file::Line;
+use ruff_python_index::Indexer;
 use ruff_source_file::Locator;
 use crate::registry::AsRule;
 use crate::settings::Settings;
 /// ## What it does
 /// Checks for unnecessary UTF-8 encoding declarations.
@ -43,18 +47,31 @@ static CODING_COMMENT_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*utf-?8").unwrap());
 /// UP009
-pub(crate) fn unnecessary_coding_comment(line: &Line, autofix: bool) -> Option<Diagnostic> {
+pub(crate) fn unnecessary_coding_comment(
-    // PEP3120 makes utf-8 the default encoding.
+    diagnostics: &mut Vec<Diagnostic>,
-    if CODING_COMMENT_REGEX.is_match(line.as_str()) {
+    locator: &Locator,
-        let mut diagnostic = Diagnostic::new(UTF8EncodingDeclaration, line.full_range());
+    indexer: &Indexer,
-        if autofix {
+    settings: &Settings,
-            diagnostic.set_fix(Fix::automatic(Edit::deletion(
+) {
-                line.start(),
+    // The coding comment must be on one of the first two lines. Since each comment spans at least
-                line.full_end(),
+    // one line, we only need to check the first two comments at most.
-            )));
+    for range in indexer.comment_ranges().iter().take(2) {
        let line = locator.slice(*range);
        if CODING_COMMENT_REGEX.is_match(line) {
            #[allow(deprecated)]
            let line = locator.compute_line_index(range.start());
            if line.to_zero_indexed() > 1 {
                continue;
            }
            let mut diagnostic = Diagnostic::new(UTF8EncodingDeclaration, *range);
            if settings.rules.should_fix(diagnostic.kind.rule()) {
                diagnostic.set_fix(Fix::automatic(Edit::deletion(
                    range.start(),
                    locator.full_line_end(range.end()),
                )));
            }
            diagnostics.push(diagnostic);
        }
        Some(diagnostic)
    } else {
        None
    }
 }
--- a/crates/ruff/src/rules/pyupgrade/snapshots/ruffrulespyupgradetestsUP009_0.py.snap
+++ b/crates/ruff/src/rules/pyupgrade/snapshots/ruffrulespyupgradetestsUP009_0.py.snap
@ -3,10 +3,10 @@ source: crates/ruff/src/rules/pyupgrade/mod.rs
 ---
 UP009_0.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
  |
-1 | / # coding=utf8
+1 | # coding=utf8
-2 | | 
+  | ^^^^^^^^^^^^^ UP009
-  | |_^ UP009
+2 | 
-3 |   print("Hello world")
+3 | print("Hello world")
  |
  = help: Remove unnecessary coding comment
--- a/crates/ruff/src/rules/pyupgrade/snapshots/ruffrulespyupgradetestsUP009_1.py.snap
+++ b/crates/ruff/src/rules/pyupgrade/snapshots/ruffrulespyupgradetestsUP009_1.py.snap
@ -3,11 +3,11 @@ source: crates/ruff/src/rules/pyupgrade/mod.rs
 ---
 UP009_1.py:2:1: UP009 [*] UTF-8 encoding declaration is unnecessary
  |
-1 |   #!/usr/bin/python
+1 | #!/usr/bin/python
-2 | / # -*- coding: utf-8 -*-
+2 | # -*- coding: utf-8 -*-
-3 | | 
+  | ^^^^^^^^^^^^^^^^^^^^^^^ UP009
-  | |_^ UP009
+3 | 
-4 |   print('Hello world')
+4 | print('Hello world')
  |
  = help: Remove unnecessary coding comment
--- a/crates/ruff/src/rules/pyupgrade/snapshots/ruffrulespyupgradetestsUP009_5.py.snap
+++ b/crates/ruff/src/rules/pyupgrade/snapshots/ruffrulespyupgradetestsUP009_5.py.snap
@ -0,0 +1,4 @@
 ---
 source: crates/ruff/src/rules/pyupgrade/mod.rs
 ---