Replace token iteration with Indexer/Locator lookups for relevant rules (#4513)

Evan Rittenhouse authored 2023-05-22 02:56:19 -05:00, committed by GitHub
parent f73b398776
commit c6e5fed658
5 changed files with 72 additions and 66 deletions
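
At its core, the commit swaps per-rule token scans for a shared comment index: instead of re-matching `Tok::Comment` in every rule, the `Indexer` records each comment's source range once, and rules slice the text back out through the `Locator`. A minimal standalone sketch of that pattern (the `CommentIndex` type and the naive `#`-scan below are invented for illustration; ruff's real `Indexer` is populated from the lexer and `comment_ranges()` yields `TextRange`s):

// Standalone sketch of the pattern this commit adopts. `CommentIndex` stands in
// for ruff's Indexer, and plain (usize, usize) byte ranges stand in for TextRange.
struct CommentIndex {
    comment_ranges: Vec<(usize, usize)>, // byte range of every comment, recorded once
}

impl CommentIndex {
    fn build(source: &str) -> Self {
        // Naive line scan (would misfire on '#' inside strings); ruff builds
        // this from the lexer instead.
        let mut comment_ranges = vec![];
        for (offset, line) in source.lines().scan(0, |pos, line| {
            let start = *pos;
            *pos += line.len() + 1;
            Some((start, line))
        }) {
            if let Some(i) = line.find('#') {
                comment_ranges.push((offset + i, offset + line.len()));
            }
        }
        Self { comment_ranges }
    }
}

fn main() {
    let source = "x = 1  # a comment\n# another\n";
    let index = CommentIndex::build(source);
    // A rule now loops over pre-indexed ranges instead of every lexed token:
    for &(start, end) in &index.comment_ranges {
        println!("comment: {:?}", &source[start..end]);
    }
}

Each rule then reads comment text on demand instead of carrying `Tok::Comment` payloads through the token stream.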


@@ -12,10 +12,11 @@ use crate::rules::{
 };
 use crate::settings::Settings;
 use ruff_diagnostics::Diagnostic;
-use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::source_code::{Indexer, Locator};
 
 pub(crate) fn check_tokens(
     locator: &Locator,
+    indexer: &Indexer,
     tokens: &[LexResult],
     settings: &Settings,
     is_stub: bool,
@@ -100,15 +101,9 @@ pub(crate) fn check_tokens(
 
     // ERA001
     if enforce_commented_out_code {
-        for (tok, range) in tokens.iter().flatten() {
-            if matches!(tok, Tok::Comment(_)) {
-                if let Some(diagnostic) =
-                    eradicate::rules::commented_out_code(locator, *range, settings)
-                {
-                    diagnostics.push(diagnostic);
-                }
-            }
-        }
+        diagnostics.extend(eradicate::rules::commented_out_code(
+            indexer, locator, settings,
+        ));
     }
 
     // W605
@@ -185,13 +180,13 @@ pub(crate) fn check_tokens(
 
     // PYI033
     if enforce_type_comment_in_stub && is_stub {
-        diagnostics.extend(flake8_pyi::rules::type_comment_in_stub(tokens));
+        diagnostics.extend(flake8_pyi::rules::type_comment_in_stub(indexer, locator));
     }
 
     // TD001, TD002, TD003, TD004, TD005, TD006, TD007
     if enforce_todos {
         diagnostics.extend(
-            flake8_todos::rules::todos(tokens, settings)
+            flake8_todos::rules::todos(indexer, locator, settings)
                 .into_iter()
                 .filter(|diagnostic| settings.rules.enabled(diagnostic.kind.rule())),
         );


@@ -98,7 +98,7 @@ pub fn check_path(
         .any(|rule_code| rule_code.lint_source().is_tokens())
     {
         let is_stub = is_python_stub_file(path);
-        diagnostics.extend(check_tokens(locator, &tokens, settings, is_stub));
+        diagnostics.extend(check_tokens(locator, indexer, &tokens, settings, is_stub));
     }
 
     // Run the filesystem-based rules.


@@ -1,8 +1,6 @@
-use ruff_text_size::TextRange;
-
 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::source_code::{Indexer, Locator};
 
 use crate::registry::Rule;
 use crate::settings::Settings;
@@ -47,24 +45,28 @@ fn is_standalone_comment(line: &str) -> bool {
 
 /// ERA001
 pub(crate) fn commented_out_code(
+    indexer: &Indexer,
     locator: &Locator,
-    range: TextRange,
     settings: &Settings,
-) -> Option<Diagnostic> {
-    let line = locator.full_lines(range);
+) -> Vec<Diagnostic> {
+    let mut diagnostics = vec![];
 
-    // Verify that the comment is on its own line, and that it contains code.
-    if is_standalone_comment(line) && comment_contains_code(line, &settings.task_tags[..]) {
-        let mut diagnostic = Diagnostic::new(CommentedOutCode, range);
-
-        if settings.rules.should_fix(Rule::CommentedOutCode) {
-            #[allow(deprecated)]
-            diagnostic.set_fix(Fix::unspecified(Edit::range_deletion(
-                locator.full_lines_range(range),
-            )));
+    for range in indexer.comment_ranges() {
+        let line = locator.full_lines(*range);
+
+        // Verify that the comment is on its own line, and that it contains code.
+        if is_standalone_comment(line) && comment_contains_code(line, &settings.task_tags[..]) {
+            let mut diagnostic = Diagnostic::new(CommentedOutCode, *range);
+
+            if settings.rules.should_fix(Rule::CommentedOutCode) {
+                #[allow(deprecated)]
+                diagnostic.set_fix(Fix::unspecified(Edit::range_deletion(
+                    locator.full_lines_range(*range),
+                )));
+            }
+            diagnostics.push(diagnostic);
         }
-        Some(diagnostic)
-    } else {
-        None
     }
+
+    diagnostics
 }


@@ -1,7 +1,6 @@
 use once_cell::sync::Lazy;
 use regex::Regex;
-use rustpython_parser::lexer::LexResult;
-use rustpython_parser::Tok;
+use ruff_python_ast::source_code::{Indexer, Locator};
 
 use ruff_diagnostics::{Diagnostic, Violation};
 use ruff_macros::{derive_message_formats, violation};
@@ -35,14 +34,14 @@ impl Violation for TypeCommentInStub {
 }
 
 /// PYI033
-pub(crate) fn type_comment_in_stub(tokens: &[LexResult]) -> Vec<Diagnostic> {
+pub(crate) fn type_comment_in_stub(indexer: &Indexer, locator: &Locator) -> Vec<Diagnostic> {
     let mut diagnostics = vec![];
-    for token in tokens.iter().flatten() {
-        if let (Tok::Comment(comment), range) = token {
-            if TYPE_COMMENT_REGEX.is_match(comment) && !TYPE_IGNORE_REGEX.is_match(comment) {
-                diagnostics.push(Diagnostic::new(TypeCommentInStub, *range));
-            }
+    for range in indexer.comment_ranges() {
+        let comment = locator.slice(*range);
+
+        if TYPE_COMMENT_REGEX.is_match(comment) && !TYPE_IGNORE_REGEX.is_match(comment) {
+            diagnostics.push(Diagnostic::new(TypeCommentInStub, *range));
         }
     }
 
     diagnostics
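
For context, PYI033 flags comments matching `TYPE_COMMENT_REGEX` unless they also match `TYPE_IGNORE_REGEX`; neither pattern appears in this diff. A sketch with hypothetical approximations of the two patterns (not the real definitions):

// Hypothetical approximations of the two regexes defined elsewhere in the file:
// flag "# type: X" type comments, but leave "# type: ignore" alone.
use regex::Regex;

fn main() {
    let type_comment = Regex::new(r"^#\s*type:").unwrap();
    let type_ignore = Regex::new(r"^#\s*type:\s*ignore").unwrap();
    for comment in ["# type: int", "# type: ignore[assignment]", "# a comment"] {
        let flagged = type_comment.is_match(comment) && !type_ignore.is_match(comment);
        println!("{comment:?} -> flagged: {flagged}");
    }
}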


@@ -1,9 +1,7 @@
-use itertools::Itertools;
 use once_cell::sync::Lazy;
 use regex::RegexSet;
+use ruff_python_ast::source_code::{Indexer, Locator};
 use ruff_text_size::{TextLen, TextRange, TextSize};
-use rustpython_parser::lexer::LexResult;
-use rustpython_parser::Tok;
 
 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix, Violation};
 use ruff_macros::{derive_message_formats, violation};
@@ -289,44 +287,52 @@ static ISSUE_LINK_REGEX_SET: Lazy<RegexSet> = Lazy::new(|| {
     .unwrap()
 });
 
-pub(crate) fn todos(tokens: &[LexResult], settings: &Settings) -> Vec<Diagnostic> {
+pub(crate) fn todos(indexer: &Indexer, locator: &Locator, settings: &Settings) -> Vec<Diagnostic> {
     let mut diagnostics: Vec<Diagnostic> = vec![];
 
-    let mut iter = tokens.iter().flatten().multipeek();
-    while let Some((token, token_range)) = iter.next() {
-        let Tok::Comment(comment) = token else {
-            continue;
-        };
+    let mut iter = indexer.comment_ranges().iter().peekable();
+    while let Some(comment_range) = iter.next() {
+        let comment = locator.slice(*comment_range);
 
         // Check that the comment is a TODO (properly formed or not).
-        let Some(tag) = detect_tag(comment, token_range.start()) else {
+        let Some(tag) = detect_tag(comment, comment_range.start()) else {
             continue;
         };
 
         tag_errors(&tag, &mut diagnostics, settings);
-        static_errors(&mut diagnostics, comment, *token_range, &tag);
+        static_errors(&mut diagnostics, comment, *comment_range, &tag);
 
         // TD003
         let mut has_issue_link = false;
-        while let Some((token, token_range)) = iter.peek() {
-            match token {
-                Tok::Comment(comment) => {
-                    if detect_tag(comment, token_range.start()).is_some() {
-                        break;
-                    }
-                    if ISSUE_LINK_REGEX_SET.is_match(comment) {
-                        has_issue_link = true;
-                        break;
-                    }
-                }
-                Tok::Newline | Tok::NonLogicalNewline => {
-                    continue;
-                }
-                _ => {
-                    break;
-                }
+        let mut curr_range = comment_range;
+        while let Some(next_range) = iter.peek() {
+            // Ensure that next_range is in the same multiline comment "block" as
+            // comment_range.
+            if !locator
+                .slice(TextRange::new(curr_range.end(), next_range.start()))
+                .chars()
+                .all(char::is_whitespace)
+            {
+                break;
             }
+
+            let next_comment = locator.slice(**next_range);
+            if detect_tag(next_comment, next_range.start()).is_some() {
+                break;
+            }
+
+            if ISSUE_LINK_REGEX_SET.is_match(next_comment) {
+                has_issue_link = true;
+            }
+
+            // If the next_comment isn't a tag or an issue, it's worthless in the context
+            // of this linter. We can increment here instead of waiting for the next
+            // iteration of the outer loop.
+            //
+            // Unwrap is safe because peek() is Some()
+            curr_range = iter.next().unwrap();
        }
 
         if !has_issue_link {
             diagnostics.push(Diagnostic::new(MissingTodoLink, tag.range));
         }
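
The whitespace test above is what defines a comment "block": a TODO only needs an issue link somewhere in its own contiguous run of comments. A toy version of that adjacency check (plain byte offsets stand in for ruff's `TextRange`/`Locator`):

// Two comments belong to the same block iff only whitespace separates their ranges.
fn same_block(source: &str, prev_end: usize, next_start: usize) -> bool {
    source[prev_end..next_start].chars().all(char::is_whitespace)
}

fn main() {
    let src = "# TODO(alice): fix\n# https://github.com/org/repo/issues/1\nx = 1  # unrelated\n";
    // Comment 1 ends at byte 18; comment 2 starts at byte 19: only "\n" between them.
    assert!(same_block(src, 18, 19));
    // Comment 2 ends at byte 57; comment 3 starts at byte 65: code intervenes.
    assert!(!same_block(src, 57, 65));
    println!("ok");
}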
@@ -400,6 +406,7 @@ fn static_errors(
             trimmed.text_len()
         }
     } else {
+        // TD-002
         diagnostics.push(Diagnostic::new(MissingTodoAuthor, tag.range));
         TextSize::new(0)
@@ -411,15 +418,18 @@ fn static_errors(
         if let Some(stripped) = after_colon.strip_prefix(' ') {
             stripped
         } else {
+            // TD-007
             diagnostics.push(Diagnostic::new(MissingSpaceAfterTodoColon, tag.range));
             after_colon
         }
     } else {
+        // TD-004
        diagnostics.push(Diagnostic::new(MissingTodoColon, tag.range));
         ""
     };
 
     if post_colon.is_empty() {
+        // TD-005
         diagnostics.push(Diagnostic::new(MissingTodoDescription, tag.range));
     }
 }
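
Putting the newly annotated branches together, a simplified model of the `static_errors` flow (a sketch mirroring the branch structure above, not ruff's actual parsing; `td_errors` is an invented helper):

// Given the text following a TODO tag, report which TD rules would fire.
fn td_errors(after_tag: &str) -> Vec<&'static str> {
    let mut errors = vec![];
    // TD-002: an author is expected as "(name)" right after the tag.
    let rest = if let Some(stripped) = after_tag.strip_prefix('(') {
        match stripped.find(')') {
            Some(i) => &stripped[i + 1..],
            None => {
                errors.push("TD-002 missing author");
                after_tag
            }
        }
    } else {
        errors.push("TD-002 missing author");
        after_tag
    };
    // TD-004: a colon must follow; TD-007: then a space; TD-005: then a description.
    let post_colon = if let Some(after_colon) = rest.strip_prefix(':') {
        if let Some(stripped) = after_colon.strip_prefix(' ') {
            stripped
        } else {
            errors.push("TD-007 missing space after colon");
            after_colon
        }
    } else {
        errors.push("TD-004 missing colon");
        ""
    };
    if post_colon.is_empty() {
        errors.push("TD-005 missing description");
    }
    errors
}

fn main() {
    // "# TODO(alice): fix this" passes; "# TODO fix this" trips TD-002 and TD-004.
    println!("{:?}", td_errors("(alice): fix this"));
    println!("{:?}", td_errors(" fix this"));
}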