mirror of https://github.com/astral-sh/ruff
Move most of token-based rules to use `TokenKind` (#11420)
## Summary

This PR moves the following rules to use `TokenKind` instead of `Tok`:

* `PLE2510`, `PLE2512`, `PLE2513`, `PLE2514`, `PLE2515`
* `E701`, `E702`, `E703`
* `ISC001`, `ISC002`
* `COM812`, `COM818`, `COM819`
* `W391`

I've paused here because the next set of rules (`pyupgrade::rules::extraneous_parentheses`) indexes into the token slice, but we only have an iterator implementation. So, I want to isolate that change to make sure the logic is still the same when I move to using the iterator approach.

This is part of #11401.

## Test Plan

`cargo test`
This commit is contained in:
parent c17193b5f8
commit bb1c107afd
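For context, the heart of the migration is the token representation. A simplified sketch (assumed shapes for illustration, not ruff's actual definitions): `Tok` variants own the lexed payloads, while `TokenKind` is a payload-free `Copy` discriminant, so rules that only branch on the kind can take `(TokenKind, TextRange)` pairs by value. This is also why the `*range` dereferences and `{ .. }` destructuring patterns disappear throughout the diffs below.

```rust
// Assumed, simplified shapes; not ruff's actual definitions.
#[allow(dead_code)]
enum Tok {
    // `Tok` variants carry owned data produced by the lexer.
    Name { name: String },
    String { value: String },
    Comma,
    Newline,
}

// `TokenKind` is a trivially copyable discriminant with no payload.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum TokenKind {
    Name,
    String,
    Comma,
    Newline,
}

fn kind_of(tok: &Tok) -> TokenKind {
    // Rules that only branch on the token's kind never need the payload.
    match tok {
        Tok::Name { .. } => TokenKind::Name,
        Tok::String { .. } => TokenKind::String,
        Tok::Comma => TokenKind::Comma,
        Tok::Newline => TokenKind::Newline,
    }
}

fn main() {
    let tok = Tok::Name { name: "x".to_string() };
    assert_eq!(kind_of(&tok), TokenKind::Name);
}
```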
In `check_tokens`, each rule's call site switches from the raw token slice to `tokens.kinds()`:

```diff
@@ -86,8 +86,8 @@ pub(crate) fn check_tokens(
         Rule::InvalidCharacterNul,
         Rule::InvalidCharacterZeroWidthSpace,
     ]) {
-        for (tok, range) in tokens.iter().flatten() {
-            pylint::rules::invalid_string_characters(&mut diagnostics, tok, *range, locator);
+        for (token, range) in tokens.kinds() {
+            pylint::rules::invalid_string_characters(&mut diagnostics, token, range, locator);
         }
     }

@@ -98,7 +98,7 @@ pub(crate) fn check_tokens(
     ]) {
         pycodestyle::rules::compound_statements(
             &mut diagnostics,
-            tokens,
+            tokens.kinds(),
             locator,
             indexer,
             source_type,
@@ -112,7 +112,7 @@ pub(crate) fn check_tokens(
     ]) {
         flake8_implicit_str_concat::rules::implicit(
             &mut diagnostics,
-            tokens,
+            tokens.kinds(),
             settings,
             locator,
             indexer,
@@ -124,7 +124,7 @@ pub(crate) fn check_tokens(
         Rule::TrailingCommaOnBareTuple,
         Rule::ProhibitedTrailingComma,
     ]) {
-        flake8_commas::rules::trailing_commas(&mut diagnostics, tokens, locator, indexer);
+        flake8_commas::rules::trailing_commas(&mut diagnostics, tokens.kinds(), locator, indexer);
     }

     if settings.rules.enabled(Rule::ExtraneousParentheses) {
@@ -172,7 +172,7 @@ pub(crate) fn check_tokens(
     }

     if settings.rules.enabled(Rule::TooManyNewlinesAtEndOfFile) {
-        pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens);
+        pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens.kinds());
     }

     diagnostics.retain(|diagnostic| settings.rules.enabled(diagnostic.kind.rule()));
```
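All of these call sites lean on a `tokens.kinds()` accessor. Here is a hypothetical stand-in for that adapter (the `Tokens` struct and its layout are assumptions; only the `kinds()` call appears in the diff), showing why each rule can take a fresh, independent iterator:

```rust
use std::ops::Range;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum TokenKind {
    Name,
    Newline,
}

// Hypothetical stand-in for the lexed token buffer: `kinds()` hands out
// `(kind, range)` pairs by value, leaving the buffer untouched.
struct Tokens {
    kinds: Vec<(TokenKind, Range<u32>)>,
}

impl Tokens {
    fn kinds(&self) -> impl Iterator<Item = (TokenKind, Range<u32>)> + '_ {
        self.kinds.iter().cloned()
    }
}

fn main() {
    let tokens = Tokens {
        kinds: vec![(TokenKind::Name, 0..1), (TokenKind::Newline, 1..2)],
    };
    // Each rule's call site takes its own iterator; one rule consuming
    // its iterator does not affect the next rule.
    assert_eq!(tokens.kinds().count(), 2);
    assert_eq!(tokens.kinds().count(), 2);
}
```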
In `flake8_commas::rules::trailing_commas`:

```diff
@@ -2,8 +2,7 @@ use ruff_diagnostics::{AlwaysFixableViolation, Violation};
 use ruff_diagnostics::{Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_index::Indexer;
-use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::Tok;
+use ruff_python_parser::{TokenKind, TokenKindIter};
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange};

@@ -52,26 +51,26 @@ impl Token {
     }
 }

-impl From<(&Tok, TextRange)> for Token {
-    fn from((tok, range): (&Tok, TextRange)) -> Self {
+impl From<(TokenKind, TextRange)> for Token {
+    fn from((tok, range): (TokenKind, TextRange)) -> Self {
         let ty = match tok {
-            Tok::Name { .. } => TokenType::Named,
-            Tok::String { .. } => TokenType::String,
-            Tok::Newline => TokenType::Newline,
-            Tok::NonLogicalNewline => TokenType::NonLogicalNewline,
-            Tok::Lpar => TokenType::OpeningBracket,
-            Tok::Rpar => TokenType::ClosingBracket,
-            Tok::Lsqb => TokenType::OpeningSquareBracket,
-            Tok::Rsqb => TokenType::ClosingBracket,
-            Tok::Colon => TokenType::Colon,
-            Tok::Comma => TokenType::Comma,
-            Tok::Lbrace => TokenType::OpeningCurlyBracket,
-            Tok::Rbrace => TokenType::ClosingBracket,
-            Tok::Def => TokenType::Def,
-            Tok::For => TokenType::For,
-            Tok::Lambda => TokenType::Lambda,
+            TokenKind::Name => TokenType::Named,
+            TokenKind::String => TokenType::String,
+            TokenKind::Newline => TokenType::Newline,
+            TokenKind::NonLogicalNewline => TokenType::NonLogicalNewline,
+            TokenKind::Lpar => TokenType::OpeningBracket,
+            TokenKind::Rpar => TokenType::ClosingBracket,
+            TokenKind::Lsqb => TokenType::OpeningSquareBracket,
+            TokenKind::Rsqb => TokenType::ClosingBracket,
+            TokenKind::Colon => TokenType::Colon,
+            TokenKind::Comma => TokenType::Comma,
+            TokenKind::Lbrace => TokenType::OpeningCurlyBracket,
+            TokenKind::Rbrace => TokenType::ClosingBracket,
+            TokenKind::Def => TokenType::Def,
+            TokenKind::For => TokenType::For,
+            TokenKind::Lambda => TokenType::Lambda,
             // Import treated like a function.
-            Tok::Import => TokenType::Named,
+            TokenKind::Import => TokenType::Named,
             _ => TokenType::Irrelevant,
         };
         #[allow(clippy::inconsistent_struct_constructor)]
@@ -227,27 +226,23 @@ impl AlwaysFixableViolation for ProhibitedTrailingComma {
 /// COM812, COM818, COM819
 pub(crate) fn trailing_commas(
     diagnostics: &mut Vec<Diagnostic>,
-    tokens: &[LexResult],
+    tokens: TokenKindIter,
     locator: &Locator,
     indexer: &Indexer,
 ) {
     let mut fstrings = 0u32;
-    let tokens = tokens.iter().filter_map(|result| {
-        let Ok((tok, tok_range)) = result else {
-            return None;
-        };
-
-        match tok {
+    let tokens = tokens.filter_map(|(token, tok_range)| {
+        match token {
             // Completely ignore comments -- they just interfere with the logic.
-            Tok::Comment(_) => None,
+            TokenKind::Comment => None,
             // F-strings are handled as `String` token type with the complete range
             // of the outermost f-string. This means that the expression inside the
             // f-string is not checked for trailing commas.
-            Tok::FStringStart(_) => {
+            TokenKind::FStringStart => {
                 fstrings = fstrings.saturating_add(1);
                 None
             }
-            Tok::FStringEnd => {
+            TokenKind::FStringEnd => {
                 fstrings = fstrings.saturating_sub(1);
                 if fstrings == 0 {
                     indexer
@@ -260,7 +255,7 @@ pub(crate) fn trailing_commas(
             }
             _ => {
                 if fstrings == 0 {
-                    Some(Token::from((tok, *tok_range)))
+                    Some(Token::from((token, tok_range)))
                 } else {
                     None
                 }
```
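The `fstrings` counter above tracks f-string nesting depth so that tokens inside an f-string are dropped before the trailing-comma logic runs. A standalone sketch of the same depth-tracking idea, using hypothetical token kinds:

```rust
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Kind {
    FStringStart,
    FStringEnd,
    Comma,
    Name,
}

// Mirror of the saturating depth counter: only tokens outside any
// f-string survive the filter.
fn outside_fstrings(tokens: &[Kind]) -> Vec<Kind> {
    let mut depth = 0u32;
    let mut kept = Vec::new();
    for &token in tokens {
        match token {
            Kind::FStringStart => depth = depth.saturating_add(1),
            Kind::FStringEnd => depth = depth.saturating_sub(1),
            _ if depth == 0 => kept.push(token),
            _ => {}
        }
    }
    kept
}

fn main() {
    let stream = [
        Kind::Name,
        Kind::FStringStart,
        Kind::Comma, // inside the f-string: ignored
        Kind::FStringEnd,
        Kind::Comma,
    ];
    assert_eq!(outside_fstrings(&stream), vec![Kind::Name, Kind::Comma]);
}
```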
In `flake8_implicit_str_concat::rules::implicit`:

```diff
@@ -4,10 +4,9 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::str::{leading_quote, trailing_quote};
 use ruff_python_index::Indexer;
-use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::Tok;
+use ruff_python_parser::{TokenKind, TokenKindIter};
 use ruff_source_file::Locator;
-use ruff_text_size::{Ranged, TextRange};
+use ruff_text_size::TextRange;

 use crate::settings::LinterSettings;

@@ -93,36 +92,34 @@ impl Violation for MultiLineImplicitStringConcatenation {
 /// ISC001, ISC002
 pub(crate) fn implicit(
     diagnostics: &mut Vec<Diagnostic>,
-    tokens: &[LexResult],
+    tokens: TokenKindIter,
     settings: &LinterSettings,
     locator: &Locator,
     indexer: &Indexer,
 ) {
     for ((a_tok, a_range), (b_tok, b_range)) in tokens
-        .iter()
-        .flatten()
-        .filter(|(tok, _)| {
-            !tok.is_comment()
+        .filter(|(token, _)| {
+            *token != TokenKind::Comment
                 && (settings.flake8_implicit_str_concat.allow_multiline
-                    || !tok.is_non_logical_newline())
+                    || *token != TokenKind::NonLogicalNewline)
         })
         .tuple_windows()
     {
         let (a_range, b_range) = match (a_tok, b_tok) {
-            (Tok::String { .. }, Tok::String { .. }) => (*a_range, *b_range),
-            (Tok::String { .. }, Tok::FStringStart(_)) => {
+            (TokenKind::String, TokenKind::String) => (a_range, b_range),
+            (TokenKind::String, TokenKind::FStringStart) => {
                 match indexer.fstring_ranges().innermost(b_range.start()) {
-                    Some(b_range) => (*a_range, b_range),
+                    Some(b_range) => (a_range, b_range),
                     None => continue,
                 }
             }
-            (Tok::FStringEnd, Tok::String { .. }) => {
+            (TokenKind::FStringEnd, TokenKind::String) => {
                 match indexer.fstring_ranges().innermost(a_range.start()) {
-                    Some(a_range) => (a_range, *b_range),
+                    Some(a_range) => (a_range, b_range),
                     None => continue,
                 }
             }
-            (Tok::FStringEnd, Tok::FStringStart(_)) => {
+            (TokenKind::FStringEnd, TokenKind::FStringStart) => {
                 match (
                     indexer.fstring_ranges().innermost(a_range.start()),
                     indexer.fstring_ranges().innermost(b_range.start()),
```
In `pycodestyle::rules::compound_statements`:

```diff
@@ -1,7 +1,6 @@
 use ruff_notebook::CellOffsets;
 use ruff_python_ast::PySourceType;
-use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::Tok;
+use ruff_python_parser::{TokenKind, TokenKindIter};
 use ruff_text_size::{TextRange, TextSize};

 use ruff_diagnostics::{AlwaysFixableViolation, Violation};
@@ -100,7 +99,7 @@ impl AlwaysFixableViolation for UselessSemicolon {
 /// E701, E702, E703
 pub(crate) fn compound_statements(
     diagnostics: &mut Vec<Diagnostic>,
-    lxr: &[LexResult],
+    mut tokens: TokenKindIter,
     locator: &Locator,
     indexer: &Indexer,
     source_type: PySourceType,
@@ -134,39 +133,36 @@ pub(crate) fn compound_statements(
     // Track indentation.
     let mut indent = 0u32;

-    // Keep the token iterator to perform lookaheads.
-    let mut tokens = lxr.iter().flatten();
-
-    while let Some(&(ref tok, range)) = tokens.next() {
-        match tok {
-            Tok::Lpar => {
+    while let Some((token, range)) = tokens.next() {
+        match token {
+            TokenKind::Lpar => {
                 par_count = par_count.saturating_add(1);
             }
-            Tok::Rpar => {
+            TokenKind::Rpar => {
                 par_count = par_count.saturating_sub(1);
             }
-            Tok::Lsqb => {
+            TokenKind::Lsqb => {
                 sqb_count = sqb_count.saturating_add(1);
             }
-            Tok::Rsqb => {
+            TokenKind::Rsqb => {
                 sqb_count = sqb_count.saturating_sub(1);
             }
-            Tok::Lbrace => {
+            TokenKind::Lbrace => {
                 brace_count = brace_count.saturating_add(1);
             }
-            Tok::Rbrace => {
+            TokenKind::Rbrace => {
                 brace_count = brace_count.saturating_sub(1);
             }
-            Tok::Ellipsis => {
+            TokenKind::Ellipsis => {
                 if allow_ellipsis {
                     allow_ellipsis = false;
                     continue;
                 }
             }
-            Tok::Indent => {
+            TokenKind::Indent => {
                 indent = indent.saturating_add(1);
             }
-            Tok::Dedent => {
+            TokenKind::Dedent => {
                 indent = indent.saturating_sub(1);
             }
             _ => {}
@@ -176,8 +172,8 @@ pub(crate) fn compound_statements(
             continue;
         }

-        match tok {
-            Tok::Newline => {
+        match token {
+            TokenKind::Newline => {
                 if let Some((start, end)) = semi {
                     if !(source_type.is_ipynb()
                         && indent == 0
@@ -215,7 +211,7 @@ pub(crate) fn compound_statements(
                 while_ = None;
                 with = None;
             }
-            Tok::Colon => {
+            TokenKind::Colon => {
                 if case.is_some()
                     || class.is_some()
                     || elif.is_some()
@@ -235,11 +231,14 @@ pub(crate) fn compound_statements(
                     allow_ellipsis = true;
                 }
             }
-            Tok::Semi => {
+            TokenKind::Semi => {
                 semi = Some((range.start(), range.end()));
                 allow_ellipsis = false;
             }
-            Tok::Comment(..) | Tok::Indent | Tok::Dedent | Tok::NonLogicalNewline => {}
+            TokenKind::Comment
+            | TokenKind::Indent
+            | TokenKind::Dedent
+            | TokenKind::NonLogicalNewline => {}
             _ => {
                 if let Some((start, end)) = semi {
                     diagnostics.push(Diagnostic::new(
@@ -277,8 +276,8 @@ pub(crate) fn compound_statements(
             }
         }

-        match tok {
-            Tok::Lambda => {
+        match token {
+            TokenKind::Lambda => {
                 // Reset.
                 colon = None;
                 case = None;
@@ -294,40 +293,40 @@ pub(crate) fn compound_statements(
                 while_ = None;
                 with = None;
             }
-            Tok::Case => {
+            TokenKind::Case => {
                 case = Some((range.start(), range.end()));
             }
-            Tok::If => {
+            TokenKind::If => {
                 if_ = Some((range.start(), range.end()));
             }
-            Tok::While => {
+            TokenKind::While => {
                 while_ = Some((range.start(), range.end()));
             }
-            Tok::For => {
+            TokenKind::For => {
                 for_ = Some((range.start(), range.end()));
             }
-            Tok::Try => {
+            TokenKind::Try => {
                 try_ = Some((range.start(), range.end()));
             }
-            Tok::Except => {
+            TokenKind::Except => {
                 except = Some((range.start(), range.end()));
             }
-            Tok::Finally => {
+            TokenKind::Finally => {
                 finally = Some((range.start(), range.end()));
             }
-            Tok::Elif => {
+            TokenKind::Elif => {
                 elif = Some((range.start(), range.end()));
             }
-            Tok::Else => {
+            TokenKind::Else => {
                 else_ = Some((range.start(), range.end()));
             }
-            Tok::Class => {
+            TokenKind::Class => {
                 class = Some((range.start(), range.end()));
             }
-            Tok::With => {
+            TokenKind::With => {
                 with = Some((range.start(), range.end()));
             }
-            Tok::Match => {
+            TokenKind::Match => {
                 match_ = Some((range.start(), range.end()));
             }
             _ => {}
@@ -337,17 +336,17 @@ pub(crate) fn compound_statements(

 /// Returns `true` if there are any non-trivia tokens from the given token
 /// iterator till the given end offset.
-fn has_non_trivia_tokens_till<'a>(
-    tokens: impl Iterator<Item = &'a (Tok, TextRange)>,
-    cell_end: TextSize,
-) -> bool {
-    for &(ref tok, tok_range) in tokens {
+fn has_non_trivia_tokens_till(tokens: TokenKindIter, cell_end: TextSize) -> bool {
+    for (token, tok_range) in tokens {
         if tok_range.start() >= cell_end {
             return false;
         }
         if !matches!(
-            tok,
-            Tok::Newline | Tok::Comment(_) | Tok::EndOfFile | Tok::NonLogicalNewline
+            token,
+            TokenKind::Newline
+                | TokenKind::Comment
+                | TokenKind::EndOfFile
+                | TokenKind::NonLogicalNewline
         ) {
             return true;
         }
```
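Note that the new signature takes `mut tokens: TokenKindIter` because the loop drives the iterator by hand with `next()` instead of a `for` loop; keeping ownership of the iterator is what makes lookaheads like `has_non_trivia_tokens_till` possible. A minimal sketch of that pattern with plain iterators (the kinds and helper are hypothetical):

```rust
#[derive(Clone, Copy, PartialEq, Eq)]
enum Kind {
    Name,
    Semi,
    Newline,
    Comment,
}

// Lookahead helper: scans the rest of the stream for non-trivia tokens.
fn has_more_code(mut tokens: impl Iterator<Item = Kind>) -> bool {
    tokens.any(|token| !matches!(token, Kind::Newline | Kind::Comment))
}

fn main() {
    let mut tokens = [Kind::Name, Kind::Semi, Kind::Newline, Kind::Comment].into_iter();
    // Driving the iterator manually keeps ownership in hand...
    while let Some(token) = tokens.next() {
        if token == Kind::Semi {
            // ...so the remainder can be handed to a lookahead helper.
            let trailing = has_more_code(tokens.by_ref());
            println!("semicolon followed by more code? {trailing}");
            break;
        }
    }
}
```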
In `pycodestyle::rules::too_many_newlines_at_end_of_file`:

```diff
@@ -1,7 +1,6 @@
 use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::Tok;
+use ruff_python_parser::{TokenKind, TokenKindIter};
 use ruff_text_size::{TextRange, TextSize};

 /// ## What it does
@@ -57,23 +56,23 @@ impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile {
 /// W391
 pub(crate) fn too_many_newlines_at_end_of_file(
     diagnostics: &mut Vec<Diagnostic>,
-    lxr: &[LexResult],
+    tokens: TokenKindIter,
 ) {
     let mut num_trailing_newlines = 0u32;
     let mut start: Option<TextSize> = None;
     let mut end: Option<TextSize> = None;

     // Count the number of trailing newlines.
-    for (tok, range) in lxr.iter().rev().flatten() {
-        match tok {
-            Tok::NonLogicalNewline | Tok::Newline => {
+    for (token, range) in tokens.rev() {
+        match token {
+            TokenKind::NonLogicalNewline | TokenKind::Newline => {
                 if num_trailing_newlines == 0 {
                     end = Some(range.end());
                 }
                 start = Some(range.end());
                 num_trailing_newlines += 1;
             }
-            Tok::Dedent => continue,
+            TokenKind::Dedent => continue,
             _ => {
                 break;
             }
```
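This rewrite relies on the kind iterator being double-ended: `tokens.rev()` replaces `lxr.iter().rev().flatten()` for the backwards scan. A standalone sketch of the same trailing-newline count over hypothetical kinds:

```rust
#[derive(Clone, Copy)]
enum Kind {
    Newline,
    NonLogicalNewline,
    Dedent,
    Name,
}

// Walk the token stream backwards, counting newline tokens until the
// first token that is neither a newline nor a dedent, as W391 does.
fn trailing_newlines(tokens: &[Kind]) -> u32 {
    let mut count = 0u32;
    for &token in tokens.iter().rev() {
        match token {
            Kind::Newline | Kind::NonLogicalNewline => count += 1,
            Kind::Dedent => continue,
            _ => break,
        }
    }
    count
}

fn main() {
    let stream = [Kind::Name, Kind::Newline, Kind::NonLogicalNewline, Kind::Newline];
    assert_eq!(trailing_newlines(&stream), 3);
}
```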
In `pylint::rules::invalid_string_characters`:

```diff
@@ -4,7 +4,7 @@ use ruff_diagnostics::AlwaysFixableViolation;
 use ruff_diagnostics::Edit;
 use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_parser::Tok;
+use ruff_python_parser::TokenKind;
 use ruff_source_file::Locator;

 /// ## What it does
@@ -174,14 +174,14 @@ impl AlwaysFixableViolation for InvalidCharacterZeroWidthSpace {
 /// PLE2510, PLE2512, PLE2513, PLE2514, PLE2515
 pub(crate) fn invalid_string_characters(
     diagnostics: &mut Vec<Diagnostic>,
-    tok: &Tok,
+    token: TokenKind,
     range: TextRange,
     locator: &Locator,
 ) {
-    let text = match tok {
+    let text = match token {
         // We can't use the `value` field since it's decoded and e.g. for f-strings removed a curly
         // brace that escaped another curly brace, which would gives us wrong column information.
-        Tok::String { .. } | Tok::FStringMiddle { .. } => locator.slice(range),
+        TokenKind::String | TokenKind::FStringMiddle => locator.slice(range),
         _ => return,
     };
```
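The retained comment explains why this rule slices the raw source via `locator.slice(range)` rather than using a decoded token value: decoding can change the text's length (e.g. `{{` becomes `{` in an f-string), shifting column offsets. A small, self-contained illustration of that drift (an assumed example, not ruff code):

```rust
fn main() {
    // Raw source of an f-string token containing a zero-width space,
    // versus what its decoded value would look like.
    let raw = "f'{{pad}}\u{200b}'";
    let decoded = "{pad}\u{200b}";
    let zwsp = '\u{200b}';
    let col_in_raw = raw.find(zwsp).unwrap();
    let col_in_decoded = decoded.find(zwsp).unwrap();
    // The invalid character sits at different offsets in each view, so
    // diagnostics computed on the decoded value would point at the
    // wrong source column.
    assert_ne!(col_in_raw, col_in_decoded);
    println!("raw column {col_in_raw} vs decoded column {col_in_decoded}");
}
```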