From 4667d8697c40f7739f9e280b7b1ba86ddf6dbab7 Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Fri, 21 Jun 2024 15:32:40 +0530
Subject: [PATCH] Remove duplication around `is_trivia` functions (#11956)

## Summary

This PR removes the duplication around `is_trivia` functions.

There are two of them in the codebase:
1. In `pycodestyle`, it's for newline, indent, dedent, non-logical
newline and comment
2. In the parser, it's for non-logical newline and comment

The `TokenKind::is_trivia` method used (1) but that's not correct in
that context. So, this PR introduces a new `is_non_logical_token` helper
method for the `pycodestyle` crate and updates the
`TokenKind::is_trivia` implementation with (2).

This also means we can remove `Token::is_trivia` method and the
standalone `token_source::is_trivia` function and use the one on
`TokenKind`.

## Test Plan

`cargo insta test`
---
 .../src/rules/pycodestyle/helpers.rs          | 14 ++++++++
 .../rules/pycodestyle/rules/blank_lines.rs    |  9 ++---
 .../missing_whitespace_around_operator.rs     | 11 +++---
 .../pycodestyle/rules/logical_lines/mod.rs    | 35 ++++---------------
 .../rules/invalid_literal_comparisons.rs      |  2 +-
 crates/ruff_python_parser/src/lexer.rs        |  6 ----
 crates/ruff_python_parser/src/token.rs        | 15 ++++----
 crates/ruff_python_parser/src/token_source.rs |  8 ++---
 8 files changed, 41 insertions(+), 59 deletions(-)

diff --git a/crates/ruff_linter/src/rules/pycodestyle/helpers.rs b/crates/ruff_linter/src/rules/pycodestyle/helpers.rs
index 2b39a6dad1..a3ba640560 100644
--- a/crates/ruff_linter/src/rules/pycodestyle/helpers.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/helpers.rs
@@ -1,4 +1,18 @@
+use ruff_python_parser::TokenKind;
+
 /// Returns `true` if the name should be considered "ambiguous".
 pub(super) fn is_ambiguous_name(name: &str) -> bool {
     name == "l" || name == "I" || name == "O"
 }
+
+/// Returns `true` if the given `token` is a non-logical token.
+///
+/// Unlike [`TokenKind::is_trivia`], this function also considers the indent, dedent and newline
+/// tokens.
+pub(super) const fn is_non_logical_token(token: TokenKind) -> bool {
+    token.is_trivia()
+        || matches!(
+            token,
+            TokenKind::Newline | TokenKind::Indent | TokenKind::Dedent
+        )
+}
diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs
index 172ff40e5b..49f25809bb 100644
--- a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs
@@ -15,13 +15,14 @@ use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::PySourceType;
 use ruff_python_codegen::Stylist;
 use ruff_python_parser::TokenKind;
+use ruff_python_trivia::PythonWhitespace;
 use ruff_source_file::{Locator, UniversalNewlines};
 use ruff_text_size::TextRange;
 use ruff_text_size::TextSize;
 
 use crate::checkers::logical_lines::expand_indent;
 use crate::line_width::IndentWidth;
-use ruff_python_trivia::PythonWhitespace;
+use crate::rules::pycodestyle::helpers::is_non_logical_token;
 
 /// Number of blank lines around top level classes and functions.
 const BLANK_LINES_TOP_LEVEL: u32 = 2;
@@ -489,13 +490,13 @@ impl<'a> Iterator for LinePreprocessor<'a> {
                     (logical_line_kind, range)
                 };
 
-            if !kind.is_trivia() {
+            if !is_non_logical_token(kind) {
                 line_is_comment_only = false;
             }
 
             // A docstring line is composed only of the docstring (TokenKind::String) and trivia tokens.
             // (If a comment follows a docstring, we still count the line as a docstring)
-            if kind != TokenKind::String && !kind.is_trivia() {
+            if kind != TokenKind::String && !is_non_logical_token(kind) {
                 is_docstring = false;
             }
 
@@ -545,7 +546,7 @@ impl<'a> Iterator for LinePreprocessor<'a> {
                 _ => {}
             }
 
-            if !kind.is_trivia() {
+            if !is_non_logical_token(kind) {
                 last_token = kind;
             }
         }
diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs
index ba1c3712fd..1f3236e315 100644
--- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs
@@ -4,6 +4,7 @@ use ruff_python_parser::TokenKind;
 use ruff_text_size::Ranged;
 
 use crate::checkers::logical_lines::LogicalLinesContext;
+use crate::rules::pycodestyle::helpers::is_non_logical_token;
 use crate::rules::pycodestyle::rules::logical_lines::LogicalLine;
 
 /// ## What it does
@@ -146,7 +147,9 @@ pub(crate) fn missing_whitespace_around_operator(
     context: &mut LogicalLinesContext,
 ) {
     let mut tokens = line.tokens().iter().peekable();
-    let first_token = tokens.by_ref().find(|token| !token.kind().is_trivia());
+    let first_token = tokens
+        .by_ref()
+        .find(|token| !is_non_logical_token(token.kind()));
     let Some(mut prev_token) = first_token else {
         return;
     };
@@ -159,7 +162,7 @@ pub(crate) fn missing_whitespace_around_operator(
     while let Some(token) = tokens.next() {
         let kind = token.kind();
 
-        if kind.is_trivia() {
+        if is_non_logical_token(kind) {
             continue;
         }
 
@@ -234,10 +237,10 @@ pub(crate) fn missing_whitespace_around_operator(
 
         if needs_space != NeedsSpace::No {
             let has_leading_trivia =
-                prev_token.end() < token.start() || prev_token.kind().is_trivia();
+                prev_token.end() < token.start() || is_non_logical_token(prev_token.kind());
 
             let has_trailing_trivia = tokens.peek().map_or(true, |next| {
-                token.end() < next.start() || next.kind().is_trivia()
+                token.end() < next.start() || is_non_logical_token(next.kind())
             });
 
             match (has_leading_trivia, has_trailing_trivia) {
diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs
index a483187e57..f7ca644f4b 100644
--- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs
@@ -20,6 +20,8 @@ use ruff_python_parser::{TokenKind, Tokens};
 use ruff_python_trivia::is_python_whitespace;
 use ruff_source_file::Locator;
 
+use crate::rules::pycodestyle::helpers::is_non_logical_token;
+
 mod extraneous_whitespace;
 mod indentation;
 mod missing_whitespace;
@@ -167,32 +169,14 @@ impl<'a> LogicalLine<'a> {
 
         let start = tokens
             .iter()
-            .position(|t| {
-                !matches!(
-                    t.kind(),
-                    TokenKind::Newline
-                        | TokenKind::NonLogicalNewline
-                        | TokenKind::Indent
-                        | TokenKind::Dedent
-                        | TokenKind::Comment,
-                )
-            })
+            .position(|t| !is_non_logical_token(t.kind()))
             .unwrap_or(tokens.len());
 
         let tokens = &tokens[start..];
 
         let end = tokens
             .iter()
-            .rposition(|t| {
-                !matches!(
-                    t.kind(),
-                    TokenKind::Newline
-                        | TokenKind::NonLogicalNewline
-                        | TokenKind::Indent
-                        | TokenKind::Dedent
-                        | TokenKind::Comment,
-                )
-            })
+            .rposition(|t| !is_non_logical_token(t.kind()))
             .map_or(0, |pos| pos + 1);
 
         &tokens[..end]
@@ -447,14 +431,7 @@ impl LogicalLinesBuilder {
             line.flags.insert(TokenFlags::KEYWORD);
         }
 
-        if !matches!(
-            kind,
-            TokenKind::Comment
-                | TokenKind::Newline
-                | TokenKind::NonLogicalNewline
-                | TokenKind::Dedent
-                | TokenKind::Indent
-        ) {
+        if !is_non_logical_token(kind) {
             line.flags.insert(TokenFlags::NON_TRIVIA);
         }
 
@@ -468,7 +445,7 @@ impl LogicalLinesBuilder {
         if self.current_line.tokens_start < end {
             let is_empty = self.tokens[self.current_line.tokens_start as usize..end as usize]
                 .iter()
-                .all(|token| token.kind.is_newline());
+                .all(|token| token.kind.is_any_newline());
             if !is_empty {
                 self.lines.push(Line {
                     flags: self.current_line.flags,
diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs
index d13dd48607..be201527cd 100644
--- a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs
+++ b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs
@@ -146,7 +146,7 @@ fn locate_cmp_ops(expr: &Expr, tokens: &Tokens) -> Vec<LocatedCmpOp> {
     let mut tok_iter = tokens
         .in_range(expr.range())
         .iter()
-        .filter(|token| !token.is_trivia())
+        .filter(|token| !token.kind().is_trivia())
         .peekable();
 
     let mut ops: Vec<LocatedCmpOp> = vec![];
diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index fc6790acaf..46005529d5 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -1626,12 +1626,6 @@ impl Token {
         (self.kind, self.range)
     }
 
-    /// Returns `true` if this is a trivia token.
-    #[inline]
-    pub const fn is_trivia(self) -> bool {
-        matches!(self.kind, TokenKind::Comment | TokenKind::NonLogicalNewline)
-    }
-
     /// Returns `true` if this is any kind of string token.
     const fn is_any_string(self) -> bool {
         matches!(
diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs
index f9f3fe8bb2..f5c3e6ba8b 100644
--- a/crates/ruff_python_parser/src/token.rs
+++ b/crates/ruff_python_parser/src/token.rs
@@ -192,13 +192,15 @@ pub enum TokenKind {
 }
 
 impl TokenKind {
+    /// Returns `true` if this is an end of file token.
     #[inline]
     pub const fn is_eof(self) -> bool {
         matches!(self, TokenKind::EndOfFile)
     }
 
+    /// Returns `true` if this is either a newline or non-logical newline token.
     #[inline]
-    pub const fn is_newline(self) -> bool {
+    pub const fn is_any_newline(self) -> bool {
         matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline)
     }
 
@@ -294,21 +296,16 @@ impl TokenKind {
         )
     }
 
+    /// Returns `true` if this is a singleton token i.e., `True`, `False`, or `None`.
     #[inline]
     pub const fn is_singleton(self) -> bool {
         matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
     }
 
+    /// Returns `true` if this is a trivia token i.e., a comment or a non-logical newline.
     #[inline]
     pub const fn is_trivia(&self) -> bool {
-        matches!(
-            self,
-            TokenKind::Newline
-                | TokenKind::Indent
-                | TokenKind::Dedent
-                | TokenKind::NonLogicalNewline
-                | TokenKind::Comment
-        )
+        matches!(self, TokenKind::Comment | TokenKind::NonLogicalNewline)
     }
 
     #[inline]
diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs
index 7662999502..2719abdd64 100644
--- a/crates/ruff_python_parser/src/token_source.rs
+++ b/crates/ruff_python_parser/src/token_source.rs
@@ -114,7 +114,7 @@ impl<'src> TokenSource<'src> {
     fn do_bump(&mut self) {
         loop {
             let kind = self.lexer.next_token();
-            if is_trivia(kind) {
+            if kind.is_trivia() {
                 self.tokens
                     .push(Token::new(kind, self.current_range(), self.current_flags()));
                 continue;
@@ -127,7 +127,7 @@ impl<'src> TokenSource<'src> {
     fn next_non_trivia_token(&mut self) -> TokenKind {
         loop {
             let kind = self.lexer.next_token();
-            if is_trivia(kind) {
+            if kind.is_trivia() {
                 continue;
             }
             break kind;
@@ -187,7 +187,3 @@ fn allocate_tokens_vec(contents: &str) -> Vec<Token> {
     let lower_bound = contents.len().saturating_mul(15) / 100;
     Vec::with_capacity(lower_bound)
 }
-
-fn is_trivia(token: TokenKind) -> bool {
-    matches!(token, TokenKind::Comment | TokenKind::NonLogicalNewline)
-}