From 4667d8697c40f7739f9e280b7b1ba86ddf6dbab7 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Fri, 21 Jun 2024 15:32:40 +0530 Subject: [PATCH] Remove duplication around `is_trivia` functions (#11956) ## Summary This PR removes the duplication around `is_trivia` functions. There are two of them in the codebase: 1. In `pycodestyle`, it's for newline, indent, dedent, non-logical newline and comment 2. In the parser, it's for non-logical newline and comment The `TokenKind::is_trivia` method used (1) but that's not correct in that context. So, this PR introduces a new `is_non_logical_token` helper method for the `pycodestyle` crate and updates the `TokenKind::is_trivia` implementation with (2). This also means we can remove `Token::is_trivia` method and the standalone `token_source::is_trivia` function and use the one on `TokenKind`. ## Test Plan `cargo insta test` --- .../src/rules/pycodestyle/helpers.rs | 14 ++++++++ .../rules/pycodestyle/rules/blank_lines.rs | 9 ++--- .../missing_whitespace_around_operator.rs | 11 +++--- .../pycodestyle/rules/logical_lines/mod.rs | 35 ++++--------------- .../rules/invalid_literal_comparisons.rs | 2 +- crates/ruff_python_parser/src/lexer.rs | 6 ---- crates/ruff_python_parser/src/token.rs | 15 ++++---- crates/ruff_python_parser/src/token_source.rs | 8 ++--- 8 files changed, 41 insertions(+), 59 deletions(-) diff --git a/crates/ruff_linter/src/rules/pycodestyle/helpers.rs b/crates/ruff_linter/src/rules/pycodestyle/helpers.rs index 2b39a6dad1..a3ba640560 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/helpers.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/helpers.rs @@ -1,4 +1,18 @@ +use ruff_python_parser::TokenKind; + /// Returns `true` if the name should be considered "ambiguous". pub(super) fn is_ambiguous_name(name: &str) -> bool { name == "l" || name == "I" || name == "O" } + +/// Returns `true` if the given `token` is a non-logical token. +/// +/// Unlike [`TokenKind::is_trivia`], this function also considers the indent, dedent and newline +/// tokens. +pub(super) const fn is_non_logical_token(token: TokenKind) -> bool { + token.is_trivia() + || matches!( + token, + TokenKind::Newline | TokenKind::Indent | TokenKind::Dedent + ) +} diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs index 172ff40e5b..49f25809bb 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs @@ -15,13 +15,14 @@ use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_python_parser::TokenKind; +use ruff_python_trivia::PythonWhitespace; use ruff_source_file::{Locator, UniversalNewlines}; use ruff_text_size::TextRange; use ruff_text_size::TextSize; use crate::checkers::logical_lines::expand_indent; use crate::line_width::IndentWidth; -use ruff_python_trivia::PythonWhitespace; +use crate::rules::pycodestyle::helpers::is_non_logical_token; /// Number of blank lines around top level classes and functions. const BLANK_LINES_TOP_LEVEL: u32 = 2; @@ -489,13 +490,13 @@ impl<'a> Iterator for LinePreprocessor<'a> { (logical_line_kind, range) }; - if !kind.is_trivia() { + if !is_non_logical_token(kind) { line_is_comment_only = false; } // A docstring line is composed only of the docstring (TokenKind::String) and trivia tokens. // (If a comment follows a docstring, we still count the line as a docstring) - if kind != TokenKind::String && !kind.is_trivia() { + if kind != TokenKind::String && !is_non_logical_token(kind) { is_docstring = false; } @@ -545,7 +546,7 @@ impl<'a> Iterator for LinePreprocessor<'a> { _ => {} } - if !kind.is_trivia() { + if !is_non_logical_token(kind) { last_token = kind; } } diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs index ba1c3712fd..1f3236e315 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs @@ -4,6 +4,7 @@ use ruff_python_parser::TokenKind; use ruff_text_size::Ranged; use crate::checkers::logical_lines::LogicalLinesContext; +use crate::rules::pycodestyle::helpers::is_non_logical_token; use crate::rules::pycodestyle::rules::logical_lines::LogicalLine; /// ## What it does @@ -146,7 +147,9 @@ pub(crate) fn missing_whitespace_around_operator( context: &mut LogicalLinesContext, ) { let mut tokens = line.tokens().iter().peekable(); - let first_token = tokens.by_ref().find(|token| !token.kind().is_trivia()); + let first_token = tokens + .by_ref() + .find(|token| !is_non_logical_token(token.kind())); let Some(mut prev_token) = first_token else { return; }; @@ -159,7 +162,7 @@ pub(crate) fn missing_whitespace_around_operator( while let Some(token) = tokens.next() { let kind = token.kind(); - if kind.is_trivia() { + if is_non_logical_token(kind) { continue; } @@ -234,10 +237,10 @@ pub(crate) fn missing_whitespace_around_operator( if needs_space != NeedsSpace::No { let has_leading_trivia = - prev_token.end() < token.start() || prev_token.kind().is_trivia(); + prev_token.end() < token.start() || is_non_logical_token(prev_token.kind()); let has_trailing_trivia = tokens.peek().map_or(true, |next| { - token.end() < next.start() || next.kind().is_trivia() + token.end() < next.start() || is_non_logical_token(next.kind()) }); match (has_leading_trivia, has_trailing_trivia) { diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs index a483187e57..f7ca644f4b 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs @@ -20,6 +20,8 @@ use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_trivia::is_python_whitespace; use ruff_source_file::Locator; +use crate::rules::pycodestyle::helpers::is_non_logical_token; + mod extraneous_whitespace; mod indentation; mod missing_whitespace; @@ -167,32 +169,14 @@ impl<'a> LogicalLine<'a> { let start = tokens .iter() - .position(|t| { - !matches!( - t.kind(), - TokenKind::Newline - | TokenKind::NonLogicalNewline - | TokenKind::Indent - | TokenKind::Dedent - | TokenKind::Comment, - ) - }) + .position(|t| !is_non_logical_token(t.kind())) .unwrap_or(tokens.len()); let tokens = &tokens[start..]; let end = tokens .iter() - .rposition(|t| { - !matches!( - t.kind(), - TokenKind::Newline - | TokenKind::NonLogicalNewline - | TokenKind::Indent - | TokenKind::Dedent - | TokenKind::Comment, - ) - }) + .rposition(|t| !is_non_logical_token(t.kind())) .map_or(0, |pos| pos + 1); &tokens[..end] @@ -447,14 +431,7 @@ impl LogicalLinesBuilder { line.flags.insert(TokenFlags::KEYWORD); } - if !matches!( - kind, - TokenKind::Comment - | TokenKind::Newline - | TokenKind::NonLogicalNewline - | TokenKind::Dedent - | TokenKind::Indent - ) { + if !is_non_logical_token(kind) { line.flags.insert(TokenFlags::NON_TRIVIA); } @@ -468,7 +445,7 @@ impl LogicalLinesBuilder { if self.current_line.tokens_start < end { let is_empty = self.tokens[self.current_line.tokens_start as usize..end as usize] .iter() - .all(|token| token.kind.is_newline()); + .all(|token| token.kind.is_any_newline()); if !is_empty { self.lines.push(Line { flags: self.current_line.flags, diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs index d13dd48607..be201527cd 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs @@ -146,7 +146,7 @@ fn locate_cmp_ops(expr: &Expr, tokens: &Tokens) -> Vec { let mut tok_iter = tokens .in_range(expr.range()) .iter() - .filter(|token| !token.is_trivia()) + .filter(|token| !token.kind().is_trivia()) .peekable(); let mut ops: Vec = vec![]; diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index fc6790acaf..46005529d5 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -1626,12 +1626,6 @@ impl Token { (self.kind, self.range) } - /// Returns `true` if this is a trivia token. - #[inline] - pub const fn is_trivia(self) -> bool { - matches!(self.kind, TokenKind::Comment | TokenKind::NonLogicalNewline) - } - /// Returns `true` if this is any kind of string token. const fn is_any_string(self) -> bool { matches!( diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs index f9f3fe8bb2..f5c3e6ba8b 100644 --- a/crates/ruff_python_parser/src/token.rs +++ b/crates/ruff_python_parser/src/token.rs @@ -192,13 +192,15 @@ pub enum TokenKind { } impl TokenKind { + /// Returns `true` if this is an end of file token. #[inline] pub const fn is_eof(self) -> bool { matches!(self, TokenKind::EndOfFile) } + /// Returns `true` if this is either a newline or non-logical newline token. #[inline] - pub const fn is_newline(self) -> bool { + pub const fn is_any_newline(self) -> bool { matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline) } @@ -294,21 +296,16 @@ impl TokenKind { ) } + /// Returns `true` if this is a singleton token i.e., `True`, `False`, or `None`. #[inline] pub const fn is_singleton(self) -> bool { matches!(self, TokenKind::False | TokenKind::True | TokenKind::None) } + /// Returns `true` if this is a trivia token i.e., a comment or a non-logical newline. #[inline] pub const fn is_trivia(&self) -> bool { - matches!( - self, - TokenKind::Newline - | TokenKind::Indent - | TokenKind::Dedent - | TokenKind::NonLogicalNewline - | TokenKind::Comment - ) + matches!(self, TokenKind::Comment | TokenKind::NonLogicalNewline) } #[inline] diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs index 7662999502..2719abdd64 100644 --- a/crates/ruff_python_parser/src/token_source.rs +++ b/crates/ruff_python_parser/src/token_source.rs @@ -114,7 +114,7 @@ impl<'src> TokenSource<'src> { fn do_bump(&mut self) { loop { let kind = self.lexer.next_token(); - if is_trivia(kind) { + if kind.is_trivia() { self.tokens .push(Token::new(kind, self.current_range(), self.current_flags())); continue; @@ -127,7 +127,7 @@ impl<'src> TokenSource<'src> { fn next_non_trivia_token(&mut self) -> TokenKind { loop { let kind = self.lexer.next_token(); - if is_trivia(kind) { + if kind.is_trivia() { continue; } break kind; @@ -187,7 +187,3 @@ fn allocate_tokens_vec(contents: &str) -> Vec { let lower_bound = contents.len().saturating_mul(15) / 100; Vec::with_capacity(lower_bound) } - -fn is_trivia(token: TokenKind) -> bool { - matches!(token, TokenKind::Comment | TokenKind::NonLogicalNewline) -}