mirror of https://github.com/astral-sh/ruff
Move most of token-based rules to use `TokenKind` (#11420)
## Summary

This PR moves the following rules to use `TokenKind` instead of `Tok`:

* `PLE2510`, `PLE2512`, `PLE2513`, `PLE2514`, `PLE2515`
* `E701`, `E702`, `E703`
* `ISC001`, `ISC002`
* `COM812`, `COM818`, `COM819`
* `W391`

I've paused here because the next set of rules (`pyupgrade::rules::extraneous_parentheses`) indexes into the token slice, but we only have an iterator implementation. So, I want to isolate that change to make sure the logic is still the same when I move to using the iterator approach.

This is part of #11401.

## Test Plan

`cargo test`
This commit is contained in:
parent c17193b5f8
commit bb1c107afd
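For context, the heart of the migration is the token representation. A simplified sketch (assumed shapes for illustration, not ruff's actual definitions): `Tok` variants own the lexed payloads, while `TokenKind` is a payload-free `Copy` discriminant, so rules that only branch on the kind can take `(TokenKind, TextRange)` pairs by value. This is also why the `*range` dereferences and `{ .. }` destructuring patterns disappear throughout the diffs below.

```rust
// Assumed, simplified shapes; not ruff's actual definitions.
#[allow(dead_code)]
enum Tok {
    // `Tok` variants carry owned data produced by the lexer.
    Name { name: String },
    String { value: String },
    Comma,
    Newline,
}

// `TokenKind` is a trivially copyable discriminant with no payload.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum TokenKind {
    Name,
    String,
    Comma,
    Newline,
}

fn kind_of(tok: &Tok) -> TokenKind {
    // Rules that only branch on the token's kind never need the payload.
    match tok {
        Tok::Name { .. } => TokenKind::Name,
        Tok::String { .. } => TokenKind::String,
        Tok::Comma => TokenKind::Comma,
        Tok::Newline => TokenKind::Newline,
    }
}

fn main() {
    let tok = Tok::Name { name: "x".to_string() };
    assert_eq!(kind_of(&tok), TokenKind::Name);
}
```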
In `check_tokens`, each rule's call site switches from the raw token slice to `tokens.kinds()`:

```diff
@@ -86,8 +86,8 @@ pub(crate) fn check_tokens(
         Rule::InvalidCharacterNul,
         Rule::InvalidCharacterZeroWidthSpace,
     ]) {
-        for (tok, range) in tokens.iter().flatten() {
-            pylint::rules::invalid_string_characters(&mut diagnostics, tok, *range, locator);
+        for (token, range) in tokens.kinds() {
+            pylint::rules::invalid_string_characters(&mut diagnostics, token, range, locator);
         }
     }

@@ -98,7 +98,7 @@ pub(crate) fn check_tokens(
     ]) {
         pycodestyle::rules::compound_statements(
             &mut diagnostics,
-            tokens,
+            tokens.kinds(),
             locator,
             indexer,
             source_type,
@@ -112,7 +112,7 @@ pub(crate) fn check_tokens(
     ]) {
         flake8_implicit_str_concat::rules::implicit(
             &mut diagnostics,
-            tokens,
+            tokens.kinds(),
             settings,
             locator,
             indexer,
@@ -124,7 +124,7 @@ pub(crate) fn check_tokens(
         Rule::TrailingCommaOnBareTuple,
         Rule::ProhibitedTrailingComma,
     ]) {
-        flake8_commas::rules::trailing_commas(&mut diagnostics, tokens, locator, indexer);
+        flake8_commas::rules::trailing_commas(&mut diagnostics, tokens.kinds(), locator, indexer);
     }

     if settings.rules.enabled(Rule::ExtraneousParentheses) {
@@ -172,7 +172,7 @@ pub(crate) fn check_tokens(
     }

     if settings.rules.enabled(Rule::TooManyNewlinesAtEndOfFile) {
-        pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens);
+        pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens.kinds());
     }

     diagnostics.retain(|diagnostic| settings.rules.enabled(diagnostic.kind.rule()));
```
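All of these call sites lean on a `tokens.kinds()` accessor. Here is a hypothetical stand-in for that adapter (the `Tokens` struct and its layout are assumptions; only the `kinds()` call appears in the diff), showing why each rule can take a fresh, independent iterator:

```rust
use std::ops::Range;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum TokenKind {
    Name,
    Newline,
}

// Hypothetical stand-in for the lexed token buffer: `kinds()` hands out
// `(kind, range)` pairs by value, leaving the buffer untouched.
struct Tokens {
    kinds: Vec<(TokenKind, Range<u32>)>,
}

impl Tokens {
    fn kinds(&self) -> impl Iterator<Item = (TokenKind, Range<u32>)> + '_ {
        self.kinds.iter().cloned()
    }
}

fn main() {
    let tokens = Tokens {
        kinds: vec![(TokenKind::Name, 0..1), (TokenKind::Newline, 1..2)],
    };
    // Each rule's call site takes its own iterator; one rule consuming
    // its iterator does not affect the next rule.
    assert_eq!(tokens.kinds().count(), 2);
    assert_eq!(tokens.kinds().count(), 2);
}
```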
In `flake8_commas::rules::trailing_commas`:

```diff
@@ -2,8 +2,7 @@ use ruff_diagnostics::{AlwaysFixableViolation, Violation};
 use ruff_diagnostics::{Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_index::Indexer;
-use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::Tok;
+use ruff_python_parser::{TokenKind, TokenKindIter};
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange};

@@ -52,26 +51,26 @@ impl Token {
     }
 }

-impl From<(&Tok, TextRange)> for Token {
-    fn from((tok, range): (&Tok, TextRange)) -> Self {
+impl From<(TokenKind, TextRange)> for Token {
+    fn from((tok, range): (TokenKind, TextRange)) -> Self {
         let ty = match tok {
-            Tok::Name { .. } => TokenType::Named,
-            Tok::String { .. } => TokenType::String,
-            Tok::Newline => TokenType::Newline,
-            Tok::NonLogicalNewline => TokenType::NonLogicalNewline,
-            Tok::Lpar => TokenType::OpeningBracket,
-            Tok::Rpar => TokenType::ClosingBracket,
-            Tok::Lsqb => TokenType::OpeningSquareBracket,
-            Tok::Rsqb => TokenType::ClosingBracket,
-            Tok::Colon => TokenType::Colon,
-            Tok::Comma => TokenType::Comma,
-            Tok::Lbrace => TokenType::OpeningCurlyBracket,
-            Tok::Rbrace => TokenType::ClosingBracket,
-            Tok::Def => TokenType::Def,
-            Tok::For => TokenType::For,
-            Tok::Lambda => TokenType::Lambda,
+            TokenKind::Name => TokenType::Named,
+            TokenKind::String => TokenType::String,
+            TokenKind::Newline => TokenType::Newline,
+            TokenKind::NonLogicalNewline => TokenType::NonLogicalNewline,
+            TokenKind::Lpar => TokenType::OpeningBracket,
+            TokenKind::Rpar => TokenType::ClosingBracket,
+            TokenKind::Lsqb => TokenType::OpeningSquareBracket,
+            TokenKind::Rsqb => TokenType::ClosingBracket,
+            TokenKind::Colon => TokenType::Colon,
+            TokenKind::Comma => TokenType::Comma,
+            TokenKind::Lbrace => TokenType::OpeningCurlyBracket,
+            TokenKind::Rbrace => TokenType::ClosingBracket,
+            TokenKind::Def => TokenType::Def,
+            TokenKind::For => TokenType::For,
+            TokenKind::Lambda => TokenType::Lambda,
             // Import treated like a function.
-            Tok::Import => TokenType::Named,
+            TokenKind::Import => TokenType::Named,
             _ => TokenType::Irrelevant,
         };
         #[allow(clippy::inconsistent_struct_constructor)]
@@ -227,27 +226,23 @@ impl AlwaysFixableViolation for ProhibitedTrailingComma {
 /// COM812, COM818, COM819
 pub(crate) fn trailing_commas(
     diagnostics: &mut Vec<Diagnostic>,
-    tokens: &[LexResult],
+    tokens: TokenKindIter,
     locator: &Locator,
     indexer: &Indexer,
 ) {
     let mut fstrings = 0u32;
-    let tokens = tokens.iter().filter_map(|result| {
-        let Ok((tok, tok_range)) = result else {
-            return None;
-        };
-
-        match tok {
+    let tokens = tokens.filter_map(|(token, tok_range)| {
+        match token {
             // Completely ignore comments -- they just interfere with the logic.
-            Tok::Comment(_) => None,
+            TokenKind::Comment => None,
             // F-strings are handled as `String` token type with the complete range
             // of the outermost f-string. This means that the expression inside the
             // f-string is not checked for trailing commas.
-            Tok::FStringStart(_) => {
+            TokenKind::FStringStart => {
                 fstrings = fstrings.saturating_add(1);
                 None
             }
-            Tok::FStringEnd => {
+            TokenKind::FStringEnd => {
                 fstrings = fstrings.saturating_sub(1);
                 if fstrings == 0 {
                     indexer
@@ -260,7 +255,7 @@ pub(crate) fn trailing_commas(
             }
             _ => {
                 if fstrings == 0 {
-                    Some(Token::from((tok, *tok_range)))
+                    Some(Token::from((token, tok_range)))
                 } else {
                     None
                 }
```
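The `fstrings` counter above tracks f-string nesting depth so that tokens inside an f-string are dropped before the trailing-comma logic runs. A standalone sketch of the same depth-tracking idea, using hypothetical token kinds:

```rust
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Kind {
    FStringStart,
    FStringEnd,
    Comma,
    Name,
}

// Mirror of the saturating depth counter: only tokens outside any
// f-string survive the filter.
fn outside_fstrings(tokens: &[Kind]) -> Vec<Kind> {
    let mut depth = 0u32;
    let mut kept = Vec::new();
    for &token in tokens {
        match token {
            Kind::FStringStart => depth = depth.saturating_add(1),
            Kind::FStringEnd => depth = depth.saturating_sub(1),
            _ if depth == 0 => kept.push(token),
            _ => {}
        }
    }
    kept
}

fn main() {
    let stream = [
        Kind::Name,
        Kind::FStringStart,
        Kind::Comma, // inside the f-string: ignored
        Kind::FStringEnd,
        Kind::Comma,
    ];
    assert_eq!(outside_fstrings(&stream), vec![Kind::Name, Kind::Comma]);
}
```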
In `flake8_implicit_str_concat::rules::implicit`:

```diff
@@ -4,10 +4,9 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::str::{leading_quote, trailing_quote};
 use ruff_python_index::Indexer;
-use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::Tok;
+use ruff_python_parser::{TokenKind, TokenKindIter};
 use ruff_source_file::Locator;
-use ruff_text_size::{Ranged, TextRange};
+use ruff_text_size::TextRange;

 use crate::settings::LinterSettings;

@@ -93,36 +92,34 @@ impl Violation for MultiLineImplicitStringConcatenation {
 /// ISC001, ISC002
 pub(crate) fn implicit(
     diagnostics: &mut Vec<Diagnostic>,
-    tokens: &[LexResult],
+    tokens: TokenKindIter,
     settings: &LinterSettings,
     locator: &Locator,
     indexer: &Indexer,
 ) {
     for ((a_tok, a_range), (b_tok, b_range)) in tokens
-        .iter()
-        .flatten()
-        .filter(|(tok, _)| {
-            !tok.is_comment()
+        .filter(|(token, _)| {
+            *token != TokenKind::Comment
                 && (settings.flake8_implicit_str_concat.allow_multiline
-                    || !tok.is_non_logical_newline())
+                    || *token != TokenKind::NonLogicalNewline)
         })
         .tuple_windows()
     {
         let (a_range, b_range) = match (a_tok, b_tok) {
-            (Tok::String { .. }, Tok::String { .. }) => (*a_range, *b_range),
-            (Tok::String { .. }, Tok::FStringStart(_)) => {
+            (TokenKind::String, TokenKind::String) => (a_range, b_range),
+            (TokenKind::String, TokenKind::FStringStart) => {
                 match indexer.fstring_ranges().innermost(b_range.start()) {
-                    Some(b_range) => (*a_range, b_range),
+                    Some(b_range) => (a_range, b_range),
                     None => continue,
                 }
             }
-            (Tok::FStringEnd, Tok::String { .. }) => {
+            (TokenKind::FStringEnd, TokenKind::String) => {
                 match indexer.fstring_ranges().innermost(a_range.start()) {
-                    Some(a_range) => (a_range, *b_range),
+                    Some(a_range) => (a_range, b_range),
                     None => continue,
                 }
             }
-            (Tok::FStringEnd, Tok::FStringStart(_)) => {
+            (TokenKind::FStringEnd, TokenKind::FStringStart) => {
                 match (
                     indexer.fstring_ranges().innermost(a_range.start()),
                     indexer.fstring_ranges().innermost(b_range.start()),
```
In `pycodestyle::rules::compound_statements`:

```diff
@@ -1,7 +1,6 @@
 use ruff_notebook::CellOffsets;
 use ruff_python_ast::PySourceType;
-use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::Tok;
+use ruff_python_parser::{TokenKind, TokenKindIter};
 use ruff_text_size::{TextRange, TextSize};

 use ruff_diagnostics::{AlwaysFixableViolation, Violation};
@@ -100,7 +99,7 @@ impl AlwaysFixableViolation for UselessSemicolon {
 /// E701, E702, E703
 pub(crate) fn compound_statements(
     diagnostics: &mut Vec<Diagnostic>,
-    lxr: &[LexResult],
+    mut tokens: TokenKindIter,
     locator: &Locator,
     indexer: &Indexer,
     source_type: PySourceType,
@@ -134,39 +133,36 @@ pub(crate) fn compound_statements(
     // Track indentation.
     let mut indent = 0u32;

-    // Keep the token iterator to perform lookaheads.
-    let mut tokens = lxr.iter().flatten();
-
-    while let Some(&(ref tok, range)) = tokens.next() {
-        match tok {
-            Tok::Lpar => {
+    while let Some((token, range)) = tokens.next() {
+        match token {
+            TokenKind::Lpar => {
                 par_count = par_count.saturating_add(1);
             }
-            Tok::Rpar => {
+            TokenKind::Rpar => {
                 par_count = par_count.saturating_sub(1);
             }
-            Tok::Lsqb => {
+            TokenKind::Lsqb => {
                 sqb_count = sqb_count.saturating_add(1);
             }
-            Tok::Rsqb => {
+            TokenKind::Rsqb => {
                 sqb_count = sqb_count.saturating_sub(1);
             }
-            Tok::Lbrace => {
+            TokenKind::Lbrace => {
                 brace_count = brace_count.saturating_add(1);
             }
-            Tok::Rbrace => {
+            TokenKind::Rbrace => {
                 brace_count = brace_count.saturating_sub(1);
             }
-            Tok::Ellipsis => {
+            TokenKind::Ellipsis => {
                 if allow_ellipsis {
                     allow_ellipsis = false;
                     continue;
                 }
             }
-            Tok::Indent => {
+            TokenKind::Indent => {
                 indent = indent.saturating_add(1);
             }
-            Tok::Dedent => {
+            TokenKind::Dedent => {
                 indent = indent.saturating_sub(1);
             }
             _ => {}
@@ -176,8 +172,8 @@ pub(crate) fn compound_statements(
             continue;
         }

-        match tok {
-            Tok::Newline => {
+        match token {
+            TokenKind::Newline => {
                 if let Some((start, end)) = semi {
                     if !(source_type.is_ipynb()
                         && indent == 0
@@ -215,7 +211,7 @@ pub(crate) fn compound_statements(
                 while_ = None;
                 with = None;
             }
-            Tok::Colon => {
+            TokenKind::Colon => {
                 if case.is_some()
                     || class.is_some()
                     || elif.is_some()
@@ -235,11 +231,14 @@ pub(crate) fn compound_statements(
                     allow_ellipsis = true;
                 }
             }
-            Tok::Semi => {
+            TokenKind::Semi => {
                 semi = Some((range.start(), range.end()));
                 allow_ellipsis = false;
             }
-            Tok::Comment(..) | Tok::Indent | Tok::Dedent | Tok::NonLogicalNewline => {}
+            TokenKind::Comment
+            | TokenKind::Indent
+            | TokenKind::Dedent
+            | TokenKind::NonLogicalNewline => {}
             _ => {
                 if let Some((start, end)) = semi {
                     diagnostics.push(Diagnostic::new(
@@ -277,8 +276,8 @@ pub(crate) fn compound_statements(
             }
         }

-        match tok {
-            Tok::Lambda => {
+        match token {
+            TokenKind::Lambda => {
                 // Reset.
                 colon = None;
                 case = None;
@@ -294,40 +293,40 @@ pub(crate) fn compound_statements(
                 while_ = None;
                 with = None;
             }
-            Tok::Case => {
+            TokenKind::Case => {
                 case = Some((range.start(), range.end()));
             }
-            Tok::If => {
+            TokenKind::If => {
                 if_ = Some((range.start(), range.end()));
             }
-            Tok::While => {
+            TokenKind::While => {
                 while_ = Some((range.start(), range.end()));
             }
-            Tok::For => {
+            TokenKind::For => {
                 for_ = Some((range.start(), range.end()));
             }
-            Tok::Try => {
+            TokenKind::Try => {
                 try_ = Some((range.start(), range.end()));
             }
-            Tok::Except => {
+            TokenKind::Except => {
                 except = Some((range.start(), range.end()));
             }
-            Tok::Finally => {
+            TokenKind::Finally => {
                 finally = Some((range.start(), range.end()));
             }
-            Tok::Elif => {
+            TokenKind::Elif => {
                 elif = Some((range.start(), range.end()));
             }
-            Tok::Else => {
+            TokenKind::Else => {
                 else_ = Some((range.start(), range.end()));
             }
-            Tok::Class => {
+            TokenKind::Class => {
                 class = Some((range.start(), range.end()));
             }
-            Tok::With => {
+            TokenKind::With => {
                 with = Some((range.start(), range.end()));
             }
-            Tok::Match => {
+            TokenKind::Match => {
                 match_ = Some((range.start(), range.end()));
             }
             _ => {}
@@ -337,17 +336,17 @@ pub(crate) fn compound_statements(

 /// Returns `true` if there are any non-trivia tokens from the given token
 /// iterator till the given end offset.
-fn has_non_trivia_tokens_till<'a>(
-    tokens: impl Iterator<Item = &'a (Tok, TextRange)>,
-    cell_end: TextSize,
-) -> bool {
-    for &(ref tok, tok_range) in tokens {
+fn has_non_trivia_tokens_till(tokens: TokenKindIter, cell_end: TextSize) -> bool {
+    for (token, tok_range) in tokens {
         if tok_range.start() >= cell_end {
             return false;
         }
         if !matches!(
-            tok,
-            Tok::Newline | Tok::Comment(_) | Tok::EndOfFile | Tok::NonLogicalNewline
+            token,
+            TokenKind::Newline
+                | TokenKind::Comment
+                | TokenKind::EndOfFile
+                | TokenKind::NonLogicalNewline
         ) {
             return true;
         }
```
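Note that the new signature takes `mut tokens: TokenKindIter` because the loop drives the iterator by hand with `next()` instead of a `for` loop; keeping ownership of the iterator is what makes lookaheads like `has_non_trivia_tokens_till` possible. A minimal sketch of that pattern with plain iterators (the kinds and helper are hypothetical):

```rust
#[derive(Clone, Copy, PartialEq, Eq)]
enum Kind {
    Name,
    Semi,
    Newline,
    Comment,
}

// Lookahead helper: scans the rest of the stream for non-trivia tokens.
fn has_more_code(mut tokens: impl Iterator<Item = Kind>) -> bool {
    tokens.any(|token| !matches!(token, Kind::Newline | Kind::Comment))
}

fn main() {
    let mut tokens = [Kind::Name, Kind::Semi, Kind::Newline, Kind::Comment].into_iter();
    // Driving the iterator manually keeps ownership in hand...
    while let Some(token) = tokens.next() {
        if token == Kind::Semi {
            // ...so the remainder can be handed to a lookahead helper.
            let trailing = has_more_code(tokens.by_ref());
            println!("semicolon followed by more code? {trailing}");
            break;
        }
    }
}
```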
In `pycodestyle::rules::too_many_newlines_at_end_of_file`:

```diff
@@ -1,7 +1,6 @@
 use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::Tok;
+use ruff_python_parser::{TokenKind, TokenKindIter};
 use ruff_text_size::{TextRange, TextSize};

 /// ## What it does
@@ -57,23 +56,23 @@ impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile {
 /// W391
 pub(crate) fn too_many_newlines_at_end_of_file(
     diagnostics: &mut Vec<Diagnostic>,
-    lxr: &[LexResult],
+    tokens: TokenKindIter,
 ) {
     let mut num_trailing_newlines = 0u32;
     let mut start: Option<TextSize> = None;
     let mut end: Option<TextSize> = None;

     // Count the number of trailing newlines.
-    for (tok, range) in lxr.iter().rev().flatten() {
-        match tok {
-            Tok::NonLogicalNewline | Tok::Newline => {
+    for (token, range) in tokens.rev() {
+        match token {
+            TokenKind::NonLogicalNewline | TokenKind::Newline => {
                 if num_trailing_newlines == 0 {
                     end = Some(range.end());
                 }
                 start = Some(range.end());
                 num_trailing_newlines += 1;
             }
-            Tok::Dedent => continue,
+            TokenKind::Dedent => continue,
             _ => {
                 break;
             }
```
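This rewrite relies on the kind iterator being double-ended: `tokens.rev()` replaces `lxr.iter().rev().flatten()` for the backwards scan. A standalone sketch of the same trailing-newline count over hypothetical kinds:

```rust
#[derive(Clone, Copy)]
enum Kind {
    Newline,
    NonLogicalNewline,
    Dedent,
    Name,
}

// Walk the token stream backwards, counting newline tokens until the
// first token that is neither a newline nor a dedent, as W391 does.
fn trailing_newlines(tokens: &[Kind]) -> u32 {
    let mut count = 0u32;
    for &token in tokens.iter().rev() {
        match token {
            Kind::Newline | Kind::NonLogicalNewline => count += 1,
            Kind::Dedent => continue,
            _ => break,
        }
    }
    count
}

fn main() {
    let stream = [Kind::Name, Kind::Newline, Kind::NonLogicalNewline, Kind::Newline];
    assert_eq!(trailing_newlines(&stream), 3);
}
```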
In `pylint::rules::invalid_string_characters`:

```diff
@@ -4,7 +4,7 @@ use ruff_diagnostics::AlwaysFixableViolation;
 use ruff_diagnostics::Edit;
 use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_parser::Tok;
+use ruff_python_parser::TokenKind;
 use ruff_source_file::Locator;

 /// ## What it does
@@ -174,14 +174,14 @@ impl AlwaysFixableViolation for InvalidCharacterZeroWidthSpace {
 /// PLE2510, PLE2512, PLE2513, PLE2514, PLE2515
 pub(crate) fn invalid_string_characters(
     diagnostics: &mut Vec<Diagnostic>,
-    tok: &Tok,
+    token: TokenKind,
     range: TextRange,
     locator: &Locator,
 ) {
-    let text = match tok {
+    let text = match token {
         // We can't use the `value` field since it's decoded and e.g. for f-strings removed a curly
         // brace that escaped another curly brace, which would gives us wrong column information.
-        Tok::String { .. } | Tok::FStringMiddle { .. } => locator.slice(range),
+        TokenKind::String | TokenKind::FStringMiddle => locator.slice(range),
         _ => return,
     };
```
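The retained comment explains why this rule slices the raw source via `locator.slice(range)` rather than using a decoded token value: decoding can change the text's length (e.g. `{{` becomes `{` in an f-string), shifting column offsets. A small, self-contained illustration of that drift (an assumed example, not ruff code):

```rust
fn main() {
    // Raw source of an f-string token containing a zero-width space,
    // versus what its decoded value would look like.
    let raw = "f'{{pad}}\u{200b}'";
    let decoded = "{pad}\u{200b}";
    let zwsp = '\u{200b}';
    let col_in_raw = raw.find(zwsp).unwrap();
    let col_in_decoded = decoded.find(zwsp).unwrap();
    // The invalid character sits at different offsets in each view, so
    // diagnostics computed on the decoded value would point at the
    // wrong source column.
    assert_ne!(col_in_raw, col_in_decoded);
    println!("raw column {col_in_raw} vs decoded column {col_in_decoded}");
}
```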