mirror of https://github.com/astral-sh/ruff, synced 2026-01-21 05:20:49 -05:00
Skip walking all tokens when loading range suppressions (#22446)
- Adds `Tokens::split_at()` to get the tokens before and after an offset.
- Updates `Suppressions::load_from_tokens` to take an `Indexer` and use its comment ranges to minimize the need for walking tokens looking for indent/dedent.

Adapted from https://github.com/astral-sh/ruff/pull/21441#pullrequestreview-3503773083

Fixes #22087
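To make the change concrete, here is a minimal sketch (not part of the commit) of the lookup pattern the new code uses. The function name `indents_at_comments` is hypothetical; `comment_ranges`, `split_at`, `TokenKind`, and the `TextSlice::slice` helper are the APIs that appear in the diff below:

```rust
use ruff_python_ast::token::{TokenKind, Tokens};
use ruff_python_index::Indexer;
use ruff_text_size::TextSlice;

/// Illustrative only: recover each suppression comment's enclosing
/// indentation without walking every token from the start of the file.
fn indents_at_comments<'src>(
    source: &'src str,
    tokens: &Tokens,
    indexer: &Indexer,
) -> Vec<&'src str> {
    indexer
        .comment_ranges()
        .iter()
        .map(|comment_range| {
            // Jump straight to the comment: `before` holds everything up to
            // its start, `after` the remainder that still needs walking.
            let (before, _after) = tokens.split_at(comment_range.start());

            // The enclosing block's indentation is the last `Indent` token
            // emitted before the comment.
            before
                .iter()
                .rfind(|token| token.kind() == TokenKind::Indent)
                .map(|token| source.slice(token))
                .unwrap_or_default()
        })
        .collect()
}
```

The point of `split_at` is that recovering the enclosing indentation becomes a short backwards scan from each comment rather than a forward walk over every token in the file.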
@@ -405,7 +405,8 @@ pub fn add_noqa_to_path(
     );

     // Parse range suppression comments
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);

     // Generate diagnostics, ignoring any existing `noqa` directives.
     let diagnostics = check_path(
@@ -479,7 +480,8 @@ pub fn lint_only(
     );

     // Parse range suppression comments
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);

     // Generate diagnostics.
     let diagnostics = check_path(
@@ -596,7 +598,8 @@ pub fn lint_fix<'a>(
     );

     // Parse range suppression comments
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);

     // Generate diagnostics.
     let diagnostics = check_path(
@@ -978,7 +981,8 @@ mod tests {
             &locator,
             &indexer,
         );
-        let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+        let suppressions =
+            Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
        let mut diagnostics = check_path(
            path,
            None,
@@ -957,7 +957,7 @@ mod tests {
            &indexer,
        );
        let suppressions =
-            Suppressions::from_tokens(&settings, locator.contents(), parsed.tokens());
+            Suppressions::from_tokens(&settings, locator.contents(), parsed.tokens(), &indexer);
        let mut messages = check_path(
            Path::new("<filename>"),
            None,
@@ -3,13 +3,13 @@ use core::fmt;
 use ruff_db::diagnostic::Diagnostic;
 use ruff_diagnostics::{Edit, Fix};
 use ruff_python_ast::token::{TokenKind, Tokens};
-use ruff_python_ast::whitespace::indentation;
+use ruff_python_index::Indexer;
 use rustc_hash::FxHashSet;
 use std::cell::Cell;
 use std::{error::Error, fmt::Formatter};
 use thiserror::Error;

-use ruff_python_trivia::Cursor;
+use ruff_python_trivia::{Cursor, indentation_at_offset};
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize, TextSlice};
 use smallvec::{SmallVec, smallvec};

@@ -153,10 +153,15 @@ pub struct Suppressions {
 }

 impl Suppressions {
-    pub fn from_tokens(settings: &LinterSettings, source: &str, tokens: &Tokens) -> Suppressions {
+    pub fn from_tokens(
+        settings: &LinterSettings,
+        source: &str,
+        tokens: &Tokens,
+        indexer: &Indexer,
+    ) -> Suppressions {
         if is_range_suppressions_enabled(settings) {
             let builder = SuppressionsBuilder::new(source);
-            builder.load_from_tokens(tokens)
+            builder.load_from_tokens(tokens, indexer)
         } else {
             Suppressions::default()
         }
@@ -355,7 +360,6 @@ pub(crate) struct SuppressionsBuilder<'a> {

     valid: Vec<Suppression>,
     invalid: Vec<InvalidSuppression>,
-    errors: Vec<ParseError>,

     pending: Vec<PendingSuppressionComment<'a>>,
 }
@@ -368,74 +372,109 @@ impl<'a> SuppressionsBuilder<'a> {
         }
     }

-    pub(crate) fn load_from_tokens(mut self, tokens: &Tokens) -> Suppressions {
-        let default_indent = "";
+    pub(crate) fn load_from_tokens(mut self, tokens: &Tokens, indexer: &Indexer) -> Suppressions {
         let mut indents: Vec<&str> = vec![];
+        let mut errors = Vec::new();

-        // Iterate through tokens, tracking indentation, filtering trailing comments, and then
-        // looking for matching comments from the previous block when reaching a dedent token.
-        for (token_index, token) in tokens.iter().enumerate() {
-            match token.kind() {
-                TokenKind::Indent => {
-                    indents.push(self.source.slice(token));
-                }
-                TokenKind::Dedent => {
-                    self.match_comments(indents.last().copied().unwrap_or_default(), token.range());
-                    indents.pop();
-                }
-                TokenKind::Comment => {
-                    let mut parser = SuppressionParser::new(self.source, token.range());
-                    match parser.parse_comment() {
-                        Ok(comment) => {
-                            let indent = indentation(self.source, &comment.range);
-
-                            let Some(indent) = indent else {
-                                // trailing suppressions are not supported
-                                self.invalid.push(InvalidSuppression {
-                                    kind: InvalidSuppressionKind::Trailing,
-                                    comment,
-                                });
-                                continue;
-                            };
-
-                            // comment matches current block's indentation, or precedes an indent/dedent token
-                            if indent == indents.last().copied().unwrap_or_default()
-                                || tokens[token_index..]
-                                    .iter()
-                                    .find(|t| !t.kind().is_trivia())
-                                    .is_some_and(|t| {
-                                        matches!(t.kind(), TokenKind::Dedent | TokenKind::Indent)
-                                    })
-                            {
-                                self.pending
-                                    .push(PendingSuppressionComment { indent, comment });
-                            } else {
-                                // weirdly indented? ¯\_(ツ)_/¯
-                                self.invalid.push(InvalidSuppression {
-                                    kind: InvalidSuppressionKind::Indentation,
-                                    comment,
-                                });
-                            }
-                        }
-                        Err(ParseError {
-                            kind: ParseErrorKind::NotASuppression,
-                            ..
-                        }) => {}
-                        Err(error) => {
-                            self.errors.push(error);
-                        }
-                    }
-                }
-                _ => {}
-            }
-        }
+        let mut suppressions = indexer
+            .comment_ranges()
+            .iter()
+            .copied()
+            .filter_map(|comment_range| {
+                let mut parser = SuppressionParser::new(self.source, comment_range);
+                match parser.parse_comment() {
+                    Ok(comment) => Some(comment),
+                    Err(ParseError {
+                        kind: ParseErrorKind::NotASuppression,
+                        ..
+                    }) => None,
+                    Err(error) => {
+                        errors.push(error);
+                        None
+                    }
+                }
+            })
+            .peekable();
+
+        'comments: while let Some(suppression) = suppressions.peek() {
+            indents.clear();
+
+            let (before, after) = tokens.split_at(suppression.range.start());
+            let last_indent = before
+                .iter()
+                .rfind(|token| token.kind() == TokenKind::Indent)
+                .map(|token| self.source.slice(token))
+                .unwrap_or_default();
+
+            indents.push(last_indent);
+
+            // Iterate through tokens, tracking indentation, filtering trailing comments, and then
+            // looking for matching comments from the previous block when reaching a dedent token.
+            for (token_index, token) in after.iter().enumerate() {
+                let current_indent = indents.last().copied().unwrap_or_default();
+                match token.kind() {
+                    TokenKind::Indent => {
+                        indents.push(self.source.slice(token));
+                    }
+                    TokenKind::Dedent => {
+                        self.match_comments(current_indent, token.range());
+
+                        indents.pop();
+
+                        if indents.is_empty() || self.pending.is_empty() {
+                            continue 'comments;
+                        }
+                    }
+                    TokenKind::Comment => {
+                        let Some(suppression) =
+                            suppressions.next_if(|suppression| suppression.range == token.range())
+                        else {
+                            continue;
+                        };
+
+                        let Some(indent) =
+                            indentation_at_offset(suppression.range.start(), self.source)
+                        else {
+                            // trailing suppressions are not supported
+                            self.invalid.push(InvalidSuppression {
+                                kind: InvalidSuppressionKind::Trailing,
+                                comment: suppression,
+                            });
+                            continue;
+                        };
+
+                        // comment matches current block's indentation, or precedes an indent/dedent token
+                        if indent == current_indent
+                            || after[token_index..]
+                                .iter()
+                                .find(|t| !t.kind().is_trivia())
+                                .is_some_and(|t| {
+                                    matches!(t.kind(), TokenKind::Dedent | TokenKind::Indent)
+                                })
+                        {
+                            self.pending.push(PendingSuppressionComment {
+                                indent,
+                                comment: suppression,
+                            });
+                        } else {
+                            // weirdly indented? ¯\_(ツ)_/¯
+                            self.invalid.push(InvalidSuppression {
+                                kind: InvalidSuppressionKind::Indentation,
+                                comment: suppression,
+                            });
+                        }
+                    }
+                    _ => {}
+                }
+            }
+        }

-        self.match_comments(default_indent, TextRange::up_to(self.source.text_len()));
+        self.match_comments("", TextRange::up_to(self.source.text_len()));

         Suppressions {
             valid: self.valid,
             invalid: self.invalid,
-            errors: self.errors,
+            errors,
         }
     }

@@ -691,6 +730,7 @@ mod tests {

     use insta::assert_debug_snapshot;
     use itertools::Itertools;
+    use ruff_python_index::Indexer;
     use ruff_python_parser::{Mode, ParseOptions, parse};
     use ruff_text_size::{TextLen, TextRange, TextSize};
     use similar::DiffableStr;
@@ -1585,10 +1625,12 @@ def bar():
     /// Parse all suppressions and errors in a module for testing
     fn debug(source: &'_ str) -> DebugSuppressions<'_> {
         let parsed = parse(source, ParseOptions::from(Mode::Module)).unwrap();
+        let indexer = Indexer::from_tokens(parsed.tokens(), source);
         let suppressions = Suppressions::from_tokens(
             &LinterSettings::default().with_preview_mode(),
             source,
             parsed.tokens(),
+            &indexer,
         );
         DebugSuppressions {
             source,
@@ -235,7 +235,8 @@ pub(crate) fn test_contents<'a>(
         &locator,
         &indexer,
     );
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
     let messages = check_path(
         path,
         path.parent()
@@ -303,7 +304,7 @@ pub(crate) fn test_contents<'a>(
     );

     let suppressions =
-        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
     let fixed_messages = check_path(
         path,
         None,
@@ -185,6 +185,33 @@ impl Tokens {

         after
     }

+    /// Returns a pair of token slices from both before and after the given [`TextSize`] offset.
+    ///
+    /// If the given offset is between two tokens, the "before" slice will end just before the
+    /// following token. In other words, if the offset is between the end of previous token and
+    /// start of next token, the "before" slice will end just before the next token. The "after"
+    /// slice will contain the rest of the tokens.
+    ///
+    /// Note that the contents of the "after" slice may differ from the results of calling `after()`
+    /// directly, particularly when the given offset occurs on zero-width tokens like `Dedent`.
+    ///
+    /// # Panics
+    ///
+    /// If the given offset is inside a token range at any point
+    /// other than the start of the range.
+    pub fn split_at(&self, offset: TextSize) -> (&[Token], &[Token]) {
+        let partition_point = self.partition_point(|token| token.start() < offset);
+        let (before, after) = &self.raw.split_at(partition_point);
+
+        if let Some(last) = before.last() {
+            assert!(
+                offset >= last.end(),
+                "Offset {offset:?} is inside token `{last:?}`"
+            );
+        }
+        (before, after)
+    }
 }

 impl<'a> IntoIterator for &'a Tokens {
@@ -513,4 +540,72 @@ mod tests {
         let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
         tokens.in_range(TextRange::new(0.into(), 6.into()));
     }
+
+    #[test]
+    fn tokens_split_at_first_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(TextSize::new(0));
+        assert_eq!(before.len(), 0);
+        assert_eq!(after.len(), 10);
+    }
+
+    #[test]
+    fn tokens_split_at_last_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(TextSize::new(33));
+        assert_eq!(before.len(), 10);
+        assert_eq!(after.len(), 0);
+    }
+
+    #[test]
+    fn tokens_split_at_inside_gap() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(TextSize::new(13));
+        assert_eq!(before.len(), 6);
+        assert_eq!(after.len(), 4);
+    }
+
+    #[test]
+    #[should_panic(expected = "Offset 18 is inside token `Comment 15..24`")]
+    fn tokens_split_at_inside_token() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        tokens.split_at(TextSize::new(18));
+    }
+
+    #[test]
+    fn tokens_split_at_matches_before_and_after() {
+        let offset = TextSize::new(15);
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(offset);
+        assert_eq!(before, tokens.before(offset));
+        assert_eq!(after, tokens.after(offset));
+    }
+
+    #[test]
+    #[should_panic(expected = "Contents of after slice different when offset at dedent")]
+    fn tokens_split_at_matches_before_and_after_zero_length() {
+        let offset = TextSize::new(13);
+        let tokens = new_tokens(
+            [
+                (TokenKind::If, 0..2),
+                (TokenKind::Name, 3..4),
+                (TokenKind::Colon, 4..5),
+                (TokenKind::Newline, 5..6),
+                (TokenKind::Indent, 6..7),
+                (TokenKind::Pass, 7..11),
+                (TokenKind::Newline, 11..12),
+                (TokenKind::NonLogicalNewline, 12..13),
+                (TokenKind::Dedent, 13..13),
+                (TokenKind::Name, 13..14),
+                (TokenKind::Newline, 14..14),
+            ]
+            .into_iter(),
+        );
+        let (before, after) = tokens.split_at(offset);
+        assert_eq!(before, tokens.before(offset));
+        assert!(
+            after == tokens.after(offset),
+            "Contents of after slice different when offset at dedent"
+        );
+    }
 }
@@ -120,8 +120,12 @@ pub(crate) fn check(
     let directives = extract_directives(parsed.tokens(), Flags::all(), &locator, &indexer);

     // Parse range suppression comments
-    let suppressions =
-        Suppressions::from_tokens(&settings.linter, locator.contents(), parsed.tokens());
+    let suppressions = Suppressions::from_tokens(
+        &settings.linter,
+        locator.contents(),
+        parsed.tokens(),
+        &indexer,
+    );

     // Generate checks.
     let diagnostics = check_path(
@@ -241,8 +241,12 @@ impl Workspace {
            &indexer,
        );

-        let suppressions =
-            Suppressions::from_tokens(&self.settings.linter, locator.contents(), parsed.tokens());
+        let suppressions = Suppressions::from_tokens(
+            &self.settings.linter,
+            locator.contents(),
+            parsed.tokens(),
+            &indexer,
+        );

        // Generate checks.
        let diagnostics = check_path(
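End to end, a call site now wires the pieces together the way the `debug` test helper in the diff does. A minimal sketch, assuming `Suppressions` and `LinterSettings` are in scope via their usual (crate-internal) `ruff_linter` paths:

```rust
use ruff_python_index::Indexer;
use ruff_python_parser::{Mode, ParseOptions, parse};

// Assumes `Suppressions` and `LinterSettings` are imported as at the
// call sites above; the function name itself is illustrative.
fn load_suppressions(source: &str) -> Suppressions {
    let parsed = parse(source, ParseOptions::from(Mode::Module)).unwrap();

    // The indexer is built once from the token stream; its comment ranges
    // are what let `from_tokens` skip straight to each suppression comment.
    let indexer = Indexer::from_tokens(parsed.tokens(), source);

    Suppressions::from_tokens(
        &LinterSettings::default().with_preview_mode(),
        source,
        parsed.tokens(),
        &indexer,
    )
}
```

Most call sites already construct an `Indexer` for directive extraction, so threading `&indexer` through to `Suppressions::from_tokens` adds essentially no cost.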