Skip walking all tokens when loading range suppressions (#22446)

- Adds `Tokens::split_at()` to get the tokens before and after a given offset.
- Updates `Suppressions::load_from_tokens` to take an `Indexer` and use its
  comment ranges, minimizing the token walking needed to track indent/dedent
  (see the sketch below).
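
As a rough sketch of the new call shape (mirroring the updated test helper in
the diff below; the wrapper function is a hypothetical addition, and the
`Suppressions`/`LinterSettings` types are assumed to be in scope since their
import paths are internal to the linter crate):

use ruff_python_index::Indexer;
use ruff_python_parser::{Mode, ParseOptions, parse};

// Hypothetical wrapper; `LinterSettings` and `Suppressions` assumed in scope.
fn load_suppressions(source: &str, settings: &LinterSettings) -> Suppressions {
    let parsed = parse(source, ParseOptions::from(Mode::Module)).unwrap();
    // Build the indexer once; its comment ranges let the loader jump straight
    // to candidate suppression comments instead of scanning every token.
    let indexer = Indexer::from_tokens(parsed.tokens(), source);
    Suppressions::from_tokens(settings, source, parsed.tokens(), &indexer)
}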

Adapted from
https://github.com/astral-sh/ruff/pull/21441#pullrequestreview-3503773083

Fixes #22087
Author: Amethyst Reese
Date: 2026-01-15 12:35:24 -08:00
Committed by: GitHub
Parent: b4b8299d6c
Commit: c696ef4025

7 changed files with 223 additions and 73 deletions


@@ -405,7 +405,8 @@ pub fn add_noqa_to_path(
     );

     // Parse range suppression comments
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);

     // Generate diagnostics, ignoring any existing `noqa` directives.
     let diagnostics = check_path(
@@ -479,7 +480,8 @@ pub fn lint_only(
     );

     // Parse range suppression comments
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);

     // Generate diagnostics.
     let diagnostics = check_path(
@@ -596,7 +598,8 @@ pub fn lint_fix<'a>(
     );

     // Parse range suppression comments
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);

     // Generate diagnostics.
     let diagnostics = check_path(
@@ -978,7 +981,8 @@ mod tests {
             &locator,
             &indexer,
         );
-        let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+        let suppressions =
+            Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
        let mut diagnostics = check_path(
             path,
             None,


@@ -957,7 +957,7 @@ mod tests {
             &indexer,
         );
         let suppressions =
-            Suppressions::from_tokens(&settings, locator.contents(), parsed.tokens());
+            Suppressions::from_tokens(&settings, locator.contents(), parsed.tokens(), &indexer);
         let mut messages = check_path(
             Path::new("<filename>"),
             None,


@@ -3,13 +3,13 @@ use core::fmt;
 use ruff_db::diagnostic::Diagnostic;
 use ruff_diagnostics::{Edit, Fix};
 use ruff_python_ast::token::{TokenKind, Tokens};
-use ruff_python_ast::whitespace::indentation;
+use ruff_python_index::Indexer;
 use rustc_hash::FxHashSet;
 use std::cell::Cell;
 use std::{error::Error, fmt::Formatter};
 use thiserror::Error;
-use ruff_python_trivia::Cursor;
+use ruff_python_trivia::{Cursor, indentation_at_offset};
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize, TextSlice};
 use smallvec::{SmallVec, smallvec};
@@ -153,10 +153,15 @@ pub struct Suppressions {
 }

 impl Suppressions {
-    pub fn from_tokens(settings: &LinterSettings, source: &str, tokens: &Tokens) -> Suppressions {
+    pub fn from_tokens(
+        settings: &LinterSettings,
+        source: &str,
+        tokens: &Tokens,
+        indexer: &Indexer,
+    ) -> Suppressions {
         if is_range_suppressions_enabled(settings) {
             let builder = SuppressionsBuilder::new(source);
-            builder.load_from_tokens(tokens)
+            builder.load_from_tokens(tokens, indexer)
         } else {
             Suppressions::default()
         }
@@ -355,7 +360,6 @@ pub(crate) struct SuppressionsBuilder<'a> {
     valid: Vec<Suppression>,
     invalid: Vec<InvalidSuppression>,
-    errors: Vec<ParseError>,
     pending: Vec<PendingSuppressionComment<'a>>,
 }
@@ -368,74 +372,109 @@ impl<'a> SuppressionsBuilder<'a> {
         }
     }

-    pub(crate) fn load_from_tokens(mut self, tokens: &Tokens) -> Suppressions {
-        let default_indent = "";
-        let mut indents: Vec<&str> = vec![];
-
-        // Iterate through tokens, tracking indentation, filtering trailing comments, and then
-        // looking for matching comments from the previous block when reaching a dedent token.
-        for (token_index, token) in tokens.iter().enumerate() {
-            match token.kind() {
-                TokenKind::Indent => {
-                    indents.push(self.source.slice(token));
-                }
-                TokenKind::Dedent => {
-                    self.match_comments(indents.last().copied().unwrap_or_default(), token.range());
-                    indents.pop();
-                }
-                TokenKind::Comment => {
-                    let mut parser = SuppressionParser::new(self.source, token.range());
-                    match parser.parse_comment() {
-                        Ok(comment) => {
-                            let indent = indentation(self.source, &comment.range);
-                            let Some(indent) = indent else {
-                                // trailing suppressions are not supported
-                                self.invalid.push(InvalidSuppression {
-                                    kind: InvalidSuppressionKind::Trailing,
-                                    comment,
-                                });
-                                continue;
-                            };
-
-                            // comment matches current block's indentation, or precedes an indent/dedent token
-                            if indent == indents.last().copied().unwrap_or_default()
-                                || tokens[token_index..]
-                                    .iter()
-                                    .find(|t| !t.kind().is_trivia())
-                                    .is_some_and(|t| {
-                                        matches!(t.kind(), TokenKind::Dedent | TokenKind::Indent)
-                                    })
-                            {
-                                self.pending
-                                    .push(PendingSuppressionComment { indent, comment });
-                            } else {
-                                // weirdly indented? ¯\_(ツ)_/¯
-                                self.invalid.push(InvalidSuppression {
-                                    kind: InvalidSuppressionKind::Indentation,
-                                    comment,
-                                });
-                            }
-                        }
-                        Err(ParseError {
-                            kind: ParseErrorKind::NotASuppression,
-                            ..
-                        }) => {}
-                        Err(error) => {
-                            self.errors.push(error);
-                        }
-                    }
-                }
-                _ => {}
-            }
-        }
-
-        self.match_comments(default_indent, TextRange::up_to(self.source.text_len()));
+    pub(crate) fn load_from_tokens(mut self, tokens: &Tokens, indexer: &Indexer) -> Suppressions {
+        let mut indents: Vec<&str> = vec![];
+        let mut errors = Vec::new();
+
+        let mut suppressions = indexer
+            .comment_ranges()
+            .iter()
+            .copied()
+            .filter_map(|comment_range| {
+                let mut parser = SuppressionParser::new(self.source, comment_range);
+                match parser.parse_comment() {
+                    Ok(comment) => Some(comment),
+                    Err(ParseError {
+                        kind: ParseErrorKind::NotASuppression,
+                        ..
+                    }) => None,
+                    Err(error) => {
+                        errors.push(error);
+                        None
+                    }
+                }
+            })
+            .peekable();
+
+        'comments: while let Some(suppression) = suppressions.peek() {
+            indents.clear();
+            let (before, after) = tokens.split_at(suppression.range.start());
+            let last_indent = before
+                .iter()
+                .rfind(|token| token.kind() == TokenKind::Indent)
+                .map(|token| self.source.slice(token))
+                .unwrap_or_default();
+            indents.push(last_indent);
+
+            // Iterate through tokens, tracking indentation, filtering trailing comments, and then
+            // looking for matching comments from the previous block when reaching a dedent token.
+            for (token_index, token) in after.iter().enumerate() {
+                let current_indent = indents.last().copied().unwrap_or_default();
+                match token.kind() {
+                    TokenKind::Indent => {
+                        indents.push(self.source.slice(token));
+                    }
+                    TokenKind::Dedent => {
+                        self.match_comments(current_indent, token.range());
+                        indents.pop();
+                        if indents.is_empty() || self.pending.is_empty() {
+                            continue 'comments;
+                        }
+                    }
+                    TokenKind::Comment => {
+                        let Some(suppression) =
+                            suppressions.next_if(|suppression| suppression.range == token.range())
+                        else {
+                            continue;
+                        };
+                        let Some(indent) =
+                            indentation_at_offset(suppression.range.start(), self.source)
+                        else {
+                            // trailing suppressions are not supported
+                            self.invalid.push(InvalidSuppression {
+                                kind: InvalidSuppressionKind::Trailing,
+                                comment: suppression,
+                            });
+                            continue;
+                        };
+
+                        // comment matches current block's indentation, or precedes an indent/dedent token
+                        if indent == current_indent
+                            || after[token_index..]
+                                .iter()
+                                .find(|t| !t.kind().is_trivia())
+                                .is_some_and(|t| {
+                                    matches!(t.kind(), TokenKind::Dedent | TokenKind::Indent)
+                                })
+                        {
+                            self.pending.push(PendingSuppressionComment {
+                                indent,
+                                comment: suppression,
+                            });
+                        } else {
+                            // weirdly indented? ¯\_(ツ)_/¯
+                            self.invalid.push(InvalidSuppression {
+                                kind: InvalidSuppressionKind::Indentation,
+                                comment: suppression,
+                            });
+                        }
+                    }
+                    _ => {}
+                }
+            }
+        }
+
+        self.match_comments("", TextRange::up_to(self.source.text_len()));

         Suppressions {
             valid: self.valid,
             invalid: self.invalid,
-            errors: self.errors,
+            errors,
         }
     }
@@ -691,6 +730,7 @@ mod tests {
     use insta::assert_debug_snapshot;
     use itertools::Itertools;
+    use ruff_python_index::Indexer;
     use ruff_python_parser::{Mode, ParseOptions, parse};
     use ruff_text_size::{TextLen, TextRange, TextSize};
     use similar::DiffableStr;

@@ -1585,10 +1625,12 @@ def bar():
     /// Parse all suppressions and errors in a module for testing
     fn debug(source: &'_ str) -> DebugSuppressions<'_> {
         let parsed = parse(source, ParseOptions::from(Mode::Module)).unwrap();
+        let indexer = Indexer::from_tokens(parsed.tokens(), source);
         let suppressions = Suppressions::from_tokens(
             &LinterSettings::default().with_preview_mode(),
             source,
             parsed.tokens(),
+            &indexer,
         );
         DebugSuppressions {
             source,


@@ -235,7 +235,8 @@ pub(crate) fn test_contents<'a>(
         &locator,
         &indexer,
     );
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
     let messages = check_path(
         path,
         path.parent()

@@ -303,7 +304,7 @@ pub(crate) fn test_contents<'a>(
         );
         let suppressions =
-            Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+            Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
         let fixed_messages = check_path(
             path,
             None,


@@ -185,6 +185,33 @@ impl Tokens {
         after
     }

+    /// Returns a pair of token slices, one before and one after the given [`TextSize`] offset.
+    ///
+    /// If the given offset falls between two tokens (i.e., at or after the end of one token
+    /// and at or before the start of the next), the "before" slice ends just before the next
+    /// token and the "after" slice contains all remaining tokens.
+    ///
+    /// Note that the contents of the "after" slice may differ from the result of calling
+    /// `after()` directly, particularly when the given offset lands on a zero-width token
+    /// like `Dedent`.
+    ///
+    /// # Panics
+    ///
+    /// If the given offset falls inside a token's range anywhere other than at the token's
+    /// start.
+    pub fn split_at(&self, offset: TextSize) -> (&[Token], &[Token]) {
+        let partition_point = self.partition_point(|token| token.start() < offset);
+        let (before, after) = self.raw.split_at(partition_point);
+        if let Some(last) = before.last() {
+            assert!(
+                offset >= last.end(),
+                "Offset {offset:?} is inside token `{last:?}`"
+            );
+        }
+        (before, after)
+    }
 }

 impl<'a> IntoIterator for &'a Tokens {
@@ -513,4 +540,72 @@ mod tests {
         let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
         tokens.in_range(TextRange::new(0.into(), 6.into()));
     }
+
+    #[test]
+    fn tokens_split_at_first_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(TextSize::new(0));
+        assert_eq!(before.len(), 0);
+        assert_eq!(after.len(), 10);
+    }
+
+    #[test]
+    fn tokens_split_at_last_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(TextSize::new(33));
+        assert_eq!(before.len(), 10);
+        assert_eq!(after.len(), 0);
+    }
+
+    #[test]
+    fn tokens_split_at_inside_gap() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(TextSize::new(13));
+        assert_eq!(before.len(), 6);
+        assert_eq!(after.len(), 4);
+    }
+
+    #[test]
+    #[should_panic(expected = "Offset 18 is inside token `Comment 15..24`")]
+    fn tokens_split_at_inside_token() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        tokens.split_at(TextSize::new(18));
+    }
+
+    #[test]
+    fn tokens_split_at_matches_before_and_after() {
+        let offset = TextSize::new(15);
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(offset);
+        assert_eq!(before, tokens.before(offset));
+        assert_eq!(after, tokens.after(offset));
+    }
+
+    #[test]
+    #[should_panic(expected = "Contents of after slice different when offset at dedent")]
+    fn tokens_split_at_matches_before_and_after_zero_length() {
+        let offset = TextSize::new(13);
+        let tokens = new_tokens(
+            [
+                (TokenKind::If, 0..2),
+                (TokenKind::Name, 3..4),
+                (TokenKind::Colon, 4..5),
+                (TokenKind::Newline, 5..6),
+                (TokenKind::Indent, 6..7),
+                (TokenKind::Pass, 7..11),
+                (TokenKind::Newline, 11..12),
+                (TokenKind::NonLogicalNewline, 12..13),
+                (TokenKind::Dedent, 13..13),
+                (TokenKind::Name, 13..14),
+                (TokenKind::Newline, 14..14),
+            ]
+            .into_iter(),
+        );
+        let (before, after) = tokens.split_at(offset);
+        assert_eq!(before, tokens.before(offset));
+        assert!(
+            after == tokens.after(offset),
+            "Contents of after slice different when offset at dedent"
+        );
+    }
 }
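
For context, here is a hedged sketch of the lookup that `load_from_tokens`
performs with `split_at` (distilled from the suppressions hunk above;
`indent_before` is a hypothetical helper, not part of the commit):

use ruff_python_ast::token::{TokenKind, Tokens};
use ruff_text_size::{TextSize, TextSlice};

// Hypothetical helper: recover the enclosing indentation for a suppression
// comment starting at `comment_start` without walking the whole token stream.
fn indent_before<'a>(tokens: &Tokens, source: &'a str, comment_start: TextSize) -> &'a str {
    // Split the stream at the comment's start; only `after` would then need
    // scanning for the Dedent that closes the suppressed range.
    let (before, _after) = tokens.split_at(comment_start);
    // Only tokens before the comment can establish its indentation;
    // the nearest preceding Indent token wins.
    before
        .iter()
        .rfind(|token| token.kind() == TokenKind::Indent)
        .map(|token| source.slice(token))
        .unwrap_or_default()
}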


@@ -120,8 +120,12 @@ pub(crate) fn check(
     let directives = extract_directives(parsed.tokens(), Flags::all(), &locator, &indexer);

     // Parse range suppression comments
-    let suppressions =
-        Suppressions::from_tokens(&settings.linter, locator.contents(), parsed.tokens());
+    let suppressions = Suppressions::from_tokens(
+        &settings.linter,
+        locator.contents(),
+        parsed.tokens(),
+        &indexer,
+    );

     // Generate checks.
     let diagnostics = check_path(


@@ -241,8 +241,12 @@ impl Workspace {
             &indexer,
         );

-        let suppressions =
-            Suppressions::from_tokens(&self.settings.linter, locator.contents(), parsed.tokens());
+        let suppressions = Suppressions::from_tokens(
+            &self.settings.linter,
+            locator.contents(),
+            parsed.tokens(),
+            &indexer,
+        );

         // Generate checks.
         let diagnostics = check_path(