Skip walking all tokens when loading range suppressions (#22446)

- Adds `Tokens::split_at()` to get tokens before/after an offset.
- Updates `Suppressions::load_from_tokens` to take an `Indexer` and use
comment ranges to minimize the token walking needed to find
indent/dedent tokens (see the sketch after this list).
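
A minimal sketch of the idea (not the code from this commit): given a suppression comment's
range, obtained from the `Indexer`'s comment index, `split_at` bounds the indent/dedent walk to
the tokens that precede the comment. The helper name `indent_depth_before` is hypothetical.

```rust
use ruff_python_parser::{TokenKind, Tokens};
use ruff_text_size::TextRange;

/// Hypothetical helper: compute the indentation depth at a suppression
/// comment by walking only the tokens before the comment, rather than
/// scanning the whole token stream for every comment.
fn indent_depth_before(tokens: &Tokens, comment: TextRange) -> usize {
    // A comment's start is a token boundary, so `split_at` will not panic here.
    let (before, _after) = tokens.split_at(comment.start());
    before.iter().fold(0usize, |depth, token| match token.kind() {
        TokenKind::Indent => depth + 1,
        TokenKind::Dedent => depth.saturating_sub(1),
        _ => depth,
    })
}
```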

Adapted from
https://github.com/astral-sh/ruff/pull/21441#pullrequestreview-3503773083

Fixes #22087
Author: Amethyst Reese
Date: 2026-01-15 12:35:24 -08:00 (committed by GitHub)
parent b4b8299d6c
commit c696ef4025
7 changed files with 223 additions and 73 deletions

@@ -185,6 +185,33 @@ impl Tokens {
        after
    }

    /// Returns a pair of token slices containing the tokens before and after the given
    /// [`TextSize`] offset.
    ///
    /// If the given offset falls between two tokens (at or after the end of the previous token
    /// and at or before the start of the next), the "before" slice ends with the previous token
    /// and the "after" slice contains the remaining tokens.
    ///
    /// Note that the contents of the "after" slice may differ from the results of calling `after()`
    /// directly, particularly when the given offset occurs on zero-width tokens like `Dedent`.
    ///
    /// # Panics
    ///
    /// If the given offset falls inside a token's range at any point other than the start of
    /// that range.
    pub fn split_at(&self, offset: TextSize) -> (&[Token], &[Token]) {
        let partition_point = self.partition_point(|token| token.start() < offset);
        let (before, after) = self.raw.split_at(partition_point);
        if let Some(last) = before.last() {
            assert!(
                offset >= last.end(),
                "Offset {offset:?} is inside token `{last:?}`"
            );
        }
        (before, after)
    }
}
impl<'a> IntoIterator for &'a Tokens {
@@ -513,4 +540,72 @@ mod tests {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.in_range(TextRange::new(0.into(), 6.into()));
    }

    #[test]
    fn tokens_split_at_first_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let (before, after) = tokens.split_at(TextSize::new(0));
        assert_eq!(before.len(), 0);
        assert_eq!(after.len(), 10);
    }

    #[test]
    fn tokens_split_at_last_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let (before, after) = tokens.split_at(TextSize::new(33));
        assert_eq!(before.len(), 10);
        assert_eq!(after.len(), 0);
    }

    #[test]
    fn tokens_split_at_inside_gap() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let (before, after) = tokens.split_at(TextSize::new(13));
        assert_eq!(before.len(), 6);
        assert_eq!(after.len(), 4);
    }

    #[test]
    #[should_panic(expected = "Offset 18 is inside token `Comment 15..24`")]
    fn tokens_split_at_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.split_at(TextSize::new(18));
    }

    #[test]
    fn tokens_split_at_matches_before_and_after() {
        let offset = TextSize::new(15);
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let (before, after) = tokens.split_at(offset);
        assert_eq!(before, tokens.before(offset));
        assert_eq!(after, tokens.after(offset));
    }

    #[test]
    #[should_panic(expected = "Contents of after slice different when offset at dedent")]
    fn tokens_split_at_matches_before_and_after_zero_length() {
        let offset = TextSize::new(13);
        let tokens = new_tokens(
            [
                (TokenKind::If, 0..2),
                (TokenKind::Name, 3..4),
                (TokenKind::Colon, 4..5),
                (TokenKind::Newline, 5..6),
                (TokenKind::Indent, 6..7),
                (TokenKind::Pass, 7..11),
                (TokenKind::Newline, 11..12),
                (TokenKind::NonLogicalNewline, 12..13),
                (TokenKind::Dedent, 13..13),
                (TokenKind::Name, 13..14),
                (TokenKind::Newline, 14..14),
            ]
            .into_iter(),
        );
        let (before, after) = tokens.split_at(offset);
        assert_eq!(before, tokens.before(offset));
        assert!(
            after == tokens.after(offset),
            "Contents of after slice different when offset at dedent"
        );
    }
}