diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs
index 3e2f070a00..ae9b6b215f 100644
--- a/crates/ruff_linter/src/linter.rs
+++ b/crates/ruff_linter/src/linter.rs
@@ -405,7 +405,8 @@ pub fn add_noqa_to_path(
     );
 
     // Parse range suppression comments
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
 
     // Generate diagnostics, ignoring any existing `noqa` directives.
     let diagnostics = check_path(
@@ -479,7 +480,8 @@ pub fn lint_only(
     );
 
     // Parse range suppression comments
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
 
     // Generate diagnostics.
     let diagnostics = check_path(
@@ -596,7 +598,8 @@ pub fn lint_fix<'a>(
     );
 
     // Parse range suppression comments
-    let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+    let suppressions =
+        Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
 
     // Generate diagnostics.
     let diagnostics = check_path(
@@ -978,7 +981,8 @@ mod tests {
             &locator,
             &indexer,
         );
-        let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
+        let suppressions =
+            Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
         let mut diagnostics = check_path(
             path,
             None,
diff --git a/crates/ruff_linter/src/rules/pyflakes/mod.rs b/crates/ruff_linter/src/rules/pyflakes/mod.rs
index 02cd5158a8..e1cc8ea935 100644
--- a/crates/ruff_linter/src/rules/pyflakes/mod.rs
+++ b/crates/ruff_linter/src/rules/pyflakes/mod.rs
@@ -957,7 +957,7 @@ mod tests {
             &indexer,
         );
         let suppressions =
-            Suppressions::from_tokens(&settings, locator.contents(), parsed.tokens());
+            Suppressions::from_tokens(&settings, locator.contents(), parsed.tokens(), &indexer);
         let mut messages = check_path(
             Path::new(""),
             None,
diff --git a/crates/ruff_linter/src/suppression.rs b/crates/ruff_linter/src/suppression.rs
index aa888c36d7..81c7da9138 100644
--- a/crates/ruff_linter/src/suppression.rs
+++ b/crates/ruff_linter/src/suppression.rs
@@ -3,13 +3,13 @@ use core::fmt;
 use ruff_db::diagnostic::Diagnostic;
 use ruff_diagnostics::{Edit, Fix};
 use ruff_python_ast::token::{TokenKind, Tokens};
-use ruff_python_ast::whitespace::indentation;
+use ruff_python_index::Indexer;
 use rustc_hash::FxHashSet;
 use std::cell::Cell;
 use std::{error::Error, fmt::Formatter};
 use thiserror::Error;
 
-use ruff_python_trivia::Cursor;
+use ruff_python_trivia::{Cursor, indentation_at_offset};
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize, TextSlice};
 use smallvec::{SmallVec, smallvec};
 
@@ -153,10 +153,15 @@ pub struct Suppressions {
 }
 
 impl Suppressions {
-    pub fn from_tokens(settings: &LinterSettings, source: &str, tokens: &Tokens) -> Suppressions {
+    pub fn from_tokens(
+        settings: &LinterSettings,
+        source: &str,
+        tokens: &Tokens,
+        indexer: &Indexer,
+    ) -> Suppressions {
         if is_range_suppressions_enabled(settings) {
             let builder = SuppressionsBuilder::new(source);
-            builder.load_from_tokens(tokens)
+            builder.load_from_tokens(tokens, indexer)
         } else {
             Suppressions::default()
         }
@@ -355,7 +360,6 @@ pub(crate) struct SuppressionsBuilder<'a> {
     valid: Vec<Suppression>,
     invalid: Vec<InvalidSuppression>,
-    errors: Vec<ParseError>,
     pending: Vec<PendingSuppressionComment<'a>>,
 }
@@ -368,74 +372,109 @@ impl<'a> SuppressionsBuilder<'a> {
         }
     }
 
-    pub(crate) fn 
load_from_tokens(mut self, tokens: &Tokens) -> Suppressions { - let default_indent = ""; + pub(crate) fn load_from_tokens(mut self, tokens: &Tokens, indexer: &Indexer) -> Suppressions { let mut indents: Vec<&str> = vec![]; + let mut errors = Vec::new(); - // Iterate through tokens, tracking indentation, filtering trailing comments, and then - // looking for matching comments from the previous block when reaching a dedent token. - for (token_index, token) in tokens.iter().enumerate() { - match token.kind() { - TokenKind::Indent => { - indents.push(self.source.slice(token)); - } - TokenKind::Dedent => { - self.match_comments(indents.last().copied().unwrap_or_default(), token.range()); - indents.pop(); - } - TokenKind::Comment => { - let mut parser = SuppressionParser::new(self.source, token.range()); - match parser.parse_comment() { - Ok(comment) => { - let indent = indentation(self.source, &comment.range); - - let Some(indent) = indent else { - // trailing suppressions are not supported - self.invalid.push(InvalidSuppression { - kind: InvalidSuppressionKind::Trailing, - comment, - }); - continue; - }; - - // comment matches current block's indentation, or precedes an indent/dedent token - if indent == indents.last().copied().unwrap_or_default() - || tokens[token_index..] - .iter() - .find(|t| !t.kind().is_trivia()) - .is_some_and(|t| { - matches!(t.kind(), TokenKind::Dedent | TokenKind::Indent) - }) - { - self.pending - .push(PendingSuppressionComment { indent, comment }); - } else { - // weirdly indented? ¯\_(ツ)_/¯ - self.invalid.push(InvalidSuppression { - kind: InvalidSuppressionKind::Indentation, - comment, - }); - } - } - Err(ParseError { - kind: ParseErrorKind::NotASuppression, - .. - }) => {} - Err(error) => { - self.errors.push(error); - } + let mut suppressions = indexer + .comment_ranges() + .iter() + .copied() + .filter_map(|comment_range| { + let mut parser = SuppressionParser::new(self.source, comment_range); + match parser.parse_comment() { + Ok(comment) => Some(comment), + Err(ParseError { + kind: ParseErrorKind::NotASuppression, + .. + }) => None, + Err(error) => { + errors.push(error); + None } } - _ => {} + }) + .peekable(); + + 'comments: while let Some(suppression) = suppressions.peek() { + indents.clear(); + + let (before, after) = tokens.split_at(suppression.range.start()); + let last_indent = before + .iter() + .rfind(|token| token.kind() == TokenKind::Indent) + .map(|token| self.source.slice(token)) + .unwrap_or_default(); + + indents.push(last_indent); + + // Iterate through tokens, tracking indentation, filtering trailing comments, and then + // looking for matching comments from the previous block when reaching a dedent token. 
+ for (token_index, token) in after.iter().enumerate() { + let current_indent = indents.last().copied().unwrap_or_default(); + match token.kind() { + TokenKind::Indent => { + indents.push(self.source.slice(token)); + } + TokenKind::Dedent => { + self.match_comments(current_indent, token.range()); + + indents.pop(); + + if indents.is_empty() || self.pending.is_empty() { + continue 'comments; + } + } + TokenKind::Comment => { + let Some(suppression) = + suppressions.next_if(|suppression| suppression.range == token.range()) + else { + continue; + }; + + let Some(indent) = + indentation_at_offset(suppression.range.start(), self.source) + else { + // trailing suppressions are not supported + self.invalid.push(InvalidSuppression { + kind: InvalidSuppressionKind::Trailing, + comment: suppression, + }); + continue; + }; + + // comment matches current block's indentation, or precedes an indent/dedent token + if indent == current_indent + || after[token_index..] + .iter() + .find(|t| !t.kind().is_trivia()) + .is_some_and(|t| { + matches!(t.kind(), TokenKind::Dedent | TokenKind::Indent) + }) + { + self.pending.push(PendingSuppressionComment { + indent, + comment: suppression, + }); + } else { + // weirdly indented? ¯\_(ツ)_/¯ + self.invalid.push(InvalidSuppression { + kind: InvalidSuppressionKind::Indentation, + comment: suppression, + }); + } + } + _ => {} + } } } - self.match_comments(default_indent, TextRange::up_to(self.source.text_len())); + self.match_comments("", TextRange::up_to(self.source.text_len())); Suppressions { valid: self.valid, invalid: self.invalid, - errors: self.errors, + errors, } } @@ -691,6 +730,7 @@ mod tests { use insta::assert_debug_snapshot; use itertools::Itertools; + use ruff_python_index::Indexer; use ruff_python_parser::{Mode, ParseOptions, parse}; use ruff_text_size::{TextLen, TextRange, TextSize}; use similar::DiffableStr; @@ -1585,10 +1625,12 @@ def bar(): /// Parse all suppressions and errors in a module for testing fn debug(source: &'_ str) -> DebugSuppressions<'_> { let parsed = parse(source, ParseOptions::from(Mode::Module)).unwrap(); + let indexer = Indexer::from_tokens(parsed.tokens(), source); let suppressions = Suppressions::from_tokens( &LinterSettings::default().with_preview_mode(), source, parsed.tokens(), + &indexer, ); DebugSuppressions { source, diff --git a/crates/ruff_linter/src/test.rs b/crates/ruff_linter/src/test.rs index 344c921890..fc3d79b4f6 100644 --- a/crates/ruff_linter/src/test.rs +++ b/crates/ruff_linter/src/test.rs @@ -235,7 +235,8 @@ pub(crate) fn test_contents<'a>( &locator, &indexer, ); - let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens()); + let suppressions = + Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer); let messages = check_path( path, path.parent() @@ -303,7 +304,7 @@ pub(crate) fn test_contents<'a>( ); let suppressions = - Suppressions::from_tokens(settings, locator.contents(), parsed.tokens()); + Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer); let fixed_messages = check_path( path, None, diff --git a/crates/ruff_python_ast/src/token/tokens.rs b/crates/ruff_python_ast/src/token/tokens.rs index 7ca096d9b0..f26259b912 100644 --- a/crates/ruff_python_ast/src/token/tokens.rs +++ b/crates/ruff_python_ast/src/token/tokens.rs @@ -185,6 +185,33 @@ impl Tokens { after } + + /// Returns a pair of token slices from both before and after the given [`TextSize`] offset. 
+    ///
+    /// If the given offset falls in the gap between two tokens, the "before" slice ends with
+    /// the token preceding the gap, and the "after" slice contains the rest of the tokens,
+    /// starting with the token following the gap.
+    ///
+    /// Note that the contents of the "after" slice may differ from the result of calling
+    /// `after()` directly, particularly when the given offset falls on a zero-width token
+    /// like `Dedent`.
+    ///
+    /// # Panics
+    ///
+    /// If the given offset falls inside a token's range at any point other than the start of
+    /// the range.
+    pub fn split_at(&self, offset: TextSize) -> (&[Token], &[Token]) {
+        let partition_point = self.partition_point(|token| token.start() < offset);
+        let (before, after) = self.raw.split_at(partition_point);
+
+        if let Some(last) = before.last() {
+            assert!(
+                offset >= last.end(),
+                "Offset {offset:?} is inside token `{last:?}`"
+            );
+        }
+        (before, after)
+    }
 }
 
 impl<'a> IntoIterator for &'a Tokens {
@@ -513,4 +540,72 @@ mod tests {
         let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
         tokens.in_range(TextRange::new(0.into(), 6.into()));
     }
+
+    #[test]
+    fn tokens_split_at_first_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(TextSize::new(0));
+        assert_eq!(before.len(), 0);
+        assert_eq!(after.len(), 10);
+    }
+
+    #[test]
+    fn tokens_split_at_last_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(TextSize::new(33));
+        assert_eq!(before.len(), 10);
+        assert_eq!(after.len(), 0);
+    }
+
+    #[test]
+    fn tokens_split_at_inside_gap() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(TextSize::new(13));
+        assert_eq!(before.len(), 6);
+        assert_eq!(after.len(), 4);
+    }
+
+    #[test]
+    #[should_panic(expected = "Offset 18 is inside token `Comment 15..24`")]
+    fn tokens_split_at_inside_token() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        tokens.split_at(TextSize::new(18));
+    }
+
+    #[test]
+    fn tokens_split_at_matches_before_and_after() {
+        let offset = TextSize::new(15);
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let (before, after) = tokens.split_at(offset);
+        assert_eq!(before, tokens.before(offset));
+        assert_eq!(after, tokens.after(offset));
+    }
+
+    #[test]
+    #[should_panic(expected = "Contents of after slice different when offset at dedent")]
+    fn tokens_split_at_matches_before_and_after_zero_length() {
+        let offset = TextSize::new(13);
+        let tokens = new_tokens(
+            [
+                (TokenKind::If, 0..2),
+                (TokenKind::Name, 3..4),
+                (TokenKind::Colon, 4..5),
+                (TokenKind::Newline, 5..6),
+                (TokenKind::Indent, 6..7),
+                (TokenKind::Pass, 7..11),
+                (TokenKind::Newline, 11..12),
+                (TokenKind::NonLogicalNewline, 12..13),
+                (TokenKind::Dedent, 13..13),
+                (TokenKind::Name, 13..14),
+                (TokenKind::Newline, 14..14),
+            ]
+            .into_iter(),
+        );
+        let (before, after) = tokens.split_at(offset);
+        assert_eq!(before, tokens.before(offset));
+        assert!(
+            after == tokens.after(offset),
+            "Contents of after slice different when offset at dedent"
+        );
+    }
 }
diff --git a/crates/ruff_server/src/lint.rs b/crates/ruff_server/src/lint.rs
index f02cd04e8c..2856f81db7 100644
--- a/crates/ruff_server/src/lint.rs
+++ b/crates/ruff_server/src/lint.rs
@@ -120,8 +120,12 @@ pub(crate) fn check(
     let directives = extract_directives(parsed.tokens(), Flags::all(), &locator, 
&indexer); // Parse range suppression comments - let suppressions = - Suppressions::from_tokens(&settings.linter, locator.contents(), parsed.tokens()); + let suppressions = Suppressions::from_tokens( + &settings.linter, + locator.contents(), + parsed.tokens(), + &indexer, + ); // Generate checks. let diagnostics = check_path( diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs index 9a62704977..785c2095cb 100644 --- a/crates/ruff_wasm/src/lib.rs +++ b/crates/ruff_wasm/src/lib.rs @@ -241,8 +241,12 @@ impl Workspace { &indexer, ); - let suppressions = - Suppressions::from_tokens(&self.settings.linter, locator.contents(), parsed.tokens()); + let suppressions = Suppressions::from_tokens( + &self.settings.linter, + locator.contents(), + parsed.tokens(), + &indexer, + ); // Generate checks. let diagnostics = check_path(
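
For reviewers who want to try the new signature, every call site in this patch follows the same pattern as the updated `debug` test helper in `suppression.rs`: parse the source, build an `Indexer` from the token stream, then hand both to `Suppressions::from_tokens`. A minimal sketch of that pattern follows. It assumes it lives inside `ruff_linter`, where `LinterSettings` and the `suppression` module are visible; the helper name `suppressions_for` is hypothetical and not part of the patch.

```rust
use ruff_python_index::Indexer;
use ruff_python_parser::{Mode, ParseOptions, parse};

use crate::settings::LinterSettings;
use crate::suppression::Suppressions;

/// Hypothetical helper mirroring the updated call sites; not part of the patch.
fn suppressions_for(source: &str) -> Suppressions {
    let parsed = parse(source, ParseOptions::from(Mode::Module)).unwrap();
    // The builder now scans the indexer's comment ranges instead of walking
    // every token looking for `TokenKind::Comment`, so each caller constructs
    // an `Indexer` and threads it through.
    let indexer = Indexer::from_tokens(parsed.tokens(), source);
    Suppressions::from_tokens(
        &LinterSettings::default().with_preview_mode(),
        source,
        parsed.tokens(),
        &indexer,
    )
}
```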
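Likewise, a small sketch of the `Tokens::split_at` semantics that the new tests pin down, written as a test so it could run inside the `tokens.rs` test module. It reuses that module's existing `new_tokens` helper and `TEST_CASE_WITH_GAP` fixture (ten tokens with a source gap spanning offsets 11..15); nothing here is new API beyond what the diff adds.

```rust
#[test]
fn split_at_semantics_sketch() {
    // Sketch only: relies on the test module's existing fixtures.
    let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());

    // Offset 13 falls inside the source gap between the sixth and seventh
    // tokens: the six tokens starting before the offset land in `before`,
    // the remaining four in `after`; nothing is dropped or duplicated.
    let (before, after) = tokens.split_at(TextSize::new(13));
    assert_eq!(before.len() + after.len(), 10);

    // A token's start is always a valid split point, while an offset strictly
    // inside a token panics, e.g. `tokens.split_at(TextSize::new(18))` would
    // hit the `Comment` token spanning 15..24.
    let (before, _) = tokens.split_at(TextSize::new(15));
    assert_eq!(before.len(), 6);
}
```

The final new test documents the zero-width-token caveat from the doc comment: when the offset sits exactly on a `Dedent`, the "after" slices produced by `split_at` and by `after()` are not guaranteed to match.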