From c34908f5ad1878462806e178af19084c0692adf0 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 6 Feb 2024 06:44:56 -0800 Subject: [PATCH] Use `memchr` for tab-indentation detection (#9853) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary The benchmarks show a pretty consistent 1% speedup here for all-rules, though not enough to trigger our threshold of course: ![Screenshot 2024-02-05 at 11 55 59 PM](https://github.com/astral-sh/ruff/assets/1309177/317dca3f-f25f-46f5-8ea8-894a1747d006) --- crates/ruff_linter/src/checkers/tokens.rs | 2 +- .../pycodestyle/rules/tab_indentation.rs | 81 ++++++++++--------- 2 files changed, 44 insertions(+), 39 deletions(-) diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index c67c21ba4d..26558aa252 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -95,7 +95,7 @@ pub(crate) fn check_tokens( } if settings.rules.enabled(Rule::TabIndentation) { - pycodestyle::rules::tab_indentation(&mut diagnostics, tokens, locator, indexer); + pycodestyle::rules::tab_indentation(&mut diagnostics, locator, indexer); } if settings.rules.any_enabled(&[ diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/tab_indentation.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/tab_indentation.rs index 153caab648..d0bda797a1 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/tab_indentation.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/tab_indentation.rs @@ -1,11 +1,8 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_index::Indexer; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::Tok; -use ruff_python_trivia::leading_indentation; use ruff_source_file::Locator; -use ruff_text_size::{TextLen, TextRange, TextSize}; +use ruff_text_size::{TextRange, TextSize}; /// ## What it does /// Checks for 
indentation that uses tabs. @@ -48,44 +45,52 @@ impl Violation for TabIndentation { /// W191 pub(crate) fn tab_indentation( diagnostics: &mut Vec<Diagnostic>, - tokens: &[LexResult], locator: &Locator, indexer: &Indexer, ) { - // Always check the first line for tab indentation as there's no newline - // token before it. - tab_indentation_at_line_start(diagnostics, locator, TextSize::default()); + let contents = locator.contents().as_bytes(); + let mut offset = 0; + while let Some(index) = memchr::memchr(b'\t', &contents[offset..]) { + // If we find a tab in the file, grab the entire line. + let range = locator.full_line_range(TextSize::try_from(offset + index).unwrap()); - for (tok, range) in tokens.iter().flatten() { - if matches!(tok, Tok::Newline | Tok::NonLogicalNewline) { - tab_indentation_at_line_start(diagnostics, locator, range.end()); + // Determine whether the tab is part of the line's indentation. + if let Some(indent) = tab_indentation_at_line_start(range.start(), locator, indexer) { + diagnostics.push(Diagnostic::new(TabIndentation, indent)); + } + + // Advance to the next line. + offset = range.end().to_usize(); + } +} + +/// If a line includes tabs in its indentation, returns the range of the +/// indent. +fn tab_indentation_at_line_start( + line_start: TextSize, + locator: &Locator, + indexer: &Indexer, +) -> Option<TextRange> { + let mut contains_tab = false; + for (i, char) in locator.after(line_start).as_bytes().iter().enumerate() { + match char { + // If we find a tab character, report it as a violation. + b'\t' => { + contains_tab = true; + } + // If we find a space, continue. + b' ' | b'\x0C' => {} + // If we find a non-whitespace character, stop. 
+ _ => { + if contains_tab { + let range = TextRange::at(line_start, TextSize::try_from(i).unwrap()); + if !indexer.multiline_ranges().contains_range(range) { + return Some(range); + } + } + break; + } } } - - // The lexer doesn't emit `Newline` / `NonLogicalNewline` for a line - // continuation character (`\`), so we need to manually check for tab - // indentation for lines that follow a line continuation character. - for continuation_line in indexer.continuation_line_starts() { - tab_indentation_at_line_start( - diagnostics, - locator, - locator.full_line_end(*continuation_line), - ); - } -} - -/// Checks for indentation that uses tabs for a line starting at -/// the given [`TextSize`]. -fn tab_indentation_at_line_start( - diagnostics: &mut Vec<Diagnostic>, - locator: &Locator, - line_start: TextSize, -) { - let indent = leading_indentation(locator.after(line_start)); - if indent.find('\t').is_some() { - diagnostics.push(Diagnostic::new( - TabIndentation, - TextRange::at(line_start, indent.text_len()), - )); - } + None }