mirror of https://github.com/astral-sh/ruff
Use `memchr` for tab-indentation detection (#9853)
## Summary
The benchmarks show a pretty consistent 1% speedup here for all-rules, though not enough to trigger our threshold of course.
This commit is contained in:
parent
a662c2447c
commit
c34908f5ad
|
|
@ -95,7 +95,7 @@ pub(crate) fn check_tokens(
|
||||||
}
|
}
|
||||||
|
|
||||||
if settings.rules.enabled(Rule::TabIndentation) {
|
if settings.rules.enabled(Rule::TabIndentation) {
|
||||||
pycodestyle::rules::tab_indentation(&mut diagnostics, tokens, locator, indexer);
|
pycodestyle::rules::tab_indentation(&mut diagnostics, locator, indexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
if settings.rules.any_enabled(&[
|
if settings.rules.any_enabled(&[
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,8 @@
|
||||||
use ruff_diagnostics::{Diagnostic, Violation};
|
use ruff_diagnostics::{Diagnostic, Violation};
|
||||||
use ruff_macros::{derive_message_formats, violation};
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
use ruff_python_index::Indexer;
|
use ruff_python_index::Indexer;
|
||||||
use ruff_python_parser::lexer::LexResult;
|
|
||||||
use ruff_python_parser::Tok;
|
|
||||||
use ruff_python_trivia::leading_indentation;
|
|
||||||
use ruff_source_file::Locator;
|
use ruff_source_file::Locator;
|
||||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
use ruff_text_size::{TextRange, TextSize};
|
||||||
|
|
||||||
/// ## What it does
|
/// ## What it does
|
||||||
/// Checks for indentation that uses tabs.
|
/// Checks for indentation that uses tabs.
|
||||||
|
|
@ -48,44 +45,52 @@ impl Violation for TabIndentation {
|
||||||
/// W191
|
/// W191
|
||||||
pub(crate) fn tab_indentation(
|
pub(crate) fn tab_indentation(
|
||||||
diagnostics: &mut Vec<Diagnostic>,
|
diagnostics: &mut Vec<Diagnostic>,
|
||||||
tokens: &[LexResult],
|
|
||||||
locator: &Locator,
|
locator: &Locator,
|
||||||
indexer: &Indexer,
|
indexer: &Indexer,
|
||||||
) {
|
) {
|
||||||
// Always check the first line for tab indentation as there's no newline
|
let contents = locator.contents().as_bytes();
|
||||||
// token before it.
|
let mut offset = 0;
|
||||||
tab_indentation_at_line_start(diagnostics, locator, TextSize::default());
|
while let Some(index) = memchr::memchr(b'\t', &contents[offset..]) {
|
||||||
|
// If we find a tab in the file, grab the entire line.
|
||||||
|
let range = locator.full_line_range(TextSize::try_from(offset + index).unwrap());
|
||||||
|
|
||||||
for (tok, range) in tokens.iter().flatten() {
|
// Determine whether the tab is part of the line's indentation.
|
||||||
if matches!(tok, Tok::Newline | Tok::NonLogicalNewline) {
|
if let Some(indent) = tab_indentation_at_line_start(range.start(), locator, indexer) {
|
||||||
tab_indentation_at_line_start(diagnostics, locator, range.end());
|
diagnostics.push(Diagnostic::new(TabIndentation, indent));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advance to the next line.
|
||||||
|
offset = range.end().to_usize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If a line includes tabs in its indentation, returns the range of the
|
||||||
|
/// indent.
|
||||||
|
fn tab_indentation_at_line_start(
|
||||||
|
line_start: TextSize,
|
||||||
|
locator: &Locator,
|
||||||
|
indexer: &Indexer,
|
||||||
|
) -> Option<TextRange> {
|
||||||
|
let mut contains_tab = false;
|
||||||
|
for (i, char) in locator.after(line_start).as_bytes().iter().enumerate() {
|
||||||
|
match char {
|
||||||
|
// If we find a tab character, report it as a violation.
|
||||||
|
b'\t' => {
|
||||||
|
contains_tab = true;
|
||||||
|
}
|
||||||
|
// If we find a space, continue.
|
||||||
|
b' ' | b'\x0C' => {}
|
||||||
|
// If we find a non-whitespace character, stop.
|
||||||
|
_ => {
|
||||||
|
if contains_tab {
|
||||||
|
let range = TextRange::at(line_start, TextSize::try_from(i).unwrap());
|
||||||
|
if !indexer.multiline_ranges().contains_range(range) {
|
||||||
|
return Some(range);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
None
|
||||||
// The lexer doesn't emit `Newline` / `NonLogicalNewline` for a line
|
|
||||||
// continuation character (`\`), so we need to manually check for tab
|
|
||||||
// indentation for lines that follow a line continuation character.
|
|
||||||
for continuation_line in indexer.continuation_line_starts() {
|
|
||||||
tab_indentation_at_line_start(
|
|
||||||
diagnostics,
|
|
||||||
locator,
|
|
||||||
locator.full_line_end(*continuation_line),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Checks for indentation that uses tabs for a line starting at
|
|
||||||
/// the given [`TextSize`].
|
|
||||||
fn tab_indentation_at_line_start(
|
|
||||||
diagnostics: &mut Vec<Diagnostic>,
|
|
||||||
locator: &Locator,
|
|
||||||
line_start: TextSize,
|
|
||||||
) {
|
|
||||||
let indent = leading_indentation(locator.after(line_start));
|
|
||||||
if indent.find('\t').is_some() {
|
|
||||||
diagnostics.push(Diagnostic::new(
|
|
||||||
TabIndentation,
|
|
||||||
TextRange::at(line_start, indent.text_len()),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue