perf(pycodestyle): Remove regex captures (#3735)

This commit is contained in:
Micha Reiser 2023-03-28 09:50:34 +02:00 committed by GitHub
parent 113a8b8fda
commit 1d724b1495
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 154 additions and 55 deletions

View File

@ -43,6 +43,20 @@ pub fn check_logical_lines(
) -> Vec<Diagnostic> { ) -> Vec<Diagnostic> {
let mut diagnostics = vec![]; let mut diagnostics = vec![];
#[cfg(feature = "logical_lines")]
let should_fix_missing_whitespace =
autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace);
#[cfg(not(feature = "logical_lines"))]
let should_fix_missing_whitespace = false;
#[cfg(feature = "logical_lines")]
let should_fix_whitespace_before_parameters =
autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);
#[cfg(not(feature = "logical_lines"))]
let should_fix_whitespace_before_parameters = false;
let indent_char = stylist.indentation().as_char(); let indent_char = stylist.indentation().as_char();
let mut prev_line = None; let mut prev_line = None;
let mut prev_indent_level = None; let mut prev_indent_level = None;
@ -152,15 +166,12 @@ pub fn check_logical_lines(
} }
} }
#[cfg(feature = "logical_lines")] for diagnostic in missing_whitespace(
let should_fix = autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace); line.text(),
start_loc.row(),
#[cfg(not(feature = "logical_lines"))] should_fix_missing_whitespace,
let should_fix = false; indent_level,
) {
for diagnostic in
missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level)
{
if settings.rules.enabled(diagnostic.kind.rule()) { if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic); diagnostics.push(diagnostic);
} }
@ -168,14 +179,9 @@ pub fn check_logical_lines(
} }
if line.flags().contains(TokenFlags::BRACKET) { if line.flags().contains(TokenFlags::BRACKET) {
#[cfg(feature = "logical_lines")] for diagnostic in
let should_fix = whitespace_before_parameters(line.tokens(), should_fix_whitespace_before_parameters)
autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters); {
#[cfg(not(feature = "logical_lines"))]
let should_fix = false;
for diagnostic in whitespace_before_parameters(line.tokens(), should_fix) {
if settings.rules.enabled(diagnostic.kind.rule()) { if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic); diagnostics.push(diagnostic);
} }

View File

@ -45,7 +45,8 @@ impl<'a> LogicalLines<'a> {
assert!(u32::try_from(tokens.len()).is_ok()); assert!(u32::try_from(tokens.len()).is_ok());
let single_token = tokens.len() == 1; let single_token = tokens.len() == 1;
let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len()); let mut builder =
LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len());
let mut parens: u32 = 0; let mut parens: u32 = 0;
for (start, token, end) in tokens.iter().flatten() { for (start, token, end) in tokens.iter().flatten() {
@ -280,10 +281,11 @@ pub struct LogicalLinesBuilder<'a> {
} }
impl<'a> LogicalLinesBuilder<'a> { impl<'a> LogicalLinesBuilder<'a> {
fn with_token_capacity(capacity: usize) -> Self { fn with_capacity(tokens: usize, string: usize) -> Self {
Self { Self {
tokens: Vec::with_capacity(capacity), tokens: Vec::with_capacity(tokens),
mappings: Mappings::with_capacity(capacity + 1), mappings: Mappings::with_capacity(tokens + 1),
text: String::with_capacity(string),
..Self::default() ..Self::default()
} }
} }
@ -340,6 +342,9 @@ impl<'a> LogicalLinesBuilder<'a> {
// TODO(charlie): "Mute" strings. // TODO(charlie): "Mute" strings.
let text = if let Tok::String { value, .. } = token { let text = if let Tok::String { value, .. } = token {
// Replace the content of strings with a non-whitespace sequence because some
// lints search for whitespace in the document, and whitespace inside of a
// string would complicate that search.
Cow::Owned(format!("\"{}\"", "x".repeat(value.width()))) Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
} else { } else {
Cow::Borrowed(locator.slice(Range { Cow::Borrowed(locator.slice(Range {

View File

@ -103,17 +103,16 @@ impl Violation for WhitespaceBeforePunctuation {
// TODO(charlie): Pycodestyle has a negative lookahead on the end. // TODO(charlie): Pycodestyle has a negative lookahead on the end.
static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> = static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap()); Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap());
/// E201, E202, E203 /// E201, E202, E203
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> { pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![]; let mut diagnostics = vec![];
for line_match in EXTRANEOUS_WHITESPACE_REGEX.captures_iter(line) { for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) {
let match_ = line_match.get(1).unwrap(); let text = &line[line_match.range()];
let text = match_.as_str();
let char = text.trim(); let char = text.trim();
let found = match_.start(); let found = line_match.start();
if text.chars().last().unwrap().is_ascii_whitespace() { if text.chars().last().unwrap().is_ascii_whitespace() {
diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into())); diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
} else if line.chars().nth(found - 1).map_or(false, |c| c != ',') { } else if line.chars().nth(found - 1).map_or(false, |c| c != ',') {

View File

@ -86,3 +86,60 @@ mod whitespace_around_keywords;
mod whitespace_around_named_parameter_equals; mod whitespace_around_named_parameter_equals;
mod whitespace_before_comment; mod whitespace_before_comment;
mod whitespace_before_parameters; mod whitespace_before_parameters;
/// Classification of a contiguous run of whitespace next to a token.
///
/// A run that contains at least one tab is always classified as [`Whitespace::Tab`],
/// regardless of how many other whitespace characters surround the tab.
// The callers of this type are compiled only with the `logical_lines` feature,
// so without that feature the variants and methods would be reported as unused.
#[allow(unused)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Whitespace {
    /// The run is empty.
    None,
    /// The run is exactly one whitespace character that is not a tab.
    Single,
    /// The run is two or more whitespace characters, none of which is a tab.
    Many,
    /// The run contains at least one tab.
    Tab,
}

impl Whitespace {
    /// Measures the whitespace run at the *start* of `content`.
    ///
    /// Returns the byte length of the run together with its classification.
    /// The scan stops at the first non-whitespace character.
    #[allow(dead_code)]
    fn leading(content: &str) -> (usize, Self) {
        let mut offset = 0;
        let mut kind = Self::None;

        for c in content.chars() {
            if c == '\t' {
                kind = Self::Tab;
            } else if c.is_whitespace() {
                kind = match kind {
                    Self::None => Self::Single,
                    Self::Single | Self::Many => Self::Many,
                    // Once a tab has been seen the run stays a tab run.
                    Self::Tab => Self::Tab,
                };
            } else {
                break;
            }
            // Offsets are byte offsets, so advance by the encoded width.
            offset += c.len_utf8();
        }

        (offset, kind)
    }

    /// Measures the whitespace run at the *end* of `content`.
    ///
    /// Returns the classification together with the byte length of the whole
    /// run, so that `content.len() - offset` is the byte index at which the
    /// run starts.
    #[allow(dead_code)]
    fn trailing(content: &str) -> (Self, usize) {
        let mut count = 0usize;
        let mut offset = 0;
        let mut has_tab = false;

        for c in content.chars().rev() {
            if c == '\t' {
                // Keep scanning instead of returning here: whitespace may
                // precede the tab, and the offset must cover the entire run
                // (as it does for the non-tab cases and in `leading`).
                has_tab = true;
            } else if c.is_whitespace() {
                count += 1;
            } else {
                break;
            }
            offset += c.len_utf8();
        }

        if has_tab {
            (Self::Tab, offset)
        } else {
            match count {
                0 => (Self::None, 0),
                1 => (Self::Single, offset),
                _ => (Self::Many, offset),
            }
        }
    }
}

View File

@ -2,10 +2,15 @@
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;
use crate::rules::pycodestyle::helpers::is_op_token;
use crate::rules::pycodestyle::rules::Whitespace;
use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation; use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;
/// ## What it does /// ## What it does
/// Checks for extraneous tabs before an operator. /// Checks for extraneous tabs before an operator.
@ -123,28 +128,41 @@ impl Violation for MultipleSpacesAfterOperator {
} }
} }
static OPERATOR_REGEX: Lazy<Regex> = static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap());
Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());
/// E221, E222, E223, E224 /// E221, E222, E223, E224
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> { pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![]; let mut diagnostics = vec![];
for line_match in OPERATOR_REGEX.captures_iter(line) { let mut last_end = None;
let before = line_match.get(1).unwrap();
let after = line_match.get(2).unwrap();
if before.as_str().contains('\t') { for line_match in OPERATOR_REGEX.find_iter(line) {
diagnostics.push((before.start(), TabBeforeOperator.into())); if last_end != Some(line_match.start()) {
} else if before.as_str().len() > 1 { let before = &line[..line_match.start()];
diagnostics.push((before.start(), MultipleSpacesBeforeOperator.into()));
match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeOperator.into()));
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeOperator.into(),
)),
_ => {}
}
} }
if after.as_str().contains('\t') { let after = &line[line_match.end()..];
diagnostics.push((after.start(), TabAfterOperator.into())); let (leading_offset, leading_kind) = Whitespace::leading(after);
} else if after.as_str().len() > 1 { match leading_kind {
diagnostics.push((after.start(), MultipleSpacesAfterOperator.into())); Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into()));
}
_ => {}
} }
last_end = Some(line_match.end() + leading_offset);
} }
diagnostics diagnostics
} }

View File

@ -3,6 +3,7 @@
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
use crate::rules::pycodestyle::rules::Whitespace;
use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation; use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
@ -111,28 +112,41 @@ impl Violation for TabBeforeKeyword {
} }
static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| { static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap() Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap()
}); });
/// E271, E272, E273, E274 /// E271, E272, E273, E274
#[cfg(feature = "logical_lines")] #[cfg(feature = "logical_lines")]
pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> { pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![]; let mut diagnostics = vec![];
for line_match in KEYWORD_REGEX.captures_iter(line) { let mut last_end = None;
let before = line_match.get(1).unwrap();
let after = line_match.get(2).unwrap();
if before.as_str().contains('\t') { for line_match in KEYWORD_REGEX.find_iter(line) {
diagnostics.push((before.start(), TabBeforeKeyword.into())); if last_end != Some(line_match.start()) {
} else if before.as_str().len() > 1 { let before = &line[..line_match.start()];
diagnostics.push((before.start(), MultipleSpacesBeforeKeyword.into())); match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into()));
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeKeyword.into(),
)),
_ => {}
}
} }
if after.as_str().contains('\t') { let after = &line[line_match.end()..];
diagnostics.push((after.start(), TabAfterKeyword.into())); let (leading_offset, leading_kind) = Whitespace::leading(after);
} else if after.as_str().len() > 1 { match leading_kind {
diagnostics.push((after.start(), MultipleSpacesAfterKeyword.into())); Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into()));
}
_ => {}
} }
last_end = Some(line_match.end() + leading_offset);
} }
diagnostics diagnostics
} }

View File

@ -9,10 +9,10 @@ expression: diagnostics
fixable: false fixable: false
location: location:
row: 28 row: 28
column: 1 column: 2
end_location: end_location:
row: 28 row: 28
column: 1 column: 2
fix: fix:
edits: [] edits: []
parent: ~ parent: ~
@ -23,10 +23,10 @@ expression: diagnostics
fixable: false fixable: false
location: location:
row: 30 row: 30
column: 4 column: 5
end_location: end_location:
row: 30 row: 30
column: 4 column: 5
fix: fix:
edits: [] edits: []
parent: ~ parent: ~