perf(pycodestyle): Remove regex captures (#3735)

2023-03-28 09:50:34 +02:00 · 2023-03-28 09:50:34 +02:00 · 1d724b1495
parent 113a8b8fda
commit 1d724b1495
7 changed files with 154 additions and 55 deletions
--- a/crates/ruff/src/checkers/logical_lines.rs
+++ b/crates/ruff/src/checkers/logical_lines.rs
@ -43,6 +43,20 @@ pub fn check_logical_lines(
 ) -> Vec<Diagnostic> {
    let mut diagnostics = vec![];
    #[cfg(feature = "logical_lines")]
    let should_fix_missing_whitespace =
        autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace);
    #[cfg(not(feature = "logical_lines"))]
    let should_fix_missing_whitespace = false;
    #[cfg(feature = "logical_lines")]
    let should_fix_whitespace_before_parameters =
        autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);
    #[cfg(not(feature = "logical_lines"))]
    let should_fix_whitespace_before_parameters = false;
    let indent_char = stylist.indentation().as_char();
    let mut prev_line = None;
    let mut prev_indent_level = None;
@ -152,15 +166,12 @@ pub fn check_logical_lines(
                }
            }
-            #[cfg(feature = "logical_lines")]
+            for diagnostic in missing_whitespace(
-            let should_fix = autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace);
+                line.text(),
-
+                start_loc.row(),
-            #[cfg(not(feature = "logical_lines"))]
+                should_fix_missing_whitespace,
-            let should_fix = false;
+                indent_level,
-
+            ) {
            for diagnostic in
                missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level)
            {
                if settings.rules.enabled(diagnostic.kind.rule()) {
                    diagnostics.push(diagnostic);
                }
@ -168,14 +179,9 @@ pub fn check_logical_lines(
        }
        if line.flags().contains(TokenFlags::BRACKET) {
-            #[cfg(feature = "logical_lines")]
+            for diagnostic in
-            let should_fix =
+                whitespace_before_parameters(line.tokens(), should_fix_whitespace_before_parameters)
-                autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);
+            {
            #[cfg(not(feature = "logical_lines"))]
            let should_fix = false;
            for diagnostic in whitespace_before_parameters(line.tokens(), should_fix) {
                if settings.rules.enabled(diagnostic.kind.rule()) {
                    diagnostics.push(diagnostic);
                }
--- a/crates/ruff/src/rules/pycodestyle/logical_lines.rs
+++ b/crates/ruff/src/rules/pycodestyle/logical_lines.rs
@ -45,7 +45,8 @@ impl<'a> LogicalLines<'a> {
        assert!(u32::try_from(tokens.len()).is_ok());
        let single_token = tokens.len() == 1;
-        let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len());
+        let mut builder =
            LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len());
        let mut parens: u32 = 0;
        for (start, token, end) in tokens.iter().flatten() {
@ -280,10 +281,11 @@ pub struct LogicalLinesBuilder<'a> {
 }
 impl<'a> LogicalLinesBuilder<'a> {
-    fn with_token_capacity(capacity: usize) -> Self {
+    fn with_capacity(tokens: usize, string: usize) -> Self {
        Self {
-            tokens: Vec::with_capacity(capacity),
+            tokens: Vec::with_capacity(tokens),
-            mappings: Mappings::with_capacity(capacity + 1),
+            mappings: Mappings::with_capacity(tokens + 1),
            text: String::with_capacity(string),
            ..Self::default()
        }
    }
@ -340,6 +342,9 @@ impl<'a> LogicalLinesBuilder<'a> {
        // TODO(charlie): "Mute" strings.
        let text = if let Tok::String { value, .. } = token {
            // Replace the content of strings with a non-whs sequence because some lints
            // search for whitespace in the document and whitespace inside of the string
            // would complicate the search.
            Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
        } else {
            Cow::Borrowed(locator.slice(Range {
--- a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs
@ -103,17 +103,16 @@ impl Violation for WhitespaceBeforePunctuation {
 // TODO(charlie): Pycodestyle has a negative lookahead on the end.
 static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap());
+    Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap());
 /// E201, E202, E203
 #[cfg(feature = "logical_lines")]
 pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
    let mut diagnostics = vec![];
-    for line_match in EXTRANEOUS_WHITESPACE_REGEX.captures_iter(line) {
+    for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) {
-        let match_ = line_match.get(1).unwrap();
+        let text = &line[line_match.range()];
        let text = match_.as_str();
        let char = text.trim();
-        let found = match_.start();
+        let found = line_match.start();
        if text.chars().last().unwrap().is_ascii_whitespace() {
            diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
        } else if line.chars().nth(found - 1).map_or(false, |c| c != ',') {
--- a/crates/ruff/src/rules/pycodestyle/rules/mod.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/mod.rs
@ -86,3 +86,60 @@ mod whitespace_around_keywords;
 mod whitespace_around_named_parameter_equals;
 mod whitespace_before_comment;
 mod whitespace_before_parameters;
 /// Classification of a run of whitespace that borders a token.
 #[allow(unused)]
 enum Whitespace {
    /// No whitespace character at all.
    None,
    /// Exactly one whitespace character, and it is not a tab.
    Single,
    /// Two or more whitespace characters, none of them a tab.
    Many,
    /// A tab character was encountered while scanning the run.
    Tab,
 }
 impl Whitespace {
    /// Classifies the whitespace run at the start of `content`.
    ///
    /// Returns the byte length of the run together with its kind. The whole
    /// run is scanned; once a tab has been seen the kind stays [`Whitespace::Tab`]
    /// regardless of what other whitespace follows.
    #[allow(dead_code)]
    fn leading(content: &str) -> (usize, Self) {
        content
            .chars()
            .take_while(|ch| ch.is_whitespace())
            .fold((0, Self::None), |(width, seen), ch| {
                let next = match (ch, seen) {
                    // A tab always wins, and it is sticky for the rest of the run.
                    ('\t', _) | (_, Self::Tab) => Self::Tab,
                    (_, Self::None) => Self::Single,
                    (_, Self::Single | Self::Many) => Self::Many,
                };
                (width + ch.len_utf8(), next)
            })
    }

    /// Classifies the whitespace run at the end of `content`.
    ///
    /// Returns the kind together with a byte offset measured back from the end
    /// of `content`. For tab-free runs the offset is the byte length of the run.
    ///
    /// NOTE(review): the scan returns as soon as it meets a tab (walking from
    /// the end), so the offset then covers only the whitespace to the right of
    /// that tab plus the tab itself. Whitespace further to the left is not
    /// included, which differs from `leading`. Confirm this is intended.
    #[allow(dead_code)]
    fn trailing(content: &str) -> (Self, usize) {
        let mut width = 0;
        let mut spaces = 0u32;
        for ch in content.chars().rev() {
            match ch {
                '\t' => return (Self::Tab, width + 1),
                ch if ch.is_whitespace() => {
                    spaces += 1;
                    width += ch.len_utf8();
                }
                _ => break,
            }
        }
        // `width` is still zero when nothing was counted.
        let kind = match spaces {
            0 => Self::None,
            1 => Self::Single,
            _ => Self::Many,
        };
        (kind, width)
    }
 }
--- a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs
@ -2,10 +2,15 @@
 use once_cell::sync::Lazy;
 use regex::Regex;
 use rustpython_parser::ast::Location;
 use rustpython_parser::Tok;
 use crate::rules::pycodestyle::helpers::is_op_token;
 use crate::rules::pycodestyle::rules::Whitespace;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::source_code::Locator;
 /// ## What it does
 /// Checks for extraneous tabs before an operator.
@ -123,28 +128,41 @@ impl Violation for MultipleSpacesAfterOperator {
    }
 }
-static OPERATOR_REGEX: Lazy<Regex> =
+static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap());
    Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());
 /// E221, E222, E223, E224
 #[cfg(feature = "logical_lines")]
 pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
    let mut diagnostics = vec![];
-    for line_match in OPERATOR_REGEX.captures_iter(line) {
+    let mut last_end = None;
        let before = line_match.get(1).unwrap();
        let after = line_match.get(2).unwrap();
-        if before.as_str().contains('\t') {
+    for line_match in OPERATOR_REGEX.find_iter(line) {
-            diagnostics.push((before.start(), TabBeforeOperator.into()));
+        if last_end != Some(line_match.start()) {
-        } else if before.as_str().len() > 1 {
+            let before = &line[..line_match.start()];
-            diagnostics.push((before.start(), MultipleSpacesBeforeOperator.into()));
+
            match Whitespace::trailing(before) {
                (Whitespace::Tab, offset) => {
                    diagnostics.push((line_match.start() - offset, TabBeforeOperator.into()));
                }
                (Whitespace::Many, offset) => diagnostics.push((
                    line_match.start() - offset,
                    MultipleSpacesBeforeOperator.into(),
                )),
                _ => {}
            }
        }
-        if after.as_str().contains('\t') {
+        let after = &line[line_match.end()..];
-            diagnostics.push((after.start(), TabAfterOperator.into()));
+        let (leading_offset, leading_kind) = Whitespace::leading(after);
-        } else if after.as_str().len() > 1 {
+        match leading_kind {
-            diagnostics.push((after.start(), MultipleSpacesAfterOperator.into()));
+            Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())),
            Whitespace::Many => {
                diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into()));
            }
            _ => {}
        }
        last_end = Some(line_match.end() + leading_offset);
    }
    diagnostics
 }
--- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs
@ -3,6 +3,7 @@
 use once_cell::sync::Lazy;
 use regex::Regex;
 use crate::rules::pycodestyle::rules::Whitespace;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
@ -111,28 +112,41 @@ impl Violation for TabBeforeKeyword {
 }
 static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
-    Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap()
+    Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap()
 });
 /// E271, E272, E273, E274
 #[cfg(feature = "logical_lines")]
 pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
    let mut diagnostics = vec![];
-    for line_match in KEYWORD_REGEX.captures_iter(line) {
+    let mut last_end = None;
        let before = line_match.get(1).unwrap();
        let after = line_match.get(2).unwrap();
-        if before.as_str().contains('\t') {
+    for line_match in KEYWORD_REGEX.find_iter(line) {
-            diagnostics.push((before.start(), TabBeforeKeyword.into()));
+        if last_end != Some(line_match.start()) {
-        } else if before.as_str().len() > 1 {
+            let before = &line[..line_match.start()];
-            diagnostics.push((before.start(), MultipleSpacesBeforeKeyword.into()));
+            match Whitespace::trailing(before) {
                (Whitespace::Tab, offset) => {
                    diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into()));
                }
                (Whitespace::Many, offset) => diagnostics.push((
                    line_match.start() - offset,
                    MultipleSpacesBeforeKeyword.into(),
                )),
                _ => {}
            }
        }
-        if after.as_str().contains('\t') {
+        let after = &line[line_match.end()..];
-            diagnostics.push((after.start(), TabAfterKeyword.into()));
+        let (leading_offset, leading_kind) = Whitespace::leading(after);
-        } else if after.as_str().len() > 1 {
+        match leading_kind {
-            diagnostics.push((after.start(), MultipleSpacesAfterKeyword.into()));
+            Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())),
            Whitespace::Many => {
                diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into()));
            }
            _ => {}
        }
        last_end = Some(line_match.end() + leading_offset);
    }
    diagnostics
 }
--- a/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE274_E27.py.snap
+++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE274_E27.py.snap
@ -9,10 +9,10 @@ expression: diagnostics
    fixable: false
  location:
    row: 28
-    column: 1
+    column: 2
  end_location:
    row: 28
-    column: 1
+    column: 2
  fix:
    edits: []
  parent: ~
@ -23,10 +23,10 @@ expression: diagnostics
    fixable: false
  location:
    row: 30
-    column: 4
+    column: 5
  end_location:
    row: 30
-    column: 4
+    column: 5
  fix:
    edits: []
  parent: ~