mirror of https://github.com/astral-sh/ruff
perf(pycodestyle): Remove regex captures (#3735)
This commit is contained in:
parent
113a8b8fda
commit
1d724b1495
|
|
@ -43,6 +43,20 @@ pub fn check_logical_lines(
|
||||||
) -> Vec<Diagnostic> {
|
) -> Vec<Diagnostic> {
|
||||||
let mut diagnostics = vec![];
|
let mut diagnostics = vec![];
|
||||||
|
|
||||||
|
#[cfg(feature = "logical_lines")]
|
||||||
|
let should_fix_missing_whitespace =
|
||||||
|
autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace);
|
||||||
|
|
||||||
|
#[cfg(not(feature = "logical_lines"))]
|
||||||
|
let should_fix_missing_whitespace = false;
|
||||||
|
|
||||||
|
#[cfg(feature = "logical_lines")]
|
||||||
|
let should_fix_whitespace_before_parameters =
|
||||||
|
autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);
|
||||||
|
|
||||||
|
#[cfg(not(feature = "logical_lines"))]
|
||||||
|
let should_fix_whitespace_before_parameters = false;
|
||||||
|
|
||||||
let indent_char = stylist.indentation().as_char();
|
let indent_char = stylist.indentation().as_char();
|
||||||
let mut prev_line = None;
|
let mut prev_line = None;
|
||||||
let mut prev_indent_level = None;
|
let mut prev_indent_level = None;
|
||||||
|
|
@ -152,15 +166,12 @@ pub fn check_logical_lines(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "logical_lines")]
|
for diagnostic in missing_whitespace(
|
||||||
let should_fix = autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace);
|
line.text(),
|
||||||
|
start_loc.row(),
|
||||||
#[cfg(not(feature = "logical_lines"))]
|
should_fix_missing_whitespace,
|
||||||
let should_fix = false;
|
indent_level,
|
||||||
|
) {
|
||||||
for diagnostic in
|
|
||||||
missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level)
|
|
||||||
{
|
|
||||||
if settings.rules.enabled(diagnostic.kind.rule()) {
|
if settings.rules.enabled(diagnostic.kind.rule()) {
|
||||||
diagnostics.push(diagnostic);
|
diagnostics.push(diagnostic);
|
||||||
}
|
}
|
||||||
|
|
@ -168,14 +179,9 @@ pub fn check_logical_lines(
|
||||||
}
|
}
|
||||||
|
|
||||||
if line.flags().contains(TokenFlags::BRACKET) {
|
if line.flags().contains(TokenFlags::BRACKET) {
|
||||||
#[cfg(feature = "logical_lines")]
|
for diagnostic in
|
||||||
let should_fix =
|
whitespace_before_parameters(line.tokens(), should_fix_whitespace_before_parameters)
|
||||||
autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);
|
{
|
||||||
|
|
||||||
#[cfg(not(feature = "logical_lines"))]
|
|
||||||
let should_fix = false;
|
|
||||||
|
|
||||||
for diagnostic in whitespace_before_parameters(line.tokens(), should_fix) {
|
|
||||||
if settings.rules.enabled(diagnostic.kind.rule()) {
|
if settings.rules.enabled(diagnostic.kind.rule()) {
|
||||||
diagnostics.push(diagnostic);
|
diagnostics.push(diagnostic);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,8 @@ impl<'a> LogicalLines<'a> {
|
||||||
assert!(u32::try_from(tokens.len()).is_ok());
|
assert!(u32::try_from(tokens.len()).is_ok());
|
||||||
|
|
||||||
let single_token = tokens.len() == 1;
|
let single_token = tokens.len() == 1;
|
||||||
let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len());
|
let mut builder =
|
||||||
|
LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len());
|
||||||
let mut parens: u32 = 0;
|
let mut parens: u32 = 0;
|
||||||
|
|
||||||
for (start, token, end) in tokens.iter().flatten() {
|
for (start, token, end) in tokens.iter().flatten() {
|
||||||
|
|
@ -280,10 +281,11 @@ pub struct LogicalLinesBuilder<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> LogicalLinesBuilder<'a> {
|
impl<'a> LogicalLinesBuilder<'a> {
|
||||||
fn with_token_capacity(capacity: usize) -> Self {
|
fn with_capacity(tokens: usize, string: usize) -> Self {
|
||||||
Self {
|
Self {
|
||||||
tokens: Vec::with_capacity(capacity),
|
tokens: Vec::with_capacity(tokens),
|
||||||
mappings: Mappings::with_capacity(capacity + 1),
|
mappings: Mappings::with_capacity(tokens + 1),
|
||||||
|
text: String::with_capacity(string),
|
||||||
..Self::default()
|
..Self::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -340,6 +342,9 @@ impl<'a> LogicalLinesBuilder<'a> {
|
||||||
|
|
||||||
// TODO(charlie): "Mute" strings.
|
// TODO(charlie): "Mute" strings.
|
||||||
let text = if let Tok::String { value, .. } = token {
|
let text = if let Tok::String { value, .. } = token {
|
||||||
|
// Replace the content of strings with a non-whs sequence because some lints
|
||||||
|
// search for whitespace in the document and whitespace inside of the string
|
||||||
|
// would complicate the search.
|
||||||
Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
|
Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
|
||||||
} else {
|
} else {
|
||||||
Cow::Borrowed(locator.slice(Range {
|
Cow::Borrowed(locator.slice(Range {
|
||||||
|
|
|
||||||
|
|
@ -103,17 +103,16 @@ impl Violation for WhitespaceBeforePunctuation {
|
||||||
|
|
||||||
// TODO(charlie): Pycodestyle has a negative lookahead on the end.
|
// TODO(charlie): Pycodestyle has a negative lookahead on the end.
|
||||||
static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
|
static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
|
||||||
Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap());
|
Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap());
|
||||||
|
|
||||||
/// E201, E202, E203
|
/// E201, E202, E203
|
||||||
#[cfg(feature = "logical_lines")]
|
#[cfg(feature = "logical_lines")]
|
||||||
pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
|
pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
|
||||||
let mut diagnostics = vec![];
|
let mut diagnostics = vec![];
|
||||||
for line_match in EXTRANEOUS_WHITESPACE_REGEX.captures_iter(line) {
|
for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) {
|
||||||
let match_ = line_match.get(1).unwrap();
|
let text = &line[line_match.range()];
|
||||||
let text = match_.as_str();
|
|
||||||
let char = text.trim();
|
let char = text.trim();
|
||||||
let found = match_.start();
|
let found = line_match.start();
|
||||||
if text.chars().last().unwrap().is_ascii_whitespace() {
|
if text.chars().last().unwrap().is_ascii_whitespace() {
|
||||||
diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
|
diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
|
||||||
} else if line.chars().nth(found - 1).map_or(false, |c| c != ',') {
|
} else if line.chars().nth(found - 1).map_or(false, |c| c != ',') {
|
||||||
|
|
|
||||||
|
|
@ -86,3 +86,60 @@ mod whitespace_around_keywords;
|
||||||
mod whitespace_around_named_parameter_equals;
|
mod whitespace_around_named_parameter_equals;
|
||||||
mod whitespace_before_comment;
|
mod whitespace_before_comment;
|
||||||
mod whitespace_before_parameters;
|
mod whitespace_before_parameters;
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
|
enum Whitespace {
|
||||||
|
None,
|
||||||
|
Single,
|
||||||
|
Many,
|
||||||
|
Tab,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Whitespace {
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn leading(content: &str) -> (usize, Self) {
|
||||||
|
let mut offset = 0;
|
||||||
|
let mut kind = Self::None;
|
||||||
|
|
||||||
|
for c in content.chars() {
|
||||||
|
if c == '\t' {
|
||||||
|
kind = Self::Tab;
|
||||||
|
offset += 1;
|
||||||
|
} else if c.is_whitespace() {
|
||||||
|
kind = match kind {
|
||||||
|
Whitespace::None => Whitespace::Single,
|
||||||
|
Whitespace::Single | Whitespace::Many => Whitespace::Many,
|
||||||
|
Whitespace::Tab => Whitespace::Tab,
|
||||||
|
};
|
||||||
|
offset += c.len_utf8();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(offset, kind)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn trailing(content: &str) -> (Self, usize) {
|
||||||
|
let mut count = 0u32;
|
||||||
|
let mut offset = 0;
|
||||||
|
|
||||||
|
for c in content.chars().rev() {
|
||||||
|
if c == '\t' {
|
||||||
|
return (Self::Tab, offset + 1);
|
||||||
|
} else if c.is_whitespace() {
|
||||||
|
count += 1;
|
||||||
|
offset += c.len_utf8();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
match count {
|
||||||
|
0 => (Self::None, 0),
|
||||||
|
1 => (Self::Single, offset),
|
||||||
|
_ => (Self::Many, offset),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,15 @@
|
||||||
|
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
use rustpython_parser::ast::Location;
|
||||||
|
use rustpython_parser::Tok;
|
||||||
|
|
||||||
|
use crate::rules::pycodestyle::helpers::is_op_token;
|
||||||
|
use crate::rules::pycodestyle::rules::Whitespace;
|
||||||
use ruff_diagnostics::DiagnosticKind;
|
use ruff_diagnostics::DiagnosticKind;
|
||||||
use ruff_diagnostics::Violation;
|
use ruff_diagnostics::Violation;
|
||||||
use ruff_macros::{derive_message_formats, violation};
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
|
use ruff_python_ast::source_code::Locator;
|
||||||
|
|
||||||
/// ## What it does
|
/// ## What it does
|
||||||
/// Checks for extraneous tabs before an operator.
|
/// Checks for extraneous tabs before an operator.
|
||||||
|
|
@ -123,28 +128,41 @@ impl Violation for MultipleSpacesAfterOperator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static OPERATOR_REGEX: Lazy<Regex> =
|
static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap());
|
||||||
Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());
|
|
||||||
|
|
||||||
/// E221, E222, E223, E224
|
/// E221, E222, E223, E224
|
||||||
#[cfg(feature = "logical_lines")]
|
#[cfg(feature = "logical_lines")]
|
||||||
pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
|
pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
|
||||||
let mut diagnostics = vec![];
|
let mut diagnostics = vec![];
|
||||||
for line_match in OPERATOR_REGEX.captures_iter(line) {
|
let mut last_end = None;
|
||||||
let before = line_match.get(1).unwrap();
|
|
||||||
let after = line_match.get(2).unwrap();
|
|
||||||
|
|
||||||
if before.as_str().contains('\t') {
|
for line_match in OPERATOR_REGEX.find_iter(line) {
|
||||||
diagnostics.push((before.start(), TabBeforeOperator.into()));
|
if last_end != Some(line_match.start()) {
|
||||||
} else if before.as_str().len() > 1 {
|
let before = &line[..line_match.start()];
|
||||||
diagnostics.push((before.start(), MultipleSpacesBeforeOperator.into()));
|
|
||||||
|
match Whitespace::trailing(before) {
|
||||||
|
(Whitespace::Tab, offset) => {
|
||||||
|
diagnostics.push((line_match.start() - offset, TabBeforeOperator.into()));
|
||||||
|
}
|
||||||
|
(Whitespace::Many, offset) => diagnostics.push((
|
||||||
|
line_match.start() - offset,
|
||||||
|
MultipleSpacesBeforeOperator.into(),
|
||||||
|
)),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if after.as_str().contains('\t') {
|
let after = &line[line_match.end()..];
|
||||||
diagnostics.push((after.start(), TabAfterOperator.into()));
|
let (leading_offset, leading_kind) = Whitespace::leading(after);
|
||||||
} else if after.as_str().len() > 1 {
|
match leading_kind {
|
||||||
diagnostics.push((after.start(), MultipleSpacesAfterOperator.into()));
|
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())),
|
||||||
|
Whitespace::Many => {
|
||||||
|
diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into()));
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
last_end = Some(line_match.end() + leading_offset);
|
||||||
}
|
}
|
||||||
diagnostics
|
diagnostics
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
|
||||||
|
use crate::rules::pycodestyle::rules::Whitespace;
|
||||||
use ruff_diagnostics::DiagnosticKind;
|
use ruff_diagnostics::DiagnosticKind;
|
||||||
use ruff_diagnostics::Violation;
|
use ruff_diagnostics::Violation;
|
||||||
use ruff_macros::{derive_message_formats, violation};
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
|
|
@ -111,28 +112,41 @@ impl Violation for TabBeforeKeyword {
|
||||||
}
|
}
|
||||||
|
|
||||||
static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
|
static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||||
Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap()
|
Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap()
|
||||||
});
|
});
|
||||||
|
|
||||||
/// E271, E272, E273, E274
|
/// E271, E272, E273, E274
|
||||||
#[cfg(feature = "logical_lines")]
|
#[cfg(feature = "logical_lines")]
|
||||||
pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
|
pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
|
||||||
let mut diagnostics = vec![];
|
let mut diagnostics = vec![];
|
||||||
for line_match in KEYWORD_REGEX.captures_iter(line) {
|
let mut last_end = None;
|
||||||
let before = line_match.get(1).unwrap();
|
|
||||||
let after = line_match.get(2).unwrap();
|
|
||||||
|
|
||||||
if before.as_str().contains('\t') {
|
for line_match in KEYWORD_REGEX.find_iter(line) {
|
||||||
diagnostics.push((before.start(), TabBeforeKeyword.into()));
|
if last_end != Some(line_match.start()) {
|
||||||
} else if before.as_str().len() > 1 {
|
let before = &line[..line_match.start()];
|
||||||
diagnostics.push((before.start(), MultipleSpacesBeforeKeyword.into()));
|
match Whitespace::trailing(before) {
|
||||||
|
(Whitespace::Tab, offset) => {
|
||||||
|
diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into()));
|
||||||
|
}
|
||||||
|
(Whitespace::Many, offset) => diagnostics.push((
|
||||||
|
line_match.start() - offset,
|
||||||
|
MultipleSpacesBeforeKeyword.into(),
|
||||||
|
)),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if after.as_str().contains('\t') {
|
let after = &line[line_match.end()..];
|
||||||
diagnostics.push((after.start(), TabAfterKeyword.into()));
|
let (leading_offset, leading_kind) = Whitespace::leading(after);
|
||||||
} else if after.as_str().len() > 1 {
|
match leading_kind {
|
||||||
diagnostics.push((after.start(), MultipleSpacesAfterKeyword.into()));
|
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())),
|
||||||
|
Whitespace::Many => {
|
||||||
|
diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into()));
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
last_end = Some(line_match.end() + leading_offset);
|
||||||
}
|
}
|
||||||
diagnostics
|
diagnostics
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,10 +9,10 @@ expression: diagnostics
|
||||||
fixable: false
|
fixable: false
|
||||||
location:
|
location:
|
||||||
row: 28
|
row: 28
|
||||||
column: 1
|
column: 2
|
||||||
end_location:
|
end_location:
|
||||||
row: 28
|
row: 28
|
||||||
column: 1
|
column: 2
|
||||||
fix:
|
fix:
|
||||||
edits: []
|
edits: []
|
||||||
parent: ~
|
parent: ~
|
||||||
|
|
@ -23,10 +23,10 @@ expression: diagnostics
|
||||||
fixable: false
|
fixable: false
|
||||||
location:
|
location:
|
||||||
row: 30
|
row: 30
|
||||||
column: 4
|
column: 5
|
||||||
end_location:
|
end_location:
|
||||||
row: 30
|
row: 30
|
||||||
column: 4
|
column: 5
|
||||||
fix:
|
fix:
|
||||||
edits: []
|
edits: []
|
||||||
parent: ~
|
parent: ~
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue