Implement whitespace-around-keywords (E271, E272, E273, E274) (#2653)

2023-02-07 22:31:13 -05:00 · 2023-02-07 22:31:13 -05:00 · f5efdd058e
parent 4c35feaa18
commit f5efdd058e
11 changed files with 492 additions and 55 deletions
--- a/crates/ruff/resources/test/fixtures/pycodestyle/E27.py
+++ b/crates/ruff/resources/test/fixtures/pycodestyle/E27.py
@ -0,0 +1,58 @@
+#: Okay
+True and False
+#: E271
+True and  False
+#: E272
+True  and False
+#: E271
+if   1:
+#: E273
+True and		False
+#: E273 E274
+True		and	False
+#: E271
+a and  b
+#: E271
+1 and  b
+#: E271
+a and  2
+#: E271 E272
+1  and  b
+#: E271 E272
+a  and  2
+#: E272
+this  and False
+#: E273
+a and	b
+#: E274
+a		and b
+#: E273 E274
+this		and	False
+#: Okay
+from u import (a, b)
+from v import c, d
+#: E271
+from w import  (e, f)
+#: E275
+from w import(e, f)
+#: E275
+from importable.module import(e, f)
+#: E275
+try:
+    from importable.module import(e, f)
+except ImportError:
+    pass
+#: E275
+if(foo):
+    pass
+else:
+    pass
+#: Okay
+matched = {"true": True, "false": False}
+#: E275:2:11
+if True:
+    assert(1)
+#: Okay
+def f():
+    print((yield))
+    x = (yield)
--- a/crates/ruff/src/checkers/logical_lines.rs
+++ b/crates/ruff/src/checkers/logical_lines.rs
@ -5,8 +5,10 @@ use rustpython_parser::lexer::LexResult;

 use crate::ast::types::Range;
 use crate::registry::Diagnostic;
-use crate::rules::pycodestyle::logical_lines::iter_logical_lines;
-use crate::rules::pycodestyle::rules::{extraneous_whitespace, indentation, space_around_operator};
+use crate::rules::pycodestyle::logical_lines::{iter_logical_lines, TokenFlags};
+use crate::rules::pycodestyle::rules::{
+    extraneous_whitespace, indentation, space_around_operator, whitespace_around_keywords,
+};
 use crate::settings::Settings;
 use crate::source_code::{Locator, Stylist};

@ -57,7 +59,7 @@ pub fn check_logical_lines(
        // Generate mapping from logical to physical offsets.
        let mapping_offsets = line.mapping.iter().map(|(offset, _)| *offset).collect_vec();

-        if line.operator {
+        if line.flags.contains(TokenFlags::OPERATOR) {
            for (index, kind) in space_around_operator(&line.text) {
                let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
                let location = Location::new(pos.row(), pos.column() + index - token_offset);
@ -72,7 +74,10 @@ pub fn check_logical_lines(
                }
            }
        }
-        if line.bracket || line.punctuation {
+        // Run the extraneous-whitespace checks when the line has a bracket *or* a
+        // punctuation mark. `intersects` is the any-of test; `contains` would
+        // require every listed flag to be set at once.
+        if line
+            .flags
+            .intersects(TokenFlags::BRACKET | TokenFlags::PUNCTUATION)
+        {
            for (index, kind) in extraneous_whitespace(&line.text) {
                let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
                let location = Location::new(pos.row(), pos.column() + index - token_offset);
@ -87,6 +92,21 @@ pub fn check_logical_lines(
                }
            }
        }
+        if line.flags.contains(TokenFlags::KEYWORD) {
+            for (index, kind) in whitespace_around_keywords(&line.text) {
+                let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
+                let location = Location::new(pos.row(), pos.column() + index - token_offset);
+                if settings.rules.enabled(kind.rule()) {
+                    diagnostics.push(Diagnostic {
+                        kind,
+                        location,
+                        end_location: location,
+                        fix: None,
+                        parent: None,
+                    });
+                }
+            }
+        }

        for (index, kind) in indentation(
            &line,
--- a/crates/ruff/src/registry.rs
+++ b/crates/ruff/src/registry.rs
@ -41,6 +41,14 @@ ruff_macros::define_rule_mapping!(
    E223 => rules::pycodestyle::rules::TabBeforeOperator,
    #[cfg(feature = "logical_lines")]
    E224 => rules::pycodestyle::rules::TabAfterOperator,
+    #[cfg(feature = "logical_lines")]
+    E271 => rules::pycodestyle::rules::MultipleSpacesAfterKeyword,
+    #[cfg(feature = "logical_lines")]
+    E272 => rules::pycodestyle::rules::MultipleSpacesBeforeKeyword,
+    #[cfg(feature = "logical_lines")]
+    E273 => rules::pycodestyle::rules::TabAfterKeyword,
+    #[cfg(feature = "logical_lines")]
+    E274 => rules::pycodestyle::rules::TabBeforeKeyword,
    E401 => rules::pycodestyle::rules::MultipleImportsOnOneLine,
    E402 => rules::pycodestyle::rules::ModuleImportNotAtTopOfFile,
    E501 => rules::pycodestyle::rules::LineTooLong,
@ -760,7 +768,11 @@ impl Rule {
            | Rule::UnexpectedIndentationComment
            | Rule::WhitespaceAfterOpenBracket
            | Rule::WhitespaceBeforeCloseBracket
-            | Rule::WhitespaceBeforePunctuation => &LintSource::LogicalLines,
+            | Rule::WhitespaceBeforePunctuation
+            | Rule::MultipleSpacesAfterKeyword
+            | Rule::MultipleSpacesBeforeKeyword
+            | Rule::TabAfterKeyword
+            | Rule::TabBeforeKeyword => &LintSource::LogicalLines,
            _ => &LintSource::Ast,
        }
    }
--- a/crates/ruff/src/rules/pycodestyle/logical_lines.rs
+++ b/crates/ruff/src/rules/pycodestyle/logical_lines.rs
@ -1,19 +1,29 @@
+use bitflags::bitflags;
 use rustpython_parser::ast::Location;
 use rustpython_parser::lexer::{LexResult, Tok};

 use crate::ast::types::Range;
 use crate::source_code::Locator;

+bitflags! {
+    /// Set of token categories observed while assembling a logical line.
+    #[derive(Default)]
+    pub struct TokenFlags: u32 {
+        /// Set when the logical line has at least one operator token.
+        const OPERATOR = 1 << 0;
+        /// Set when the logical line has at least one bracket token.
+        const BRACKET = 1 << 1;
+        /// Set when the logical line has at least one punctuation token.
+        const PUNCTUATION = 1 << 2;
+        /// Set when the logical line has at least one keyword token.
+        const KEYWORD = 1 << 3;
+    }
+}
+
 #[derive(Debug)]
 pub struct LogicalLine {
    pub text: String,
    pub mapping: Vec<(usize, Location)>,
-    /// Whether the logical line contains an operator.
-    pub operator: bool,
-    /// Whether the logical line contains a comment.
-    pub bracket: bool,
-    /// Whether the logical line contains a punctuation mark.
-    pub punctuation: bool,
+    pub flags: TokenFlags,
 }

 impl LogicalLine {
@ -24,10 +34,8 @@ impl LogicalLine {

 fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> LogicalLine {
    let mut logical = String::with_capacity(88);
-    let mut operator = false;
-    let mut bracket = false;
-    let mut punctuation = false;
    let mut mapping = Vec::new();
+    let mut flags = TokenFlags::empty();
    let mut prev: Option<&Location> = None;
    let mut length = 0;
    for (start, tok, end) in tokens {
@ -46,48 +54,89 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic
            continue;
        }

-        if !operator {
-            operator |= matches!(
-                tok,
-                Tok::Amper
-                    | Tok::AmperEqual
-                    | Tok::CircumFlex
-                    | Tok::CircumflexEqual
-                    | Tok::Colon
-                    | Tok::ColonEqual
-                    | Tok::DoubleSlash
-                    | Tok::DoubleSlashEqual
-                    | Tok::DoubleStar
-                    | Tok::Equal
-                    | Tok::Greater
-                    | Tok::GreaterEqual
-                    | Tok::Less
-                    | Tok::LessEqual
-                    | Tok::Minus
-                    | Tok::MinusEqual
-                    | Tok::NotEqual
-                    | Tok::Percent
-                    | Tok::PercentEqual
-                    | Tok::Plus
-                    | Tok::PlusEqual
-                    | Tok::Slash
-                    | Tok::SlashEqual
-                    | Tok::Star
-                    | Tok::StarEqual
-                    | Tok::Vbar
-                    | Tok::VbarEqual
-            );
+        if matches!(
+            tok,
+            Tok::Amper
+                | Tok::AmperEqual
+                | Tok::CircumFlex
+                | Tok::CircumflexEqual
+                | Tok::Colon
+                | Tok::ColonEqual
+                | Tok::DoubleSlash
+                | Tok::DoubleSlashEqual
+                | Tok::DoubleStar
+                | Tok::Equal
+                | Tok::Greater
+                | Tok::GreaterEqual
+                | Tok::Less
+                | Tok::LessEqual
+                | Tok::Minus
+                | Tok::MinusEqual
+                | Tok::NotEqual
+                | Tok::Percent
+                | Tok::PercentEqual
+                | Tok::Plus
+                | Tok::PlusEqual
+                | Tok::Slash
+                | Tok::SlashEqual
+                | Tok::Star
+                | Tok::StarEqual
+                | Tok::Vbar
+                | Tok::VbarEqual
+        ) {
+            flags.insert(TokenFlags::OPERATOR);
        }

-        if !bracket {
-            bracket |= matches!(
-                tok,
-                Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
-            );
+        if matches!(
+            tok,
+            Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
+        ) {
+            flags.insert(TokenFlags::BRACKET);
        }

-        if !punctuation {
-            punctuation |= matches!(tok, Tok::Comma | Tok::Semi | Tok::Colon);
+        if matches!(tok, Tok::Comma | Tok::Semi | Tok::Colon) {
+            flags.insert(TokenFlags::PUNCTUATION);
+        }
+
+        if matches!(
+            tok,
+            Tok::False
+                | Tok::None
+                | Tok::True
+                | Tok::And
+                | Tok::As
+                | Tok::Assert
+                | Tok::Async
+                | Tok::Await
+                | Tok::Break
+                | Tok::Class
+                | Tok::Continue
+                | Tok::Def
+                | Tok::Del
+                | Tok::Elif
+                | Tok::Else
+                | Tok::Except
+                | Tok::Finally
+                | Tok::For
+                | Tok::From
+                | Tok::Global
+                | Tok::If
+                | Tok::Import
+                | Tok::In
+                | Tok::Is
+                | Tok::Lambda
+                | Tok::Nonlocal
+                | Tok::Not
+                | Tok::Or
+                | Tok::Pass
+                | Tok::Raise
+                | Tok::Return
+                | Tok::Try
+                | Tok::While
+                | Tok::With
+                | Tok::Yield
+        ) {
+            flags.insert(TokenFlags::KEYWORD);
        }

        // TODO(charlie): "Mute" strings.
@ -130,10 +179,8 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic

    LogicalLine {
        text: logical,
-        operator,
-        bracket,
-        punctuation,
        mapping,
+        flags,
    }
 }

--- a/crates/ruff/src/rules/pycodestyle/mod.rs
+++ b/crates/ruff/src/rules/pycodestyle/mod.rs
@ -53,12 +53,16 @@ mod tests {
    #[cfg(feature = "logical_lines")]
    #[test_case(Rule::IndentationWithInvalidMultiple, Path::new("E11.py"))]
    #[test_case(Rule::IndentationWithInvalidMultipleComment, Path::new("E11.py"))]
+    #[test_case(Rule::MultipleSpacesAfterKeyword, Path::new("E27.py"))]
    #[test_case(Rule::MultipleSpacesAfterOperator, Path::new("E22.py"))]
+    #[test_case(Rule::MultipleSpacesBeforeKeyword, Path::new("E27.py"))]
    #[test_case(Rule::MultipleSpacesBeforeOperator, Path::new("E22.py"))]
    #[test_case(Rule::NoIndentedBlock, Path::new("E11.py"))]
    #[test_case(Rule::NoIndentedBlockComment, Path::new("E11.py"))]
    #[test_case(Rule::OverIndented, Path::new("E11.py"))]
+    #[test_case(Rule::TabAfterKeyword, Path::new("E27.py"))]
    #[test_case(Rule::TabAfterOperator, Path::new("E22.py"))]
+    #[test_case(Rule::TabBeforeKeyword, Path::new("E27.py"))]
    #[test_case(Rule::TabBeforeOperator, Path::new("E22.py"))]
    #[test_case(Rule::UnexpectedIndentation, Path::new("E11.py"))]
    #[test_case(Rule::UnexpectedIndentationComment, Path::new("E11.py"))]
--- a/crates/ruff/src/rules/pycodestyle/rules/mod.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/mod.rs
@ -29,6 +29,10 @@ pub use space_around_operator::{
    TabAfterOperator, TabBeforeOperator,
 };
 pub use type_comparison::{type_comparison, TypeComparison};
+pub use whitespace_around_keywords::{
+    whitespace_around_keywords, MultipleSpacesAfterKeyword, MultipleSpacesBeforeKeyword,
+    TabAfterKeyword, TabBeforeKeyword,
+};

 mod ambiguous_class_name;
 mod ambiguous_function_name;
@ -48,3 +52,4 @@ mod no_newline_at_end_of_file;
 mod not_tests;
 mod space_around_operator;
 mod type_comparison;
+mod whitespace_around_keywords;
--- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs
@ -0,0 +1,81 @@
+// NOTE(review): presumably needed because, without the `logical_lines` feature,
+// only the stub `whitespace_around_keywords` is compiled and `KEYWORD_REGEX` is
+// left unused — confirm, and consider narrowing the allow to that item.
+#![allow(dead_code)]
+
+use once_cell::sync::Lazy;
+use regex::Regex;
+
+use ruff_macros::{define_violation, derive_message_formats};
+
+use crate::registry::DiagnosticKind;
+use crate::violation::Violation;
+
+// Registered as E271. Produced by `whitespace_around_keywords` when the
+// whitespace run following a keyword contains no tab and is longer than one
+// byte.
+define_violation!(
+    pub struct MultipleSpacesAfterKeyword;
+);
+impl Violation for MultipleSpacesAfterKeyword {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Multiple spaces after keyword")
+    }
+}
+
+// Registered as E272. Produced by `whitespace_around_keywords` when the
+// whitespace run preceding a keyword contains no tab and is longer than one
+// byte.
+define_violation!(
+    pub struct MultipleSpacesBeforeKeyword;
+);
+impl Violation for MultipleSpacesBeforeKeyword {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Multiple spaces before keyword")
+    }
+}
+
+// Registered as E273. Produced by `whitespace_around_keywords` when the
+// whitespace run following a keyword contains a tab character.
+define_violation!(
+    pub struct TabAfterKeyword;
+);
+impl Violation for TabAfterKeyword {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Tab after keyword")
+    }
+}
+
+// Registered as E274. Produced by `whitespace_around_keywords` when the
+// whitespace run preceding a keyword contains a tab character.
+define_violation!(
+    pub struct TabBeforeKeyword;
+);
+impl Violation for TabBeforeKeyword {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Tab before keyword")
+    }
+}
+
+/// Matches a Python keyword on word boundaries, capturing the whitespace run
+/// directly before it (group 1) and directly after it (group 2). Both groups
+/// are `\s*`, so they take part in every match, possibly as empty strings.
+///
+/// NOTE(review): the alternation includes the singletons `False`, `None` and
+/// `True`. pycodestyle's keyword pattern appears to leave those out, which
+/// would explain why e.g. `True  and False` is reported as "after keyword"
+/// rather than "before keyword" — confirm whether this divergence is intended.
+static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap()
+});
+
+/// E271, E272, E273, E274
+///
+/// Scans `line` for keywords and reports irregular whitespace next to each one.
+/// Every entry pairs the byte offset (within `line`) at which the offending
+/// whitespace run starts with the violation to raise. For each keyword the run
+/// in front is classified first, then the run behind; a tab takes precedence
+/// over the multiple-spaces diagnosis.
+#[cfg(feature = "logical_lines")]
+pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
+    let mut found: Vec<(usize, DiagnosticKind)> = Vec::new();
+    for captures in KEYWORD_REGEX.captures_iter(line) {
+        // Groups 1 and 2 are both `(\s*)`, so they are present on every match.
+        let leading = captures.get(1).unwrap();
+        let trailing = captures.get(2).unwrap();
+
+        match leading.as_str() {
+            gap if gap.contains('\t') => {
+                found.push((leading.start(), TabBeforeKeyword.into()));
+            }
+            gap if gap.len() > 1 => {
+                found.push((leading.start(), MultipleSpacesBeforeKeyword.into()));
+            }
+            _ => {}
+        }
+
+        match trailing.as_str() {
+            gap if gap.contains('\t') => {
+                found.push((trailing.start(), TabAfterKeyword.into()));
+            }
+            gap if gap.len() > 1 => {
+                found.push((trailing.start(), MultipleSpacesAfterKeyword.into()));
+            }
+            _ => {}
+        }
+    }
+    found
+}
+
+/// Stand-in compiled when the `logical_lines` feature is off: reports nothing.
+#[cfg(not(feature = "logical_lines"))]
+pub fn whitespace_around_keywords(_line: &str) -> Vec<(usize, DiagnosticKind)> {
+    Vec::new()
+}
--- a/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE271_E27.py.snap
+++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE271_E27.py.snap
@ -0,0 +1,95 @@
+---
+source: crates/ruff/src/rules/pycodestyle/mod.rs
+expression: diagnostics
+---
+- kind:
+    MultipleSpacesAfterKeyword: ~
+  location:
+    row: 4
+    column: 8
+  end_location:
+    row: 4
+    column: 8
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesAfterKeyword: ~
+  location:
+    row: 6
+    column: 4
+  end_location:
+    row: 6
+    column: 4
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesAfterKeyword: ~
+  location:
+    row: 8
+    column: 2
+  end_location:
+    row: 8
+    column: 2
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesAfterKeyword: ~
+  location:
+    row: 14
+    column: 5
+  end_location:
+    row: 14
+    column: 5
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesAfterKeyword: ~
+  location:
+    row: 16
+    column: 5
+  end_location:
+    row: 16
+    column: 5
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesAfterKeyword: ~
+  location:
+    row: 18
+    column: 5
+  end_location:
+    row: 18
+    column: 5
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesAfterKeyword: ~
+  location:
+    row: 20
+    column: 6
+  end_location:
+    row: 20
+    column: 6
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesAfterKeyword: ~
+  location:
+    row: 22
+    column: 6
+  end_location:
+    row: 22
+    column: 6
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesAfterKeyword: ~
+  location:
+    row: 35
+    column: 13
+  end_location:
+    row: 35
+    column: 13
+  fix: ~
+  parent: ~
+
--- a/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE272_E27.py.snap
+++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE272_E27.py.snap
@ -0,0 +1,35 @@
+---
+source: crates/ruff/src/rules/pycodestyle/mod.rs
+expression: diagnostics
+---
+- kind:
+    MultipleSpacesBeforeKeyword: ~
+  location:
+    row: 20
+    column: 1
+  end_location:
+    row: 20
+    column: 1
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesBeforeKeyword: ~
+  location:
+    row: 22
+    column: 1
+  end_location:
+    row: 22
+    column: 1
+  fix: ~
+  parent: ~
+- kind:
+    MultipleSpacesBeforeKeyword: ~
+  location:
+    row: 24
+    column: 4
+  end_location:
+    row: 24
+    column: 4
+  fix: ~
+  parent: ~
+
--- a/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE273_E27.py.snap
+++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE273_E27.py.snap
@ -0,0 +1,55 @@
+---
+source: crates/ruff/src/rules/pycodestyle/mod.rs
+expression: diagnostics
+---
+- kind:
+    TabAfterKeyword: ~
+  location:
+    row: 10
+    column: 8
+  end_location:
+    row: 10
+    column: 8
+  fix: ~
+  parent: ~
+- kind:
+    TabAfterKeyword: ~
+  location:
+    row: 12
+    column: 4
+  end_location:
+    row: 12
+    column: 4
+  fix: ~
+  parent: ~
+- kind:
+    TabAfterKeyword: ~
+  location:
+    row: 12
+    column: 9
+  end_location:
+    row: 12
+    column: 9
+  fix: ~
+  parent: ~
+- kind:
+    TabAfterKeyword: ~
+  location:
+    row: 26
+    column: 5
+  end_location:
+    row: 26
+    column: 5
+  fix: ~
+  parent: ~
+- kind:
+    TabAfterKeyword: ~
+  location:
+    row: 30
+    column: 9
+  end_location:
+    row: 30
+    column: 9
+  fix: ~
+  parent: ~
+
--- a/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE274_E27.py.snap
+++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsE274_E27.py.snap
@ -0,0 +1,25 @@
+---
+source: crates/ruff/src/rules/pycodestyle/mod.rs
+expression: diagnostics
+---
+- kind:
+    TabBeforeKeyword: ~
+  location:
+    row: 28
+    column: 1
+  end_location:
+    row: 28
+    column: 1
+  fix: ~
+  parent: ~
+- kind:
+    TabBeforeKeyword: ~
+  location:
+    row: 30
+    column: 4
+  end_location:
+    row: 30
+    column: 4
+  fix: ~
+  parent: ~
+