perf(logical-lines): Various small perf improvements (#4022)

This commit is contained in:
Micha Reiser 2023-04-26 21:10:35 +02:00 committed by GitHub
parent cab65b25da
commit f3e6ddda62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 349 additions and 536 deletions

View File

@ -1,7 +1,7 @@
use ruff_text_size::TextRange;
use rustpython_parser::lexer::LexResult;
use ruff_diagnostics::{Diagnostic, Fix};
use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_ast::token_kind::TokenKind;
@ -37,7 +37,7 @@ pub fn check_logical_lines(
settings: &Settings,
autofix: flags::Autofix,
) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
let mut context = LogicalLinesContext::new(settings);
#[cfg(feature = "logical_lines")]
let should_fix_missing_whitespace =
@ -59,106 +59,33 @@ pub fn check_logical_lines(
for line in &LogicalLines::from_tokens(tokens, locator) {
if line.flags().contains(TokenFlags::OPERATOR) {
for (location, kind) in space_around_operator(&line) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
range: TextRange::empty(location),
fix: Fix::empty(),
parent: None,
});
}
}
for (location, kind) in whitespace_around_named_parameter_equals(&line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
range: TextRange::empty(location),
fix: Fix::empty(),
parent: None,
});
}
}
for (location, kind) in missing_whitespace_around_operator(&line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
range: TextRange::empty(location),
fix: Fix::empty(),
parent: None,
});
}
}
for diagnostic in missing_whitespace(&line, should_fix_missing_whitespace) {
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
}
}
space_around_operator(&line, &mut context);
whitespace_around_named_parameter_equals(&line, &mut context);
missing_whitespace_around_operator(&line, &mut context);
missing_whitespace(&line, should_fix_missing_whitespace, &mut context);
}
if line
.flags()
.contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION)
{
for (location, kind) in extraneous_whitespace(&line) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
range: TextRange::empty(location),
fix: Fix::empty(),
parent: None,
});
}
}
extraneous_whitespace(&line, &mut context);
}
if line.flags().contains(TokenFlags::KEYWORD) {
for (location, kind) in whitespace_around_keywords(&line) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
range: TextRange::empty(location),
fix: Fix::empty(),
parent: None,
});
}
}
for (location, kind) in missing_whitespace_after_keyword(&line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
range: TextRange::empty(location),
fix: Fix::empty(),
parent: None,
});
}
}
whitespace_around_keywords(&line, &mut context);
missing_whitespace_after_keyword(&line, &mut context);
}
if line.flags().contains(TokenFlags::COMMENT) {
for (range, kind) in
whitespace_before_comment(&line.tokens(), locator, prev_line.is_none())
{
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
range,
fix: Fix::empty(),
parent: None,
});
}
}
whitespace_before_comment(&line, locator, prev_line.is_none(), &mut context);
}
if line.flags().contains(TokenFlags::BRACKET) {
for diagnostic in whitespace_before_parameters(
&line.tokens(),
whitespace_before_parameters(
&line,
should_fix_whitespace_before_parameters,
) {
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
}
}
&mut context,
);
}
// Extract the indentation level.
@ -185,12 +112,7 @@ pub fn check_logical_lines(
indent_size,
) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
range,
fix: Fix::empty(),
parent: None,
});
context.push(kind, range);
}
}
@ -199,7 +121,40 @@ pub fn check_logical_lines(
prev_indent_level = Some(indent_level);
}
}
diagnostics
context.diagnostics
}
/// Shared context threaded through all logical-line rule checks.
///
/// Collects the diagnostics produced by the individual pycodestyle
/// logical-line rule functions and filters them against the enabled rule
/// set, so each rule no longer needs to allocate its own `Vec` or perform
/// its own `rules.enabled` check.
#[derive(Debug, Clone)]
pub(crate) struct LogicalLinesContext<'a> {
    // Consulted to test whether a diagnostic's rule is enabled before storing it.
    settings: &'a Settings,
    // Accumulated diagnostics across all checked logical lines.
    diagnostics: Vec<Diagnostic>,
}

impl<'a> LogicalLinesContext<'a> {
    /// Creates an empty context that filters diagnostics against `settings`.
    fn new(settings: &'a Settings) -> Self {
        Self {
            settings,
            diagnostics: Vec::new(),
        }
    }

    /// Records a fix-less diagnostic of `kind` at `range`, if its rule is enabled.
    ///
    /// Accepts anything convertible into a [`DiagnosticKind`] so rule functions
    /// can pass their violation structs directly.
    pub fn push<K: Into<DiagnosticKind>>(&mut self, kind: K, range: TextRange) {
        let kind = kind.into();
        if self.settings.rules.enabled(kind.rule()) {
            self.diagnostics.push(Diagnostic {
                kind,
                range,
                fix: Fix::empty(),
                parent: None,
            });
        }
    }

    /// Records a pre-built [`Diagnostic`] (e.g. one already carrying a fix),
    /// if its rule is enabled.
    pub fn push_diagnostic(&mut self, diagnostic: Diagnostic) {
        if self.settings.rules.enabled(diagnostic.kind.rule()) {
            self.diagnostics.push(diagnostic);
        }
    }
}
#[cfg(test)]

View File

@ -1,6 +1,7 @@
use ruff_text_size::TextSize;
use ruff_text_size::TextRange;
use super::{LogicalLine, Whitespace};
use crate::checkers::logical_lines::LogicalLinesContext;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
@ -101,17 +102,15 @@ impl Violation for WhitespaceBeforePunctuation {
}
/// E201, E202, E203
pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(TextSize, DiagnosticKind)> {
let mut diagnostics = vec![];
let mut last_token: Option<TokenKind> = None;
pub(crate) fn extraneous_whitespace(line: &LogicalLine, context: &mut LogicalLinesContext) {
let mut last_token = TokenKind::EndOfFile;
for token in line.tokens() {
let kind = token.kind();
match kind {
TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => {
if !matches!(line.trailing_whitespace(&token), Whitespace::None) {
let end = token.end();
diagnostics.push((end, WhitespaceAfterOpenBracket.into()));
if !matches!(line.trailing_whitespace(token), Whitespace::None) {
context.push(WhitespaceAfterOpenBracket, TextRange::empty(token.end()));
}
}
TokenKind::Rbrace
@ -120,19 +119,20 @@ pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(TextSize, Diagno
| TokenKind::Comma
| TokenKind::Semi
| TokenKind::Colon => {
let diagnostic_kind =
if matches!(kind, TokenKind::Comma | TokenKind::Semi | TokenKind::Colon) {
DiagnosticKind::from(WhitespaceBeforePunctuation)
} else {
DiagnosticKind::from(WhitespaceBeforeCloseBracket)
};
if let (Whitespace::Single | Whitespace::Many | Whitespace::Tab, offset) =
line.leading_whitespace(&token)
line.leading_whitespace(token)
{
if !matches!(last_token, Some(TokenKind::Comma)) {
let start = token.start();
diagnostics.push((start - offset, diagnostic_kind));
if !matches!(last_token, TokenKind::Comma | TokenKind::EndOfFile) {
let diagnostic_kind = if matches!(
kind,
TokenKind::Comma | TokenKind::Semi | TokenKind::Colon
) {
DiagnosticKind::from(WhitespaceBeforePunctuation)
} else {
DiagnosticKind::from(WhitespaceBeforeCloseBracket)
};
context.push(diagnostic_kind, TextRange::empty(token.start() - offset));
}
}
}
@ -140,8 +140,6 @@ pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(TextSize, Diagno
_ => {}
}
last_token = Some(kind);
last_token = kind;
}
diagnostics
}

View File

@ -1,9 +1,10 @@
use super::LogicalLine;
use crate::checkers::logical_lines::LogicalLinesContext;
use ruff_diagnostics::Edit;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::token_kind::TokenKind;
use ruff_text_size::TextRange;
use ruff_text_size::{TextRange, TextSize};
#[violation]
pub struct MissingWhitespace {
@ -35,12 +36,14 @@ impl AlwaysAutofixableViolation for MissingWhitespace {
}
/// E231
pub(crate) fn missing_whitespace(line: &LogicalLine, autofix: bool) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
pub(crate) fn missing_whitespace(
line: &LogicalLine,
autofix: bool,
context: &mut LogicalLinesContext,
) {
let mut open_parentheses = 0u32;
let mut prev_lsqb = None;
let mut prev_lbrace = None;
let mut prev_lsqb = TextSize::default();
let mut prev_lbrace = TextSize::default();
let mut iter = line.tokens().iter().peekable();
while let Some(token) = iter.next() {
@ -48,17 +51,17 @@ pub(crate) fn missing_whitespace(line: &LogicalLine, autofix: bool) -> Vec<Diagn
match kind {
TokenKind::Lsqb => {
open_parentheses += 1;
prev_lsqb = Some(token.start());
prev_lsqb = token.start();
}
TokenKind::Rsqb => {
open_parentheses += 1;
}
TokenKind::Lbrace => {
prev_lbrace = Some(token.start());
prev_lbrace = token.start();
}
TokenKind::Comma | TokenKind::Semi | TokenKind::Colon => {
let after = line.text_after(&token);
let after = line.text_after(token);
if !after.chars().next().map_or(false, char::is_whitespace) {
if let Some(next_token) = iter.peek() {
@ -85,11 +88,10 @@ pub(crate) fn missing_whitespace(line: &LogicalLine, autofix: bool) -> Vec<Diagn
if autofix {
diagnostic.set_fix(Edit::insertion(" ".to_string(), token.end()));
}
diagnostics.push(diagnostic);
context.push_diagnostic(diagnostic);
}
}
_ => {}
}
}
diagnostics
}

View File

@ -1,13 +1,12 @@
use itertools::Itertools;
use ruff_text_size::TextSize;
use ruff_text_size::TextRange;
use ruff_diagnostics::DiagnosticKind;
use crate::checkers::logical_lines::LogicalLinesContext;
use crate::rules::pycodestyle::rules::logical_lines::LogicalLine;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::token_kind::TokenKind;
use super::LogicalLineTokens;
#[violation]
pub struct MissingWhitespaceAfterKeyword;
@ -20,11 +19,10 @@ impl Violation for MissingWhitespaceAfterKeyword {
/// E275
pub(crate) fn missing_whitespace_after_keyword(
tokens: &LogicalLineTokens,
) -> Vec<(TextSize, DiagnosticKind)> {
let mut diagnostics = vec![];
for (tok0, tok1) in tokens.iter().tuple_windows() {
line: &LogicalLine,
context: &mut LogicalLinesContext,
) {
for (tok0, tok1) in line.tokens().iter().tuple_windows() {
let tok0_kind = tok0.kind();
let tok1_kind = tok1.kind();
@ -36,8 +34,7 @@ pub(crate) fn missing_whitespace_after_keyword(
|| matches!(tok1_kind, TokenKind::Colon | TokenKind::Newline))
&& tok0.end() == tok1.start()
{
diagnostics.push((tok0.end(), MissingWhitespaceAfterKeyword.into()));
context.push(MissingWhitespaceAfterKeyword, TextRange::empty(tok0.end()));
}
}
diagnostics
}

View File

@ -1,10 +1,10 @@
use ruff_diagnostics::DiagnosticKind;
use crate::checkers::logical_lines::LogicalLinesContext;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::token_kind::TokenKind;
use ruff_text_size::TextSize;
use ruff_text_size::{TextRange, TextSize};
use crate::rules::pycodestyle::rules::logical_lines::LogicalLineTokens;
use crate::rules::pycodestyle::rules::logical_lines::LogicalLine;
// E225
#[violation]
@ -53,18 +53,24 @@ impl Violation for MissingWhitespaceAroundModuloOperator {
/// E225, E226, E227, E228
#[allow(clippy::if_same_then_else)]
pub(crate) fn missing_whitespace_around_operator(
tokens: &LogicalLineTokens,
) -> Vec<(TextSize, DiagnosticKind)> {
let mut diagnostics = vec![];
line: &LogicalLine,
context: &mut LogicalLinesContext,
) {
#[derive(Copy, Clone, Eq, PartialEq)]
enum NeedsSpace {
Yes,
No,
Unset,
}
let mut needs_space_main: Option<bool> = Some(false);
let mut needs_space_aux: Option<bool> = None;
let mut prev_end_aux: Option<TextSize> = None;
let mut needs_space_main = NeedsSpace::No;
let mut needs_space_aux = NeedsSpace::Unset;
let mut prev_end_aux = TextSize::default();
let mut parens = 0u32;
let mut prev_type: Option<TokenKind> = None;
let mut prev_end: Option<TextSize> = None;
let mut prev_type: TokenKind = TokenKind::EndOfFile;
let mut prev_end = TextSize::default();
for token in tokens {
for token in line.tokens() {
let kind = token.kind();
if kind.is_skip_comment() {
@ -77,100 +83,104 @@ pub(crate) fn missing_whitespace_around_operator(
_ => {}
};
let needs_space =
needs_space_main == Some(true) || needs_space_aux.is_some() || prev_end_aux.is_some();
let needs_space = needs_space_main == NeedsSpace::Yes
|| needs_space_aux != NeedsSpace::Unset
|| prev_end_aux != TextSize::new(0);
if needs_space {
if Some(token.start()) != prev_end {
if needs_space_main != Some(true) && needs_space_aux != Some(true) {
diagnostics.push((
prev_end_aux.unwrap(),
MissingWhitespaceAroundOperator.into(),
));
if token.start() > prev_end {
if needs_space_main != NeedsSpace::Yes && needs_space_aux != NeedsSpace::Yes {
context.push(
MissingWhitespaceAroundOperator,
TextRange::empty(prev_end_aux),
);
}
needs_space_main = Some(false);
needs_space_aux = None;
prev_end_aux = None;
needs_space_main = NeedsSpace::No;
needs_space_aux = NeedsSpace::Unset;
prev_end_aux = TextSize::new(0);
} else if kind == TokenKind::Greater
&& matches!(prev_type, Some(TokenKind::Less | TokenKind::Minus))
&& matches!(prev_type, TokenKind::Less | TokenKind::Minus)
{
// Tolerate the "<>" operator, even if running Python 3
// Deal with Python 3's annotated return value "->"
} else if prev_type == Some(TokenKind::Slash)
} else if prev_type == TokenKind::Slash
&& matches!(kind, TokenKind::Comma | TokenKind::Rpar | TokenKind::Colon)
|| (prev_type == Some(TokenKind::Rpar) && kind == TokenKind::Colon)
|| (prev_type == TokenKind::Rpar && kind == TokenKind::Colon)
{
// Tolerate the "/" operator in function definition
// For more info see PEP570
} else {
if needs_space_main == Some(true) || needs_space_aux == Some(true) {
diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
} else if prev_type != Some(TokenKind::DoubleStar) {
if prev_type == Some(TokenKind::Percent) {
diagnostics.push((
prev_end_aux.unwrap(),
MissingWhitespaceAroundModuloOperator.into(),
));
} else if !prev_type.unwrap().is_arithmetic() {
diagnostics.push((
prev_end_aux.unwrap(),
MissingWhitespaceAroundBitwiseOrShiftOperator.into(),
));
if needs_space_main == NeedsSpace::Yes || needs_space_aux == NeedsSpace::Yes {
context.push(MissingWhitespaceAroundOperator, TextRange::empty(prev_end));
} else if prev_type != TokenKind::DoubleStar {
if prev_type == TokenKind::Percent {
context.push(
MissingWhitespaceAroundModuloOperator,
TextRange::empty(prev_end_aux),
);
} else if !prev_type.is_arithmetic() {
context.push(
MissingWhitespaceAroundBitwiseOrShiftOperator,
TextRange::empty(prev_end_aux),
);
} else {
diagnostics.push((
prev_end_aux.unwrap(),
MissingWhitespaceAroundArithmeticOperator.into(),
));
context.push(
MissingWhitespaceAroundArithmeticOperator,
TextRange::empty(prev_end_aux),
);
}
}
needs_space_main = Some(false);
needs_space_aux = None;
prev_end_aux = None;
needs_space_main = NeedsSpace::No;
needs_space_aux = NeedsSpace::Unset;
prev_end_aux = TextSize::new(0);
}
} else if (kind.is_operator() || matches!(kind, TokenKind::Name)) && prev_end.is_some() {
} else if (kind.is_operator() || matches!(kind, TokenKind::Name))
&& prev_end != TextSize::default()
{
if kind == TokenKind::Equal && parens > 0 {
// Allow keyword args or defaults: foo(bar=None).
} else if kind.is_whitespace_needed() {
needs_space_main = Some(true);
needs_space_aux = None;
prev_end_aux = None;
needs_space_main = NeedsSpace::Yes;
needs_space_aux = NeedsSpace::Unset;
prev_end_aux = TextSize::new(0);
} else if kind.is_unary() {
// Check if the operator is used as a binary operator
// Allow unary operators: -123, -x, +1.
// Allow argument unpacking: foo(*args, **kwargs)
if let Some(prev_type) = prev_type {
if (matches!(
prev_type,
TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
)) || (!prev_type.is_operator() && !prev_type.is_keyword())
&& (!prev_type.is_soft_keyword())
{
needs_space_main = None;
needs_space_aux = None;
prev_end_aux = None;
}
if (matches!(
prev_type,
TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
)) || (!prev_type.is_operator()
&& !prev_type.is_keyword()
&& !prev_type.is_soft_keyword())
{
needs_space_main = NeedsSpace::Unset;
needs_space_aux = NeedsSpace::Unset;
prev_end_aux = TextSize::new(0);
}
} else if kind.is_whitespace_optional() {
needs_space_main = None;
needs_space_aux = None;
prev_end_aux = None;
needs_space_main = NeedsSpace::Unset;
needs_space_aux = NeedsSpace::Unset;
prev_end_aux = TextSize::new(0);
}
if needs_space_main.is_none() {
if needs_space_main == NeedsSpace::Unset {
// Surrounding space is optional, but ensure that
// trailing space matches opening space
prev_end_aux = prev_end;
needs_space_aux = Some(Some(token.start()) != prev_end_aux);
} else if needs_space_main == Some(true) && Some(token.start()) == prev_end_aux {
needs_space_aux = if token.start() == prev_end {
NeedsSpace::No
} else {
NeedsSpace::Yes
};
} else if needs_space_main == NeedsSpace::Yes && token.start() == prev_end_aux {
// A needed opening space was not found
diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
needs_space_main = Some(false);
needs_space_aux = None;
prev_end_aux = None;
context.push(MissingWhitespaceAroundOperator, TextRange::empty(prev_end));
needs_space_main = NeedsSpace::No;
needs_space_aux = NeedsSpace::Unset;
prev_end_aux = TextSize::new(0);
}
}
prev_type = Some(kind);
prev_end = Some(token.end());
prev_type = kind;
prev_end = token.end();
}
diagnostics
}

View File

@ -77,7 +77,7 @@ bitflags! {
#[derive(Clone)]
pub(crate) struct LogicalLines<'a> {
tokens: Tokens,
tokens: Vec<LogicalLineToken>,
lines: Vec<Line>,
locator: &'a Locator<'a>,
}
@ -160,65 +160,69 @@ impl<'a> LogicalLine<'a> {
/// Returns logical line's text including comments, indents, dedent and trailing new lines.
pub fn text(&self) -> &'a str {
self.tokens().text()
let tokens = self.tokens();
match (tokens.first(), tokens.last()) {
(Some(first), Some(last)) => self
.lines
.locator
.slice(TextRange::new(first.start(), last.end())),
_ => "",
}
}
/// Returns the text without any leading or trailing newline, comment, indent, or dedent of this line
#[cfg(test)]
pub fn text_trimmed(&self) -> &'a str {
self.tokens_trimmed().text()
let tokens = self.tokens_trimmed();
match (tokens.first(), tokens.last()) {
(Some(first), Some(last)) => self
.lines
.locator
.slice(TextRange::new(first.start(), last.end())),
_ => "",
}
}
pub fn tokens_trimmed(&self) -> LogicalLineTokens<'a> {
let mut front = self.line.tokens_start as usize;
let mut back = self.line.tokens_end as usize;
pub fn tokens_trimmed(&self) -> &'a [LogicalLineToken] {
let tokens = self.tokens();
let mut kinds = self.lines.tokens.kinds[front..back].iter();
let start = tokens
.iter()
.position(|t| {
!matches!(
t.kind(),
TokenKind::Newline
| TokenKind::NonLogicalNewline
| TokenKind::Indent
| TokenKind::Dedent
| TokenKind::Comment,
)
})
.unwrap_or(tokens.len());
for kind in kinds.by_ref() {
if !matches!(
kind,
TokenKind::Newline
| TokenKind::NonLogicalNewline
| TokenKind::Indent
| TokenKind::Dedent
| TokenKind::Comment
) {
break;
}
front += 1;
}
let tokens = &tokens[start..];
for kind in kinds.rev() {
if !matches!(
kind,
TokenKind::Newline
| TokenKind::NonLogicalNewline
| TokenKind::Indent
| TokenKind::Dedent
| TokenKind::Comment
) {
break;
}
back -= 1;
}
let end = tokens
.iter()
.rposition(|t| {
!matches!(
t.kind(),
TokenKind::Newline
| TokenKind::NonLogicalNewline
| TokenKind::Indent
| TokenKind::Dedent
| TokenKind::Comment,
)
})
.map_or(0, |pos| pos + 1);
LogicalLineTokens {
lines: self.lines,
front,
back,
}
&tokens[..end]
}
/// Returns the text after `token`
#[inline]
pub fn text_after(&self, token: &LogicalLineToken<'a>) -> &str {
debug_assert!(
(self.line.tokens_start as usize..self.line.tokens_end as usize)
.contains(&token.position),
"Token does not belong to this line"
);
pub fn text_after(&self, token: &'a LogicalLineToken) -> &str {
// SAFETY: The line must have at least one token or `token` would not belong to this line.
let last_token = self.tokens().last().unwrap();
self.lines
@ -228,13 +232,7 @@ impl<'a> LogicalLine<'a> {
/// Returns the text before `token`
#[inline]
pub fn text_before(&self, token: &LogicalLineToken<'a>) -> &str {
debug_assert!(
(self.line.tokens_start as usize..self.line.tokens_end as usize)
.contains(&token.position),
"Token does not belong to this line"
);
pub fn text_before(&self, token: &'a LogicalLineToken) -> &str {
// SAFETY: The line must have at least one token or `token` would not belong to this line.
let first_token = self.tokens().first().unwrap();
self.lines
@ -243,25 +241,21 @@ impl<'a> LogicalLine<'a> {
}
/// Returns the whitespace *after* the `token`
pub fn trailing_whitespace(&self, token: &LogicalLineToken<'a>) -> Whitespace {
pub fn trailing_whitespace(&self, token: &'a LogicalLineToken) -> Whitespace {
Whitespace::leading(self.text_after(token))
}
/// Returns the whitespace and whitespace byte-length *before* the `token`
pub fn leading_whitespace(&self, token: &LogicalLineToken<'a>) -> (Whitespace, TextSize) {
pub fn leading_whitespace(&self, token: &'a LogicalLineToken) -> (Whitespace, TextSize) {
Whitespace::trailing(self.text_before(token))
}
/// Returns all tokens of the line, including comments and trailing new lines.
pub fn tokens(&self) -> LogicalLineTokens<'a> {
LogicalLineTokens {
lines: self.lines,
front: self.line.tokens_start as usize,
back: self.line.tokens_end as usize,
}
pub fn tokens(&self) -> &'a [LogicalLineToken] {
&self.lines.tokens[self.line.tokens_start as usize..self.line.tokens_end as usize]
}
pub fn first_token(&self) -> Option<LogicalLineToken> {
pub fn first_token(&self) -> Option<&'a LogicalLineToken> {
self.tokens().first()
}
@ -322,160 +316,36 @@ impl ExactSizeIterator for LogicalLinesIter<'_> {}
impl FusedIterator for LogicalLinesIter<'_> {}
/// The tokens of a logical line
pub(crate) struct LogicalLineTokens<'a> {
lines: &'a LogicalLines<'a>,
front: usize,
back: usize,
}
impl<'a> LogicalLineTokens<'a> {
pub fn iter(&self) -> LogicalLineTokensIter<'a> {
LogicalLineTokensIter {
tokens: &self.lines.tokens,
front: self.front,
back: self.back,
}
}
pub fn text(&self) -> &'a str {
match (self.first(), self.last()) {
(Some(first), Some(last)) => {
let locator = self.lines.locator;
locator.slice(TextRange::new(first.start(), last.end()))
}
_ => "",
}
}
/// Returns the first token
pub fn first(&self) -> Option<LogicalLineToken<'a>> {
self.iter().next()
}
/// Returns the last token
pub fn last(&self) -> Option<LogicalLineToken<'a>> {
self.iter().next_back()
}
}
impl<'a> IntoIterator for LogicalLineTokens<'a> {
type Item = LogicalLineToken<'a>;
type IntoIter = LogicalLineTokensIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl<'a> IntoIterator for &LogicalLineTokens<'a> {
type Item = LogicalLineToken<'a>;
type IntoIter = LogicalLineTokensIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl Debug for LogicalLineTokens<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_list().entries(self.iter()).finish()
}
}
/// Iterator over the tokens of a [`LogicalLine`]
pub(crate) struct LogicalLineTokensIter<'a> {
tokens: &'a Tokens,
front: usize,
back: usize,
}
impl<'a> Iterator for LogicalLineTokensIter<'a> {
type Item = LogicalLineToken<'a>;
fn next(&mut self) -> Option<Self::Item> {
if self.front < self.back {
let result = Some(LogicalLineToken {
tokens: self.tokens,
position: self.front,
});
self.front += 1;
result
} else {
None
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.back - self.front;
(len, Some(len))
}
}
impl ExactSizeIterator for LogicalLineTokensIter<'_> {}
impl FusedIterator for LogicalLineTokensIter<'_> {}
impl DoubleEndedIterator for LogicalLineTokensIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.front < self.back {
self.back -= 1;
Some(LogicalLineToken {
position: self.back,
tokens: self.tokens,
})
} else {
None
}
}
}
/// A token of a [`LogicalLine`]
#[derive(Clone)]
pub(crate) struct LogicalLineToken<'a> {
tokens: &'a Tokens,
position: usize,
#[derive(Clone, Debug)]
pub(crate) struct LogicalLineToken {
kind: TokenKind,
range: TextRange,
}
impl<'a> LogicalLineToken<'a> {
impl LogicalLineToken {
/// Returns the token's kind
#[inline]
pub fn kind(&self) -> TokenKind {
#[allow(unsafe_code)]
unsafe {
*self.tokens.kinds.get_unchecked(self.position)
}
pub const fn kind(&self) -> TokenKind {
self.kind
}
/// Returns the token's start location
#[inline]
pub fn start(&self) -> TextSize {
self.range().start()
pub const fn start(&self) -> TextSize {
self.range.start()
}
/// Returns the token's end location
#[inline]
pub fn end(&self) -> TextSize {
self.range().end()
pub const fn end(&self) -> TextSize {
self.range.end()
}
/// Returns a tuple with the token's `(start, end)` locations
#[inline]
pub fn range(&self) -> TextRange {
#[allow(unsafe_code)]
unsafe {
*self.tokens.ranges.get_unchecked(self.position)
}
}
}
impl Debug for LogicalLineToken<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalLineToken")
.field("kind", &self.kind())
.field("range", &self.range())
.finish()
pub const fn range(&self) -> TextRange {
self.range
}
}
@ -552,15 +422,15 @@ struct CurrentLine {
/// Builder for [`LogicalLines`]
#[derive(Debug, Default)]
struct LogicalLinesBuilder {
tokens: Tokens,
tokens: Vec<LogicalLineToken>,
lines: Vec<Line>,
current_line: Option<CurrentLine>,
current_line: CurrentLine,
}
impl LogicalLinesBuilder {
fn with_capacity(tokens: usize) -> Self {
Self {
tokens: Tokens::with_capacity(tokens),
tokens: Vec::with_capacity(tokens),
..Self::default()
}
}
@ -568,12 +438,7 @@ impl LogicalLinesBuilder {
// SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn push_token(&mut self, kind: TokenKind, range: TextRange) {
let tokens_start = self.tokens.len();
let line = self.current_line.get_or_insert_with(|| CurrentLine {
flags: TokenFlags::empty(),
tokens_start: tokens_start as u32,
});
let line = &mut self.current_line;
if matches!(kind, TokenKind::Comment) {
line.flags.insert(TokenFlags::COMMENT);
@ -612,18 +477,24 @@ impl LogicalLinesBuilder {
),
);
self.tokens.push(kind, range);
self.tokens.push(LogicalLineToken { kind, range });
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn finish_line(&mut self) {
if let Some(current) = self.current_line.take() {
let end = self.tokens.len() as u32;
if self.current_line.tokens_start < end {
self.lines.push(Line {
flags: current.flags,
tokens_start: current.tokens_start,
tokens_end: self.tokens.len() as u32,
flags: self.current_line.flags,
tokens_start: self.current_line.tokens_start,
tokens_end: end,
});
self.current_line = CurrentLine {
flags: TokenFlags::default(),
tokens_start: end,
}
}
}
@ -644,33 +515,3 @@ struct Line {
tokens_start: u32,
tokens_end: u32,
}
#[derive(Debug, Clone, Default)]
struct Tokens {
/// The token kinds
kinds: Vec<TokenKind>,
/// The ranges
ranges: Vec<TextRange>,
}
impl Tokens {
/// Creates new tokens with a reserved size of `capacity`
fn with_capacity(capacity: usize) -> Self {
Self {
kinds: Vec::with_capacity(capacity),
ranges: Vec::with_capacity(capacity),
}
}
/// Returns the number of stored tokens.
fn len(&self) -> usize {
self.kinds.len()
}
/// Adds a new token with the given `kind` and `range`
fn push(&mut self, kind: TokenKind, range: TextRange) {
self.kinds.push(kind);
self.ranges.push(range);
}
}

View File

@ -1,7 +1,7 @@
use ruff_text_size::TextSize;
use ruff_text_size::TextRange;
use super::{LogicalLine, Whitespace};
use ruff_diagnostics::DiagnosticKind;
use crate::checkers::logical_lines::LogicalLinesContext;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::token_kind::TokenKind;
@ -123,8 +123,7 @@ impl Violation for MultipleSpacesAfterOperator {
}
/// E221, E222, E223, E224
pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(TextSize, DiagnosticKind)> {
let mut diagnostics = vec![];
pub(crate) fn space_around_operator(line: &LogicalLine, context: &mut LogicalLinesContext) {
let mut after_operator = false;
for token in line.tokens() {
@ -132,27 +131,30 @@ pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(TextSize, Diagno
if is_operator {
if !after_operator {
match line.leading_whitespace(&token) {
match line.leading_whitespace(token) {
(Whitespace::Tab, offset) => {
let start = token.start();
diagnostics.push((start - offset, TabBeforeOperator.into()));
context.push(TabBeforeOperator, TextRange::empty(start - offset));
}
(Whitespace::Many, offset) => {
let start = token.start();
diagnostics.push((start - offset, MultipleSpacesBeforeOperator.into()));
context.push(
MultipleSpacesBeforeOperator,
TextRange::empty(start - offset),
);
}
_ => {}
}
}
match line.trailing_whitespace(&token) {
match line.trailing_whitespace(token) {
Whitespace::Tab => {
let end = token.end();
diagnostics.push((end, TabAfterOperator.into()));
context.push(TabAfterOperator, TextRange::empty(end));
}
Whitespace::Many => {
let end = token.end();
diagnostics.push((end, MultipleSpacesAfterOperator.into()));
context.push(MultipleSpacesAfterOperator, TextRange::empty(end));
}
_ => {}
}
@ -160,8 +162,6 @@ pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(TextSize, Diagno
after_operator = is_operator;
}
diagnostics
}
const fn is_operator_token(token: TokenKind) -> bool {

View File

@ -1,8 +1,8 @@
use super::{LogicalLine, Whitespace};
use ruff_diagnostics::DiagnosticKind;
use crate::checkers::logical_lines::LogicalLinesContext;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_text_size::TextSize;
use ruff_text_size::TextRange;
/// ## What it does
/// Checks for extraneous whitespace after keywords.
@ -108,35 +108,37 @@ impl Violation for TabBeforeKeyword {
}
/// E271, E272, E273, E274
pub(crate) fn whitespace_around_keywords(line: &LogicalLine) -> Vec<(TextSize, DiagnosticKind)> {
let mut diagnostics = vec![];
pub(crate) fn whitespace_around_keywords(line: &LogicalLine, context: &mut LogicalLinesContext) {
let mut after_keyword = false;
for token in line.tokens() {
let is_keyword = token.kind().is_keyword();
if is_keyword {
if !after_keyword {
match line.leading_whitespace(&token) {
match line.leading_whitespace(token) {
(Whitespace::Tab, offset) => {
let start = token.start();
diagnostics.push((start - offset, TabBeforeKeyword.into()));
context.push(TabBeforeKeyword, TextRange::empty(start - offset));
}
(Whitespace::Many, offset) => {
let start = token.start();
diagnostics.push((start - offset, MultipleSpacesBeforeKeyword.into()));
context.push(
MultipleSpacesBeforeKeyword,
TextRange::empty(start - offset),
);
}
_ => {}
}
}
match line.trailing_whitespace(&token) {
match line.trailing_whitespace(token) {
Whitespace::Tab => {
let end = token.end();
diagnostics.push((end, TabAfterKeyword.into()));
context.push(TabAfterKeyword, TextRange::empty(end));
}
Whitespace::Many => {
let end = token.end();
diagnostics.push((end, MultipleSpacesAfterKeyword.into()));
context.push(MultipleSpacesAfterKeyword, TextRange::empty(end));
}
_ => {}
}
@ -144,6 +146,4 @@ pub(crate) fn whitespace_around_keywords(line: &LogicalLine) -> Vec<(TextSize, D
after_keyword = is_keyword;
}
diagnostics
}

View File

@ -1,10 +1,9 @@
use ruff_diagnostics::DiagnosticKind;
use crate::checkers::logical_lines::LogicalLinesContext;
use crate::rules::pycodestyle::rules::logical_lines::{LogicalLine, LogicalLineToken};
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::token_kind::TokenKind;
use ruff_text_size::TextSize;
use super::LogicalLineTokens;
use ruff_text_size::{TextRange, TextSize};
#[violation]
pub struct UnexpectedSpacesAroundKeywordParameterEquals;
@ -26,7 +25,7 @@ impl Violation for MissingWhitespaceAroundParameterEquals {
}
}
fn is_in_def(tokens: &LogicalLineTokens) -> bool {
fn is_in_def(tokens: &[LogicalLineToken]) -> bool {
for token in tokens {
match token.kind() {
TokenKind::Async | TokenKind::Indent | TokenKind::Dedent => continue,
@ -40,15 +39,15 @@ fn is_in_def(tokens: &LogicalLineTokens) -> bool {
/// E251, E252
pub(crate) fn whitespace_around_named_parameter_equals(
tokens: &LogicalLineTokens,
) -> Vec<(TextSize, DiagnosticKind)> {
let mut diagnostics = vec![];
line: &LogicalLine,
context: &mut LogicalLinesContext,
) {
let mut parens = 0u32;
let mut annotated_func_arg = false;
let mut prev_end: Option<TextSize> = None;
let mut prev_end = TextSize::default();
let in_def = is_in_def(tokens);
let mut iter = tokens.iter().peekable();
let in_def = is_in_def(line.tokens());
let mut iter = line.tokens().iter().peekable();
while let Some(token) = iter.next() {
let kind = token.kind();
@ -78,8 +77,11 @@ pub(crate) fn whitespace_around_named_parameter_equals(
TokenKind::Equal if parens > 0 => {
if annotated_func_arg && parens == 1 {
let start = token.start();
if Some(start) == prev_end {
diagnostics.push((start, MissingWhitespaceAroundParameterEquals.into()));
if start == prev_end && prev_end != TextSize::new(0) {
context.push(
MissingWhitespaceAroundParameterEquals,
TextRange::empty(start),
);
}
while let Some(next) = iter.peek() {
@ -89,20 +91,20 @@ pub(crate) fn whitespace_around_named_parameter_equals(
let next_start = next.start();
if next_start == token.end() {
diagnostics.push((
next_start,
MissingWhitespaceAroundParameterEquals.into(),
));
context.push(
MissingWhitespaceAroundParameterEquals,
TextRange::empty(next_start),
);
}
break;
}
}
} else {
if Some(token.start()) != prev_end {
diagnostics.push((
prev_end.unwrap(),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
if token.start() != prev_end {
context.push(
UnexpectedSpacesAroundKeywordParameterEquals,
TextRange::empty(prev_end),
);
}
while let Some(next) = iter.peek() {
@ -110,10 +112,10 @@ pub(crate) fn whitespace_around_named_parameter_equals(
iter.next();
} else {
if next.start() != token.end() {
diagnostics.push((
token.end(),
UnexpectedSpacesAroundKeywordParameterEquals.into(),
));
context.push(
UnexpectedSpacesAroundKeywordParameterEquals,
TextRange::empty(token.end()),
);
}
break;
}
@ -123,7 +125,6 @@ pub(crate) fn whitespace_around_named_parameter_equals(
_ => {}
}
prev_end = Some(token.end());
prev_end = token.end();
}
diagnostics
}

View File

@ -1,5 +1,5 @@
use super::LogicalLineTokens;
use ruff_diagnostics::DiagnosticKind;
use crate::checkers::logical_lines::LogicalLinesContext;
use crate::rules::pycodestyle::rules::logical_lines::LogicalLine;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;
@ -137,13 +137,13 @@ impl Violation for MultipleLeadingHashesForBlockComment {
/// E261, E262, E265, E266
pub(crate) fn whitespace_before_comment(
tokens: &LogicalLineTokens,
line: &LogicalLine,
locator: &Locator,
is_first_row: bool,
) -> Vec<(TextRange, DiagnosticKind)> {
let mut diagnostics = vec![];
context: &mut LogicalLinesContext,
) {
let mut prev_end = TextSize::default();
for token in tokens {
for token in line.tokens() {
let kind = token.kind();
if let TokenKind::Comment = kind {
@ -158,10 +158,10 @@ pub(crate) fn whitespace_before_comment(
let is_inline_comment = !line.trim().is_empty();
if is_inline_comment {
if range.start() - prev_end < " ".text_len() {
diagnostics.push((
context.push(
TooFewSpacesBeforeInlineComment,
TextRange::new(prev_end, range.start()),
TooFewSpacesBeforeInlineComment.into(),
));
);
}
}
@ -179,14 +179,14 @@ pub(crate) fn whitespace_before_comment(
if is_inline_comment {
if bad_prefix.is_some() || comment.chars().next().map_or(false, char::is_whitespace)
{
diagnostics.push((range, NoSpaceAfterInlineComment.into()));
context.push(NoSpaceAfterInlineComment, range);
}
} else if let Some(bad_prefix) = bad_prefix {
if bad_prefix != '!' || !is_first_row {
if bad_prefix != '#' {
diagnostics.push((range, NoSpaceAfterBlockComment.into()));
context.push(NoSpaceAfterBlockComment, range);
} else if !comment.is_empty() {
diagnostics.push((range, MultipleLeadingHashesForBlockComment.into()));
context.push(MultipleLeadingHashesForBlockComment, range);
}
}
}
@ -194,5 +194,4 @@ pub(crate) fn whitespace_before_comment(
prev_end = token.end();
}
}
diagnostics
}

View File

@ -1,10 +1,10 @@
use crate::checkers::logical_lines::LogicalLinesContext;
use crate::rules::pycodestyle::rules::logical_lines::LogicalLine;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::token_kind::TokenKind;
use ruff_text_size::{TextRange, TextSize};
use super::LogicalLineTokens;
#[violation]
pub struct WhitespaceBeforeParameters {
pub bracket: TokenKind,
@ -35,17 +35,17 @@ impl AlwaysAutofixableViolation for WhitespaceBeforeParameters {
/// E211
pub(crate) fn whitespace_before_parameters(
tokens: &LogicalLineTokens,
line: &LogicalLine,
autofix: bool,
) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
let previous = tokens.first().unwrap();
context: &mut LogicalLinesContext,
) {
let previous = line.tokens().first().unwrap();
let mut pre_pre_kind: Option<TokenKind> = None;
let mut prev_token = previous.kind();
let mut prev_end = previous.end();
for token in tokens {
for token in line.tokens() {
let kind = token.kind();
if matches!(kind, TokenKind::Lpar | TokenKind::Lsqb)
@ -65,11 +65,10 @@ pub(crate) fn whitespace_before_parameters(
if autofix {
diagnostic.set_fix(Edit::deletion(start, end));
}
diagnostics.push(diagnostic);
context.push_diagnostic(diagnostic);
}
pre_pre_kind = Some(prev_token);
prev_token = kind;
prev_end = token.end();
}
diagnostics
}

View File

@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
use crate::Fix;
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DiagnosticKind {
/// The identifier of the diagnostic, used to align the diagnostic with a rule.
@ -20,7 +20,7 @@ pub struct DiagnosticKind {
pub fixable: bool,
}
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Diagnostic {
pub kind: DiagnosticKind,
pub range: TextRange,

View File

@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
use crate::edit::Edit;
/// A collection of [`Edit`] elements to be applied to a source file.
#[derive(Default, Debug, PartialEq, Eq)]
#[derive(Default, Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Fix {
edits: Vec<Edit>,

View File

@ -24,14 +24,14 @@ struct LineIndexInner {
impl LineIndex {
/// Builds the [`LineIndex`] from the source text of a file.
pub fn from_source_text(text: &str) -> Self {
assert!(u32::try_from(text.len()).is_ok());
let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
line_starts.push(TextSize::default());
let bytes = text.as_bytes();
let mut utf8 = false;
assert!(u32::try_from(bytes.len()).is_ok());
for (i, byte) in bytes.iter().enumerate() {
utf8 |= !byte.is_ascii();
@ -39,7 +39,9 @@ impl LineIndex {
// Only track one line break for `\r\n`.
b'\r' if bytes.get(i + 1) == Some(&b'\n') => continue,
b'\n' | b'\r' => {
line_starts.push(TextSize::try_from(i + 1).unwrap());
// SAFETY: Assertion above guarantees `i <= u32::MAX`
#[allow(clippy::cast_possible_truncation)]
line_starts.push(TextSize::from(i as u32) + TextSize::from(1));
}
_ => {}
}

View File

@ -167,6 +167,7 @@ pub enum TokenKind {
}
impl TokenKind {
#[inline]
pub const fn is_whitespace_needed(&self) -> bool {
matches!(
self,
@ -197,6 +198,7 @@ impl TokenKind {
)
}
#[inline]
pub const fn is_whitespace_optional(&self) -> bool {
self.is_arithmetic()
|| matches!(
@ -210,6 +212,7 @@ impl TokenKind {
)
}
#[inline]
pub const fn is_unary(&self) -> bool {
matches!(
self,
@ -221,6 +224,7 @@ impl TokenKind {
)
}
#[inline]
pub const fn is_keyword(&self) -> bool {
matches!(
self,
@ -261,6 +265,7 @@ impl TokenKind {
)
}
#[inline]
pub const fn is_operator(&self) -> bool {
matches!(
self,
@ -313,10 +318,12 @@ impl TokenKind {
)
}
#[inline]
pub const fn is_singleton(&self) -> bool {
matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
}
#[inline]
pub const fn is_skip_comment(&self) -> bool {
matches!(
self,
@ -328,6 +335,7 @@ impl TokenKind {
)
}
#[inline]
pub const fn is_arithmetic(&self) -> bool {
matches!(
self,
@ -340,6 +348,7 @@ impl TokenKind {
)
}
#[inline]
pub const fn is_soft_keyword(&self) -> bool {
matches!(self, TokenKind::Match | TokenKind::Case)
}