From 4782675bf956e4d5a7bb894e032e8fbcb2cbae20 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sat, 15 Jul 2023 21:03:27 -0400 Subject: [PATCH] Remove lexer-based comment range detection (#5785) ## Summary I'm doing some unrelated profiling, and I noticed that this method is actually measurable on the CPython benchmark -- it's > 1% of execution time. We don't need to lex here, we already know the ranges of all comments, so we can just do a simple binary search for overlap, which brings the method down to 0%. ## Test Plan `cargo test` --- .../flake8_pytest_style/rules/assertion.rs | 6 ++--- .../flake8_simplify/rules/ast_bool_op.rs | 2 +- .../src/rules/flake8_simplify/rules/ast_if.rs | 19 +++++++------- .../rules/flake8_simplify/rules/ast_with.rs | 11 ++++---- .../rules/suppressible_exception.rs | 2 +- .../src/rules/pylint/rules/nested_min_max.rs | 2 +- .../rules/collection_literal_concatenation.rs | 2 +- crates/ruff_python_ast/src/helpers.rs | 25 +++---------------- .../src/source_code/comment_ranges.rs | 17 +++++++++++++ .../src/source_code/indexer.rs | 2 +- 10 files changed, 44 insertions(+), 44 deletions(-) diff --git a/crates/ruff/src/rules/flake8_pytest_style/rules/assertion.rs b/crates/ruff/src/rules/flake8_pytest_style/rules/assertion.rs index 363b327c8c..d5e4111ee5 100644 --- a/crates/ruff/src/rules/flake8_pytest_style/rules/assertion.rs +++ b/crates/ruff/src/rules/flake8_pytest_style/rules/assertion.rs @@ -11,7 +11,7 @@ use rustpython_parser::ast::{self, BoolOp, ExceptHandler, Expr, Keyword, Ranged, use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::helpers::{has_comments_in, Truthiness}; +use ruff_python_ast::helpers::Truthiness; use ruff_python_ast::source_code::{Locator, Stylist}; use ruff_python_ast::visitor::Visitor; use ruff_python_ast::{visitor, whitespace}; @@ -197,7 +197,7 @@ pub(crate) fn unittest_assertion( if checker.semantic().stmt().is_expr_stmt() && checker.semantic().expr_parent().is_none() && !checker.semantic().scope().kind.is_lambda() - && !has_comments_in(expr.range(), checker.locator) + && !checker.indexer.comment_ranges().intersects(expr.range()) { if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) { diagnostic.set_fix(Fix::suggested(Edit::range_replacement( @@ -483,7 +483,7 @@ pub(crate) fn composite_condition( if checker.patch(diagnostic.kind.rule()) { if matches!(composite, CompositionKind::Simple) && msg.is_none() - && !has_comments_in(stmt.range(), checker.locator) + && !checker.indexer.comment_ranges().intersects(stmt.range()) { #[allow(deprecated)] diagnostic.try_set_fix_from_edit(|| { diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_bool_op.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_bool_op.rs index df882a4b0e..410817550a 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_bool_op.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_bool_op.rs @@ -505,7 +505,7 @@ pub(crate) fn compare_with_tuple(checker: &mut Checker, expr: &Expr) { } // Avoid removing comments. - if has_comments(expr, checker.locator) { + if has_comments(expr, checker.locator, checker.indexer) { continue; } diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs index 7633be8993..7c181d1405 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs @@ -6,9 +6,7 @@ use rustpython_parser::ast::{self, CmpOp, Constant, Expr, ExprContext, Identifie use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt}; -use ruff_python_ast::helpers::{ - any_over_expr, contains_effect, first_colon_range, has_comments, has_comments_in, -}; +use ruff_python_ast::helpers::{any_over_expr, contains_effect, first_colon_range, has_comments}; use ruff_python_semantic::SemanticModel; use ruff_python_whitespace::UniversalNewlines; @@ -378,10 +376,11 @@ pub(crate) fn nested_if_statements( // The fixer preserves comments in the nested body, but removes comments between // the outer and inner if statements. let nested_if = &body[0]; - if !has_comments_in( - TextRange::new(stmt.start(), nested_if.start()), - checker.locator, - ) { + if !checker + .indexer + .comment_ranges() + .intersects(TextRange::new(stmt.start(), nested_if.start())) + { match fix_if::fix_nested_if_statements(checker.locator, checker.stylist, stmt) { Ok(edit) => { if edit @@ -464,7 +463,7 @@ pub(crate) fn needless_bool(checker: &mut Checker, stmt: &Stmt) { if checker.patch(diagnostic.kind.rule()) { if matches!(if_return, Bool::True) && matches!(else_return, Bool::False) - && !has_comments(stmt, checker.locator) + && !has_comments(stmt, checker.locator, checker.indexer) && (test.is_compare_expr() || checker.semantic().is_builtin("bool")) { if test.is_compare_expr() { @@ -650,7 +649,7 @@ pub(crate) fn use_ternary_operator(checker: &mut Checker, stmt: &Stmt, parent: O stmt.range(), ); if checker.patch(diagnostic.kind.rule()) { - if !has_comments(stmt, checker.locator) { + if !has_comments(stmt, checker.locator, checker.indexer) { diagnostic.set_fix(Fix::suggested(Edit::range_replacement( contents, stmt.range(), @@ -1050,7 +1049,7 @@ pub(crate) fn use_dict_get_with_default( stmt.range(), ); if checker.patch(diagnostic.kind.rule()) { - if !has_comments(stmt, checker.locator) { + if !has_comments(stmt, checker.locator, checker.indexer) { diagnostic.set_fix(Fix::suggested(Edit::range_replacement( contents, stmt.range(), diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs index 8029e2a97b..f7a2cec192 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs @@ -5,7 +5,7 @@ use rustpython_parser::ast::{self, Ranged, Stmt, WithItem}; use ruff_diagnostics::{AutofixKind, Violation}; use ruff_diagnostics::{Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::helpers::{first_colon_range, has_comments_in}; +use ruff_python_ast::helpers::first_colon_range; use ruff_python_whitespace::UniversalNewlines; use crate::checkers::ast::Checker; @@ -129,10 +129,11 @@ pub(crate) fn multiple_with_statements( ), ); if checker.patch(diagnostic.kind.rule()) { - if !has_comments_in( - TextRange::new(with_stmt.start(), with_body[0].start()), - checker.locator, - ) { + if !checker + .indexer + .comment_ranges() + .intersects(TextRange::new(with_stmt.start(), with_body[0].start())) + { match fix_with::fix_multiple_with_statements( checker.locator, checker.stylist, diff --git a/crates/ruff/src/rules/flake8_simplify/rules/suppressible_exception.rs b/crates/ruff/src/rules/flake8_simplify/rules/suppressible_exception.rs index a025d17601..7e9a119f6c 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/suppressible_exception.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/suppressible_exception.rs @@ -117,7 +117,7 @@ pub(crate) fn suppressible_exception( stmt.range(), ); if checker.patch(diagnostic.kind.rule()) { - if !has_comments(stmt, checker.locator) { + if !has_comments(stmt, checker.locator, checker.indexer) { diagnostic.try_set_fix(|| { let (import_edit, binding) = checker.importer.get_or_import_symbol( &ImportRequest::import("contextlib", "suppress"), diff --git a/crates/ruff/src/rules/pylint/rules/nested_min_max.rs b/crates/ruff/src/rules/pylint/rules/nested_min_max.rs index 05cc249fc3..aee12efe55 100644 --- a/crates/ruff/src/rules/pylint/rules/nested_min_max.rs +++ b/crates/ruff/src/rules/pylint/rules/nested_min_max.rs @@ -150,7 +150,7 @@ pub(crate) fn nested_min_max( }) { let mut diagnostic = Diagnostic::new(NestedMinMax { func: min_max }, expr.range()); if checker.patch(diagnostic.kind.rule()) { - if !has_comments(expr, checker.locator) { + if !has_comments(expr, checker.locator, checker.indexer) { let flattened_expr = Expr::Call(ast::ExprCall { func: Box::new(func.clone()), args: collect_nested_args(min_max, args, checker.semantic()), diff --git a/crates/ruff/src/rules/ruff/rules/collection_literal_concatenation.rs b/crates/ruff/src/rules/ruff/rules/collection_literal_concatenation.rs index 2a9c25725b..8d843477a0 100644 --- a/crates/ruff/src/rules/ruff/rules/collection_literal_concatenation.rs +++ b/crates/ruff/src/rules/ruff/rules/collection_literal_concatenation.rs @@ -192,7 +192,7 @@ pub(crate) fn collection_literal_concatenation(checker: &mut Checker, expr: &Exp expr.range(), ); if checker.patch(diagnostic.kind.rule()) { - if !has_comments(expr, checker.locator) { + if !has_comments(expr, checker.locator, checker.indexer) { // This suggestion could be unsafe if the non-literal expression in the // expression has overridden the `__add__` (or `__radd__`) magic methods. diagnostic.set_fix(Fix::suggested(Edit::range_replacement( diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index 33fae231d1..83c7058d4c 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -717,7 +717,7 @@ pub fn map_subscript(expr: &Expr) -> &Expr { } /// Returns `true` if a statement or expression includes at least one comment. -pub fn has_comments(node: &T, locator: &Locator) -> bool +pub fn has_comments(node: &T, locator: &Locator, indexer: &Indexer) -> bool where T: Ranged, { @@ -732,26 +732,9 @@ where locator.line_end(node.end()) }; - has_comments_in(TextRange::new(start, end), locator) -} - -/// Returns `true` if a [`TextRange`] includes at least one comment. -pub fn has_comments_in(range: TextRange, locator: &Locator) -> bool { - let source = &locator.contents()[range]; - - for tok in lexer::lex_starts_at(source, Mode::Module, range.start()) { - match tok { - Ok((tok, _)) => { - if matches!(tok, Tok::Comment(..)) { - return true; - } - } - Err(_) => { - return false; - } - } - } - false + indexer + .comment_ranges() + .intersects(TextRange::new(start, end)) } /// Return `true` if the body uses `locals()`, `globals()`, `vars()`, `eval()`. diff --git a/crates/ruff_python_ast/src/source_code/comment_ranges.rs b/crates/ruff_python_ast/src/source_code/comment_ranges.rs index 189addc418..977bc83b0d 100644 --- a/crates/ruff_python_ast/src/source_code/comment_ranges.rs +++ b/crates/ruff_python_ast/src/source_code/comment_ranges.rs @@ -10,6 +10,23 @@ pub struct CommentRanges { raw: Vec, } +impl CommentRanges { + /// Returns `true` if the given range includes a comment. + pub fn intersects(&self, target: TextRange) -> bool { + self.raw + .binary_search_by(|range| { + if target.contains_range(*range) { + std::cmp::Ordering::Equal + } else if range.end() < target.start() { + std::cmp::Ordering::Less + } else { + std::cmp::Ordering::Greater + } + }) + .is_ok() + } +} + impl Deref for CommentRanges { type Target = [TextRange]; diff --git a/crates/ruff_python_ast/src/source_code/indexer.rs b/crates/ruff_python_ast/src/source_code/indexer.rs index 23227965a5..c70e8378fc 100644 --- a/crates/ruff_python_ast/src/source_code/indexer.rs +++ b/crates/ruff_python_ast/src/source_code/indexer.rs @@ -93,7 +93,7 @@ impl Indexer { } /// Returns the byte offset ranges of comments - pub fn comment_ranges(&self) -> &CommentRanges { + pub const fn comment_ranges(&self) -> &CommentRanges { &self.comment_ranges }