Remove lexer-based comment range detection (#5785)

## Summary

I'm doing some unrelated profiling, and I noticed that this method is
actually measurable on the CPython benchmark -- it's > 1% of execution
time. We don't need to lex here, we already know the ranges of all
comments, so we can just do a simple binary search for overlap, which
brings the method down to 0%.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-07-15 21:03:27 -04:00 committed by GitHub
parent f2e995f78d
commit 4782675bf9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 44 additions and 44 deletions

View File

@ -11,7 +11,7 @@ use rustpython_parser::ast::{self, BoolOp, ExceptHandler, Expr, Keyword, Ranged,
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::{has_comments_in, Truthiness}; use ruff_python_ast::helpers::Truthiness;
use ruff_python_ast::source_code::{Locator, Stylist}; use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_ast::visitor::Visitor; use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::{visitor, whitespace}; use ruff_python_ast::{visitor, whitespace};
@ -197,7 +197,7 @@ pub(crate) fn unittest_assertion(
if checker.semantic().stmt().is_expr_stmt() if checker.semantic().stmt().is_expr_stmt()
&& checker.semantic().expr_parent().is_none() && checker.semantic().expr_parent().is_none()
&& !checker.semantic().scope().kind.is_lambda() && !checker.semantic().scope().kind.is_lambda()
&& !has_comments_in(expr.range(), checker.locator) && !checker.indexer.comment_ranges().intersects(expr.range())
{ {
if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) { if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) {
diagnostic.set_fix(Fix::suggested(Edit::range_replacement( diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
@ -483,7 +483,7 @@ pub(crate) fn composite_condition(
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
if matches!(composite, CompositionKind::Simple) if matches!(composite, CompositionKind::Simple)
&& msg.is_none() && msg.is_none()
&& !has_comments_in(stmt.range(), checker.locator) && !checker.indexer.comment_ranges().intersects(stmt.range())
{ {
#[allow(deprecated)] #[allow(deprecated)]
diagnostic.try_set_fix_from_edit(|| { diagnostic.try_set_fix_from_edit(|| {

View File

@ -505,7 +505,7 @@ pub(crate) fn compare_with_tuple(checker: &mut Checker, expr: &Expr) {
} }
// Avoid removing comments. // Avoid removing comments.
if has_comments(expr, checker.locator) { if has_comments(expr, checker.locator, checker.indexer) {
continue; continue;
} }

View File

@ -6,9 +6,7 @@ use rustpython_parser::ast::{self, CmpOp, Constant, Expr, ExprContext, Identifie
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt}; use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt};
use ruff_python_ast::helpers::{ use ruff_python_ast::helpers::{any_over_expr, contains_effect, first_colon_range, has_comments};
any_over_expr, contains_effect, first_colon_range, has_comments, has_comments_in,
};
use ruff_python_semantic::SemanticModel; use ruff_python_semantic::SemanticModel;
use ruff_python_whitespace::UniversalNewlines; use ruff_python_whitespace::UniversalNewlines;
@ -378,10 +376,11 @@ pub(crate) fn nested_if_statements(
// The fixer preserves comments in the nested body, but removes comments between // The fixer preserves comments in the nested body, but removes comments between
// the outer and inner if statements. // the outer and inner if statements.
let nested_if = &body[0]; let nested_if = &body[0];
if !has_comments_in( if !checker
TextRange::new(stmt.start(), nested_if.start()), .indexer
checker.locator, .comment_ranges()
) { .intersects(TextRange::new(stmt.start(), nested_if.start()))
{
match fix_if::fix_nested_if_statements(checker.locator, checker.stylist, stmt) { match fix_if::fix_nested_if_statements(checker.locator, checker.stylist, stmt) {
Ok(edit) => { Ok(edit) => {
if edit if edit
@ -464,7 +463,7 @@ pub(crate) fn needless_bool(checker: &mut Checker, stmt: &Stmt) {
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
if matches!(if_return, Bool::True) if matches!(if_return, Bool::True)
&& matches!(else_return, Bool::False) && matches!(else_return, Bool::False)
&& !has_comments(stmt, checker.locator) && !has_comments(stmt, checker.locator, checker.indexer)
&& (test.is_compare_expr() || checker.semantic().is_builtin("bool")) && (test.is_compare_expr() || checker.semantic().is_builtin("bool"))
{ {
if test.is_compare_expr() { if test.is_compare_expr() {
@ -650,7 +649,7 @@ pub(crate) fn use_ternary_operator(checker: &mut Checker, stmt: &Stmt, parent: O
stmt.range(), stmt.range(),
); );
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
if !has_comments(stmt, checker.locator) { if !has_comments(stmt, checker.locator, checker.indexer) {
diagnostic.set_fix(Fix::suggested(Edit::range_replacement( diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
contents, contents,
stmt.range(), stmt.range(),
@ -1050,7 +1049,7 @@ pub(crate) fn use_dict_get_with_default(
stmt.range(), stmt.range(),
); );
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
if !has_comments(stmt, checker.locator) { if !has_comments(stmt, checker.locator, checker.indexer) {
diagnostic.set_fix(Fix::suggested(Edit::range_replacement( diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
contents, contents,
stmt.range(), stmt.range(),

View File

@ -5,7 +5,7 @@ use rustpython_parser::ast::{self, Ranged, Stmt, WithItem};
use ruff_diagnostics::{AutofixKind, Violation}; use ruff_diagnostics::{AutofixKind, Violation};
use ruff_diagnostics::{Diagnostic, Fix}; use ruff_diagnostics::{Diagnostic, Fix};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::{first_colon_range, has_comments_in}; use ruff_python_ast::helpers::first_colon_range;
use ruff_python_whitespace::UniversalNewlines; use ruff_python_whitespace::UniversalNewlines;
use crate::checkers::ast::Checker; use crate::checkers::ast::Checker;
@ -129,10 +129,11 @@ pub(crate) fn multiple_with_statements(
), ),
); );
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
if !has_comments_in( if !checker
TextRange::new(with_stmt.start(), with_body[0].start()), .indexer
checker.locator, .comment_ranges()
) { .intersects(TextRange::new(with_stmt.start(), with_body[0].start()))
{
match fix_with::fix_multiple_with_statements( match fix_with::fix_multiple_with_statements(
checker.locator, checker.locator,
checker.stylist, checker.stylist,

View File

@ -117,7 +117,7 @@ pub(crate) fn suppressible_exception(
stmt.range(), stmt.range(),
); );
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
if !has_comments(stmt, checker.locator) { if !has_comments(stmt, checker.locator, checker.indexer) {
diagnostic.try_set_fix(|| { diagnostic.try_set_fix(|| {
let (import_edit, binding) = checker.importer.get_or_import_symbol( let (import_edit, binding) = checker.importer.get_or_import_symbol(
&ImportRequest::import("contextlib", "suppress"), &ImportRequest::import("contextlib", "suppress"),

View File

@ -150,7 +150,7 @@ pub(crate) fn nested_min_max(
}) { }) {
let mut diagnostic = Diagnostic::new(NestedMinMax { func: min_max }, expr.range()); let mut diagnostic = Diagnostic::new(NestedMinMax { func: min_max }, expr.range());
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
if !has_comments(expr, checker.locator) { if !has_comments(expr, checker.locator, checker.indexer) {
let flattened_expr = Expr::Call(ast::ExprCall { let flattened_expr = Expr::Call(ast::ExprCall {
func: Box::new(func.clone()), func: Box::new(func.clone()),
args: collect_nested_args(min_max, args, checker.semantic()), args: collect_nested_args(min_max, args, checker.semantic()),

View File

@ -192,7 +192,7 @@ pub(crate) fn collection_literal_concatenation(checker: &mut Checker, expr: &Exp
expr.range(), expr.range(),
); );
if checker.patch(diagnostic.kind.rule()) { if checker.patch(diagnostic.kind.rule()) {
if !has_comments(expr, checker.locator) { if !has_comments(expr, checker.locator, checker.indexer) {
// This suggestion could be unsafe if the non-literal expression in the // This suggestion could be unsafe if the non-literal expression in the
// expression has overridden the `__add__` (or `__radd__`) magic methods. // expression has overridden the `__add__` (or `__radd__`) magic methods.
diagnostic.set_fix(Fix::suggested(Edit::range_replacement( diagnostic.set_fix(Fix::suggested(Edit::range_replacement(

View File

@ -717,7 +717,7 @@ pub fn map_subscript(expr: &Expr) -> &Expr {
} }
/// Returns `true` if a statement or expression includes at least one comment. /// Returns `true` if a statement or expression includes at least one comment.
pub fn has_comments<T>(node: &T, locator: &Locator) -> bool pub fn has_comments<T>(node: &T, locator: &Locator, indexer: &Indexer) -> bool
where where
T: Ranged, T: Ranged,
{ {
@ -732,26 +732,9 @@ where
locator.line_end(node.end()) locator.line_end(node.end())
}; };
has_comments_in(TextRange::new(start, end), locator) indexer
} .comment_ranges()
.intersects(TextRange::new(start, end))
/// Returns `true` if a [`TextRange`] includes at least one comment.
pub fn has_comments_in(range: TextRange, locator: &Locator) -> bool {
let source = &locator.contents()[range];
for tok in lexer::lex_starts_at(source, Mode::Module, range.start()) {
match tok {
Ok((tok, _)) => {
if matches!(tok, Tok::Comment(..)) {
return true;
}
}
Err(_) => {
return false;
}
}
}
false
} }
/// Return `true` if the body uses `locals()`, `globals()`, `vars()`, `eval()`. /// Return `true` if the body uses `locals()`, `globals()`, `vars()`, `eval()`.

View File

@ -10,6 +10,23 @@ pub struct CommentRanges {
raw: Vec<TextRange>, raw: Vec<TextRange>,
} }
impl CommentRanges {
/// Returns `true` if the given range includes a comment.
pub fn intersects(&self, target: TextRange) -> bool {
self.raw
.binary_search_by(|range| {
if target.contains_range(*range) {
std::cmp::Ordering::Equal
} else if range.end() < target.start() {
std::cmp::Ordering::Less
} else {
std::cmp::Ordering::Greater
}
})
.is_ok()
}
}
impl Deref for CommentRanges { impl Deref for CommentRanges {
type Target = [TextRange]; type Target = [TextRange];

View File

@ -93,7 +93,7 @@ impl Indexer {
} }
/// Returns the byte offset ranges of comments /// Returns the byte offset ranges of comments
pub fn comment_ranges(&self) -> &CommentRanges { pub const fn comment_ranges(&self) -> &CommentRanges {
&self.comment_ranges &self.comment_ranges
} }