mirror of https://github.com/astral-sh/ruff
Remove lexer-based comment range detection (#5785)
## Summary I'm doing some unrelated profiling, and I noticed that this method is actually measurable on the CPython benchmark -- it's > 1% of execution time. We don't need to lex here, we already know the ranges of all comments, so we can just do a simple binary search for overlap, which brings the method down to 0%. ## Test Plan `cargo test`
This commit is contained in:
parent
f2e995f78d
commit
4782675bf9
|
|
@ -11,7 +11,7 @@ use rustpython_parser::ast::{self, BoolOp, ExceptHandler, Expr, Keyword, Ranged,
|
||||||
|
|
||||||
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
|
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
|
||||||
use ruff_macros::{derive_message_formats, violation};
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
use ruff_python_ast::helpers::{has_comments_in, Truthiness};
|
use ruff_python_ast::helpers::Truthiness;
|
||||||
use ruff_python_ast::source_code::{Locator, Stylist};
|
use ruff_python_ast::source_code::{Locator, Stylist};
|
||||||
use ruff_python_ast::visitor::Visitor;
|
use ruff_python_ast::visitor::Visitor;
|
||||||
use ruff_python_ast::{visitor, whitespace};
|
use ruff_python_ast::{visitor, whitespace};
|
||||||
|
|
@ -197,7 +197,7 @@ pub(crate) fn unittest_assertion(
|
||||||
if checker.semantic().stmt().is_expr_stmt()
|
if checker.semantic().stmt().is_expr_stmt()
|
||||||
&& checker.semantic().expr_parent().is_none()
|
&& checker.semantic().expr_parent().is_none()
|
||||||
&& !checker.semantic().scope().kind.is_lambda()
|
&& !checker.semantic().scope().kind.is_lambda()
|
||||||
&& !has_comments_in(expr.range(), checker.locator)
|
&& !checker.indexer.comment_ranges().intersects(expr.range())
|
||||||
{
|
{
|
||||||
if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) {
|
if let Ok(stmt) = unittest_assert.generate_assert(args, keywords) {
|
||||||
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
||||||
|
|
@ -483,7 +483,7 @@ pub(crate) fn composite_condition(
|
||||||
if checker.patch(diagnostic.kind.rule()) {
|
if checker.patch(diagnostic.kind.rule()) {
|
||||||
if matches!(composite, CompositionKind::Simple)
|
if matches!(composite, CompositionKind::Simple)
|
||||||
&& msg.is_none()
|
&& msg.is_none()
|
||||||
&& !has_comments_in(stmt.range(), checker.locator)
|
&& !checker.indexer.comment_ranges().intersects(stmt.range())
|
||||||
{
|
{
|
||||||
#[allow(deprecated)]
|
#[allow(deprecated)]
|
||||||
diagnostic.try_set_fix_from_edit(|| {
|
diagnostic.try_set_fix_from_edit(|| {
|
||||||
|
|
|
||||||
|
|
@ -505,7 +505,7 @@ pub(crate) fn compare_with_tuple(checker: &mut Checker, expr: &Expr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Avoid removing comments.
|
// Avoid removing comments.
|
||||||
if has_comments(expr, checker.locator) {
|
if has_comments(expr, checker.locator, checker.indexer) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,7 @@ use rustpython_parser::ast::{self, CmpOp, Constant, Expr, ExprContext, Identifie
|
||||||
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
|
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
|
||||||
use ruff_macros::{derive_message_formats, violation};
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt};
|
use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt};
|
||||||
use ruff_python_ast::helpers::{
|
use ruff_python_ast::helpers::{any_over_expr, contains_effect, first_colon_range, has_comments};
|
||||||
any_over_expr, contains_effect, first_colon_range, has_comments, has_comments_in,
|
|
||||||
};
|
|
||||||
use ruff_python_semantic::SemanticModel;
|
use ruff_python_semantic::SemanticModel;
|
||||||
use ruff_python_whitespace::UniversalNewlines;
|
use ruff_python_whitespace::UniversalNewlines;
|
||||||
|
|
||||||
|
|
@ -378,10 +376,11 @@ pub(crate) fn nested_if_statements(
|
||||||
// The fixer preserves comments in the nested body, but removes comments between
|
// The fixer preserves comments in the nested body, but removes comments between
|
||||||
// the outer and inner if statements.
|
// the outer and inner if statements.
|
||||||
let nested_if = &body[0];
|
let nested_if = &body[0];
|
||||||
if !has_comments_in(
|
if !checker
|
||||||
TextRange::new(stmt.start(), nested_if.start()),
|
.indexer
|
||||||
checker.locator,
|
.comment_ranges()
|
||||||
) {
|
.intersects(TextRange::new(stmt.start(), nested_if.start()))
|
||||||
|
{
|
||||||
match fix_if::fix_nested_if_statements(checker.locator, checker.stylist, stmt) {
|
match fix_if::fix_nested_if_statements(checker.locator, checker.stylist, stmt) {
|
||||||
Ok(edit) => {
|
Ok(edit) => {
|
||||||
if edit
|
if edit
|
||||||
|
|
@ -464,7 +463,7 @@ pub(crate) fn needless_bool(checker: &mut Checker, stmt: &Stmt) {
|
||||||
if checker.patch(diagnostic.kind.rule()) {
|
if checker.patch(diagnostic.kind.rule()) {
|
||||||
if matches!(if_return, Bool::True)
|
if matches!(if_return, Bool::True)
|
||||||
&& matches!(else_return, Bool::False)
|
&& matches!(else_return, Bool::False)
|
||||||
&& !has_comments(stmt, checker.locator)
|
&& !has_comments(stmt, checker.locator, checker.indexer)
|
||||||
&& (test.is_compare_expr() || checker.semantic().is_builtin("bool"))
|
&& (test.is_compare_expr() || checker.semantic().is_builtin("bool"))
|
||||||
{
|
{
|
||||||
if test.is_compare_expr() {
|
if test.is_compare_expr() {
|
||||||
|
|
@ -650,7 +649,7 @@ pub(crate) fn use_ternary_operator(checker: &mut Checker, stmt: &Stmt, parent: O
|
||||||
stmt.range(),
|
stmt.range(),
|
||||||
);
|
);
|
||||||
if checker.patch(diagnostic.kind.rule()) {
|
if checker.patch(diagnostic.kind.rule()) {
|
||||||
if !has_comments(stmt, checker.locator) {
|
if !has_comments(stmt, checker.locator, checker.indexer) {
|
||||||
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
||||||
contents,
|
contents,
|
||||||
stmt.range(),
|
stmt.range(),
|
||||||
|
|
@ -1050,7 +1049,7 @@ pub(crate) fn use_dict_get_with_default(
|
||||||
stmt.range(),
|
stmt.range(),
|
||||||
);
|
);
|
||||||
if checker.patch(diagnostic.kind.rule()) {
|
if checker.patch(diagnostic.kind.rule()) {
|
||||||
if !has_comments(stmt, checker.locator) {
|
if !has_comments(stmt, checker.locator, checker.indexer) {
|
||||||
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
||||||
contents,
|
contents,
|
||||||
stmt.range(),
|
stmt.range(),
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ use rustpython_parser::ast::{self, Ranged, Stmt, WithItem};
|
||||||
use ruff_diagnostics::{AutofixKind, Violation};
|
use ruff_diagnostics::{AutofixKind, Violation};
|
||||||
use ruff_diagnostics::{Diagnostic, Fix};
|
use ruff_diagnostics::{Diagnostic, Fix};
|
||||||
use ruff_macros::{derive_message_formats, violation};
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
use ruff_python_ast::helpers::{first_colon_range, has_comments_in};
|
use ruff_python_ast::helpers::first_colon_range;
|
||||||
use ruff_python_whitespace::UniversalNewlines;
|
use ruff_python_whitespace::UniversalNewlines;
|
||||||
|
|
||||||
use crate::checkers::ast::Checker;
|
use crate::checkers::ast::Checker;
|
||||||
|
|
@ -129,10 +129,11 @@ pub(crate) fn multiple_with_statements(
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
if checker.patch(diagnostic.kind.rule()) {
|
if checker.patch(diagnostic.kind.rule()) {
|
||||||
if !has_comments_in(
|
if !checker
|
||||||
TextRange::new(with_stmt.start(), with_body[0].start()),
|
.indexer
|
||||||
checker.locator,
|
.comment_ranges()
|
||||||
) {
|
.intersects(TextRange::new(with_stmt.start(), with_body[0].start()))
|
||||||
|
{
|
||||||
match fix_with::fix_multiple_with_statements(
|
match fix_with::fix_multiple_with_statements(
|
||||||
checker.locator,
|
checker.locator,
|
||||||
checker.stylist,
|
checker.stylist,
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@ pub(crate) fn suppressible_exception(
|
||||||
stmt.range(),
|
stmt.range(),
|
||||||
);
|
);
|
||||||
if checker.patch(diagnostic.kind.rule()) {
|
if checker.patch(diagnostic.kind.rule()) {
|
||||||
if !has_comments(stmt, checker.locator) {
|
if !has_comments(stmt, checker.locator, checker.indexer) {
|
||||||
diagnostic.try_set_fix(|| {
|
diagnostic.try_set_fix(|| {
|
||||||
let (import_edit, binding) = checker.importer.get_or_import_symbol(
|
let (import_edit, binding) = checker.importer.get_or_import_symbol(
|
||||||
&ImportRequest::import("contextlib", "suppress"),
|
&ImportRequest::import("contextlib", "suppress"),
|
||||||
|
|
|
||||||
|
|
@ -150,7 +150,7 @@ pub(crate) fn nested_min_max(
|
||||||
}) {
|
}) {
|
||||||
let mut diagnostic = Diagnostic::new(NestedMinMax { func: min_max }, expr.range());
|
let mut diagnostic = Diagnostic::new(NestedMinMax { func: min_max }, expr.range());
|
||||||
if checker.patch(diagnostic.kind.rule()) {
|
if checker.patch(diagnostic.kind.rule()) {
|
||||||
if !has_comments(expr, checker.locator) {
|
if !has_comments(expr, checker.locator, checker.indexer) {
|
||||||
let flattened_expr = Expr::Call(ast::ExprCall {
|
let flattened_expr = Expr::Call(ast::ExprCall {
|
||||||
func: Box::new(func.clone()),
|
func: Box::new(func.clone()),
|
||||||
args: collect_nested_args(min_max, args, checker.semantic()),
|
args: collect_nested_args(min_max, args, checker.semantic()),
|
||||||
|
|
|
||||||
|
|
@ -192,7 +192,7 @@ pub(crate) fn collection_literal_concatenation(checker: &mut Checker, expr: &Exp
|
||||||
expr.range(),
|
expr.range(),
|
||||||
);
|
);
|
||||||
if checker.patch(diagnostic.kind.rule()) {
|
if checker.patch(diagnostic.kind.rule()) {
|
||||||
if !has_comments(expr, checker.locator) {
|
if !has_comments(expr, checker.locator, checker.indexer) {
|
||||||
// This suggestion could be unsafe if the non-literal expression in the
|
// This suggestion could be unsafe if the non-literal expression in the
|
||||||
// expression has overridden the `__add__` (or `__radd__`) magic methods.
|
// expression has overridden the `__add__` (or `__radd__`) magic methods.
|
||||||
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(
|
||||||
|
|
|
||||||
|
|
@ -717,7 +717,7 @@ pub fn map_subscript(expr: &Expr) -> &Expr {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns `true` if a statement or expression includes at least one comment.
|
/// Returns `true` if a statement or expression includes at least one comment.
|
||||||
pub fn has_comments<T>(node: &T, locator: &Locator) -> bool
|
pub fn has_comments<T>(node: &T, locator: &Locator, indexer: &Indexer) -> bool
|
||||||
where
|
where
|
||||||
T: Ranged,
|
T: Ranged,
|
||||||
{
|
{
|
||||||
|
|
@ -732,26 +732,9 @@ where
|
||||||
locator.line_end(node.end())
|
locator.line_end(node.end())
|
||||||
};
|
};
|
||||||
|
|
||||||
has_comments_in(TextRange::new(start, end), locator)
|
indexer
|
||||||
}
|
.comment_ranges()
|
||||||
|
.intersects(TextRange::new(start, end))
|
||||||
/// Returns `true` if a [`TextRange`] includes at least one comment.
|
|
||||||
pub fn has_comments_in(range: TextRange, locator: &Locator) -> bool {
|
|
||||||
let source = &locator.contents()[range];
|
|
||||||
|
|
||||||
for tok in lexer::lex_starts_at(source, Mode::Module, range.start()) {
|
|
||||||
match tok {
|
|
||||||
Ok((tok, _)) => {
|
|
||||||
if matches!(tok, Tok::Comment(..)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(_) => {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return `true` if the body uses `locals()`, `globals()`, `vars()`, `eval()`.
|
/// Return `true` if the body uses `locals()`, `globals()`, `vars()`, `eval()`.
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,23 @@ pub struct CommentRanges {
|
||||||
raw: Vec<TextRange>,
|
raw: Vec<TextRange>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl CommentRanges {
|
||||||
|
/// Returns `true` if the given range includes a comment.
|
||||||
|
pub fn intersects(&self, target: TextRange) -> bool {
|
||||||
|
self.raw
|
||||||
|
.binary_search_by(|range| {
|
||||||
|
if target.contains_range(*range) {
|
||||||
|
std::cmp::Ordering::Equal
|
||||||
|
} else if range.end() < target.start() {
|
||||||
|
std::cmp::Ordering::Less
|
||||||
|
} else {
|
||||||
|
std::cmp::Ordering::Greater
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.is_ok()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Deref for CommentRanges {
|
impl Deref for CommentRanges {
|
||||||
type Target = [TextRange];
|
type Target = [TextRange];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -93,7 +93,7 @@ impl Indexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the byte offset ranges of comments
|
/// Returns the byte offset ranges of comments
|
||||||
pub fn comment_ranges(&self) -> &CommentRanges {
|
pub const fn comment_ranges(&self) -> &CommentRanges {
|
||||||
&self.comment_ranges
|
&self.comment_ranges
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue