Use `Tokens` from parsed type annotation or parsed source (#11740)

## Summary

This PR fixes a bug where the checker would require the tokens for an
invalid offset w.r.t. the source code.

Taking the source code from the linked issue as an example:
```py
relese_version :"0.0is 64"
```

Now, this isn't really a valid type annotation but that's what this PR
is fixing. Regardless of whether it's valid or not, Ruff shouldn't
panic.

The checker would visit the parsed type annotation (`0.0is 64`) and try
to detect any violations. Certain rule logic requests the tokens for the
same but it would fail because the lexer would only have the `String`
token considering original source code. This worked before because the
lexer was invoked again for each rule logic.

The solution is to store the parsed type annotation on the checker if
it's in a typing context and use the tokens from that instead if it's
available. This is enforced by creating a new API on the checker to get
the tokens.

But, this means that there are two ways to get the tokens via the
checker API. I want to restrict this in a follow-up PR (#11741) to only
expose `tokens` and `comment_ranges` as methods and restrict access to
the parsed source code.

fixes: #11736 

## Test Plan

- [x] Add a test case for `F632` rule and update the snapshot
- [x] Check all affected rules
- [x] No ecosystem changes
This commit is contained in:
Dhruv Manilawala 2024-06-05 13:20:33 +05:30 committed by GitHub
parent eed6d784df
commit b021b5babe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 159 additions and 31 deletions

View File

@ -29,3 +29,6 @@ not ''}
# Regression test for
if values[1is not None ] is not '-':
pass
# Regression test for https://github.com/astral-sh/ruff/issues/11736
variable: "123 is not y"

View File

@ -80,3 +80,7 @@ print("foo".encode()) # print(b"foo")
# Regression test for: https://github.com/astral-sh/ruff/issues/7455#issuecomment-1722459882
def _match_ignore(line):
input=stdin and'\n'.encode()or None
# Not a valid type annotation but this test shouldn't result in a panic.
# Refer: https://github.com/astral-sh/ruff/issues/11736
x: '"foo".encode("utf-8")'

View File

@ -0,0 +1,4 @@
# Not a valid type annotation but this test shouldn't result in a panic.
# Refer: https://github.com/astral-sh/ruff/issues/11736
x: 'open("foo", "r")'

View File

@ -125,3 +125,7 @@ path = "%s-%s-%s.pem" % (
'Hello %s' % bar.baz
'Hello %s' % bar['bop']
# Not a valid type annotation but this test shouldn't result in a panic.
# Refer: https://github.com/astral-sh/ruff/issues/11736
x: "'%s + %s' % (1, 2)"

View File

@ -265,3 +265,7 @@ raise ValueError(
# The call should be removed, but the string itself should remain.
"".format(self.project)
# Not a valid type annotation but this test shouldn't result in a panic.
# Refer: https://github.com/astral-sh/ruff/issues/11736
x: "'{} + {}'.format(x, y)"

View File

@ -49,7 +49,7 @@ use ruff_python_ast::{helpers, str, visitor, PySourceType};
use ruff_python_codegen::{Generator, Stylist};
use ruff_python_index::Indexer;
use ruff_python_parser::typing::{parse_type_annotation, AnnotationKind};
use ruff_python_parser::Parsed;
use ruff_python_parser::{Parsed, Tokens};
use ruff_python_semantic::all::{DunderAllDefinition, DunderAllFlags};
use ruff_python_semantic::analyze::{imports, typing};
use ruff_python_semantic::{
@ -176,8 +176,10 @@ impl ExpectedDocstringKind {
}
pub(crate) struct Checker<'a> {
/// The parsed [`Parsed`].
/// The [`Parsed`] output for the source code.
parsed: &'a Parsed<ModModule>,
/// The [`Parsed`] output for the type annotation the checker is currently in.
parsed_type_annotation: Option<&'a Parsed<ModExpression>>,
/// The [`Path`] to the file under analysis.
path: &'a Path,
/// The [`Path`] to the package containing the current file.
@ -243,6 +245,7 @@ impl<'a> Checker<'a> {
) -> Checker<'a> {
Checker {
parsed,
parsed_type_annotation: None,
settings,
noqa_line_for,
noqa,
@ -328,6 +331,16 @@ impl<'a> Checker<'a> {
self.parsed
}
/// Returns the [`Tokens`] for the parsed type annotation if the checker is in a typing context
/// or the parsed source code.
pub(crate) fn tokens(&self) -> &'a Tokens {
if let Some(parsed_type_annotation) = self.parsed_type_annotation {
parsed_type_annotation.tokens()
} else {
self.parsed.tokens()
}
}
/// The [`Locator`] for the current file, which enables extraction of source code from byte
/// offsets.
pub(crate) const fn locator(&self) -> &'a Locator<'a> {
@ -2160,6 +2173,7 @@ impl<'a> Checker<'a> {
parse_type_annotation(string_expr, self.locator.contents())
{
let parsed_annotation = allocator.alloc(parsed_annotation);
self.parsed_type_annotation = Some(parsed_annotation);
let annotation = string_expr.value.to_str();
let range = string_expr.range();
@ -2187,6 +2201,7 @@ impl<'a> Checker<'a> {
self.semantic.flags |=
SemanticModelFlags::TYPE_DEFINITION | type_definition_flag;
self.visit_expr(parsed_annotation.expr());
self.parsed_type_annotation = None;
} else {
if self.enabled(Rule::ForwardAnnotationSyntaxError) {
self.diagnostics.push(Diagnostic::new(

View File

@ -96,7 +96,7 @@ pub(crate) fn invalid_literal_comparison(
{
let mut diagnostic = Diagnostic::new(IsLiteral { cmp_op: op.into() }, expr.range());
if lazy_located.is_none() {
lazy_located = Some(locate_cmp_ops(expr, checker.parsed().tokens()));
lazy_located = Some(locate_cmp_ops(expr, checker.tokens()));
}
if let Some(located_op) = lazy_located.as_ref().and_then(|located| located.get(index)) {
assert_eq!(located_op.op, *op);

View File

@ -170,11 +170,10 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option<Fix> {
)
.unwrap_or(target.range())
.start();
let end =
match_token_after(checker.parsed().tokens(), target.end(), |token| {
token == TokenKind::Equal
})?
.start();
let end = match_token_after(checker.tokens(), target.end(), |token| {
token == TokenKind::Equal
})?
.start();
let edit = Edit::deletion(start, end);
Some(Fix::unsafe_edit(edit))
} else {
@ -206,10 +205,9 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option<Fix> {
// If the expression is complex (`x = foo()`), remove the assignment,
// but preserve the right-hand side.
let start = statement.start();
let end = match_token_after(checker.parsed().tokens(), start, |token| {
token == TokenKind::Equal
})?
.start();
let end =
match_token_after(checker.tokens(), start, |token| token == TokenKind::Equal)?
.start();
let edit = Edit::deletion(start, end);
Some(Fix::unsafe_edit(edit))
} else {
@ -228,19 +226,17 @@ fn remove_unused_variable(binding: &Binding, checker: &Checker) -> Option<Fix> {
if let Some(optional_vars) = &item.optional_vars {
if optional_vars.range() == binding.range() {
// Find the first token before the `as` keyword.
let start = match_token_before(
checker.parsed().tokens(),
item.context_expr.start(),
|token| token == TokenKind::As,
)?
.end();
let start =
match_token_before(checker.tokens(), item.context_expr.start(), |token| {
token == TokenKind::As
})?
.end();
// Find the first colon, comma, or closing bracket after the `as` keyword.
let end =
match_token_or_closing_brace(checker.parsed().tokens(), start, |token| {
token == TokenKind::Colon || token == TokenKind::Comma
})?
.start();
let end = match_token_or_closing_brace(checker.tokens(), start, |token| {
token == TokenKind::Colon || token == TokenKind::Comma
})?
.start();
let edit = Edit::deletion(start, end);
return Some(Fix::unsafe_edit(edit));

View File

@ -203,6 +203,8 @@ F632.py:30:4: F632 [*] Use `!=` to compare constant literals
30 |-if values[1is not None ] is not '-':
30 |+if values[1is not None ] != '-':
31 31 | pass
32 32 |
33 33 | # Regression test for https://github.com/astral-sh/ruff/issues/11736
F632.py:30:11: F632 [*] Use `!=` to compare constant literals
|
@ -220,5 +222,20 @@ F632.py:30:11: F632 [*] Use `!=` to compare constant literals
30 |-if values[1is not None ] is not '-':
30 |+if values[1!= None ] is not '-':
31 31 | pass
32 32 |
33 33 | # Regression test for https://github.com/astral-sh/ruff/issues/11736
F632.py:34:12: F632 [*] Use `!=` to compare constant literals
|
33 | # Regression test for https://github.com/astral-sh/ruff/issues/11736
34 | variable: "123 is not y"
| ^^^^^^^^^^^^ F632
|
= help: Replace `is not` with `!=`
Safe fix
31 31 | pass
32 32 |
33 33 | # Regression test for https://github.com/astral-sh/ruff/issues/11736
34 |-variable: "123 is not y"
34 |+variable: "123 != y"

View File

@ -58,6 +58,7 @@ mod tests {
#[test_case(Rule::QuotedAnnotation, Path::new("UP037_0.py"))]
#[test_case(Rule::QuotedAnnotation, Path::new("UP037_1.py"))]
#[test_case(Rule::RedundantOpenModes, Path::new("UP015.py"))]
#[test_case(Rule::RedundantOpenModes, Path::new("UP015_1.py"))]
#[test_case(Rule::ReplaceStdoutStderr, Path::new("UP022.py"))]
#[test_case(Rule::ReplaceUniversalNewlines, Path::new("UP021.py"))]
#[test_case(Rule::SuperCallWithParameters, Path::new("UP008.py"))]

View File

@ -719,7 +719,7 @@ pub(crate) fn deprecated_import(checker: &mut Checker, import_from_stmt: &StmtIm
module,
checker.locator(),
checker.stylist(),
checker.parsed().tokens(),
checker.tokens(),
checker.settings.target_version,
);

View File

@ -409,7 +409,7 @@ pub(crate) fn f_strings(checker: &mut Checker, call: &ast::ExprCall, summary: &F
};
let mut patches: Vec<(TextRange, FStringConversion)> = vec![];
let mut tokens = checker.parsed().tokens().in_range(call.func.range()).iter();
let mut tokens = checker.tokens().in_range(call.func.range()).iter();
let end = loop {
let Some(token) = tokens.next() else {
unreachable!("Should break from the `Tok::Dot` arm");

View File

@ -460,7 +460,7 @@ pub(crate) fn printf_string_formatting(
}
if let Some(prev_end) = prev_end {
for token in checker.parsed().tokens().after(prev_end) {
for token in checker.tokens().after(prev_end) {
match token.kind() {
// If we hit a right paren, we have to preserve it.
TokenKind::Rpar => {

View File

@ -79,7 +79,7 @@ pub(crate) fn redundant_open_modes(checker: &mut Checker, call: &ast::ExprCall)
call,
&keyword.value,
mode.replacement_value(),
checker.parsed().tokens(),
checker.tokens(),
));
}
}
@ -93,7 +93,7 @@ pub(crate) fn redundant_open_modes(checker: &mut Checker, call: &ast::ExprCall)
call,
mode_param,
mode.replacement_value(),
checker.parsed().tokens(),
checker.tokens(),
));
}
}

View File

@ -165,7 +165,7 @@ pub(crate) fn unnecessary_encode_utf8(checker: &mut Checker, call: &ast::ExprCal
diagnostic.set_fix(replace_with_bytes_literal(
checker.locator(),
call,
checker.parsed().tokens(),
checker.tokens(),
));
checker.diagnostics.push(diagnostic);
} else if let EncodingArg::Keyword(kwarg) = encoding_arg {

View File

@ -557,6 +557,8 @@ UP012.py:82:17: UP012 [*] Unnecessary call to `encode` as UTF-8
81 | def _match_ignore(line):
82 | input=stdin and'\n'.encode()or None
| ^^^^^^^^^^^^^ UP012
83 |
84 | # Not a valid type annotation but this test shouldn't result in a panic.
|
= help: Rewrite as bytes literal
@ -566,5 +568,22 @@ UP012.py:82:17: UP012 [*] Unnecessary call to `encode` as UTF-8
81 81 | def _match_ignore(line):
82 |- input=stdin and'\n'.encode()or None
82 |+ input=stdin and b'\n' or None
83 83 |
84 84 | # Not a valid type annotation but this test shouldn't result in a panic.
85 85 | # Refer: https://github.com/astral-sh/ruff/issues/11736
UP012.py:86:5: UP012 [*] Unnecessary call to `encode` as UTF-8
|
84 | # Not a valid type annotation but this test shouldn't result in a panic.
85 | # Refer: https://github.com/astral-sh/ruff/issues/11736
86 | x: '"foo".encode("utf-8")'
| ^^^^^^^^^^^^^^^^^^^^^ UP012
|
= help: Rewrite as bytes literal
Safe fix
83 83 |
84 84 | # Not a valid type annotation but this test shouldn't result in a panic.
85 85 | # Refer: https://github.com/astral-sh/ruff/issues/11736
86 |-x: '"foo".encode("utf-8")'
86 |+x: 'b"foo"'

View File

@ -0,0 +1,18 @@
---
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
---
UP015_1.py:3:5: UP015 [*] Unnecessary open mode parameters
|
1 | # Not a valid type annotation but this test shouldn't result in a panic.
2 | # Refer: https://github.com/astral-sh/ruff/issues/11736
3 | x: 'open("foo", "r")'
| ^^^^^^^^^^^^^^^^ UP015
|
= help: Remove open mode parameters
Safe fix
1 1 | # Not a valid type annotation but this test shouldn't result in a panic.
2 2 | # Refer: https://github.com/astral-sh/ruff/issues/11736
3 |-x: 'open("foo", "r")'
3 |+x: 'open("foo")'
4 4 |

View File

@ -978,6 +978,7 @@ UP031_0.py:125:1: UP031 [*] Use format specifiers instead of percent format
125 |+'Hello {}'.format(bar.baz)
126 126 |
127 127 | 'Hello %s' % bar['bop']
128 128 |
UP031_0.py:127:1: UP031 [*] Use format specifiers instead of percent format
|
@ -985,6 +986,8 @@ UP031_0.py:127:1: UP031 [*] Use format specifiers instead of percent format
126 |
127 | 'Hello %s' % bar['bop']
| ^^^^^^^^^^^^^^^^^^^^^^^ UP031
128 |
129 | # Not a valid type annotation but this test shouldn't result in a panic.
|
= help: Replace with format specifiers
@ -994,3 +997,22 @@ UP031_0.py:127:1: UP031 [*] Use format specifiers instead of percent format
126 126 |
127 |-'Hello %s' % bar['bop']
127 |+'Hello {}'.format(bar['bop'])
128 128 |
129 129 | # Not a valid type annotation but this test shouldn't result in a panic.
130 130 | # Refer: https://github.com/astral-sh/ruff/issues/11736
UP031_0.py:131:5: UP031 [*] Use format specifiers instead of percent format
|
129 | # Not a valid type annotation but this test shouldn't result in a panic.
130 | # Refer: https://github.com/astral-sh/ruff/issues/11736
131 | x: "'%s + %s' % (1, 2)"
| ^^^^^^^^^^^^^^^^^^ UP031
|
= help: Replace with format specifiers
Unsafe fix
128 128 |
129 129 | # Not a valid type annotation but this test shouldn't result in a panic.
130 130 | # Refer: https://github.com/astral-sh/ruff/issues/11736
131 |-x: "'%s + %s' % (1, 2)"
131 |+x: "'{} + {}'.format(1, 2)"

View File

@ -1361,6 +1361,8 @@ UP032_0.py:267:1: UP032 [*] Use f-string instead of `format` call
266 | # The call should be removed, but the string itself should remain.
267 | "".format(self.project)
| ^^^^^^^^^^^^^^^^^^^^^^^ UP032
268 |
269 | # Not a valid type annotation but this test shouldn't result in a panic.
|
= help: Convert to f-string
@ -1370,3 +1372,22 @@ UP032_0.py:267:1: UP032 [*] Use f-string instead of `format` call
266 266 | # The call should be removed, but the string itself should remain.
267 |-"".format(self.project)
267 |+""
268 268 |
269 269 | # Not a valid type annotation but this test shouldn't result in a panic.
270 270 | # Refer: https://github.com/astral-sh/ruff/issues/11736
UP032_0.py:271:5: UP032 [*] Use f-string instead of `format` call
|
269 | # Not a valid type annotation but this test shouldn't result in a panic.
270 | # Refer: https://github.com/astral-sh/ruff/issues/11736
271 | x: "'{} + {}'.format(x, y)"
| ^^^^^^^^^^^^^^^^^^^^^^ UP032
|
= help: Convert to f-string
Safe fix
268 268 |
269 269 | # Not a valid type annotation but this test shouldn't result in a panic.
270 270 | # Refer: https://github.com/astral-sh/ruff/issues/11736
271 |-x: "'{} + {}'.format(x, y)"
271 |+x: "f'{x} + {y}'"

View File

@ -216,7 +216,7 @@ fn create_fix(
range,
kind,
locator,
checker.parsed().tokens(),
checker.tokens(),
string_items,
)?;
assert_eq!(value.len(), elts.len());

View File

@ -210,7 +210,7 @@ impl<'a> StringLiteralDisplay<'a> {
self.range(),
*sequence_kind,
locator,
checker.parsed().tokens(),
checker.tokens(),
elements,
)?;
assert_eq!(analyzed_sequence.len(), self.elts.len());