Files
ruff/crates/ruff_python_ast/src/identifier.rs
Charlie Marsh daefa74e9a Remove async AST node variants for with, for, and def (#6369)
## Summary

Per the suggestion in
https://github.com/astral-sh/ruff/discussions/6183, this PR removes
`AsyncWith`, `AsyncFor`, and `AsyncFunctionDef`, replacing them with an
`is_async` field on the non-async variants of those structs. Unlike an
interpreter, we _generally_ have identical handling for these nodes, so
separating them into distinct variants adds complexity from which we
don't really benefit. This can be seen below, where we get to remove a
_ton_ of code related to adding generic `Any*` wrappers, and a ton of
duplicate branches for these cases.

## Test Plan

`cargo test` is unchanged, apart from parser snapshots.
2023-08-07 16:36:02 +00:00

233 lines
6.8 KiB
Rust

//! Extract [`TextRange`] information from AST nodes.
//!
//! For example, given:
//! ```python
//! try:
//!     ...
//! except Exception as e:
//!     ...
//! ```
//!
//! This module can be used to identify the [`TextRange`] of the `except` token.
use crate::{self as ast, Alias, ExceptHandler, Parameter, ParameterWithDefault, Ranged, Stmt};
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_python_trivia::{is_python_whitespace, Cursor};
/// Implemented by AST nodes that can report the [`TextRange`] of their identifier.
///
/// This module provides implementations for [`Stmt`], [`Parameter`], [`ParameterWithDefault`],
/// and [`Alias`].
pub trait Identifier {
/// Return the [`TextRange`] of the identifier in the given AST node.
fn identifier(&self) -> TextRange;
}
impl Identifier for Stmt {
    /// Return the [`TextRange`] of the name introduced by the given statement.
    ///
    /// For class and function definitions, this is the range of the name itself, e.g., `f` in:
    /// ```python
    /// def f():
    ///     ...
    /// ```
    ///
    /// For every other statement, the range of the entire statement is returned.
    fn identifier(&self) -> TextRange {
        match self {
            Stmt::ClassDef(class_def) => class_def.name.range(),
            Stmt::FunctionDef(function_def) => function_def.name.range(),
            _ => self.range(),
        }
    }
}
impl Identifier for Parameter {
    /// Return the [`TextRange`] of the name of a [`Parameter`].
    ///
    /// For example, return the range of `x` (excluding the annotation) in:
    /// ```python
    /// def f(x: int):
    ///     ...
    /// ```
    fn identifier(&self) -> TextRange {
        let Self { name, .. } = self;
        name.range()
    }
}
impl Identifier for ParameterWithDefault {
    /// Return the [`TextRange`] of the name of a [`ParameterWithDefault`].
    ///
    /// This delegates to the wrapped parameter. For example, return the range of `x` in:
    /// ```python
    /// def f(x: int = 0):
    ///     ...
    /// ```
    fn identifier(&self) -> TextRange {
        let Self { parameter, .. } = self;
        parameter.identifier()
    }
}
impl Identifier for Alias {
    /// Return the [`TextRange`] of the name that an [`Alias`] binds.
    ///
    /// When the alias has an `as` name, its range is returned; otherwise, the range of the
    /// imported name is returned. For example, return the range of `x` in:
    /// ```python
    /// from foo import bar as x
    /// ```
    fn identifier(&self) -> TextRange {
        match &self.asname {
            Some(asname) => asname.range(),
            None => self.name.range(),
        }
    }
}
/// Return the [`TextRange`] of the `except` token in an [`ExceptHandler`].
///
/// The range is that of the first identifier-like token found within the handler's own range
/// in `source`.
///
/// # Panics
///
/// Panics if no identifier-like token is found within the handler's range.
pub fn except(handler: &ExceptHandler, source: &str) -> TextRange {
    let mut tokens = IdentifierTokenizer::new(source, handler.range());
    let first_token = tokens.next();
    first_token.expect("Failed to find `except` token in `ExceptHandler`")
}
/// Return the [`TextRange`] of the `else` token in a `For` or `While` statement.
///
/// Returns `None` if `stmt` is neither a `For` nor a `While` statement, or if the statement has
/// no `else` clause. Otherwise, the range is that of the first identifier-like token found in
/// `source` after the end of the last statement in the loop body.
///
/// # Panics
///
/// Panics if the statement has an `else` clause but its body is empty.
pub fn else_(stmt: &Stmt, source: &str) -> Option<TextRange> {
    let (body, orelse) = match stmt {
        Stmt::For(ast::StmtFor { body, orelse, .. })
        | Stmt::While(ast::StmtWhile { body, orelse, .. }) => (body, orelse),
        _ => return None,
    };

    if orelse.is_empty() {
        return None;
    }

    // The `else` keyword is the first token that follows the loop body.
    let body_end = body.last().expect("Expected body to be non-empty").end();
    IdentifierTokenizer::starts_at(body_end, source).next()
}
/// Return `true` if the given character can start a Python identifier.
///
/// A character is accepted if it is an underscore or is alphabetic (per
/// [`char::is_alphabetic`]).
///
/// NOTE(review): Python defines identifier starts in terms of Unicode `XID_Start`, which is not
/// exactly the set matched by `char::is_alphabetic`; confirm the approximation is acceptable.
fn is_python_identifier_start(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphabetic(),
    }
}
/// Return `true` if the given character can appear after the first character of a Python
/// identifier.
///
/// A character is accepted if it is an underscore or is alphanumeric (per
/// [`char::is_alphanumeric`]); unlike the first character, digits are allowed here.
///
/// NOTE(review): Python defines identifier continuation in terms of Unicode `XID_Continue`,
/// which is not exactly the set matched by `char::is_alphanumeric`; confirm the approximation
/// is acceptable.
fn is_python_identifier_continue(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
/// Simple zero allocation tokenizer for Python identifiers.
///
/// The tokenizer must operate over a range that can only contain identifiers, keywords, and
/// comments (along with whitespace and continuation characters). It does not support other tokens,
/// like operators, literals, or delimiters. It also does not differentiate between keywords and
/// identifiers, treating every valid token as an "identifier".
///
/// This is useful for cases like, e.g., identifying the alias name in an aliased import (`bar` in
/// `import foo as bar`), where we're guaranteed to only have identifiers and keywords in the
/// relevant range.
///
/// The tokenizer is an [`Iterator`] that yields the [`TextRange`] of each identifier-like token
/// in turn.
pub(crate) struct IdentifierTokenizer<'a> {
/// Cursor over the slice of the source text that is being tokenized.
cursor: Cursor<'a>,
/// Offset at which the cursor's current token starts. It is initialized to the start of the
/// tokenized range and advanced by the length of each consumed token, and is used as the
/// start of the ranges that the tokenizer returns.
offset: TextSize,
}
impl<'a> IdentifierTokenizer<'a> {
    /// Create a tokenizer over the portion of `source` covered by `range`.
    ///
    /// Returned token ranges start counting from `range.start()`, rather than from zero.
    pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
        let text = &source[range];
        Self {
            offset: range.start(),
            cursor: Cursor::new(text),
        }
    }

    /// Create a tokenizer over `source`, beginning at `offset` and running through the end of
    /// the text.
    pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
        Self::new(source, TextRange::new(offset, source.text_len()))
    }

    /// Return the [`TextRange`] of the next identifier-like token, or `None` once the cursor
    /// yields no further characters.
    ///
    /// Whitespace, comments, line endings, and any other characters are consumed and skipped.
    fn next_token(&mut self) -> Option<TextRange> {
        loop {
            // Account for whatever was consumed as the previous token before starting a new one,
            // so that `offset` always points at the start of the token being read.
            self.offset += self.cursor.token_len();
            self.cursor.start_token();

            let first = self.cursor.bump()?;
            match first {
                c if is_python_identifier_start(c) => {
                    self.cursor.eat_while(is_python_identifier_continue);
                    return Some(TextRange::at(self.offset, self.cursor.token_len()));
                }
                c if is_python_whitespace(c) => {
                    self.cursor.eat_while(is_python_whitespace);
                }
                '#' => {
                    // Skip the comment, up to (but not including) the line ending.
                    self.cursor.eat_while(|c| !matches!(c, '\n' | '\r'));
                }
                '\r' => {
                    // Treat `\r\n` as a single line ending.
                    self.cursor.eat_char('\n');
                }
                // Line feeds, continuation backslashes, and any other characters are skipped
                // one at a time.
                _ => {}
            }
        }
    }
}
impl Iterator for IdentifierTokenizer<'_> {
    type Item = TextRange;

    /// Yield the [`TextRange`] of the next identifier-like token, if any.
    fn next(&mut self) -> Option<Self::Item> {
        Self::next_token(self)
    }
}
#[cfg(test)]
mod tests {
    use super::IdentifierTokenizer;
    use ruff_text_size::{TextLen, TextRange, TextSize};

    /// Tokenizing a `global` statement yields the keyword followed by each name, with ranges
    /// that skip the separating commas and whitespace.
    #[test]
    fn extract_global_names() {
        let contents = r#"global X,Y, Z"#.trim();
        let whole = TextRange::new(TextSize::new(0), contents.text_len());
        let mut names = IdentifierTokenizer::new(contents, whole);

        // (token text, start offset, end offset)
        let expected: [(&str, u32, u32); 4] =
            [("global", 0, 6), ("X", 7, 8), ("Y", 9, 10), ("Z", 12, 13)];

        for (text, start, end) in expected {
            let range = names.next_token().unwrap();
            assert_eq!(&contents[range], text);
            assert_eq!(
                range,
                TextRange::new(TextSize::from(start), TextSize::from(end))
            );
        }
    }
}