diff --git a/crates/ruff_benchmark/benches/lexer.rs b/crates/ruff_benchmark/benches/lexer.rs index 968ac74b14..3b1a4dafde 100644 --- a/crates/ruff_benchmark/benches/lexer.rs +++ b/crates/ruff_benchmark/benches/lexer.rs @@ -6,7 +6,8 @@ use criterion::{ use ruff_benchmark::{ LARGE_DATASET, NUMPY_CTYPESLIB, NUMPY_GLOBALS, PYDANTIC_TYPES, TestCase, UNICODE_PYPINYIN, }; -use ruff_python_parser::{Mode, TokenKind, lexer}; +use ruff_python_ast::token::TokenKind; +use ruff_python_parser::{Mode, lexer}; #[cfg(target_os = "windows")] #[global_allocator] diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index 5f7f459ba9..8c360448a4 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -35,6 +35,7 @@ use ruff_python_ast::helpers::{collect_import_from_member, is_docstring_stmt, to use ruff_python_ast::identifier::Identifier; use ruff_python_ast::name::QualifiedName; use ruff_python_ast::str::Quote; +use ruff_python_ast::token::Tokens; use ruff_python_ast::visitor::{Visitor, walk_except_handler, walk_pattern}; use ruff_python_ast::{ self as ast, AnyParameterRef, ArgOrKeyword, Comprehension, ElifElseClause, ExceptHandler, Expr, @@ -48,7 +49,7 @@ use ruff_python_parser::semantic_errors::{ SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind, }; use ruff_python_parser::typing::{AnnotationKind, ParsedAnnotation, parse_type_annotation}; -use ruff_python_parser::{ParseError, Parsed, Tokens}; +use ruff_python_parser::{ParseError, Parsed}; use ruff_python_semantic::all::{DunderAllDefinition, DunderAllFlags}; use ruff_python_semantic::analyze::{imports, typing}; use ruff_python_semantic::{ diff --git a/crates/ruff_linter/src/checkers/logical_lines.rs b/crates/ruff_linter/src/checkers/logical_lines.rs index 5c60a3171a..695fd294aa 100644 --- a/crates/ruff_linter/src/checkers/logical_lines.rs +++ b/crates/ruff_linter/src/checkers/logical_lines.rs @@ -1,6 +1,6 @@ +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::LineRanges; use ruff_text_size::{Ranged, TextRange}; diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index ce54050f9d..7154c3e16b 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -4,9 +4,9 @@ use std::path::Path; use ruff_notebook::CellOffsets; use ruff_python_ast::PySourceType; +use ruff_python_ast::token::Tokens; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::Tokens; use crate::Locator; use crate::directives::TodoComment; diff --git a/crates/ruff_linter/src/directives.rs b/crates/ruff_linter/src/directives.rs index a642d57271..ea63f6edfd 100644 --- a/crates/ruff_linter/src/directives.rs +++ b/crates/ruff_linter/src/directives.rs @@ -5,8 +5,8 @@ use std::str::FromStr; use bitflags::bitflags; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_index::Indexer; -use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_trivia::CommentRanges; use ruff_source_file::LineRanges; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; diff --git a/crates/ruff_linter/src/doc_lines.rs b/crates/ruff_linter/src/doc_lines.rs index 7d7367beba..394b50ee70 100644 --- a/crates/ruff_linter/src/doc_lines.rs +++ b/crates/ruff_linter/src/doc_lines.rs @@ -5,8 +5,8 @@ use 
std::iter::FusedIterator; use std::slice::Iter; use ruff_python_ast::statement_visitor::{StatementVisitor, walk_stmt}; +use ruff_python_ast::token::{Token, TokenKind, Tokens}; use ruff_python_ast::{self as ast, Stmt, Suite}; -use ruff_python_parser::{Token, TokenKind, Tokens}; use ruff_source_file::UniversalNewlineIterator; use ruff_text_size::{Ranged, TextSize}; diff --git a/crates/ruff_linter/src/importer/mod.rs b/crates/ruff_linter/src/importer/mod.rs index de77c384fc..4ffa03d677 100644 --- a/crates/ruff_linter/src/importer/mod.rs +++ b/crates/ruff_linter/src/importer/mod.rs @@ -9,10 +9,11 @@ use anyhow::Result; use libcst_native as cst; use ruff_diagnostics::Edit; +use ruff_python_ast::token::Tokens; use ruff_python_ast::{self as ast, Expr, ModModule, Stmt}; use ruff_python_codegen::Stylist; use ruff_python_importer::Insertion; -use ruff_python_parser::{Parsed, Tokens}; +use ruff_python_parser::Parsed; use ruff_python_semantic::{ ImportedName, MemberNameImport, ModuleNameImport, NameImport, SemanticModel, }; diff --git a/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs b/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs index 220c15fbed..ce32b163c8 100644 --- a/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs +++ b/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs @@ -1,6 +1,6 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_index::Indexer; -use ruff_python_parser::{TokenKind, Tokens}; use ruff_text_size::{Ranged, TextRange}; use crate::Locator; diff --git a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs index d2b088fadf..5fdc1a37a3 100644 --- a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs +++ b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_list.rs @@ -3,7 +3,7 @@ use ruff_python_ast as ast; use ruff_python_ast::ExprGenerator; use ruff_python_ast::comparable::ComparableExpr; use ruff_python_ast::parenthesize::parenthesized_range; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::checkers::ast::Checker; diff --git a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_set.rs b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_set.rs index b2ed05c925..0560935bae 100644 --- a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_set.rs +++ b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_generator_set.rs @@ -3,7 +3,7 @@ use ruff_python_ast as ast; use ruff_python_ast::ExprGenerator; use ruff_python_ast::comparable::ComparableExpr; use ruff_python_ast::parenthesize::parenthesized_range; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::checkers::ast::Checker; diff --git a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_list_comprehension_set.rs b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_list_comprehension_set.rs index fd171a58b2..b4fda738e2 100644 --- a/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_list_comprehension_set.rs +++ 
b/crates/ruff_linter/src/rules/flake8_comprehensions/rules/unnecessary_list_comprehension_set.rs @@ -1,7 +1,7 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_python_ast as ast; use ruff_python_ast::parenthesize::parenthesized_range; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::checkers::ast::Checker; diff --git a/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs b/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs index c33dcccabc..b1639e1f0f 100644 --- a/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs +++ b/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs @@ -3,8 +3,8 @@ use std::borrow::Cow; use itertools::Itertools; use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_python_ast::StringFlags; +use ruff_python_ast::token::{Token, TokenKind, Tokens}; use ruff_python_index::Indexer; -use ruff_python_parser::{Token, TokenKind, Tokens}; use ruff_source_file::LineRanges; use ruff_text_size::{Ranged, TextLen, TextRange}; diff --git a/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_spread.rs b/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_spread.rs index 4b99f4e887..a5faff18d8 100644 --- a/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_spread.rs +++ b/crates/ruff_linter/src/rules/flake8_pie/rules/unnecessary_spread.rs @@ -1,6 +1,6 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_ast::{self as ast, Expr}; -use ruff_python_parser::{TokenKind, Tokens}; use ruff_text_size::{Ranged, TextLen, TextSize}; use crate::checkers::ast::Checker; diff --git a/crates/ruff_linter/src/rules/flake8_return/rules/function.rs b/crates/ruff_linter/src/rules/flake8_return/rules/function.rs index 018ddd925b..448cbd51a7 100644 --- a/crates/ruff_linter/src/rules/flake8_return/rules/function.rs +++ b/crates/ruff_linter/src/rules/flake8_return/rules/function.rs @@ -4,10 +4,10 @@ use ruff_diagnostics::Applicability; use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_python_ast::helpers::{is_const_false, is_const_true}; use ruff_python_ast::stmt_if::elif_else_range; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::visitor::Visitor; use ruff_python_ast::whitespace::indentation; use ruff_python_ast::{self as ast, Decorator, ElifElseClause, Expr, Stmt}; -use ruff_python_parser::TokenKind; use ruff_python_semantic::SemanticModel; use ruff_python_semantic::analyze::visibility::is_property; use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer, is_python_whitespace}; diff --git a/crates/ruff_linter/src/rules/isort/annotate.rs b/crates/ruff_linter/src/rules/isort/annotate.rs index ccbcc84b19..585b58e651 100644 --- a/crates/ruff_linter/src/rules/isort/annotate.rs +++ b/crates/ruff_linter/src/rules/isort/annotate.rs @@ -1,5 +1,5 @@ +use ruff_python_ast::token::Tokens; use ruff_python_ast::{self as ast, Stmt}; -use ruff_python_parser::Tokens; use ruff_source_file::LineRanges; use ruff_text_size::{Ranged, TextRange}; diff --git a/crates/ruff_linter/src/rules/isort/helpers.rs b/crates/ruff_linter/src/rules/isort/helpers.rs index f144a56993..bffc404100 100644 --- a/crates/ruff_linter/src/rules/isort/helpers.rs +++ b/crates/ruff_linter/src/rules/isort/helpers.rs @@ -1,5 +1,5 @@ use ruff_python_ast::Stmt; -use ruff_python_parser::{TokenKind, Tokens}; +use 
ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_trivia::PythonWhitespace; use ruff_source_file::UniversalNewlines; use ruff_text_size::Ranged; diff --git a/crates/ruff_linter/src/rules/isort/mod.rs b/crates/ruff_linter/src/rules/isort/mod.rs index 28c3c69650..abbff742d4 100644 --- a/crates/ruff_linter/src/rules/isort/mod.rs +++ b/crates/ruff_linter/src/rules/isort/mod.rs @@ -11,8 +11,8 @@ use comments::Comment; use normalize::normalize_imports; use order::order_imports; use ruff_python_ast::PySourceType; +use ruff_python_ast::token::Tokens; use ruff_python_codegen::Stylist; -use ruff_python_parser::Tokens; use settings::Settings; use types::EitherImport::{Import, ImportFrom}; use types::{AliasData, ImportBlock, TrailingComma}; diff --git a/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs b/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs index febe9fc425..2071555c8b 100644 --- a/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs +++ b/crates/ruff_linter/src/rules/isort/rules/organize_imports.rs @@ -1,11 +1,11 @@ use itertools::{EitherOrBoth, Itertools}; use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::token::Tokens; use ruff_python_ast::whitespace::trailing_lines_end; use ruff_python_ast::{PySourceType, PythonVersion, Stmt}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; -use ruff_python_parser::Tokens; use ruff_python_trivia::{PythonWhitespace, leading_indentation, textwrap::indent}; use ruff_source_file::{LineRanges, UniversalNewlines}; use ruff_text_size::{Ranged, TextRange}; diff --git a/crates/ruff_linter/src/rules/pycodestyle/helpers.rs b/crates/ruff_linter/src/rules/pycodestyle/helpers.rs index a3ba640560..18047a3263 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/helpers.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/helpers.rs @@ -1,4 +1,4 @@ -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; /// Returns `true` if the name should be considered "ambiguous". 
pub(super) fn is_ambiguous_name(name: &str) -> bool { diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs index 978806ee95..32795e95d5 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs @@ -8,10 +8,10 @@ use itertools::Itertools; use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_notebook::CellOffsets; use ruff_python_ast::PySourceType; +use ruff_python_ast::token::TokenIterWithContext; +use ruff_python_ast::token::TokenKind; +use ruff_python_ast::token::Tokens; use ruff_python_codegen::Stylist; -use ruff_python_parser::TokenIterWithContext; -use ruff_python_parser::TokenKind; -use ruff_python_parser::Tokens; use ruff_python_trivia::PythonWhitespace; use ruff_source_file::{LineRanges, UniversalNewlines}; use ruff_text_size::TextRange; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs index 8fd4889f3b..2749e35861 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs @@ -1,8 +1,8 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_notebook::CellOffsets; use ruff_python_ast::PySourceType; +use ruff_python_ast::token::{TokenIterWithContext, TokenKind, Tokens}; use ruff_python_index::Indexer; -use ruff_python_parser::{TokenIterWithContext, TokenKind, Tokens}; use ruff_text_size::{Ranged, TextSize}; use crate::Locator; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs index ff57fca1fe..3f2def0f3f 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::{Ranged, TextRange}; use crate::AlwaysFixableViolation; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/indentation.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/indentation.rs index 5c351f695f..4d2e1dbacd 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/indentation.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/indentation.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::TextRange; use crate::Violation; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs index 759bbb29d6..999dc2f637 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::Ranged; use crate::Edit; diff --git 
a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs index c767be0dc8..c3bb8f3422 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::Ranged; use crate::checkers::ast::LintContext; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs index 408575fa23..ffe43270b0 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::LintContext; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs index 9f5c033b9f..bb45d6379a 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs @@ -9,7 +9,7 @@ pub(crate) use missing_whitespace::*; pub(crate) use missing_whitespace_after_keyword::*; pub(crate) use missing_whitespace_around_operator::*; pub(crate) use redundant_backslash::*; -use ruff_python_parser::{TokenKind, Tokens}; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_trivia::is_python_whitespace; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; pub(crate) use space_around_operator::*; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/redundant_backslash.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/redundant_backslash.rs index 2092b63716..e077825712 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/redundant_backslash.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/redundant_backslash.rs @@ -1,6 +1,6 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::token::TokenKind; use ruff_python_index::Indexer; -use ruff_python_parser::TokenKind; use ruff_source_file::LineRanges; use ruff_text_size::{Ranged, TextRange, TextSize}; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs index c806ba46f4..74341c3da8 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::LintContext; diff --git 
a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs index bed7da83e1..66c1c95dd3 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::checkers::ast::LintContext; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs index dc9e91dac2..729079c092 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_python_trivia::PythonWhitespace; use ruff_source_file::LineRanges; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs index e176455718..d2595c384f 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::checkers::ast::LintContext; diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs index 3a84aad979..cd6b416e5a 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs @@ -3,7 +3,7 @@ use std::iter::Peekable; use itertools::Itertools; use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_notebook::CellOffsets; -use ruff_python_parser::{Token, TokenKind, Tokens}; +use ruff_python_ast::token::{Token, TokenKind, Tokens}; use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::{AlwaysFixableViolation, Edit, Fix, checkers::ast::LintContext}; diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs index d09d6cd006..9e19625745 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs @@ -2,8 +2,8 @@ use anyhow::{Error, bail}; use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_python_ast::helpers; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_ast::{CmpOp, Expr}; -use ruff_python_parser::{TokenKind, Tokens}; use 
ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs b/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs index aa5610620e..810c5742b9 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/unused_variable.rs @@ -3,8 +3,8 @@ use itertools::Itertools; use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_python_ast::helpers::contains_effect; use ruff_python_ast::parenthesize::parenthesized_range; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_ast::{self as ast, Stmt}; -use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_semantic::Binding; use ruff_text_size::{Ranged, TextRange, TextSize}; diff --git a/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs b/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs index 05688f67c7..d197f2c536 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs @@ -1,5 +1,5 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::{Token, TokenKind}; +use ruff_python_ast::token::{Token, TokenKind}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::Locator; diff --git a/crates/ruff_linter/src/rules/pyupgrade/fixes.rs b/crates/ruff_linter/src/rules/pyupgrade/fixes.rs index 3e0cdef305..7d26032797 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/fixes.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/fixes.rs @@ -1,5 +1,5 @@ use ruff_python_ast::StmtImportFrom; -use ruff_python_parser::{TokenKind, Tokens}; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_text_size::{Ranged, TextRange}; use crate::Locator; diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs index 51c3147a50..12e1b1e039 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/deprecated_import.rs @@ -1,10 +1,10 @@ use itertools::Itertools; use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::token::Tokens; use ruff_python_ast::whitespace::indentation; use ruff_python_ast::{Alias, StmtImportFrom, StmtRef}; use ruff_python_codegen::Stylist; -use ruff_python_parser::Tokens; use ruff_text_size::Ranged; use crate::Locator; diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs index 9fe0324c1c..76b3ef86b2 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/extraneous_parentheses.rs @@ -1,7 +1,7 @@ use std::slice::Iter; use ruff_macros::{ViolationMetadata, derive_message_formats}; -use ruff_python_parser::{Token, TokenKind, Tokens}; +use ruff_python_ast::token::{Token, TokenKind, Tokens}; use ruff_text_size::{Ranged, TextRange}; use crate::Locator; diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs index b889c66d8c..9749969691 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs @@ -6,11 +6,11 @@ use rustc_hash::{FxHashMap, FxHashSet}; use 
ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_python_ast::helpers::any_over_expr; use ruff_python_ast::str::{leading_quote, trailing_quote}; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{self as ast, Expr, Keyword, StringFlags}; use ruff_python_literal::format::{ FieldName, FieldNamePart, FieldType, FormatPart, FormatString, FromTemplate, }; -use ruff_python_parser::TokenKind; use ruff_source_file::LineRanges; use ruff_text_size::{Ranged, TextRange}; diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs index d6de04d355..fc9de0b4bb 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs @@ -3,12 +3,12 @@ use std::fmt::Write; use std::str::FromStr; use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{self as ast, AnyStringFlags, Expr, StringFlags, whitespace::indentation}; use ruff_python_codegen::Stylist; use ruff_python_literal::cformat::{ CConversionFlags, CFormatPart, CFormatPrecision, CFormatQuantity, CFormatString, }; -use ruff_python_parser::TokenKind; use ruff_python_stdlib::identifiers::is_identifier; use ruff_source_file::LineRanges; use ruff_text_size::{Ranged, TextRange}; diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/quoted_annotation.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/quoted_annotation.rs index 25a85e0a18..6ea46f23cd 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/quoted_annotation.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/quoted_annotation.rs @@ -1,6 +1,6 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_python_ast::Stmt; -use ruff_python_parser::TokenKind; +use ruff_python_ast::token::TokenKind; use ruff_python_semantic::SemanticModel; use ruff_source_file::LineRanges; use ruff_text_size::{TextLen, TextRange, TextSize}; diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs index cf87abc039..ec10105cb1 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/redundant_open_modes.rs @@ -1,7 +1,7 @@ use anyhow::Result; use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_ast::{self as ast, Expr}; -use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_stdlib::open_mode::OpenMode; use ruff_text_size::{Ranged, TextSize}; diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs index 12ca46ed4b..01cfe4fb03 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs @@ -1,8 +1,8 @@ use std::fmt::Write as _; use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_ast::{self as ast, Arguments, Expr, Keyword}; -use ruff_python_parser::{TokenKind, Tokens}; use ruff_text_size::{Ranged, TextRange}; use crate::Locator; diff --git a/crates/ruff_linter/src/rules/ruff/rules/explicit_f_string_type_conversion.rs 
b/crates/ruff_linter/src/rules/ruff/rules/explicit_f_string_type_conversion.rs index 8dcd347fe3..2ba444d6b9 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/explicit_f_string_type_conversion.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/explicit_f_string_type_conversion.rs @@ -4,8 +4,8 @@ use anyhow::Result; use libcst_native::{LeftParen, ParenthesizedNode, RightParen}; use ruff_macros::{ViolationMetadata, derive_message_formats}; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{self as ast, Expr, OperatorPrecedence}; -use ruff_python_parser::TokenKind; use ruff_text_size::Ranged; use crate::checkers::ast::Checker; diff --git a/crates/ruff_linter/src/rules/ruff/rules/needless_else.rs b/crates/ruff_linter/src/rules/ruff/rules/needless_else.rs index a60b6bde2e..f8022372b8 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/needless_else.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/needless_else.rs @@ -2,9 +2,9 @@ use std::cmp::Ordering; use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_python_ast::helpers::comment_indentation_after; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_ast::whitespace::indentation; use ruff_python_ast::{Stmt, StmtExpr, StmtFor, StmtIf, StmtTry, StmtWhile}; -use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::LineRanges; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs index e5881bc0ed..72965e5d25 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs @@ -9,8 +9,8 @@ use std::cmp::Ordering; use itertools::Itertools; use ruff_python_ast as ast; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_codegen::Stylist; -use ruff_python_parser::{TokenKind, Tokens}; use ruff_python_stdlib::str::is_cased_uppercase; use ruff_python_trivia::{SimpleTokenKind, first_non_trivia_token, leading_indentation}; use ruff_source_file::LineRanges; diff --git a/crates/ruff_linter/src/rules/ruff/rules/starmap_zip.rs b/crates/ruff_linter/src/rules/ruff/rules/starmap_zip.rs index f814bf980b..e9ed8d31bb 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/starmap_zip.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/starmap_zip.rs @@ -1,7 +1,7 @@ use ruff_macros::{ViolationMetadata, derive_message_formats}; use ruff_python_ast::PythonVersion; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{Expr, ExprCall, parenthesize::parenthesized_range}; -use ruff_python_parser::TokenKind; use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 2642572e1b..5efa9a07e7 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -29,6 +29,7 @@ pub mod statement_visitor; pub mod stmt_if; pub mod str; pub mod str_prefix; +pub mod token; pub mod traversal; pub mod types; pub mod visitor; diff --git a/crates/ruff_python_ast/src/token.rs b/crates/ruff_python_ast/src/token.rs new file mode 100644 index 0000000000..fc1b62a366 --- /dev/null +++ b/crates/ruff_python_ast/src/token.rs @@ -0,0 +1,851 @@ +//! Token kinds for Python source code created by the lexer and consumed by the `ruff_python_parser`. +//! +//! This module defines the tokens that the lexer recognizes. The tokens are +//! 
loosely based on the token definitions found in the [CPython source]. +//! +//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Grammar/Tokens + +use std::fmt; + +use bitflags::bitflags; + +use crate::str::{Quote, TripleQuotes}; +use crate::str_prefix::{ + AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix, TStringPrefix, +}; +use crate::{AnyStringFlags, BoolOp, Operator, StringFlags, UnaryOp}; +use ruff_text_size::{Ranged, TextRange}; + +mod tokens; + +pub use tokens::{TokenAt, TokenIterWithContext, Tokens}; + +#[derive(Clone, Copy, PartialEq, Eq)] +#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))] +pub struct Token { + /// The kind of the token. + kind: TokenKind, + /// The range of the token. + range: TextRange, + /// The set of flags describing this token. + flags: TokenFlags, +} + +impl Token { + pub fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token { + Self { kind, range, flags } + } + + /// Returns the token kind. + #[inline] + pub const fn kind(&self) -> TokenKind { + self.kind + } + + /// Returns the token as a tuple of (kind, range). + #[inline] + pub const fn as_tuple(&self) -> (TokenKind, TextRange) { + (self.kind, self.range) + } + + /// Returns `true` if the current token is a triple-quoted string of any kind. + /// + /// # Panics + /// + /// If the token is not a string or f/t-string token. + pub fn is_triple_quoted_string(self) -> bool { + self.unwrap_string_flags().is_triple_quoted() + } + + /// Returns the [`Quote`] style for the current string token of any kind. + /// + /// # Panics + /// + /// If the token is not a string or f/t-string token. + pub fn string_quote_style(self) -> Quote { + self.unwrap_string_flags().quote_style() + } + + /// Returns the [`AnyStringFlags`] style for the current string token of any kind. + /// + /// # Panics + /// + /// If the token is not a string or f/t-string token. + pub fn unwrap_string_flags(self) -> AnyStringFlags { + self.string_flags() + .unwrap_or_else(|| panic!("token to be a string")) + } + + /// Returns the [`AnyStringFlags`] for the current token if it is any kind of string token, + /// or [`None`] otherwise. + pub fn string_flags(self) -> Option<AnyStringFlags> { + if self.is_any_string() { + Some(self.flags.as_any_string_flags()) + } else { + None + } + } + + /// Returns `true` if this is any kind of string token - including + /// tokens in t-strings (which do not have type `str`). + const fn is_any_string(self) -> bool { + matches!( + self.kind, + TokenKind::String + | TokenKind::FStringStart + | TokenKind::FStringMiddle + | TokenKind::FStringEnd + | TokenKind::TStringStart + | TokenKind::TStringMiddle + | TokenKind::TStringEnd + ) + } +} + +impl Ranged for Token { + fn range(&self) -> TextRange { + self.range + } +} + +impl fmt::Debug for Token { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?} {:?}", self.kind, self.range)?; + if !self.flags.is_empty() { + f.write_str(" (flags = ")?; + let mut first = true; + for (name, _) in self.flags.iter_names() { + if first { + first = false; + } else { + f.write_str(" | ")?; + } + f.write_str(name)?; + } + f.write_str(")")?; + } + Ok(()) + } +} +
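To make the accessors above concrete: a consumer can branch on `string_flags()` to inspect string tokens without hitting the panicking `unwrap_string_flags` path. A minimal, hypothetical sketch, not part of this diff (the `describe` helper is illustrative, and it assumes the `StringFlags` trait is in scope for `is_triple_quoted`):

```rust
use ruff_python_ast::StringFlags as _;
use ruff_python_ast::token::Token;
use ruff_text_size::Ranged;

/// Describe a token without panicking on non-string tokens (illustrative helper).
fn describe(token: &Token) -> String {
    match token.string_flags() {
        // `string_flags` returns `Some` for `String` and all f/t-string tokens.
        Some(flags) => format!(
            "string token at {:?}, triple-quoted: {}",
            token.range(),
            flags.is_triple_quoted()
        ),
        None => format!("{:?} token at {:?}", token.kind(), token.range()),
    }
}
```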
+/// A kind of a token. +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)] +#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))] +pub enum TokenKind { + /// Token kind for a name, commonly known as an identifier. + Name, + /// Token kind for an integer. + Int, + /// Token kind for a floating point number. + Float, + /// Token kind for a complex number. + Complex, + /// Token kind for a string. + String, + /// Token kind for the start of an f-string. This includes the `f`/`F`/`fr` prefix + /// and the opening quote(s). + FStringStart, + /// Token kind that includes the portion of text inside the f-string that's not + /// part of the expression part and isn't an opening or closing brace. + FStringMiddle, + /// Token kind for the end of an f-string. This includes the closing quote. + FStringEnd, + /// Token kind for the start of a t-string. This includes the `t`/`T`/`tr` prefix + /// and the opening quote(s). + TStringStart, + /// Token kind that includes the portion of text inside the t-string that's not + /// part of the interpolation part and isn't an opening or closing brace. + TStringMiddle, + /// Token kind for the end of a t-string. This includes the closing quote. + TStringEnd, + /// Token kind for an IPython escape command. + IpyEscapeCommand, + /// Token kind for a comment. These are filtered out of the token stream prior to parsing. + Comment, + /// Token kind for a newline. + Newline, + /// Token kind for a newline that is not a logical line break. These are filtered out of + /// the token stream prior to parsing. + NonLogicalNewline, + /// Token kind for an indent. + Indent, + /// Token kind for a dedent. + Dedent, + /// Token kind for the end of file. + EndOfFile, + /// Token kind for a question mark `?`. + Question, + /// Token kind for an exclamation mark `!`. + Exclamation, + /// Token kind for a left parenthesis `(`. + Lpar, + /// Token kind for a right parenthesis `)`. + Rpar, + /// Token kind for a left square bracket `[`. + Lsqb, + /// Token kind for a right square bracket `]`. + Rsqb, + /// Token kind for a colon `:`. + Colon, + /// Token kind for a comma `,`. + Comma, + /// Token kind for a semicolon `;`. + Semi, + /// Token kind for plus `+`. + Plus, + /// Token kind for minus `-`. + Minus, + /// Token kind for star `*`. + Star, + /// Token kind for slash `/`. + Slash, + /// Token kind for vertical bar `|`. + Vbar, + /// Token kind for ampersand `&`. + Amper, + /// Token kind for less than `<`. + Less, + /// Token kind for greater than `>`. + Greater, + /// Token kind for equal `=`. + Equal, + /// Token kind for dot `.`. + Dot, + /// Token kind for percent `%`. + Percent, + /// Token kind for left bracket `{`. + Lbrace, + /// Token kind for right bracket `}`. + Rbrace, + /// Token kind for double equal `==`. + EqEqual, + /// Token kind for not equal `!=`. + NotEqual, + /// Token kind for less than or equal `<=`. + LessEqual, + /// Token kind for greater than or equal `>=`. + GreaterEqual, + /// Token kind for tilde `~`. + Tilde, + /// Token kind for caret `^`. + CircumFlex, + /// Token kind for left shift `<<`. + LeftShift, + /// Token kind for right shift `>>`. + RightShift, + /// Token kind for double star `**`. + DoubleStar, + /// Token kind for double star equal `**=`. + DoubleStarEqual, + /// Token kind for plus equal `+=`. + PlusEqual, + /// Token kind for minus equal `-=`. + MinusEqual, + /// Token kind for star equal `*=`. + StarEqual, + /// Token kind for slash equal `/=`. + SlashEqual, + /// Token kind for percent equal `%=`. + PercentEqual, + /// Token kind for ampersand equal `&=`. + AmperEqual, + /// Token kind for vertical bar equal `|=`. + VbarEqual, + /// Token kind for caret equal `^=`. + CircumflexEqual, + /// Token kind for left shift equal `<<=`. + LeftShiftEqual, + /// Token kind for right shift equal `>>=`. + RightShiftEqual, + /// Token kind for double slash `//`. + DoubleSlash, + /// Token kind for double slash equal `//=`.
+ DoubleSlashEqual, + /// Token kind for colon equal `:=`. + ColonEqual, + /// Token kind for at `@`. + At, + /// Token kind for at equal `@=`. + AtEqual, + /// Token kind for arrow `->`. + Rarrow, + /// Token kind for ellipsis `...`. + Ellipsis, + + // The keywords should be sorted in alphabetical order. If the boundary tokens for the + // "Keywords" and "Soft keywords" group change, update the related methods on `TokenKind`. + + // Keywords + And, + As, + Assert, + Async, + Await, + Break, + Class, + Continue, + Def, + Del, + Elif, + Else, + Except, + False, + Finally, + For, + From, + Global, + If, + Import, + In, + Is, + Lambda, + None, + Nonlocal, + Not, + Or, + Pass, + Raise, + Return, + True, + Try, + While, + With, + Yield, + + // Soft keywords + Case, + Match, + Type, + + Unknown, +} + +impl TokenKind { + /// Returns `true` if this is an end of file token. + #[inline] + pub const fn is_eof(self) -> bool { + matches!(self, TokenKind::EndOfFile) + } + + /// Returns `true` if this is either a newline or non-logical newline token. + #[inline] + pub const fn is_any_newline(self) -> bool { + matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline) + } + + /// Returns `true` if the token is a keyword (including soft keywords). + /// + /// See also [`is_soft_keyword`], [`is_non_soft_keyword`]. + /// + /// [`is_soft_keyword`]: TokenKind::is_soft_keyword + /// [`is_non_soft_keyword`]: TokenKind::is_non_soft_keyword + #[inline] + pub fn is_keyword(self) -> bool { + TokenKind::And <= self && self <= TokenKind::Type + } + + /// Returns `true` if the token is strictly a soft keyword. + /// + /// See also [`is_keyword`], [`is_non_soft_keyword`]. + /// + /// [`is_keyword`]: TokenKind::is_keyword + /// [`is_non_soft_keyword`]: TokenKind::is_non_soft_keyword + #[inline] + pub fn is_soft_keyword(self) -> bool { + TokenKind::Case <= self && self <= TokenKind::Type + } + + /// Returns `true` if the token is strictly a non-soft keyword. + /// + /// See also [`is_keyword`], [`is_soft_keyword`]. 
+ /// + /// [`is_keyword`]: TokenKind::is_keyword + /// [`is_soft_keyword`]: TokenKind::is_soft_keyword + #[inline] + pub fn is_non_soft_keyword(self) -> bool { + TokenKind::And <= self && self <= TokenKind::Yield + } + + #[inline] + pub const fn is_operator(self) -> bool { + matches!( + self, + TokenKind::Lpar + | TokenKind::Rpar + | TokenKind::Lsqb + | TokenKind::Rsqb + | TokenKind::Comma + | TokenKind::Semi + | TokenKind::Plus + | TokenKind::Minus + | TokenKind::Star + | TokenKind::Slash + | TokenKind::Vbar + | TokenKind::Amper + | TokenKind::Less + | TokenKind::Greater + | TokenKind::Equal + | TokenKind::Dot + | TokenKind::Percent + | TokenKind::Lbrace + | TokenKind::Rbrace + | TokenKind::EqEqual + | TokenKind::NotEqual + | TokenKind::LessEqual + | TokenKind::GreaterEqual + | TokenKind::Tilde + | TokenKind::CircumFlex + | TokenKind::LeftShift + | TokenKind::RightShift + | TokenKind::DoubleStar + | TokenKind::PlusEqual + | TokenKind::MinusEqual + | TokenKind::StarEqual + | TokenKind::SlashEqual + | TokenKind::PercentEqual + | TokenKind::AmperEqual + | TokenKind::VbarEqual + | TokenKind::CircumflexEqual + | TokenKind::LeftShiftEqual + | TokenKind::RightShiftEqual + | TokenKind::DoubleStarEqual + | TokenKind::DoubleSlash + | TokenKind::DoubleSlashEqual + | TokenKind::At + | TokenKind::AtEqual + | TokenKind::Rarrow + | TokenKind::Ellipsis + | TokenKind::ColonEqual + | TokenKind::Colon + | TokenKind::And + | TokenKind::Or + | TokenKind::Not + | TokenKind::In + | TokenKind::Is + ) + } + + /// Returns `true` if this is a singleton token i.e., `True`, `False`, or `None`. + #[inline] + pub const fn is_singleton(self) -> bool { + matches!(self, TokenKind::False | TokenKind::True | TokenKind::None) + } + + /// Returns `true` if this is a trivia token i.e., a comment or a non-logical newline. + #[inline] + pub const fn is_trivia(&self) -> bool { + matches!(self, TokenKind::Comment | TokenKind::NonLogicalNewline) + } + + /// Returns `true` if this is a comment token. + #[inline] + pub const fn is_comment(&self) -> bool { + matches!(self, TokenKind::Comment) + } + + #[inline] + pub const fn is_arithmetic(self) -> bool { + matches!( + self, + TokenKind::DoubleStar + | TokenKind::Star + | TokenKind::Plus + | TokenKind::Minus + | TokenKind::Slash + | TokenKind::DoubleSlash + | TokenKind::At + ) + } + + #[inline] + pub const fn is_bitwise_or_shift(self) -> bool { + matches!( + self, + TokenKind::LeftShift + | TokenKind::LeftShiftEqual + | TokenKind::RightShift + | TokenKind::RightShiftEqual + | TokenKind::Amper + | TokenKind::AmperEqual + | TokenKind::Vbar + | TokenKind::VbarEqual + | TokenKind::CircumFlex + | TokenKind::CircumflexEqual + | TokenKind::Tilde + ) + } + + /// Returns `true` if the current token is a unary arithmetic operator. + #[inline] + pub const fn is_unary_arithmetic_operator(self) -> bool { + matches!(self, TokenKind::Plus | TokenKind::Minus) + } + + #[inline] + pub const fn is_interpolated_string_end(self) -> bool { + matches!(self, TokenKind::FStringEnd | TokenKind::TStringEnd) + } +
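Because the keyword predicates above are plain range comparisons over the enum's declaration order (hence the "keywords should be sorted" comment earlier), they compose into cheap, allocation-free classifiers. A hypothetical sketch, not part of the diff:

```rust
use ruff_python_ast::token::TokenKind;

/// Coarse classification built from the predicates (illustrative only).
fn classify(kind: TokenKind) -> &'static str {
    // Order matters: `is_operator` also matches the keyword operators
    // `and`/`or`/`not`/`in`/`is`, so test keyword-ness first.
    if kind.is_non_soft_keyword() {
        "keyword"
    } else if kind.is_soft_keyword() {
        "soft keyword"
    } else if kind.is_operator() {
        "operator"
    } else if kind.is_trivia() {
        "trivia"
    } else {
        "other"
    }
}

fn main() {
    assert_eq!(classify(TokenKind::Match), "soft keyword");
    assert_eq!(classify(TokenKind::PlusEqual), "operator");
    assert_eq!(classify(TokenKind::Comment), "trivia");
}
```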
+ /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary arithmetic + /// operator, otherwise returns [`None`]. + /// + /// Use [`as_unary_operator`] to match against any unary operator. + /// + /// [`as_unary_operator`]: TokenKind::as_unary_operator + #[inline] + pub const fn as_unary_arithmetic_operator(self) -> Option<UnaryOp> { + Some(match self { + TokenKind::Plus => UnaryOp::UAdd, + TokenKind::Minus => UnaryOp::USub, + _ => return None, + }) + } + + /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary operator, + /// otherwise returns [`None`]. + /// + /// Use [`as_unary_arithmetic_operator`] to match against only an arithmetic unary operator. + /// + /// [`as_unary_arithmetic_operator`]: TokenKind::as_unary_arithmetic_operator + #[inline] + pub const fn as_unary_operator(self) -> Option<UnaryOp> { + Some(match self { + TokenKind::Plus => UnaryOp::UAdd, + TokenKind::Minus => UnaryOp::USub, + TokenKind::Tilde => UnaryOp::Invert, + TokenKind::Not => UnaryOp::Not, + _ => return None, + }) + } + + /// Returns the [`BoolOp`] that corresponds to this token kind, if it is a boolean operator, + /// otherwise returns [`None`]. + #[inline] + pub const fn as_bool_operator(self) -> Option<BoolOp> { + Some(match self { + TokenKind::And => BoolOp::And, + TokenKind::Or => BoolOp::Or, + _ => return None, + }) + } + + /// Returns the binary [`Operator`] that corresponds to the current token, if it's a binary + /// operator, otherwise returns [`None`]. + /// + /// Use [`as_augmented_assign_operator`] to match against an augmented assignment token. + /// + /// [`as_augmented_assign_operator`]: TokenKind::as_augmented_assign_operator + pub const fn as_binary_operator(self) -> Option<Operator> { + Some(match self { + TokenKind::Plus => Operator::Add, + TokenKind::Minus => Operator::Sub, + TokenKind::Star => Operator::Mult, + TokenKind::At => Operator::MatMult, + TokenKind::DoubleStar => Operator::Pow, + TokenKind::Slash => Operator::Div, + TokenKind::DoubleSlash => Operator::FloorDiv, + TokenKind::Percent => Operator::Mod, + TokenKind::Amper => Operator::BitAnd, + TokenKind::Vbar => Operator::BitOr, + TokenKind::CircumFlex => Operator::BitXor, + TokenKind::LeftShift => Operator::LShift, + TokenKind::RightShift => Operator::RShift, + _ => return None, + }) + } +
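The conversions are asymmetric by design: token-to-AST mappings (`as_binary_operator` and friends) are partial, while the AST-to-token `From` impls that follow are total. A hypothetical round-trip sketch, not part of the diff (it assumes `Operator` and `TokenKind` support `PartialEq`/`Debug`, per their derives):

```rust
use ruff_python_ast::Operator;
use ruff_python_ast::token::TokenKind;

fn main() {
    // Token -> AST operator is partial: only operator tokens map to `Some`.
    assert_eq!(TokenKind::Plus.as_binary_operator(), Some(Operator::Add));
    assert_eq!(TokenKind::Name.as_binary_operator(), None);

    // Augmented assignment tokens map to the underlying binary operator.
    assert_eq!(
        TokenKind::PlusEqual.as_augmented_assign_operator(),
        Some(Operator::Add)
    );

    // AST operator -> token is total, via the `From` impls below.
    assert_eq!(TokenKind::from(Operator::Add), TokenKind::Plus);
}
```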
+ /// Returns the [`Operator`] that corresponds to this token kind, if it is + /// an augmented assignment operator, or [`None`] otherwise. + #[inline] + pub const fn as_augmented_assign_operator(self) -> Option<Operator> { + Some(match self { + TokenKind::PlusEqual => Operator::Add, + TokenKind::MinusEqual => Operator::Sub, + TokenKind::StarEqual => Operator::Mult, + TokenKind::AtEqual => Operator::MatMult, + TokenKind::DoubleStarEqual => Operator::Pow, + TokenKind::SlashEqual => Operator::Div, + TokenKind::DoubleSlashEqual => Operator::FloorDiv, + TokenKind::PercentEqual => Operator::Mod, + TokenKind::AmperEqual => Operator::BitAnd, + TokenKind::VbarEqual => Operator::BitOr, + TokenKind::CircumflexEqual => Operator::BitXor, + TokenKind::LeftShiftEqual => Operator::LShift, + TokenKind::RightShiftEqual => Operator::RShift, + _ => return None, + }) + } +} + +impl From<BoolOp> for TokenKind { + #[inline] + fn from(op: BoolOp) -> Self { + match op { + BoolOp::And => TokenKind::And, + BoolOp::Or => TokenKind::Or, + } + } +} + +impl From<UnaryOp> for TokenKind { + #[inline] + fn from(op: UnaryOp) -> Self { + match op { + UnaryOp::Invert => TokenKind::Tilde, + UnaryOp::Not => TokenKind::Not, + UnaryOp::UAdd => TokenKind::Plus, + UnaryOp::USub => TokenKind::Minus, + } + } +} + +impl From<Operator> for TokenKind { + #[inline] + fn from(op: Operator) -> Self { + match op { + Operator::Add => TokenKind::Plus, + Operator::Sub => TokenKind::Minus, + Operator::Mult => TokenKind::Star, + Operator::MatMult => TokenKind::At, + Operator::Div => TokenKind::Slash, + Operator::Mod => TokenKind::Percent, + Operator::Pow => TokenKind::DoubleStar, + Operator::LShift => TokenKind::LeftShift, + Operator::RShift => TokenKind::RightShift, + Operator::BitOr => TokenKind::Vbar, + Operator::BitXor => TokenKind::CircumFlex, + Operator::BitAnd => TokenKind::Amper, + Operator::FloorDiv => TokenKind::DoubleSlash, + } + } +} + +impl fmt::Display for TokenKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let value = match self { + TokenKind::Unknown => "Unknown", + TokenKind::Newline => "newline", + TokenKind::NonLogicalNewline => "NonLogicalNewline", + TokenKind::Indent => "indent", + TokenKind::Dedent => "dedent", + TokenKind::EndOfFile => "end of file", + TokenKind::Name => "name", + TokenKind::Int => "int", + TokenKind::Float => "float", + TokenKind::Complex => "complex", + TokenKind::String => "string", + TokenKind::FStringStart => "FStringStart", + TokenKind::FStringMiddle => "FStringMiddle", + TokenKind::FStringEnd => "FStringEnd", + TokenKind::TStringStart => "TStringStart", + TokenKind::TStringMiddle => "TStringMiddle", + TokenKind::TStringEnd => "TStringEnd", + TokenKind::IpyEscapeCommand => "IPython escape command", + TokenKind::Comment => "comment", + TokenKind::Question => "`?`", + TokenKind::Exclamation => "`!`", + TokenKind::Lpar => "`(`", + TokenKind::Rpar => "`)`", + TokenKind::Lsqb => "`[`", + TokenKind::Rsqb => "`]`", + TokenKind::Lbrace => "`{`", + TokenKind::Rbrace => "`}`", + TokenKind::Equal => "`=`", + TokenKind::ColonEqual => "`:=`", + TokenKind::Dot => "`.`", + TokenKind::Colon => "`:`", + TokenKind::Semi => "`;`", + TokenKind::Comma => "`,`", + TokenKind::Rarrow => "`->`", + TokenKind::Plus => "`+`", + TokenKind::Minus => "`-`", + TokenKind::Star => "`*`", + TokenKind::DoubleStar => "`**`", + TokenKind::Slash => "`/`", + TokenKind::DoubleSlash => "`//`", + TokenKind::Percent => "`%`", + TokenKind::Vbar => "`|`", + TokenKind::Amper => "`&`", + TokenKind::CircumFlex => "`^`", + TokenKind::LeftShift => "`<<`", + TokenKind::RightShift => "`>>`", + TokenKind::Tilde => "`~`", + TokenKind::At => "`@`", + TokenKind::Less => "`<`", + TokenKind::Greater =>
"`>`", + TokenKind::EqEqual => "`==`", + TokenKind::NotEqual => "`!=`", + TokenKind::LessEqual => "`<=`", + TokenKind::GreaterEqual => "`>=`", + TokenKind::PlusEqual => "`+=`", + TokenKind::MinusEqual => "`-=`", + TokenKind::StarEqual => "`*=`", + TokenKind::DoubleStarEqual => "`**=`", + TokenKind::SlashEqual => "`/=`", + TokenKind::DoubleSlashEqual => "`//=`", + TokenKind::PercentEqual => "`%=`", + TokenKind::VbarEqual => "`|=`", + TokenKind::AmperEqual => "`&=`", + TokenKind::CircumflexEqual => "`^=`", + TokenKind::LeftShiftEqual => "`<<=`", + TokenKind::RightShiftEqual => "`>>=`", + TokenKind::AtEqual => "`@=`", + TokenKind::Ellipsis => "`...`", + TokenKind::False => "`False`", + TokenKind::None => "`None`", + TokenKind::True => "`True`", + TokenKind::And => "`and`", + TokenKind::As => "`as`", + TokenKind::Assert => "`assert`", + TokenKind::Async => "`async`", + TokenKind::Await => "`await`", + TokenKind::Break => "`break`", + TokenKind::Class => "`class`", + TokenKind::Continue => "`continue`", + TokenKind::Def => "`def`", + TokenKind::Del => "`del`", + TokenKind::Elif => "`elif`", + TokenKind::Else => "`else`", + TokenKind::Except => "`except`", + TokenKind::Finally => "`finally`", + TokenKind::For => "`for`", + TokenKind::From => "`from`", + TokenKind::Global => "`global`", + TokenKind::If => "`if`", + TokenKind::Import => "`import`", + TokenKind::In => "`in`", + TokenKind::Is => "`is`", + TokenKind::Lambda => "`lambda`", + TokenKind::Nonlocal => "`nonlocal`", + TokenKind::Not => "`not`", + TokenKind::Or => "`or`", + TokenKind::Pass => "`pass`", + TokenKind::Raise => "`raise`", + TokenKind::Return => "`return`", + TokenKind::Try => "`try`", + TokenKind::While => "`while`", + TokenKind::Match => "`match`", + TokenKind::Type => "`type`", + TokenKind::Case => "`case`", + TokenKind::With => "`with`", + TokenKind::Yield => "`yield`", + }; + f.write_str(value) + } +} + +bitflags! { + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct TokenFlags: u16 { + /// The token is a string with double quotes (`"`). + const DOUBLE_QUOTES = 1 << 0; + /// The token is a triple-quoted string i.e., it starts and ends with three consecutive + /// quote characters (`"""` or `'''`). + const TRIPLE_QUOTED_STRING = 1 << 1; + + /// The token is a unicode string i.e., prefixed with `u` or `U` + const UNICODE_STRING = 1 << 2; + /// The token is a byte string i.e., prefixed with `b` or `B` + const BYTE_STRING = 1 << 3; + /// The token is an f-string i.e., prefixed with `f` or `F` + const F_STRING = 1 << 4; + /// The token is a t-string i.e., prefixed with `t` or `T` + const T_STRING = 1 << 5; + /// The token is a raw string and the prefix character is in lowercase. + const RAW_STRING_LOWERCASE = 1 << 6; + /// The token is a raw string and the prefix character is in uppercase. 
+ const RAW_STRING_UPPERCASE = 1 << 7; + /// String without matching closing quote(s) + const UNCLOSED_STRING = 1 << 8; + + /// The token is a raw string i.e., prefixed with `r` or `R` + const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits(); + + } +} + +#[cfg(feature = "get-size")] +impl get_size2::GetSize for TokenFlags {} + +impl StringFlags for TokenFlags { + fn quote_style(self) -> Quote { + if self.intersects(TokenFlags::DOUBLE_QUOTES) { + Quote::Double + } else { + Quote::Single + } + } + + fn triple_quotes(self) -> TripleQuotes { + if self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) { + TripleQuotes::Yes + } else { + TripleQuotes::No + } + } + + fn prefix(self) -> AnyStringPrefix { + if self.intersects(TokenFlags::F_STRING) { + if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true }) + } else { + AnyStringPrefix::Format(FStringPrefix::Regular) + } + } else if self.intersects(TokenFlags::T_STRING) { + if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: true }) + } else { + AnyStringPrefix::Template(TStringPrefix::Regular) + } + } else if self.intersects(TokenFlags::BYTE_STRING) { + if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) + } else { + AnyStringPrefix::Bytes(ByteStringPrefix::Regular) + } + } else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { + AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false }) + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { + AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true }) + } else if self.intersects(TokenFlags::UNICODE_STRING) { + AnyStringPrefix::Regular(StringLiteralPrefix::Unicode) + } else { + AnyStringPrefix::Regular(StringLiteralPrefix::Empty) + } + } + + fn is_unclosed(self) -> bool { + self.intersects(TokenFlags::UNCLOSED_STRING) + } +} + +impl TokenFlags { + /// Returns `true` if the token is an f-string. + pub const fn is_f_string(self) -> bool { + self.intersects(TokenFlags::F_STRING) + } + + /// Returns `true` if the token is a t-string. + pub const fn is_t_string(self) -> bool { + self.intersects(TokenFlags::T_STRING) + } + + /// Returns `true` if the token is an interpolated string i.e., an f-string or a t-string. + pub const fn is_interpolated_string(self) -> bool { + self.intersects(TokenFlags::T_STRING.union(TokenFlags::F_STRING)) + } + + /// Returns `true` if the token is a triple-quoted interpolated string. + pub fn is_triple_quoted_interpolated_string(self) -> bool { + self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) && self.is_interpolated_string() + } +
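The `prefix` method above is effectively the inverse of the lexer's flag-setting: each bit combination folds back into a structured `AnyStringPrefix`. A hypothetical decoding sketch, not part of the diff (it assumes the `StringFlags` trait is in scope for `prefix`):

```rust
use ruff_python_ast::StringFlags as _;
use ruff_python_ast::str_prefix::AnyStringPrefix;
use ruff_python_ast::token::TokenFlags;

fn main() {
    // Flags as the lexer might set them for `rb"..."`: a raw (lowercase `r`)
    // byte string using double quotes.
    let flags = TokenFlags::BYTE_STRING
        | TokenFlags::RAW_STRING_LOWERCASE
        | TokenFlags::DOUBLE_QUOTES;

    // The bits decode back into a structured prefix.
    assert!(matches!(flags.prefix(), AnyStringPrefix::Bytes(_)));
    assert!(flags.is_raw_string());
    assert!(!flags.is_f_string());
}
```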
+    pub const fn is_raw_string(self) -> bool {
+        self.intersects(TokenFlags::RAW_STRING)
+    }
+}
diff --git a/crates/ruff_python_ast/src/token/tokens.rs b/crates/ruff_python_ast/src/token/tokens.rs
new file mode 100644
index 0000000000..edc7e27463
--- /dev/null
+++ b/crates/ruff_python_ast/src/token/tokens.rs
@@ -0,0 +1,520 @@
+use std::{iter::FusedIterator, ops::Deref};
+
+use super::{Token, TokenKind};
+use ruff_python_trivia::CommentRanges;
+use ruff_text_size::{Ranged as _, TextRange, TextSize};
+
+/// Tokens represents a vector of lexed [`Token`]s.
+#[derive(Debug, Clone, PartialEq, Eq)]
+#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
+pub struct Tokens {
+    raw: Vec<Token>,
+}
+
+impl Tokens {
+    pub fn new(tokens: Vec<Token>) -> Tokens {
+        Tokens { raw: tokens }
+    }
+
+    /// Returns an iterator over all the tokens that provides context.
+    pub fn iter_with_context(&self) -> TokenIterWithContext<'_> {
+        TokenIterWithContext::new(&self.raw)
+    }
+
+    /// Performs a binary search to find the index of the **first** token that starts at the given `offset`.
+    ///
+    /// Unlike `binary_search_by_key`, this method ensures that if multiple tokens start at the same offset,
+    /// it returns the index of the first one. Multiple tokens can start at the same offset in cases where
+    /// zero-length tokens are involved (like `Dedent` or `Newline` at the end of the file).
+    pub fn binary_search_by_start(&self, offset: TextSize) -> Result<usize, usize> {
+        let partition_point = self.partition_point(|token| token.start() < offset);
+
+        let after = &self[partition_point..];
+
+        if after.first().is_some_and(|first| first.start() == offset) {
+            Ok(partition_point)
+        } else {
+            Err(partition_point)
+        }
+    }
+
+    /// Returns a slice of [`Token`] that are within the given `range`.
+    ///
+    /// The start and end offset of the given range should be either:
+    /// 1. Token boundary
+    /// 2. Gap between the tokens
+    ///
+    /// For example, considering the following tokens and their corresponding range:
+    ///
+    /// | Token               | Range     |
+    /// |---------------------|-----------|
+    /// | `Def`               | `0..3`    |
+    /// | `Name`              | `4..7`    |
+    /// | `Lpar`              | `7..8`    |
+    /// | `Rpar`              | `8..9`    |
+    /// | `Colon`             | `9..10`   |
+    /// | `Newline`           | `10..11`  |
+    /// | `Comment`           | `15..24`  |
+    /// | `NonLogicalNewline` | `24..25`  |
+    /// | `Indent`            | `25..29`  |
+    /// | `Pass`              | `29..33`  |
+    ///
+    /// Here, for (1) a token boundary is considered either the start or end offset of any of the
+    /// above tokens. For (2), the gap would be any offset between the `Newline` and `Comment`
+    /// tokens, which are 12, 13, and 14.
+    ///
+    /// Examples:
+    /// 1) `4..10` would give `Name`, `Lpar`, `Rpar`, `Colon`
+    /// 2) `11..25` would give `Comment`, `NonLogicalNewline`
+    /// 3) `12..25` would give the same as (2) since offset 12 is in the "gap"
+    /// 4) `9..12` would give `Colon`, `Newline` since offset 12 is in the "gap"
+    /// 5) `18..27` would panic because both the start and end offsets are within a token
+    ///
+    /// ## Note
+    ///
+    /// The returned slice can contain the [`TokenKind::Unknown`] token if there was a lexical
+    /// error encountered within the given range.
+    ///
+    /// # Panics
+    ///
+    /// If either the start or end offset of the given range is within a token range.
+    pub fn in_range(&self, range: TextRange) -> &[Token] {
+        let tokens_after_start = self.after(range.start());
+
+        Self::before_impl(tokens_after_start, range.end())
+    }
+
+    /// Searches the token(s) at `offset`.
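+    ///
+    /// A sketch of typical usage, adapted from the doctest that previously lived in
+    /// `ruff_python_parser`'s `lib.rs` (removed further down in this patch);
+    /// `parse_unchecked_source` is that crate's parsing entry point:
+    ///
+    /// ```ignore
+    /// let parsed = ruff_python_parser::parse_unchecked_source("x.y", PySourceType::Python);
+    /// let tokens = parsed.tokens();
+    /// // Offset 1 sits exactly on the boundary between the name `x` and the `.`:
+    /// assert!(matches!(tokens.at_offset(TextSize::new(1)), TokenAt::Between(..)));
+    /// ```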
+    ///
+    /// Returns [`TokenAt::Between`] if `offset` points directly in between two tokens
+    /// (the left token ends at `offset` and the right token starts at `offset`).
+    pub fn at_offset(&self, offset: TextSize) -> TokenAt {
+        match self.binary_search_by_start(offset) {
+            // The token at `index` starts exactly at `offset`.
+            // ```python
+            // object.attribute
+            //       ^ OFFSET
+            // ```
+            Ok(index) => {
+                let token = self[index];
+                // `token` starts exactly at `offset`. Test if the offset is right between
+                // `token` and the previous token (if there's any)
+                if let Some(previous) = index.checked_sub(1).map(|idx| self[idx]) {
+                    if previous.end() == offset {
+                        return TokenAt::Between(previous, token);
+                    }
+                }
+
+                TokenAt::Single(token)
+            }
+
+            // No token found that starts exactly at the given offset. But it's possible that
+            // the token starting before `offset` fully encloses `offset` (its range ends after `offset`).
+            // ```python
+            // object.attribute
+            //     ^ OFFSET
+            // # or
+            // if True:
+            //     print("test")
+            //                  ^ OFFSET
+            // ```
+            Err(index) => {
+                if let Some(previous) = index.checked_sub(1).map(|idx| self[idx]) {
+                    if previous.range().contains_inclusive(offset) {
+                        return TokenAt::Single(previous);
+                    }
+                }
+
+                TokenAt::None
+            }
+        }
+    }
+
+    /// Returns a slice of tokens before the given [`TextSize`] offset.
+    ///
+    /// If the given offset is between two tokens, the returned slice will end just before the
+    /// following token. In other words, if the offset is between the end of the previous token
+    /// and the start of the next token, the returned slice will end just before the next token.
+    ///
+    /// # Panics
+    ///
+    /// If the given offset is inside a token range at any point
+    /// other than the start of the range.
+    pub fn before(&self, offset: TextSize) -> &[Token] {
+        Self::before_impl(&self.raw, offset)
+    }
+
+    fn before_impl(tokens: &[Token], offset: TextSize) -> &[Token] {
+        let partition_point = tokens.partition_point(|token| token.start() < offset);
+        let before = &tokens[..partition_point];
+
+        if let Some(last) = before.last() {
+            // If it's equal to the end offset, then it's at a token boundary which is
+            // valid. If it's greater than the end offset, then it's in the gap between
+            // the tokens which is valid as well.
+            assert!(
+                offset >= last.end(),
+                "Offset {:?} is inside a token range {:?}",
+                offset,
+                last.range()
+            );
+        }
+        before
+    }
+
+    /// Returns a slice of tokens after the given [`TextSize`] offset.
+    ///
+    /// If the given offset is between two tokens, the returned slice will start from the following
+    /// token. In other words, if the offset is between the end of the previous token and the start
+    /// of the next token, the returned slice will start from the next token.
+    ///
+    /// # Panics
+    ///
+    /// If the given offset is inside a token range at any point
+    /// other than the start of the range.
+    pub fn after(&self, offset: TextSize) -> &[Token] {
+        let partition_point = self.partition_point(|token| token.end() <= offset);
+        let after = &self[partition_point..];
+
+        if let Some(first) = after.first() {
+            // If it's equal to the start offset, then it's at a token boundary which is
+            // valid. If it's less than the start offset, then it's in the gap between
+            // the tokens which is valid as well.
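+            // A worked example, mirroring the tests below: with `Def` at `0..3` and a
+            // `Name` at `4..7`, both `after(3)` and `after(4)` return a slice starting
+            // at the `Name` token, while `after(5)` trips this assertion.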
+            assert!(
+                offset <= first.start(),
+                "Offset {:?} is inside a token range {:?}",
+                offset,
+                first.range()
+            );
+        }
+
+        after
+    }
+}
+
+impl<'a> IntoIterator for &'a Tokens {
+    type Item = &'a Token;
+    type IntoIter = std::slice::Iter<'a, Token>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl Deref for Tokens {
+    type Target = [Token];
+
+    fn deref(&self) -> &Self::Target {
+        &self.raw
+    }
+}
+
+/// A token that encloses a given offset or ends exactly at it.
+#[derive(Debug, Clone)]
+pub enum TokenAt {
+    /// There's no token at the given offset
+    None,
+
+    /// There's a single token at the given offset.
+    Single(Token),
+
+    /// The offset falls exactly between two tokens. E.g. `CURSOR` in `call(arguments)` is
+    /// positioned exactly between the `call` and `(` tokens.
+    Between(Token, Token),
+}
+
+impl Iterator for TokenAt {
+    type Item = Token;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match *self {
+            TokenAt::None => None,
+            TokenAt::Single(token) => {
+                *self = TokenAt::None;
+                Some(token)
+            }
+            TokenAt::Between(first, second) => {
+                *self = TokenAt::Single(second);
+                Some(first)
+            }
+        }
+    }
+}
+
+impl FusedIterator for TokenAt {}
+
+impl From<&Tokens> for CommentRanges {
+    fn from(tokens: &Tokens) -> Self {
+        let mut ranges = vec![];
+        for token in tokens {
+            if token.kind() == TokenKind::Comment {
+                ranges.push(token.range());
+            }
+        }
+        CommentRanges::new(ranges)
+    }
+}
+
+/// An iterator over the [`Token`]s with context.
+///
+/// This struct is created by the [`iter_with_context`] method on [`Tokens`]. Refer to its
+/// documentation for more details.
+///
+/// [`iter_with_context`]: Tokens::iter_with_context
+#[derive(Debug, Clone)]
+pub struct TokenIterWithContext<'a> {
+    inner: std::slice::Iter<'a, Token>,
+    nesting: u32,
+}
+
+impl<'a> TokenIterWithContext<'a> {
+    fn new(tokens: &'a [Token]) -> TokenIterWithContext<'a> {
+        TokenIterWithContext {
+            inner: tokens.iter(),
+            nesting: 0,
+        }
+    }
+
+    /// Return the nesting level the iterator is currently in.
+    pub const fn nesting(&self) -> u32 {
+        self.nesting
+    }
+
+    /// Returns `true` if the iterator is within a parenthesized context.
+    pub const fn in_parenthesized_context(&self) -> bool {
+        self.nesting > 0
+    }
+
+    /// Returns the next [`Token`] in the iterator without consuming it.
+    pub fn peek(&self) -> Option<&'a Token> {
+        self.clone().next()
+    }
+}
+
+impl<'a> Iterator for TokenIterWithContext<'a> {
+    type Item = &'a Token;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let token = self.inner.next()?;
+
+        match token.kind() {
+            TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => self.nesting += 1,
+            TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => {
+                self.nesting = self.nesting.saturating_sub(1);
+            }
+            // This mimics the behavior of re-lexing which reduces the nesting level on the lexer.
+            // We don't need to reduce it by 1 because unlike the lexer we see the final token
+            // after recovering from every unclosed parenthesis.
+            TokenKind::Newline if self.nesting > 0 => {
+                self.nesting = 0;
+            }
+            _ => {}
+        }
+
+        Some(token)
+    }
+}
+
+impl FusedIterator for TokenIterWithContext<'_> {}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use ruff_text_size::TextSize;
+
+    use crate::token::{Token, TokenFlags, TokenKind};
+
+    use super::*;
+
+    /// Test case containing a "gap" between two tokens.
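+    ///
+    /// Code (a plausible reconstruction inferred from the token ranges below; the
+    /// original snippet is not shown in this patch):
+    ///
+    /// ```text
+    /// def foo():
+    ///     # comment
+    ///     pass
+    /// ```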
+    const TEST_CASE_WITH_GAP: [(TokenKind, Range<u32>); 10] = [
+        (TokenKind::Def, 0..3),
+        (TokenKind::Name, 4..7),
+        (TokenKind::Lpar, 7..8),
+        (TokenKind::Rpar, 8..9),
+        (TokenKind::Colon, 9..10),
+        (TokenKind::Newline, 10..11),
+        // Gap ||..||
+        (TokenKind::Comment, 15..24),
+        (TokenKind::NonLogicalNewline, 24..25),
+        (TokenKind::Indent, 25..29),
+        (TokenKind::Pass, 29..33),
+        // No newline at the end to keep the token set full of unique tokens
+    ];
+
+    /// Helper function to create [`Tokens`] from an iterator of (kind, range).
+    fn new_tokens(tokens: impl Iterator<Item = (TokenKind, Range<u32>)>) -> Tokens {
+        Tokens::new(
+            tokens
+                .map(|(kind, range)| {
+                    Token::new(
+                        kind,
+                        TextRange::new(TextSize::new(range.start), TextSize::new(range.end)),
+                        TokenFlags::empty(),
+                    )
+                })
+                .collect(),
+        )
+    }
+
+    #[test]
+    fn tokens_after_offset_at_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let after = tokens.after(TextSize::new(8));
+        assert_eq!(after.len(), 7);
+        assert_eq!(after.first().unwrap().kind(), TokenKind::Rpar);
+    }
+
+    #[test]
+    fn tokens_after_offset_at_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let after = tokens.after(TextSize::new(11));
+        assert_eq!(after.len(), 4);
+        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
+    }
+
+    #[test]
+    fn tokens_after_offset_between_tokens() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let after = tokens.after(TextSize::new(13));
+        assert_eq!(after.len(), 4);
+        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
+    }
+
+    #[test]
+    fn tokens_after_offset_at_last_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let after = tokens.after(TextSize::new(33));
+        assert_eq!(after.len(), 0);
+    }
+
+    #[test]
+    #[should_panic(expected = "Offset 5 is inside a token range 4..7")]
+    fn tokens_after_offset_inside_token() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        tokens.after(TextSize::new(5));
+    }
+
+    #[test]
+    fn tokens_before_offset_at_first_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(0));
+        assert_eq!(before.len(), 0);
+    }
+
+    #[test]
+    fn tokens_before_offset_after_first_token_gap() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(3));
+        assert_eq!(before.len(), 1);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
+    }
+
+    #[test]
+    fn tokens_before_offset_at_second_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(4));
+        assert_eq!(before.len(), 1);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
+    }
+
+    #[test]
+    fn tokens_before_offset_at_token_start() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(8));
+        assert_eq!(before.len(), 3);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Lpar);
+    }
+
+    #[test]
+    fn tokens_before_offset_at_token_end() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(11));
+        assert_eq!(before.len(), 6);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
+    }
+
+    #[test]
+    fn tokens_before_offset_between_tokens() {
+        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
+        let before = tokens.before(TextSize::new(13));
+        assert_eq!(before.len(), 6);
+        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
+    }
+
+    #[test]
+    fn
tokens_before_offset_at_last_token_end() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let before = tokens.before(TextSize::new(33)); + assert_eq!(before.len(), 10); + assert_eq!(before.last().unwrap().kind(), TokenKind::Pass); + } + + #[test] + #[should_panic(expected = "Offset 5 is inside a token range 4..7")] + fn tokens_before_offset_inside_token() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + tokens.before(TextSize::new(5)); + } + + #[test] + fn tokens_in_range_at_token_offset() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(4.into(), 10.into())); + assert_eq!(in_range.len(), 4); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Name); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Colon); + } + + #[test] + fn tokens_in_range_start_offset_at_token_end() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(11.into(), 29.into())); + assert_eq!(in_range.len(), 3); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent); + } + + #[test] + fn tokens_in_range_end_offset_at_token_start() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(8.into(), 15.into())); + assert_eq!(in_range.len(), 3); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Rpar); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline); + } + + #[test] + fn tokens_in_range_start_offset_between_tokens() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(13.into(), 29.into())); + assert_eq!(in_range.len(), 3); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent); + } + + #[test] + fn tokens_in_range_end_offset_between_tokens() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + let in_range = tokens.in_range(TextRange::new(9.into(), 13.into())); + assert_eq!(in_range.len(), 2); + assert_eq!(in_range.first().unwrap().kind(), TokenKind::Colon); + assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline); + } + + #[test] + #[should_panic(expected = "Offset 5 is inside a token range 4..7")] + fn tokens_in_range_start_offset_inside_token() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + tokens.in_range(TextRange::new(5.into(), 10.into())); + } + + #[test] + #[should_panic(expected = "Offset 6 is inside a token range 4..7")] + fn tokens_in_range_end_offset_inside_token() { + let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); + tokens.in_range(TextRange::new(0.into(), 6.into())); + } +} diff --git a/crates/ruff_python_codegen/src/stylist.rs b/crates/ruff_python_codegen/src/stylist.rs index 8daf9b3a4f..06e5b27407 100644 --- a/crates/ruff_python_codegen/src/stylist.rs +++ b/crates/ruff_python_codegen/src/stylist.rs @@ -5,7 +5,7 @@ use std::cell::OnceCell; use std::ops::Deref; use ruff_python_ast::str::Quote; -use ruff_python_parser::{Token, TokenKind, Tokens}; +use ruff_python_ast::token::{Token, TokenKind, Tokens}; use ruff_source_file::{LineEnding, LineRanges, find_newline}; use ruff_text_size::Ranged; diff --git a/crates/ruff_python_formatter/src/context.rs b/crates/ruff_python_formatter/src/context.rs index 528afc6c71..239edc8d5b 100644 --- a/crates/ruff_python_formatter/src/context.rs +++ b/crates/ruff_python_formatter/src/context.rs @@ 
-3,7 +3,7 @@ use std::ops::{Deref, DerefMut};
 
 use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode};
 use ruff_python_ast::str::Quote;
-use ruff_python_parser::Tokens;
+use ruff_python_ast::token::Tokens;
 
 use crate::PyFormatOptions;
 use crate::comments::Comments;
diff --git a/crates/ruff_python_formatter/src/verbatim.rs b/crates/ruff_python_formatter/src/verbatim.rs
index 9802257248..e0bbf00ad6 100644
--- a/crates/ruff_python_formatter/src/verbatim.rs
+++ b/crates/ruff_python_formatter/src/verbatim.rs
@@ -5,7 +5,7 @@ use std::slice::Iter;
 use ruff_formatter::{FormatError, write};
 use ruff_python_ast::AnyNodeRef;
 use ruff_python_ast::Stmt;
-use ruff_python_parser::{self as parser, TokenKind};
+use ruff_python_ast::token::{Token as AstToken, TokenKind};
 use ruff_python_trivia::lines_before;
 use ruff_source_file::LineRanges;
 use ruff_text_size::{Ranged, TextRange, TextSize};
@@ -770,7 +770,7 @@ impl Format<PyFormatContext<'_>> for FormatVerbatimStatementRange {
 }
 
 struct LogicalLinesIter<'a> {
-    tokens: Iter<'a, parser::Token>,
+    tokens: Iter<'a, AstToken>,
     // The end of the last logical line
     last_line_end: TextSize,
     // The position where the content to lex ends.
@@ -778,7 +778,7 @@ struct LogicalLinesIter<'a> {
 }
 
 impl<'a> LogicalLinesIter<'a> {
-    fn new(tokens: Iter<'a, parser::Token>, verbatim_range: TextRange) -> Self {
+    fn new(tokens: Iter<'a, AstToken>, verbatim_range: TextRange) -> Self {
         Self {
             tokens,
             last_line_end: verbatim_range.start(),
diff --git a/crates/ruff_python_importer/Cargo.toml b/crates/ruff_python_importer/Cargo.toml
index 96070a2400..a563d79e29 100644
--- a/crates/ruff_python_importer/Cargo.toml
+++ b/crates/ruff_python_importer/Cargo.toml
@@ -14,7 +14,6 @@ license = { workspace = true }
 ruff_diagnostics = { workspace = true }
 ruff_python_ast = { workspace = true }
 ruff_python_codegen = { workspace = true }
-ruff_python_parser = { workspace = true }
 ruff_python_trivia = { workspace = true }
 ruff_source_file = { workspace = true, features = ["serde"] }
 ruff_text_size = { workspace = true }
@@ -22,6 +21,8 @@ ruff_text_size = { workspace = true }
 anyhow = { workspace = true }
 
 [dev-dependencies]
+ruff_python_parser = { workspace = true }
+
 insta = { workspace = true }
 
 [features]
diff --git a/crates/ruff_python_importer/src/insertion.rs b/crates/ruff_python_importer/src/insertion.rs
index 69bd5aa33d..293cec988e 100644
--- a/crates/ruff_python_importer/src/insertion.rs
+++ b/crates/ruff_python_importer/src/insertion.rs
@@ -5,8 +5,8 @@ use std::ops::Add;
 use ruff_diagnostics::Edit;
 use ruff_python_ast::Stmt;
 use ruff_python_ast::helpers::is_docstring_stmt;
+use ruff_python_ast::token::{TokenKind, Tokens};
 use ruff_python_codegen::Stylist;
-use ruff_python_parser::{TokenKind, Tokens};
 use ruff_python_trivia::is_python_whitespace;
 use ruff_python_trivia::{PythonWhitespace, textwrap::indent};
 use ruff_source_file::{LineRanges, UniversalNewlineIterator};
@@ -194,7 +194,7 @@ impl<'a> Insertion<'a> {
             tokens
                 .before(at)
                 .last()
-                .map(ruff_python_parser::Token::kind),
+                .map(ruff_python_ast::token::Token::kind),
             Some(TokenKind::Import)
         ) {
             return None;
diff --git a/crates/ruff_python_index/Cargo.toml b/crates/ruff_python_index/Cargo.toml
index 622a63777b..4ff47bb9ab 100644
--- a/crates/ruff_python_index/Cargo.toml
+++ b/crates/ruff_python_index/Cargo.toml
@@ -15,12 +15,12 @@ doctest = false
 
 [dependencies]
 ruff_python_ast = { workspace = true }
-ruff_python_parser = { workspace = true }
 ruff_python_trivia = { workspace = true }
 ruff_source_file = { workspace = true }
 ruff_text_size
= { workspace = true } [dev-dependencies] +ruff_python_parser = { workspace = true } [lints] workspace = true diff --git a/crates/ruff_python_index/src/indexer.rs b/crates/ruff_python_index/src/indexer.rs index 04c44a7c4b..80c0e00e20 100644 --- a/crates/ruff_python_index/src/indexer.rs +++ b/crates/ruff_python_index/src/indexer.rs @@ -2,7 +2,7 @@ //! are omitted from the AST (e.g., commented lines). use ruff_python_ast::Stmt; -use ruff_python_parser::{TokenKind, Tokens}; +use ruff_python_ast::token::{TokenKind, Tokens}; use ruff_python_trivia::{ CommentRanges, has_leading_content, has_trailing_content, is_python_whitespace, }; diff --git a/crates/ruff_python_index/src/interpolated_string_ranges.rs b/crates/ruff_python_index/src/interpolated_string_ranges.rs index 45dc7c2765..935f3f08f2 100644 --- a/crates/ruff_python_index/src/interpolated_string_ranges.rs +++ b/crates/ruff_python_index/src/interpolated_string_ranges.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use ruff_python_parser::{Token, TokenKind}; +use ruff_python_ast::token::{Token, TokenKind}; use ruff_text_size::{Ranged, TextRange, TextSize}; /// Stores the ranges of all interpolated strings in a file sorted by [`TextRange::start`]. diff --git a/crates/ruff_python_index/src/multiline_ranges.rs b/crates/ruff_python_index/src/multiline_ranges.rs index 585ff6f1ae..c8f2bc1bac 100644 --- a/crates/ruff_python_index/src/multiline_ranges.rs +++ b/crates/ruff_python_index/src/multiline_ranges.rs @@ -1,4 +1,4 @@ -use ruff_python_parser::{Token, TokenKind}; +use ruff_python_ast::token::{Token, TokenKind}; use ruff_text_size::{Ranged, TextRange}; /// Stores the range of all multiline strings in a file sorted by diff --git a/crates/ruff_python_parser/src/error.rs b/crates/ruff_python_parser/src/error.rs index 8b02546d3b..6dd1dac0d3 100644 --- a/crates/ruff_python_parser/src/error.rs +++ b/crates/ruff_python_parser/src/error.rs @@ -1,9 +1,10 @@ use std::fmt::{self, Display}; use ruff_python_ast::PythonVersion; +use ruff_python_ast::token::TokenKind; use ruff_text_size::{Ranged, TextRange}; -use crate::{TokenKind, string::InterpolatedStringKind}; +use crate::string::InterpolatedStringKind; /// Represents represent errors that occur during parsing and are /// returned by the `parse_*` functions. diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index dc864d71b6..8b4b3a061c 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -14,6 +14,7 @@ use unicode_normalization::UnicodeNormalization; use ruff_python_ast::name::Name; use ruff_python_ast::str_prefix::{AnyStringPrefix, StringLiteralPrefix}; +use ruff_python_ast::token::{TokenFlags, TokenKind}; use ruff_python_ast::{Int, IpyEscapeKind, StringFlags}; use ruff_python_trivia::is_python_whitespace; use ruff_text_size::{TextLen, TextRange, TextSize}; @@ -26,7 +27,7 @@ use crate::lexer::interpolated_string::{ InterpolatedStringContext, InterpolatedStrings, InterpolatedStringsCheckpoint, }; use crate::string::InterpolatedStringKind; -use crate::token::{TokenFlags, TokenKind, TokenValue}; +use crate::token::TokenValue; mod cursor; mod indentation; diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs index ce409200ae..86bfe56697 100644 --- a/crates/ruff_python_parser/src/lib.rs +++ b/crates/ruff_python_parser/src/lib.rs @@ -63,23 +63,20 @@ //! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis //! [parsing]: https://en.wikipedia.org/wiki/Parsing //! 
[lexer]: crate::lexer
-use std::iter::FusedIterator;
-use std::ops::Deref;
 
 pub use crate::error::{
     InterpolatedStringErrorType, LexicalErrorType, ParseError, ParseErrorType,
     UnsupportedSyntaxError, UnsupportedSyntaxErrorKind,
 };
 pub use crate::parser::ParseOptions;
-pub use crate::token::{Token, TokenKind};
 
 use crate::parser::Parser;
+use ruff_python_ast::token::Tokens;
 use ruff_python_ast::{
     Expr, Mod, ModExpression, ModModule, PySourceType, StringFlags, StringLiteral, Suite,
 };
-use ruff_python_trivia::CommentRanges;
-use ruff_text_size::{Ranged, TextRange, TextSize};
+use ruff_text_size::{Ranged, TextRange};
 
 mod error;
 pub mod lexer;
@@ -473,351 +470,6 @@ impl Parsed {
     }
 }
 
-/// Tokens represents a vector of lexed [`Token`].
-#[derive(Debug, Clone, PartialEq, Eq, get_size2::GetSize)]
-pub struct Tokens {
-    raw: Vec<Token>,
-}
-
-impl Tokens {
-    pub(crate) fn new(tokens: Vec<Token>) -> Tokens {
-        Tokens { raw: tokens }
-    }
-
-    /// Returns an iterator over all the tokens that provides context.
-    pub fn iter_with_context(&self) -> TokenIterWithContext<'_> {
-        TokenIterWithContext::new(&self.raw)
-    }
-
-    /// Performs a binary search to find the index of the **first** token that starts at the given `offset`.
-    ///
-    /// Unlike `binary_search_by_key`, this method ensures that if multiple tokens start at the same offset,
-    /// it returns the index of the first one. Multiple tokens can start at the same offset in cases where
-    /// zero-length tokens are involved (like `Dedent` or `Newline` at the end of the file).
-    pub fn binary_search_by_start(&self, offset: TextSize) -> Result<usize, usize> {
-        let partition_point = self.partition_point(|token| token.start() < offset);
-
-        let after = &self[partition_point..];
-
-        if after.first().is_some_and(|first| first.start() == offset) {
-            Ok(partition_point)
-        } else {
-            Err(partition_point)
-        }
-    }
-
-    /// Returns a slice of [`Token`] that are within the given `range`.
-    ///
-    /// The start and end offset of the given range should be either:
-    /// 1. Token boundary
-    /// 2. Gap between the tokens
-    ///
-    /// For example, considering the following tokens and their corresponding range:
-    ///
-    /// | Token               | Range     |
-    /// |---------------------|-----------|
-    /// | `Def`               | `0..3`    |
-    /// | `Name`              | `4..7`    |
-    /// | `Lpar`              | `7..8`    |
-    /// | `Rpar`              | `8..9`    |
-    /// | `Colon`             | `9..10`   |
-    /// | `Newline`           | `10..11`  |
-    /// | `Comment`           | `15..24`  |
-    /// | `NonLogicalNewline` | `24..25`  |
-    /// | `Indent`            | `25..29`  |
-    /// | `Pass`              | `29..33`  |
-    ///
-    /// Here, for (1) a token boundary is considered either the start or end offset of any of the
-    /// above tokens. For (2), the gap would be any offset between the `Newline` and `Comment`
-    /// token which are 12, 13, and 14.
-    ///
-    /// Examples:
-    /// 1) `4..10` would give `Name`, `Lpar`, `Rpar`, `Colon`
-    /// 2) `11..25` would give `Comment`, `NonLogicalNewline`
-    /// 3) `12..25` would give same as (2) and offset 12 is in the "gap"
-    /// 4) `9..12` would give `Colon`, `Newline` and offset 12 is in the "gap"
-    /// 5) `18..27` would panic because both the start and end offset is within a token
-    ///
-    /// ## Note
-    ///
-    /// The returned slice can contain the [`TokenKind::Unknown`] token if there was a lexical
-    /// error encountered within the given range.
-    ///
-    /// # Panics
-    ///
-    /// If either the start or end offset of the given range is within a token range.
-    pub fn in_range(&self, range: TextRange) -> &[Token] {
-        let tokens_after_start = self.after(range.start());
-
-        Self::before_impl(tokens_after_start, range.end())
-    }
-
-    /// Searches the token(s) at `offset`.
-    ///
-    /// Returns [`TokenAt::Between`] if `offset` points directly inbetween two tokens
-    /// (the left token ends at `offset` and the right token starts at `offset`).
-    ///
-    ///
-    /// ## Examples
-    ///
-    /// [Playground](https://play.ruff.rs/f3ad0a55-5931-4a13-96c7-b2b8bfdc9a2e?secondary=Tokens)
-    ///
-    /// ```
-    /// # use ruff_python_ast::PySourceType;
-    /// # use ruff_python_parser::{Token, TokenAt, TokenKind};
-    /// # use ruff_text_size::{Ranged, TextSize};
-    ///
-    /// let source = r#"
-    /// def test(arg):
-    ///     arg.call()
-    ///     if True:
-    ///         pass
-    ///     print("true")
-    /// "#.trim();
-    ///
-    /// let parsed = ruff_python_parser::parse_unchecked_source(source, PySourceType::Python);
-    /// let tokens = parsed.tokens();
-    ///
-    /// let collect_tokens = |offset: TextSize| {
-    ///     tokens.at_offset(offset).into_iter().map(|t| (t.kind(), &source[t.range()])).collect::<Vec<_>>()
-    /// };
-    ///
-    /// assert_eq!(collect_tokens(TextSize::new(4)), vec![(TokenKind::Name, "test")]);
-    /// assert_eq!(collect_tokens(TextSize::new(6)), vec![(TokenKind::Name, "test")]);
-    /// // between `arg` and `.`
-    /// assert_eq!(collect_tokens(TextSize::new(22)), vec![(TokenKind::Name, "arg"), (TokenKind::Dot, ".")]);
-    /// assert_eq!(collect_tokens(TextSize::new(36)), vec![(TokenKind::If, "if")]);
-    /// // Before the dedent token
-    /// assert_eq!(collect_tokens(TextSize::new(57)), vec![]);
-    /// ```
-    pub fn at_offset(&self, offset: TextSize) -> TokenAt {
-        match self.binary_search_by_start(offset) {
-            // The token at `index` starts exactly at `offset.
-            // ```python
-            // object.attribute
-            //       ^ OFFSET
-            // ```
-            Ok(index) => {
-                let token = self[index];
-                // `token` starts exactly at `offset`. Test if the offset is right between
-                // `token` and the previous token (if there's any)
-                if let Some(previous) = index.checked_sub(1).map(|idx| self[idx]) {
-                    if previous.end() == offset {
-                        return TokenAt::Between(previous, token);
-                    }
-                }
-
-                TokenAt::Single(token)
-            }
-
-            // No token found that starts exactly at the given offset. But it's possible that
-            // the token starting before `offset` fully encloses `offset` (it's end range ends after `offset`).
-            // ```python
-            // object.attribute
-            //     ^ OFFSET
-            // # or
-            // if True:
-            //     print("test")
-            //                  ^ OFFSET
-            // ```
-            Err(index) => {
-                if let Some(previous) = index.checked_sub(1).map(|idx| self[idx]) {
-                    if previous.range().contains_inclusive(offset) {
-                        return TokenAt::Single(previous);
-                    }
-                }
-
-                TokenAt::None
-            }
-        }
-    }
-
-    /// Returns a slice of tokens before the given [`TextSize`] offset.
-    ///
-    /// If the given offset is between two tokens, the returned slice will end just before the
-    /// following token. In other words, if the offset is between the end of previous token and
-    /// start of next token, the returned slice will end just before the next token.
-    ///
-    /// # Panics
-    ///
-    /// If the given offset is inside a token range at any point
-    /// other than the start of the range.
-    pub fn before(&self, offset: TextSize) -> &[Token] {
-        Self::before_impl(&self.raw, offset)
-    }
-
-    fn before_impl(tokens: &[Token], offset: TextSize) -> &[Token] {
-        let partition_point = tokens.partition_point(|token| token.start() < offset);
-        let before = &tokens[..partition_point];
-
-        if let Some(last) = before.last() {
-            // If it's equal to the end offset, then it's at a token boundary which is
-            // valid. If it's greater than the end offset, then it's in the gap between
-            // the tokens which is valid as well.
-            assert!(
-                offset >= last.end(),
-                "Offset {:?} is inside a token range {:?}",
-                offset,
-                last.range()
-            );
-        }
-        before
-    }
-
-    /// Returns a slice of tokens after the given [`TextSize`] offset.
-    ///
-    /// If the given offset is between two tokens, the returned slice will start from the following
-    /// token. In other words, if the offset is between the end of previous token and start of next
-    /// token, the returned slice will start from the next token.
-    ///
-    /// # Panics
-    ///
-    /// If the given offset is inside a token range at any point
-    /// other than the start of the range.
-    pub fn after(&self, offset: TextSize) -> &[Token] {
-        let partition_point = self.partition_point(|token| token.end() <= offset);
-        let after = &self[partition_point..];
-
-        if let Some(first) = after.first() {
-            // If it's equal to the start offset, then it's at a token boundary which is
-            // valid. If it's less than the start offset, then it's in the gap between
-            // the tokens which is valid as well.
-            assert!(
-                offset <= first.start(),
-                "Offset {:?} is inside a token range {:?}",
-                offset,
-                first.range()
-            );
-        }
-
-        after
-    }
-}
-
-impl<'a> IntoIterator for &'a Tokens {
-    type Item = &'a Token;
-    type IntoIter = std::slice::Iter<'a, Token>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.iter()
-    }
-}
-
-impl Deref for Tokens {
-    type Target = [Token];
-
-    fn deref(&self) -> &Self::Target {
-        &self.raw
-    }
-}
-
-/// A token that encloses a given offset or ends exactly at it.
-#[derive(Debug, Clone)]
-pub enum TokenAt {
-    /// There's no token at the given offset
-    None,
-
-    /// There's a single token at the given offset.
-    Single(Token),
-
-    /// The offset falls exactly between two tokens. E.g. `CURSOR` in `call(arguments)` is
-    /// positioned exactly between the `call` and `(` tokens.
-    Between(Token, Token),
-}
-
-impl Iterator for TokenAt {
-    type Item = Token;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match *self {
-            TokenAt::None => None,
-            TokenAt::Single(token) => {
-                *self = TokenAt::None;
-                Some(token)
-            }
-            TokenAt::Between(first, second) => {
-                *self = TokenAt::Single(second);
-                Some(first)
-            }
-        }
-    }
-}
-
-impl FusedIterator for TokenAt {}
-
-impl From<&Tokens> for CommentRanges {
-    fn from(tokens: &Tokens) -> Self {
-        let mut ranges = vec![];
-        for token in tokens {
-            if token.kind() == TokenKind::Comment {
-                ranges.push(token.range());
-            }
-        }
-        CommentRanges::new(ranges)
-    }
-}
-
-/// An iterator over the [`Token`]s with context.
-///
-/// This struct is created by the [`iter_with_context`] method on [`Tokens`]. Refer to its
-/// documentation for more details.
-///
-/// [`iter_with_context`]: Tokens::iter_with_context
-#[derive(Debug, Clone)]
-pub struct TokenIterWithContext<'a> {
-    inner: std::slice::Iter<'a, Token>,
-    nesting: u32,
-}
-
-impl<'a> TokenIterWithContext<'a> {
-    fn new(tokens: &'a [Token]) -> TokenIterWithContext<'a> {
-        TokenIterWithContext {
-            inner: tokens.iter(),
-            nesting: 0,
-        }
-    }
-
-    /// Return the nesting level the iterator is currently in.
-    pub const fn nesting(&self) -> u32 {
-        self.nesting
-    }
-
-    /// Returns `true` if the iterator is within a parenthesized context.
-    pub const fn in_parenthesized_context(&self) -> bool {
-        self.nesting > 0
-    }
-
-    /// Returns the next [`Token`] in the iterator without consuming it.
-    pub fn peek(&self) -> Option<&'a Token> {
-        self.clone().next()
-    }
-}
-
-impl<'a> Iterator for TokenIterWithContext<'a> {
-    type Item = &'a Token;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let token = self.inner.next()?;
-
-        match token.kind() {
-            TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => self.nesting += 1,
-            TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => {
-                self.nesting = self.nesting.saturating_sub(1);
-            }
-            // This mimics the behavior of re-lexing which reduces the nesting level on the lexer.
-            // We don't need to reduce it by 1 because unlike the lexer we see the final token
-            // after recovering from every unclosed parenthesis.
-            TokenKind::Newline if self.nesting > 0 => {
-                self.nesting = 0;
-            }
-            _ => {}
-        }
-
-        Some(token)
-    }
-}
-
-impl FusedIterator for TokenIterWithContext<'_> {}
-
 /// Control in the different modes by which a source file can be parsed.
 ///
 /// The mode argument specifies in what way code must be parsed.
@@ -888,204 +540,3 @@ impl std::fmt::Display for ModeParseError {
         write!(f, r#"mode must be "exec", "eval", "ipython", or "single""#)
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use std::ops::Range;
-
-    use crate::token::TokenFlags;
-
-    use super::*;
-
-    /// Test case containing a "gap" between two tokens.
-    ///
-    /// Code:
-    const TEST_CASE_WITH_GAP: [(TokenKind, Range<u32>); 10] = [
-        (TokenKind::Def, 0..3),
-        (TokenKind::Name, 4..7),
-        (TokenKind::Lpar, 7..8),
-        (TokenKind::Rpar, 8..9),
-        (TokenKind::Colon, 9..10),
-        (TokenKind::Newline, 10..11),
-        // Gap ||..||
-        (TokenKind::Comment, 15..24),
-        (TokenKind::NonLogicalNewline, 24..25),
-        (TokenKind::Indent, 25..29),
-        (TokenKind::Pass, 29..33),
-        // No newline at the end to keep the token set full of unique tokens
-    ];
-
-    /// Helper function to create [`Tokens`] from an iterator of (kind, range).
-    fn new_tokens(tokens: impl Iterator<Item = (TokenKind, Range<u32>)>) -> Tokens {
-        Tokens::new(
-            tokens
-                .map(|(kind, range)| {
-                    Token::new(
-                        kind,
-                        TextRange::new(TextSize::new(range.start), TextSize::new(range.end)),
-                        TokenFlags::empty(),
-                    )
-                })
-                .collect(),
-        )
-    }
-
-    #[test]
-    fn tokens_after_offset_at_token_start() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let after = tokens.after(TextSize::new(8));
-        assert_eq!(after.len(), 7);
-        assert_eq!(after.first().unwrap().kind(), TokenKind::Rpar);
-    }
-
-    #[test]
-    fn tokens_after_offset_at_token_end() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let after = tokens.after(TextSize::new(11));
-        assert_eq!(after.len(), 4);
-        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
-    }
-
-    #[test]
-    fn tokens_after_offset_between_tokens() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let after = tokens.after(TextSize::new(13));
-        assert_eq!(after.len(), 4);
-        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
-    }
-
-    #[test]
-    fn tokens_after_offset_at_last_token_end() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let after = tokens.after(TextSize::new(33));
-        assert_eq!(after.len(), 0);
-    }
-
-    #[test]
-    #[should_panic(expected = "Offset 5 is inside a token range 4..7")]
-    fn tokens_after_offset_inside_token() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        tokens.after(TextSize::new(5));
-    }
-
-    #[test]
-    fn tokens_before_offset_at_first_token_start() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let before = tokens.before(TextSize::new(0));
-        assert_eq!(before.len(), 0);
-    }
-
-    #[test]
-    fn tokens_before_offset_after_first_token_gap() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let before = tokens.before(TextSize::new(3));
-        assert_eq!(before.len(), 1);
-        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
-    }
-
-    #[test]
-    fn tokens_before_offset_at_second_token_start() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let before = tokens.before(TextSize::new(4));
-        assert_eq!(before.len(), 1);
-        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
-    }
-
-    #[test]
-    fn tokens_before_offset_at_token_start() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let before = tokens.before(TextSize::new(8));
-        assert_eq!(before.len(), 3);
-        assert_eq!(before.last().unwrap().kind(), TokenKind::Lpar);
-    }
-
-    #[test]
-    fn tokens_before_offset_at_token_end() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let before = tokens.before(TextSize::new(11));
-        assert_eq!(before.len(), 6);
-        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
-    }
-
-    #[test]
-    fn tokens_before_offset_between_tokens() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let before = tokens.before(TextSize::new(13));
-        assert_eq!(before.len(), 6);
-        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
-    }
-
-    #[test]
-    fn tokens_before_offset_at_last_token_end() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        let before = tokens.before(TextSize::new(33));
-        assert_eq!(before.len(), 10);
-        assert_eq!(before.last().unwrap().kind(), TokenKind::Pass);
-    }
-
-    #[test]
-    #[should_panic(expected = "Offset 5 is inside a token range 4..7")]
-    fn tokens_before_offset_inside_token() {
-        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
-        tokens.before(TextSize::new(5));
-    }
-
-    #[test]
-    fn tokens_in_range_at_token_offset() {
-        let tokens =
new_tokens(TEST_CASE_WITH_GAP.into_iter()); - let in_range = tokens.in_range(TextRange::new(4.into(), 10.into())); - assert_eq!(in_range.len(), 4); - assert_eq!(in_range.first().unwrap().kind(), TokenKind::Name); - assert_eq!(in_range.last().unwrap().kind(), TokenKind::Colon); - } - - #[test] - fn tokens_in_range_start_offset_at_token_end() { - let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); - let in_range = tokens.in_range(TextRange::new(11.into(), 29.into())); - assert_eq!(in_range.len(), 3); - assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment); - assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent); - } - - #[test] - fn tokens_in_range_end_offset_at_token_start() { - let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); - let in_range = tokens.in_range(TextRange::new(8.into(), 15.into())); - assert_eq!(in_range.len(), 3); - assert_eq!(in_range.first().unwrap().kind(), TokenKind::Rpar); - assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline); - } - - #[test] - fn tokens_in_range_start_offset_between_tokens() { - let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); - let in_range = tokens.in_range(TextRange::new(13.into(), 29.into())); - assert_eq!(in_range.len(), 3); - assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment); - assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent); - } - - #[test] - fn tokens_in_range_end_offset_between_tokens() { - let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); - let in_range = tokens.in_range(TextRange::new(9.into(), 13.into())); - assert_eq!(in_range.len(), 2); - assert_eq!(in_range.first().unwrap().kind(), TokenKind::Colon); - assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline); - } - - #[test] - #[should_panic(expected = "Offset 5 is inside a token range 4..7")] - fn tokens_in_range_start_offset_inside_token() { - let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); - tokens.in_range(TextRange::new(5.into(), 10.into())); - } - - #[test] - #[should_panic(expected = "Offset 6 is inside a token range 4..7")] - fn tokens_in_range_end_offset_inside_token() { - let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter()); - tokens.in_range(TextRange::new(0.into(), 6.into())); - } -} diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index 2ae786ce77..f0e930461a 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -4,6 +4,7 @@ use bitflags::bitflags; use rustc_hash::{FxBuildHasher, FxHashSet}; use ruff_python_ast::name::Name; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{ self as ast, AnyStringFlags, AtomicNodeIndex, BoolOp, CmpOp, ConversionFlag, Expr, ExprContext, FString, InterpolatedStringElement, InterpolatedStringElements, IpyEscapeKind, Number, @@ -18,7 +19,7 @@ use crate::string::{ InterpolatedStringKind, StringType, parse_interpolated_string_literal_element, parse_string_literal, }; -use crate::token::{TokenKind, TokenValue}; +use crate::token::TokenValue; use crate::token_set::TokenSet; use crate::{ InterpolatedStringErrorType, Mode, ParseErrorType, UnsupportedSyntaxError, diff --git a/crates/ruff_python_parser/src/parser/helpers.rs b/crates/ruff_python_parser/src/parser/helpers.rs index 819bc9f3b4..8abbb6355a 100644 --- a/crates/ruff_python_parser/src/parser/helpers.rs +++ b/crates/ruff_python_parser/src/parser/helpers.rs @@ -1,7 +1,8 @@ +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{self as 
ast, CmpOp, Expr, ExprContext, Number}; use ruff_text_size::{Ranged, TextRange}; -use crate::{TokenKind, error::RelaxedDecoratorError}; +use crate::error::RelaxedDecoratorError; /// Set the `ctx` for `Expr::Id`, `Expr::Attribute`, `Expr::Subscript`, `Expr::Starred`, /// `Expr::Tuple` and `Expr::List`. If `expr` is either `Expr::Tuple` or `Expr::List`, diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index 90396cb72d..8d0614b6b9 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -2,6 +2,7 @@ use std::cmp::Ordering; use bitflags::bitflags; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{AtomicNodeIndex, Mod, ModExpression, ModModule}; use ruff_text_size::{Ranged, TextRange, TextSize}; @@ -12,7 +13,7 @@ use crate::string::InterpolatedStringKind; use crate::token::TokenValue; use crate::token_set::TokenSet; use crate::token_source::{TokenSource, TokenSourceCheckpoint}; -use crate::{Mode, ParseError, ParseErrorType, TokenKind, UnsupportedSyntaxErrorKind}; +use crate::{Mode, ParseError, ParseErrorType, UnsupportedSyntaxErrorKind}; use crate::{Parsed, Tokens}; pub use crate::parser::options::ParseOptions; diff --git a/crates/ruff_python_parser/src/parser/pattern.rs b/crates/ruff_python_parser/src/parser/pattern.rs index 2839a7dcad..f28dc237c9 100644 --- a/crates/ruff_python_parser/src/parser/pattern.rs +++ b/crates/ruff_python_parser/src/parser/pattern.rs @@ -1,4 +1,5 @@ use ruff_python_ast::name::Name; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{ self as ast, AtomicNodeIndex, Expr, ExprContext, Number, Operator, Pattern, Singleton, }; @@ -7,7 +8,7 @@ use ruff_text_size::{Ranged, TextSize}; use crate::ParseErrorType; use crate::parser::progress::ParserProgress; use crate::parser::{Parser, RecoveryContextKind, SequenceMatchPatternParentheses, recovery}; -use crate::token::{TokenKind, TokenValue}; +use crate::token::TokenValue; use crate::token_set::TokenSet; use super::expression::ExpressionContext; diff --git a/crates/ruff_python_parser/src/parser/statement.rs b/crates/ruff_python_parser/src/parser/statement.rs index 134c9c40fd..07e5816a9c 100644 --- a/crates/ruff_python_parser/src/parser/statement.rs +++ b/crates/ruff_python_parser/src/parser/statement.rs @@ -2,6 +2,7 @@ use compact_str::CompactString; use std::fmt::{Display, Write}; use ruff_python_ast::name::Name; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{ self as ast, AtomicNodeIndex, ExceptHandler, Expr, ExprContext, IpyEscapeKind, Operator, PythonVersion, Stmt, WithItem, @@ -14,7 +15,7 @@ use crate::parser::progress::ParserProgress; use crate::parser::{ FunctionKind, Parser, RecoveryContext, RecoveryContextKind, WithItemKind, helpers, }; -use crate::token::{TokenKind, TokenValue}; +use crate::token::TokenValue; use crate::token_set::TokenSet; use crate::{Mode, ParseErrorType, UnsupportedSyntaxErrorKind}; diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs index 4510934685..4b750865a2 100644 --- a/crates/ruff_python_parser/src/string.rs +++ b/crates/ruff_python_parser/src/string.rs @@ -3,13 +3,11 @@ use bstr::ByteSlice; use std::fmt; +use ruff_python_ast::token::TokenKind; use ruff_python_ast::{self as ast, AnyStringFlags, AtomicNodeIndex, Expr, StringFlags}; use ruff_text_size::{Ranged, TextRange, TextSize}; -use crate::{ - TokenKind, - error::{LexicalError, LexicalErrorType}, -}; +use crate::error::{LexicalError, LexicalErrorType}; 
 #[derive(Debug)]
 pub(crate) enum StringType {
diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs
index a5790a9597..f2f96e133d 100644
--- a/crates/ruff_python_parser/src/token.rs
+++ b/crates/ruff_python_parser/src/token.rs
@@ -1,848 +1,4 @@
-//! Token kinds for Python source code created by the lexer and consumed by the `ruff_python_parser`.
-//!
-//! This module defines the tokens that the lexer recognizes. The tokens are
-//! loosely based on the token definitions found in the [CPython source].
-//!
-//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Grammar/Tokens
-
-use std::fmt;
-
-use bitflags::bitflags;
-
-use ruff_python_ast::name::Name;
-use ruff_python_ast::str::{Quote, TripleQuotes};
-use ruff_python_ast::str_prefix::{
-    AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix, TStringPrefix,
-};
-use ruff_python_ast::{AnyStringFlags, BoolOp, Int, IpyEscapeKind, Operator, StringFlags, UnaryOp};
-use ruff_text_size::{Ranged, TextRange};
-
-#[derive(Clone, Copy, PartialEq, Eq, get_size2::GetSize)]
-pub struct Token {
-    /// The kind of the token.
-    kind: TokenKind,
-    /// The range of the token.
-    range: TextRange,
-    /// The set of flags describing this token.
-    flags: TokenFlags,
-}
-
-impl Token {
-    pub(crate) fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token {
-        Self { kind, range, flags }
-    }
-
-    /// Returns the token kind.
-    #[inline]
-    pub const fn kind(&self) -> TokenKind {
-        self.kind
-    }
-
-    /// Returns the token as a tuple of (kind, range).
-    #[inline]
-    pub const fn as_tuple(&self) -> (TokenKind, TextRange) {
-        (self.kind, self.range)
-    }
-
-    /// Returns `true` if the current token is a triple-quoted string of any kind.
-    ///
-    /// # Panics
-    ///
-    /// If it isn't a string or any f/t-string tokens.
-    pub fn is_triple_quoted_string(self) -> bool {
-        self.unwrap_string_flags().is_triple_quoted()
-    }
-
-    /// Returns the [`Quote`] style for the current string token of any kind.
-    ///
-    /// # Panics
-    ///
-    /// If it isn't a string or any f/t-string tokens.
-    pub fn string_quote_style(self) -> Quote {
-        self.unwrap_string_flags().quote_style()
-    }
-
-    /// Returns the [`AnyStringFlags`] style for the current string token of any kind.
-    ///
-    /// # Panics
-    ///
-    /// If it isn't a string or any f/t-string tokens.
-    pub fn unwrap_string_flags(self) -> AnyStringFlags {
-        self.string_flags()
-            .unwrap_or_else(|| panic!("token to be a string"))
-    }
-
-    /// Returns the [`AnyStringFlags`] if the current token is any kind of string, [`None`] otherwise.
-    pub fn string_flags(self) -> Option<AnyStringFlags> {
-        if self.is_any_string() {
-            Some(self.flags.as_any_string_flags())
-        } else {
-            None
-        }
-    }
-
-    /// Returns `true` if this is any kind of string token - including
-    /// tokens in t-strings (which do not have type `str`).
- const fn is_any_string(self) -> bool { - matches!( - self.kind, - TokenKind::String - | TokenKind::FStringStart - | TokenKind::FStringMiddle - | TokenKind::FStringEnd - | TokenKind::TStringStart - | TokenKind::TStringMiddle - | TokenKind::TStringEnd - ) - } -} - -impl Ranged for Token { - fn range(&self) -> TextRange { - self.range - } -} - -impl fmt::Debug for Token { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?} {:?}", self.kind, self.range)?; - if !self.flags.is_empty() { - f.write_str(" (flags = ")?; - let mut first = true; - for (name, _) in self.flags.iter_names() { - if first { - first = false; - } else { - f.write_str(" | ")?; - } - f.write_str(name)?; - } - f.write_str(")")?; - } - Ok(()) - } -} - -/// A kind of a token. -#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord, get_size2::GetSize)] -pub enum TokenKind { - /// Token kind for a name, commonly known as an identifier. - Name, - /// Token kind for an integer. - Int, - /// Token kind for a floating point number. - Float, - /// Token kind for a complex number. - Complex, - /// Token kind for a string. - String, - /// Token kind for the start of an f-string. This includes the `f`/`F`/`fr` prefix - /// and the opening quote(s). - FStringStart, - /// Token kind that includes the portion of text inside the f-string that's not - /// part of the expression part and isn't an opening or closing brace. - FStringMiddle, - /// Token kind for the end of an f-string. This includes the closing quote. - FStringEnd, - /// Token kind for the start of a t-string. This includes the `t`/`T`/`tr` prefix - /// and the opening quote(s). - TStringStart, - /// Token kind that includes the portion of text inside the t-string that's not - /// part of the interpolation part and isn't an opening or closing brace. - TStringMiddle, - /// Token kind for the end of a t-string. This includes the closing quote. - TStringEnd, - /// Token kind for a IPython escape command. - IpyEscapeCommand, - /// Token kind for a comment. These are filtered out of the token stream prior to parsing. - Comment, - /// Token kind for a newline. - Newline, - /// Token kind for a newline that is not a logical line break. These are filtered out of - /// the token stream prior to parsing. - NonLogicalNewline, - /// Token kind for an indent. - Indent, - /// Token kind for a dedent. - Dedent, - EndOfFile, - /// Token kind for a question mark `?`. - Question, - /// Token kind for an exclamation mark `!`. - Exclamation, - /// Token kind for a left parenthesis `(`. - Lpar, - /// Token kind for a right parenthesis `)`. - Rpar, - /// Token kind for a left square bracket `[`. - Lsqb, - /// Token kind for a right square bracket `]`. - Rsqb, - /// Token kind for a colon `:`. - Colon, - /// Token kind for a comma `,`. - Comma, - /// Token kind for a semicolon `;`. - Semi, - /// Token kind for plus `+`. - Plus, - /// Token kind for minus `-`. - Minus, - /// Token kind for star `*`. - Star, - /// Token kind for slash `/`. - Slash, - /// Token kind for vertical bar `|`. - Vbar, - /// Token kind for ampersand `&`. - Amper, - /// Token kind for less than `<`. - Less, - /// Token kind for greater than `>`. - Greater, - /// Token kind for equal `=`. - Equal, - /// Token kind for dot `.`. - Dot, - /// Token kind for percent `%`. - Percent, - /// Token kind for left bracket `{`. - Lbrace, - /// Token kind for right bracket `}`. - Rbrace, - /// Token kind for double equal `==`. - EqEqual, - /// Token kind for not equal `!=`. 
- NotEqual, - /// Token kind for less than or equal `<=`. - LessEqual, - /// Token kind for greater than or equal `>=`. - GreaterEqual, - /// Token kind for tilde `~`. - Tilde, - /// Token kind for caret `^`. - CircumFlex, - /// Token kind for left shift `<<`. - LeftShift, - /// Token kind for right shift `>>`. - RightShift, - /// Token kind for double star `**`. - DoubleStar, - /// Token kind for double star equal `**=`. - DoubleStarEqual, - /// Token kind for plus equal `+=`. - PlusEqual, - /// Token kind for minus equal `-=`. - MinusEqual, - /// Token kind for star equal `*=`. - StarEqual, - /// Token kind for slash equal `/=`. - SlashEqual, - /// Token kind for percent equal `%=`. - PercentEqual, - /// Token kind for ampersand equal `&=`. - AmperEqual, - /// Token kind for vertical bar equal `|=`. - VbarEqual, - /// Token kind for caret equal `^=`. - CircumflexEqual, - /// Token kind for left shift equal `<<=`. - LeftShiftEqual, - /// Token kind for right shift equal `>>=`. - RightShiftEqual, - /// Token kind for double slash `//`. - DoubleSlash, - /// Token kind for double slash equal `//=`. - DoubleSlashEqual, - /// Token kind for colon equal `:=`. - ColonEqual, - /// Token kind for at `@`. - At, - /// Token kind for at equal `@=`. - AtEqual, - /// Token kind for arrow `->`. - Rarrow, - /// Token kind for ellipsis `...`. - Ellipsis, - - // The keywords should be sorted in alphabetical order. If the boundary tokens for the - // "Keywords" and "Soft keywords" group change, update the related methods on `TokenKind`. - - // Keywords - And, - As, - Assert, - Async, - Await, - Break, - Class, - Continue, - Def, - Del, - Elif, - Else, - Except, - False, - Finally, - For, - From, - Global, - If, - Import, - In, - Is, - Lambda, - None, - Nonlocal, - Not, - Or, - Pass, - Raise, - Return, - True, - Try, - While, - With, - Yield, - - // Soft keywords - Case, - Match, - Type, - - Unknown, -} - -impl TokenKind { - /// Returns `true` if this is an end of file token. - #[inline] - pub const fn is_eof(self) -> bool { - matches!(self, TokenKind::EndOfFile) - } - - /// Returns `true` if this is either a newline or non-logical newline token. - #[inline] - pub const fn is_any_newline(self) -> bool { - matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline) - } - - /// Returns `true` if the token is a keyword (including soft keywords). - /// - /// See also [`is_soft_keyword`], [`is_non_soft_keyword`]. - /// - /// [`is_soft_keyword`]: TokenKind::is_soft_keyword - /// [`is_non_soft_keyword`]: TokenKind::is_non_soft_keyword - #[inline] - pub fn is_keyword(self) -> bool { - TokenKind::And <= self && self <= TokenKind::Type - } - - /// Returns `true` if the token is strictly a soft keyword. - /// - /// See also [`is_keyword`], [`is_non_soft_keyword`]. - /// - /// [`is_keyword`]: TokenKind::is_keyword - /// [`is_non_soft_keyword`]: TokenKind::is_non_soft_keyword - #[inline] - pub fn is_soft_keyword(self) -> bool { - TokenKind::Case <= self && self <= TokenKind::Type - } - - /// Returns `true` if the token is strictly a non-soft keyword. - /// - /// See also [`is_keyword`], [`is_soft_keyword`]. 
- /// - /// [`is_keyword`]: TokenKind::is_keyword - /// [`is_soft_keyword`]: TokenKind::is_soft_keyword - #[inline] - pub fn is_non_soft_keyword(self) -> bool { - TokenKind::And <= self && self <= TokenKind::Yield - } - - #[inline] - pub const fn is_operator(self) -> bool { - matches!( - self, - TokenKind::Lpar - | TokenKind::Rpar - | TokenKind::Lsqb - | TokenKind::Rsqb - | TokenKind::Comma - | TokenKind::Semi - | TokenKind::Plus - | TokenKind::Minus - | TokenKind::Star - | TokenKind::Slash - | TokenKind::Vbar - | TokenKind::Amper - | TokenKind::Less - | TokenKind::Greater - | TokenKind::Equal - | TokenKind::Dot - | TokenKind::Percent - | TokenKind::Lbrace - | TokenKind::Rbrace - | TokenKind::EqEqual - | TokenKind::NotEqual - | TokenKind::LessEqual - | TokenKind::GreaterEqual - | TokenKind::Tilde - | TokenKind::CircumFlex - | TokenKind::LeftShift - | TokenKind::RightShift - | TokenKind::DoubleStar - | TokenKind::PlusEqual - | TokenKind::MinusEqual - | TokenKind::StarEqual - | TokenKind::SlashEqual - | TokenKind::PercentEqual - | TokenKind::AmperEqual - | TokenKind::VbarEqual - | TokenKind::CircumflexEqual - | TokenKind::LeftShiftEqual - | TokenKind::RightShiftEqual - | TokenKind::DoubleStarEqual - | TokenKind::DoubleSlash - | TokenKind::DoubleSlashEqual - | TokenKind::At - | TokenKind::AtEqual - | TokenKind::Rarrow - | TokenKind::Ellipsis - | TokenKind::ColonEqual - | TokenKind::Colon - | TokenKind::And - | TokenKind::Or - | TokenKind::Not - | TokenKind::In - | TokenKind::Is - ) - } - - /// Returns `true` if this is a singleton token i.e., `True`, `False`, or `None`. - #[inline] - pub const fn is_singleton(self) -> bool { - matches!(self, TokenKind::False | TokenKind::True | TokenKind::None) - } - - /// Returns `true` if this is a trivia token i.e., a comment or a non-logical newline. - #[inline] - pub const fn is_trivia(&self) -> bool { - matches!(self, TokenKind::Comment | TokenKind::NonLogicalNewline) - } - - /// Returns `true` if this is a comment token. - #[inline] - pub const fn is_comment(&self) -> bool { - matches!(self, TokenKind::Comment) - } - - #[inline] - pub const fn is_arithmetic(self) -> bool { - matches!( - self, - TokenKind::DoubleStar - | TokenKind::Star - | TokenKind::Plus - | TokenKind::Minus - | TokenKind::Slash - | TokenKind::DoubleSlash - | TokenKind::At - ) - } - - #[inline] - pub const fn is_bitwise_or_shift(self) -> bool { - matches!( - self, - TokenKind::LeftShift - | TokenKind::LeftShiftEqual - | TokenKind::RightShift - | TokenKind::RightShiftEqual - | TokenKind::Amper - | TokenKind::AmperEqual - | TokenKind::Vbar - | TokenKind::VbarEqual - | TokenKind::CircumFlex - | TokenKind::CircumflexEqual - | TokenKind::Tilde - ) - } - - /// Returns `true` if the current token is a unary arithmetic operator. - #[inline] - pub const fn is_unary_arithmetic_operator(self) -> bool { - matches!(self, TokenKind::Plus | TokenKind::Minus) - } - - #[inline] - pub const fn is_interpolated_string_end(self) -> bool { - matches!(self, TokenKind::FStringEnd | TokenKind::TStringEnd) - } - - /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary arithmetic - /// operator, otherwise return [None]. - /// - /// Use [`as_unary_operator`] to match against any unary operator. 
-    ///
-    /// [`as_unary_operator`]: TokenKind::as_unary_operator
-    #[inline]
-    pub const fn as_unary_arithmetic_operator(self) -> Option<UnaryOp> {
-        Some(match self {
-            TokenKind::Plus => UnaryOp::UAdd,
-            TokenKind::Minus => UnaryOp::USub,
-            _ => return None,
-        })
-    }
-
-    /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary operator,
-    /// otherwise return [None].
-    ///
-    /// Use [`as_unary_arithmetic_operator`] to match against only an arithmetic unary operator.
-    ///
-    /// [`as_unary_arithmetic_operator`]: TokenKind::as_unary_arithmetic_operator
-    #[inline]
-    pub const fn as_unary_operator(self) -> Option<UnaryOp> {
-        Some(match self {
-            TokenKind::Plus => UnaryOp::UAdd,
-            TokenKind::Minus => UnaryOp::USub,
-            TokenKind::Tilde => UnaryOp::Invert,
-            TokenKind::Not => UnaryOp::Not,
-            _ => return None,
-        })
-    }
-
-    /// Returns the [`BoolOp`] that corresponds to this token kind, if it is a boolean operator,
-    /// otherwise return [None].
-    #[inline]
-    pub const fn as_bool_operator(self) -> Option<BoolOp> {
-        Some(match self {
-            TokenKind::And => BoolOp::And,
-            TokenKind::Or => BoolOp::Or,
-            _ => return None,
-        })
-    }
-
-    /// Returns the binary [`Operator`] that corresponds to the current token, if it's a binary
-    /// operator, otherwise return [None].
-    ///
-    /// Use [`as_augmented_assign_operator`] to match against an augmented assignment token.
-    ///
-    /// [`as_augmented_assign_operator`]: TokenKind::as_augmented_assign_operator
-    pub const fn as_binary_operator(self) -> Option<Operator> {
-        Some(match self {
-            TokenKind::Plus => Operator::Add,
-            TokenKind::Minus => Operator::Sub,
-            TokenKind::Star => Operator::Mult,
-            TokenKind::At => Operator::MatMult,
-            TokenKind::DoubleStar => Operator::Pow,
-            TokenKind::Slash => Operator::Div,
-            TokenKind::DoubleSlash => Operator::FloorDiv,
-            TokenKind::Percent => Operator::Mod,
-            TokenKind::Amper => Operator::BitAnd,
-            TokenKind::Vbar => Operator::BitOr,
-            TokenKind::CircumFlex => Operator::BitXor,
-            TokenKind::LeftShift => Operator::LShift,
-            TokenKind::RightShift => Operator::RShift,
-            _ => return None,
-        })
-    }
-
-    /// Returns the [`Operator`] that corresponds to this token kind, if it is
-    /// an augmented assignment operator, or [`None`] otherwise.
-    #[inline]
-    pub const fn as_augmented_assign_operator(self) -> Option<Operator> {
-        Some(match self {
-            TokenKind::PlusEqual => Operator::Add,
-            TokenKind::MinusEqual => Operator::Sub,
-            TokenKind::StarEqual => Operator::Mult,
-            TokenKind::AtEqual => Operator::MatMult,
-            TokenKind::DoubleStarEqual => Operator::Pow,
-            TokenKind::SlashEqual => Operator::Div,
-            TokenKind::DoubleSlashEqual => Operator::FloorDiv,
-            TokenKind::PercentEqual => Operator::Mod,
-            TokenKind::AmperEqual => Operator::BitAnd,
-            TokenKind::VbarEqual => Operator::BitOr,
-            TokenKind::CircumflexEqual => Operator::BitXor,
-            TokenKind::LeftShiftEqual => Operator::LShift,
-            TokenKind::RightShiftEqual => Operator::RShift,
-            _ => return None,
-        })
-    }
-}
-
-impl From<BoolOp> for TokenKind {
-    #[inline]
-    fn from(op: BoolOp) -> Self {
-        match op {
-            BoolOp::And => TokenKind::And,
-            BoolOp::Or => TokenKind::Or,
-        }
-    }
-}
-
-impl From<UnaryOp> for TokenKind {
-    #[inline]
-    fn from(op: UnaryOp) -> Self {
-        match op {
-            UnaryOp::Invert => TokenKind::Tilde,
-            UnaryOp::Not => TokenKind::Not,
-            UnaryOp::UAdd => TokenKind::Plus,
-            UnaryOp::USub => TokenKind::Minus,
-        }
-    }
-}
-
-impl From<Operator> for TokenKind {
-    #[inline]
-    fn from(op: Operator) -> Self {
-        match op {
-            Operator::Add => TokenKind::Plus,
-            Operator::Sub => TokenKind::Minus,
-            Operator::Mult => TokenKind::Star,
-            Operator::MatMult => TokenKind::At,
-            Operator::Div => TokenKind::Slash,
-            Operator::Mod => TokenKind::Percent,
-            Operator::Pow => TokenKind::DoubleStar,
-            Operator::LShift => TokenKind::LeftShift,
-            Operator::RShift => TokenKind::RightShift,
-            Operator::BitOr => TokenKind::Vbar,
-            Operator::BitXor => TokenKind::CircumFlex,
-            Operator::BitAnd => TokenKind::Amper,
-            Operator::FloorDiv => TokenKind::DoubleSlash,
-        }
-    }
-}
-
-impl fmt::Display for TokenKind {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let value = match self {
-            TokenKind::Unknown => "Unknown",
-            TokenKind::Newline => "newline",
-            TokenKind::NonLogicalNewline => "NonLogicalNewline",
-            TokenKind::Indent => "indent",
-            TokenKind::Dedent => "dedent",
-            TokenKind::EndOfFile => "end of file",
-            TokenKind::Name => "name",
-            TokenKind::Int => "int",
-            TokenKind::Float => "float",
-            TokenKind::Complex => "complex",
-            TokenKind::String => "string",
-            TokenKind::FStringStart => "FStringStart",
-            TokenKind::FStringMiddle => "FStringMiddle",
-            TokenKind::FStringEnd => "FStringEnd",
-            TokenKind::TStringStart => "TStringStart",
-            TokenKind::TStringMiddle => "TStringMiddle",
-            TokenKind::TStringEnd => "TStringEnd",
-            TokenKind::IpyEscapeCommand => "IPython escape command",
-            TokenKind::Comment => "comment",
-            TokenKind::Question => "`?`",
-            TokenKind::Exclamation => "`!`",
-            TokenKind::Lpar => "`(`",
-            TokenKind::Rpar => "`)`",
-            TokenKind::Lsqb => "`[`",
-            TokenKind::Rsqb => "`]`",
-            TokenKind::Lbrace => "`{`",
-            TokenKind::Rbrace => "`}`",
-            TokenKind::Equal => "`=`",
-            TokenKind::ColonEqual => "`:=`",
-            TokenKind::Dot => "`.`",
-            TokenKind::Colon => "`:`",
-            TokenKind::Semi => "`;`",
-            TokenKind::Comma => "`,`",
-            TokenKind::Rarrow => "`->`",
-            TokenKind::Plus => "`+`",
-            TokenKind::Minus => "`-`",
-            TokenKind::Star => "`*`",
-            TokenKind::DoubleStar => "`**`",
-            TokenKind::Slash => "`/`",
-            TokenKind::DoubleSlash => "`//`",
-            TokenKind::Percent => "`%`",
-            TokenKind::Vbar => "`|`",
-            TokenKind::Amper => "`&`",
-            TokenKind::CircumFlex => "`^`",
-            TokenKind::LeftShift => "`<<`",
-            TokenKind::RightShift => "`>>`",
-            TokenKind::Tilde => "`~`",
-            TokenKind::At => "`@`",
-            TokenKind::Less => "`<`",
-            TokenKind::Greater => "`>`",
"`>`", - TokenKind::EqEqual => "`==`", - TokenKind::NotEqual => "`!=`", - TokenKind::LessEqual => "`<=`", - TokenKind::GreaterEqual => "`>=`", - TokenKind::PlusEqual => "`+=`", - TokenKind::MinusEqual => "`-=`", - TokenKind::StarEqual => "`*=`", - TokenKind::DoubleStarEqual => "`**=`", - TokenKind::SlashEqual => "`/=`", - TokenKind::DoubleSlashEqual => "`//=`", - TokenKind::PercentEqual => "`%=`", - TokenKind::VbarEqual => "`|=`", - TokenKind::AmperEqual => "`&=`", - TokenKind::CircumflexEqual => "`^=`", - TokenKind::LeftShiftEqual => "`<<=`", - TokenKind::RightShiftEqual => "`>>=`", - TokenKind::AtEqual => "`@=`", - TokenKind::Ellipsis => "`...`", - TokenKind::False => "`False`", - TokenKind::None => "`None`", - TokenKind::True => "`True`", - TokenKind::And => "`and`", - TokenKind::As => "`as`", - TokenKind::Assert => "`assert`", - TokenKind::Async => "`async`", - TokenKind::Await => "`await`", - TokenKind::Break => "`break`", - TokenKind::Class => "`class`", - TokenKind::Continue => "`continue`", - TokenKind::Def => "`def`", - TokenKind::Del => "`del`", - TokenKind::Elif => "`elif`", - TokenKind::Else => "`else`", - TokenKind::Except => "`except`", - TokenKind::Finally => "`finally`", - TokenKind::For => "`for`", - TokenKind::From => "`from`", - TokenKind::Global => "`global`", - TokenKind::If => "`if`", - TokenKind::Import => "`import`", - TokenKind::In => "`in`", - TokenKind::Is => "`is`", - TokenKind::Lambda => "`lambda`", - TokenKind::Nonlocal => "`nonlocal`", - TokenKind::Not => "`not`", - TokenKind::Or => "`or`", - TokenKind::Pass => "`pass`", - TokenKind::Raise => "`raise`", - TokenKind::Return => "`return`", - TokenKind::Try => "`try`", - TokenKind::While => "`while`", - TokenKind::Match => "`match`", - TokenKind::Type => "`type`", - TokenKind::Case => "`case`", - TokenKind::With => "`with`", - TokenKind::Yield => "`yield`", - }; - f.write_str(value) - } -} - -bitflags! { - #[derive(Clone, Copy, Debug, PartialEq, Eq)] - pub(crate) struct TokenFlags: u16 { - /// The token is a string with double quotes (`"`). - const DOUBLE_QUOTES = 1 << 0; - /// The token is a triple-quoted string i.e., it starts and ends with three consecutive - /// quote characters (`"""` or `'''`). - const TRIPLE_QUOTED_STRING = 1 << 1; - - /// The token is a unicode string i.e., prefixed with `u` or `U` - const UNICODE_STRING = 1 << 2; - /// The token is a byte string i.e., prefixed with `b` or `B` - const BYTE_STRING = 1 << 3; - /// The token is an f-string i.e., prefixed with `f` or `F` - const F_STRING = 1 << 4; - /// The token is a t-string i.e., prefixed with `t` or `T` - const T_STRING = 1 << 5; - /// The token is a raw string and the prefix character is in lowercase. - const RAW_STRING_LOWERCASE = 1 << 6; - /// The token is a raw string and the prefix character is in uppercase. 
-        const RAW_STRING_UPPERCASE = 1 << 7;
-        /// String without matching closing quote(s)
-        const UNCLOSED_STRING = 1 << 8;
-
-        /// The token is a raw string i.e., prefixed with `r` or `R`
-        const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits();
-
-    }
-}
-
-impl get_size2::GetSize for TokenFlags {}
-
-impl StringFlags for TokenFlags {
-    fn quote_style(self) -> Quote {
-        if self.intersects(TokenFlags::DOUBLE_QUOTES) {
-            Quote::Double
-        } else {
-            Quote::Single
-        }
-    }
-
-    fn triple_quotes(self) -> TripleQuotes {
-        if self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) {
-            TripleQuotes::Yes
-        } else {
-            TripleQuotes::No
-        }
-    }
-
-    fn prefix(self) -> AnyStringPrefix {
-        if self.intersects(TokenFlags::F_STRING) {
-            if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
-                AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false })
-            } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
-                AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true })
-            } else {
-                AnyStringPrefix::Format(FStringPrefix::Regular)
-            }
-        } else if self.intersects(TokenFlags::T_STRING) {
-            if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
-                AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: false })
-            } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
-                AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: true })
-            } else {
-                AnyStringPrefix::Template(TStringPrefix::Regular)
-            }
-        } else if self.intersects(TokenFlags::BYTE_STRING) {
-            if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
-                AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
-            } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
-                AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
-            } else {
-                AnyStringPrefix::Bytes(ByteStringPrefix::Regular)
-            }
-        } else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
-            AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false })
-        } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
-            AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true })
-        } else if self.intersects(TokenFlags::UNICODE_STRING) {
-            AnyStringPrefix::Regular(StringLiteralPrefix::Unicode)
-        } else {
-            AnyStringPrefix::Regular(StringLiteralPrefix::Empty)
-        }
-    }
-
-    fn is_unclosed(self) -> bool {
-        self.intersects(TokenFlags::UNCLOSED_STRING)
-    }
-}
-
-impl TokenFlags {
-    /// Returns `true` if the token is an f-string.
-    pub(crate) const fn is_f_string(self) -> bool {
-        self.intersects(TokenFlags::F_STRING)
-    }
-
-    /// Returns `true` if the token is a t-string.
-    pub(crate) const fn is_t_string(self) -> bool {
-        self.intersects(TokenFlags::T_STRING)
-    }
-
-    /// Returns `true` if the token is a t-string.
-    pub(crate) const fn is_interpolated_string(self) -> bool {
-        self.intersects(TokenFlags::T_STRING.union(TokenFlags::F_STRING))
-    }
-
-    /// Returns `true` if the token is a triple-quoted t-string.
-    pub(crate) fn is_triple_quoted_interpolated_string(self) -> bool {
-        self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) && self.is_interpolated_string()
-    }
-
-    /// Returns `true` if the token is a raw string.
-    pub(crate) const fn is_raw_string(self) -> bool {
-        self.intersects(TokenFlags::RAW_STRING)
-    }
-}
+use ruff_python_ast::{Int, IpyEscapeKind, name::Name};
 
 #[derive(Clone, Debug, Default)]
 pub(crate) enum TokenValue {
diff --git a/crates/ruff_python_parser/src/token_set.rs b/crates/ruff_python_parser/src/token_set.rs
index 843fe53faa..7ced27a8fb 100644
--- a/crates/ruff_python_parser/src/token_set.rs
+++ b/crates/ruff_python_parser/src/token_set.rs
@@ -1,4 +1,4 @@
-use crate::TokenKind;
+use ruff_python_ast::token::TokenKind;
 
 /// A bit-set of `TokenKind`s
 #[derive(Clone, Copy)]
@@ -42,7 +42,7 @@ impl<const N: usize> From<[TokenKind; N]> for TokenSet {
 
 #[test]
 fn token_set_works_for_tokens() {
-    use crate::TokenKind::*;
+    use ruff_python_ast::token::TokenKind::*;
     let mut ts = TokenSet::new([EndOfFile, Name]);
     assert!(ts.contains(EndOfFile));
     assert!(ts.contains(Name));
diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs
index f24fb4771f..e5755806e3 100644
--- a/crates/ruff_python_parser/src/token_source.rs
+++ b/crates/ruff_python_parser/src/token_source.rs
@@ -1,10 +1,11 @@
+use ruff_python_ast::token::{Token, TokenFlags, TokenKind};
 use ruff_text_size::{Ranged, TextRange, TextSize};
 
 use crate::Mode;
 use crate::error::LexicalError;
 use crate::lexer::{Lexer, LexerCheckpoint};
 use crate::string::InterpolatedStringKind;
-use crate::token::{Token, TokenFlags, TokenKind, TokenValue};
+use crate::token::TokenValue;
 
 /// Token source for the parser that skips over any trivia tokens.
 #[derive(Debug)]
diff --git a/crates/ruff_python_parser/tests/fixtures.rs b/crates/ruff_python_parser/tests/fixtures.rs
index 2de49e6d68..8f9a2994db 100644
--- a/crates/ruff_python_parser/tests/fixtures.rs
+++ b/crates/ruff_python_parser/tests/fixtures.rs
@@ -5,13 +5,14 @@ use std::fs;
 use std::path::Path;
 
 use ruff_annotate_snippets::{Level, Renderer, Snippet};
+use ruff_python_ast::token::Token;
 use ruff_python_ast::visitor::Visitor;
 use ruff_python_ast::visitor::source_order::{SourceOrderVisitor, TraversalSignal, walk_module};
 use ruff_python_ast::{self as ast, AnyNodeRef, Mod, PythonVersion};
 use ruff_python_parser::semantic_errors::{
     SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError,
 };
-use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, Token, parse_unchecked};
+use ruff_python_parser::{Mode, ParseErrorType, ParseOptions, parse_unchecked};
 use ruff_source_file::{LineIndex, OneIndexed, SourceCode};
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
 
diff --git a/crates/ty_ide/Cargo.toml b/crates/ty_ide/Cargo.toml
index 4be5d49fe2..a7665b32ca 100644
--- a/crates/ty_ide/Cargo.toml
+++ b/crates/ty_ide/Cargo.toml
@@ -19,7 +19,6 @@ ruff_memory_usage = { workspace = true }
 ruff_python_ast = { workspace = true }
 ruff_python_codegen = { workspace = true }
 ruff_python_importer = { workspace = true }
-ruff_python_parser = { workspace = true }
 ruff_python_trivia = { workspace = true }
 ruff_source_file = { workspace = true }
 ruff_text_size = { workspace = true }
@@ -37,6 +36,8 @@ smallvec = { workspace = true }
 tracing = { workspace = true }
 
 [dev-dependencies]
+ruff_python_parser = { workspace = true }
+
 camino = { workspace = true }
 insta = { workspace = true, features = ["filters"] }
 
diff --git a/crates/ty_ide/src/completion.rs b/crates/ty_ide/src/completion.rs
index 811c2f6aef..ae8e75cb9d 100644
--- a/crates/ty_ide/src/completion.rs
+++ b/crates/ty_ide/src/completion.rs
@@ -5,9 +5,9 @@ use ruff_db::parsed::{ParsedModuleRef, parsed_module};
 use ruff_db::source::source_text;
 use ruff_diagnostics::Edit;
 use ruff_python_ast::name::Name;
+use ruff_python_ast::token::{Token, TokenAt, TokenKind, Tokens};
 use ruff_python_ast::{self as ast, AnyNodeRef};
 use ruff_python_codegen::Stylist;
-use ruff_python_parser::{Token, TokenAt, TokenKind, Tokens};
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
 use ty_python_semantic::types::UnionType;
 use ty_python_semantic::{
@@ -1557,7 +1557,8 @@ fn compare_suggestions(c1: &Completion, c2: &Completion) -> Ordering {
 #[cfg(test)]
 mod tests {
     use insta::assert_snapshot;
-    use ruff_python_parser::{Mode, ParseOptions, TokenKind, Tokens};
+    use ruff_python_ast::token::{TokenKind, Tokens};
+    use ruff_python_parser::{Mode, ParseOptions};
     use ty_python_semantic::ModuleName;
 
     use crate::completion::{Completion, completion};
diff --git a/crates/ty_ide/src/goto.rs b/crates/ty_ide/src/goto.rs
index 17df9f11d9..359438bca3 100644
--- a/crates/ty_ide/src/goto.rs
+++ b/crates/ty_ide/src/goto.rs
@@ -8,8 +8,8 @@ use std::borrow::Cow;
 use crate::find_node::covering_node;
 use crate::stub_mapping::StubMapper;
 use ruff_db::parsed::ParsedModuleRef;
+use ruff_python_ast::token::{TokenKind, Tokens};
 use ruff_python_ast::{self as ast, AnyNodeRef};
-use ruff_python_parser::{TokenKind, Tokens};
 use ruff_text_size::{Ranged, TextRange, TextSize};
 use ty_python_semantic::ResolvedDefinition;
 
diff --git a/crates/ty_ide/src/importer.rs b/crates/ty_ide/src/importer.rs
index 94b2457e74..bf75157147 100644
--- a/crates/ty_ide/src/importer.rs
+++ b/crates/ty_ide/src/importer.rs
@@ -24,10 +24,10 @@ use ruff_db::source::source_text;
 use ruff_diagnostics::Edit;
 use ruff_python_ast as ast;
 use ruff_python_ast::name::Name;
+use ruff_python_ast::token::Tokens;
 use ruff_python_ast::visitor::source_order::{SourceOrderVisitor, TraversalSignal, walk_stmt};
 use ruff_python_codegen::Stylist;
 use ruff_python_importer::Insertion;
-use ruff_python_parser::{Parsed, Tokens};
 use ruff_text_size::{Ranged, TextRange, TextSize};
 use ty_project::Db;
 use ty_python_semantic::semantic_index::definition::DefinitionKind;
@@ -76,7 +76,7 @@ impl<'a> Importer<'a> {
         source: &'a str,
         parsed: &'a ParsedModuleRef,
     ) -> Self {
-        let imports = TopLevelImports::find(parsed);
+        let imports = TopLevelImports::find(parsed.syntax());
 
         Self {
             db,
@@ -749,9 +749,9 @@ struct TopLevelImports<'ast> {
 
 impl<'ast> TopLevelImports<'ast> {
     /// Find all top-level imports from the given AST of a Python module.
-    fn find(parsed: &'ast Parsed<ast::ModModule>) -> Vec> {
+    fn find(module: &'ast ast::ModModule) -> Vec> {
         let mut visitor = TopLevelImports::default();
-        visitor.visit_body(parsed.suite());
+        visitor.visit_body(&module.body);
         visitor.imports
     }
 }
diff --git a/crates/ty_ide/src/references.rs b/crates/ty_ide/src/references.rs
index 27f1a3f2cb..d759b1daed 100644
--- a/crates/ty_ide/src/references.rs
+++ b/crates/ty_ide/src/references.rs
@@ -14,11 +14,11 @@ use crate::find_node::CoveringNode;
 use crate::goto::GotoTarget;
 use crate::{Db, NavigationTargets, ReferenceKind, ReferenceTarget};
 use ruff_db::files::File;
+use ruff_python_ast::token::Tokens;
 use ruff_python_ast::{
     self as ast, AnyNodeRef,
     visitor::source_order::{SourceOrderVisitor, TraversalSignal},
 };
-use ruff_python_parser::Tokens;
 use ruff_text_size::{Ranged, TextRange};
 use ty_python_semantic::{ImportAliasResolution, SemanticModel};
 
diff --git a/crates/ty_ide/src/signature_help.rs b/crates/ty_ide/src/signature_help.rs
index 1f7041eaab..d79f298dd6 100644
--- a/crates/ty_ide/src/signature_help.rs
+++ b/crates/ty_ide/src/signature_help.rs
@@ -11,8 +11,8 @@ use crate::goto::Definitions;
 use crate::{Db, find_node::covering_node};
 use ruff_db::files::File;
 use ruff_db::parsed::parsed_module;
+use ruff_python_ast::token::TokenKind;
 use ruff_python_ast::{self as ast, AnyNodeRef};
-use ruff_python_parser::TokenKind;
 use ruff_text_size::{Ranged, TextRange, TextSize};
 use ty_python_semantic::ResolvedDefinition;
 use ty_python_semantic::SemanticModel;
@@ -381,7 +381,7 @@ mod tests {
     f = func_a
 else:
     f = func_b
-    
+
 f(
 "#,
         );
@@ -426,10 +426,10 @@ mod tests {
 
 @overload
 def process(value: int) -> str: ...
-    
+
 @overload
 def process(value: str) -> int: ...
-    
+
 def process(value):
     if isinstance(value, int):
         return str(value)
@@ -826,10 +826,10 @@ def ab(a: int, *, c: int):
         r#"
 class Point:
     """A simple point class representing a 2D coordinate."""
-    
+
     def __init__(self, x: int, y: int):
         """Initialize a point with x and y coordinates.
-        
+
         Args:
             x: The x-coordinate
             y: The y-coordinate
@@ -961,12 +961,12 @@ def ab(a: int, *, c: int):
         r#"
 from typing import overload
 
-@overload 
+@overload
 def process(value: int) -> str: ...
-    
+
 @overload
 def process(value: str, flag: bool) -> int: ...
-    
+
 def process(value, flag=None):
     if isinstance(value, int):
         return str(value)
diff --git a/crates/ty_python_semantic/src/suppression.rs b/crates/ty_python_semantic/src/suppression.rs
index c0524728a2..fd8df281e0 100644
--- a/crates/ty_python_semantic/src/suppression.rs
+++ b/crates/ty_python_semantic/src/suppression.rs
@@ -15,7 +15,7 @@ use ruff_db::diagnostic::{
 };
 use ruff_db::{files::File, parsed::parsed_module, source::source_text};
 use ruff_diagnostics::{Edit, Fix};
-use ruff_python_parser::TokenKind;
+use ruff_python_ast::token::TokenKind;
 use ruff_python_trivia::Cursor;
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};