perf: Cursor based lexer (#38)

Micha Reiser 2023-07-26 07:50:45 +02:00 committed by GitHub
parent 13196fc500
commit 593b46be5e
30 changed files with 2502 additions and 1804 deletions


@ -37,10 +37,8 @@ jobs:
- uses: Swatinem/rust-cache@v2
- name: run tests with num-bigint
run: cargo test --all --no-default-features --features num-bigint
- name: run tests with malachite-bigint and all features
run: cargo test --all --features malachite-bigint,full-lexer,serde
- name: run tests
run: cargo test --all --all-features
lint:
name: Check Rust code with rustfmt and clippy
@ -53,9 +51,7 @@ jobs:
- name: run rustfmt
run: cargo fmt --all -- --check
- name: run clippy
run: cargo clippy --all --no-default-features --features num-bigint
- name: run clippy
run: cargo clippy --all --features malachite-bigint,full-lexer,serde -- -Dwarnings
run: cargo clippy --all --all-features -- -Dwarnings
- uses: actions/setup-python@v4
with:


@ -21,23 +21,17 @@ rustpython-literal = { path = "literal" }
rustpython-format = { path = "format" }
rustpython-parser = { path = "parser", default-features = false }
ahash = "0.7.6"
anyhow = "1.0.45"
cfg-if = "1.0"
insta = "1.14.0"
itertools = "0.10.3"
is-macro = "0.2.2"
log = "0.4.16"
num-complex = "0.4.0"
num-bigint = "0.4.3"
num-traits = "0.2"
pyo3 = { version = "0.19.0" }
malachite-bigint = { version = "0.1.0" }
memchr = "2.5.0"
rand = "0.8.5"
serde = "1.0"
static_assertions = "1.1"
once_cell = "1.17.1"
unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }
[profile.dev.package."*"]


@ -7,14 +7,10 @@ edition = "2021"
repository = "https://github.com/RustPython/Parser/"
license = "MIT"
[features]
default = ["malachite-bigint"]
[dependencies]
rustpython-parser-core = { workspace = true }
rustpython-literal = { workspace = true, optional = true }
is-macro = { workspace = true }
num-bigint = { workspace = true, optional = true }
malachite-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true }
static_assertions = "1.1.0"


@ -2,8 +2,8 @@
use rustpython_parser_core::text_size::TextRange;
use crate::bigint::BigInt;
use crate::Ranged;
use num_bigint::BigInt;
pub type String = std::string::String;


@ -1,6 +1,6 @@
#![allow(clippy::derive_partial_eq_without_eq)]
use crate::text_size::TextRange;
pub use crate::{builtin::*, text_size::TextSize, ConversionFlag, Node};
use crate::text_size::{TextRange, TextSize};
pub(crate) use crate::{builtin::*, ConversionFlag, Node};
use std::fmt::{self, Debug};
// This file was originally generated from asdl by a python script, but we now edit it manually


@ -20,11 +20,6 @@ mod generic;
mod impls;
mod ranged;
#[cfg(feature = "malachite-bigint")]
pub use malachite_bigint as bigint;
#[cfg(all(feature = "num-bigint", not(feature = "malachite-bigint")))]
pub use num_bigint as bigint;
pub use builtin::*;
pub use generic::*;
pub use ranged::Ranged;


@ -2,8 +2,6 @@
use crate::text_size::{TextRange, TextSize};
pub use crate::builtin::*;
pub trait Ranged {
fn range(&self) -> TextRange;


@ -13,7 +13,6 @@ ruff_text_size = { path = "../ruff_text_size" }
serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
is-macro.workspace = true
memchr.workspace = true
[features]
default = []


@ -13,8 +13,6 @@ rustpython-literal = { workspace = true }
bitflags = "2.3.1"
itertools = "0.10.5"
num-traits = { workspace = true }
num-bigint = { workspace = true, optional = true }
malachite-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true }
[features]
default = ["malachite-bigint"]


@ -9,7 +9,7 @@ use std::{
str::FromStr,
};
use crate::bigint::{BigInt, Sign};
use num_bigint::{BigInt, Sign};
#[derive(Debug, PartialEq)]
pub enum CFormatErrorType {


@ -6,7 +6,7 @@ use rustpython_literal::format::Case;
use std::ops::Deref;
use std::{cmp, str::FromStr};
use crate::bigint::{BigInt, Sign};
use num_bigint::{BigInt, Sign};
trait FormatParse {
fn parse(text: &str) -> (Option<Self>, &str)


@ -1,8 +1,3 @@
#[cfg(feature = "malachite-bigint")]
pub use malachite_bigint as bigint;
#[cfg(all(feature = "num-bigint", not(feature = "malachite-bigint")))]
pub use num_bigint as bigint;
pub use crate::format::*;
pub mod cformat;


@ -385,7 +385,7 @@ impl<'a> Escape for AsciiEscape<'a> {
fn layout(&self) -> &EscapeLayout {
&self.layout
}
#[allow(unsafe_code)]
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
formatter.write_str(unsafe {
// SAFETY: this function must be called only when source is printable ascii characters


@ -9,16 +9,11 @@ license = "MIT"
edition = "2021"
[features]
default = ["malachite-bigint"]
serde = ["dep:serde", "rustpython-parser-core/serde"]
full-lexer = []
malachite-bigint = ["dep:malachite-bigint", "rustpython-ast/malachite-bigint"]
num-bigint = ["dep:num-bigint", "rustpython-ast/num-bigint"]
[build-dependencies]
anyhow = { workspace = true }
lalrpop = { version = "0.20.0", default-features = false, optional = true }
phf_codegen = "0.11.1"
tiny-keccak = { version = "2", features = ["sha3"] }
[dependencies]
@ -27,18 +22,16 @@ rustpython-parser-core = { workspace = true }
itertools = { workspace = true }
is-macro = { workspace = true }
log = { workspace = true }
malachite-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true, optional = true }
num-bigint = { workspace = true }
num-traits = { workspace = true }
unicode_names2 = { workspace = true }
unic-emoji-char = "0.9.0"
unic-ucd-ident = "0.9.0"
lalrpop-util = { version = "0.20.0", default-features = false }
phf = "0.11.1"
rustc-hash = "1.1.0"
serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
static_assertions = "1.1.0"
[dev-dependencies]
insta = { workspace = true }


@ -1,13 +1,10 @@
use std::fmt::Write as _;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use tiny_keccak::{Hasher, Sha3};
fn main() -> anyhow::Result<()> {
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
gen_phf(&out_dir);
const SOURCE: &str = "src/python.lalrpop";
println!("cargo:rerun-if-changed={SOURCE}");
@ -16,6 +13,7 @@ fn main() -> anyhow::Result<()> {
#[cfg(feature = "lalrpop")]
{
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
target = out_dir.join("src/python.rs");
}
#[cfg(not(feature = "lalrpop"))]
@ -113,55 +111,3 @@ fn sha_equal(expected_sha3_str: &str, actual_sha3: &[u8; 32]) -> bool {
}
*actual_sha3 == expected_sha3
}
fn gen_phf(out_dir: &Path) {
let mut kwds = phf_codegen::Map::new();
let kwds = kwds
// Alphabetical keywords:
.entry("...", "Tok::Ellipsis")
.entry("False", "Tok::False")
.entry("None", "Tok::None")
.entry("True", "Tok::True")
// more so "standard" keywords
.entry("and", "Tok::And")
.entry("as", "Tok::As")
.entry("assert", "Tok::Assert")
.entry("async", "Tok::Async")
.entry("await", "Tok::Await")
.entry("break", "Tok::Break")
.entry("case", "Tok::Case")
.entry("class", "Tok::Class")
.entry("continue", "Tok::Continue")
.entry("def", "Tok::Def")
.entry("del", "Tok::Del")
.entry("elif", "Tok::Elif")
.entry("else", "Tok::Else")
.entry("except", "Tok::Except")
.entry("finally", "Tok::Finally")
.entry("for", "Tok::For")
.entry("from", "Tok::From")
.entry("global", "Tok::Global")
.entry("if", "Tok::If")
.entry("import", "Tok::Import")
.entry("in", "Tok::In")
.entry("is", "Tok::Is")
.entry("lambda", "Tok::Lambda")
.entry("match", "Tok::Match")
.entry("nonlocal", "Tok::Nonlocal")
.entry("not", "Tok::Not")
.entry("or", "Tok::Or")
.entry("pass", "Tok::Pass")
.entry("raise", "Tok::Raise")
.entry("return", "Tok::Return")
.entry("try", "Tok::Try")
.entry("type", "Tok::Type")
.entry("while", "Tok::While")
.entry("with", "Tok::With")
.entry("yield", "Tok::Yield")
.build();
writeln!(
BufWriter::new(File::create(out_dir.join("keywords.rs")).unwrap()),
"{kwds}",
)
.unwrap();
}
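With this build step gone, keyword recognition no longer relies on a generated phf map. The replacement isn't visible here (the lexer.rs diff is suppressed below), but a plain match on the identifier text is the natural substitute; a hypothetical sketch, not taken from this commit:

// Hypothetical sketch: resolving keywords with an ordinary match instead
// of a build-time phf map. Rustc compiles string matches like this into
// efficient comparison trees.
fn try_keyword(ident: &str) -> Option<Tok> {
    Some(match ident {
        "and" => Tok::And,
        "as" => Tok::As,
        "class" => Tok::Class,
        "def" => Tok::Def,
        "if" => Tok::If,
        "return" => Tok::Return,
        // ...the remaining keywords follow the same pattern...
        _ => return None,
    })
}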


@ -10,8 +10,8 @@ use rustc_hash::FxHashSet;
use rustpython_ast::Ranged;
pub(crate) struct ArgumentList {
pub args: Vec<ast::Expr>,
pub keywords: Vec<ast::Keyword>,
pub(crate) args: Vec<ast::Expr>,
pub(crate) keywords: Vec<ast::Keyword>,
}
// Perform validation of function/lambda arguments in a function definition.


@ -1,12 +1,8 @@
// This file was originally generated from asdl by a python script, but we now edit it manually
impl Parse for ast::StmtFunctionDef {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -24,12 +20,7 @@ impl Parse for ast::StmtFunctionDef {
}
impl Parse for ast::StmtAsyncFunctionDef {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -47,12 +38,7 @@ impl Parse for ast::StmtAsyncFunctionDef {
}
impl Parse for ast::StmtClassDef {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -70,12 +56,7 @@ impl Parse for ast::StmtClassDef {
}
impl Parse for ast::StmtReturn {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -93,12 +74,7 @@ impl Parse for ast::StmtReturn {
}
impl Parse for ast::StmtDelete {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -116,12 +92,7 @@ impl Parse for ast::StmtDelete {
}
impl Parse for ast::StmtAssign {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -139,12 +110,7 @@ impl Parse for ast::StmtAssign {
}
impl Parse for ast::StmtTypeAlias {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -162,12 +128,7 @@ impl Parse for ast::StmtTypeAlias {
}
impl Parse for ast::StmtAugAssign {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -185,12 +146,7 @@ impl Parse for ast::StmtAugAssign {
}
impl Parse for ast::StmtAnnAssign {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -208,12 +164,7 @@ impl Parse for ast::StmtAnnAssign {
}
impl Parse for ast::StmtFor {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -231,12 +182,7 @@ impl Parse for ast::StmtFor {
}
impl Parse for ast::StmtAsyncFor {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -254,12 +200,7 @@ impl Parse for ast::StmtAsyncFor {
}
impl Parse for ast::StmtWhile {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -277,12 +218,7 @@ impl Parse for ast::StmtWhile {
}
impl Parse for ast::StmtIf {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -300,12 +236,7 @@ impl Parse for ast::StmtIf {
}
impl Parse for ast::StmtWith {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -323,12 +254,7 @@ impl Parse for ast::StmtWith {
}
impl Parse for ast::StmtAsyncWith {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -346,12 +272,7 @@ impl Parse for ast::StmtAsyncWith {
}
impl Parse for ast::StmtMatch {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -369,12 +290,7 @@ impl Parse for ast::StmtMatch {
}
impl Parse for ast::StmtRaise {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -392,12 +308,7 @@ impl Parse for ast::StmtRaise {
}
impl Parse for ast::StmtTry {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -415,12 +326,7 @@ impl Parse for ast::StmtTry {
}
impl Parse for ast::StmtTryStar {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -438,12 +344,7 @@ impl Parse for ast::StmtTryStar {
}
impl Parse for ast::StmtAssert {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -461,12 +362,7 @@ impl Parse for ast::StmtAssert {
}
impl Parse for ast::StmtImport {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -484,12 +380,7 @@ impl Parse for ast::StmtImport {
}
impl Parse for ast::StmtImportFrom {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -507,12 +398,7 @@ impl Parse for ast::StmtImportFrom {
}
impl Parse for ast::StmtGlobal {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -530,12 +416,7 @@ impl Parse for ast::StmtGlobal {
}
impl Parse for ast::StmtNonlocal {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -553,12 +434,7 @@ impl Parse for ast::StmtNonlocal {
}
impl Parse for ast::StmtExpr {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -576,12 +452,7 @@ impl Parse for ast::StmtExpr {
}
impl Parse for ast::StmtPass {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -599,12 +470,7 @@ impl Parse for ast::StmtPass {
}
impl Parse for ast::StmtBreak {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -622,12 +488,7 @@ impl Parse for ast::StmtBreak {
}
impl Parse for ast::StmtContinue {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Stmt::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -645,12 +506,7 @@ impl Parse for ast::StmtContinue {
}
impl Parse for ast::ExprBoolOp {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -668,12 +524,7 @@ impl Parse for ast::ExprBoolOp {
}
impl Parse for ast::ExprNamedExpr {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -691,12 +542,7 @@ impl Parse for ast::ExprNamedExpr {
}
impl Parse for ast::ExprBinOp {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -714,12 +560,7 @@ impl Parse for ast::ExprBinOp {
}
impl Parse for ast::ExprUnaryOp {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -737,12 +578,7 @@ impl Parse for ast::ExprUnaryOp {
}
impl Parse for ast::ExprLambda {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -760,12 +596,7 @@ impl Parse for ast::ExprLambda {
}
impl Parse for ast::ExprIfExp {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -783,12 +614,7 @@ impl Parse for ast::ExprIfExp {
}
impl Parse for ast::ExprDict {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -806,12 +632,7 @@ impl Parse for ast::ExprDict {
}
impl Parse for ast::ExprSet {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -829,12 +650,7 @@ impl Parse for ast::ExprSet {
}
impl Parse for ast::ExprListComp {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -852,12 +668,7 @@ impl Parse for ast::ExprListComp {
}
impl Parse for ast::ExprSetComp {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -875,12 +686,7 @@ impl Parse for ast::ExprSetComp {
}
impl Parse for ast::ExprDictComp {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -898,12 +704,7 @@ impl Parse for ast::ExprDictComp {
}
impl Parse for ast::ExprGeneratorExp {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -921,12 +722,7 @@ impl Parse for ast::ExprGeneratorExp {
}
impl Parse for ast::ExprAwait {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -944,12 +740,7 @@ impl Parse for ast::ExprAwait {
}
impl Parse for ast::ExprYield {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -967,12 +758,7 @@ impl Parse for ast::ExprYield {
}
impl Parse for ast::ExprYieldFrom {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -990,12 +776,7 @@ impl Parse for ast::ExprYieldFrom {
}
impl Parse for ast::ExprCompare {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1013,12 +794,7 @@ impl Parse for ast::ExprCompare {
}
impl Parse for ast::ExprCall {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1036,12 +812,7 @@ impl Parse for ast::ExprCall {
}
impl Parse for ast::ExprFormattedValue {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1059,12 +830,7 @@ impl Parse for ast::ExprFormattedValue {
}
impl Parse for ast::ExprJoinedStr {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1082,12 +848,7 @@ impl Parse for ast::ExprJoinedStr {
}
impl Parse for ast::ExprConstant {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1105,12 +866,7 @@ impl Parse for ast::ExprConstant {
}
impl Parse for ast::ExprAttribute {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1128,12 +884,7 @@ impl Parse for ast::ExprAttribute {
}
impl Parse for ast::ExprSubscript {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1151,12 +902,7 @@ impl Parse for ast::ExprSubscript {
}
impl Parse for ast::ExprStarred {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1174,12 +920,7 @@ impl Parse for ast::ExprStarred {
}
impl Parse for ast::ExprName {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1197,12 +938,7 @@ impl Parse for ast::ExprName {
}
impl Parse for ast::ExprList {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1220,12 +956,7 @@ impl Parse for ast::ExprList {
}
impl Parse for ast::ExprTuple {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -1243,12 +974,7 @@ impl Parse for ast::ExprTuple {
}
impl Parse for ast::ExprSlice {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,

File diff suppressed because it is too large.

parser/src/lexer/cursor.rs (new file, 107 lines)

@ -0,0 +1,107 @@
use crate::text_size::{TextLen, TextSize};
use std::str::Chars;
pub(crate) const EOF_CHAR: char = '\0';
#[derive(Clone, Debug)]
pub(super) struct Cursor<'a> {
chars: Chars<'a>,
source_length: TextSize,
#[cfg(debug_assertions)]
prev_char: char,
}
impl<'a> Cursor<'a> {
pub(crate) fn new(source: &'a str) -> Self {
Self {
source_length: source.text_len(),
chars: source.chars(),
#[cfg(debug_assertions)]
prev_char: EOF_CHAR,
}
}
/// Returns the previously consumed character. Useful for debug assertions.
#[cfg(debug_assertions)]
pub(super) const fn previous(&self) -> char {
self.prev_char
}
/// Peeks the next character from the input stream without consuming it.
/// Returns [EOF_CHAR] if the cursor is at the end of the file.
pub(super) fn first(&self) -> char {
self.chars.clone().next().unwrap_or(EOF_CHAR)
}
/// Peeks the second character from the input stream without consuming it.
/// Returns [EOF_CHAR] if the position is past the end of the file.
pub(super) fn second(&self) -> char {
let mut chars = self.chars.clone();
chars.next();
chars.next().unwrap_or(EOF_CHAR)
}
/// Returns the remaining text to lex.
pub(super) fn rest(&self) -> &'a str {
self.chars.as_str()
}
// SAFETY: The `source.text_len` call in `new` would panic if the string length is larger than a `u32`.
#[allow(clippy::cast_possible_truncation)]
pub(super) fn text_len(&self) -> TextSize {
TextSize::new(self.chars.as_str().len() as u32)
}
pub(super) fn token_len(&self) -> TextSize {
self.source_length - self.text_len()
}
pub(super) fn start_token(&mut self) {
self.source_length = self.text_len()
}
pub(super) fn is_eof(&self) -> bool {
self.chars.as_str().is_empty()
}
/// Consumes the next character.
pub(super) fn bump(&mut self) -> Option<char> {
let prev = self.chars.next()?;
#[cfg(debug_assertions)]
{
self.prev_char = prev;
}
Some(prev)
}
pub(super) fn eat_char(&mut self, c: char) -> bool {
if self.first() == c {
self.bump();
true
} else {
false
}
}
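/// Eats the next character if `predicate` accepts it. The explicit `is_eof`
/// check below matters because `first` returns `EOF_CHAR` ('\0') at the end
/// of input, which a predicate might otherwise accept.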
pub(super) fn eat_if<F>(&mut self, mut predicate: F) -> Option<char>
where
F: FnMut(char) -> bool,
{
if predicate(self.first()) && !self.is_eof() {
self.bump()
} else {
None
}
}
/// Eats characters while the predicate returns true, or until the end of file is reached.
pub(super) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
// An optimized version of this was tried for e.g. line comments, but
// LLVM can inline all of this and compile it down to fast iteration over bytes.
while predicate(self.first()) && !self.is_eof() {
self.bump();
}
}
}
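A minimal sketch of how the lexer can drive this cursor (illustrative only; lex_name is not a function from this commit):

// Illustrative: consume one identifier and return its length relative to
// the preceding `start_token` call.
fn lex_name(cursor: &mut Cursor) -> TextSize {
    cursor.start_token();
    // `eat_while` stops at end of input because `first` then yields
    // EOF_CHAR, for which this predicate returns false.
    cursor.eat_while(|c| c.is_alphanumeric() || c == '_');
    cursor.token_len()
}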


@ -0,0 +1,129 @@
use static_assertions::assert_eq_size;
use std::cmp::Ordering;
use std::fmt::Debug;
/// The column index of an indentation.
///
/// A space increments the column by one. A tab advances the column to the next
/// multiple of the tab size (2): two positions when the column is even, one when it is odd.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Default)]
pub(super) struct Column(u32);
impl Column {
pub(super) const fn new(column: u32) -> Self {
Self(column)
}
}
/// The number of characters in an indentation. Each character accounts for 1.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Default)]
pub(super) struct Character(u32);
impl Character {
pub(super) const fn new(characters: u32) -> Self {
Self(characters)
}
}
/// The [Indentation](https://docs.python.org/3/reference/lexical_analysis.html#indentation) of a logical line.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Default)]
pub(super) struct Indentation {
column: Column,
character: Character,
}
impl Indentation {
const TAB_SIZE: u32 = 2;
pub(super) const fn root() -> Self {
Self {
column: Column::new(0),
character: Character::new(0),
}
}
#[cfg(test)]
pub(super) const fn new(column: Column, character: Character) -> Self {
Self { character, column }
}
#[must_use]
pub(super) fn add_space(self) -> Self {
Self {
character: Character(self.character.0 + 1),
column: Column(self.column.0 + 1),
}
}
#[must_use]
pub(super) fn add_tab(self) -> Self {
Self {
character: Character(self.character.0 + 1),
// Compute the column index:
// * Adds `TAB_SIZE` if `column` is a multiple of `TAB_SIZE`
// * Rounds `column` up to the next multiple of `TAB_SIZE` otherwise.
// https://github.com/python/cpython/blob/2cf99026d6320f38937257da1ab014fc873a11a6/Parser/tokenizer.c#L1818
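// For example, with TAB_SIZE == 2: a tab at column 4 moves to
// (4 / 2 + 1) * 2 == 6 (two positions), while a tab at column 5
// also moves to 6 (one position).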
column: Column((self.column.0 / Self::TAB_SIZE + 1) * Self::TAB_SIZE),
}
}
pub(super) fn try_compare(
&self,
other: &Indentation,
) -> Result<Ordering, UnexpectedIndentation> {
let column_ordering = self.column.cmp(&other.column);
let character_ordering = self.character.cmp(&other.character);
if column_ordering == character_ordering {
Ok(column_ordering)
} else {
Err(UnexpectedIndentation)
}
}
}
#[derive(Debug, Copy, Clone, PartialEq)]
pub(super) struct UnexpectedIndentation;
// The indentation stack is used to keep track of the current indentation level.
// [See Indentation](https://docs.python.org/3/reference/lexical_analysis.html#indentation).
#[derive(Debug, Clone, Default)]
pub(super) struct Indentations {
stack: Vec<Indentation>,
}
impl Indentations {
pub(super) fn push(&mut self, indent: Indentation) {
debug_assert_eq!(self.current().try_compare(&indent), Ok(Ordering::Less));
self.stack.push(indent);
}
pub(super) fn pop(&mut self) -> Option<Indentation> {
self.stack.pop()
}
pub(super) fn current(&self) -> &Indentation {
static ROOT: Indentation = Indentation::root();
self.stack.last().unwrap_or(&ROOT)
}
}
assert_eq_size!(Indentation, u64);
#[cfg(test)]
mod tests {
use super::{Character, Column, Indentation};
use std::cmp::Ordering;
#[test]
fn indentation_try_compare() {
let tab = Indentation::new(Column::new(8), Character::new(1));
assert_eq!(tab.try_compare(&tab), Ok(Ordering::Equal));
let two_tabs = Indentation::new(Column::new(16), Character::new(2));
assert_eq!(two_tabs.try_compare(&tab), Ok(Ordering::Greater));
assert_eq!(tab.try_compare(&two_tabs), Ok(Ordering::Less));
}
}
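Tracking both counters lets the lexer reject ambiguous tab/space mixtures instead of silently ordering them. A small sketch of the error case, in the spirit of the test above:

// One tab and two spaces both reach column 2 but differ in character
// count, so neither ordering is sound and the comparison errors out.
let tab = Indentation::root().add_tab(); // column 2, 1 character
let two_spaces = Indentation::root().add_space().add_space(); // column 2, 2 characters
assert_eq!(tab.try_compare(&two_spaces), Err(UnexpectedIndentation));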


@ -12,6 +12,12 @@
//! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
//! [`Mode`]: crate::mode
use std::iter;
use itertools::Itertools;
pub(super) use lalrpop_util::ParseError as LalrpopError;
use crate::lexer::{lex, lex_starts_at};
use crate::{
ast::{self, Ranged},
lexer::{self, LexResult, LexicalError, LexicalErrorType},
@ -20,11 +26,6 @@ use crate::{
token::Tok,
Mode,
};
use itertools::Itertools;
use std::iter;
use crate::{lexer::Lexer, soft_keywords::SoftKeywordTransformer};
pub(super) use lalrpop_util::ParseError as LalrpopError;
/// Parse Python code string to implementor's type.
///
@ -56,27 +57,28 @@ pub trait Parse
where
Self: Sized,
{
const MODE: Mode;
fn parse(source: &str, source_path: &str) -> Result<Self, ParseError> {
Self::parse_starts_at(source, source_path, TextSize::default())
let tokens = lex(source, Self::MODE);
Self::parse_tokens(tokens, source_path)
}
fn parse_without_path(source: &str) -> Result<Self, ParseError> {
Self::parse(source, "<unknown>")
}
fn parse_starts_at(
source: &str,
source_path: &str,
offset: TextSize,
) -> Result<Self, ParseError> {
let lxr = Self::lex_starts_at(source, offset);
#[cfg(feature = "full-lexer")]
let lxr =
lxr.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
Self::parse_tokens(lxr, source_path)
let tokens = lex_starts_at(source, Self::MODE, offset);
Self::parse_tokens(tokens, source_path)
}
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>>;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -84,17 +86,13 @@ where
}
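Each implementor now only declares its lexing MODE; the default parse and parse_starts_at methods do the lexing themselves. Call sites are unchanged; a usage sketch, assuming the crate's public re-exports stay as before:

// Sketch: parsing still goes through the Parse trait.
use rustpython_parser::{ast, Parse};

fn demo() -> Result<(), rustpython_parser::ParseError> {
    let module = ast::ModModule::parse("x = 1\n", "<embedded>")?;
    assert_eq!(module.body.len(), 1);
    Ok(())
}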
impl Parse for ast::ModModule {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
lexer::lex_starts_at(source, Mode::Module, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
) -> Result<Self, ParseError> {
match parse_filtered_tokens(lxr, Mode::Module, source_path)? {
match parse_tokens(lxr, Mode::Module, source_path)? {
ast::Mod::Module(m) => Ok(m),
_ => unreachable!("Mode::Module doesn't return other variant"),
}
@ -102,17 +100,13 @@ impl Parse for ast::ModModule {
}
impl Parse for ast::ModExpression {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
lexer::lex_starts_at(source, Mode::Expression, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
) -> Result<Self, ParseError> {
match parse_filtered_tokens(lxr, Mode::Expression, source_path)? {
match parse_tokens(lxr, Mode::Expression, source_path)? {
ast::Mod::Expression(m) => Ok(m),
_ => unreachable!("Mode::Expression doesn't return other variant"),
}
@ -120,17 +114,12 @@ impl Parse for ast::ModExpression {
}
impl Parse for ast::ModInteractive {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
lexer::lex_starts_at(source, Mode::Interactive, offset)
}
const MODE: Mode = Mode::Interactive;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
) -> Result<Self, ParseError> {
match parse_filtered_tokens(lxr, Mode::Interactive, source_path)? {
match parse_tokens(lxr, Mode::Interactive, source_path)? {
ast::Mod::Interactive(m) => Ok(m),
_ => unreachable!("Mode::Interactive doesn't return other variant"),
}
@ -138,12 +127,8 @@ impl Parse for ast::ModInteractive {
}
impl Parse for ast::Suite {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::ModModule::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -153,12 +138,8 @@ impl Parse for ast::Suite {
}
impl Parse for ast::Stmt {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::ModModule::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Module;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -186,12 +167,8 @@ impl Parse for ast::Stmt {
}
impl Parse for ast::Expr {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::ModExpression::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -201,12 +178,8 @@ impl Parse for ast::Expr {
}
impl Parse for ast::Identifier {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -227,12 +200,8 @@ impl Parse for ast::Identifier {
}
impl Parse for ast::Constant {
fn lex_starts_at(
source: &str,
offset: TextSize,
) -> SoftKeywordTransformer<Lexer<std::str::Chars>> {
ast::Expr::lex_starts_at(source, offset)
}
const MODE: Mode = Mode::Expression;
fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source_path: &str,
@ -426,10 +395,12 @@ pub fn parse_tokens(
source_path: &str,
) -> Result<ast::Mod, ParseError> {
let lxr = lxr.into_iter();
#[cfg(feature = "full-lexer")]
let lxr =
lxr.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
parse_filtered_tokens(lxr, mode, source_path)
parse_filtered_tokens(
lxr.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline)),
mode,
source_path,
)
}
fn parse_filtered_tokens(
@ -571,8 +542,10 @@ include!("gen/parse.rs");
#[cfg(test)]
mod tests {
use super::*;
use crate::{ast, Parse};
use insta::assert_debug_snapshot;
use super::*;
#[test]
fn test_parse_empty() {
@ -656,7 +629,6 @@ class Foo(A, B):
}
#[test]
#[cfg(feature = "all-nodes-with-ranges")]
fn test_parse_class_generic_types() {
let source = "\
# TypeVar
@ -687,7 +659,6 @@ class Foo[X, Y: str, *U, **P]():
insta::assert_debug_snapshot!(ast::Suite::parse(source, "<test>").unwrap());
}
#[test]
#[cfg(feature = "all-nodes-with-ranges")]
fn test_parse_function_definition() {
let source = "\
def func(a):
@ -985,6 +956,57 @@ x = type = 1
insta::assert_debug_snapshot!(ast::Suite::parse(source, "<test>").unwrap());
}
#[test]
fn numeric_literals() {
let source = r#"x = 123456789
x = 123456
x = .1
x = 1.
x = 1E+1
x = 1E-1
x = 1.000_000_01
x = 123456789.123456789
x = 123456789.123456789E123456789
x = 123456789E123456789
x = 123456789J
x = 123456789.123456789J
x = 0XB1ACC
x = 0B1011
x = 0O777
x = 0.000000006
x = 10000
x = 133333
"#;
insta::assert_debug_snapshot!(ast::Suite::parse(source, "<test>").unwrap());
}
#[test]
fn numeric_literals_attribute_access() {
let source = r#"x = .1.is_integer()
x = 1. .imag
x = 1E+1.imag
x = 1E-1.real
x = 123456789.123456789.hex()
x = 123456789.123456789E123456789 .real
x = 123456789E123456789 .conjugate()
x = 123456789J.real
x = 123456789.123456789J.__add__(0b1011.bit_length())
x = 0XB1ACC.conjugate()
x = 0B1011 .conjugate()
x = 0O777 .real
x = 0.000000006 .hex()
x = -100.0000J
if 10 .real:
...
y = 100[no]
y = 100(no)
"#;
assert_debug_snapshot!(ast::Suite::parse(source, "<test>").unwrap())
}
#[test]
fn test_match_as_identifier() {
let source = r#"\


@ -3,8 +3,9 @@
// See also: file:///usr/share/doc/python/html/reference/compound_stmts.html#function-definitions
// See also: https://greentreesnakes.readthedocs.io/en/latest/nodes.html#keyword
use num_bigint::BigInt;
use crate::{
ast::{self as ast, Ranged, bigint::BigInt, MagicKind},
ast::{self as ast, Ranged, MagicKind},
Mode,
lexer::{LexicalError, LexicalErrorType},
function::{ArgumentList, parse_args, validate_pos_params, validate_arguments},
@ -19,7 +20,7 @@ grammar(mode: Mode);
// This is a hack to reduce the amount of lalrpop tables generated:
// For each public entry point, a full parse table is generated.
// By having only a single pub function, we reduce this to one.
pub Top: ast::Mod = {
pub(crate) Top: ast::Mod = {
<start:@L> StartModule <body:Program> <end:@R> => ast::ModModule { body, type_ignores: vec![], range: (start..end).into() }.into(),
<start:@L> StartInteractive <body:Program> <end:@R> => ast::ModInteractive { body, range: (start..end).into() }.into(),
<start:@L> StartExpression <body:TestList> ("\n")* <end:@R> => ast::ModExpression { body: Box::new(body), range: (start..end).into() }.into()

parser/src/python.rs (generated, 16 lines changed)

@ -1,7 +1,8 @@
// auto-generated: "lalrpop 0.20.0"
// sha3: fa57e02e9e5bfceb811748310e8d17940d15b6c6e2d6191d9ae71b2e4dc435d8
// sha3: 263bb187f0a83dfe2a024fa0eed0ad8cb855da5991584b5040fa7d870fdb84af
use num_bigint::BigInt;
use crate::{
ast::{self as ast, Ranged, bigint::BigInt, MagicKind},
ast::{self as ast, Ranged, MagicKind},
Mode,
lexer::{LexicalError, LexicalErrorType},
function::{ArgumentList, parse_args, validate_pos_params, validate_arguments},
@ -21,8 +22,9 @@ extern crate alloc;
#[allow(non_snake_case, non_camel_case_types, unused_mut, unused_variables, unused_imports, unused_parens, clippy::all)]
mod __parse__Top {
use num_bigint::BigInt;
use crate::{
ast::{self as ast, Ranged, bigint::BigInt, MagicKind},
ast::{self as ast, Ranged, MagicKind},
Mode,
lexer::{LexicalError, LexicalErrorType},
function::{ArgumentList, parse_args, validate_pos_params, validate_arguments},
@ -11488,19 +11490,19 @@ mod __parse__Top {
_ => panic!("invalid reduction index {}", __reduce_index)
}
}
pub struct TopParser {
pub(crate) struct TopParser {
_priv: (),
}
impl TopParser {
pub fn new() -> TopParser {
pub(crate) fn new() -> TopParser {
TopParser {
_priv: (),
}
}
#[allow(dead_code)]
pub fn parse<
pub(crate) fn parse<
__TOKEN: __ToTriple<>,
__TOKENS: IntoIterator<Item=__TOKEN>,
>(
@ -30744,7 +30746,7 @@ mod __parse__Top {
(3, 276)
}
}
pub use self::__parse__Top::TopParser;
pub(crate) use self::__parse__Top::TopParser;
#[allow(unused_variables)]
#[allow(clippy::too_many_arguments)]


@ -0,0 +1,440 @@
---
source: parser/src/parser.rs
expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
---
[
Assign(
StmtAssign {
range: 0..13,
targets: [
Name(
ExprName {
range: 0..1,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 4..13,
value: Int(
123456789,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 14..24,
targets: [
Name(
ExprName {
range: 14..15,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 18..24,
value: Int(
123456,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 25..31,
targets: [
Name(
ExprName {
range: 25..26,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 29..31,
value: Float(
0.1,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 32..38,
targets: [
Name(
ExprName {
range: 32..33,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 36..38,
value: Float(
1.0,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 39..47,
targets: [
Name(
ExprName {
range: 39..40,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 43..47,
value: Float(
10.0,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 48..56,
targets: [
Name(
ExprName {
range: 48..49,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 52..56,
value: Float(
0.1,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 57..73,
targets: [
Name(
ExprName {
range: 57..58,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 61..73,
value: Float(
1.00000001,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 74..97,
targets: [
Name(
ExprName {
range: 74..75,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 78..97,
value: Float(
123456789.12345679,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 98..131,
targets: [
Name(
ExprName {
range: 98..99,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 102..131,
value: Float(
inf,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 132..155,
targets: [
Name(
ExprName {
range: 132..133,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 136..155,
value: Float(
inf,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 156..170,
targets: [
Name(
ExprName {
range: 156..157,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 160..170,
value: Complex {
real: 0.0,
imag: 123456789.0,
},
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 171..195,
targets: [
Name(
ExprName {
range: 171..172,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 175..195,
value: Complex {
real: 0.0,
imag: 123456789.12345679,
},
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 196..207,
targets: [
Name(
ExprName {
range: 196..197,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 200..207,
value: Int(
727756,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 208..218,
targets: [
Name(
ExprName {
range: 208..209,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 212..218,
value: Int(
11,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 219..228,
targets: [
Name(
ExprName {
range: 219..220,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 223..228,
value: Int(
511,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 229..244,
targets: [
Name(
ExprName {
range: 229..230,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 233..244,
value: Float(
6e-9,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 245..254,
targets: [
Name(
ExprName {
range: 245..246,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 249..254,
value: Int(
10000,
),
kind: None,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 255..265,
targets: [
Name(
ExprName {
range: 255..256,
id: "x",
ctx: Store,
},
),
],
value: Constant(
ExprConstant {
range: 259..265,
value: Int(
133333,
),
kind: None,
},
),
type_comment: None,
},
),
]


@ -0,0 +1,672 @@
---
source: parser/src/parser.rs
expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
---
[
Assign(
StmtAssign {
range: 0..19,
targets: [
Name(
ExprName {
range: 0..1,
id: "x",
ctx: Store,
},
),
],
value: Call(
ExprCall {
range: 4..19,
func: Attribute(
ExprAttribute {
range: 4..17,
value: Constant(
ExprConstant {
range: 4..6,
value: Float(
0.1,
),
kind: None,
},
),
attr: Identifier {
id: "is_integer",
range: 7..17,
},
ctx: Load,
},
),
args: [],
keywords: [],
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 20..32,
targets: [
Name(
ExprName {
range: 20..21,
id: "x",
ctx: Store,
},
),
],
value: Attribute(
ExprAttribute {
range: 24..32,
value: Constant(
ExprConstant {
range: 24..26,
value: Float(
1.0,
),
kind: None,
},
),
attr: Identifier {
id: "imag",
range: 28..32,
},
ctx: Load,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 33..46,
targets: [
Name(
ExprName {
range: 33..34,
id: "x",
ctx: Store,
},
),
],
value: Attribute(
ExprAttribute {
range: 37..46,
value: Constant(
ExprConstant {
range: 37..41,
value: Float(
10.0,
),
kind: None,
},
),
attr: Identifier {
id: "imag",
range: 42..46,
},
ctx: Load,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 47..60,
targets: [
Name(
ExprName {
range: 47..48,
id: "x",
ctx: Store,
},
),
],
value: Attribute(
ExprAttribute {
range: 51..60,
value: Constant(
ExprConstant {
range: 51..55,
value: Float(
0.1,
),
kind: None,
},
),
attr: Identifier {
id: "real",
range: 56..60,
},
ctx: Load,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 61..90,
targets: [
Name(
ExprName {
range: 61..62,
id: "x",
ctx: Store,
},
),
],
value: Call(
ExprCall {
range: 65..90,
func: Attribute(
ExprAttribute {
range: 65..88,
value: Constant(
ExprConstant {
range: 65..84,
value: Float(
123456789.12345679,
),
kind: None,
},
),
attr: Identifier {
id: "hex",
range: 85..88,
},
ctx: Load,
},
),
args: [],
keywords: [],
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 91..130,
targets: [
Name(
ExprName {
range: 91..92,
id: "x",
ctx: Store,
},
),
],
value: Attribute(
ExprAttribute {
range: 95..130,
value: Constant(
ExprConstant {
range: 95..124,
value: Float(
inf,
),
kind: None,
},
),
attr: Identifier {
id: "real",
range: 126..130,
},
ctx: Load,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 131..167,
targets: [
Name(
ExprName {
range: 131..132,
id: "x",
ctx: Store,
},
),
],
value: Call(
ExprCall {
range: 135..167,
func: Attribute(
ExprAttribute {
range: 135..165,
value: Constant(
ExprConstant {
range: 135..154,
value: Float(
inf,
),
kind: None,
},
),
attr: Identifier {
id: "conjugate",
range: 156..165,
},
ctx: Load,
},
),
args: [],
keywords: [],
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 168..187,
targets: [
Name(
ExprName {
range: 168..169,
id: "x",
ctx: Store,
},
),
],
value: Attribute(
ExprAttribute {
range: 172..187,
value: Constant(
ExprConstant {
range: 172..182,
value: Complex {
real: 0.0,
imag: 123456789.0,
},
kind: None,
},
),
attr: Identifier {
id: "real",
range: 183..187,
},
ctx: Load,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 188..241,
targets: [
Name(
ExprName {
range: 188..189,
id: "x",
ctx: Store,
},
),
],
value: Call(
ExprCall {
range: 192..241,
func: Attribute(
ExprAttribute {
range: 192..220,
value: Constant(
ExprConstant {
range: 192..212,
value: Complex {
real: 0.0,
imag: 123456789.12345679,
},
kind: None,
},
),
attr: Identifier {
id: "__add__",
range: 213..220,
},
ctx: Load,
},
),
args: [
Call(
ExprCall {
range: 221..240,
func: Attribute(
ExprAttribute {
range: 221..238,
value: Constant(
ExprConstant {
range: 221..227,
value: Int(
11,
),
kind: None,
},
),
attr: Identifier {
id: "bit_length",
range: 228..238,
},
ctx: Load,
},
),
args: [],
keywords: [],
},
),
],
keywords: [],
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 242..265,
targets: [
Name(
ExprName {
range: 242..243,
id: "x",
ctx: Store,
},
),
],
value: Call(
ExprCall {
range: 246..265,
func: Attribute(
ExprAttribute {
range: 246..263,
value: Constant(
ExprConstant {
range: 246..253,
value: Int(
727756,
),
kind: None,
},
),
attr: Identifier {
id: "conjugate",
range: 254..263,
},
ctx: Load,
},
),
args: [],
keywords: [],
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 266..289,
targets: [
Name(
ExprName {
range: 266..267,
id: "x",
ctx: Store,
},
),
],
value: Call(
ExprCall {
range: 270..289,
func: Attribute(
ExprAttribute {
range: 270..287,
value: Constant(
ExprConstant {
range: 270..276,
value: Int(
11,
),
kind: None,
},
),
attr: Identifier {
id: "conjugate",
range: 278..287,
},
ctx: Load,
},
),
args: [],
keywords: [],
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 290..305,
targets: [
Name(
ExprName {
range: 290..291,
id: "x",
ctx: Store,
},
),
],
value: Attribute(
ExprAttribute {
range: 294..305,
value: Constant(
ExprConstant {
range: 294..299,
value: Int(
511,
),
kind: None,
},
),
attr: Identifier {
id: "real",
range: 301..305,
},
ctx: Load,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 306..329,
targets: [
Name(
ExprName {
range: 306..307,
id: "x",
ctx: Store,
},
),
],
value: Call(
ExprCall {
range: 310..329,
func: Attribute(
ExprAttribute {
range: 310..327,
value: Constant(
ExprConstant {
range: 310..321,
value: Float(
6e-9,
),
kind: None,
},
),
attr: Identifier {
id: "hex",
range: 324..327,
},
ctx: Load,
},
),
args: [],
keywords: [],
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 330..344,
targets: [
Name(
ExprName {
range: 330..331,
id: "x",
ctx: Store,
},
),
],
value: UnaryOp(
ExprUnaryOp {
range: 334..344,
op: USub,
operand: Constant(
ExprConstant {
range: 335..344,
value: Complex {
real: 0.0,
imag: 100.0,
},
kind: None,
},
),
},
),
type_comment: None,
},
),
If(
StmtIf {
range: 346..366,
test: Attribute(
ExprAttribute {
range: 349..357,
value: Constant(
ExprConstant {
range: 349..351,
value: Int(
10,
),
kind: None,
},
),
attr: Identifier {
id: "real",
range: 353..357,
},
ctx: Load,
},
),
body: [
Expr(
StmtExpr {
range: 363..366,
value: Constant(
ExprConstant {
range: 363..366,
value: Ellipsis,
kind: None,
},
),
},
),
],
elif_else_clauses: [],
},
),
Assign(
StmtAssign {
range: 368..379,
targets: [
Name(
ExprName {
range: 368..369,
id: "y",
ctx: Store,
},
),
],
value: Subscript(
ExprSubscript {
range: 372..379,
value: Constant(
ExprConstant {
range: 372..375,
value: Int(
100,
),
kind: None,
},
),
slice: Name(
ExprName {
range: 376..378,
id: "no",
ctx: Load,
},
),
ctx: Load,
},
),
type_comment: None,
},
),
Assign(
StmtAssign {
range: 380..391,
targets: [
Name(
ExprName {
range: 380..381,
id: "y",
ctx: Store,
},
),
],
value: Call(
ExprCall {
range: 384..391,
func: Constant(
ExprConstant {
range: 384..387,
value: Int(
100,
),
kind: None,
},
),
args: [
Name(
ExprName {
range: 388..390,
id: "no",
ctx: Load,
},
),
],
keywords: [],
},
),
type_comment: None,
},
),
]


@ -6,9 +6,10 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
ClassDef(
StmtClassDef {
range: 10..29,
name: Identifier(
"Foo",
),
name: Identifier {
id: "Foo",
range: 16..19,
},
bases: [],
keywords: [],
body: [
@ -25,26 +26,28 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
type_params: [
TypeVar(
TypeParamTypeVar {
range: 20..21,
name: Identifier(
"T",
),
name: Identifier {
id: "T",
range: 20..21,
},
bound: None,
},
),
],
decorator_list: [],
},
),
ClassDef(
StmtClassDef {
range: 52..76,
name: Identifier(
"Foo",
),
name: Identifier {
id: "Foo",
range: 58..61,
},
bases: [],
keywords: [],
body: [
@ -61,21 +64,19 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
type_params: [
TypeVar(
TypeParamTypeVar {
range: 62..68,
name: Identifier(
"T",
),
name: Identifier {
id: "T",
range: 62..63,
},
bound: Some(
Name(
ExprName {
range: 65..68,
id: Identifier(
"str",
),
id: "str",
ctx: Load,
},
),
@ -83,14 +84,16 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
},
),
ClassDef(
StmtClassDef {
range: 105..138,
name: Identifier(
"Foo",
),
name: Identifier {
id: "Foo",
range: 111..114,
},
bases: [],
keywords: [],
body: [
@ -107,14 +110,14 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
type_params: [
TypeVar(
TypeParamTypeVar {
range: 115..130,
name: Identifier(
"T",
),
name: Identifier {
id: "T",
range: 115..116,
},
bound: Some(
Tuple(
ExprTuple {
@ -123,18 +126,14 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
Name(
ExprName {
range: 119..122,
id: Identifier(
"str",
),
id: "str",
ctx: Load,
},
),
Name(
ExprName {
range: 124..129,
id: Identifier(
"bytes",
),
id: "bytes",
ctx: Load,
},
),
@ -146,14 +145,16 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
},
),
ClassDef(
StmtClassDef {
range: 159..181,
name: Identifier(
"Foo",
),
name: Identifier {
id: "Foo",
range: 165..168,
},
bases: [],
keywords: [],
body: [
@ -170,35 +171,38 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
type_params: [
TypeVar(
TypeParamTypeVar {
range: 169..170,
name: Identifier(
"T",
),
name: Identifier {
id: "T",
range: 169..170,
},
bound: None,
},
),
TypeVar(
TypeParamTypeVar {
range: 172..173,
name: Identifier(
"U",
),
name: Identifier {
id: "U",
range: 172..173,
},
bound: None,
},
),
],
decorator_list: [],
},
),
ClassDef(
StmtClassDef {
range: 200..223,
name: Identifier(
"Foo",
),
name: Identifier {
id: "Foo",
range: 206..209,
},
bases: [],
keywords: [],
body: [
@ -215,35 +219,38 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
type_params: [
TypeVar(
TypeParamTypeVar {
range: 210..211,
name: Identifier(
"T",
),
name: Identifier {
id: "T",
range: 210..211,
},
bound: None,
},
),
TypeVar(
TypeParamTypeVar {
range: 213..214,
name: Identifier(
"U",
),
name: Identifier {
id: "U",
range: 213..214,
},
bound: None,
},
),
],
decorator_list: [],
},
),
ClassDef(
StmtClassDef {
range: 240..261,
name: Identifier(
"Foo",
),
name: Identifier {
id: "Foo",
range: 246..249,
},
bases: [],
keywords: [],
body: [
@ -260,25 +267,27 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
type_params: [
TypeVarTuple(
TypeParamTypeVarTuple {
range: 250..253,
name: Identifier(
"Ts",
),
name: Identifier {
id: "Ts",
range: 251..253,
},
},
),
],
decorator_list: [],
},
),
ClassDef(
StmtClassDef {
range: 275..296,
name: Identifier(
"Foo",
),
name: Identifier {
id: "Foo",
range: 281..284,
},
bases: [],
keywords: [],
body: [
@ -295,25 +304,27 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
type_params: [
ParamSpec(
TypeParamParamSpec {
range: 285..288,
name: Identifier(
"P",
),
name: Identifier {
id: "P",
range: 287..288,
},
},
),
],
decorator_list: [],
},
),
ClassDef(
StmtClassDef {
range: 312..351,
name: Identifier(
"Foo",
),
name: Identifier {
id: "Foo",
range: 318..321,
},
bases: [],
keywords: [],
body: [
@ -323,30 +334,29 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
decorator_list: [],
type_params: [
TypeVar(
TypeParamTypeVar {
range: 322..323,
name: Identifier(
"X",
),
name: Identifier {
id: "X",
range: 322..323,
},
bound: None,
},
),
TypeVar(
TypeParamTypeVar {
range: 325..331,
name: Identifier(
"Y",
),
name: Identifier {
id: "Y",
range: 325..326,
},
bound: Some(
Name(
ExprName {
range: 328..331,
id: Identifier(
"str",
),
id: "str",
ctx: Load,
},
),
@ -356,20 +366,23 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
TypeVarTuple(
TypeParamTypeVarTuple {
range: 333..335,
name: Identifier(
"U",
),
name: Identifier {
id: "U",
range: 334..335,
},
},
),
ParamSpec(
TypeParamParamSpec {
range: 337..340,
name: Identifier(
"P",
),
name: Identifier {
id: "P",
range: 339..340,
},
},
),
],
decorator_list: [],
},
),
]
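
The recurring change in this snapshot is that Identifier is now printed as a struct carrying both its text and its own source range (Identifier { id, range }) instead of a bare tuple wrapper, so every name in a type-parameter list knows where it came from. A sketch of observing that on a parsed generic class; the field names are read off the dumps above, and the exact source string is illustrative:

use rustpython_parser::{ast, Parse};

fn main() {
    let suite = ast::Suite::parse("class Foo[T]: ...", "<test>").unwrap();
    if let Some(ast::Stmt::ClassDef(class_def)) = suite.first() {
        // Debug output now includes the name's location, e.g.
        // Identifier { id: "Foo", range: 6..9 }.
        println!("{:#?}", class_def.name);
    }
}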

View File

@ -6,20 +6,22 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
FunctionDef(
StmtFunctionDef {
range: 0..20,
name: Identifier(
"func",
),
name: Identifier {
id: "func",
range: 4..8,
},
args: Arguments {
range: 9..10,
range: 8..11,
posonlyargs: [],
args: [
ArgWithDefault {
range: 9..10,
def: Arg {
range: 9..10,
arg: Identifier(
"a",
),
arg: Identifier {
id: "a",
range: 9..10,
},
annotation: None,
type_comment: None,
},
@ -46,34 +48,34 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
],
decorator_list: [],
returns: None,
type_comment: None,
type_params: [],
type_comment: None,
},
),
FunctionDef(
StmtFunctionDef {
range: 22..53,
name: Identifier(
"func",
),
name: Identifier {
id: "func",
range: 26..30,
},
args: Arguments {
range: 34..38,
range: 33..39,
posonlyargs: [],
args: [
ArgWithDefault {
range: 34..38,
def: Arg {
range: 34..38,
arg: Identifier(
"a",
),
arg: Identifier {
id: "a",
range: 34..35,
},
annotation: Some(
Name(
ExprName {
range: 37..38,
id: Identifier(
"T",
),
id: "T",
ctx: Load,
},
),
@ -106,51 +108,50 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
Name(
ExprName {
range: 43..44,
id: Identifier(
"T",
),
id: "T",
ctx: Load,
},
),
),
type_comment: None,
type_params: [
TypeVar(
TypeParamTypeVar {
range: 31..32,
name: Identifier(
"T",
),
name: Identifier {
id: "T",
range: 31..32,
},
bound: None,
},
),
],
type_comment: None,
},
),
FunctionDef(
StmtFunctionDef {
range: 55..91,
name: Identifier(
"func",
),
name: Identifier {
id: "func",
range: 59..63,
},
args: Arguments {
range: 72..76,
range: 71..77,
posonlyargs: [],
args: [
ArgWithDefault {
range: 72..76,
def: Arg {
range: 72..76,
arg: Identifier(
"a",
),
arg: Identifier {
id: "a",
range: 72..73,
},
annotation: Some(
Name(
ExprName {
range: 75..76,
id: Identifier(
"T",
),
id: "T",
ctx: Load,
},
),
@ -183,28 +184,24 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
Name(
ExprName {
range: 81..82,
id: Identifier(
"T",
),
id: "T",
ctx: Load,
},
),
),
type_comment: None,
type_params: [
TypeVar(
TypeParamTypeVar {
range: 64..70,
name: Identifier(
"T",
),
name: Identifier {
id: "T",
range: 64..65,
},
bound: Some(
Name(
ExprName {
range: 67..70,
id: Identifier(
"str",
),
id: "str",
ctx: Load,
},
),
@ -212,32 +209,33 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
type_comment: None,
},
),
FunctionDef(
StmtFunctionDef {
range: 93..138,
name: Identifier(
"func",
),
name: Identifier {
id: "func",
range: 97..101,
},
args: Arguments {
range: 119..123,
range: 118..124,
posonlyargs: [],
args: [
ArgWithDefault {
range: 119..123,
def: Arg {
range: 119..123,
arg: Identifier(
"a",
),
arg: Identifier {
id: "a",
range: 119..120,
},
annotation: Some(
Name(
ExprName {
range: 122..123,
id: Identifier(
"T",
),
id: "T",
ctx: Load,
},
),
@ -270,21 +268,19 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
Name(
ExprName {
range: 128..129,
id: Identifier(
"T",
),
id: "T",
ctx: Load,
},
),
),
type_comment: None,
type_params: [
TypeVar(
TypeParamTypeVar {
range: 102..117,
name: Identifier(
"T",
),
name: Identifier {
id: "T",
range: 102..103,
},
bound: Some(
Tuple(
ExprTuple {
@ -293,18 +289,14 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
Name(
ExprName {
range: 106..109,
id: Identifier(
"str",
),
id: "str",
ctx: Load,
},
),
Name(
ExprName {
range: 111..116,
id: Identifier(
"bytes",
),
id: "bytes",
ctx: Load,
},
),
@ -316,24 +308,27 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
},
),
],
type_comment: None,
},
),
FunctionDef(
StmtFunctionDef {
range: 140..171,
name: Identifier(
"func",
),
name: Identifier {
id: "func",
range: 144..148,
},
args: Arguments {
range: 154..161,
range: 153..162,
posonlyargs: [],
args: [],
vararg: Some(
Arg {
range: 155..161,
arg: Identifier(
"a",
),
arg: Identifier {
id: "a",
range: 155..156,
},
annotation: Some(
Starred(
ExprStarred {
@ -341,9 +336,7 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
value: Name(
ExprName {
range: 159..161,
id: Identifier(
"Ts",
),
id: "Ts",
ctx: Load,
},
),
@ -373,35 +366,38 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
],
decorator_list: [],
returns: None,
type_comment: None,
type_params: [
TypeVarTuple(
TypeParamTypeVarTuple {
range: 149..152,
name: Identifier(
"Ts",
),
name: Identifier {
id: "Ts",
range: 150..152,
},
},
),
],
type_comment: None,
},
),
FunctionDef(
StmtFunctionDef {
range: 173..230,
name: Identifier(
"func",
),
name: Identifier {
id: "func",
range: 177..181,
},
args: Arguments {
range: 187..220,
range: 186..221,
posonlyargs: [],
args: [],
vararg: Some(
Arg {
range: 188..200,
arg: Identifier(
"args",
),
arg: Identifier {
id: "args",
range: 188..192,
},
annotation: Some(
Attribute(
ExprAttribute {
@ -409,15 +405,14 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
value: Name(
ExprName {
range: 194..195,
id: Identifier(
"P",
),
id: "P",
ctx: Load,
},
),
attr: Identifier(
"args",
),
attr: Identifier {
id: "args",
range: 196..200,
},
ctx: Load,
},
),
@ -429,9 +424,10 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
kwarg: Some(
Arg {
range: 204..220,
arg: Identifier(
"kwargs",
),
arg: Identifier {
id: "kwargs",
range: 204..210,
},
annotation: Some(
Attribute(
ExprAttribute {
@ -439,15 +435,14 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
value: Name(
ExprName {
range: 212..213,
id: Identifier(
"P",
),
id: "P",
ctx: Load,
},
),
attr: Identifier(
"kwargs",
),
attr: Identifier {
id: "kwargs",
range: 214..220,
},
ctx: Load,
},
),
@ -472,25 +467,27 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
],
decorator_list: [],
returns: None,
type_comment: None,
type_params: [
ParamSpec(
TypeParamParamSpec {
range: 182..185,
name: Identifier(
"P",
),
name: Identifier {
id: "P",
range: 184..185,
},
},
),
],
type_comment: None,
},
),
FunctionDef(
StmtFunctionDef {
range: 232..273,
name: Identifier(
"func",
),
name: Identifier {
id: "func",
range: 236..240,
},
args: Arguments {
range: 261..263,
posonlyargs: [],
@ -508,30 +505,29 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
],
decorator_list: [],
returns: None,
type_comment: None,
type_params: [
TypeVar(
TypeParamTypeVar {
range: 241..242,
name: Identifier(
"T",
),
name: Identifier {
id: "T",
range: 241..242,
},
bound: None,
},
),
TypeVar(
TypeParamTypeVar {
range: 244..250,
name: Identifier(
"U",
),
name: Identifier {
id: "U",
range: 244..245,
},
bound: Some(
Name(
ExprName {
range: 247..250,
id: Identifier(
"str",
),
id: "str",
ctx: Load,
},
),
@ -541,20 +537,23 @@ expression: "ast::Suite::parse(source, \"<test>\").unwrap()"
TypeVarTuple(
TypeParamTypeVarTuple {
range: 252..255,
name: Identifier(
"Ts",
),
name: Identifier {
id: "Ts",
range: 253..255,
},
},
),
ParamSpec(
TypeParamParamSpec {
range: 257..260,
name: Identifier(
"P",
),
name: Identifier {
id: "P",
range: 259..260,
},
},
),
],
type_comment: None,
},
),
]
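
Besides the Identifier change, every Arguments node in this file widens its range to include the surrounding parentheses (9..10 becomes 8..11 for the first def func(a): ... above). A sketch that checks this by slicing the source with the node's range; the args.range field access and the usize conversion from TextSize are assumptions based on the dumps and the text_size API:

use rustpython_parser::{ast, Parse};

fn main() {
    let source = "def func(a): ...";
    let suite = ast::Suite::parse(source, "<test>").unwrap();
    if let Some(ast::Stmt::FunctionDef(func)) = suite.first() {
        // Under the new lexer the parameter list spans "(a)", parens included.
        let range = func.args.range;
        let text = &source[usize::from(range.start())..usize::from(range.end())];
        assert_eq!(text, "(a)");
    }
}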

View File

@ -134,7 +134,6 @@ where
self.start_of_line = next.as_ref().map_or(false, |lex_result| {
lex_result.as_ref().map_or(false, |(tok, _)| {
#[cfg(feature = "full-lexer")]
if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
return self.start_of_line;
}
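
This hunk drops the cfg gate: now that Comment and NonLogicalNewline always exist, the soft-keyword transformer skips them unconditionally when tracking whether the upcoming token begins a logical line. A toy restatement of the rule it encodes (stand-in enum, not the crate's Tok):

// Only the variants the check above cares about.
#[allow(dead_code)]
enum Tok {
    Comment(String),
    NonLogicalNewline,
    Newline,
}

// Trivia never changes logical-line state, so the transformer keeps its
// previous start_of_line value when it sees one of these tokens.
fn keeps_previous_start_of_line(tok: &Tok) -> bool {
    matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. })
}

fn main() {
    assert!(keeps_previous_start_of_line(&Tok::Comment("# trailing".into())));
    assert!(!keeps_previous_start_of_line(&Tok::Newline));
}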

View File

@ -736,14 +736,14 @@ pub(crate) fn parse_strings(
#[derive(Debug, PartialEq)]
struct FStringError {
/// The type of error that occurred.
pub error: FStringErrorType,
pub(crate) error: FStringErrorType,
/// The location of the error.
pub location: TextSize,
pub(crate) location: TextSize,
}
impl FStringError {
/// Creates a new `FStringError` with the given error type and location.
pub fn new(error: FStringErrorType, location: TextSize) -> Self {
pub(crate) fn new(error: FStringErrorType, location: TextSize) -> Self {
Self { error, location }
}
}
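
The only change here is visibility: FStringError and its constructor go from pub to pub(crate), so the type stays usable throughout the parser crate but disappears from the public API. A toy illustration of what that buys (hypothetical module, not this crate's layout):

mod fstring {
    // Visible anywhere inside the defining crate, invisible to dependents.
    pub(crate) struct FStringError {
        pub(crate) message: String,
    }
}

fn main() {
    // Sibling modules can still construct and use the type...
    let err = fstring::FStringError { message: "unterminated f-string".into() };
    // ...but a downstream crate that previously imported it no longer compiles.
    println!("{}", err.message);
}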

View File

@ -3,10 +3,10 @@
//! This module defines the tokens that the lexer recognizes. The tokens are
//! loosely based on the token definitions found in the [CPython source].
//!
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h
use crate::ast::bigint::BigInt;
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h;
use crate::ast::MagicKind;
use crate::{text_size::TextSize, Mode};
use num_bigint::BigInt;
use std::fmt;
/// The set of tokens the Python source code can be tokenized in.
@ -52,13 +52,11 @@ pub enum Tok {
kind: MagicKind,
},
/// Token value for a comment. These are filtered out of the token stream prior to parsing.
#[cfg(feature = "full-lexer")]
Comment(String),
/// Token value for a newline.
Newline,
/// Token value for a newline that is not a logical line break. These are filtered out of
/// the token stream prior to parsing.
#[cfg(feature = "full-lexer")]
NonLogicalNewline,
/// Token value for an indent.
Indent,
@ -236,7 +234,6 @@ impl fmt::Display for Tok {
}
MagicCommand { kind, value } => write!(f, "{kind}{value}"),
Newline => f.write_str("Newline"),
#[cfg(feature = "full-lexer")]
NonLogicalNewline => f.write_str("NonLogicalNewline"),
Indent => f.write_str("Indent"),
Dedent => f.write_str("Dedent"),
@ -250,7 +247,6 @@ impl fmt::Display for Tok {
Rsqb => f.write_str("']'"),
Colon => f.write_str("':'"),
Comma => f.write_str("','"),
#[cfg(feature = "full-lexer")]
Comment(value) => f.write_str(value),
Semi => f.write_str("';'"),
Plus => f.write_str("'+'"),
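
With the cfg attributes removed from the enum and its Display impl, Comment and NonLogicalNewline are ordinary tokens in every build instead of full-lexer extras. A closing sketch that lexes a line and prints each token through the Display arms shown above; the lexer::lex entry point follows this repo's README usage and should be treated as an assumption here:

use rustpython_parser::{lexer::lex, Mode, Tok};

fn main() {
    // Comments now always appear in the token stream.
    for (tok, range) in lex("x = 1  # a comment\n", Mode::Module).flatten() {
        if matches!(tok, Tok::Comment(_) | Tok::NonLogicalNewline) {
            println!("trivia at {range:?}: {tok}");
        } else {
            println!("token at {range:?}: {tok}");
        }
    }
}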