From 802df97d559e4849e4db933cc5031b5e00f49bc8 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 27 Jul 2023 11:31:10 +0200 Subject: [PATCH] Fix clippy issues --- rustpython_original/literal/src/cformat.rs | 107 +++++---- rustpython_original/literal/src/escape.rs | 40 +++- rustpython_original/literal/src/float.rs | 21 +- rustpython_original/literal/src/format.rs | 140 ++++++----- .../ruff_python_ast/src/nodes.rs | 11 +- .../ruff_python_parser/src/context.rs | 6 +- .../ruff_python_parser/src/function.rs | 76 +++--- .../ruff_python_parser/src/lexer.rs | 77 +++--- .../ruff_python_parser/src/lexer/cursor.rs | 6 +- .../src/lexer/indentation.rs | 15 +- .../ruff_python_parser/src/{gen => }/parse.rs | 200 +++++++++++++++- .../ruff_python_parser/src/parser.rs | 222 +---------------- .../ruff_python_parser/src/python.lalrpop | 56 +++-- .../ruff_python_parser/src/python.rs | 64 +++-- .../ruff_python_parser/src/soft_keywords.rs | 14 +- .../ruff_python_parser/src/string.rs | 224 ++++++++---------- .../ruff_python_parser/src/token.rs | 7 +- 17 files changed, 657 insertions(+), 629 deletions(-) rename rustpython_original/ruff_python_parser/src/{gen => }/parse.rs (84%) diff --git a/rustpython_original/literal/src/cformat.rs b/rustpython_original/literal/src/cformat.rs index 7a07f5314d..f0aa883bdc 100644 --- a/rustpython_original/literal/src/cformat.rs +++ b/rustpython_original/literal/src/cformat.rs @@ -32,7 +32,9 @@ pub struct CFormatError { impl fmt::Display for CFormatError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use CFormatErrorType::*; + use CFormatErrorType::{ + IncompleteFormat, IntTooBig, UnmatchedKeyParentheses, UnsupportedFormatChar, + }; match self.typ { UnmatchedKeyParentheses => write!(f, "incomplete format key"), IncompleteFormat => write!(f, "incomplete format"), @@ -42,7 +44,9 @@ impl fmt::Display for CFormatError { c, c as u32, self.index ), IntTooBig => write!(f, "width/precision too big"), - _ => write!(f, "unexpected error parsing format string"), + CFormatErrorType::MissingModuloSign => { + write!(f, "unexpected error parsing format string") + } } } } @@ -187,14 +191,14 @@ impl CFormatSpec { let fill_chars_needed = width.saturating_sub(num_chars); let fill_string = CFormatSpec::compute_fill_string(fill_char, fill_chars_needed); - if !fill_string.is_empty() { + if fill_string.is_empty() { + string + } else { if self.flags.contains(CConversionFlags::LEFT_ADJUST) { format!("{string}{fill_string}") } else { format!("{fill_string}{string}") } - } else { - string } } @@ -210,13 +214,13 @@ impl CFormatSpec { let fill_chars_needed = width.saturating_sub(num_chars); let fill_string = CFormatSpec::compute_fill_string(fill_char, fill_chars_needed); - if !fill_string.is_empty() { + if fill_string.is_empty() { + string + } else { // Don't left-adjust if precision-filling: that will always be prepending 0s to %d // arguments, the LEFT_ADJUST flag will be used by a later call to fill_string with // the 0-filled string as the string param. format!("{fill_string}{string}") - } else { - string } } @@ -279,7 +283,7 @@ impl CFormatSpec { } pub fn format_number(&self, num: &BigInt) -> String { - use CNumberType::*; + use CNumberType::{Decimal, Hex, Octal}; let magnitude = num.abs(); let prefix = if self.flags.contains(CConversionFlags::ALTERNATE_FORM) { match self.format_type { @@ -312,10 +316,10 @@ impl CFormatSpec { let padded_magnitude_string = self.fill_string_with_precision(magnitude_string, '0'); if self.flags.contains(CConversionFlags::ZERO_PAD) { - let fill_char = if !self.flags.contains(CConversionFlags::LEFT_ADJUST) { - '0' - } else { + let fill_char = if self.flags.contains(CConversionFlags::LEFT_ADJUST) { ' ' // '-' overrides the '0' conversion if both are given + } else { + '0' }; let signed_prefix = format!("{sign_string}{prefix}"); format!( @@ -386,10 +390,10 @@ impl CFormatSpec { }; if self.flags.contains(CConversionFlags::ZERO_PAD) { - let fill_char = if !self.flags.contains(CConversionFlags::LEFT_ADJUST) { - '0' - } else { + let fill_char = if self.flags.contains(CConversionFlags::LEFT_ADJUST) { ' ' + } else { + '0' }; format!( "{}{}", @@ -462,14 +466,14 @@ where T: Into, I: Iterator, { - use CFloatType::*; - use CNumberType::*; + use CFloatType::{Exponent, General, PointDecimal}; + use CNumberType::{Decimal, Hex, Octal}; let (index, c) = match iter.next() { Some((index, c)) => (index, c.into()), None => { return Err(( CFormatErrorType::IncompleteFormat, - iter.peek().map(|x| x.0).unwrap_or(0), + iter.peek().map_or(0, |x| x.0), )); } }; @@ -494,6 +498,7 @@ where Ok((format_type, c)) } +#[allow(clippy::cast_possible_wrap)] fn parse_quantity(iter: &mut ParseIter) -> Result, ParsingError> where T: Into + Copy, @@ -587,7 +592,7 @@ impl CFormatPart { pub fn has_key(&self) -> bool { match self { CFormatPart::Spec(s) => s.mapping_key.is_some(), - _ => false, + CFormatPart::Literal(_) => false, } } } @@ -640,21 +645,20 @@ impl CFormatBytes { iter.next().unwrap(); literal.push(b'%'); continue; - } else { - if !literal.is_empty() { - parts.push(( - part_index, - CFormatPart::Literal(std::mem::take(&mut literal)), - )); - } - let spec = CFormatSpec::parse(iter).map_err(|err| CFormatError { - typ: err.0, - index: err.1, - })?; - parts.push((index, CFormatPart::Spec(spec))); - if let Some(&(index, _)) = iter.peek() { - part_index = index; - } + } + if !literal.is_empty() { + parts.push(( + part_index, + CFormatPart::Literal(std::mem::take(&mut literal)), + )); + } + let spec = CFormatSpec::parse(iter).map_err(|err| CFormatError { + typ: err.0, + index: err.1, + })?; + parts.push((index, CFormatPart::Spec(spec))); + if let Some(&(index, _)) = iter.peek() { + part_index = index; } } else { return Err(CFormatError { @@ -673,7 +677,7 @@ impl CFormatBytes { } pub fn parse_from_bytes(bytes: &[u8]) -> Result { - let mut iter = bytes.iter().cloned().enumerate().peekable(); + let mut iter = bytes.iter().copied().enumerate().peekable(); Self::parse(&mut iter) } } @@ -701,21 +705,20 @@ impl CFormatString { iter.next().unwrap(); literal.push('%'); continue; - } else { - if !literal.is_empty() { - parts.push(( - part_index, - CFormatPart::Literal(std::mem::take(&mut literal)), - )); - } - let spec = CFormatSpec::parse(iter).map_err(|err| CFormatError { - typ: err.0, - index: err.1, - })?; - parts.push((index, CFormatPart::Spec(spec))); - if let Some(&(index, _)) = iter.peek() { - part_index = index; - } + } + if !literal.is_empty() { + parts.push(( + part_index, + CFormatPart::Literal(std::mem::take(&mut literal)), + )); + } + let spec = CFormatSpec::parse(iter).map_err(|err| CFormatError { + typ: err.0, + index: err.1, + })?; + parts.push((index, CFormatPart::Spec(spec))); + if let Some(&(index, _)) = iter.peek() { + part_index = index; } } else { return Err(CFormatError { @@ -868,7 +871,7 @@ mod tests { .parse::() .unwrap() .format_string("Hello, World!".to_owned()), - "".to_owned() + String::new() ); assert_eq!( "%5.s" @@ -997,7 +1000,7 @@ mod tests { assert_eq!( "%f".parse::() .unwrap() - .format_float(1.2345678901), + .format_float(1.234_567_890_1), "1.234568" ); } diff --git a/rustpython_original/literal/src/escape.rs b/rustpython_original/literal/src/escape.rs index 0cb07adbc0..c291e13034 100644 --- a/rustpython_original/literal/src/escape.rs +++ b/rustpython_original/literal/src/escape.rs @@ -6,7 +6,8 @@ pub enum Quote { impl Quote { #[inline] - pub const fn swap(self) -> Quote { + #[must_use] + pub const fn swap(self) -> Self { match self { Quote::Single => Quote::Double, Quote::Double => Quote::Single, @@ -126,6 +127,11 @@ impl std::fmt::Display for StrRepr<'_, '_> { impl UnicodeEscape<'_> { const REPR_RESERVED_LEN: usize = 2; // for quotes + #[allow( + clippy::cast_possible_wrap, + clippy::cast_possible_truncation, + clippy::cast_sign_loss + )] pub fn repr_layout(source: &str, preferred_quote: Quote) -> EscapeLayout { Self::output_layout_with_checker(source, preferred_quote, |a, b| { Some((a as isize).checked_add(b as isize)? as usize) @@ -155,8 +161,15 @@ impl UnicodeEscape<'_> { }; let Some(new_len) = length_add(out_len, incr) else { #[cold] - fn stop(single_count: usize, double_count: usize, preferred_quote: Quote) -> EscapeLayout { - EscapeLayout { quote: choose_quote(single_count, double_count, preferred_quote).0, len: None } + fn stop( + single_count: usize, + double_count: usize, + preferred_quote: Quote, + ) -> EscapeLayout { + EscapeLayout { + quote: choose_quote(single_count, double_count, preferred_quote).0, + len: None, + } } return stop(single_count, double_count, preferred_quote); }; @@ -296,12 +309,22 @@ impl<'a> AsciiEscape<'a> { } impl AsciiEscape<'_> { + #[allow( + clippy::cast_possible_wrap, + clippy::cast_possible_truncation, + clippy::cast_sign_loss + )] pub fn repr_layout(source: &[u8], preferred_quote: Quote) -> EscapeLayout { Self::output_layout_with_checker(source, preferred_quote, 3, |a, b| { Some((a as isize).checked_add(b as isize)? as usize) }) } + #[allow( + clippy::cast_possible_wrap, + clippy::cast_possible_truncation, + clippy::cast_sign_loss + )] pub fn named_repr_layout(source: &[u8], name: &str) -> EscapeLayout { Self::output_layout_with_checker(source, Quote::Single, name.len() + 2 + 3, |a, b| { Some((a as isize).checked_add(b as isize)? as usize) @@ -332,8 +355,15 @@ impl AsciiEscape<'_> { }; let Some(new_len) = length_add(out_len, incr) else { #[cold] - fn stop(single_count: usize, double_count: usize, preferred_quote: Quote) -> EscapeLayout { - EscapeLayout { quote: choose_quote(single_count, double_count, preferred_quote).0, len: None } + fn stop( + single_count: usize, + double_count: usize, + preferred_quote: Quote, + ) -> EscapeLayout { + EscapeLayout { + quote: choose_quote(single_count, double_count, preferred_quote).0, + len: None, + } } return stop(single_count, double_count, preferred_quote); }; diff --git a/rustpython_original/literal/src/float.rs b/rustpython_original/literal/src/float.rs index bfd7d43e86..2a9b021f87 100644 --- a/rustpython_original/literal/src/float.rs +++ b/rustpython_original/literal/src/float.rs @@ -7,7 +7,7 @@ pub fn parse_str(literal: &str) -> Option { } pub fn parse_bytes(literal: &[u8]) -> Option { - parse_inner(trim_slice(literal, |b| b.is_ascii_whitespace())) + parse_inner(trim_slice(literal, u8::is_ascii_whitespace)) } fn trim_slice(v: &[T], mut trim: impl FnMut(&T) -> bool) -> &[T] { @@ -72,7 +72,7 @@ pub fn format_fixed(precision: usize, magnitude: f64, case: Case, alternate_form } magnitude if magnitude.is_nan() => format_nan(case), magnitude if magnitude.is_infinite() => format_inf(case), - _ => "".to_string(), + _ => String::new(), } } @@ -99,7 +99,7 @@ pub fn format_exponent( } magnitude if magnitude.is_nan() => format_nan(case), magnitude if magnitude.is_infinite() => format_inf(case), - _ => "".to_string(), + _ => String::new(), } } @@ -132,6 +132,11 @@ fn remove_trailing_decimal_point(s: String) -> String { s } +#[allow( + clippy::cast_sign_loss, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap +)] pub fn format_general( precision: usize, magnitude: f64, @@ -145,7 +150,7 @@ pub fn format_general( let mut parts = r_exp.splitn(2, 'e'); let base = parts.next().unwrap(); let exponent = parts.next().unwrap().parse::().unwrap(); - if exponent < -4 || exponent + (always_shows_fract as i64) >= (precision as i64) { + if exponent < -4 || exponent + i64::from(always_shows_fract) >= (precision as i64) { let e = match case { Case::Lower => 'e', Case::Upper => 'E', @@ -164,7 +169,7 @@ pub fn format_general( } magnitude if magnitude.is_nan() => format_nan(case), magnitude if magnitude.is_infinite() => format_inf(case), - _ => "".to_string(), + _ => String::new(), } } @@ -231,7 +236,7 @@ pub fn from_hex(s: &str) -> Option { if !has_p && has_dot { hex.push_str("p0"); } else if !has_p && !has_dot { - hex.push_str(".p0") + hex.push_str(".p0"); } hexf_parse::parse_hexf64(hex.as_str(), false).ok() @@ -261,6 +266,7 @@ pub fn to_hex(value: f64) -> String { } #[test] +#[allow(clippy::float_cmp)] fn test_to_hex() { use rand::Rng; for _ in 0..20000 { @@ -273,7 +279,8 @@ fn test_to_hex() { // println!("{} -> {}", f, hex); let roundtrip = hexf_parse::parse_hexf64(&hex, false).unwrap(); // println!(" -> {}", roundtrip); - assert!(f == roundtrip, "{} {} {}", f, hex, roundtrip); + + assert_eq!(f, roundtrip, "{f} {hex} {roundtrip}"); } } diff --git a/rustpython_original/literal/src/format.rs b/rustpython_original/literal/src/format.rs index b80adc9a41..7d3625b4db 100644 --- a/rustpython_original/literal/src/format.rs +++ b/rustpython_original/literal/src/format.rs @@ -121,7 +121,7 @@ impl FormatParse for FormatGrouping { } } -#[derive(Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq)] pub enum FormatType { String, Binary, @@ -311,8 +311,13 @@ impl FormatSpec { .collect::() } + #[allow( + clippy::cast_possible_wrap, + clippy::cast_possible_truncation, + clippy::cast_sign_loss + )] fn add_magnitude_separators_for_char( - magnitude_str: String, + magnitude_str: &str, inter: i32, sep: char, disp_digit_cnt: i32, @@ -324,11 +329,16 @@ impl FormatSpec { let int_digit_cnt = disp_digit_cnt - dec_digit_cnt; let mut result = FormatSpec::separate_integer(magnitude_int_str, inter, sep, int_digit_cnt); if let Some(part) = parts.next() { - result.push_str(&format!(".{part}")) + result.push_str(&format!(".{part}")); } result } + #[allow( + clippy::cast_sign_loss, + clippy::cast_possible_wrap, + clippy::cast_possible_truncation + )] fn separate_integer( magnitude_str: String, inter: i32, @@ -336,7 +346,7 @@ impl FormatSpec { disp_digit_cnt: i32, ) -> String { let magnitude_len = magnitude_str.len() as i32; - let offset = (disp_digit_cnt % (inter + 1) == 0) as i32; + let offset = i32::from(disp_digit_cnt % (inter + 1) == 0); let disp_digit_cnt = disp_digit_cnt + offset; let pad_cnt = disp_digit_cnt - magnitude_len; let sep_cnt = disp_digit_cnt / (inter + 1); @@ -353,9 +363,14 @@ impl FormatSpec { } } + #[allow( + clippy::cast_sign_loss, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap + )] fn insert_separator(mut magnitude_str: String, inter: i32, sep: char, sep_cnt: i32) -> String { let magnitude_len = magnitude_str.len() as i32; - for i in 1..sep_cnt + 1 { + for i in 1..=sep_cnt { magnitude_str.insert((magnitude_len - inter * i) as usize, sep); } magnitude_str @@ -396,6 +411,7 @@ impl FormatSpec { } } + #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] fn add_magnitude_separators(&self, magnitude_str: String, prefix: &str) -> String { match &self.grouping_option { Some(fg) => { @@ -408,7 +424,7 @@ impl FormatSpec { let width = self.width.unwrap_or(magnitude_len) as i32 - prefix.len() as i32; let disp_digit_cnt = cmp::max(width, magnitude_len as i32); FormatSpec::add_magnitude_separators_for_char( - magnitude_str, + &magnitude_str, inter, sep, disp_digit_cnt, @@ -431,7 +447,7 @@ impl FormatSpec { | FormatType::Character, ) => self.format_int(&BigInt::from_u8(x).unwrap()), Some(FormatType::Exponent(_) | FormatType::FixedPoint(_) | FormatType::Percentage) => { - self.format_float(x as f64) + self.format_float(f64::from(x)) } None => { let first_letter = (input.to_string().as_bytes()[0] as char).to_uppercase(); @@ -452,17 +468,19 @@ impl FormatSpec { *case, self.alternate_form, )), - Some(FormatType::Decimal) - | Some(FormatType::Binary) - | Some(FormatType::Octal) - | Some(FormatType::Hex(_)) - | Some(FormatType::String) - | Some(FormatType::Character) - | Some(FormatType::Number(Case::Upper)) => { + Some( + FormatType::Decimal + | FormatType::Binary + | FormatType::Octal + | FormatType::Hex(_) + | FormatType::String + | FormatType::Character + | FormatType::Number(Case::Upper), + ) => { let ch = char::from(self.format_type.as_ref().unwrap()); Err(FormatSpecError::UnknownFormatCode(ch, "float")) } - Some(FormatType::GeneralFormat(case)) | Some(FormatType::Number(case)) => { + Some(FormatType::GeneralFormat(case) | FormatType::Number(case)) => { let precision = if precision == 0 { 1 } else { precision }; Ok(float::format_general( precision, @@ -513,11 +531,17 @@ impl FormatSpec { } }; let magnitude_str = self.add_magnitude_separators(raw_magnitude_str?, sign_str); - self.format_sign_and_align(&AsciiStr::new(&magnitude_str), sign_str, FormatAlign::Right) + Ok( + self.format_sign_and_align( + &AsciiStr::new(&magnitude_str), + sign_str, + FormatAlign::Right, + ), + ) } #[inline] - fn format_int_radix(&self, magnitude: BigInt, radix: u32) -> Result { + fn format_int_radix(&self, magnitude: &BigInt, radix: u32) -> Result { match self.precision { Some(_) => Err(FormatSpecError::PrecisionNotAllowed), None => Ok(magnitude.to_str_radix(radix)), @@ -539,19 +563,21 @@ impl FormatSpec { "" }; let raw_magnitude_str = match self.format_type { - Some(FormatType::Binary) => self.format_int_radix(magnitude, 2), - Some(FormatType::Decimal) => self.format_int_radix(magnitude, 10), - Some(FormatType::Octal) => self.format_int_radix(magnitude, 8), - Some(FormatType::Hex(Case::Lower)) => self.format_int_radix(magnitude, 16), - Some(FormatType::Hex(Case::Upper)) => match self.precision { - Some(_) => Err(FormatSpecError::PrecisionNotAllowed), - None => { + Some(FormatType::Binary) => self.format_int_radix(&magnitude, 2), + Some(FormatType::Decimal) => self.format_int_radix(&magnitude, 10), + Some(FormatType::Octal) => self.format_int_radix(&magnitude, 8), + Some(FormatType::Hex(Case::Lower)) => self.format_int_radix(&magnitude, 16), + Some(FormatType::Hex(Case::Upper)) => { + if self.precision.is_some() { + Err(FormatSpecError::PrecisionNotAllowed) + } else { let mut result = magnitude.to_str_radix(16); result.make_ascii_uppercase(); Ok(result) } - }, - Some(FormatType::Number(Case::Lower)) => self.format_int_radix(magnitude, 10), + } + + Some(FormatType::Number(Case::Lower)) => self.format_int_radix(&magnitude, 10), Some(FormatType::Number(Case::Upper)) => { Err(FormatSpecError::UnknownFormatCode('N', "int")) } @@ -560,18 +586,20 @@ impl FormatSpec { (Some(_), _) => Err(FormatSpecError::NotAllowed("Sign")), (_, true) => Err(FormatSpecError::NotAllowed("Alternate form (#)")), (_, _) => match num.to_u32() { - Some(n) if n <= 0x10ffff => Ok(std::char::from_u32(n).unwrap().to_string()), + Some(n) if n <= 0x0010_ffff => Ok(std::char::from_u32(n).unwrap().to_string()), Some(_) | None => Err(FormatSpecError::CodeNotInRange), }, }, - Some(FormatType::GeneralFormat(_)) - | Some(FormatType::FixedPoint(_)) - | Some(FormatType::Exponent(_)) - | Some(FormatType::Percentage) => match num.to_f64() { + Some( + FormatType::GeneralFormat(_) + | FormatType::FixedPoint(_) + | FormatType::Exponent(_) + | FormatType::Percentage, + ) => match num.to_f64() { Some(float) => return self.format_float(float), _ => Err(FormatSpecError::UnableToConvert), }, - None => self.format_int_radix(magnitude, 10), + None => self.format_int_radix(&magnitude, 10), }?; let format_sign = self.sign.unwrap_or(FormatSign::Minus); let sign_str = match num.sign() { @@ -584,11 +612,11 @@ impl FormatSpec { }; let sign_prefix = format!("{sign_str}{prefix}"); let magnitude_str = self.add_magnitude_separators(raw_magnitude_str, &sign_prefix); - self.format_sign_and_align( + Ok(self.format_sign_and_align( &AsciiStr::new(&magnitude_str), &sign_prefix, FormatAlign::Right, - ) + )) } pub fn format_string(&self, s: &T) -> Result @@ -597,14 +625,13 @@ impl FormatSpec { { self.validate_format(FormatType::String)?; match self.format_type { - Some(FormatType::String) | None => self - .format_sign_and_align(s, "", FormatAlign::Left) - .map(|mut value| { - if let Some(precision) = self.precision { - value.truncate(precision); - } - value - }), + Some(FormatType::String) | None => { + let mut value = self.format_sign_and_align(s, "", FormatAlign::Left); + if let Some(precision) = self.precision { + value.truncate(precision); + } + Ok(value) + } _ => { let ch = char::from(self.format_type.as_ref().unwrap()); Err(FormatSpecError::UnknownFormatCode(ch, "str")) @@ -612,12 +639,13 @@ impl FormatSpec { } } + #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] fn format_sign_and_align( &self, magnitude_str: &T, sign_str: &str, default_align: FormatAlign, - ) -> Result + ) -> String where T: CharLen + Deref, { @@ -629,8 +657,8 @@ impl FormatSpec { cmp::max(0, (w as i32) - (num_chars as i32) - (sign_str.len() as i32)) }); - let magnitude_str = magnitude_str.deref(); - Ok(match align { + let magnitude_str = &**magnitude_str; + match align { FormatAlign::Left => format!( "{}{}{}", sign_str, @@ -658,7 +686,7 @@ impl FormatSpec { FormatSpec::compute_fill_string(fill_char, right_fill_chars_needed); format!("{left_fill_string}{sign_str}{magnitude_str}{right_fill_string}") } - }) + } } } @@ -801,7 +829,7 @@ impl FieldName { let mut parts = Vec::new(); while let Some(part) = FieldNamePart::parse_part(&mut chars)? { - parts.push(part) + parts.push(part); } Ok(FieldName { field_type, parts }) @@ -851,10 +879,10 @@ impl FormatString { cur_text = remaining; } Err(err) => { - return if !result_string.is_empty() { - Ok((FormatPart::Literal(result_string), cur_text)) - } else { + return if result_string.is_empty() { Err(err) + } else { + Ok((FormatPart::Literal(result_string), cur_text)) }; } } @@ -910,20 +938,18 @@ impl FormatString { } else if c == '{' { if nested { return Err(FormatParseError::InvalidFormatSpecifier); - } else { - nested = true; - left.push(c); - continue; } + nested = true; + left.push(c); + continue; } else if c == '}' { if nested { nested = false; left.push(c); continue; - } else { - end_bracket_pos = Some(idx); - break; } + end_bracket_pos = Some(idx); + break; } else { left.push(c); } diff --git a/rustpython_original/ruff_python_ast/src/nodes.rs b/rustpython_original/ruff_python_ast/src/nodes.rs index ccb4463ea1..4de5ac6cb0 100644 --- a/rustpython_original/ruff_python_ast/src/nodes.rs +++ b/rustpython_original/ruff_python_ast/src/nodes.rs @@ -901,6 +901,7 @@ impl From for Expr { /// Transforms a value prior to formatting it. #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, is_macro::Is)] #[repr(i8)] +#[allow(clippy::cast_possible_wrap)] pub enum ConversionFlag { /// No conversion None = -1, // CPython uses -1 @@ -1138,7 +1139,7 @@ impl BoolOp { pub const fn and(&self) -> Option { match self { BoolOp::And => Some(BoolOpAnd), - _ => None, + BoolOp::Or => None, } } @@ -1146,7 +1147,7 @@ impl BoolOp { pub const fn or(&self) -> Option { match self { BoolOp::Or => Some(BoolOpOr), - _ => None, + BoolOp::And => None, } } } @@ -2107,7 +2108,7 @@ pub struct Decorator { /// `defaults` and `kw_defaults` fields are removed and the default values are placed under each `arg_with_default` typed argument. /// `vararg` and `kwarg` are still typed as `arg` because they never can have a default value. /// -/// The matching Python style AST type is [PythonArguments]. While [PythonArguments] has ordered `kwonlyargs` fields by +/// The matching Python style AST type is [`PythonArguments`]. While [`PythonArguments`] has ordered `kwonlyargs` fields by /// default existence, [Arguments] has location-ordered kwonlyargs fields. /// /// NOTE: This type is different from original Python AST. @@ -2200,14 +2201,14 @@ impl Arguments { self.posonlyargs .iter() .chain(self.args.iter()) - .filter_map(|arg| arg.default.as_ref().map(|e| e.as_ref())) + .filter_map(|arg| arg.default.as_ref().map(std::convert::AsRef::as_ref)) } #[allow(clippy::type_complexity)] pub fn split_kwonlyargs(&self) -> (Vec<&Arg>, Vec<(&Arg, &Expr)>) { let mut args = Vec::new(); let mut with_defaults = Vec::new(); - for arg in self.kwonlyargs.iter() { + for arg in &self.kwonlyargs { if let Some(ref default) = arg.default { with_defaults.push((arg.as_arg(), &**default)); } else { diff --git a/rustpython_original/ruff_python_parser/src/context.rs b/rustpython_original/ruff_python_parser/src/context.rs index 1ae4bf4f22..683c35a30a 100644 --- a/rustpython_original/ruff_python_parser/src/context.rs +++ b/rustpython_original/ruff_python_parser/src/context.rs @@ -2,7 +2,7 @@ use ruff_python_ast::{self as ast, Expr, ExprContext}; pub(crate) fn set_context(expr: Expr, ctx: ExprContext) -> Expr { match expr { - Expr::Name(ast::ExprName { id, range, .. }) => ast::ExprName { id, ctx, range }.into(), + Expr::Name(ast::ExprName { id, range, .. }) => ast::ExprName { range, id, ctx }.into(), Expr::Tuple(ast::ExprTuple { elts, range, .. }) => ast::ExprTuple { elts: elts.into_iter().map(|elt| set_context(elt, ctx)).collect(), range, @@ -19,9 +19,9 @@ pub(crate) fn set_context(expr: Expr, ctx: ExprContext) -> Expr { Expr::Attribute(ast::ExprAttribute { value, attr, range, .. }) => ast::ExprAttribute { + range, value, attr, - range, ctx, } .into(), @@ -31,9 +31,9 @@ pub(crate) fn set_context(expr: Expr, ctx: ExprContext) -> Expr { range, .. }) => ast::ExprSubscript { + range, value, slice, - range, ctx, } .into(), diff --git a/rustpython_original/ruff_python_parser/src/function.rs b/rustpython_original/ruff_python_parser/src/function.rs index cb9a47b5ae..a26ef6395e 100644 --- a/rustpython_original/ruff_python_parser/src/function.rs +++ b/rustpython_original/ruff_python_parser/src/function.rs @@ -1,3 +1,4 @@ +use std::hash::BuildHasherDefault; // Contains functions that perform validation and parsing of arguments and parameters. // Checks apply both to functions and to lambdas. use crate::lexer::{LexicalError, LexicalErrorType}; @@ -15,10 +16,10 @@ pub(crate) fn validate_arguments(arguments: &ast::Arguments) -> Result<(), Lexic let mut all_arg_names = FxHashSet::with_capacity_and_hasher( arguments.posonlyargs.len() + arguments.args.len() - + arguments.vararg.is_some() as usize + + usize::from(arguments.vararg.is_some()) + arguments.kwonlyargs.len() - + arguments.kwarg.is_some() as usize, - Default::default(), + + usize::from(arguments.kwarg.is_some()), + BuildHasherDefault::default(), ); let posonlyargs = arguments.posonlyargs.iter(); @@ -79,49 +80,46 @@ pub(crate) fn parse_args(func_args: Vec) -> Result { - // Check for duplicate keyword arguments in the call. - if let Some(keyword_name) = &name { - if !keyword_names.insert(keyword_name.to_string()) { - return Err(LexicalError { - error: LexicalErrorType::DuplicateKeywordArgumentError( - keyword_name.to_string(), - ), - location: start, - }); - } - } else { - double_starred = true; - } - - keywords.push(ast::Keyword { - arg: name, - value, - range: TextRange::new(start, end), - }); - } - None => { - // Positional arguments mustn't follow keyword arguments. - if !keywords.is_empty() && !is_starred(&value) { + if let Some((start, end, name)) = name { + // Check for duplicate keyword arguments in the call. + if let Some(keyword_name) = &name { + if !keyword_names.insert(keyword_name.to_string()) { return Err(LexicalError { - error: LexicalErrorType::PositionalArgumentError, - location: value.start(), + error: LexicalErrorType::DuplicateKeywordArgumentError( + keyword_name.to_string(), + ), + location: start, }); + } + } else { + double_starred = true; + } + + keywords.push(ast::Keyword { + arg: name, + value, + range: TextRange::new(start, end), + }); + } else { + // Positional arguments mustn't follow keyword arguments. + if !keywords.is_empty() && !is_starred(&value) { + return Err(LexicalError { + error: LexicalErrorType::PositionalArgumentError, + location: value.start(), + }); // Allow starred arguments after keyword arguments but // not after double-starred arguments. - } else if double_starred { - return Err(LexicalError { - error: LexicalErrorType::UnpackedArgumentError, - location: value.start(), - }); - } - - args.push(value); + } else if double_starred { + return Err(LexicalError { + error: LexicalErrorType::UnpackedArgumentError, + location: value.start(), + }); } + + args.push(value); } } Ok(ArgumentList { args, keywords }) diff --git a/rustpython_original/ruff_python_parser/src/lexer.rs b/rustpython_original/ruff_python_parser/src/lexer.rs index 0645ac3d65..a65737c04f 100644 --- a/rustpython_original/ruff_python_parser/src/lexer.rs +++ b/rustpython_original/ruff_python_parser/src/lexer.rs @@ -143,6 +143,11 @@ impl<'source> Lexer<'source> { /// Create a new lexer from T and a starting location. You probably want to use /// [`lex`] instead. pub fn new(input: &'source str, mode: Mode) -> Self { + assert!( + u32::try_from(input.len()).is_ok(), + "Lexer only supports files with a size up to 4GB" + ); + let mut lxr = Lexer { state: State::AfterNewline, nesting: 0, @@ -351,7 +356,7 @@ impl<'source> Lexer<'source> { /// Consume a sequence of numbers with the given radix, /// the digits can be decorated with underscores - /// like this: '1_2_3_4' == '1234' + /// like this: '`1_2_3_4`' == '1234' fn radix_run(&mut self, first: Option, radix: Radix) -> Cow<'source, str> { let start = if let Some(first) = first { self.offset() - first.text_len() @@ -384,13 +389,13 @@ impl<'source> Lexer<'source> { } /// Lex a single comment. - fn lex_comment(&mut self) -> Result { + fn lex_comment(&mut self) -> Tok { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), '#'); self.cursor.eat_while(|c| !matches!(c, '\n' | '\r')); - return Ok(Tok::Comment(self.token_text().to_string())); + Tok::Comment(self.token_text().to_string()) } /// Lex a single magic command. @@ -418,10 +423,10 @@ impl<'source> Lexer<'source> { self.cursor.bump(); self.cursor.bump(); continue; - } else { - self.cursor.bump(); - value.push('\\'); } + + self.cursor.bump(); + value.push('\\'); } '\n' | '\r' | EOF_CHAR => { return Tok::MagicCommand { kind, value }; @@ -507,7 +512,7 @@ impl<'source> Lexer<'source> { pub fn next_token(&mut self) -> LexResult { // Return dedent tokens until the current indentation level matches the indentation of the next token. if let Some(indentation) = self.pending_indentation.take() { - if let Ok(Ordering::Greater) = self.indentations.current().try_compare(&indentation) { + if let Ok(Ordering::Greater) = self.indentations.current().try_compare(indentation) { self.pending_indentation = Some(indentation); self.indentations.pop(); return Ok((Tok::Dedent, TextRange::empty(self.offset()))); @@ -601,7 +606,7 @@ impl<'source> Lexer<'source> { &mut self, indentation: Indentation, ) -> Result, LexicalError> { - let token = match self.indentations.current().try_compare(&indentation) { + let token = match self.indentations.current().try_compare(indentation) { // Dedent Ok(Ordering::Greater) => { self.indentations.pop(); @@ -656,7 +661,7 @@ impl<'source> Lexer<'source> { let token = match c { c if is_ascii_identifier_start(c) => self.lex_identifier(c)?, '0'..='9' => self.lex_number(c)?, - '#' => return self.lex_comment().map(|token| (token, self.token_range())), + '#' => return Ok((self.lex_comment(), self.token_range())), '"' | '\'' => self.lex_string(StringKind::String, c)?, '=' => { if self.cursor.eat_char('=') { @@ -900,6 +905,8 @@ impl<'source> Lexer<'source> { &self.source[self.token_range()] } + // Lexer doesn't allow files larger than 4GB + #[allow(clippy::cast_possible_truncation)] #[inline] fn offset(&self) -> TextSize { TextSize::new(self.source.len() as u32) - self.cursor.text_len() @@ -1153,7 +1160,7 @@ mod tests { } fn assert_jupyter_magic_line_continuation_with_eol(eol: &str) { - let source = format!("%matplotlib \\{} --inline", eol); + let source = format!("%matplotlib \\{eol} --inline"); let tokens = lex_jupyter_source(&source); assert_eq!( tokens, @@ -1164,7 +1171,7 @@ mod tests { }, Tok::Newline ] - ) + ); } #[test] @@ -1183,7 +1190,7 @@ mod tests { } fn assert_jupyter_magic_line_continuation_with_eol_and_eof(eol: &str) { - let source = format!("%matplotlib \\{}", eol); + let source = format!("%matplotlib \\{eol}"); let tokens = lex_jupyter_source(&source); assert_eq!( tokens, @@ -1194,7 +1201,7 @@ mod tests { }, Tok::Newline ] - ) + ); } #[test] @@ -1220,52 +1227,52 @@ mod tests { tokens, vec![ Tok::MagicCommand { - value: "".to_string(), + value: String::new(), kind: MagicKind::Magic, }, Tok::Newline, Tok::MagicCommand { - value: "".to_string(), + value: String::new(), kind: MagicKind::Magic2, }, Tok::Newline, Tok::MagicCommand { - value: "".to_string(), + value: String::new(), kind: MagicKind::Shell, }, Tok::Newline, Tok::MagicCommand { - value: "".to_string(), + value: String::new(), kind: MagicKind::ShCap, }, Tok::Newline, Tok::MagicCommand { - value: "".to_string(), + value: String::new(), kind: MagicKind::Help, }, Tok::Newline, Tok::MagicCommand { - value: "".to_string(), + value: String::new(), kind: MagicKind::Help2, }, Tok::Newline, Tok::MagicCommand { - value: "".to_string(), + value: String::new(), kind: MagicKind::Paren, }, Tok::Newline, Tok::MagicCommand { - value: "".to_string(), + value: String::new(), kind: MagicKind::Quote, }, Tok::Newline, Tok::MagicCommand { - value: "".to_string(), + value: String::new(), kind: MagicKind::Quote2, }, Tok::Newline, ] - ) + ); } #[test] @@ -1346,7 +1353,7 @@ mod tests { }, Tok::Newline, ] - ) + ); } #[test] fn test_jupyter_magic_indentation() { @@ -1371,7 +1378,7 @@ if True: Tok::Newline, Tok::Dedent, ] - ) + ); } #[test] @@ -1424,13 +1431,13 @@ baz = %matplotlib \ }, Tok::Newline, ] - ) + ); } fn assert_no_jupyter_magic(tokens: &[Tok]) { for tok in tokens { if let Tok::MagicCommand { .. } = tok { - panic!("Unexpected magic command token: {:?}", tok) + panic!("Unexpected magic command token: {tok:?}") } } } @@ -1475,7 +1482,7 @@ def f(arg=%timeit a = b): value: BigInt::from(123), }, Tok::Int { - value: BigInt::from(1234567890), + value: BigInt::from(1_234_567_890), }, Tok::Float { value: 0.2 }, Tok::Float { value: 100.0 }, @@ -1851,13 +1858,13 @@ def f(arg=%timeit a = b): } fn assert_string_continuation_with_eol(eol: &str) { - let source = format!("\"abc\\{}def\"", eol); + let source = format!("\"abc\\{eol}def\""); let tokens = lex_source(&source); assert_eq!( tokens, - vec![str_tok(&format!("abc\\{}def", eol)), Tok::Newline] - ) + vec![str_tok(&format!("abc\\{eol}def")), Tok::Newline] + ); } #[test] @@ -1879,23 +1886,23 @@ def f(arg=%timeit a = b): fn test_escape_unicode_name() { let source = r#""\N{EN SPACE}""#; let tokens = lex_source(source); - assert_eq!(tokens, vec![str_tok(r"\N{EN SPACE}"), Tok::Newline]) + assert_eq!(tokens, vec![str_tok(r"\N{EN SPACE}"), Tok::Newline]); } fn assert_triple_quoted(eol: &str) { - let source = format!("\"\"\"{0} test string{0} \"\"\"", eol); + let source = format!("\"\"\"{eol} test string{eol} \"\"\""); let tokens = lex_source(&source); assert_eq!( tokens, vec![ Tok::String { - value: format!("{0} test string{0} ", eol), + value: format!("{eol} test string{eol} "), kind: StringKind::String, triple_quoted: true, }, Tok::Newline, ] - ) + ); } #[test] diff --git a/rustpython_original/ruff_python_parser/src/lexer/cursor.rs b/rustpython_original/ruff_python_parser/src/lexer/cursor.rs index 2811c1ab4e..01c64c98e0 100644 --- a/rustpython_original/ruff_python_parser/src/lexer/cursor.rs +++ b/rustpython_original/ruff_python_parser/src/lexer/cursor.rs @@ -28,13 +28,13 @@ impl<'a> Cursor<'a> { } /// Peeks the next character from the input stream without consuming it. - /// Returns [EOF_CHAR] if the file is at the end of the file. + /// Returns [`EOF_CHAR`] if the file is at the end of the file. pub(super) fn first(&self) -> char { self.chars.clone().next().unwrap_or(EOF_CHAR) } /// Peeks the second character from the input stream without consuming it. - /// Returns [EOF_CHAR] if the position is past the end of the file. + /// Returns [`EOF_CHAR`] if the position is past the end of the file. pub(super) fn second(&self) -> char { let mut chars = self.chars.clone(); chars.next(); @@ -57,7 +57,7 @@ impl<'a> Cursor<'a> { } pub(super) fn start_token(&mut self) { - self.source_length = self.text_len() + self.source_length = self.text_len(); } pub(super) fn is_eof(&self) -> bool { diff --git a/rustpython_original/ruff_python_parser/src/lexer/indentation.rs b/rustpython_original/ruff_python_parser/src/lexer/indentation.rs index 31732e21f5..9c2f6a05ee 100644 --- a/rustpython_original/ruff_python_parser/src/lexer/indentation.rs +++ b/rustpython_original/ruff_python_parser/src/lexer/indentation.rs @@ -44,7 +44,7 @@ impl Indentation { #[cfg(test)] pub(super) const fn new(column: Column, character: Character) -> Self { - Self { character, column } + Self { column, character } } #[must_use] @@ -67,10 +67,7 @@ impl Indentation { } } - pub(super) fn try_compare( - &self, - other: &Indentation, - ) -> Result { + pub(super) fn try_compare(self, other: Indentation) -> Result { let column_ordering = self.column.cmp(&other.column); let character_ordering = self.character.cmp(&other.character); @@ -94,7 +91,7 @@ pub(super) struct Indentations { impl Indentations { pub(super) fn push(&mut self, indent: Indentation) { - debug_assert_eq!(self.current().try_compare(&indent), Ok(Ordering::Less)); + debug_assert_eq!(self.current().try_compare(indent), Ok(Ordering::Less)); self.stack.push(indent); } @@ -120,10 +117,10 @@ mod tests { fn indentation_try_compare() { let tab = Indentation::new(Column::new(8), Character::new(1)); - assert_eq!(tab.try_compare(&tab), Ok(Ordering::Equal)); + assert_eq!(tab.try_compare(tab), Ok(Ordering::Equal)); let two_tabs = Indentation::new(Column::new(16), Character::new(2)); - assert_eq!(two_tabs.try_compare(&tab), Ok(Ordering::Greater)); - assert_eq!(tab.try_compare(&two_tabs), Ok(Ordering::Less)); + assert_eq!(two_tabs.try_compare(tab), Ok(Ordering::Greater)); + assert_eq!(tab.try_compare(two_tabs), Ok(Ordering::Less)); } } diff --git a/rustpython_original/ruff_python_parser/src/gen/parse.rs b/rustpython_original/ruff_python_parser/src/parse.rs similarity index 84% rename from rustpython_original/ruff_python_parser/src/gen/parse.rs rename to rustpython_original/ruff_python_parser/src/parse.rs index 6c659c1bdd..d5c64c8925 100644 --- a/rustpython_original/ruff_python_parser/src/gen/parse.rs +++ b/rustpython_original/ruff_python_parser/src/parse.rs @@ -1,4 +1,202 @@ -// This file was originally generated from asdl by a python script, but we now edit it manually +use crate::lexer::{lex, lex_starts_at, LexResult}; +use crate::{parse_tokens, Mode, ParseError, ParseErrorType}; +use ruff_python_ast as ast; +use ruff_python_ast::Ranged; +use ruff_text_size::TextSize; + +/// Parse Python code string to implementor's type. +/// +/// # Example +/// +/// For example, parsing a simple function definition and a call to that function: +/// +/// ``` +/// use ruff_python_parser::{self as parser, Parse}; +/// use ruff_python_ast as ast; +/// let source = r#" +/// def foo(): +/// return 42 +/// +/// print(foo()) +/// "#; +/// let program = ast::Suite::parse(source, ""); +/// assert!(program.is_ok()); +/// ``` +/// +/// Parsing a single expression denoting the addition of two numbers, but this time specifying a different, +/// somewhat silly, location: +/// +/// ``` +/// # use ruff_text_size::TextSize; +/// # use ruff_python_ast as ast; +/// # use ruff_python_parser::{self as parser, Parse}; +/// +/// let expr = ast::Expr::parse_starts_at("1 + 2", "", TextSize::from(400)); +/// assert!(expr.is_ok()); +pub trait Parse +where + Self: Sized, +{ + const MODE: Mode; + + fn parse(source: &str, source_path: &str) -> Result { + let tokens = lex(source, Self::MODE); + + Self::parse_tokens(tokens, source_path) + } + + fn parse_without_path(source: &str) -> Result { + Self::parse(source, "") + } + + fn parse_starts_at( + source: &str, + source_path: &str, + offset: TextSize, + ) -> Result { + let tokens = lex_starts_at(source, Self::MODE, offset); + + Self::parse_tokens(tokens, source_path) + } + + fn parse_tokens( + lxr: impl IntoIterator, + source_path: &str, + ) -> Result; +} + +impl Parse for ast::ModModule { + const MODE: Mode = Mode::Module; + + fn parse_tokens( + lxr: impl IntoIterator, + source_path: &str, + ) -> Result { + match parse_tokens(lxr, Mode::Module, source_path)? { + ast::Mod::Module(m) => Ok(m), + _ => unreachable!("Mode::Module doesn't return other variant"), + } + } +} + +impl Parse for ast::ModExpression { + const MODE: Mode = Mode::Expression; + + fn parse_tokens( + lxr: impl IntoIterator, + source_path: &str, + ) -> Result { + match parse_tokens(lxr, Mode::Expression, source_path)? { + ast::Mod::Expression(m) => Ok(m), + _ => unreachable!("Mode::Module doesn't return other variant"), + } + } +} + +impl Parse for ast::ModInteractive { + const MODE: Mode = Mode::Interactive; + fn parse_tokens( + lxr: impl IntoIterator, + source_path: &str, + ) -> Result { + match parse_tokens(lxr, Mode::Interactive, source_path)? { + ast::Mod::Interactive(m) => Ok(m), + _ => unreachable!("Mode::Module doesn't return other variant"), + } + } +} + +impl Parse for ast::Suite { + const MODE: Mode = Mode::Module; + + fn parse_tokens( + lxr: impl IntoIterator, + source_path: &str, + ) -> Result { + Ok(ast::ModModule::parse_tokens(lxr, source_path)?.body) + } +} + +impl Parse for ast::Stmt { + const MODE: Mode = Mode::Module; + + fn parse_tokens( + lxr: impl IntoIterator, + source_path: &str, + ) -> Result { + let mut statements = ast::ModModule::parse_tokens(lxr, source_path)?.body; + let statement = match statements.len() { + 0 => { + return Err(ParseError { + error: ParseErrorType::Eof, + offset: TextSize::default(), + source_path: source_path.to_owned(), + }) + } + 1 => statements.pop().unwrap(), + _ => { + return Err(ParseError { + error: ParseErrorType::InvalidToken, + offset: statements[1].range().start(), + source_path: source_path.to_owned(), + }) + } + }; + Ok(statement) + } +} + +impl Parse for ast::Expr { + const MODE: Mode = Mode::Expression; + + fn parse_tokens( + lxr: impl IntoIterator, + source_path: &str, + ) -> Result { + Ok(*ast::ModExpression::parse_tokens(lxr, source_path)?.body) + } +} + +impl Parse for ast::Identifier { + const MODE: Mode = Mode::Expression; + + fn parse_tokens( + lxr: impl IntoIterator, + source_path: &str, + ) -> Result { + let expr = ast::Expr::parse_tokens(lxr, source_path)?; + match expr { + ast::Expr::Name(name) => { + let range = name.range(); + Ok(ast::Identifier::new(name.id, range)) + } + expr => Err(ParseError { + error: ParseErrorType::InvalidToken, + offset: expr.range().start(), + source_path: source_path.to_owned(), + }), + } + } +} + +impl Parse for ast::Constant { + const MODE: Mode = Mode::Expression; + + fn parse_tokens( + lxr: impl IntoIterator, + source_path: &str, + ) -> Result { + let expr = ast::Expr::parse_tokens(lxr, source_path)?; + match expr { + ast::Expr::Constant(c) => Ok(c.value), + expr => Err(ParseError { + error: ParseErrorType::InvalidToken, + offset: expr.range().start(), + source_path: source_path.to_owned(), + }), + } + } +} impl Parse for ast::StmtFunctionDef { const MODE: Mode = Mode::Module; diff --git a/rustpython_original/ruff_python_parser/src/parser.rs b/rustpython_original/ruff_python_parser/src/parser.rs index 6a29cdde2d..25c5b4f293 100644 --- a/rustpython_original/ruff_python_parser/src/parser.rs +++ b/rustpython_original/ruff_python_parser/src/parser.rs @@ -1,4 +1,4 @@ -//! Contains the interface to the Python ruff_python_parser. +//! Contains the interface to the Python `ruff_python_parser`. //! //! Functions in this module can be used to parse Python code into an [Abstract Syntax Tree] //! (AST) that is then transformed into bytecode. @@ -16,210 +16,15 @@ use std::{fmt, iter}; use itertools::Itertools; pub(super) use lalrpop_util::ParseError as LalrpopError; -use ruff_text_size::TextSize; +use ruff_text_size::{TextRange, TextSize}; -use crate::lexer::{lex, lex_starts_at}; use crate::{ lexer::{self, LexResult, LexicalError, LexicalErrorType}, python, token::Tok, - Mode, + Mode, Parse, }; -use ruff_python_ast::{self as ast, Ranged}; - -/// Parse Python code string to implementor's type. -/// -/// # Example -/// -/// For example, parsing a simple function definition and a call to that function: -/// -/// ``` -/// use ruff_python_parser::{self as parser, Parse}; -/// use ruff_python_ast as ast; -/// let source = r#" -/// def foo(): -/// return 42 -/// -/// print(foo()) -/// "#; -/// let program = ast::Suite::parse(source, ""); -/// assert!(program.is_ok()); -/// ``` -/// -/// Parsing a single expression denoting the addition of two numbers, but this time specifying a different, -/// somewhat silly, location: -/// -/// ``` -/// # use ruff_text_size::TextSize; -/// # use ruff_python_ast as ast; -/// # use ruff_python_parser::{self as parser, Parse}; -/// -/// let expr = ast::Expr::parse_starts_at("1 + 2", "", TextSize::from(400)); -/// assert!(expr.is_ok()); -pub trait Parse -where - Self: Sized, -{ - const MODE: Mode; - - fn parse(source: &str, source_path: &str) -> Result { - let tokens = lex(source, Self::MODE); - - Self::parse_tokens(tokens, source_path) - } - - fn parse_without_path(source: &str) -> Result { - Self::parse(source, "") - } - - fn parse_starts_at( - source: &str, - source_path: &str, - offset: TextSize, - ) -> Result { - let tokens = lex_starts_at(source, Self::MODE, offset); - - Self::parse_tokens(tokens, source_path) - } - - fn parse_tokens( - lxr: impl IntoIterator, - source_path: &str, - ) -> Result; -} - -impl Parse for ast::ModModule { - const MODE: Mode = Mode::Module; - - fn parse_tokens( - lxr: impl IntoIterator, - source_path: &str, - ) -> Result { - match parse_tokens(lxr, Mode::Module, source_path)? { - ast::Mod::Module(m) => Ok(m), - _ => unreachable!("Mode::Module doesn't return other variant"), - } - } -} - -impl Parse for ast::ModExpression { - const MODE: Mode = Mode::Expression; - - fn parse_tokens( - lxr: impl IntoIterator, - source_path: &str, - ) -> Result { - match parse_tokens(lxr, Mode::Expression, source_path)? { - ast::Mod::Expression(m) => Ok(m), - _ => unreachable!("Mode::Module doesn't return other variant"), - } - } -} - -impl Parse for ast::ModInteractive { - const MODE: Mode = Mode::Interactive; - fn parse_tokens( - lxr: impl IntoIterator, - source_path: &str, - ) -> Result { - match parse_tokens(lxr, Mode::Interactive, source_path)? { - ast::Mod::Interactive(m) => Ok(m), - _ => unreachable!("Mode::Module doesn't return other variant"), - } - } -} - -impl Parse for ast::Suite { - const MODE: Mode = Mode::Module; - - fn parse_tokens( - lxr: impl IntoIterator, - source_path: &str, - ) -> Result { - Ok(ast::ModModule::parse_tokens(lxr, source_path)?.body) - } -} - -impl Parse for ast::Stmt { - const MODE: Mode = Mode::Module; - - fn parse_tokens( - lxr: impl IntoIterator, - source_path: &str, - ) -> Result { - let mut statements = ast::ModModule::parse_tokens(lxr, source_path)?.body; - let statement = match statements.len() { - 0 => { - return Err(ParseError { - error: ParseErrorType::Eof, - offset: TextSize::default(), - source_path: source_path.to_owned(), - }) - } - 1 => statements.pop().unwrap(), - _ => { - return Err(ParseError { - error: ParseErrorType::InvalidToken, - offset: statements[1].range().start(), - source_path: source_path.to_owned(), - }) - } - }; - Ok(statement) - } -} - -impl Parse for ast::Expr { - const MODE: Mode = Mode::Expression; - - fn parse_tokens( - lxr: impl IntoIterator, - source_path: &str, - ) -> Result { - Ok(*ast::ModExpression::parse_tokens(lxr, source_path)?.body) - } -} - -impl Parse for ast::Identifier { - const MODE: Mode = Mode::Expression; - - fn parse_tokens( - lxr: impl IntoIterator, - source_path: &str, - ) -> Result { - let expr = ast::Expr::parse_tokens(lxr, source_path)?; - match expr { - ast::Expr::Name(name) => { - let range = name.range(); - Ok(ast::Identifier::new(name.id, range)) - } - expr => Err(ParseError { - error: ParseErrorType::InvalidToken, - offset: expr.range().start(), - source_path: source_path.to_owned(), - }), - } - } -} - -impl Parse for ast::Constant { - const MODE: Mode = Mode::Expression; - - fn parse_tokens( - lxr: impl IntoIterator, - source_path: &str, - ) -> Result { - let expr = ast::Expr::parse_tokens(lxr, source_path)?; - match expr { - ast::Expr::Constant(c) => Ok(c.value), - expr => Err(ParseError { - error: ParseErrorType::InvalidToken, - offset: expr.range().start(), - source_path: source_path.to_owned(), - }), - } - } -} +use ruff_python_ast as ast; /// Parse a full Python program usually consisting of multiple lines. /// @@ -241,7 +46,7 @@ impl Parse for ast::Constant { /// let program = parser::parse_program(source, ""); /// assert!(program.is_ok()); /// ``` -#[deprecated = "Use ruff_python_ast::Suite::parse from rustpython_parser::Parse trait."] +#[deprecated = "Use ruff_python_ast::Suite::parse from ruff_python_parser::Parse trait."] pub fn parse_program(source: &str, source_path: &str) -> Result { parse(source, Mode::Module, source_path).map(|top| match top { ast::Mod::Module(ast::ModModule { body, .. }) => body, @@ -265,7 +70,7 @@ pub fn parse_program(source: &str, source_path: &str) -> Result Result { ast::Expr::parse(source, path) } @@ -287,7 +92,7 @@ pub fn parse_expression(source: &str, path: &str) -> Result", TextSize::from(400)); /// assert!(expr.is_ok()); /// ``` -#[deprecated = "Use ruff_python_ast::Expr::parse_starts_at from rustpython_parser::Parse trait."] +#[deprecated = "Use ruff_python_ast::Expr::parse_starts_at from ruff_python_parser::Parse trait."] pub fn parse_expression_starts_at( source: &str, path: &str, @@ -346,7 +151,7 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result Result { - let marker_token = (Tok::start_marker(mode), Default::default()); + let marker_token = (Tok::start_marker(mode), TextRange::default()); let lexer = iter::once(Ok(marker_token)).chain(lxr); python::TopParser::new() .parse( @@ -574,14 +379,11 @@ impl ParseErrorType { pub fn is_tab_error(&self) -> bool { matches!( self, - ParseErrorType::Lexical(LexicalErrorType::TabError) - | ParseErrorType::Lexical(LexicalErrorType::TabsAfterSpaces) + ParseErrorType::Lexical(LexicalErrorType::TabError | LexicalErrorType::TabsAfterSpaces) ) } } -include!("gen/parse.rs"); - #[cfg(test)] mod tests { use crate::Parse; @@ -933,7 +735,7 @@ type X[T: int, *Ts, **P] = (T, Ts, P) type X[T: (int, str), *Ts, **P] = (T, Ts, P) # soft keyword as alias name -type type = int +type type = int type match = int type case = int @@ -1047,7 +849,7 @@ if 10 .real: y = 100[no] y = 100(no) "#; - assert_debug_snapshot!(ast::Suite::parse(source, "").unwrap()) + assert_debug_snapshot!(ast::Suite::parse(source, "").unwrap()); } #[test] diff --git a/rustpython_original/ruff_python_parser/src/python.lalrpop b/rustpython_original/ruff_python_parser/src/python.lalrpop index 9b5bab6488..89aad05fcc 100644 --- a/rustpython_original/ruff_python_parser/src/python.lalrpop +++ b/rustpython_original/ruff_python_parser/src/python.lalrpop @@ -228,7 +228,7 @@ RaiseStatement: ast::Stmt = { }, "raise" > >)?> => { ast::Stmt::Raise( - ast::StmtRaise { exc: Some(Box::new(t)), cause: c.map(|x| Box::new(x)), range: (location..end_location).into() } + ast::StmtRaise { exc: Some(Box::new(t)), cause: c.map(Box::new), range: (location..end_location).into() } ) }, }; @@ -285,7 +285,7 @@ ImportAsAlias: ast::Alias = { DottedName: ast::Identifier = { => ast::Identifier::new(n, (location..end_location).into()), => { - let mut r = n.to_string(); + let mut r = n; for x in n2 { r.push('.'); r.push_str(x.1.as_str()); @@ -315,7 +315,7 @@ AssertStatement: ast::Stmt = { ast::Stmt::Assert( ast::StmtAssert { test: Box::new(test), - msg: msg.map(|e| Box::new(e)), + msg: msg.map(Box::new), range: (location..end_location).into() } ) @@ -346,10 +346,10 @@ LineMagicExpr: ast::Expr = { if mode == Mode::Jupyter { // This should never occur as the lexer won't allow it. if !matches!(m.0, MagicKind::Magic | MagicKind::Shell) { - Err(LexicalError { + return Err(LexicalError { error: LexicalErrorType::OtherError("expr line magics are only allowed for % and !".to_string()), location, - })? + })?; } Ok(ast::Expr::LineMagic( ast::ExprLineMagic { @@ -678,42 +678,42 @@ MatchMappingEntry: (ast::Expr, ast::Pattern) = { MappingPattern: ast::Pattern = { "{" "}" => { - return ast::PatternMatchMapping { + ast::PatternMatchMapping { keys: vec![], patterns: vec![], rest: None, range: (location..end_location).into() - }.into(); + }.into() }, "{" > ","? "}" => { let (keys, patterns) = e .into_iter() .unzip(); - return ast::PatternMatchMapping { + ast::PatternMatchMapping { keys, patterns, rest: None, range: (location..end_location).into() - }.into(); + }.into() }, "{" "**" ","? "}" => { - return ast::PatternMatchMapping { + ast::PatternMatchMapping { keys: vec![], patterns: vec![], rest: Some(rest), range: (location..end_location).into() - }.into(); + }.into() }, "{" > "," "**" ","? "}" => { let (keys, patterns) = e .into_iter() .unzip(); - return ast::PatternMatchMapping { + ast::PatternMatchMapping { keys, patterns, rest: Some(rest), range: (location..end_location).into() - }.into(); + }.into() }, } @@ -822,8 +822,7 @@ IfStatement: ast::Stmt = { let end_location = elif_else_clauses .last() - .map(|last| last.end()) - .unwrap_or_else(|| body.last().unwrap().end()); + .map_or_else(|| body.last().unwrap().end(), Ranged::end); ast::Stmt::If( ast::StmtIf { test: Box::new(test), body, elif_else_clauses, range: (location..end_location).into() } @@ -875,9 +874,9 @@ TryStatement: ast::Stmt = { let finalbody = finalbody.unwrap_or_default(); let end_location = finalbody .last() - .map(|last| last.end()) - .or_else(|| orelse.last().map(|last| last.end())) - .or_else(|| handlers.last().map(|last| last.end())) + .map(Ranged::end) + .or_else(|| orelse.last().map(Ranged::end)) + .or_else(|| handlers.last().map(Ranged::end)) .unwrap(); ast::Stmt::Try( ast::StmtTry { @@ -895,8 +894,8 @@ TryStatement: ast::Stmt = { let end_location = finalbody .last() .or_else(|| orelse.last()) - .map(|last| last.end()) - .or_else(|| handlers.last().map(|last| last.end())) + .map(Ranged::end) + .or_else(|| handlers.last().map(Ranged::end)) .unwrap(); ast::Stmt::TryStar( ast::StmtTryStar { @@ -1016,7 +1015,7 @@ WithItem: ast::WithItem = { FuncDef: ast::Stmt = { "def" " >)?> ":" => { let args = Box::new(args); - let returns = r.map(|x| Box::new(x)); + let returns = r.map(Box::new); let end_location = body.last().unwrap().end(); let type_comment = None; if is_async.is_some() { @@ -1052,11 +1051,10 @@ Parameters: ast::Arguments = { let range = (location..end_location).into(); let args = a - .map(|mut arguments| { + .map_or_else(|| ast::Arguments::empty(range), |mut arguments| { arguments.range = range; arguments - }) - .unwrap_or_else(|| ast::Arguments::empty(range)); + }); Ok(args) } @@ -1180,10 +1178,10 @@ DoubleStarTypedParameter: ast::Arg = { ParameterListStarArgs: (Option>, Vec, Option>) = { "*" >)*> >)?> =>? { if va.is_none() && kwonlyargs.is_empty() && kwarg.is_none() { - Err(LexicalError { + return Err(LexicalError { error: LexicalErrorType::OtherError("named arguments must follow bare *".to_string()), location, - })? + })?; } let kwarg = kwarg.flatten(); @@ -1526,10 +1524,10 @@ Atom: ast::Expr = { "(" >> ",")?> )*> ")" =>? { if left.is_none() && right.is_empty() && trailing_comma.is_none() { if mid.is_starred_expr() { - Err(LexicalError{ + return Err(LexicalError{ error: LexicalErrorType::OtherError("cannot use starred expression here".to_string()), location: mid.start(), - })? + })?; } Ok(mid) } else { @@ -1720,7 +1718,7 @@ OneOrMore: Vec = { } }; -/// Two or more items that are separted by `Sep` +/// Two or more items that are separated by `Sep` TwoOrMore: Vec = { Sep => vec![e1, e2], > Sep => { diff --git a/rustpython_original/ruff_python_parser/src/python.rs b/rustpython_original/ruff_python_parser/src/python.rs index cf1eafd864..116ef566ce 100644 --- a/rustpython_original/ruff_python_parser/src/python.rs +++ b/rustpython_original/ruff_python_parser/src/python.rs @@ -1,5 +1,5 @@ // auto-generated: "lalrpop 0.20.0" -// sha3: 9c49dc85355275f274dcc32e163c443c0dc567214c9725547e2218e9acd22577 +// sha3: bf0ea34f78939474a89bc0d4b6e7c14f370a2d2cd2ca8b98bd5aefdae0e1d5f1 use num_bigint::BigInt; use ruff_text_size::TextSize; use ruff_python_ast::{self as ast, Ranged, MagicKind}; @@ -31555,7 +31555,7 @@ fn __action59< { { ast::Stmt::Raise( - ast::StmtRaise { exc: Some(Box::new(t)), cause: c.map(|x| Box::new(x)), range: (location..end_location).into() } + ast::StmtRaise { exc: Some(Box::new(t)), cause: c.map(Box::new), range: (location..end_location).into() } ) } } @@ -31723,7 +31723,7 @@ fn __action70< ) -> ast::Identifier { { - let mut r = n.to_string(); + let mut r = n; for x in n2 { r.push('.'); r.push_str(x.1.as_str()); @@ -31784,7 +31784,7 @@ fn __action73< ast::Stmt::Assert( ast::StmtAssert { test: Box::new(test), - msg: msg.map(|e| Box::new(e)), + msg: msg.map(Box::new), range: (location..end_location).into() } ) @@ -31833,10 +31833,10 @@ fn __action75< if mode == Mode::Jupyter { // This should never occur as the lexer won't allow it. if !matches!(m.0, MagicKind::Magic | MagicKind::Shell) { - Err(LexicalError { + return Err(LexicalError { error: LexicalErrorType::OtherError("expr line magics are only allowed for % and !".to_string()), location, - })? + })?; } Ok(ast::Expr::LineMagic( ast::ExprLineMagic { @@ -32791,12 +32791,12 @@ fn __action133< ) -> ast::Pattern { { - return ast::PatternMatchMapping { + ast::PatternMatchMapping { keys: vec![], patterns: vec![], rest: None, range: (location..end_location).into() - }.into(); + }.into() } } @@ -32817,12 +32817,12 @@ fn __action134< let (keys, patterns) = e .into_iter() .unzip(); - return ast::PatternMatchMapping { + ast::PatternMatchMapping { keys, patterns, rest: None, range: (location..end_location).into() - }.into(); + }.into() } } @@ -32841,12 +32841,12 @@ fn __action135< ) -> ast::Pattern { { - return ast::PatternMatchMapping { + ast::PatternMatchMapping { keys: vec![], patterns: vec![], rest: Some(rest), range: (location..end_location).into() - }.into(); + }.into() } } @@ -32870,12 +32870,12 @@ fn __action136< let (keys, patterns) = e .into_iter() .unzip(); - return ast::PatternMatchMapping { + ast::PatternMatchMapping { keys, patterns, rest: Some(rest), range: (location..end_location).into() - }.into(); + }.into() } } @@ -33131,8 +33131,7 @@ fn __action146< let end_location = elif_else_clauses .last() - .map(|last| last.end()) - .unwrap_or_else(|| body.last().unwrap().end()); + .map_or_else(|| body.last().unwrap().end(), Ranged::end); ast::Stmt::If( ast::StmtIf { test: Box::new(test), body, elif_else_clauses, range: (location..end_location).into() } @@ -33225,9 +33224,9 @@ fn __action149< let finalbody = finalbody.unwrap_or_default(); let end_location = finalbody .last() - .map(|last| last.end()) - .or_else(|| orelse.last().map(|last| last.end())) - .or_else(|| handlers.last().map(|last| last.end())) + .map(Ranged::end) + .or_else(|| orelse.last().map(Ranged::end)) + .or_else(|| handlers.last().map(Ranged::end)) .unwrap(); ast::Stmt::Try( ast::StmtTry { @@ -33262,8 +33261,8 @@ fn __action150< let end_location = finalbody .last() .or_else(|| orelse.last()) - .map(|last| last.end()) - .or_else(|| handlers.last().map(|last| last.end())) + .map(Ranged::end) + .or_else(|| handlers.last().map(Ranged::end)) .unwrap(); ast::Stmt::TryStar( ast::StmtTryStar { @@ -33522,7 +33521,7 @@ fn __action162< { { let args = Box::new(args); - let returns = r.map(|x| Box::new(x)); + let returns = r.map(Box::new); let end_location = body.last().unwrap().end(); let type_comment = None; if is_async.is_some() { @@ -33591,11 +33590,10 @@ fn __action165< let range = (location..end_location).into(); let args = a - .map(|mut arguments| { + .map_or_else(|| ast::Arguments::empty(range), |mut arguments| { arguments.range = range; arguments - }) - .unwrap_or_else(|| ast::Arguments::empty(range)); + }); Ok(args) } @@ -37001,10 +36999,10 @@ fn __action417< { { if va.is_none() && kwonlyargs.is_empty() && kwarg.is_none() { - Err(LexicalError { + return Err(LexicalError { error: LexicalErrorType::OtherError("named arguments must follow bare *".to_string()), location, - })? + })?; } let kwarg = kwarg.flatten(); @@ -37118,10 +37116,10 @@ fn __action425< { { if va.is_none() && kwonlyargs.is_empty() && kwarg.is_none() { - Err(LexicalError { + return Err(LexicalError { error: LexicalErrorType::OtherError("named arguments must follow bare *".to_string()), location, - })? + })?; } let kwarg = kwarg.flatten(); @@ -38469,10 +38467,10 @@ fn __action524< { if left.is_none() && right.is_empty() && trailing_comma.is_none() { if mid.is_starred_expr() { - Err(LexicalError{ + return Err(LexicalError{ error: LexicalErrorType::OtherError("cannot use starred expression here".to_string()), location: mid.start(), - })? + })?; } Ok(mid) } else { @@ -39137,10 +39135,10 @@ fn __action568< { if left.is_none() && right.is_empty() && trailing_comma.is_none() { if mid.is_starred_expr() { - Err(LexicalError{ + return Err(LexicalError{ error: LexicalErrorType::OtherError("cannot use starred expression here".to_string()), location: mid.start(), - })? + })?; } Ok(mid) } else { diff --git a/rustpython_original/ruff_python_parser/src/soft_keywords.rs b/rustpython_original/ruff_python_parser/src/soft_keywords.rs index 778925fe4d..9b30199bc7 100644 --- a/rustpython_original/ruff_python_parser/src/soft_keywords.rs +++ b/rustpython_original/ruff_python_parser/src/soft_keywords.rs @@ -15,7 +15,7 @@ use itertools::{Itertools, MultiPeek}; /// soft keyword tokens with `identifier` tokens if they are used as identifiers. /// /// Handling soft keywords in this intermediary pass allows us to simplify both the lexer and -/// ruff_python_parser, as neither of them need to be aware of soft keywords. +/// `ruff_python_parser`, as neither of them need to be aware of soft keywords. pub struct SoftKeywordTransformer where I: Iterator, @@ -59,9 +59,7 @@ where // (This is to avoid treating `match` or `case` as identifiers when annotated with // type hints.) type hints.) Tok::Match | Tok::Case => { - if !self.start_of_line { - next = Some(Ok((soft_to_name(tok), *range))); - } else { + if self.start_of_line { let mut nesting = 0; let mut first = true; let mut seen_colon = false; @@ -86,6 +84,8 @@ where if !seen_colon { next = Some(Ok((soft_to_name(tok), *range))); } + } else { + next = Some(Ok((soft_to_name(tok), *range))); } } // For `type` all of the following conditions must be met: @@ -93,9 +93,7 @@ where // 2. The type token is immediately followed by a name token. // 3. The name token is eventually followed by an equality token. Tok::Type => { - if !self.start_of_line { - next = Some(Ok((soft_to_name(tok), *range))); - } else { + if self.start_of_line { let mut is_type_alias = false; if let Some(Ok((tok, _))) = self.underlying.peek() { if matches!( @@ -126,6 +124,8 @@ where if !is_type_alias { next = Some(Ok((soft_to_name(tok), *range))); } + } else { + next = Some(Ok((soft_to_name(tok), *range))); } } _ => (), // Not a soft keyword token diff --git a/rustpython_original/ruff_python_parser/src/string.rs b/rustpython_original/ruff_python_parser/src/string.rs index c231a727ae..230268106c 100644 --- a/rustpython_original/ruff_python_parser/src/string.rs +++ b/rustpython_original/ruff_python_parser/src/string.rs @@ -11,8 +11,9 @@ use ruff_text_size::{TextLen, TextRange, TextSize}; // we have to do the parsing here, manually. use crate::{ lexer::{LexicalError, LexicalErrorType}, - parser::{LalrpopError, Parse, ParseError, ParseErrorType}, + parser::{ParseError, ParseErrorType}, token::{StringKind, Tok}, + Parse, }; // unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798 @@ -68,11 +69,6 @@ impl<'a> StringParser<'a> { TextRange::new(start_location, self.location) } - #[inline] - fn expr(&self, node: Expr) -> Expr { - node - } - fn parse_unicode_literal(&mut self, literal_number: usize) -> Result { let mut p: u32 = 0u32; let unicode_error = LexicalError::new(LexicalErrorType::UnicodeError, self.get_pos()); @@ -96,7 +92,7 @@ impl<'a> StringParser<'a> { octet_content.push(first); while octet_content.len() < 3 { if let Some('0'..='7') = self.peek() { - octet_content.push(self.next_char().unwrap()) + octet_content.push(self.next_char().unwrap()); } else { break; } @@ -157,7 +153,7 @@ impl<'a> StringParser<'a> { 'U' if !self.kind.is_any_bytes() => self.parse_unicode_literal(8)?, 'N' if !self.kind.is_any_bytes() => self.parse_unicode_name()?, // Special cases where the escape sequence is not a single character - '\n' => return Ok("".to_string()), + '\n' => return Ok(String::new()), c => { if self.kind.is_any_bytes() && !c.is_ascii() { return Err(LexicalError { @@ -180,7 +176,10 @@ impl<'a> StringParser<'a> { } fn parse_formatted_value(&mut self, nested: u8) -> Result, LexicalError> { - use FStringErrorType::*; + use FStringErrorType::{ + EmptyExpression, InvalidConversionFlag, InvalidExpression, MismatchedDelimiter, + UnclosedLbrace, Unmatched, UnterminatedString, + }; let mut expression = String::new(); let mut spec = None; @@ -238,15 +237,10 @@ impl<'a> StringParser<'a> { let start_location = self.get_pos(); let parsed_spec = self.parse_spec(nested)?; - spec = Some(Box::new( - self.expr( - ast::ExprJoinedStr { - values: parsed_spec, - range: self.range(start_location), - } - .into(), - ), - )); + spec = Some(Box::new(Expr::from(ast::ExprJoinedStr { + values: parsed_spec, + range: self.range(start_location), + }))); } '(' | '{' | '[' => { expression.push(ch); @@ -309,9 +303,21 @@ impl<'a> StringParser<'a> { return Err(FStringError::new(EmptyExpression, self.get_pos()).into()); } - let ret = if !self_documenting { - vec![self.expr( - ast::ExprFormattedValue { + let ret = if self_documenting { + // TODO: range is wrong but `self_documenting` needs revisiting beyond + // ranges: https://github.com/astral-sh/ruff/issues/5970 + vec![ + Expr::from(ast::ExprConstant { + value: Constant::Str(expression.clone() + "="), + kind: None, + range: self.range(start_location), + }), + Expr::from(ast::ExprConstant { + value: trailing_seq.into(), + kind: None, + range: self.range(start_location), + }), + Expr::from(ast::ExprFormattedValue { value: Box::new( parse_fstring_expr(&expression, start_location).map_err( |e| { @@ -322,57 +328,30 @@ impl<'a> StringParser<'a> { }, )?, ), - conversion, + conversion: if conversion == ConversionFlag::None && spec.is_none() + { + ConversionFlag::Repr + } else { + conversion + }, format_spec: spec, range: self.range(start_location), - } - .into(), - )] - } else { - // TODO: range is wrong but `self_documenting` needs revisiting beyond - // ranges: https://github.com/astral-sh/ruff/issues/5970 - vec![ - self.expr( - ast::ExprConstant { - value: Constant::Str(expression.to_owned() + "="), - kind: None, - range: self.range(start_location), - } - .into(), - ), - self.expr( - ast::ExprConstant { - value: trailing_seq.into(), - kind: None, - range: self.range(start_location), - } - .into(), - ), - self.expr( - ast::ExprFormattedValue { - value: Box::new( - parse_fstring_expr(&expression, start_location).map_err( - |e| { - FStringError::new( - InvalidExpression(Box::new(e.error)), - start_location, - ) - }, - )?, - ), - conversion: if conversion == ConversionFlag::None - && spec.is_none() - { - ConversionFlag::Repr - } else { - conversion - }, - format_spec: spec, - range: self.range(start_location), - } - .into(), - ), + }), ] + } else { + vec![Expr::from(ast::ExprFormattedValue { + value: Box::new( + parse_fstring_expr(&expression, start_location).map_err(|e| { + FStringError::new( + InvalidExpression(Box::new(e.error)), + start_location, + ) + })?, + ), + conversion, + format_spec: spec, + range: self.range(start_location), + })] }; return Ok(ret); } @@ -380,7 +359,9 @@ impl<'a> StringParser<'a> { expression.push(ch); loop { let Some(c) = self.next_char() else { - return Err(FStringError::new(UnterminatedString, self.get_pos()).into()); + return Err( + FStringError::new(UnterminatedString, self.get_pos()).into() + ); }; expression.push(c); if c == ch { @@ -412,16 +393,11 @@ impl<'a> StringParser<'a> { match next { '{' => { if !constant_piece.is_empty() { - spec_constructor.push( - self.expr( - ast::ExprConstant { - value: constant_piece.drain(..).collect::().into(), - kind: None, - range: self.range(start_location), - } - .into(), - ), - ); + spec_constructor.push(Expr::from(ast::ExprConstant { + value: constant_piece.drain(..).collect::().into(), + kind: None, + range: self.range(start_location), + })); } let parsed_expr = self.parse_fstring(nested + 1)?; spec_constructor.extend(parsed_expr); @@ -438,22 +414,17 @@ impl<'a> StringParser<'a> { self.next_char(); } if !constant_piece.is_empty() { - spec_constructor.push( - self.expr( - ast::ExprConstant { - value: constant_piece.drain(..).collect::().into(), - kind: None, - range: self.range(start_location), - } - .into(), - ), - ); + spec_constructor.push(Expr::from(ast::ExprConstant { + value: constant_piece.drain(..).collect::().into(), + kind: None, + range: self.range(start_location), + })); } Ok(spec_constructor) } fn parse_fstring(&mut self, nested: u8) -> Result, LexicalError> { - use FStringErrorType::*; + use FStringErrorType::{ExpressionNestedTooDeeply, SingleRbrace, UnclosedLbrace}; if nested >= 2 { return Err(FStringError::new(ExpressionNestedTooDeeply, self.get_pos()).into()); @@ -481,16 +452,11 @@ impl<'a> StringParser<'a> { } } if !content.is_empty() { - values.push( - self.expr( - ast::ExprConstant { - value: content.drain(..).collect::().into(), - kind: None, - range: self.range(start_location), - } - .into(), - ), - ); + values.push(Expr::from(ast::ExprConstant { + value: content.drain(..).collect::().into(), + kind: None, + range: self.range(start_location), + })); } let parsed_values = self.parse_formatted_value(nested)?; @@ -521,16 +487,11 @@ impl<'a> StringParser<'a> { } if !content.is_empty() { - values.push( - self.expr( - ast::ExprConstant { - value: content.into(), - kind: None, - range: self.range(start_location), - } - .into(), - ), - ) + values.push(Expr::from(ast::ExprConstant { + value: content.into(), + kind: None, + range: self.range(start_location), + })); } Ok(values) @@ -558,14 +519,11 @@ impl<'a> StringParser<'a> { } } - Ok(self.expr( - ast::ExprConstant { - value: Constant::Bytes(content.chars().map(|c| c as u8).collect()), - kind: None, - range: self.range(start_location), - } - .into(), - )) + Ok(Expr::from(ast::ExprConstant { + value: Constant::Bytes(content.chars().map(|c| c as u8).collect()), + kind: None, + range: self.range(start_location), + })) } fn parse_string(&mut self) -> Result { @@ -579,14 +537,11 @@ impl<'a> StringParser<'a> { ch => content.push(ch), } } - Ok(self.expr( - ast::ExprConstant { - value: Constant::Str(content), - kind: self.kind.is_unicode().then(|| "u".to_string()), - range: self.range(start_location), - } - .into(), - )) + Ok(Expr::from(ast::ExprConstant { + value: Constant::Str(content), + kind: self.kind.is_unicode().then(|| "u".to_string()), + range: self.range(start_location), + })) } fn parse(&mut self) -> Result, LexicalError> { @@ -703,7 +658,7 @@ pub(crate) fn parse_strings( if !current.is_empty() { deduped.push(take_current(&mut current, current_start, current_end)); } - deduped.push(value) + deduped.push(value); } Expr::Constant(ast::ExprConstant { value: Constant::Str(inner), @@ -787,7 +742,11 @@ pub enum FStringErrorType { impl std::fmt::Display for FStringErrorType { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - use FStringErrorType::*; + use FStringErrorType::{ + EmptyExpression, ExpectedRbrace, ExpressionCannotInclude, ExpressionNestedTooDeeply, + InvalidConversionFlag, InvalidExpression, MismatchedDelimiter, SingleRbrace, + UnclosedLbrace, Unmatched, UnopenedRbrace, UnterminatedString, + }; match self { UnclosedLbrace => write!(f, "expecting '}}'"), UnopenedRbrace => write!(f, "Unopened '}}'"), @@ -820,7 +779,7 @@ impl std::fmt::Display for FStringErrorType { } } -impl From for LalrpopError { +impl From for crate::parser::LalrpopError { fn from(err: FStringError) -> Self { lalrpop_util::ParseError::User { error: LexicalError { @@ -906,7 +865,10 @@ mod tests { #[test] fn test_parse_invalid_fstring() { - use FStringErrorType::*; + use FStringErrorType::{ + EmptyExpression, ExpressionNestedTooDeeply, InvalidConversionFlag, SingleRbrace, + UnclosedLbrace, + }; assert_eq!(parse_fstring_error("{5!a"), UnclosedLbrace); assert_eq!(parse_fstring_error("{5!a1}"), UnclosedLbrace); assert_eq!(parse_fstring_error("{5!"), UnclosedLbrace); diff --git a/rustpython_original/ruff_python_parser/src/token.rs b/rustpython_original/ruff_python_parser/src/token.rs index 04e9698092..ae0a90238e 100644 --- a/rustpython_original/ruff_python_parser/src/token.rs +++ b/rustpython_original/ruff_python_parser/src/token.rs @@ -1,4 +1,4 @@ -//! Token type for Python source code created by the lexer and consumed by the ruff_python_parser. +//! Token type for Python source code created by the lexer and consumed by the `ruff_python_parser`. //! //! This module defines the tokens that the lexer recognizes. The tokens are //! loosely based on the token definitions found in the [CPython source]. @@ -219,6 +219,7 @@ impl Tok { impl fmt::Display for Tok { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[allow(clippy::enum_glob_use)] use Tok::*; match self { Name { name } => write!(f, "'{name}'"), @@ -384,7 +385,7 @@ impl TryFrom<[char; 2]> for StringKind { impl fmt::Display for StringKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use StringKind::*; + use StringKind::{Bytes, FString, RawBytes, RawFString, RawString, String, Unicode}; match self { String => f.write_str(""), FString => f.write_str("f"), @@ -426,7 +427,7 @@ impl StringKind { /// Returns the number of characters in the prefix. pub fn prefix_len(&self) -> TextSize { - use StringKind::*; + use StringKind::{Bytes, FString, RawBytes, RawFString, RawString, String, Unicode}; let len = match self { String => 0, RawString | FString | Unicode | Bytes => 1,