From 38d2562f41caa042f088489ac00dc75bf922bb5b Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 23 Apr 2024 10:25:02 +0530 Subject: [PATCH] Refactor unary expression parsing (#11088) ## Summary This PR refactors unary expression parsing with the following changes: * Ability to get `OperatorPrecedence` from a unary operator (`UnaryOp`) * Implement methods on `TokenKind` * Add `as_unary_operator` which returns an `Option` * Add `as_unary_arithmetic_operator` which returns an `Option` (used for pattern parsing) * Rename `is_unary` to `is_unary_arithmetic_operator` (used in the linter) resolves: #10752 ## Test Plan Verify that the existing test cases pass, no ecosystem changes, run the Python based fuzzer on 3000 random inputs and run it on dozens of open-source repositories. --- .../missing_whitespace_around_operator.rs | 4 +- .../src/parser/expression.rs | 86 +++++++++++-------- .../ruff_python_parser/src/parser/pattern.rs | 46 +++++----- crates/ruff_python_parser/src/token.rs | 74 ++++++++++------ 4 files changed, 125 insertions(+), 85 deletions(-) diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs index fd182648ca..0015c3ac26 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs @@ -186,7 +186,9 @@ pub(crate) fn missing_whitespace_around_operator( ); NeedsSpace::from(!slash_in_func) - } else if kind.is_unary() || matches!(kind, TokenKind::Star | TokenKind::DoubleStar) { + } else if kind.is_unary_arithmetic_operator() + || matches!(kind, TokenKind::Star | TokenKind::DoubleStar) + { let is_binary = { let prev_kind = prev_token.kind(); diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index 2275ed07b0..6f1caf3995 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -303,10 +303,21 @@ impl<'src> Parser<'src> { context: ExpressionContext, ) -> ParsedExpr { let start = self.node_start(); + let token = self.current_token_kind(); - let lhs = match self.current_token_kind() { - unary_tok @ (TokenKind::Plus | TokenKind::Minus | TokenKind::Tilde) => { - let unary_expr = self.parse_unary_expression(context); + if let Some(unary_op) = token.as_unary_operator() { + let expr = self.parse_unary_expression(unary_op, context); + + if matches!(unary_op, UnaryOp::Not) { + if left_precedence > OperatorPrecedence::Not { + self.add_error( + ParseErrorType::OtherError( + "Boolean 'not' expression cannot be used here".to_string(), + ), + &expr, + ); + } + } else { if left_precedence > OperatorPrecedence::PosNegBitNot // > The power operator `**` binds less tightly than an arithmetic // > or bitwise unary operator on its right, that is, 2**-1 is 0.5. @@ -316,36 +327,31 @@ impl<'src> Parser<'src> { { self.add_error( ParseErrorType::OtherError(format!( - "Unary {unary_tok} expression cannot be used here", + "Unary '{unary_op}' expression cannot be used here", )), - &unary_expr, + &expr, ); } - Expr::UnaryOp(unary_expr).into() - } - TokenKind::Not => { - let unary_expr = self.parse_unary_expression(context); - if left_precedence > OperatorPrecedence::Not { - self.add_error( - ParseErrorType::OtherError( - "Boolean 'not' expression cannot be used here".to_string(), - ), - &unary_expr, - ); - } - Expr::UnaryOp(unary_expr).into() } + + return Expr::UnaryOp(expr).into(); + } + + match self.current_token_kind() { TokenKind::Star => { let starred_expr = self.parse_starred_expression(context); + if left_precedence > OperatorPrecedence::Initial || !context.is_starred_expression_allowed() { self.add_error(ParseErrorType::InvalidStarredExpressionUsage, &starred_expr); } - Expr::Starred(starred_expr).into() + + return Expr::Starred(starred_expr).into(); } TokenKind::Await => { let await_expr = self.parse_await_expression(); + // `await` expressions cannot be nested if left_precedence >= OperatorPrecedence::Await { self.add_error( @@ -355,26 +361,31 @@ impl<'src> Parser<'src> { &await_expr, ); } - Expr::Await(await_expr).into() + + return Expr::Await(await_expr).into(); } TokenKind::Lambda => { - // Lambda expression isn't allowed in this context but we'll still - // parse it and report an error for better recovery. + // Lambda expression isn't allowed in this context but we'll still parse it and + // report an error for better recovery. let lambda_expr = self.parse_lambda_expr(); self.add_error(ParseErrorType::InvalidLambdaExpressionUsage, &lambda_expr); - Expr::Lambda(lambda_expr).into() + return Expr::Lambda(lambda_expr).into(); } TokenKind::Yield => { let expr = self.parse_yield_expression(); + if left_precedence > OperatorPrecedence::Initial || !context.is_yield_expression_allowed() { self.add_error(ParseErrorType::InvalidYieldExpressionUsage, &expr); } - expr.into() + + return expr.into(); } - _ => self.parse_atom(), - }; + _ => {} + } + + let lhs = self.parse_atom(); ParsedExpr { expr: self.parse_postfix_expression(lhs.expr, start), @@ -881,20 +892,13 @@ impl<'src> Parser<'src> { /// See: pub(super) fn parse_unary_expression( &mut self, + op: UnaryOp, context: ExpressionContext, ) -> ast::ExprUnaryOp { let start = self.node_start(); + self.bump(TokenKind::from(op)); - let op = UnaryOp::try_from(self.current_token_kind()) - .expect("current token should be a unary operator"); - self.bump(self.current_token_kind()); - - let operand = if op.is_not() { - self.parse_binary_expression_or_higher(OperatorPrecedence::Not, context) - } else { - // plus, minus and tilde - self.parse_binary_expression_or_higher(OperatorPrecedence::PosNegBitNot, context) - }; + let operand = self.parse_binary_expression_or_higher(OperatorPrecedence::from(op), context); ast::ExprUnaryOp { op, @@ -2402,6 +2406,16 @@ impl From for OperatorPrecedence { } } +impl From for OperatorPrecedence { + #[inline] + fn from(op: UnaryOp) -> Self { + match op { + UnaryOp::Not => OperatorPrecedence::Not, + _ => OperatorPrecedence::PosNegBitNot, + } + } +} + impl From for OperatorPrecedence { #[inline] fn from(op: Operator) -> Self { diff --git a/crates/ruff_python_parser/src/parser/pattern.rs b/crates/ruff_python_parser/src/parser/pattern.rs index 743722119d..4d200b4e42 100644 --- a/crates/ruff_python_parser/src/parser/pattern.rs +++ b/crates/ruff_python_parser/src/parser/pattern.rs @@ -478,30 +478,34 @@ impl<'src> Parser<'src> { }, }) } - // The `+` is only for better error recovery. - TokenKind::Minus | TokenKind::Plus - if matches!( - self.peek(), - TokenKind::Int | TokenKind::Float | TokenKind::Complex - ) => - { - let unary_expr = self.parse_unary_expression(ExpressionContext::default()); + kind => { + // The `+` is only for better error recovery. + if let Some(unary_arithmetic_op) = kind.as_unary_arithmetic_operator() { + if matches!( + self.peek(), + TokenKind::Int | TokenKind::Float | TokenKind::Complex + ) { + let unary_expr = self.parse_unary_expression( + unary_arithmetic_op, + ExpressionContext::default(), + ); - if unary_expr.op.is_u_add() { - self.add_error( - ParseErrorType::OtherError( - "Unary '+' is not allowed as a literal pattern".to_string(), - ), - &unary_expr, - ); + if unary_expr.op.is_u_add() { + self.add_error( + ParseErrorType::OtherError( + "Unary '+' is not allowed as a literal pattern".to_string(), + ), + &unary_expr, + ); + } + + return Pattern::MatchValue(ast::PatternMatchValue { + value: Box::new(Expr::UnaryOp(unary_expr)), + range: self.node_range(start), + }); + } } - Pattern::MatchValue(ast::PatternMatchValue { - value: Box::new(Expr::UnaryOp(unary_expr)), - range: self.node_range(start), - }) - } - kind => { // Upon encountering an unexpected token, return a `Pattern::MatchValue` containing // an empty `Expr::Name`. let invalid_node = if kind.is_keyword() { diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs index 36ea44e818..e9ae4d3beb 100644 --- a/crates/ruff_python_parser/src/token.rs +++ b/crates/ruff_python_parser/src/token.rs @@ -540,11 +540,6 @@ impl TokenKind { matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline) } - #[inline] - pub const fn is_unary(self) -> bool { - matches!(self, TokenKind::Plus | TokenKind::Minus) - } - #[inline] pub const fn is_keyword(self) -> bool { matches!( @@ -699,6 +694,41 @@ impl TokenKind { matches!(self, TokenKind::Match | TokenKind::Case) } + /// Returns `true` if the current token is a unary arithmetic operator. + #[inline] + pub const fn is_unary_arithmetic_operator(self) -> bool { + matches!(self, TokenKind::Plus | TokenKind::Minus) + } + + /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is an arithmetic unary + /// operator, otherwise return [None]. + /// + /// Use [`TokenKind::as_unary_operator`] to match against any unary operator. + #[inline] + pub(crate) const fn as_unary_arithmetic_operator(self) -> Option { + Some(match self { + TokenKind::Plus => UnaryOp::UAdd, + TokenKind::Minus => UnaryOp::USub, + _ => return None, + }) + } + + /// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary operator, + /// otherwise return [None]. + /// + /// Use [`TokenKind::as_unary_arithmetic_operator`] to match against only an arithmetic unary + /// operator. + #[inline] + pub(crate) const fn as_unary_operator(self) -> Option { + Some(match self { + TokenKind::Plus => UnaryOp::UAdd, + TokenKind::Minus => UnaryOp::USub, + TokenKind::Tilde => UnaryOp::Invert, + TokenKind::Not => UnaryOp::Not, + _ => return None, + }) + } + /// Returns the [`BoolOp`] that corresponds to this token kind, if it is a boolean operator, /// otherwise return [None]. #[inline] @@ -879,28 +909,6 @@ impl From for TokenKind { } } -impl TryFrom<&Tok> for UnaryOp { - type Error = String; - - fn try_from(value: &Tok) -> Result { - TokenKind::from_token(value).try_into() - } -} - -impl TryFrom for UnaryOp { - type Error = String; - - fn try_from(value: TokenKind) -> Result { - Ok(match value { - TokenKind::Plus => UnaryOp::UAdd, - TokenKind::Minus => UnaryOp::USub, - TokenKind::Tilde => UnaryOp::Invert, - TokenKind::Not => UnaryOp::Not, - _ => return Err(format!("unexpected token: {value:?}")), - }) - } -} - impl From for TokenKind { #[inline] fn from(op: BoolOp) -> Self { @@ -911,6 +919,18 @@ impl From for TokenKind { } } +impl From for TokenKind { + #[inline] + fn from(op: UnaryOp) -> Self { + match op { + UnaryOp::Invert => TokenKind::Tilde, + UnaryOp::Not => TokenKind::Not, + UnaryOp::UAdd => TokenKind::Plus, + UnaryOp::USub => TokenKind::Minus, + } + } +} + impl From for TokenKind { #[inline] fn from(op: Operator) -> Self {