diff --git a/crates/ruff_python_ast/src/nodes.rs b/crates/ruff_python_ast/src/nodes.rs index 7ed613913f..d536ce8ca6 100644 --- a/crates/ruff_python_ast/src/nodes.rs +++ b/crates/ruff_python_ast/src/nodes.rs @@ -2348,17 +2348,7 @@ impl TryFrom<[char; 2]> for MagicKind { impl fmt::Display for MagicKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - MagicKind::Shell => f.write_str("!"), - MagicKind::ShCap => f.write_str("!!"), - MagicKind::Help => f.write_str("?"), - MagicKind::Help2 => f.write_str("??"), - MagicKind::Magic => f.write_str("%"), - MagicKind::Magic2 => f.write_str("%%"), - MagicKind::Quote => f.write_str(","), - MagicKind::Quote2 => f.write_str(";"), - MagicKind::Paren => f.write_str("/"), - } + f.write_str(self.as_str()) } } @@ -2376,6 +2366,30 @@ impl MagicKind { }; len.into() } + + /// Returns `true` if the kind is a help command i.e., `?` or `??`. + pub const fn is_help(self) -> bool { + matches!(self, MagicKind::Help | MagicKind::Help2) + } + + /// Returns `true` if the kind is a magic command i.e., `%` or `%%`. 
+ pub const fn is_magic(self) -> bool { + matches!(self, MagicKind::Magic | MagicKind::Magic2) + } + + pub fn as_str(self) -> &'static str { + match self { + MagicKind::Shell => "!", + MagicKind::ShCap => "!!", + MagicKind::Help => "?", + MagicKind::Help2 => "??", + MagicKind::Magic => "%", + MagicKind::Magic2 => "%%", + MagicKind::Quote => ",", + MagicKind::Quote2 => ";", + MagicKind::Paren => "/", + } + } } #[derive(Clone, Debug, PartialEq, Eq, Hash)] diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml index 052105013a..9fa52c06c9 100644 --- a/crates/ruff_python_parser/Cargo.toml +++ b/crates/ruff_python_parser/Cargo.toml @@ -14,7 +14,7 @@ build = "build.rs" [lib] [dependencies] -ruff_python_ast = { path = "../ruff_python_ast"} +ruff_python_ast = { path = "../ruff_python_ast" } ruff_text_size = { path = "../ruff_text_size" } anyhow = { workspace = true } diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index a65737c04f..48670d922b 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -428,6 +428,72 @@ impl<'source> Lexer<'source> { self.cursor.bump(); value.push('\\'); } + // Help end escape commands are those that end with 1 or 2 question marks. + // Here, we're only looking for a subset of help end escape commands which + // are the ones that have the escape token at the start of the line as well. + // On the other hand, we're not looking for help end escape commands that + // are strict in the sense that the escape token is only at the end. For example, + // + // * `%foo?` is recognized as a help end escape command but not as a strict one. + // * `foo?` is recognized as a strict help end escape command which is not + // lexed here but is identified at the parser level. 
+ // + // Help end escape commands are implemented in the IPython codebase using regex: + // https://github.com/ipython/ipython/blob/292e3a23459ca965b8c1bfe2c3707044c510209a/IPython/core/inputtransformer2.py#L454-L462 + '?' => { + self.cursor.bump(); + let mut question_count = 1u32; + while self.cursor.eat_char('?') { + question_count += 1; + } + + // The original implementation in the IPython codebase is based on regex which + // means that it's strict in the sense that it won't recognize a help end escape: + // * If there's any whitespace before the escape token (e.g. `%foo ?`) + // * If there are more than 2 question mark tokens (e.g. `%foo???`) + // which is what we're doing here as well. In that case, we'll continue with + // the prefixed escape token. + // + // Now, the whitespace and empty value check also makes sure that an empty + // command (e.g. `%?` or `? ??`, no value after/between the escape tokens) + // is not recognized as a help end escape command. So, `%?` and `? ??` are + // `MagicKind::Magic` and `MagicKind::Help` because of the initial `%` and `?` + // tokens. + if question_count > 2 + || value.chars().last().map_or(true, is_python_whitespace) + || !matches!(self.cursor.first(), '\n' | '\r' | EOF_CHAR) + { + // Not a help end escape command, so continue with the lexing. + value.reserve(question_count as usize); + for _ in 0..question_count { + value.push('?'); + } + continue; + } + + if kind.is_help() { + // If we've recognized this as a help end escape command, then + // any question mark token / whitespaces at the start are not + // considered as part of the value. + // + // For example, `??foo?` is recognized as `MagicKind::Help` and + // `value` is `foo` instead of `??foo`. + value = value.trim_start_matches([' ', '?']).to_string(); + } else if kind.is_magic() { + // Between `%` and `?` (at the end), the `?` takes priority + // over the `%` so `%foo?` is recognized as `MagicKind::Help` + // and `value` is `%foo` instead of `foo`. 
So, we need to + // insert the magic escape token at the start. + value.insert_str(0, kind.as_str()); + } + + let kind = match question_count { + 1 => MagicKind::Help, + 2 => MagicKind::Help2, + _ => unreachable!("`question_count` is always 1 or 2"), + }; + return Tok::MagicCommand { kind, value }; + } '\n' | '\r' | EOF_CHAR => { return Tok::MagicCommand { kind, value }; } @@ -1122,6 +1188,20 @@ fn is_identifier_continuation(c: char) -> bool { } } +/// Returns `true` for [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens) +/// characters. +/// +/// This is the same as `ruff_python_trivia::is_python_whitespace` and is copied +/// here to avoid a circular dependency as `ruff_python_trivia` has a dev-dependency +/// on `ruff_python_lexer`. +const fn is_python_whitespace(c: char) -> bool { + matches!( + c, + // Space, tab, or form-feed + ' ' | '\t' | '\x0C' + ) +} + #[cfg(test)] mod tests { use num_bigint::BigInt; @@ -1355,6 +1435,117 @@ mod tests { ] ); } + + #[test] + fn test_jupyter_magic_help_end() { + let source = r" +?foo? +?? foo? +?? foo ? +?foo?? +??foo?? +???foo? +???foo?? +??foo??? +???foo??? +?? \ + foo? +?? \ +? +???? +%foo? +%foo?? +%%foo??? +!pwd?" 
+ .trim(); + let tokens = lex_jupyter_source(source); + assert_eq!( + tokens, + [ + Tok::MagicCommand { + value: "foo".to_string(), + kind: MagicKind::Help, + }, + Tok::Newline, + Tok::MagicCommand { + value: "foo".to_string(), + kind: MagicKind::Help, + }, + Tok::Newline, + Tok::MagicCommand { + value: " foo ?".to_string(), + kind: MagicKind::Help2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "foo".to_string(), + kind: MagicKind::Help2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "foo".to_string(), + kind: MagicKind::Help2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "foo".to_string(), + kind: MagicKind::Help, + }, + Tok::Newline, + Tok::MagicCommand { + value: "foo".to_string(), + kind: MagicKind::Help2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "foo???".to_string(), + kind: MagicKind::Help2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "?foo???".to_string(), + kind: MagicKind::Help2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "foo".to_string(), + kind: MagicKind::Help, + }, + Tok::Newline, + Tok::MagicCommand { + value: " ?".to_string(), + kind: MagicKind::Help2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "??".to_string(), + kind: MagicKind::Help2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "%foo".to_string(), + kind: MagicKind::Help, + }, + Tok::Newline, + Tok::MagicCommand { + value: "%foo".to_string(), + kind: MagicKind::Help2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "foo???".to_string(), + kind: MagicKind::Magic2, + }, + Tok::Newline, + Tok::MagicCommand { + value: "pwd".to_string(), + kind: MagicKind::Help, + }, + Tok::Newline, + ] + ); + } + #[test] fn test_jupyter_magic_indentation() { let source = r" diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__jupyter_magic.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__jupyter_magic.snap index adfa86aeed..895867df6e 100644 --- 
a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__jupyter_magic.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__jupyter_magic.snap @@ -49,14 +49,14 @@ Module( StmtLineMagic { range: 81..88, kind: Help, - value: "a.foo?", + value: "a.foo", }, ), LineMagic( StmtLineMagic { range: 89..100, kind: Help2, - value: "a.foo()??", + value: "a.foo()", }, ), LineMagic(