Lex Jupyter line magic with `Mode::Jupyter` (#23)

Lex Jupyter line magic with `Mode::Jupyter` This PR adds a new token `MagicCommand`[^1] which the lexer will recognize when in `Mode::Jupyter`. The rules for the lexer are as follows: 1. Given that we are at the start of a line, skip the indentation and look for [characters that represent the start of a magic command](635815e8f1/IPython/core/inputtransformer2.py (L335-L346)), determine the magic kind and capture all the characters following it as the command string. 2. If the command extends over multiple lines, the lexer will skip the line continuation character (`\`) but only if it's followed by a newline (`\n` or `\r`). The reason to skip it only when it is followed by a newline is that backslashes can also occur within the command string, where they should not be skipped: ```rust // Skip this backslash // v // !pwd \ // && ls -a | sed 's/^/\\ /' // ^^ // Don't skip these backslashes ``` 3. The parser, when in `Mode::Jupyter`, will filter these tokens before the parsing begins. There is a small caveat when the magic command is indented. In the following example, when the parser filters out the magic command, it'll throw an indentation error: ```python for i in range(5): !ls # What the parser will see for i in range(5): ``` [^1]: I would prefer to have some other name as this not only represents a line magic (`%`) but also shell command (`!`), help command (`?`) and others. In the original implementation, it's named as ["IPython Syntax"](635815e8f1/IPython/core/inputtransformer2.py (L332))
2023-07-18 09:24:24 +05:30 · 2023-07-18 09:24:24 +05:30 · 3b4c8fffe5
parent 126652b684
commit 3b4c8fffe5
5 changed files with 625 additions and 9 deletions
--- a/core/src/mode.rs
+++ b/core/src/mode.rs
@ -1,7 +1,7 @@
 //! Control in the different modes by which a source file can be parsed.

 /// The mode argument specifies in what way code must be parsed.
-#[derive(Clone, Copy, Hash, PartialEq, Eq)]
+#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
 pub enum Mode {
    /// The code consists of a sequence of statements.
    Module,
@ -9,6 +9,34 @@ pub enum Mode {
    Interactive,
    /// The code consists of a single expression.
    Expression,
+    /// The code consists of a sequence of statements which are part of a
+    /// Jupyter Notebook and thus could include escape commands scoped to
+    /// a single line.
+    ///
+    /// ## Limitations:
+    ///
+    /// These escape commands are only supported when they are the only
+    /// statement on a line. If they're part of a larger statement such as
+    /// on the right-hand side of an assignment, the lexer will not recognize
+    /// them as escape commands.
+    ///
+    /// For [Dynamic object information], the escape characters (`?`, `??`)
+    /// must be used before an object. For example, `?foo` will be recognized,
+    /// but `foo?` will not.
+    ///
+    /// ## Supported escape commands:
+    /// - [Magic command system] which is limited to [line magics] and can start
+    ///   with `%` or `%%`.
+    /// - [Dynamic object information] which can start with `?` or `??`.
+    /// - [System shell access] which can start with `!` or `!!`.
+    /// - [Automatic parentheses and quotes] which can start with `/`, `;`, or `,`.
+    ///
+    /// [Magic command system]: https://ipython.readthedocs.io/en/stable/interactive/reference.html#magic-command-system
+    /// [line magics]: https://ipython.readthedocs.io/en/stable/interactive/magics.html#line-magics
+    /// [Dynamic object information]: https://ipython.readthedocs.io/en/stable/interactive/reference.html#dynamic-object-information
+    /// [System shell access]: https://ipython.readthedocs.io/en/stable/interactive/reference.html#system-shell-access
+    /// [Automatic parentheses and quotes]: https://ipython.readthedocs.io/en/stable/interactive/reference.html#automatic-parentheses-and-quotes
+    Jupyter,
 }

 impl std::str::FromStr for Mode {
@ -17,6 +45,7 @@ impl std::str::FromStr for Mode {
        match s {
            "exec" | "single" => Ok(Mode::Module),
            "eval" => Ok(Mode::Expression),
+            "jupyter" => Ok(Mode::Jupyter),
            _ => Err(ModeParseError),
        }
    }
@ -28,6 +57,6 @@ pub struct ModeParseError;

 impl std::fmt::Display for ModeParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, r#"mode must be "exec", "eval", or "single""#)
+        write!(f, r#"mode must be "exec", "eval", "jupyter", or "single""#)
    }
 }
--- a/parser/src/lexer.rs
+++ b/parser/src/lexer.rs
@ -32,7 +32,7 @@ use crate::{
    soft_keywords::SoftKeywordTransformer,
    string::FStringErrorType,
    text_size::{TextLen, TextRange, TextSize},
-    token::{StringKind, Tok},
+    token::{MagicKind, StringKind, Tok},
    Mode,
 };
 use log::trace;
@ -175,6 +175,8 @@ pub struct Lexer<T: Iterator<Item = char>> {
    pending: Vec<Spanned>,
    // The current location.
    location: TextSize,
+    // Lexer mode.
+    mode: Mode,
 }

 // generated in build.rs, in gen_phf()
@ -213,7 +215,7 @@ pub fn lex_starts_at(
    mode: Mode,
    start_offset: TextSize,
 ) -> SoftKeywordTransformer<Lexer<std::str::Chars<'_>>> {
-    SoftKeywordTransformer::new(Lexer::new(source.chars(), start_offset), mode)
+    SoftKeywordTransformer::new(Lexer::new(source.chars(), mode, start_offset), mode)
 }

 impl<T> Lexer<T>
@ -222,7 +224,7 @@ where
 {
    /// Create a new lexer from T and a starting location. You probably want to use
    /// [`lex`] instead.
-    pub fn new(input: T, start: TextSize) -> Self {
+    pub fn new(input: T, mode: Mode, start: TextSize) -> Self {
        let mut lxr = Lexer {
            at_begin_of_line: true,
            nesting: 0,
@ -231,6 +233,7 @@ where
            pending: Vec::with_capacity(5),
            location: start,
            window: CharWindow::new(input),
+            mode,
        };
        // Fill the window.
        lxr.window.slide();
@ -494,6 +497,59 @@ where
        Ok(())
    }

+    /// Lex a single magic command.
+    fn lex_magic_command(&mut self, kind: MagicKind) -> (Tok, TextRange) {
+        let start_pos = self.get_pos();
+        for _ in 0..u32::from(kind.prefix_len()) {
+            self.next_char();
+        }
+        let mut value = String::new();
+        loop {
+            match self.window[0] {
+                Some('\\') => {
+                    // Only skip the line continuation if it is followed by a newline
+                    // otherwise it is a normal backslash which is part of the magic command:
+                    //
+                    //        Skip this backslash
+                    //        v
+                    //   !pwd \
+                    //      && ls -a | sed 's/^/\\    /'
+                    //                          ^^
+                    //                          Don't skip these backslashes
+                    if matches!(self.window[1], Some('\n' | '\r')) {
+                        self.next_char();
+                        self.next_char();
+                        continue;
+                    }
+                }
+                Some('\n' | '\r') | None => {
+                    let end_pos = self.get_pos();
+                    return (
+                        Tok::MagicCommand { kind, value },
+                        TextRange::new(start_pos, end_pos),
+                    );
+                }
+                Some(_) => {}
+            }
+            value.push(self.next_char().unwrap());
+        }
+    }
+
+    fn lex_and_emit_magic_command(&mut self) {
+        let kind = match self.window[..2] {
+            [Some(c1), Some(c2)] => {
+                MagicKind::try_from([c1, c2]).map_or_else(|_| MagicKind::try_from(c1), Ok)
+            }
+            // When the escape character is the last character of the file.
+            [Some(c), None] => MagicKind::try_from(c),
+            _ => return,
+        };
+        if let Ok(kind) = kind {
+            let magic_command = self.lex_magic_command(kind);
+            self.emit(magic_command);
+        }
+    }
+
    /// Lex a string literal.
    fn lex_string(&mut self, kind: StringKind) -> LexResult {
        let start_pos = self.get_pos();
@ -644,6 +700,10 @@ where
                    spaces = 0;
                    tabs = 0;
                }
+                // https://github.com/ipython/ipython/blob/635815e8f1ded5b764d66cacc80bbe25e9e2587f/IPython/core/inputtransformer2.py#L345
+                Some('%' | '!' | '?' | '/' | ';' | ',') if self.mode == Mode::Jupyter => {
+                    self.lex_and_emit_magic_command();
+                }
                Some('\x0C') => {
                    // Form feed character!
                    // Reset indentation for the Emacs user.
@ -1381,6 +1441,11 @@ mod tests {
        lexer.map(|x| x.unwrap().0).collect()
    }

+    pub fn lex_jupyter_source(source: &str) -> Vec<Tok> {
+        let lexer = lex(source, Mode::Jupyter);
+        lexer.map(|x| x.unwrap().0).collect()
+    }
+
    fn str_tok(s: &str) -> Tok {
        Tok::String {
            value: s.to_owned(),
@ -1397,6 +1462,213 @@ mod tests {
        }
    }

+    fn assert_jupyter_magic_line_continuation_with_eol(eol: &str) {
+        let source = format!("%matplotlib \\{}  --inline", eol);
+        let tokens = lex_jupyter_source(&source);
+        assert_eq!(
+            tokens,
+            vec![Tok::MagicCommand {
+                value: "matplotlib   --inline".to_string(),
+                kind: MagicKind::Magic
+            },]
+        )
+    }
+
+    #[test]
+    fn test_jupyter_magic_line_continuation_unix_eol() {
+        assert_jupyter_magic_line_continuation_with_eol(UNIX_EOL);
+    }
+
+    #[test]
+    fn test_jupyter_magic_line_continuation_mac_eol() {
+        assert_jupyter_magic_line_continuation_with_eol(MAC_EOL);
+    }
+
+    #[test]
+    fn test_jupyter_magic_line_continuation_windows_eol() {
+        assert_jupyter_magic_line_continuation_with_eol(WINDOWS_EOL);
+    }
+
+    fn assert_jupyter_magic_line_continuation_with_eol_and_eof(eol: &str) {
+        let source = format!("%matplotlib \\{}", eol);
+        let tokens = lex_jupyter_source(&source);
+        assert_eq!(
+            tokens,
+            vec![Tok::MagicCommand {
+                value: "matplotlib ".to_string(),
+                kind: MagicKind::Magic
+            },]
+        )
+    }
+
+    #[test]
+    fn test_jupyter_magic_line_continuation_unix_eol_and_eof() {
+        assert_jupyter_magic_line_continuation_with_eol_and_eof(UNIX_EOL);
+    }
+
+    #[test]
+    fn test_jupyter_magic_line_continuation_mac_eol_and_eof() {
+        assert_jupyter_magic_line_continuation_with_eol_and_eof(MAC_EOL);
+    }
+
+    #[test]
+    fn test_jupyter_magic_line_continuation_windows_eol_and_eof() {
+        assert_jupyter_magic_line_continuation_with_eol_and_eof(WINDOWS_EOL);
+    }
+
+    #[test]
+    fn test_empty_jupyter_magic() {
+        let source = "%\n%%\n!\n!!\n?\n??\n/\n,\n;";
+        let tokens = lex_jupyter_source(source);
+        assert_eq!(
+            tokens,
+            vec![
+                Tok::MagicCommand {
+                    value: "".to_string(),
+                    kind: MagicKind::Magic,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "".to_string(),
+                    kind: MagicKind::Magic2,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "".to_string(),
+                    kind: MagicKind::Shell,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "".to_string(),
+                    kind: MagicKind::ShCap,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "".to_string(),
+                    kind: MagicKind::Help,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "".to_string(),
+                    kind: MagicKind::Help2,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "".to_string(),
+                    kind: MagicKind::Paren,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "".to_string(),
+                    kind: MagicKind::Quote,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "".to_string(),
+                    kind: MagicKind::Quote2,
+                },
+            ]
+        )
+    }
+
+    #[test]
+    fn test_jupyter_magic() {
+        let source = r"
+?foo
+??foo
+%timeit a = b
+%timeit a % 3
+%matplotlib \
+    --inline
+!pwd \
+  && ls -a | sed 's/^/\\    /'
+!!cd /Users/foo/Library/Application\ Support/
+/foo 1 2
+,foo 1 2
+;foo 1 2
+    !ls
+"
+        .trim();
+        let tokens = lex_jupyter_source(source);
+        assert_eq!(
+            tokens,
+            vec![
+                Tok::MagicCommand {
+                    value: "foo".to_string(),
+                    kind: MagicKind::Help,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "foo".to_string(),
+                    kind: MagicKind::Help2,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "timeit a = b".to_string(),
+                    kind: MagicKind::Magic,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "timeit a % 3".to_string(),
+                    kind: MagicKind::Magic,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "matplotlib     --inline".to_string(),
+                    kind: MagicKind::Magic,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "pwd   && ls -a | sed 's/^/\\\\    /'".to_string(),
+                    kind: MagicKind::Shell,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "cd /Users/foo/Library/Application\\ Support/".to_string(),
+                    kind: MagicKind::ShCap,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "foo 1 2".to_string(),
+                    kind: MagicKind::Paren,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "foo 1 2".to_string(),
+                    kind: MagicKind::Quote,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "foo 1 2".to_string(),
+                    kind: MagicKind::Quote2,
+                },
+                #[cfg(feature = "full-lexer")]
+                Tok::NonLogicalNewline,
+                Tok::MagicCommand {
+                    value: "ls".to_string(),
+                    kind: MagicKind::Shell,
+                },
+            ]
+        )
+    }
+
    #[test]
    fn test_numbers() {
        let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j";
--- a/parser/src/parser.rs
+++ b/parser/src/parser.rs
@ -250,7 +250,7 @@ impl Parse for ast::Constant {
 }

 /// Parse a full Python program usually consisting of multiple lines.
-///  
+///
 /// This is a convenience function that can be used to parse a full Python program without having to
 /// specify the [`Mode`] or the location. It is probably what you want to use most of the time.
 ///
@ -326,7 +326,8 @@ pub fn parse_expression_starts_at(
 /// Parse the given Python source code using the specified [`Mode`].
 ///
 /// This function is the most general function to parse Python code. Based on the [`Mode`] supplied,
-/// it can be used to parse a single expression, a full Python program or an interactive expression.
+/// it can be used to parse a single expression, a full Python program, an interactive expression
+/// or a Python program containing Jupyter magics.
 ///
 /// # Example
 ///
@ -354,6 +355,20 @@ pub fn parse_expression_starts_at(
 /// let program = parse(source, Mode::Module, "<embedded>");
 /// assert!(program.is_ok());
 /// ```
+///
+/// Additionally, we can parse a Python program containing Jupyter magics:
+///
+/// ```
+/// use rustpython_parser::{Mode, parse};
+///
+/// let source = r#"
+/// %timeit 1 + 2
+/// ?str.replace
+/// !ls
+/// "#;
+/// let program = parse(source, Mode::Jupyter, "<embedded>");
+/// assert!(program.is_ok());
+/// ```
 pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> {
    parse_starts_at(source, mode, source_path, TextSize::default())
 }
@ -394,6 +409,9 @@ pub fn parse_starts_at(
 ///
 /// This could allow you to perform some preprocessing on the tokens before parsing them.
 ///
+/// When in [`Mode::Jupyter`], this will filter out all the Jupyter magic commands
+/// before parsing the tokens.
+///
 /// # Example
 ///
 /// As an example, instead of parsing a string, we can parse a list of tokens after we generate
@ -414,7 +432,12 @@ pub fn parse_tokens(
    #[cfg(feature = "full-lexer")]
    let lxr =
        lxr.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
-    parse_filtered_tokens(lxr, mode, source_path)
+    if mode == Mode::Jupyter {
+        let lxr = lxr.filter_ok(|(tok, _)| !matches!(tok, Tok::MagicCommand { .. }));
+        parse_filtered_tokens(lxr, mode, source_path)
+    } else {
+        parse_filtered_tokens(lxr, mode, source_path)
+    }
 }

 fn parse_filtered_tokens(
@ -1238,4 +1261,65 @@ class Abcd:
        .unwrap();
        insta::assert_debug_snapshot!(parse_ast);
    }
+
+    #[test]
+    fn test_jupyter_magic() {
+        let parse_ast = parse(
+            r#"
+# Normal Python code
+(
+    a
+    %
+    b
+)
+
+# Dynamic object info
+??a.foo
+?a.foo
+?a.foo?
+??a.foo()??
+
+# Line magic
+%timeit a = b
+%timeit foo(b) % 3
+%alias showPath pwd && ls -a
+%timeit a =\
+  foo(b); b = 2
+%matplotlib --inline
+%matplotlib \
+    --inline
+
+# System shell access
+!pwd && ls -a | sed 's/^/\    /'
+!pwd \
+  && ls -a | sed 's/^/\\    /'
+!!cd /Users/foo/Library/Application\ Support/
+
+# Let's add some Python code to make sure that earlier escapes were handled
+# correctly and that we didn't consume any of the following code as a result
+# of the escapes.
+def foo():
+    return (
+        a
+        !=
+        b
+    )
+
+# Transforms into `foo(..)`
+/foo 1 2
+;foo 1 2
+,foo 1 2
+
+# Indented magic
+for a in range(5):
+    %ls
+    pass
+"#
+            .trim(),
+            Mode::Jupyter,
+            "<test>",
+        )
+        .unwrap();
+        insta::assert_debug_snapshot!(parse_ast);
+    }
 }
--- a/parser/src/snapshots/rustpython_parserparsertests__jupyter_magic.snap
+++ b/parser/src/snapshots/rustpython_parserparsertests__jupyter_magic.snap
@ -0,0 +1,135 @@
+---
+source: parser/src/parser.rs
+expression: parse_ast
+---
+Module(
+    ModModule {
+        range: 0..736,
+        body: [
+            Expr(
+                StmtExpr {
+                    range: 21..42,
+                    value: BinOp(
+                        ExprBinOp {
+                            range: 27..40,
+                            left: Name(
+                                ExprName {
+                                    range: 27..28,
+                                    id: "a",
+                                    ctx: Load,
+                                },
+                            ),
+                            op: Mod,
+                            right: Name(
+                                ExprName {
+                                    range: 39..40,
+                                    id: "b",
+                                    ctx: Load,
+                                },
+                            ),
+                        },
+                    ),
+                },
+            ),
+            FunctionDef(
+                StmtFunctionDef {
+                    range: 566..626,
+                    name: Identifier {
+                        id: "foo",
+                        range: 570..573,
+                    },
+                    args: Arguments {
+                        range: 573..575,
+                        posonlyargs: [],
+                        args: [],
+                        vararg: None,
+                        kwonlyargs: [],
+                        kwarg: None,
+                    },
+                    body: [
+                        Return(
+                            StmtReturn {
+                                range: 581..626,
+                                value: Some(
+                                    Compare(
+                                        ExprCompare {
+                                            range: 598..620,
+                                            left: Name(
+                                                ExprName {
+                                                    range: 598..599,
+                                                    id: "a",
+                                                    ctx: Load,
+                                                },
+                                            ),
+                                            ops: [
+                                                NotEq,
+                                            ],
+                                            comparators: [
+                                                Name(
+                                                    ExprName {
+                                                        range: 619..620,
+                                                        id: "b",
+                                                        ctx: Load,
+                                                    },
+                                                ),
+                                            ],
+                                        },
+                                    ),
+                                ),
+                            },
+                        ),
+                    ],
+                    decorator_list: [],
+                    returns: None,
+                    type_comment: None,
+                },
+            ),
+            For(
+                StmtFor {
+                    range: 701..736,
+                    target: Name(
+                        ExprName {
+                            range: 705..706,
+                            id: "a",
+                            ctx: Store,
+                        },
+                    ),
+                    iter: Call(
+                        ExprCall {
+                            range: 710..718,
+                            func: Name(
+                                ExprName {
+                                    range: 710..715,
+                                    id: "range",
+                                    ctx: Load,
+                                },
+                            ),
+                            args: [
+                                Constant(
+                                    ExprConstant {
+                                        range: 716..717,
+                                        value: Int(
+                                            5,
+                                        ),
+                                        kind: None,
+                                    },
+                                ),
+                            ],
+                            keywords: [],
+                        },
+                    ),
+                    body: [
+                        Pass(
+                            StmtPass {
+                                range: 732..736,
+                            },
+                        ),
+                    ],
+                    orelse: [],
+                    type_comment: None,
+                },
+            ),
+        ],
+        type_ignores: [],
+    },
+)
--- a/parser/src/token.rs
+++ b/parser/src/token.rs
@ -42,6 +42,14 @@ pub enum Tok {
        /// Whether the string is triple quoted.
        triple_quoted: bool,
    },
+    /// Token value for a Jupyter magic command. These are filtered out of the token stream
+    /// prior to parsing when the mode is [`Mode::Jupyter`].
+    MagicCommand {
+        /// The magic command value.
+        value: String,
+        /// The kind of magic command.
+        kind: MagicKind,
+    },
    /// Token value for a comment. These are filtered out of the token stream prior to parsing.
    #[cfg(feature = "full-lexer")]
    Comment(String),
@ -202,7 +210,7 @@ pub enum Tok {
 impl Tok {
    pub fn start_marker(mode: Mode) -> Self {
        match mode {
-            Mode::Module => Tok::StartModule,
+            Mode::Module | Mode::Jupyter => Tok::StartModule,
            Mode::Interactive => Tok::StartInteractive,
            Mode::Expression => Tok::StartExpression,
        }
@ -225,6 +233,7 @@ impl fmt::Display for Tok {
                let quotes = "\"".repeat(if *triple_quoted { 3 } else { 1 });
                write!(f, "{kind}{quotes}{value}{quotes}")
            }
+            MagicCommand { kind, value } => write!(f, "{kind}{value}"),
            Newline => f.write_str("Newline"),
            #[cfg(feature = "full-lexer")]
            NonLogicalNewline => f.write_str("NonLogicalNewline"),
@ -325,6 +334,93 @@ impl fmt::Display for Tok {
    }
 }

+/// The kind of magic command as defined in [IPython Syntax] in the IPython codebase.
+///
+/// [IPython Syntax]: https://github.com/ipython/ipython/blob/635815e8f1ded5b764d66cacc80bbe25e9e2587f/IPython/core/inputtransformer2.py#L335-L343
+#[derive(PartialEq, Eq, Debug, Clone, Hash, Copy)]
+pub enum MagicKind {
+    /// Send line to underlying system shell.
+    Shell,
+    /// Send line to system shell and capture output.
+    ShCap,
+    /// Show help on object.
+    Help,
+    /// Show help on object, with extra verbosity.
+    Help2,
+    /// Call magic function.
+    Magic,
+    /// Call cell magic function.
+    Magic2,
+    /// Call first argument with rest of line as arguments after splitting on whitespace
+    /// and quote each as string.
+    Quote,
+    /// Call first argument with rest of line as an argument quoted as a single string.
+    Quote2,
+    /// Call first argument with rest of line as arguments.
+    Paren,
+}
+
+impl TryFrom<char> for MagicKind {
+    type Error = String;
+
+    fn try_from(ch: char) -> Result<Self, Self::Error> {
+        match ch {
+            '!' => Ok(MagicKind::Shell),
+            '?' => Ok(MagicKind::Help),
+            '%' => Ok(MagicKind::Magic),
+            ',' => Ok(MagicKind::Quote),
+            ';' => Ok(MagicKind::Quote2),
+            '/' => Ok(MagicKind::Paren),
+            _ => Err(format!("Unexpected magic escape: {ch}")),
+        }
+    }
+}
+
+impl TryFrom<[char; 2]> for MagicKind {
+    type Error = String;
+
+    fn try_from(ch: [char; 2]) -> Result<Self, Self::Error> {
+        match ch {
+            ['!', '!'] => Ok(MagicKind::ShCap),
+            ['?', '?'] => Ok(MagicKind::Help2),
+            ['%', '%'] => Ok(MagicKind::Magic2),
+            [c1, c2] => Err(format!("Unexpected magic escape: {c1}{c2}")),
+        }
+    }
+}
+
+impl fmt::Display for MagicKind {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MagicKind::Shell => f.write_str("!"),
+            MagicKind::ShCap => f.write_str("!!"),
+            MagicKind::Help => f.write_str("?"),
+            MagicKind::Help2 => f.write_str("??"),
+            MagicKind::Magic => f.write_str("%"),
+            MagicKind::Magic2 => f.write_str("%%"),
+            MagicKind::Quote => f.write_str(","),
+            MagicKind::Quote2 => f.write_str(";"),
+            MagicKind::Paren => f.write_str("/"),
+        }
+    }
+}
+
+impl MagicKind {
+    /// Returns the length of the magic command prefix.
+    pub fn prefix_len(self) -> TextSize {
+        let len = match self {
+            MagicKind::Shell
+            | MagicKind::Magic
+            | MagicKind::Help
+            | MagicKind::Quote
+            | MagicKind::Quote2
+            | MagicKind::Paren => 1,
+            MagicKind::ShCap | MagicKind::Magic2 | MagicKind::Help2 => 2,
+        };
+        len.into()
+    }
+}
+
 /// The kind of string literal as described in the [String and Bytes literals]
 /// section of the Python reference.
 ///