From 65aebf127a64546745af7e346de41cc576371e33 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 25 Sep 2023 10:34:59 -0400 Subject: [PATCH] Treat form feed as whitespace in `SimpleTokenizer` (#7626) ## Summary This is whitespace as per `is_python_whitespace`, and right now it tends to lead to panics in the formatter. Seems reasonable to treat it as whitespace in the `SimpleTokenizer` too. Closes https://github.com/astral-sh/ruff/issues/7624. --- .../resources/test/fixtures/ruff/form_feed.py | 6 +++++ .../tests/snapshots/format@form_feed.py.snap | 26 +++++++++++++++++++ crates/ruff_python_trivia/src/tokenizer.rs | 17 +++++++----- 3 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 crates/ruff_python_formatter/resources/test/fixtures/ruff/form_feed.py create mode 100644 crates/ruff_python_formatter/tests/snapshots/format@form_feed.py.snap diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/form_feed.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/form_feed.py new file mode 100644 index 0000000000..311d12f477 --- /dev/null +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/form_feed.py @@ -0,0 +1,6 @@ +# Regression test for: https://github.com/astral-sh/ruff/issues/7624 +if symbol is not None: + request["market"] = market["id"] + # "remaining_volume": "0.0", +else: + pass diff --git a/crates/ruff_python_formatter/tests/snapshots/format@form_feed.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@form_feed.py.snap new file mode 100644 index 0000000000..f57c380119 --- /dev/null +++ b/crates/ruff_python_formatter/tests/snapshots/format@form_feed.py.snap @@ -0,0 +1,26 @@ +--- +source: crates/ruff_python_formatter/tests/fixtures.rs +input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/form_feed.py +--- +## Input +```py +# Regression test for: https://github.com/astral-sh/ruff/issues/7624 +if symbol is not None: + request["market"] = market["id"] + # "remaining_volume": "0.0", 
+else: + pass +``` + +## Output +```py +# Regression test for: https://github.com/astral-sh/ruff/issues/7624 +if symbol is not None: + request["market"] = market["id"] +# "remaining_volume": "0.0", +else: + pass +``` + + + diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs index b0bf36ded7..7f6835edfe 100644 --- a/crates/ruff_python_trivia/src/tokenizer.rs +++ b/crates/ruff_python_trivia/src/tokenizer.rs @@ -566,8 +566,10 @@ impl<'a> SimpleTokenizer<'a> { kind } - ' ' | '\t' => { - self.cursor.eat_while(|c| matches!(c, ' ' | '\t')); + // Space, tab, or form feed. We ignore the true semantics of form feed, and treat it as + // whitespace. + ' ' | '\t' | '\x0C' => { + self.cursor.eat_while(|c| matches!(c, ' ' | '\t' | '\x0C')); SimpleTokenKind::Whitespace } @@ -837,10 +839,13 @@ impl<'a> BackwardsTokenizer<'a> { } let kind = match last { - // This may not be 100% correct because it will lex-out trailing whitespace from a comment - // as whitespace rather than being part of the token. This shouldn't matter for what we use the lexer for. - ' ' | '\t' => { - self.cursor.eat_back_while(|c| matches!(c, ' ' | '\t')); + // Space, tab, or form feed. We ignore the true semantics of form feed, and treat it as + // whitespace. Note that this will lex-out trailing whitespace from a comment as + // whitespace rather than as part of the comment token, but this shouldn't matter for + // our use case. + ' ' | '\t' | '\x0C' => { + self.cursor + .eat_back_while(|c| matches!(c, ' ' | '\t' | '\x0C')); SimpleTokenKind::Whitespace }