diff --git a/Cargo.lock b/Cargo.lock
index 97511968ff..ba459f3f1c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2343,6 +2343,7 @@ dependencies = [
  "lalrpop",
  "lalrpop-util",
  "memchr",
+ "once_cell",
  "ruff_python_ast",
  "ruff_text_size",
  "rustc-hash",
diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml
index 886bb07fec..e1291ab6a2 100644
--- a/crates/ruff_python_parser/Cargo.toml
+++ b/crates/ruff_python_parser/Cargo.toml
@@ -28,6 +28,7 @@ rustc-hash = { workspace = true }
 static_assertions = { workspace = true }
 unicode-ident = { workspace = true }
 unicode_names2 = { workspace = true }
+once_cell = "1.19.0"
 
 [dev-dependencies]
 insta = { workspace = true }
diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs
index 807b645aef..0451604040 100644
--- a/crates/ruff_python_parser/src/string.rs
+++ b/crates/ruff_python_parser/src/string.rs
@@ -1,6 +1,8 @@
 //! Parsing of string literals, bytes literals, and implicit string concatenation.
 
 use bstr::ByteSlice;
+use memchr::memmem;
+use once_cell::sync::Lazy;
 
 use ruff_python_ast::{self as ast, Expr};
 use ruff_text_size::{Ranged, TextRange, TextSize};
@@ -8,6 +10,10 @@ use ruff_text_size::{Ranged, TextRange, TextSize};
 use crate::lexer::{LexicalError, LexicalErrorType};
 use crate::token::{StringKind, Tok};
 
+/// Substring searcher for the backslash that begins an escape sequence.
+/// Declared `static` (not `const`) so the lazily built finder is initialized once and shared.
+static BACKSLASH_FINDER: Lazy<memmem::Finder<'static>> = Lazy::new(|| memmem::Finder::new(b"\\"));
+
 pub(crate) enum StringType {
     Str(ast::StringLiteral),
     Bytes(ast::BytesLiteral),
@@ -310,7 +316,7 @@ impl StringParser {
             }));
         }
 
-        let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
+        let Some(mut escape) = BACKSLASH_FINDER.find(self.source.as_bytes()) else {
             // If the string doesn't contain any escape sequences, return the owned string.
             return Ok(StringType::Bytes(ast::BytesLiteral {
                 value: self.source.into_boxed_bytes(),
@@ -336,7 +342,7 @@ impl StringParser {
                 }
             }
 
-            let Some(next_escape) = memchr::memchr(b'\\', self.source[self.cursor..].as_bytes())
+            let Some(next_escape) = BACKSLASH_FINDER.find(self.source[self.cursor..].as_bytes())
             else {
                 // Add the rest of the string to the value.
                 let rest = &self.source[self.cursor..];
@@ -364,7 +370,7 @@ impl StringParser {
             }));
         }
 
-        let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
+        let Some(mut escape) = BACKSLASH_FINDER.find(self.source.as_bytes()) else {
             // If the string doesn't contain any escape sequences, return the owned string.
             return Ok(StringType::Str(ast::StringLiteral {
                 value: self.source,
@@ -392,7 +398,7 @@ impl StringParser {
                 }
             }
 
-            let Some(next_escape) = memchr::memchr(b'\\', self.source[self.cursor..].as_bytes())
+            let Some(next_escape) = BACKSLASH_FINDER.find(self.source[self.cursor..].as_bytes())
             else {
                 // Add the rest of the string to the value.
                 let rest = &self.source[self.cursor..];