From 2352de227718addbde3bcca018fbf2a3d2490777 Mon Sep 17 00:00:00 2001
From: Charlie Marsh
Date: Sat, 3 Feb 2024 06:40:41 -0800
Subject: [PATCH] Slight speed-up for lowercase and uppercase identifier checks (#9798)

It turns out that for ASCII identifiers, this is nearly 2x faster:

```
Parser/before time: [15.388 ns 15.395 ns 15.406 ns]
Parser/after time: [8.3786 ns 8.5821 ns 8.7715 ns]
```
---
 crates/ruff_python_stdlib/src/str.rs | 36 ++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/crates/ruff_python_stdlib/src/str.rs b/crates/ruff_python_stdlib/src/str.rs
index 2b7b90b64b..048b2e5929 100644
--- a/crates/ruff_python_stdlib/src/str.rs
+++ b/crates/ruff_python_stdlib/src/str.rs
@@ -14,9 +14,25 @@
 /// assert!(!is_lowercase("ABC"));
 /// assert!(is_lowercase(""));
 /// assert!(is_lowercase("_"));
+/// assert!(is_lowercase("αbc"));
+/// assert!(!is_lowercase("αBC"));
+/// assert!(!is_lowercase("Ωbc"));
 /// ```
 pub fn is_lowercase(s: &str) -> bool {
-    s.chars().all(|c| !c.is_alphabetic() || c.is_lowercase())
+    for (i, &c) in s.as_bytes().iter().enumerate() {
+        match c {
+            // Match against ASCII uppercase characters.
+            b'A'..=b'Z' => return false,
+            _ if c.is_ascii() => {}
+            // If the character is non-ASCII, fallback to slow path.
+            _ => {
+                return s[i..]
+                    .chars()
+                    .all(|c| c.is_lowercase() || !c.is_alphabetic())
+            }
+        }
+    }
+    true
 }
 
 /// Return `true` if a string is uppercase.
@@ -35,9 +51,25 @@ pub fn is_lowercase(s: &str) -> bool {
 /// assert!(!is_uppercase("abc"));
 /// assert!(is_uppercase(""));
 /// assert!(is_uppercase("_"));
+/// assert!(is_uppercase("ΩBC"));
+/// assert!(!is_uppercase("Ωbc"));
+/// assert!(!is_uppercase("αBC"));
 /// ```
 pub fn is_uppercase(s: &str) -> bool {
-    s.chars().all(|c| !c.is_alphabetic() || c.is_uppercase())
+    for (i, &c) in s.as_bytes().iter().enumerate() {
+        match c {
+            // Match against ASCII lowercase characters.
+            b'a'..=b'z' => return false,
+            _ if c.is_ascii() => {}
+            // If the character is non-ASCII, fallback to slow path.
+            _ => {
+                return s[i..]
+                    .chars()
+                    .all(|c| c.is_uppercase() || !c.is_alphabetic())
+            }
+        }
+    }
+    true
 }
 
 /// Return `true` if a string is _cased_ as lowercase.