Skip unquote allocation for non-quoted strings (#11813)

## Summary

Small optimization: no need to unquote if there aren't any quote
characters.
This commit is contained in:
Charlie Marsh 2025-02-26 16:56:31 -05:00 committed by GitHub
parent a439b7944d
commit f9497432dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 36 additions and 10 deletions

1
Cargo.lock generated
View File

@ -5531,6 +5531,7 @@ dependencies = [
"indoc", "indoc",
"insta", "insta",
"itertools 0.14.0", "itertools 0.14.0",
"memchr",
"regex", "regex",
"reqwest", "reqwest",
"reqwest-middleware", "reqwest-middleware",

View File

@ -26,7 +26,7 @@ uv-pypi-types = { workspace = true }
uv-warnings = { workspace = true } uv-warnings = { workspace = true }
fs-err = { workspace = true } fs-err = { workspace = true }
regex = { workspace = true } memchr = { workspace = true }
reqwest = { workspace = true, optional = true } reqwest = { workspace = true, optional = true }
reqwest-middleware = { workspace = true, optional = true } reqwest-middleware = { workspace = true, optional = true }
thiserror = { workspace = true } thiserror = { workspace = true }
@ -43,6 +43,7 @@ assert_fs = { version = "1.1.2" }
indoc = { workspace = true } indoc = { workspace = true }
insta = { version = "1.40.0", features = ["filters"] } insta = { version = "1.40.0", features = ["filters"] }
itertools = { version = "0.14.0" } itertools = { version = "0.14.0" }
regex = { workspace = true }
tempfile = { workspace = true } tempfile = { workspace = true }
test-case = { version = "3.3.1" } test-case = { version = "3.3.1" }
tokio = { version = "1.40.0" } tokio = { version = "1.40.0" }

View File

@ -523,7 +523,10 @@ fn parse_entry(
let start = s.cursor(); let start = s.cursor();
Ok(Some(if s.eat_if("-r") || s.eat_if("--requirement") { Ok(Some(if s.eat_if("-r") || s.eat_if("--requirement") {
let filename = parse_value(content, s, |c: char| !is_terminal(c))?; let filename = parse_value(content, s, |c: char| !is_terminal(c))?;
let filename = unquote(filename).unwrap_or_else(|_| filename.to_string()); let filename = unquote(filename)
.ok()
.flatten()
.unwrap_or_else(|| filename.to_string());
let end = s.cursor(); let end = s.cursor();
RequirementsTxtStatement::Requirements { RequirementsTxtStatement::Requirements {
filename, filename,
@ -532,7 +535,10 @@ fn parse_entry(
} }
} else if s.eat_if("-c") || s.eat_if("--constraint") { } else if s.eat_if("-c") || s.eat_if("--constraint") {
let filename = parse_value(content, s, |c: char| !is_terminal(c))?; let filename = parse_value(content, s, |c: char| !is_terminal(c))?;
let filename = unquote(filename).unwrap_or_else(|_| filename.to_string()); let filename = unquote(filename)
.ok()
.flatten()
.unwrap_or_else(|| filename.to_string());
let end = s.cursor(); let end = s.cursor();
RequirementsTxtStatement::Constraint { RequirementsTxtStatement::Constraint {
filename, filename,
@ -577,6 +583,8 @@ fn parse_entry(
} else if s.eat_if("-i") || s.eat_if("--index-url") { } else if s.eat_if("-i") || s.eat_if("--index-url") {
let given = parse_value(content, s, |c: char| !is_terminal(c))?; let given = parse_value(content, s, |c: char| !is_terminal(c))?;
let given = unquote(given) let given = unquote(given)
.ok()
.flatten()
.map(Cow::Owned) .map(Cow::Owned)
.unwrap_or(Cow::Borrowed(given)); .unwrap_or(Cow::Borrowed(given));
let expanded = expand_env_vars(given.as_ref()); let expanded = expand_env_vars(given.as_ref());
@ -606,6 +614,8 @@ fn parse_entry(
} else if s.eat_if("--extra-index-url") { } else if s.eat_if("--extra-index-url") {
let given = parse_value(content, s, |c: char| !is_terminal(c))?; let given = parse_value(content, s, |c: char| !is_terminal(c))?;
let given = unquote(given) let given = unquote(given)
.ok()
.flatten()
.map(Cow::Owned) .map(Cow::Owned)
.unwrap_or(Cow::Borrowed(given)); .unwrap_or(Cow::Borrowed(given));
let expanded = expand_env_vars(given.as_ref()); let expanded = expand_env_vars(given.as_ref());
@ -637,6 +647,8 @@ fn parse_entry(
} else if s.eat_if("--find-links") || s.eat_if("-f") { } else if s.eat_if("--find-links") || s.eat_if("-f") {
let given = parse_value(content, s, |c: char| !is_terminal(c))?; let given = parse_value(content, s, |c: char| !is_terminal(c))?;
let given = unquote(given) let given = unquote(given)
.ok()
.flatten()
.map(Cow::Owned) .map(Cow::Owned)
.unwrap_or(Cow::Borrowed(given)); .unwrap_or(Cow::Borrowed(given));
let expanded = expand_env_vars(given.as_ref()); let expanded = expand_env_vars(given.as_ref());
@ -666,6 +678,8 @@ fn parse_entry(
} else if s.eat_if("--no-binary") { } else if s.eat_if("--no-binary") {
let given = parse_value(content, s, |c: char| !is_terminal(c))?; let given = parse_value(content, s, |c: char| !is_terminal(c))?;
let given = unquote(given) let given = unquote(given)
.ok()
.flatten()
.map(Cow::Owned) .map(Cow::Owned)
.unwrap_or(Cow::Borrowed(given)); .unwrap_or(Cow::Borrowed(given));
let specifier = PackageNameSpecifier::from_str(given.as_ref()).map_err(|err| { let specifier = PackageNameSpecifier::from_str(given.as_ref()).map_err(|err| {
@ -680,6 +694,8 @@ fn parse_entry(
} else if s.eat_if("--only-binary") { } else if s.eat_if("--only-binary") {
let given = parse_value(content, s, |c: char| !is_terminal(c))?; let given = parse_value(content, s, |c: char| !is_terminal(c))?;
let given = unquote(given) let given = unquote(given)
.ok()
.flatten()
.map(Cow::Owned) .map(Cow::Owned)
.unwrap_or(Cow::Borrowed(given)); .unwrap_or(Cow::Borrowed(given));
let specifier = PackageNameSpecifier::from_str(given.as_ref()).map_err(|err| { let specifier = PackageNameSpecifier::from_str(given.as_ref()).map_err(|err| {

View File

@ -142,12 +142,20 @@ fn unquote_open_escape(acc: &mut String, cursor: &mut std::iter::Enumerate<std::
/// ///
/// The result is canonical. There is only one valid unquoted result for a given input. /// The result is canonical. There is only one valid unquoted result for a given input.
/// ///
/// If the string does not require any quoting or escaping, returns `Ok(None)`.
///
/// # Examples /// # Examples
/// ///
/// ``` /// ```
/// assert_eq!(r_shquote::unquote("foobar").unwrap(), "foobar"); /// assert_eq!(r_shquote::unquote("foobar").unwrap(), "foobar");
/// ``` /// ```
pub(crate) fn unquote(source: &str) -> Result<String, UnquoteError> { pub(crate) fn unquote(source: &str) -> Result<Option<String>, UnquoteError> {
// If the string does not contain any single-quotes, double-quotes, or escape sequences, it
// does not require any unquoting.
if memchr::memchr3(b'\'', b'"', b'\\', source.as_bytes()).is_none() {
return Ok(None);
}
// An unquote-operation never results in a longer string. Furthermore, the common case is // An unquote-operation never results in a longer string. Furthermore, the common case is
// most of the string is unquoted / unescaped. Hence, we simply allocate the same space // most of the string is unquoted / unescaped. Hence, we simply allocate the same space
// for the resulting string as the input. // for the resulting string as the input.
@ -182,7 +190,7 @@ pub(crate) fn unquote(source: &str) -> Result<String, UnquoteError> {
acc.push(next_ch); acc.push(next_ch);
} }
None => { None => {
break Ok(acc); break Ok(Some(acc));
} }
} }
} }
@ -194,10 +202,10 @@ mod tests {
#[test] #[test]
fn basic() { fn basic() {
assert_eq!(unquote("foobar").unwrap(), "foobar"); assert_eq!(unquote("foobar").unwrap(), None);
assert_eq!(unquote("foo'bar'").unwrap(), "foobar"); assert_eq!(unquote("foo'bar'").unwrap().unwrap(), "foobar");
assert_eq!(unquote("foo\"bar\"").unwrap(), "foobar"); assert_eq!(unquote("foo\"bar\"").unwrap().unwrap(), "foobar");
assert_eq!(unquote("\\foobar\\").unwrap(), "foobar"); assert_eq!(unquote("\\foobar\\").unwrap().unwrap(), "foobar");
assert_eq!(unquote("\\'foobar\\'").unwrap(), "'foobar'"); assert_eq!(unquote("\\'foobar\\'").unwrap().unwrap(), "'foobar'");
} }
} }