mirror of https://github.com/astral-sh/uv
Vendor r-shquote's unquote implementation (#11812)
## Summary This project is archived, so while it's tiny and arguably complete, I'd rather just vendor the one function we need.
This commit is contained in:
parent
8f0c6f5a6f
commit
a439b7944d
|
|
@ -2844,12 +2844,6 @@ version = "0.5.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "640c9bd8497b02465aeef5375144c26062e0dcd5939dfcbb0f5db76cb8c17c73"
|
||||
|
||||
[[package]]
|
||||
name = "r-shquote"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d76b7a332c240b0b30ad8b52cc9aecf8ec96878ccb927ce1d2feb03920e0f711"
|
||||
|
||||
[[package]]
|
||||
name = "rancor"
|
||||
version = "0.1.0"
|
||||
|
|
@ -5537,7 +5531,6 @@ dependencies = [
|
|||
"indoc",
|
||||
"insta",
|
||||
"itertools 0.14.0",
|
||||
"r-shquote",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"reqwest-middleware",
|
||||
|
|
|
|||
|
|
@ -134,7 +134,6 @@ proc-macro2 = { version = "1.0.86" }
|
|||
procfs = { version = "0.17.0", default-features = false, features = ["flate2"] }
|
||||
pubgrub = { git = "https://github.com/astral-sh/pubgrub", rev = "b70cf707aa43f21b32f3a61b8a0889b15032d5c4" }
|
||||
quote = { version = "1.0.37" }
|
||||
r-shquote = { version = "0.1.1" }
|
||||
rayon = { version = "1.10.0" }
|
||||
reflink-copy = { version = "0.1.19" }
|
||||
regex = { version = "1.10.6" }
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ uv-pypi-types = { workspace = true }
|
|||
uv-warnings = { workspace = true }
|
||||
|
||||
fs-err = { workspace = true }
|
||||
r-shquote = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
reqwest = { workspace = true, optional = true }
|
||||
reqwest-middleware = { workspace = true, optional = true }
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ use std::io;
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
|
||||
use r_shquote::unquote;
|
||||
use tracing::instrument;
|
||||
use unscanny::{Pattern, Scanner};
|
||||
use url::Url;
|
||||
|
|
@ -56,8 +55,10 @@ use uv_pypi_types::{Requirement, VerbatimParsedUrl};
|
|||
|
||||
use crate::requirement::EditableError;
|
||||
pub use crate::requirement::RequirementsTxtRequirement;
|
||||
use crate::shquote::unquote;
|
||||
|
||||
mod requirement;
|
||||
mod shquote;
|
||||
|
||||
/// We emit one of those for each `requirements.txt` entry.
|
||||
enum RequirementsTxtStatement {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,203 @@
|
|||
//! POSIX Shell Compatible Argument Parser
|
||||
//!
|
||||
//! This implementation is vendored from the [`r-shquote`](https://github.com/r-util/r-shquote)
|
||||
//! crate under the Apache 2.0 license:
|
||||
//!
|
||||
//! ```text
|
||||
//! Licensed under the Apache License, Version 2.0 (the "License");
|
||||
//! you may not use this file except in compliance with the License.
|
||||
//! You may obtain a copy of the License at
|
||||
//!
|
||||
//! https://www.apache.org/licenses/LICENSE-2.0
|
||||
//!
|
||||
//! Unless required by applicable law or agreed to in writing, software
|
||||
//! distributed under the License is distributed on an "AS IS" BASIS,
|
||||
//! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
//! See the License for the specific language governing permissions and
|
||||
//! limitations under the License.
|
||||
//! ```
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) enum UnquoteError {
|
||||
UnterminatedSingleQuote {
|
||||
char_cursor: usize,
|
||||
byte_cursor: usize,
|
||||
},
|
||||
UnterminatedDoubleQuote {
|
||||
char_cursor: usize,
|
||||
byte_cursor: usize,
|
||||
},
|
||||
}
|
||||
|
||||
impl std::fmt::Display for UnquoteError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "{self:?}")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for UnquoteError {}
|
||||
|
||||
fn unquote_open_single(
|
||||
acc: &mut String,
|
||||
cursor: &mut std::iter::Enumerate<std::str::CharIndices>,
|
||||
) -> bool {
|
||||
// This decodes a single-quote sequence. The opening single-quote was already parsed by
|
||||
// the caller. Both `&source[start]` and `cursor` point to the first character following
|
||||
// the opening single-quote.
|
||||
// Anything inside the single-quote sequence is copied verbatim to the output until the
|
||||
// next single-quote. No escape sequences are supported, not even a single-quote can be
|
||||
// escaped. However, if the sequence is not terminated, the entire operation is considered
|
||||
// invalid.
|
||||
for i in cursor {
|
||||
match i {
|
||||
(_, (_, '\'')) => return true,
|
||||
(_, (_, c)) => acc.push(c),
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn unquote_open_double(
|
||||
acc: &mut String,
|
||||
cursor: &mut std::iter::Enumerate<std::str::CharIndices>,
|
||||
) -> bool {
|
||||
// This decodes a double-quote sequence. The opening double-quote was already parsed by
|
||||
// the caller. Both `&source[start]` and `cursor` point to the first character following
|
||||
// the opening double-quote.
|
||||
// A double-quote sequence allows escape-sequences and goes until the closing
|
||||
// double-quote. If the sequence is not terminated, though, the entire operation is
|
||||
// considered invalid.
|
||||
loop {
|
||||
match cursor.next() {
|
||||
Some((_, (_, '"'))) => {
|
||||
// An unescaped double-quote character terminates the double-quote sequence.
|
||||
// It produces no output.
|
||||
return true;
|
||||
}
|
||||
Some((_, (_, '\\'))) => {
|
||||
// Inside a double-quote sequence several escape sequences are allowed. In
|
||||
// general, any unknown sequence is copied verbatim in its entirety including
|
||||
// the backslash. Known sequences produce the escaped character in its output
|
||||
// and makes the parser not interpret it. If the sequence is non-terminated,
|
||||
// it implies that the double-quote sequence is non-terminated and thus
|
||||
// invokes the same behavior, meaning the entire operation is refused.
|
||||
match cursor.next() {
|
||||
Some((_, (_, esc_ch)))
|
||||
if esc_ch == '"'
|
||||
|| esc_ch == '\\'
|
||||
|| esc_ch == '`'
|
||||
|| esc_ch == '$'
|
||||
|| esc_ch == '\n' =>
|
||||
{
|
||||
acc.push(esc_ch);
|
||||
}
|
||||
Some((_, (_, esc_ch))) => {
|
||||
acc.push('\\');
|
||||
acc.push(esc_ch);
|
||||
}
|
||||
None => {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some((_, (_, inner_ch))) => {
|
||||
// Any non-special character inside a double-quote is copied
|
||||
// literally just like characters outside of it.
|
||||
acc.push(inner_ch);
|
||||
}
|
||||
None => {
|
||||
// The double-quote sequence was not terminated. The entire
|
||||
// operation is considered invalid and we have to refuse producing
|
||||
// any resulting value.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn unquote_open_escape(acc: &mut String, cursor: &mut std::iter::Enumerate<std::str::CharIndices>) {
|
||||
// This decodes an escape sequence outside of any quote. The opening backslash was already
|
||||
// parsed by the caller. Both `&source[start]` and `cursor` point to the first character
|
||||
// following the opening backslash.
|
||||
// Outside of quotes, an escape sequence simply treats the next character literally, and
|
||||
// does not interpret it. The exceptions are literal <NL> (newline character) and a single
|
||||
// backslash as last character in the string. In these cases the escape-sequence is
|
||||
// stripped and produces no output. The <NL> case is a remnant of human shell input, where
|
||||
// you can input multiple lines by appending a backslash to the previous line. This causes
|
||||
// both the backslash and <NL> to be ignore, since they purely serve readability of user
|
||||
// input.
|
||||
if let Some((_, (_, esc_ch))) = cursor.next() {
|
||||
if esc_ch != '\n' {
|
||||
acc.push(esc_ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Unquote String
|
||||
///
|
||||
/// Unquote a single string according to POSIX Shell quoting and escaping rules. If the input
|
||||
/// string is not a valid input, the operation will fail and provide diagnosis information on
|
||||
/// where the first invalid part was encountered.
|
||||
///
|
||||
/// The result is canonical. There is only one valid unquoted result for a given input.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// assert_eq!(r_shquote::unquote("foobar").unwrap(), "foobar");
|
||||
/// ```
|
||||
pub(crate) fn unquote(source: &str) -> Result<String, UnquoteError> {
|
||||
// An unquote-operation never results in a longer string. Furthermore, the common case is
|
||||
// most of the string is unquoted / unescaped. Hence, we simply allocate the same space
|
||||
// for the resulting string as the input.
|
||||
let mut acc = String::with_capacity(source.len());
|
||||
|
||||
// We loop over the string. When a single-quote, double-quote, or escape sequence is
|
||||
// opened, we let our helpers parse the sub-strings. Anything else is copied over
|
||||
// literally until the end of the line.
|
||||
let mut cursor = source.char_indices().enumerate();
|
||||
loop {
|
||||
match cursor.next() {
|
||||
Some((next_idx, (next_pos, '\''))) => {
|
||||
if !unquote_open_single(&mut acc, &mut cursor) {
|
||||
break Err(UnquoteError::UnterminatedSingleQuote {
|
||||
char_cursor: next_idx,
|
||||
byte_cursor: next_pos,
|
||||
});
|
||||
}
|
||||
}
|
||||
Some((next_idx, (next_pos, '"'))) => {
|
||||
if !unquote_open_double(&mut acc, &mut cursor) {
|
||||
break Err(UnquoteError::UnterminatedDoubleQuote {
|
||||
char_cursor: next_idx,
|
||||
byte_cursor: next_pos,
|
||||
});
|
||||
}
|
||||
}
|
||||
Some((_, (_, '\\'))) => {
|
||||
unquote_open_escape(&mut acc, &mut cursor);
|
||||
}
|
||||
Some((_, (_, next_ch))) => {
|
||||
acc.push(next_ch);
|
||||
}
|
||||
None => {
|
||||
break Ok(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
assert_eq!(unquote("foobar").unwrap(), "foobar");
|
||||
assert_eq!(unquote("foo'bar'").unwrap(), "foobar");
|
||||
assert_eq!(unquote("foo\"bar\"").unwrap(), "foobar");
|
||||
assert_eq!(unquote("\\foobar\\").unwrap(), "foobar");
|
||||
assert_eq!(unquote("\\'foobar\\'").unwrap(), "'foobar'");
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue