mirror of https://github.com/astral-sh/uv
Don't check file URLs for ambiguously parsed URLs (#16759)
Fixes https://github.com/astral-sh/uv/issues/16756. Follow-up for https://github.com/astral-sh/uv/pull/16622. I noticed that rustfmt couldn't handle the check, so I moved the code around in the first two commits.
This commit is contained in:
parent
163729ecc3
commit
2d75aca8e3
|
|
@ -63,43 +63,67 @@ impl DisplaySafeUrl {
|
||||||
pub fn parse(input: &str) -> Result<Self, DisplaySafeUrlError> {
|
pub fn parse(input: &str) -> Result<Self, DisplaySafeUrlError> {
|
||||||
let url = Url::parse(input)?;
|
let url = Url::parse(input)?;
|
||||||
|
|
||||||
// Reject some ambiguous cases, e.g., `https://user/name:password@domain/a/b/c`
|
Self::reject_ambiguous_credentials(input, &url)?;
|
||||||
//
|
|
||||||
// In this case the user *probably* meant to have a username of "user/name", but both RFC
|
Ok(Self(url))
|
||||||
// 3986 and WHATWG URL expect the userinfo (RFC 3986) or authority (WHATWG) to not contain a
|
}
|
||||||
// non-percent-encoded slash or other special character.
|
|
||||||
//
|
/// Reject some ambiguous cases, e.g., `https://user/name:password@domain/a/b/c`
|
||||||
// This ends up being moderately annoying to detect, since the above gets parsed into a
|
///
|
||||||
// "valid" WHATWG URL where the host is `user` and the pathname is
|
/// In this case the user *probably* meant to have a username of "user/name", but both RFC
|
||||||
// `/name:password@domain/a/b/c` rather than causing a parse error.
|
/// 3986 and WHATWG URL expect the userinfo (RFC 3986) or authority (WHATWG) to not contain a
|
||||||
//
|
/// non-percent-encoded slash or other special character.
|
||||||
// To detect it, we use a heuristic: if the password component is missing but the path or
|
///
|
||||||
// fragment contain a `:` followed by a `@`, then we assume the URL is ambiguous.
|
/// This ends up being moderately annoying to detect, since the above gets parsed into a
|
||||||
if url.password().is_none()
|
/// "valid" WHATWG URL where the host is `user` and the pathname is
|
||||||
&& (url
|
/// `/name:password@domain/a/b/c` rather than causing a parse error.
|
||||||
|
///
|
||||||
|
/// To detect it, we use a heuristic: if the password component is missing but the path or
|
||||||
|
/// fragment contain a `:` followed by a `@`, then we assume the URL is ambiguous.
|
||||||
|
fn reject_ambiguous_credentials(input: &str, url: &Url) -> Result<(), DisplaySafeUrlError> {
|
||||||
|
// `git://`, `http://`, and `https://` URLs may carry credentials, while `file://` URLs
|
||||||
|
// on Windows may contain both sigils, but it's always safe, e.g.
|
||||||
|
// `file://C:/Users/ferris/project@home/workspace`.
|
||||||
|
if url.scheme() == "file" {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
if url.password().is_some() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for the suspicious pattern.
|
||||||
|
if !url
|
||||||
.path()
|
.path()
|
||||||
.find(':')
|
.find(':')
|
||||||
.is_some_and(|pos| url.path()[pos..].contains('@'))
|
.is_some_and(|pos| url.path()[pos..].contains('@'))
|
||||||
|| url
|
&& !url
|
||||||
.fragment()
|
.fragment()
|
||||||
.map(|fragment| {
|
.map(|fragment| {
|
||||||
fragment
|
fragment
|
||||||
.find(':')
|
.find(':')
|
||||||
.is_some_and(|pos| fragment[pos..].contains('@'))
|
.is_some_and(|pos| fragment[pos..].contains('@'))
|
||||||
})
|
})
|
||||||
.unwrap_or(false))
|
.unwrap_or(false)
|
||||||
// If the above is true, we should always expect to find these in the given URL
|
|
||||||
&& let Some(col_pos) = input.find(':')
|
|
||||||
&& let Some(at_pos) = input.rfind('@')
|
|
||||||
{
|
{
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the previous check passed, we should always expect to find these in the given URL.
|
||||||
|
let (Some(col_pos), Some(at_pos)) = (input.find(':'), input.rfind('@')) else {
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
unreachable!(
|
||||||
|
"`:` or `@` sign missing in URL that was confirmed to contain them: {input}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
|
||||||
// Our ambiguous URL probably has credentials in it, so we don't want to blast it out in
|
// Our ambiguous URL probably has credentials in it, so we don't want to blast it out in
|
||||||
// the error message. We somewhat aggressively replace everything between the scheme's
|
// the error message. We somewhat aggressively replace everything between the scheme's
|
||||||
// ':' and the last `@` with `***`.
|
// ':' and the last `@` with `***`.
|
||||||
let redacted_path = format!("{}***{}", &input[0..=col_pos], &input[at_pos..]);
|
let redacted_path = format!("{}***{}", &input[0..=col_pos], &input[at_pos..]);
|
||||||
return Err(DisplaySafeUrlError::AmbiguousAuthority(redacted_path));
|
Err(DisplaySafeUrlError::AmbiguousAuthority(redacted_path))
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Self(url))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a new [`DisplaySafeUrl`] from a [`Url`].
|
/// Create a new [`DisplaySafeUrl`] from a [`Url`].
|
||||||
|
|
@ -453,4 +477,15 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_url_not_ambiguous() {
|
||||||
|
#[allow(clippy::single_element_loop)]
|
||||||
|
for url in &[
|
||||||
|
// https://github.com/astral-sh/uv/issues/16756
|
||||||
|
"file:///C:/jenkins/ython_Environment_Manager_PR-251@2/venv%201/workspace",
|
||||||
|
] {
|
||||||
|
DisplaySafeUrl::parse(url).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue