Don't check file URLs for ambiguously parsed URLs (#16759)

Fixes https://github.com/astral-sh/uv/issues/16756
Follow-up for https://github.com/astral-sh/uv/pull/16622

I noticed that rustfmt couldn't handle the check, so I moved the code
around in the first two commits.
This commit is contained in:
konsti 2025-11-17 15:16:13 +01:00 committed by GitHub
parent 163729ecc3
commit 2d75aca8e3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 70 additions and 35 deletions

View File

@ -63,43 +63,67 @@ impl DisplaySafeUrl {
pub fn parse(input: &str) -> Result<Self, DisplaySafeUrlError> { pub fn parse(input: &str) -> Result<Self, DisplaySafeUrlError> {
let url = Url::parse(input)?; let url = Url::parse(input)?;
// Reject some ambiguous cases, e.g., `https://user/name:password@domain/a/b/c` Self::reject_ambiguous_credentials(input, &url)?;
//
// In this case the user *probably* meant to have a username of "user/name", but both RFC Ok(Self(url))
// 3986 and WHATWG URL expect the userinfo (RFC 3986) or authority (WHATWG) to not contain a }
// non-percent-encoded slash or other special character.
// /// Reject some ambiguous cases, e.g., `https://user/name:password@domain/a/b/c`
// This ends up being moderately annoying to detect, since the above gets parsed into a ///
// "valid" WHATWG URL where the host is `used` and the pathname is /// In this case the user *probably* meant to have a username of "user/name", but both RFC
// `/name:password@domain/a/b/c` rather than causing a parse error. /// 3986 and WHATWG URL expect the userinfo (RFC 3986) or authority (WHATWG) to not contain a
// /// non-percent-encoded slash or other special character.
// To detect it, we use a heuristic: if the password component is missing but the path or ///
// fragment contain a `:` followed by a `@`, then we assume the URL is ambiguous. /// This ends up being moderately annoying to detect, since the above gets parsed into a
if url.password().is_none() /// "valid" WHATWG URL where the host is `used` and the pathname is
&& (url /// `/name:password@domain/a/b/c` rather than causing a parse error.
///
/// To detect it, we use a heuristic: if the password component is missing but the path or
/// fragment contain a `:` followed by a `@`, then we assume the URL is ambiguous.
fn reject_ambiguous_credentials(input: &str, url: &Url) -> Result<(), DisplaySafeUrlError> {
// `git://`, `http://`, and `https://` URLs may carry credentials, while `file://` URLs
// on Windows may contain both sigils, but it's always safe, e.g.
// `file://C:/Users/ferris/project@home/workspace`.
if url.scheme() == "file" {
return Ok(());
}
if url.password().is_some() {
return Ok(());
}
// Check for the suspicious pattern.
if !url
.path() .path()
.find(':') .find(':')
.is_some_and(|pos| url.path()[pos..].contains('@')) .is_some_and(|pos| url.path()[pos..].contains('@'))
|| url && !url
.fragment() .fragment()
.map(|fragment| { .map(|fragment| {
fragment fragment
.find(':') .find(':')
.is_some_and(|pos| fragment[pos..].contains('@')) .is_some_and(|pos| fragment[pos..].contains('@'))
}) })
.unwrap_or(false)) .unwrap_or(false)
// If the above is true, we should always expect to find these in the given URL
&& let Some(col_pos) = input.find(':')
&& let Some(at_pos) = input.rfind('@')
{ {
return Ok(());
}
// If the previous check passed, we should always expect to find these in the given URL.
let (Some(col_pos), Some(at_pos)) = (input.find(':'), input.rfind('@')) else {
if cfg!(debug_assertions) {
unreachable!(
"`:` or `@` sign missing in URL that was confirmed to contain them: {input}"
);
}
return Ok(());
};
// Our ambiguous URL probably has credentials in it, so we don't want to blast it out in // Our ambiguous URL probably has credentials in it, so we don't want to blast it out in
// the error message. We somewhat aggressively replace everything between the scheme's // the error message. We somewhat aggressively replace everything between the scheme's
// ':' and the lastmost `@` with `***`. // ':' and the lastmost `@` with `***`.
let redacted_path = format!("{}***{}", &input[0..=col_pos], &input[at_pos..]); let redacted_path = format!("{}***{}", &input[0..=col_pos], &input[at_pos..]);
return Err(DisplaySafeUrlError::AmbiguousAuthority(redacted_path)); Err(DisplaySafeUrlError::AmbiguousAuthority(redacted_path))
}
Ok(Self(url))
} }
/// Create a new [`DisplaySafeUrl`] from a [`Url`]. /// Create a new [`DisplaySafeUrl`] from a [`Url`].
@ -453,4 +477,15 @@ mod tests {
} }
} }
} }
#[test]
fn parse_url_not_ambiguous() {
#[allow(clippy::single_element_loop)]
for url in &[
// https://github.com/astral-sh/uv/issues/16756
"file:///C:/jenkins/ython_Environment_Manager_PR-251@2/venv%201/workspace",
] {
DisplaySafeUrl::parse(url).unwrap();
}
}
} }