From 2d75aca8e3ea7ea30c45483b0872523b92d900d1 Mon Sep 17 00:00:00 2001 From: konsti Date: Mon, 17 Nov 2025 15:16:13 +0100 Subject: [PATCH] Don't check file URLs for ambiguously parsed URLs (#16759) Fixes https://github.com/astral-sh/uv/issues/16756 Follow-up for https://github.com/astral-sh/uv/pull/16622 I noticed that rustfmt couldn't handle the check, so I moved the code around in the first two commits. --- crates/uv-redacted/src/lib.rs | 105 ++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 35 deletions(-) diff --git a/crates/uv-redacted/src/lib.rs b/crates/uv-redacted/src/lib.rs index c6cb2d5aa..ddd1a5dfc 100644 --- a/crates/uv-redacted/src/lib.rs +++ b/crates/uv-redacted/src/lib.rs @@ -63,45 +63,69 @@ impl DisplaySafeUrl { pub fn parse(input: &str) -> Result { let url = Url::parse(input)?; - // Reject some ambiguous cases, e.g., `https://user/name:password@domain/a/b/c` - // - // In this case the user *probably* meant to have a username of "user/name", but both RFC - // 3986 and WHATWG URL expect the userinfo (RFC 3986) or authority (WHATWG) to not contain a - // non-percent-encoded slash or other special character. - // - // This ends up being moderately annoying to detect, since the above gets parsed into a - // "valid" WHATWG URL where the host is `used` and the pathname is - // `/name:password@domain/a/b/c` rather than causing a parse error. - // - // To detect it, we use a heuristic: if the password component is missing but the path or - // fragment contain a `:` followed by a `@`, then we assume the URL is ambiguous. - if url.password().is_none() - && (url - .path() - .find(':') - .is_some_and(|pos| url.path()[pos..].contains('@')) - || url - .fragment() - .map(|fragment| { - fragment - .find(':') - .is_some_and(|pos| fragment[pos..].contains('@')) - }) - .unwrap_or(false)) - // If the above is true, we should always expect to find these in the given URL - && let Some(col_pos) = input.find(':') - && let Some(at_pos) = input.rfind('@') - { - // Our ambiguous URL probably has credentials in it, so we don't want to blast it out in - // the error message. We somewhat aggressively replace everything between the scheme's - // ':' and the lastmost `@` with `***`. - let redacted_path = format!("{}***{}", &input[0..=col_pos], &input[at_pos..]); - return Err(DisplaySafeUrlError::AmbiguousAuthority(redacted_path)); - } + Self::reject_ambiguous_credentials(input, &url)?; Ok(Self(url)) } + /// Reject some ambiguous cases, e.g., `https://user/name:password@domain/a/b/c` + /// + /// In this case the user *probably* meant to have a username of "user/name", but both RFC + /// 3986 and WHATWG URL expect the userinfo (RFC 3986) or authority (WHATWG) to not contain a + /// non-percent-encoded slash or other special character. + /// + /// This ends up being moderately annoying to detect, since the above gets parsed into a + /// "valid" WHATWG URL where the host is `used` and the pathname is + /// `/name:password@domain/a/b/c` rather than causing a parse error. + /// + /// To detect it, we use a heuristic: if the password component is missing but the path or + /// fragment contain a `:` followed by a `@`, then we assume the URL is ambiguous. + fn reject_ambiguous_credentials(input: &str, url: &Url) -> Result<(), DisplaySafeUrlError> { + // `git://`, `http://`, and `https://` URLs may carry credentials, while `file://` URLs + // on Windows may contain both sigils, but it's always safe, e.g. + // `file://C:/Users/ferris/project@home/workspace`. + if url.scheme() == "file" { + return Ok(()); + } + + if url.password().is_some() { + return Ok(()); + } + + // Check for the suspicious pattern. + if !url + .path() + .find(':') + .is_some_and(|pos| url.path()[pos..].contains('@')) + && !url + .fragment() + .map(|fragment| { + fragment + .find(':') + .is_some_and(|pos| fragment[pos..].contains('@')) + }) + .unwrap_or(false) + { + return Ok(()); + } + + // If the previous check passed, we should always expect to find these in the given URL. + let (Some(col_pos), Some(at_pos)) = (input.find(':'), input.rfind('@')) else { + if cfg!(debug_assertions) { + unreachable!( + "`:` or `@` sign missing in URL that was confirmed to contain them: {input}" + ); + } + return Ok(()); + }; + + // Our ambiguous URL probably has credentials in it, so we don't want to blast it out in + // the error message. We somewhat aggressively replace everything between the scheme's + // ':' and the lastmost `@` with `***`. + let redacted_path = format!("{}***{}", &input[0..=col_pos], &input[at_pos..]); + Err(DisplaySafeUrlError::AmbiguousAuthority(redacted_path)) + } + /// Create a new [`DisplaySafeUrl`] from a [`Url`]. /// /// Unlike [`Self::parse`], this doesn't perform any ambiguity checks. @@ -453,4 +477,15 @@ mod tests { } } } + + #[test] + fn parse_url_not_ambiguous() { + #[allow(clippy::single_element_loop)] + for url in &[ + // https://github.com/astral-sh/uv/issues/16756 + "file:///C:/jenkins/ython_Environment_Manager_PR-251@2/venv%201/workspace", + ] { + DisplaySafeUrl::parse(url).unwrap(); + } + } }