Percent-decode distribution filenames parsed from Simple API HTML anchors.

The filename is taken from the last path component of each anchor's href.
When an index escapes characters there (e.g. `+` written as `%2B` in
`torchtext-0.17.0%2Bcpu-cp39-cp39-win_amd64.whl`), the filename is now decoded
with `urlencoding::decode`; a failed decode is reported as
`Error::UnsupportedFilename`. The file URL itself is left as written in the
page, as pinned by the new `parse_quoted_filepath` snapshot test.

diff --git a/Cargo.lock b/Cargo.lock
index 008b8a060..3d2d20f68 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4267,6 +4267,7 @@ dependencies = [
  "tokio-util",
  "tracing",
  "url",
+ "urlencoding",
  "uv-cache",
  "uv-fs",
  "uv-normalize",
diff --git a/crates/uv-client/Cargo.toml b/crates/uv-client/Cargo.toml
index f1df77716..58d9ed901 100644
--- a/crates/uv-client/Cargo.toml
+++ b/crates/uv-client/Cargo.toml
@@ -41,6 +41,7 @@ tokio = { workspace = true, features = ["fs"] }
 tokio-util = { workspace = true }
 tracing = { workspace = true }
 url = { workspace = true }
+urlencoding = { workspace = true }
 
 [dev-dependencies]
 anyhow = { workspace = true }
diff --git a/crates/uv-client/src/html.rs b/crates/uv-client/src/html.rs
index 0ccbf7b61..3d4bcba62 100644
--- a/crates/uv-client/src/html.rs
+++ b/crates/uv-client/src/html.rs
@@ -120,7 +120,7 @@ impl SimpleHtml {
                 },
             )
         } else {
-            (href, Hashes::default())
+            (decoded.as_ref(), Hashes::default())
         };
 
         // Extract the filename from the body text, which MUST match that of
@@ -130,6 +130,10 @@ impl SimpleHtml {
             .last()
             .ok_or_else(|| Error::MissingFilename(href.to_string()))?;
 
+        // Unquote the filename.
+        let filename = urlencoding::decode(filename)
+            .map_err(|_| Error::UnsupportedFilename(filename.to_string()))?;
+
         // Extract the `requires-python` field, which should be set on the
         // `data-requires-python` attribute.
         let requires_python = if let Some(requires_python) =
@@ -198,6 +202,9 @@ pub enum Error {
     #[error("Expected distribution filename as last path component of URL: {0}")]
     MissingFilename(String),
 
+    #[error("Expected distribution filename to be UTF-8: {0}")]
+    UnsupportedFilename(String),
+
     #[error("Missing hash attribute on URL: {0}")]
     MissingHash(String),
 
@@ -377,6 +384,57 @@ mod tests {
         "###);
     }
 
+    #[test]
+    fn parse_quoted_filepath() {
+        let text = r#"
+<!DOCTYPE html>
+<html>
+  <body>
+    <h1>Links for jinja2</h1>
+    <a href="cpu/torchtext-0.17.0%2Bcpu-cp39-cp39-win_amd64.whl">cpu/torchtext-0.17.0%2Bcpu-cp39-cp39-win_amd64.whl</a><br/>
+  </body>
+</html>
+<!--TIMESTAMP 1703347410-->
+        "#;
+        let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
+        let result = SimpleHtml::parse(text, &base).unwrap();
+        insta::assert_debug_snapshot!(result, @r###"
+        SimpleHtml {
+            base: BaseUrl(
+                Url {
+                    scheme: "https",
+                    cannot_be_a_base: false,
+                    username: "",
+                    password: None,
+                    host: Some(
+                        Domain(
+                            "download.pytorch.org",
+                        ),
+                    ),
+                    port: None,
+                    path: "/whl/jinja2/",
+                    query: None,
+                    fragment: None,
+                },
+            ),
+            files: [
+                File {
+                    dist_info_metadata: None,
+                    filename: "torchtext-0.17.0+cpu-cp39-cp39-win_amd64.whl",
+                    hashes: Hashes {
+                        sha256: None,
+                    },
+                    requires_python: None,
+                    size: None,
+                    upload_time: None,
+                    url: "cpu/torchtext-0.17.0%2Bcpu-cp39-cp39-win_amd64.whl",
+                    yanked: None,
+                },
+            ],
+        }
+        "###);
+    }
+
     #[test]
     fn parse_missing_hash() {
         let text = r#"