From 38ab39c439ca358a836e2033c3d3a69d8f5e03e7 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 10 Apr 2024 10:25:29 -0400 Subject: [PATCH] Strip query string when parsing filename from HTML index (#2961) ## Summary Closes https://github.com/astral-sh/uv/issues/2958. --- crates/uv-client/src/html.rs | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/crates/uv-client/src/html.rs b/crates/uv-client/src/html.rs index 2bffce9b6..5d315c6d4 100644 --- a/crates/uv-client/src/html.rs +++ b/crates/uv-client/src/html.rs @@ -164,6 +164,9 @@ impl SimpleHtml { .last() .ok_or_else(|| Error::MissingFilename(href.to_string()))?; + // Strip any query string from the filename. + let filename = filename.split('?').next().unwrap_or(filename); + // Unquote the filename. let filename = urlencoding::decode(filename) .map_err(|_| Error::UnsupportedFilename(filename.to_string()))?; @@ -681,6 +684,60 @@ mod tests { "###); } + #[test] + fn parse_query_string() { + let text = r#" + + + +

<h1>Links for jinja2</h1>
+ <a href="/whl/Jinja2-3.1.2-py3-none-any.whl?project=legacy">Jinja2-3.1.2-py3-none-any.whl</a><br/>
+ + + + "#; + let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap(); + let result = SimpleHtml::parse(text, &base).unwrap(); + insta::assert_debug_snapshot!(result, @r###" + SimpleHtml { + base: BaseUrl( + Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "download.pytorch.org", + ), + ), + port: None, + path: "/whl/jinja2/", + query: None, + fragment: None, + }, + ), + files: [ + File { + dist_info_metadata: None, + filename: "Jinja2-3.1.2-py3-none-any.whl", + hashes: Hashes { + md5: None, + sha256: None, + sha384: None, + sha512: None, + }, + requires_python: None, + size: None, + upload_time: None, + url: "/whl/Jinja2-3.1.2-py3-none-any.whl?project=legacy", + yanked: None, + }, + ], + } + "###); + } + #[test] fn parse_missing_hash_value() { let text = r#"