diff --git a/crates/uv-client/src/html.rs b/crates/uv-client/src/html.rs index 738a9521f..397cc0142 100644 --- a/crates/uv-client/src/html.rs +++ b/crates/uv-client/src/html.rs @@ -108,7 +108,8 @@ impl SimpleHtml { .ok_or(Error::MissingHref)?; let href = std::str::from_utf8(href.as_bytes())?; - let (path, hashes) = if let Some((path, fragment)) = href.split_once('#') { + let decoded = html_escape::decode_html_entities(href); + let (path, hashes) = if let Some((path, fragment)) = decoded.split_once('#') { // Extract the hash, which should be in the fragment. (path, Self::parse_hash(fragment)?) } else { @@ -316,6 +317,59 @@ mod tests { "###); } + #[test] + fn parse_escaped_fragment() { + let text = r#" + + + +

Links for jinja2

+ Jinja2-3.1.2+233fca715f49-py3-none-any.whl
+ + + + "#; + let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap(); + let result = SimpleHtml::parse(text, &base).unwrap(); + insta::assert_debug_snapshot!(result, @r###" + SimpleHtml { + base: BaseUrl( + Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "download.pytorch.org", + ), + ), + port: None, + path: "/whl/jinja2/", + query: None, + fragment: None, + }, + ), + files: [ + File { + dist_info_metadata: None, + filename: "Jinja2-3.1.2+233fca715f49-py3-none-any.whl", + hashes: Hashes { + sha256: Some( + "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61", + ), + }, + requires_python: None, + size: None, + upload_time: None, + url: "/whl/Jinja2-3.1.2+233fca715f49-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61", + yanked: None, + }, + ], + } + "###); + } + #[test] fn parse_missing_hash() { let text = r#"