mirror of https://github.com/astral-sh/uv
Apply percent-decoding to filepaths in HTML find-links (#1544)
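## Summary

Filenames taken from the last path component of an `href` in HTML find-links pages are now percent-decoded (e.g. `%2B` becomes `+`), while the stored URL keeps its original encoding.

Closes https://github.com/astral-sh/uv/issues/1542.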
This commit is contained in:
parent
3aa7a6b796
commit
4f216f3a74
|
|
@ -4267,6 +4267,7 @@ dependencies = [
|
||||||
"tokio-util",
|
"tokio-util",
|
||||||
"tracing",
|
"tracing",
|
||||||
"url",
|
"url",
|
||||||
|
"urlencoding",
|
||||||
"uv-cache",
|
"uv-cache",
|
||||||
"uv-fs",
|
"uv-fs",
|
||||||
"uv-normalize",
|
"uv-normalize",
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ tokio = { workspace = true, features = ["fs"] }
|
||||||
tokio-util = { workspace = true }
|
tokio-util = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
url = { workspace = true }
|
url = { workspace = true }
|
||||||
|
urlencoding = { workspace = true }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
anyhow = { workspace = true }
|
anyhow = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,7 @@ impl SimpleHtml {
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
(href, Hashes::default())
|
(decoded.as_ref(), Hashes::default())
|
||||||
};
|
};
|
||||||
|
|
||||||
// Extract the filename from the body text, which MUST match that of
|
// Extract the filename from the body text, which MUST match that of
|
||||||
|
|
@ -130,6 +130,10 @@ impl SimpleHtml {
|
||||||
.last()
|
.last()
|
||||||
.ok_or_else(|| Error::MissingFilename(href.to_string()))?;
|
.ok_or_else(|| Error::MissingFilename(href.to_string()))?;
|
||||||
|
|
||||||
|
// Unquote the filename.
|
||||||
|
let filename = urlencoding::decode(filename)
|
||||||
|
.map_err(|_| Error::UnsupportedFilename(filename.to_string()))?;
|
||||||
|
|
||||||
// Extract the `requires-python` field, which should be set on the
|
// Extract the `requires-python` field, which should be set on the
|
||||||
// `data-requires-python` attribute.
|
// `data-requires-python` attribute.
|
||||||
let requires_python = if let Some(requires_python) =
|
let requires_python = if let Some(requires_python) =
|
||||||
|
|
@ -198,6 +202,9 @@ pub enum Error {
|
||||||
#[error("Expected distribution filename as last path component of URL: {0}")]
|
#[error("Expected distribution filename as last path component of URL: {0}")]
|
||||||
MissingFilename(String),
|
MissingFilename(String),
|
||||||
|
|
||||||
|
#[error("Expected distribution filename to be UTF-8: {0}")]
|
||||||
|
UnsupportedFilename(String),
|
||||||
|
|
||||||
#[error("Missing hash attribute on URL: {0}")]
|
#[error("Missing hash attribute on URL: {0}")]
|
||||||
MissingHash(String),
|
MissingHash(String),
|
||||||
|
|
||||||
|
|
@ -377,6 +384,57 @@ mod tests {
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_quoted_filepath() {
|
||||||
|
let text = r#"
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<h1>Links for jinja2</h1>
|
||||||
|
<a href="cpu/torchtext-0.17.0%2Bcpu-cp39-cp39-win_amd64.whl">cpu/torchtext-0.17.0%2Bcpu-cp39-cp39-win_amd64.whl</a><br/>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
<!--TIMESTAMP 1703347410-->
|
||||||
|
"#;
|
||||||
|
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
|
||||||
|
let result = SimpleHtml::parse(text, &base).unwrap();
|
||||||
|
insta::assert_debug_snapshot!(result, @r###"
|
||||||
|
SimpleHtml {
|
||||||
|
base: BaseUrl(
|
||||||
|
Url {
|
||||||
|
scheme: "https",
|
||||||
|
cannot_be_a_base: false,
|
||||||
|
username: "",
|
||||||
|
password: None,
|
||||||
|
host: Some(
|
||||||
|
Domain(
|
||||||
|
"download.pytorch.org",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
port: None,
|
||||||
|
path: "/whl/jinja2/",
|
||||||
|
query: None,
|
||||||
|
fragment: None,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
files: [
|
||||||
|
File {
|
||||||
|
dist_info_metadata: None,
|
||||||
|
filename: "torchtext-0.17.0+cpu-cp39-cp39-win_amd64.whl",
|
||||||
|
hashes: Hashes {
|
||||||
|
sha256: None,
|
||||||
|
},
|
||||||
|
requires_python: None,
|
||||||
|
size: None,
|
||||||
|
upload_time: None,
|
||||||
|
url: "cpu/torchtext-0.17.0%2Bcpu-cp39-cp39-win_amd64.whl",
|
||||||
|
yanked: None,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_missing_hash() {
|
fn parse_missing_hash() {
|
||||||
let text = r#"
|
let text = r#"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue