mirror of https://github.com/astral-sh/uv
Percent-decode URLs in canonical comparisons (#11088)
## Summary
This PR adds an additional normalization step to `CanonicalUrl` whereby
we now percent-decode the path, to ensure that (e.g.)
`torch-2.5.1%2Bcpu.cxx11.abi-cp39-cp39-linux_x86_64.whl` and
`torch-2.5.1+cpu.cxx11.abi-cp39-cp39-linux_x86_64.whl` are considered
equal. Further, when generating the "reinstall" report, we use the
canonical URL rather than the verbatim URL.
In making this change, I also learned that we don't apply any of the
normalization passes to `file://` URLs. I inadvertently removed it in
93d606aba2,
since setting the password or URL on ` file://` URL errors -- but now
suppress those errors anyway.
Closes https://github.com/astral-sh/uv/issues/11082.
## Test Plan
- Downloaded a [PyTorch
wheel](https://download.pytorch.org/whl/cpu-cxx11-abi/torch-2.5.1%2Bcpu.cxx11.abi-cp39-cp39-linux_x86_64.whl)
- `python3.9 -m pip install
torch-2.5.1+cpu.cxx11.abi-cp39-cp39-linux_x86_64.whl --platform
linux_x86_64 --target foo --no-deps`
- `cargo run pip install
torch-2.5.1+cpu.cxx11.abi-cp39-cp39-linux_x86_64.whl --python-platform
linux --python-version 3.9 --target foo --no-deps`
- Verified that the package had the `~` symbol for the reinstall.
This commit is contained in:
parent
1fae8dbf17
commit
26f84e5699
|
|
@ -4750,8 +4750,10 @@ name = "uv-cache-key"
|
||||||
version = "0.0.1"
|
version = "0.0.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hex",
|
"hex",
|
||||||
|
"memchr",
|
||||||
"seahash",
|
"seahash",
|
||||||
"url",
|
"url",
|
||||||
|
"urlencoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
||||||
|
|
@ -18,5 +18,7 @@ workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
hex = { workspace = true }
|
hex = { workspace = true }
|
||||||
|
memchr = { workspace = true }
|
||||||
seahash = { workspace = true }
|
seahash = { workspace = true }
|
||||||
url = { workspace = true }
|
url = { workspace = true }
|
||||||
|
urlencoding = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::fmt::{Debug, Formatter};
|
use std::fmt::{Debug, Formatter};
|
||||||
use std::hash::{Hash, Hasher};
|
use std::hash::{Hash, Hasher};
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
@ -26,11 +27,6 @@ impl CanonicalUrl {
|
||||||
return Self(url);
|
return Self(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the URL has no host, then it's not a valid URL anyway.
|
|
||||||
if !url.has_host() {
|
|
||||||
return Self(url);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Strip credentials.
|
// Strip credentials.
|
||||||
let _ = url.set_password(None);
|
let _ = url.set_password(None);
|
||||||
let _ = url.set_username("");
|
let _ = url.set_username("");
|
||||||
|
|
@ -76,6 +72,23 @@ impl CanonicalUrl {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decode any percent-encoded characters in the path.
|
||||||
|
if memchr::memchr(b'%', url.path().as_bytes()).is_some() {
|
||||||
|
let decoded = url
|
||||||
|
.path_segments()
|
||||||
|
.unwrap()
|
||||||
|
.map(|segment| {
|
||||||
|
urlencoding::decode(segment)
|
||||||
|
.unwrap_or(Cow::Borrowed(segment))
|
||||||
|
.into_owned()
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let mut path_segments = url.path_segments_mut().unwrap();
|
||||||
|
path_segments.clear();
|
||||||
|
path_segments.extend(decoded);
|
||||||
|
}
|
||||||
|
|
||||||
Self(url)
|
Self(url)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -276,6 +289,38 @@ mod tests {
|
||||||
CanonicalUrl::parse("git+https:://github.com/pypa/sample-namespace-packages.git")?,
|
CanonicalUrl::parse("git+https:://github.com/pypa/sample-namespace-packages.git")?,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Two URLs should _not_ be considered equal based on percent-decoding slashes.
|
||||||
|
assert_ne!(
|
||||||
|
CanonicalUrl::parse("https://github.com/pypa/sample%2Fnamespace%2Fpackages")?,
|
||||||
|
CanonicalUrl::parse("https://github.com/pypa/sample/namespace/packages")?,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Two URLs should be considered equal regardless of percent-encoding.
|
||||||
|
assert_eq!(
|
||||||
|
CanonicalUrl::parse("https://github.com/pypa/sample%2Bnamespace%2Bpackages")?,
|
||||||
|
CanonicalUrl::parse("https://github.com/pypa/sample+namespace+packages")?,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Two URLs should _not_ be considered equal based on percent-decoding slashes.
|
||||||
|
assert_ne!(
|
||||||
|
CanonicalUrl::parse(
|
||||||
|
"file:///home/ferris/my_project%2Fmy_project-0.1.0-py3-none-any.whl"
|
||||||
|
)?,
|
||||||
|
CanonicalUrl::parse(
|
||||||
|
"file:///home/ferris/my_project/my_project-0.1.0-py3-none-any.whl"
|
||||||
|
)?,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Two URLs should be considered equal regardless of percent-encoding.
|
||||||
|
assert_eq!(
|
||||||
|
CanonicalUrl::parse(
|
||||||
|
"file:///home/ferris/my_project/my_project-0.1.0+foo-py3-none-any.whl"
|
||||||
|
)?,
|
||||||
|
CanonicalUrl::parse(
|
||||||
|
"file:///home/ferris/my_project/my_project-0.1.0%2Bfoo-py3-none-any.whl"
|
||||||
|
)?,
|
||||||
|
);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
|
|
||||||
|
use uv_cache_key::CanonicalUrl;
|
||||||
use uv_normalize::PackageName;
|
use uv_normalize::PackageName;
|
||||||
|
use uv_pep440::Version;
|
||||||
|
|
||||||
use crate::cached::CachedDist;
|
use crate::cached::CachedDist;
|
||||||
use crate::installed::InstalledDist;
|
use crate::installed::InstalledDist;
|
||||||
|
|
@ -8,19 +10,29 @@ use crate::{InstalledMetadata, InstalledVersion, Name};
|
||||||
|
|
||||||
/// A distribution which is either installable, is a wheel in our cache or is already installed.
|
/// A distribution which is either installable, is a wheel in our cache or is already installed.
|
||||||
///
|
///
|
||||||
/// Note equality and hash operations are only based on the name and version, not the kind.
|
/// Note equality and hash operations are only based on the name and canonical version, not the
|
||||||
|
/// kind.
|
||||||
#[derive(Debug, Clone, Eq)]
|
#[derive(Debug, Clone, Eq)]
|
||||||
#[allow(clippy::large_enum_variant)]
|
#[allow(clippy::large_enum_variant)]
|
||||||
pub enum LocalDist {
|
pub enum LocalDist {
|
||||||
Cached(CachedDist),
|
Cached(CachedDist, CanonicalVersion),
|
||||||
Installed(InstalledDist),
|
Installed(InstalledDist, CanonicalVersion),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LocalDist {
|
||||||
|
fn canonical_version(&self) -> &CanonicalVersion {
|
||||||
|
match self {
|
||||||
|
Self::Cached(_, version) => version,
|
||||||
|
Self::Installed(_, version) => version,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Name for LocalDist {
|
impl Name for LocalDist {
|
||||||
fn name(&self) -> &PackageName {
|
fn name(&self) -> &PackageName {
|
||||||
match self {
|
match self {
|
||||||
Self::Cached(dist) => dist.name(),
|
Self::Cached(dist, _) => dist.name(),
|
||||||
Self::Installed(dist) => dist.name(),
|
Self::Installed(dist, _) => dist.name(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -28,33 +40,53 @@ impl Name for LocalDist {
|
||||||
impl InstalledMetadata for LocalDist {
|
impl InstalledMetadata for LocalDist {
|
||||||
fn installed_version(&self) -> InstalledVersion {
|
fn installed_version(&self) -> InstalledVersion {
|
||||||
match self {
|
match self {
|
||||||
Self::Cached(dist) => dist.installed_version(),
|
Self::Cached(dist, _) => dist.installed_version(),
|
||||||
Self::Installed(dist) => dist.installed_version(),
|
Self::Installed(dist, _) => dist.installed_version(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<CachedDist> for LocalDist {
|
impl From<CachedDist> for LocalDist {
|
||||||
fn from(dist: CachedDist) -> Self {
|
fn from(dist: CachedDist) -> Self {
|
||||||
Self::Cached(dist)
|
let version = CanonicalVersion::from(dist.installed_version());
|
||||||
|
Self::Cached(dist, version)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<InstalledDist> for LocalDist {
|
impl From<InstalledDist> for LocalDist {
|
||||||
fn from(dist: InstalledDist) -> Self {
|
fn from(dist: InstalledDist) -> Self {
|
||||||
Self::Installed(dist)
|
let version = CanonicalVersion::from(dist.installed_version());
|
||||||
|
Self::Installed(dist, version)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Hash for LocalDist {
|
impl Hash for LocalDist {
|
||||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||||
self.name().hash(state);
|
self.name().hash(state);
|
||||||
self.installed_version().hash(state);
|
self.canonical_version().hash(state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PartialEq for LocalDist {
|
impl PartialEq for LocalDist {
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
self.name() == other.name() && self.installed_version() == other.installed_version()
|
self.name() == other.name() && self.canonical_version() == other.canonical_version()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Like [`InstalledVersion`], but with [`CanonicalUrl`] to ensure robust URL comparisons.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub enum CanonicalVersion {
|
||||||
|
Version(Version),
|
||||||
|
Url(CanonicalUrl, Version),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<InstalledVersion<'_>> for CanonicalVersion {
|
||||||
|
fn from(installed_version: InstalledVersion<'_>) -> Self {
|
||||||
|
match installed_version {
|
||||||
|
InstalledVersion::Version(version) => Self::Version(version.clone()),
|
||||||
|
InstalledVersion::Url(url, version) => {
|
||||||
|
Self::Url(CanonicalUrl::new(url), version.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue