From 8adf4a8977060ecc1284a589fc1f420c788f6dcb Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 31 Jan 2025 16:29:46 -0500 Subject: [PATCH] Migrate from `urlencoding` to `percent-encoding` (#11144) ## Summary This lets us drop a dependency entirely. `percent-encoding` is used by `url` and so is already in the graph, whereas `urlencoding` isn't used by anything else. --- Cargo.lock | 16 +++++----------- Cargo.toml | 2 +- crates/uv-auth/Cargo.toml | 9 ++++----- crates/uv-auth/src/credentials.rs | 6 ++++-- crates/uv-cache-key/Cargo.toml | 2 +- crates/uv-cache-key/src/canonical_url.rs | 3 ++- crates/uv-client/Cargo.toml | 2 +- crates/uv-client/src/html.rs | 5 +++-- crates/uv-distribution-types/Cargo.toml | 2 +- crates/uv-distribution-types/src/error.rs | 2 +- crates/uv-distribution-types/src/lib.rs | 4 ++-- crates/uv-fs/Cargo.toml | 5 ++--- crates/uv-fs/src/path.rs | 4 +++- 13 files changed, 30 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62b472a79..162c03e7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4440,12 +4440,6 @@ dependencies = [ "serde", ] -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "usvg" version = "0.29.0" @@ -4619,6 +4613,7 @@ dependencies = [ "futures", "http", "insta", + "percent-encoding", "reqwest", "reqwest-middleware", "rust-netrc", @@ -4628,7 +4623,6 @@ dependencies = [ "tokio", "tracing", "url", - "urlencoding", "uv-once-map", "uv-static", "wiremock", @@ -4771,9 +4765,9 @@ version = "0.0.1" dependencies = [ "hex", "memchr", + "percent-encoding", "seahash", "url", - "urlencoding", ] [[package]] @@ -4822,6 +4816,7 @@ dependencies = [ "insta", "itertools 0.14.0", "jiff", + "percent-encoding", "reqwest", "reqwest-middleware", "reqwest-retry", @@ -4836,7 +4831,6 @@ dependencies = [ "tokio-util", "tracing", "url", - "urlencoding", "uv-auth", 
"uv-cache", "uv-cache-key", @@ -5048,6 +5042,7 @@ dependencies = [ "itertools 0.14.0", "jiff", "owo-colors", + "percent-encoding", "petgraph", "rkyv", "rustc-hash", @@ -5057,7 +5052,6 @@ dependencies = [ "thiserror 2.0.11", "tracing", "url", - "urlencoding", "uv-auth", "uv-cache-info", "uv-cache-key", @@ -5110,13 +5104,13 @@ dependencies = [ "fs2", "junction", "path-slash", + "percent-encoding", "rustix", "schemars", "serde", "tempfile", "tokio", "tracing", - "urlencoding", "winsafe 0.0.22", ] diff --git a/Cargo.toml b/Cargo.toml index 03b126bc1..8aa58cad9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -128,6 +128,7 @@ nix = { version = "0.29.0" } owo-colors = { version = "4.1.0" } path-slash = { version = "0.2.1" } pathdiff = { version = "0.2.1" } +percent-encoding = { version = "2.3.1" } petgraph = { version = "0.7.1" } platform-info = { version = "2.0.3" } proc-macro2 = { version = "1.0.86" } @@ -176,7 +177,6 @@ tracing-tree = { version = "0.4.0" } unicode-width = { version = "0.1.13" } unscanny = { version = "0.1.0" } url = { version = "2.5.2", features = ["serde"] } -urlencoding = { version = "2.1.3" } version-ranges = { git = "https://github.com/astral-sh/pubgrub", rev = "648aa343486e5529953153781fc86025c73c4a61" } walkdir = { version = "2.5.0" } which = { version = "7.0.0", features = ["regex"] } diff --git a/crates/uv-auth/Cargo.toml b/crates/uv-auth/Cargo.toml index 24d75582b..218e2c390 100644 --- a/crates/uv-auth/Cargo.toml +++ b/crates/uv-auth/Cargo.toml @@ -11,12 +11,14 @@ workspace = true [dependencies] uv-once-map = { workspace = true } +uv-static = { workspace = true } anyhow = { workspace = true } async-trait = { workspace = true } base64 = { workspace = true } futures = { workspace = true } http = { workspace = true } +percent-encoding = { workspace = true } reqwest = { workspace = true } reqwest-middleware = { workspace = true } rust-netrc = { workspace = true } @@ -24,13 +26,10 @@ rustc-hash = { workspace = true } tokio = { workspace = true } 
tracing = { workspace = true } url = { workspace = true } -urlencoding = { workspace = true } - -uv-static = { workspace = true } [dev-dependencies] +insta = { version = "1.40.0" } tempfile = { workspace = true } +test-log = { version = "0.2.16", features = ["trace"], default-features = false } tokio = { workspace = true } wiremock = { workspace = true } -insta = { version = "1.40.0" } -test-log = { version = "0.2.16", features = ["trace"], default-features = false } diff --git a/crates/uv-auth/src/credentials.rs b/crates/uv-auth/src/credentials.rs index 66ec4cbb5..cba5b264b 100644 --- a/crates/uv-auth/src/credentials.rs +++ b/crates/uv-auth/src/credentials.rs @@ -127,14 +127,16 @@ impl Credentials { None } else { Some( - urlencoding::decode(url.username()) + percent_encoding::percent_decode_str(url.username()) + .decode_utf8() .expect("An encoded username should always decode") .into_owned(), ) } .into(), password: url.password().map(|password| { - urlencoding::decode(password) + percent_encoding::percent_decode_str(password) + .decode_utf8() .expect("An encoded password should always decode") .into_owned() }), diff --git a/crates/uv-cache-key/Cargo.toml b/crates/uv-cache-key/Cargo.toml index fe8f915d6..a50f3ca65 100644 --- a/crates/uv-cache-key/Cargo.toml +++ b/crates/uv-cache-key/Cargo.toml @@ -19,6 +19,6 @@ workspace = true [dependencies] hex = { workspace = true } memchr = { workspace = true } +percent-encoding = { workspace = true } seahash = { workspace = true } url = { workspace = true } -urlencoding = { workspace = true } diff --git a/crates/uv-cache-key/src/canonical_url.rs b/crates/uv-cache-key/src/canonical_url.rs index 5da0e01f7..e0487fa7f 100644 --- a/crates/uv-cache-key/src/canonical_url.rs +++ b/crates/uv-cache-key/src/canonical_url.rs @@ -78,7 +78,8 @@ impl CanonicalUrl { .path_segments() .unwrap() .map(|segment| { - urlencoding::decode(segment) + percent_encoding::percent_decode_str(segment) + .decode_utf8() .unwrap_or(Cow::Borrowed(segment)) 
.into_owned() }) diff --git a/crates/uv-client/Cargo.toml b/crates/uv-client/Cargo.toml index 7bea5ab97..529e4afcc 100644 --- a/crates/uv-client/Cargo.toml +++ b/crates/uv-client/Cargo.toml @@ -38,6 +38,7 @@ html-escape = { workspace = true } http = { workspace = true } itertools = { workspace = true } jiff = { workspace = true } +percent-encoding = { workspace = true } reqwest = { workspace = true } reqwest-middleware = { workspace = true } reqwest-retry = { workspace = true } @@ -52,7 +53,6 @@ tokio = { workspace = true } tokio-util = { workspace = true } tracing = { workspace = true } url = { workspace = true } -urlencoding = { workspace = true } [dev-dependencies] anyhow = { workspace = true } diff --git a/crates/uv-client/src/html.rs b/crates/uv-client/src/html.rs index 5972a0ae8..5c27df26f 100644 --- a/crates/uv-client/src/html.rs +++ b/crates/uv-client/src/html.rs @@ -93,7 +93,7 @@ impl SimpleHtml { // Extract the hash, which should be in the fragment. let decoded = html_escape::decode_html_entities(href); let (path, hashes) = if let Some((path, fragment)) = decoded.split_once('#') { - let fragment = urlencoding::decode(fragment)?; + let fragment = percent_encoding::percent_decode_str(fragment).decode_utf8()?; ( path, if fragment.trim().is_empty() { @@ -131,7 +131,8 @@ impl SimpleHtml { let filename = filename.split('?').next().unwrap_or(filename); // Unquote the filename. 
- let filename = urlencoding::decode(filename) + let filename = percent_encoding::percent_decode_str(filename) + .decode_utf8() .map_err(|_| Error::UnsupportedFilename(filename.to_string()))?; // Extract the `requires-python` value, which should be set on the diff --git a/crates/uv-distribution-types/Cargo.toml b/crates/uv-distribution-types/Cargo.toml index 3f2dc8293..c2b5960af 100644 --- a/crates/uv-distribution-types/Cargo.toml +++ b/crates/uv-distribution-types/Cargo.toml @@ -34,6 +34,7 @@ fs-err = { workspace = true } itertools = { workspace = true } jiff = { workspace = true } owo-colors = { workspace = true } +percent-encoding = { workspace = true } petgraph = { workspace = true } rkyv = { workspace = true } rustc-hash = { workspace = true } @@ -43,5 +44,4 @@ serde_json = { workspace = true } thiserror = { workspace = true } tracing = { workspace = true } url = { workspace = true } -urlencoding = { workspace = true } version-ranges = { workspace = true } diff --git a/crates/uv-distribution-types/src/error.rs b/crates/uv-distribution-types/src/error.rs index 8c03d5b40..fc1c4f588 100644 --- a/crates/uv-distribution-types/src/error.rs +++ b/crates/uv-distribution-types/src/error.rs @@ -8,7 +8,7 @@ pub enum Error { Io(#[from] std::io::Error), #[error(transparent)] - Utf8(#[from] std::string::FromUtf8Error), + Utf8(#[from] std::str::Utf8Error), #[error(transparent)] WheelFilename(#[from] uv_distribution_filename::WheelFilenameError), diff --git a/crates/uv-distribution-types/src/lib.rs b/crates/uv-distribution-types/src/lib.rs index 8a7d5a2c8..3f911c2f8 100644 --- a/crates/uv-distribution-types/src/lib.rs +++ b/crates/uv-distribution-types/src/lib.rs @@ -923,7 +923,7 @@ impl RemoteSource for Url { let last = path_segments.last().expect("path segments is non-empty"); // Decode the filename, which may be percent-encoded. 
- let filename = urlencoding::decode(last)?; + let filename = percent_encoding::percent_decode_str(last).decode_utf8()?; Ok(filename) } @@ -943,7 +943,7 @@ impl RemoteSource for UrlString { .ok_or_else(|| Error::MissingPathSegments(self.to_string()))?; // Decode the filename, which may be percent-encoded. - let filename = urlencoding::decode(last)?; + let filename = percent_encoding::percent_decode_str(last).decode_utf8()?; Ok(filename) } diff --git a/crates/uv-fs/Cargo.toml b/crates/uv-fs/Cargo.toml index 8c58022ac..7d6a46d61 100644 --- a/crates/uv-fs/Cargo.toml +++ b/crates/uv-fs/Cargo.toml @@ -16,7 +16,6 @@ doctest = false workspace = true [dependencies] - cachedir = { workspace = true } dunce = { workspace = true } either = { workspace = true } @@ -24,12 +23,12 @@ encoding_rs_io = { workspace = true } fs-err = { workspace = true } fs2 = { workspace = true } path-slash = { workspace = true } +percent-encoding = { workspace = true } schemars = { workspace = true, optional = true } serde = { workspace = true, optional = true } -tokio = { workspace = true, optional = true} tempfile = { workspace = true } +tokio = { workspace = true, optional = true} tracing = { workspace = true } -urlencoding = { workspace = true } [target.'cfg(target_os = "windows")'.dependencies] winsafe = { workspace = true } diff --git a/crates/uv-fs/src/path.rs b/crates/uv-fs/src/path.rs index 6eaa63663..7c245e978 100644 --- a/crates/uv-fs/src/path.rs +++ b/crates/uv-fs/src/path.rs @@ -139,7 +139,9 @@ impl<T: AsRef<Path>> PythonExt for T { /// On other platforms, this is a no-op. pub fn normalize_url_path(path: &str) -> Cow<'_, str> { // Apply percent-decoding to the URL. - let path = urlencoding::decode(path).unwrap_or(Cow::Borrowed(path)); + let path = percent_encoding::percent_decode_str(path) + .decode_utf8() + .unwrap_or(Cow::Borrowed(path)); // Return the path. if cfg!(windows) {