From 32ea636585595435c8d1764893cd7877d198c1da Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 5 Jul 2024 17:57:40 -0400 Subject: [PATCH] Preserve verbatim URLs for `--find-links` (#4838) Also gets rid of a lot of duplicated logic for `--find-links`. Closes https://github.com/astral-sh/uv/issues/4797 --- crates/distribution-types/src/index_url.rs | 161 ++++++++---------- crates/requirements-txt/src/lib.rs | 105 +++--------- crates/uv-client/src/flat_index.rs | 39 +++-- .../src/index/registry_wheel_index.rs | 13 +- crates/uv-requirements/src/specification.rs | 7 +- crates/uv/src/commands/pip/compile.rs | 2 +- crates/uv/tests/pip_compile.rs | 29 ++++ crates/uv/tests/show_settings.rs | 29 ++-- 8 files changed, 171 insertions(+), 214 deletions(-) diff --git a/crates/distribution-types/src/index_url.rs b/crates/distribution-types/src/index_url.rs index c6eaa42fe..1d9b7e120 100644 --- a/crates/distribution-types/src/index_url.rs +++ b/crates/distribution-types/src/index_url.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::fmt::{Display, Formatter}; use std::ops::Deref; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::str::FromStr; use itertools::Either; @@ -9,8 +9,7 @@ use once_cell::sync::Lazy; use thiserror::Error; use url::{ParseError, Url}; -use pep508_rs::{expand_env_vars, split_scheme, strip_host, Scheme, VerbatimUrl, VerbatimUrlError}; -use uv_fs::normalize_url_path; +use pep508_rs::{VerbatimUrl, VerbatimUrlError}; use crate::Verbatim; @@ -91,6 +90,15 @@ impl Verbatim for IndexUrl { } } +impl From for IndexUrl { + fn from(location: FlatIndexLocation) -> Self { + match location { + FlatIndexLocation::Path(url) => Self::Path(url), + FlatIndexLocation::Url(url) => Self::Url(url), + } + } +} + /// An error that can occur when parsing an [`IndexUrl`]. #[derive(Error, Debug)] pub enum IndexUrlError { @@ -173,8 +181,8 @@ impl Deref for IndexUrl { /// Also known as `--find-links`. #[derive(Debug, Clone, Hash, Eq, PartialEq)] pub enum FlatIndexLocation { - Path(PathBuf), - Url(Url), + Path(VerbatimUrl), + Url(VerbatimUrl), } #[cfg(feature = "schemars")] @@ -197,6 +205,60 @@ impl schemars::JsonSchema for FlatIndexLocation { } } +impl FlatIndexLocation { + /// Return the raw URL for the `--find-links` index. + pub fn url(&self) -> &Url { + match self { + Self::Url(url) => url.raw(), + Self::Path(url) => url.raw(), + } + } + + /// Return the redacted URL for the `--find-links` index, omitting any sensitive credentials. + pub fn redacted(&self) -> Cow<'_, Url> { + let url = self.url(); + if url.username().is_empty() && url.password().is_none() { + Cow::Borrowed(url) + } else { + let mut url = url.clone(); + let _ = url.set_username(""); + let _ = url.set_password(None); + Cow::Owned(url) + } + } +} + +impl Display for FlatIndexLocation { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Url(url) => Display::fmt(url, f), + Self::Path(url) => Display::fmt(url, f), + } + } +} + +impl Verbatim for FlatIndexLocation { + fn verbatim(&self) -> Cow<'_, str> { + match self { + Self::Url(url) => url.verbatim(), + Self::Path(url) => url.verbatim(), + } + } +} + +impl FromStr for FlatIndexLocation { + type Err = IndexUrlError; + + fn from_str(s: &str) -> Result { + let url = if let Ok(path) = Path::new(s).canonicalize() { + VerbatimUrl::from_path(path)? + } else { + VerbatimUrl::parse_url(s)? + }; + Ok(Self::from(url.with_given(s))) + } +} + impl serde::ser::Serialize for FlatIndexLocation { fn serialize(&self, serializer: S) -> Result where @@ -216,60 +278,12 @@ impl<'de> serde::de::Deserialize<'de> for FlatIndexLocation { } } -impl FromStr for FlatIndexLocation { - type Err = url::ParseError; - - /// Parse a raw string for a `--find-links` entry, which could be a URL or a local path. - /// - /// For example: - /// - `file:///home/ferris/project/scripts/...` - /// - `file:../ferris/` - /// - `../ferris/` - /// - `https://download.pytorch.org/whl/torch_stable.html` - fn from_str(s: &str) -> Result { - // Expand environment variables. - let expanded = expand_env_vars(s); - - // Parse the expanded path. - if let Some((scheme, path)) = split_scheme(&expanded) { - match Scheme::parse(scheme) { - // Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/` - Some(Scheme::File) => { - // Strip the leading slashes, along with the `localhost` host, if present. - let path = strip_host(path); - - // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. - let path = normalize_url_path(path); - - let path = PathBuf::from(path.as_ref()); - Ok(Self::Path(path)) - } - - // Ex) `https://download.pytorch.org/whl/torch_stable.html` - Some(_) => { - let url = Url::parse(expanded.as_ref())?; - Ok(Self::Url(url)) - } - - // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` - None => { - let path = PathBuf::from(expanded.as_ref()); - Ok(Self::Path(path)) - } - } +impl From for FlatIndexLocation { + fn from(url: VerbatimUrl) -> Self { + if url.scheme() == "file" { + Self::Path(url) } else { - // Ex) `../ferris/` - let path = PathBuf::from(expanded.as_ref()); - Ok(Self::Path(path)) - } - } -} - -impl Display for FlatIndexLocation { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::Path(path) => Display::fmt(&path.display(), f), - Self::Url(url) => Display::fmt(url, f), + Self::Url(url) } } } @@ -387,7 +401,7 @@ impl<'a> IndexLocations { .map(IndexUrl::url) .chain(self.flat_index.iter().filter_map(|index| match index { FlatIndexLocation::Path(_) => None, - FlatIndexLocation::Url(url) => Some(url), + FlatIndexLocation::Url(url) => Some(url.raw()), })) } } @@ -459,32 +473,3 @@ impl From for IndexUrls { } } } - -#[cfg(test)] -#[cfg(unix)] -mod test { - use super::*; - - #[test] - fn parse_find_links() { - assert_eq!( - FlatIndexLocation::from_str("file:///home/ferris/project/scripts/...").unwrap(), - FlatIndexLocation::Path(PathBuf::from("/home/ferris/project/scripts/...")) - ); - assert_eq!( - FlatIndexLocation::from_str("file:../ferris/").unwrap(), - FlatIndexLocation::Path(PathBuf::from("../ferris/")) - ); - assert_eq!( - FlatIndexLocation::from_str("../ferris/").unwrap(), - FlatIndexLocation::Path(PathBuf::from("../ferris/")) - ); - assert_eq!( - FlatIndexLocation::from_str("https://download.pytorch.org/whl/torch_stable.html") - .unwrap(), - FlatIndexLocation::Url( - Url::parse("https://download.pytorch.org/whl/torch_stable.html").unwrap() - ) - ); - } -} diff --git a/crates/requirements-txt/src/lib.rs b/crates/requirements-txt/src/lib.rs index 34064a859..e69964a40 100644 --- a/crates/requirements-txt/src/lib.rs +++ b/crates/requirements-txt/src/lib.rs @@ -45,15 +45,13 @@ use unscanny::{Pattern, Scanner}; use url::Url; use distribution_types::{UnresolvedRequirement, UnresolvedRequirementSpecification}; -use pep508_rs::{ - expand_env_vars, split_scheme, strip_host, Pep508Error, RequirementOrigin, Scheme, VerbatimUrl, -}; +use pep508_rs::{expand_env_vars, Pep508Error, RequirementOrigin, VerbatimUrl}; use pypi_types::{Requirement, VerbatimParsedUrl}; #[cfg(feature = "http")] use uv_client::BaseClient; use uv_client::BaseClientBuilder; use uv_configuration::{NoBinary, NoBuild, PackageNameSpecifier}; -use uv_fs::{normalize_url_path, Simplified}; +use uv_fs::Simplified; use uv_warnings::warn_user; use crate::requirement::EditableError; @@ -84,7 +82,7 @@ enum RequirementsTxtStatement { /// `--extra-index-url` ExtraIndexUrl(VerbatimUrl), /// `--find-links` - FindLinks(FindLink), + FindLinks(VerbatimUrl), /// `--no-index` NoIndex, /// `--no-binary` @@ -93,73 +91,6 @@ enum RequirementsTxtStatement { OnlyBinary(NoBuild), } -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum FindLink { - Path(PathBuf), - Url(Url), -} - -impl FindLink { - /// Parse a raw string for a `--find-links` entry, which could be a URL or a local path. - /// - /// For example: - /// - `file:///home/ferris/project/scripts/...` - /// - `file:../ferris/` - /// - `../ferris/` - /// - `https://download.pytorch.org/whl/torch_stable.html` - pub fn parse(given: &str, working_dir: impl AsRef) -> Result { - // Expand environment variables. - let expanded = expand_env_vars(given); - - if let Some((scheme, path)) = split_scheme(&expanded) { - match Scheme::parse(scheme) { - // Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/` - Some(Scheme::File) => { - // Strip the leading slashes, along with the `localhost` host, if present. - let path = strip_host(path); - - // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. - let path = normalize_url_path(path); - - let path = PathBuf::from(path.as_ref()); - let path = if path.is_absolute() { - path - } else { - working_dir.as_ref().join(path) - }; - Ok(Self::Path(path)) - } - - // Ex) `https://download.pytorch.org/whl/torch_stable.html` - Some(_) => { - let url = Url::parse(&expanded)?; - Ok(Self::Url(url)) - } - - // Ex) `C:/Users/ferris/wheel-0.42.0.tar.gz` - _ => { - let path = PathBuf::from(expanded.as_ref()); - let path = if path.is_absolute() { - path - } else { - working_dir.as_ref().join(path) - }; - Ok(Self::Path(path)) - } - } - } else { - // Ex) `../ferris/` - let path = PathBuf::from(expanded.as_ref()); - let path = if path.is_absolute() { - path - } else { - working_dir.as_ref().join(path) - }; - Ok(Self::Path(path)) - } - } -} - /// A [Requirement] with additional metadata from the `requirements.txt`, currently only hashes but in /// the future also editable and similar information. #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -212,7 +143,7 @@ pub struct RequirementsTxt { /// The extra index URLs, specified with `--extra-index-url`. pub extra_index_urls: Vec, /// The find links locations, specified with `--find-links`. - pub find_links: Vec, + pub find_links: Vec, /// Whether to ignore the index, specified with `--no-index`. pub no_index: bool, /// Whether to disallow wheels, specified with `--no-binary`. @@ -445,8 +376,8 @@ impl RequirementsTxt { RequirementsTxtStatement::ExtraIndexUrl(url) => { data.extra_index_urls.push(url); } - RequirementsTxtStatement::FindLinks(path_or_url) => { - data.find_links.push(path_or_url); + RequirementsTxtStatement::FindLinks(url) => { + data.find_links.push(url); } RequirementsTxtStatement::NoIndex => { data.no_index = true; @@ -592,16 +523,26 @@ fn parse_entry( } else if s.eat_if("--no-index") { RequirementsTxtStatement::NoIndex } else if s.eat_if("--find-links") || s.eat_if("-f") { - let path_or_url = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?; - let path_or_url = FindLink::parse(path_or_url, working_dir).map_err(|err| { - RequirementsTxtParserError::Url { + let given = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?; + let expanded = expand_env_vars(given); + let url = if let Ok(path) = Path::new(expanded.as_ref()).canonicalize() { + VerbatimUrl::from_path(path).map_err(|err| RequirementsTxtParserError::VerbatimUrl { source: err, - url: path_or_url.to_string(), + url: given.to_string(), start, end: s.cursor(), - } - })?; - RequirementsTxtStatement::FindLinks(path_or_url) + })? + } else { + VerbatimUrl::parse_url(expanded.as_ref()).map_err(|err| { + RequirementsTxtParserError::Url { + source: err, + url: given.to_string(), + start, + end: s.cursor(), + } + })? + }; + RequirementsTxtStatement::FindLinks(url.with_given(given)) } else if s.eat_if("--no-binary") { let given = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?; let specifier = PackageNameSpecifier::from_str(given).map_err(|err| { diff --git a/crates/uv-client/src/flat_index.rs b/crates/uv-client/src/flat_index.rs index 72ddadd3c..e88de116e 100644 --- a/crates/uv-client/src/flat_index.rs +++ b/crates/uv-client/src/flat_index.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use futures::{FutureExt, StreamExt}; use reqwest::Response; @@ -7,7 +7,6 @@ use url::Url; use distribution_filename::DistFilename; use distribution_types::{File, FileLocation, FlatIndexLocation, IndexUrl}; -use pep508_rs::VerbatimUrl; use uv_cache::{Cache, CacheBucket}; use crate::cached_client::{CacheControl, CachedClientError}; @@ -16,6 +15,9 @@ use crate::{Connectivity, Error, ErrorKind, RegistryClient}; #[derive(Debug, thiserror::Error)] pub enum FlatIndexError { + #[error("Expected a file URL, but received: {0}")] + NonFileUrl(Url), + #[error("Failed to read `--find-links` directory: {0}")] FindLinksDirectory(PathBuf, #[source] FindLinksDirectoryError), @@ -97,12 +99,17 @@ impl<'a> FlatIndexClient<'a> { let mut fetches = futures::stream::iter(indexes) .map(|index| async move { let entries = match index { - FlatIndexLocation::Path(path) => Self::read_from_directory(path) - .map_err(|err| FlatIndexError::FindLinksDirectory(path.clone(), err))?, + FlatIndexLocation::Path(url) => { + let path = url + .to_file_path() + .map_err(|()| FlatIndexError::NonFileUrl(url.to_url()))?; + Self::read_from_directory(&path, index) + .map_err(|err| FlatIndexError::FindLinksDirectory(path.clone(), err))? + } FlatIndexLocation::Url(url) => self - .read_from_url(url) + .read_from_url(url, index) .await - .map_err(|err| FlatIndexError::FindLinksUrl(url.clone(), err))?, + .map_err(|err| FlatIndexError::FindLinksUrl(url.to_url(), err))?, }; if entries.is_empty() { warn!("No packages found in `--find-links` entry: {}", index); @@ -126,7 +133,11 @@ impl<'a> FlatIndexClient<'a> { } /// Read a flat remote index from a `--find-links` URL. - async fn read_from_url(&self, url: &Url) -> Result { + async fn read_from_url( + &self, + url: &Url, + flat_index: &FlatIndexLocation, + ) -> Result { let cache_entry = self.cache.entry( CacheBucket::FlatIndex, "html", @@ -189,14 +200,13 @@ impl<'a> FlatIndexClient<'a> { .await; match response { Ok(files) => { - let index_url = IndexUrl::Url(VerbatimUrl::from_url(url.clone())); let files = files .into_iter() .filter_map(|file| { Some(( DistFilename::try_from_normalized_filename(&file.filename)?, file, - index_url.clone(), + IndexUrl::from(flat_index.clone()), )) }) .collect(); @@ -210,11 +220,10 @@ impl<'a> FlatIndexClient<'a> { } /// Read a flat remote index from a `--find-links` directory. - fn read_from_directory(path: &PathBuf) -> Result { - // Absolute paths are required for the URL conversion. - let path = fs_err::canonicalize(path)?; - let index_url = IndexUrl::Path(VerbatimUrl::from_path(&path)?); - + fn read_from_directory( + path: &Path, + flat_index: &FlatIndexLocation, + ) -> Result { let mut dists = Vec::new(); for entry in fs_err::read_dir(path)? { let entry = entry?; @@ -249,7 +258,7 @@ impl<'a> FlatIndexClient<'a> { ); continue; }; - dists.push((filename, file, index_url.clone())); + dists.push((filename, file, IndexUrl::from(flat_index.clone()))); } Ok(FlatIndexEntries::from_entries(dists)) } diff --git a/crates/uv-distribution/src/index/registry_wheel_index.rs b/crates/uv-distribution/src/index/registry_wheel_index.rs index 74eadf6e7..76f97245f 100644 --- a/crates/uv-distribution/src/index/registry_wheel_index.rs +++ b/crates/uv-distribution/src/index/registry_wheel_index.rs @@ -3,9 +3,8 @@ use std::collections::BTreeMap; use rustc_hash::FxHashMap; -use distribution_types::{CachedRegistryDist, FlatIndexLocation, Hashed, IndexLocations, IndexUrl}; +use distribution_types::{CachedRegistryDist, Hashed, IndexLocations, IndexUrl}; use pep440_rs::Version; -use pep508_rs::VerbatimUrl; use platform_tags::Tags; use uv_cache::{Cache, CacheBucket, WheelCache}; use uv_fs::{directories, files, symlinks}; @@ -91,15 +90,7 @@ impl<'a> RegistryWheelIndex<'a> { // Collect into owned `IndexUrl`. let flat_index_urls: Vec = index_locations .flat_index() - .filter_map(|flat_index| match flat_index { - FlatIndexLocation::Path(path) => { - let path = fs_err::canonicalize(path).ok()?; - Some(IndexUrl::Path(VerbatimUrl::from_path(path).ok()?)) - } - FlatIndexLocation::Url(url) => { - Some(IndexUrl::Url(VerbatimUrl::from_url(url.clone()))) - } - }) + .map(|flat_index| IndexUrl::from(flat_index.clone())) .collect(); for index_url in index_locations.indexes().chain(flat_index_urls.iter()) { diff --git a/crates/uv-requirements/src/specification.rs b/crates/uv-requirements/src/specification.rs index 28ee52eed..154471426 100644 --- a/crates/uv-requirements/src/specification.rs +++ b/crates/uv-requirements/src/specification.rs @@ -40,7 +40,7 @@ use distribution_types::{ use pep508_rs::{UnnamedRequirement, UnnamedRequirementUrl}; use pypi_types::Requirement; use pypi_types::VerbatimParsedUrl; -use requirements_txt::{FindLink, RequirementsTxt, RequirementsTxtRequirement}; +use requirements_txt::{RequirementsTxt, RequirementsTxtRequirement}; use uv_client::BaseClientBuilder; use uv_configuration::{NoBinary, NoBuild}; use uv_distribution::pyproject::PyProjectToml; @@ -143,10 +143,7 @@ impl RequirementsSpecification { find_links: requirements_txt .find_links .into_iter() - .map(|link| match link { - FindLink::Url(url) => FlatIndexLocation::Url(url), - FindLink::Path(path) => FlatIndexLocation::Path(path), - }) + .map(FlatIndexLocation::from) .collect(), no_binary: requirements_txt.no_binary, no_build: requirements_txt.only_binary, diff --git a/crates/uv/src/commands/pip/compile.rs b/crates/uv/src/commands/pip/compile.rs index 44fee7c41..ce67844d5 100644 --- a/crates/uv/src/commands/pip/compile.rs +++ b/crates/uv/src/commands/pip/compile.rs @@ -411,7 +411,7 @@ pub(crate) async fn pip_compile( // If necessary, include the `--find-links` locations. if include_find_links { for flat_index in index_locations.flat_index() { - writeln!(writer, "--find-links {flat_index}")?; + writeln!(writer, "--find-links {}", flat_index.verbatim())?; wrote_preamble = true; } } diff --git a/crates/uv/tests/pip_compile.rs b/crates/uv/tests/pip_compile.rs index 4c7465233..49f731488 100644 --- a/crates/uv/tests/pip_compile.rs +++ b/crates/uv/tests/pip_compile.rs @@ -5083,6 +5083,35 @@ fn emit_find_links() -> Result<()> { Ok(()) } +/// Emit the `--find-links` locations using a relative path in a requirements file. The verbatim +/// path should be preserved. +#[test] +fn emit_find_links_relative() -> Result<()> { + let context = TestContext::new("3.12"); + let requirements_in = context.temp_dir.child("requirements.in"); + requirements_in.write_str("-f ./\niniconfig")?; + + uv_snapshot!(context.pip_compile() + .arg("requirements.in") + .arg("--emit-find-links"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + # This file was autogenerated by uv via the following command: + # uv pip compile --cache-dir [CACHE_DIR] requirements.in --emit-find-links + --find-links ./ + + iniconfig==2.0.0 + # via -r requirements.in + + ----- stderr ----- + Resolved 1 package in [TIME] + "### + ); + + Ok(()) +} + /// Emit the `--no-binary` and `--only-binary` options. #[test] fn emit_build_options() -> Result<()> { diff --git a/crates/uv/tests/show_settings.rs b/crates/uv/tests/show_settings.rs index 2d8b4897c..525e43a15 100644 --- a/crates/uv/tests/show_settings.rs +++ b/crates/uv/tests/show_settings.rs @@ -1287,20 +1287,25 @@ fn resolve_find_links() -> anyhow::Result<()> { extra_index: [], flat_index: [ Url( - Url { - scheme: "https", - cannot_be_a_base: false, - username: "", - password: None, - host: Some( - Domain( - "download.pytorch.org", + VerbatimUrl { + url: Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "download.pytorch.org", + ), ), + port: None, + path: "/whl/torch_stable.html", + query: None, + fragment: None, + }, + given: Some( + "https://download.pytorch.org/whl/torch_stable.html", ), - port: None, - path: "/whl/torch_stable.html", - query: None, - fragment: None, }, ), ],