From 684f790d5d5ecdc378f028a52067dd3956b81b74 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 2 Apr 2024 20:24:29 -0400 Subject: [PATCH] Preserve `.git` suffixes and casing in Git dependencies (#2789) ## Summary I noticed in #2769 that I was now stripping `.git` suffixes from Git URLs after resolving to a precise commit. This PR cleans up the internal caching to use a better canonical representation: a `RepositoryUrl` along with a `GitReference`, instead of a `GitUrl` which can contain non-canonical data. This gives us both better fidelity (preserving the `.git`, along with any casing that the user provided when defining the URL) and is overall cleaner and more robust. --- crates/cache-key/src/canonical_url.rs | 6 ++ crates/distribution-types/src/direct_url.rs | 2 +- crates/uv-distribution/src/git.rs | 98 ++++++++++++++------- crates/uv-git/src/git.rs | 30 ++++++- crates/uv-git/src/lib.rs | 16 +--- crates/uv-git/src/source.rs | 2 +- crates/uv/tests/pip_sync.rs | 4 +- 7 files changed, 104 insertions(+), 54 deletions(-) diff --git a/crates/cache-key/src/canonical_url.rs b/crates/cache-key/src/canonical_url.rs index 4157fe3f2..ded73b4ac 100644 --- a/crates/cache-key/src/canonical_url.rs +++ b/crates/cache-key/src/canonical_url.rs @@ -165,6 +165,12 @@ impl Deref for RepositoryUrl { } } +impl std::fmt::Display for RepositoryUrl { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.0, f) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/distribution-types/src/direct_url.rs b/crates/distribution-types/src/direct_url.rs index ac7a6060f..297f5d090 100644 --- a/crates/distribution-types/src/direct_url.rs +++ b/crates/distribution-types/src/direct_url.rs @@ -164,7 +164,7 @@ impl TryFrom<&DirectGitUrl> for pypi_types::DirectUrl { vcs_info: pypi_types::VcsInfo { vcs: pypi_types::VcsKind::Git, commit_id: value.url.precise().as_ref().map(ToString::to_string), - requested_revision: value.url.reference().map(ToString::to_string), + requested_revision: value.url.reference().as_str().map(ToString::to_string), }, subdirectory: value.subdirectory.clone(), }) diff --git a/crates/uv-distribution/src/git.rs b/crates/uv-distribution/src/git.rs index 38b46bef2..8a719845a 100644 --- a/crates/uv-distribution/src/git.rs +++ b/crates/uv-distribution/src/git.rs @@ -8,11 +8,11 @@ use rustc_hash::FxHashMap; use tracing::debug; use url::Url; -use cache_key::CanonicalUrl; +use cache_key::{CanonicalUrl, RepositoryUrl}; use distribution_types::DirectGitUrl; use uv_cache::{Cache, CacheBucket}; use uv_fs::LockedFile; -use uv_git::{Fetch, GitSource, GitUrl}; +use uv_git::{Fetch, GitReference, GitSha, GitSource, GitUrl}; use crate::error::Error; use crate::reporter::Facade; @@ -24,7 +24,25 @@ use crate::Reporter; /// consistent across all invocations. (For example: if a Git URL refers to a branch, like `main`, /// then the resolved URL should always refer to the same commit across the lifetime of the /// process.) -static RESOLVED_GIT_REFS: Lazy>> = Lazy::new(Mutex::default); +static RESOLVED_GIT_REFS: Lazy>> = + Lazy::new(Mutex::default); + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct RepositoryReference { + /// The URL of the Git repository, with any query parameters and fragments removed. + url: RepositoryUrl, + /// The reference to the commit to use, which could be a branch, tag or revision. + reference: GitReference, +} + +impl RepositoryReference { + fn new(git: &GitUrl) -> Self { + Self { + url: RepositoryUrl::new(git.repository()), + reference: git.reference().clone(), + } + } +} /// Download a source distribution from a Git repository. pub(crate) async fn fetch_git_archive( @@ -40,10 +58,10 @@ pub(crate) async fn fetch_git_archive( fs::create_dir_all(&lock_dir) .await .map_err(Error::CacheWrite)?; - let canonical_url = CanonicalUrl::new(url); + let repository_url = RepositoryUrl::new(url); let _lock = LockedFile::acquire( - lock_dir.join(cache_key::digest(&canonical_url)), - &canonical_url, + lock_dir.join(cache_key::digest(&repository_url)), + &repository_url, ) .map_err(Error::CacheWrite)?; @@ -52,8 +70,9 @@ pub(crate) async fn fetch_git_archive( // Extract the resolved URL from the in-memory cache, to save a look-up in the fetch. let url = { let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap(); - if let Some(resolved) = resolved_git_refs.get(&url) { - resolved.clone() + let reference = RepositoryReference::new(&url); + if let Some(resolved) = resolved_git_refs.get(&reference) { + url.with_precise(*resolved) } else { url } @@ -70,10 +89,12 @@ pub(crate) async fn fetch_git_archive( .map_err(Error::Git)?; // Insert the resolved URL into the in-memory cache. - { - let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap(); - let precise = fetch.git().clone(); - resolved_git_refs.insert(url, precise); + if url.precise().is_none() { + if let Some(precise) = fetch.git().precise() { + let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap(); + let reference = RepositoryReference::new(&url); + resolved_git_refs.insert(reference, precise); + } } Ok((fetch, subdirectory)) @@ -92,8 +113,7 @@ pub(crate) async fn resolve_precise( cache: &Cache, reporter: Option<&Arc>, ) -> Result, Error> { - let url = Url::from(CanonicalUrl::new(url)); - let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(&url).map_err(Error::Git)?; + let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?; // If the Git reference already contains a complete SHA, short-circuit. if url.precise().is_some() { @@ -103,9 +123,10 @@ pub(crate) async fn resolve_precise( // If the Git reference is in the in-memory cache, return it. { let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap(); - if let Some(precise) = resolved_git_refs.get(&url) { + let reference = RepositoryReference::new(&url); + if let Some(precise) = resolved_git_refs.get(&reference) { return Ok(Some(Url::from(DirectGitUrl { - url: precise.clone(), + url: url.with_precise(*precise), subdirectory, }))); } @@ -123,17 +144,18 @@ pub(crate) async fn resolve_precise( let fetch = tokio::task::spawn_blocking(move || source.fetch()) .await? .map_err(Error::Git)?; - let precise = fetch.into_git(); + let git = fetch.into_git(); // Insert the resolved URL into the in-memory cache. - { + if let Some(precise) = git.precise() { let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap(); - resolved_git_refs.insert(url.clone(), precise.clone()); + let reference = RepositoryReference::new(&url); + resolved_git_refs.insert(reference, precise); } // Re-encode as a URL. Ok(Some(Url::from(DirectGitUrl { - url: precise, + url: git, subdirectory, }))) } @@ -153,7 +175,7 @@ pub fn is_same_reference<'a>(a: &'a Url, b: &'a Url) -> bool { fn is_same_reference_impl<'a>( a: &'a Url, b: &'a Url, - resolved_refs: &FxHashMap, + resolved_refs: &FxHashMap, ) -> bool { // Convert `a` to a Git URL, if possible. let Ok(a_git) = DirectGitUrl::try_from(&Url::from(CanonicalUrl::new(a))) else { @@ -170,13 +192,19 @@ fn is_same_reference_impl<'a>( return false; } + // Convert `a` to a repository URL. + let a_ref = RepositoryReference::new(&a_git.url); + + // Convert `b` to a repository URL. + let b_ref = RepositoryReference::new(&b_git.url); + // The URLs must refer to the same repository. - if a_git.url.repository() != b_git.url.repository() { + if a_ref.url != b_ref.url { return false; } // If the URLs have the same tag, they refer to the same commit. - if a_git.url.reference() == b_git.url.reference() { + if a_ref.reference == b_ref.reference { return true; } @@ -184,7 +212,7 @@ fn is_same_reference_impl<'a>( let Some(a_precise) = a_git .url .precise() - .or_else(|| resolved_refs.get(&a_git.url).and_then(GitUrl::precise)) + .or_else(|| resolved_refs.get(&a_ref).copied()) else { return false; }; @@ -192,7 +220,7 @@ fn is_same_reference_impl<'a>( let Some(b_precise) = b_git .url .precise() - .or_else(|| resolved_refs.get(&b_git.url).and_then(GitUrl::precise)) + .or_else(|| resolved_refs.get(&b_ref).copied()) else { return false; }; @@ -204,9 +232,11 @@ fn is_same_reference_impl<'a>( mod tests { use anyhow::Result; use rustc_hash::FxHashMap; + use std::str::FromStr; use url::Url; - use uv_git::GitUrl; + use crate::git::RepositoryReference; + use uv_git::{GitSha, GitUrl}; #[test] fn same_reference() -> Result<()> { @@ -244,10 +274,10 @@ mod tests { )?; let mut resolved_refs = FxHashMap::default(); resolved_refs.insert( - GitUrl::try_from(Url::parse("https://example.com/MyProject@main")?)?, - GitUrl::try_from(Url::parse( - "https://example.com/MyProject@164a8735b081663fede48c5041667b194da15d25", - )?)?, + RepositoryReference::new(&GitUrl::try_from(Url::parse( + "https://example.com/MyProject@main", + )?)?), + GitSha::from_str("164a8735b081663fede48c5041667b194da15d25")?, ); assert!(super::is_same_reference_impl(&a, &b, &resolved_refs)); @@ -258,10 +288,10 @@ mod tests { )?; let mut resolved_refs = FxHashMap::default(); resolved_refs.insert( - GitUrl::try_from(Url::parse("https://example.com/MyProject@main")?)?, - GitUrl::try_from(Url::parse( - "https://example.com/MyProject@f2c9e88f3ec9526bbcec68d150b176d96a750aba", - )?)?, + RepositoryReference::new(&GitUrl::try_from(Url::parse( + "https://example.com/MyProject@main", + )?)?), + GitSha::from_str("f2c9e88f3ec9526bbcec68d150b176d96a750aba")?, ); assert!(!super::is_same_reference_impl(&a, &b, &resolved_refs)); diff --git a/crates/uv-git/src/git.rs b/crates/uv-git/src/git.rs index 05e63f2ad..faddeefa1 100644 --- a/crates/uv-git/src/git.rs +++ b/crates/uv-git/src/git.rs @@ -24,7 +24,7 @@ const CHECKOUT_READY_LOCK: &str = ".ok"; /// A reference to commit or commit-ish. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) enum GitReference { +pub enum GitReference { /// From a branch. #[allow(unused)] Branch(String), @@ -66,8 +66,29 @@ impl GitReference { } } - /// Views the short ID as a `str`. - pub(crate) fn as_str(&self) -> &str { + pub fn precise(&self) -> Option<&str> { + match self { + Self::FullCommit(rev) => Some(rev), + Self::ShortCommit(rev) => Some(rev), + _ => None, + } + } + + /// Converts the [`GitReference`] to a `str`. + pub fn as_str(&self) -> Option<&str> { + match self { + Self::Branch(rev) => Some(rev), + Self::Tag(rev) => Some(rev), + Self::BranchOrTag(rev) => Some(rev), + Self::FullCommit(rev) => Some(rev), + Self::ShortCommit(rev) => Some(rev), + Self::Ref(rev) => Some(rev), + Self::DefaultBranch => None, + } + } + + /// Converts the [`GitReference`] to a `str` that can be used as a revision. + pub(crate) fn as_rev(&self) -> &str { match self { Self::Branch(rev) | Self::Tag(rev) @@ -79,6 +100,7 @@ impl GitReference { } } + /// Returns the kind of this reference. pub(crate) fn kind_str(&self) -> &str { match self { Self::Branch(_) => "branch", @@ -1034,7 +1056,7 @@ pub(crate) fn fetch( format!( "failed to fetch {} `{}`", reference.kind_str(), - reference.as_str() + reference.as_rev() ) }), } diff --git a/crates/uv-git/src/lib.rs b/crates/uv-git/src/lib.rs index 4fb25a6ca..084571dd2 100644 --- a/crates/uv-git/src/lib.rs +++ b/crates/uv-git/src/lib.rs @@ -1,7 +1,7 @@ use std::str::FromStr; use url::Url; -use crate::git::GitReference; +pub use crate::git::GitReference; pub use crate::sha::GitSha; pub use crate::source::{Fetch, GitSource, Reporter}; @@ -24,7 +24,7 @@ pub struct GitUrl { impl GitUrl { #[must_use] - pub(crate) fn with_precise(mut self, precise: GitSha) -> Self { + pub fn with_precise(mut self, precise: GitSha) -> Self { self.precise = Some(precise); self } @@ -35,16 +35,8 @@ impl GitUrl { } /// Return the reference to the commit to use, which could be a branch, tag or revision. - pub fn reference(&self) -> Option<&str> { - match &self.reference { - GitReference::Branch(rev) - | GitReference::Tag(rev) - | GitReference::BranchOrTag(rev) - | GitReference::Ref(rev) - | GitReference::FullCommit(rev) - | GitReference::ShortCommit(rev) => Some(rev), - GitReference::DefaultBranch => None, - } + pub fn reference(&self) -> &GitReference { + &self.reference } /// Returns `true` if the reference is a full commit. diff --git a/crates/uv-git/src/source.rs b/crates/uv-git/src/source.rs index cd16a314f..f1061331a 100644 --- a/crates/uv-git/src/source.rs +++ b/crates/uv-git/src/source.rs @@ -70,7 +70,7 @@ impl GitSource { // Report the checkout operation to the reporter. let task = self.reporter.as_ref().map(|reporter| { - reporter.on_checkout_start(remote.url(), self.git.reference.as_str()) + reporter.on_checkout_start(remote.url(), self.git.reference.as_rev()) }); let (db, actual_rev) = remote.checkout( diff --git a/crates/uv/tests/pip_sync.rs b/crates/uv/tests/pip_sync.rs index 25fa18f8d..b95f001cf 100644 --- a/crates/uv/tests/pip_sync.rs +++ b/crates/uv/tests/pip_sync.rs @@ -569,7 +569,7 @@ fn install_git_tag() -> Result<()> { let requirements_txt = context.temp_dir.child("requirements.txt"); requirements_txt.touch()?; - requirements_txt.write_str("werkzeug @ git+https://github.com/pallets/werkzeug.git@2.0.0")?; + requirements_txt.write_str("werkzeug @ git+https://github.com/pallets/WerkZeug.git@2.0.0")?; uv_snapshot!(command(&context) .arg("requirements.txt") @@ -582,7 +582,7 @@ fn install_git_tag() -> Result<()> { Resolved 1 package in [TIME] Downloaded 1 package in [TIME] Installed 1 package in [TIME] - + werkzeug==2.0.0 (from git+https://github.com/pallets/werkzeug@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74) + + werkzeug==2.0.0 (from git+https://github.com/pallets/WerkZeug.git@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74) "### );