Preserve `.git` suffixes and casing in Git dependencies (#2789)

## Summary

I noticed in #2769 that I was now stripping `.git` suffixes from Git
URLs after resolving to a precise commit. This PR cleans up the internal
caching to use a better canonical representation: a `RepositoryUrl`
along with a `GitReference`, instead of a `GitUrl` which can contain
non-canonical data. This gives us both better fidelity (preserving the
`.git`, along with any casing that the user provided when defining the
URL) and is overall cleaner and more robust.
This commit is contained in:
Charlie Marsh 2024-04-02 20:24:29 -04:00 committed by GitHub
parent c30a65ee0c
commit 684f790d5d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 104 additions and 54 deletions

View File

@ -165,6 +165,12 @@ impl Deref for RepositoryUrl {
}
}
impl std::fmt::Display for RepositoryUrl {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -164,7 +164,7 @@ impl TryFrom<&DirectGitUrl> for pypi_types::DirectUrl {
vcs_info: pypi_types::VcsInfo {
vcs: pypi_types::VcsKind::Git,
commit_id: value.url.precise().as_ref().map(ToString::to_string),
requested_revision: value.url.reference().map(ToString::to_string),
requested_revision: value.url.reference().as_str().map(ToString::to_string),
},
subdirectory: value.subdirectory.clone(),
})

View File

@ -8,11 +8,11 @@ use rustc_hash::FxHashMap;
use tracing::debug;
use url::Url;
use cache_key::CanonicalUrl;
use cache_key::{CanonicalUrl, RepositoryUrl};
use distribution_types::DirectGitUrl;
use uv_cache::{Cache, CacheBucket};
use uv_fs::LockedFile;
use uv_git::{Fetch, GitSource, GitUrl};
use uv_git::{Fetch, GitReference, GitSha, GitSource, GitUrl};
use crate::error::Error;
use crate::reporter::Facade;
@ -24,7 +24,25 @@ use crate::Reporter;
/// consistent across all invocations. (For example: if a Git URL refers to a branch, like `main`,
/// then the resolved URL should always refer to the same commit across the lifetime of the
/// process.)
static RESOLVED_GIT_REFS: Lazy<Mutex<FxHashMap<GitUrl, GitUrl>>> = Lazy::new(Mutex::default);
static RESOLVED_GIT_REFS: Lazy<Mutex<FxHashMap<RepositoryReference, GitSha>>> =
Lazy::new(Mutex::default);
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct RepositoryReference {
/// The URL of the Git repository, with any query parameters and fragments removed.
url: RepositoryUrl,
/// The reference to the commit to use, which could be a branch, tag or revision.
reference: GitReference,
}
impl RepositoryReference {
fn new(git: &GitUrl) -> Self {
Self {
url: RepositoryUrl::new(git.repository()),
reference: git.reference().clone(),
}
}
}
/// Download a source distribution from a Git repository.
pub(crate) async fn fetch_git_archive(
@ -40,10 +58,10 @@ pub(crate) async fn fetch_git_archive(
fs::create_dir_all(&lock_dir)
.await
.map_err(Error::CacheWrite)?;
let canonical_url = CanonicalUrl::new(url);
let repository_url = RepositoryUrl::new(url);
let _lock = LockedFile::acquire(
lock_dir.join(cache_key::digest(&canonical_url)),
&canonical_url,
lock_dir.join(cache_key::digest(&repository_url)),
&repository_url,
)
.map_err(Error::CacheWrite)?;
@ -52,8 +70,9 @@ pub(crate) async fn fetch_git_archive(
// Extract the resolved URL from the in-memory cache, to save a look-up in the fetch.
let url = {
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
if let Some(resolved) = resolved_git_refs.get(&url) {
resolved.clone()
let reference = RepositoryReference::new(&url);
if let Some(resolved) = resolved_git_refs.get(&reference) {
url.with_precise(*resolved)
} else {
url
}
@ -70,10 +89,12 @@ pub(crate) async fn fetch_git_archive(
.map_err(Error::Git)?;
// Insert the resolved URL into the in-memory cache.
{
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let precise = fetch.git().clone();
resolved_git_refs.insert(url, precise);
if url.precise().is_none() {
if let Some(precise) = fetch.git().precise() {
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
resolved_git_refs.insert(reference, precise);
}
}
Ok((fetch, subdirectory))
@ -92,8 +113,7 @@ pub(crate) async fn resolve_precise(
cache: &Cache,
reporter: Option<&Arc<dyn Reporter>>,
) -> Result<Option<Url>, Error> {
let url = Url::from(CanonicalUrl::new(url));
let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(&url).map_err(Error::Git)?;
let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?;
// If the Git reference already contains a complete SHA, short-circuit.
if url.precise().is_some() {
@ -103,9 +123,10 @@ pub(crate) async fn resolve_precise(
// If the Git reference is in the in-memory cache, return it.
{
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
if let Some(precise) = resolved_git_refs.get(&url) {
let reference = RepositoryReference::new(&url);
if let Some(precise) = resolved_git_refs.get(&reference) {
return Ok(Some(Url::from(DirectGitUrl {
url: precise.clone(),
url: url.with_precise(*precise),
subdirectory,
})));
}
@ -123,17 +144,18 @@ pub(crate) async fn resolve_precise(
let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await?
.map_err(Error::Git)?;
let precise = fetch.into_git();
let git = fetch.into_git();
// Insert the resolved URL into the in-memory cache.
{
if let Some(precise) = git.precise() {
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
resolved_git_refs.insert(url.clone(), precise.clone());
let reference = RepositoryReference::new(&url);
resolved_git_refs.insert(reference, precise);
}
// Re-encode as a URL.
Ok(Some(Url::from(DirectGitUrl {
url: precise,
url: git,
subdirectory,
})))
}
@ -153,7 +175,7 @@ pub fn is_same_reference<'a>(a: &'a Url, b: &'a Url) -> bool {
fn is_same_reference_impl<'a>(
a: &'a Url,
b: &'a Url,
resolved_refs: &FxHashMap<GitUrl, GitUrl>,
resolved_refs: &FxHashMap<RepositoryReference, GitSha>,
) -> bool {
// Convert `a` to a Git URL, if possible.
let Ok(a_git) = DirectGitUrl::try_from(&Url::from(CanonicalUrl::new(a))) else {
@ -170,13 +192,19 @@ fn is_same_reference_impl<'a>(
return false;
}
// Convert `a` to a repository URL.
let a_ref = RepositoryReference::new(&a_git.url);
// Convert `b` to a repository URL.
let b_ref = RepositoryReference::new(&b_git.url);
// The URLs must refer to the same repository.
if a_git.url.repository() != b_git.url.repository() {
if a_ref.url != b_ref.url {
return false;
}
// If the URLs have the same tag, they refer to the same commit.
if a_git.url.reference() == b_git.url.reference() {
if a_ref.reference == b_ref.reference {
return true;
}
@ -184,7 +212,7 @@ fn is_same_reference_impl<'a>(
let Some(a_precise) = a_git
.url
.precise()
.or_else(|| resolved_refs.get(&a_git.url).and_then(GitUrl::precise))
.or_else(|| resolved_refs.get(&a_ref).copied())
else {
return false;
};
@ -192,7 +220,7 @@ fn is_same_reference_impl<'a>(
let Some(b_precise) = b_git
.url
.precise()
.or_else(|| resolved_refs.get(&b_git.url).and_then(GitUrl::precise))
.or_else(|| resolved_refs.get(&b_ref).copied())
else {
return false;
};
@ -204,9 +232,11 @@ fn is_same_reference_impl<'a>(
mod tests {
use anyhow::Result;
use rustc_hash::FxHashMap;
use std::str::FromStr;
use url::Url;
use uv_git::GitUrl;
use crate::git::RepositoryReference;
use uv_git::{GitSha, GitUrl};
#[test]
fn same_reference() -> Result<()> {
@ -244,10 +274,10 @@ mod tests {
)?;
let mut resolved_refs = FxHashMap::default();
resolved_refs.insert(
GitUrl::try_from(Url::parse("https://example.com/MyProject@main")?)?,
GitUrl::try_from(Url::parse(
"https://example.com/MyProject@164a8735b081663fede48c5041667b194da15d25",
)?)?,
RepositoryReference::new(&GitUrl::try_from(Url::parse(
"https://example.com/MyProject@main",
)?)?),
GitSha::from_str("164a8735b081663fede48c5041667b194da15d25")?,
);
assert!(super::is_same_reference_impl(&a, &b, &resolved_refs));
@ -258,10 +288,10 @@ mod tests {
)?;
let mut resolved_refs = FxHashMap::default();
resolved_refs.insert(
GitUrl::try_from(Url::parse("https://example.com/MyProject@main")?)?,
GitUrl::try_from(Url::parse(
"https://example.com/MyProject@f2c9e88f3ec9526bbcec68d150b176d96a750aba",
)?)?,
RepositoryReference::new(&GitUrl::try_from(Url::parse(
"https://example.com/MyProject@main",
)?)?),
GitSha::from_str("f2c9e88f3ec9526bbcec68d150b176d96a750aba")?,
);
assert!(!super::is_same_reference_impl(&a, &b, &resolved_refs));

View File

@ -24,7 +24,7 @@ const CHECKOUT_READY_LOCK: &str = ".ok";
/// A reference to commit or commit-ish.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) enum GitReference {
pub enum GitReference {
/// From a branch.
#[allow(unused)]
Branch(String),
@ -66,8 +66,29 @@ impl GitReference {
}
}
/// Views the short ID as a `str`.
pub(crate) fn as_str(&self) -> &str {
pub fn precise(&self) -> Option<&str> {
match self {
Self::FullCommit(rev) => Some(rev),
Self::ShortCommit(rev) => Some(rev),
_ => None,
}
}
/// Converts the [`GitReference`] to a `str`.
pub fn as_str(&self) -> Option<&str> {
match self {
Self::Branch(rev) => Some(rev),
Self::Tag(rev) => Some(rev),
Self::BranchOrTag(rev) => Some(rev),
Self::FullCommit(rev) => Some(rev),
Self::ShortCommit(rev) => Some(rev),
Self::Ref(rev) => Some(rev),
Self::DefaultBranch => None,
}
}
/// Converts the [`GitReference`] to a `str` that can be used as a revision.
pub(crate) fn as_rev(&self) -> &str {
match self {
Self::Branch(rev)
| Self::Tag(rev)
@ -79,6 +100,7 @@ impl GitReference {
}
}
/// Returns the kind of this reference.
pub(crate) fn kind_str(&self) -> &str {
match self {
Self::Branch(_) => "branch",
@ -1034,7 +1056,7 @@ pub(crate) fn fetch(
format!(
"failed to fetch {} `{}`",
reference.kind_str(),
reference.as_str()
reference.as_rev()
)
}),
}

View File

@ -1,7 +1,7 @@
use std::str::FromStr;
use url::Url;
use crate::git::GitReference;
pub use crate::git::GitReference;
pub use crate::sha::GitSha;
pub use crate::source::{Fetch, GitSource, Reporter};
@ -24,7 +24,7 @@ pub struct GitUrl {
impl GitUrl {
#[must_use]
pub(crate) fn with_precise(mut self, precise: GitSha) -> Self {
pub fn with_precise(mut self, precise: GitSha) -> Self {
self.precise = Some(precise);
self
}
@ -35,16 +35,8 @@ impl GitUrl {
}
/// Return the reference to the commit to use, which could be a branch, tag or revision.
pub fn reference(&self) -> Option<&str> {
match &self.reference {
GitReference::Branch(rev)
| GitReference::Tag(rev)
| GitReference::BranchOrTag(rev)
| GitReference::Ref(rev)
| GitReference::FullCommit(rev)
| GitReference::ShortCommit(rev) => Some(rev),
GitReference::DefaultBranch => None,
}
pub fn reference(&self) -> &GitReference {
&self.reference
}
/// Returns `true` if the reference is a full commit.

View File

@ -70,7 +70,7 @@ impl GitSource {
// Report the checkout operation to the reporter.
let task = self.reporter.as_ref().map(|reporter| {
reporter.on_checkout_start(remote.url(), self.git.reference.as_str())
reporter.on_checkout_start(remote.url(), self.git.reference.as_rev())
});
let (db, actual_rev) = remote.checkout(

View File

@ -569,7 +569,7 @@ fn install_git_tag() -> Result<()> {
let requirements_txt = context.temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("werkzeug @ git+https://github.com/pallets/werkzeug.git@2.0.0")?;
requirements_txt.write_str("werkzeug @ git+https://github.com/pallets/WerkZeug.git@2.0.0")?;
uv_snapshot!(command(&context)
.arg("requirements.txt")
@ -582,7 +582,7 @@ fn install_git_tag() -> Result<()> {
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Installed 1 package in [TIME]
+ werkzeug==2.0.0 (from git+https://github.com/pallets/werkzeug@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74)
+ werkzeug==2.0.0 (from git+https://github.com/pallets/WerkZeug.git@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74)
"###
);