diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index ef2227df6..066290c82 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -523,6 +523,9 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { source: &BuildableSource<'_>, hashes: HashPolicy<'_>, ) -> Result { + // Resolve the source distribution to a precise revision (i.e., a specific Git commit). + self.builder.resolve_revision(source, &self.client).await?; + // If the metadata was provided by the user directly, prefer it. if let Some(dist) = source.as_dist() { if let Some(metadata) = self @@ -530,10 +533,6 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { .dependency_metadata() .get(dist.name(), dist.version()) { - // If we skipped the build, we should still resolve any Git dependencies to precise - // commits. - self.builder.resolve_revision(source, &self.client).await?; - return Ok(ArchiveMetadata::from_metadata23(metadata.clone())); } } diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index 769919585..086b58407 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -1670,68 +1670,49 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .as_ref() .is_some_and(|cache_shard| cache_shard.is_dir()) { - debug!("Skipping GitHub fast path for: {source} (shard exists)"); + debug!("Skipping GitHub `pyproject.toml` fast path for: {source} (shard exists)"); } else { - debug!("Attempting GitHub fast path for: {source}"); + debug!("Attempting GitHub `pyproject.toml` fast path: {source}"); - // If this is GitHub URL, attempt to resolve to a precise commit using the GitHub API. - match self - .build_context - .git() - .github_fast_path( - resource.git, - client - .unmanaged - .uncached_client(resource.git.repository()) - .raw_client(), - ) - .await - { - Ok(Some(precise)) => { - // There's no need to check the cache, since we can't use cached metadata if there are - // sources, and we can't know if there are sources without fetching the - // `pyproject.toml`. - // - // For the same reason, there's no need to write to the cache, since we won't be able to - // use it on subsequent runs. - match self - .github_metadata(precise, source, resource, client) - .await - { - Ok(Some(metadata)) => { - // Validate the metadata, but ignore it if the metadata doesn't match. - match validate_metadata(source, &metadata) { - Ok(()) => { - debug!( - "Found static metadata via GitHub fast path for: {source}" - ); - return Ok(ArchiveMetadata { - metadata: Metadata::from_metadata23(metadata), - hashes: HashDigests::empty(), - }); - } - Err(err) => { - debug!( - "Ignoring `pyproject.toml` from GitHub for {source}: {err}" - ); - } + if let Some(precise) = self.build_context.git().get_precise(resource.git) { + // If this is GitHub URL, attempt to fetch the `pyproject.toml` directly. + // + // There's no need to check the cache, since we can't use cached metadata if there + // are sources, and we can't know if there are sources without fetching the + // `pyproject.toml`. + // + // For the same reason, there's no need to write to the cache, since we won't be + // able to use it on subsequent runs. + // + // TODO(charlie): Skip this fetch if the GitHub commit resolution fast path failed + // with a 404 or similar. + match self + .github_metadata(precise, source, resource, client) + .await + { + Ok(Some(metadata)) => { + // Validate the metadata, but ignore it if the metadata doesn't match. + match validate_metadata(source, &metadata) { + Ok(()) => { + debug!("Found static metadata via GitHub fast path for: {source}"); + return Ok(ArchiveMetadata { + metadata: Metadata::from_metadata23(metadata), + hashes: HashDigests::empty(), + }); + } + Err(err) => { + debug!("Ignoring `pyproject.toml` from GitHub for {source}: {err}"); } } - Ok(None) => { - // Nothing to do. - } - Err(err) => { - debug!( - "Failed to fetch `pyproject.toml` via GitHub fast path for: {source} ({err})" - ); - } } - } - Ok(None) => { - // Nothing to do. - } - Err(err) => { - debug!("Failed to resolve commit via GitHub fast path for: {source} ({err})"); + Ok(None) => { + // Nothing to do. + } + Err(err) => { + debug!( + "Failed to fetch `pyproject.toml` via GitHub fast path for: {source} ({err})" + ); + } } } } @@ -1978,7 +1959,23 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { ) .await? { - debug!("Resolved to precise commit via GitHub fast path: {source}"); + debug!("Resolved to a precise commit via GitHub fast path: {source}"); + return Ok(Some(precise)); + } + + // Otherwise, attempt to resolve using `git ls-remote`. + if let Some(precise) = self + .build_context + .git() + .ls_remote( + git, + client.unmanaged.disable_ssl(git.repository()), + client.unmanaged.connectivity() == Connectivity::Offline, + self.build_context.cache().bucket(CacheBucket::Git), + ) + .await? + { + debug!("Resolved to a precise commit via `git ls-remote`: {source}"); return Ok(Some(precise)); } diff --git a/crates/uv-git/src/git.rs b/crates/uv-git/src/git.rs index a844baf32..7f8b6db3f 100644 --- a/crates/uv-git/src/git.rs +++ b/crates/uv-git/src/git.rs @@ -286,6 +286,21 @@ impl GitRemote { let repo = GitRepository::open(db_path)?; Ok(GitDatabase { repo }) } + + /// Resolve the OID of a reference or a revision from this remote. + pub(crate) fn ls( + &self, + reference: &GitReference, + locked_rev: Option, + disable_ssl: bool, + offline: bool, + ) -> Result> { + let reference = locked_rev + .map(ReferenceOrOid::Oid) + .unwrap_or(ReferenceOrOid::Reference(reference)); + + ls_remote(&self.url, reference, disable_ssl, offline) + } } impl GitDatabase { @@ -428,6 +443,61 @@ impl GitCheckout { } } +/// Perform a `git ls-remote` operation to resolve a reference or revision to an OID. +fn ls_remote( + remote_url: &Url, + reference: ReferenceOrOid<'_>, + disable_ssl: bool, + offline: bool, +) -> Result> { + debug!("Performing a Git ls-remote for: {remote_url}"); + let mut cmd = ProcessBuilder::new(GIT.as_ref()?); + cmd.arg("ls-remote"); + if disable_ssl { + debug!("Disabling SSL verification for Git ls-remote via `GIT_SSL_NO_VERIFY`"); + cmd.env(EnvVars::GIT_SSL_NO_VERIFY, "true"); + } + if offline { + debug!("Disabling remote protocols for Git ls-remote via `GIT_ALLOW_PROTOCOL=file`"); + cmd.env(EnvVars::GIT_ALLOW_PROTOCOL, "file"); + } + cmd.arg(remote_url.as_str()); + + match reference { + ReferenceOrOid::Reference(r) => match r { + GitReference::Branch(_) => { + cmd.arg("--heads"); + cmd.arg(reference.as_rev()); + } + GitReference::Tag(_) => { + cmd.arg("--tags"); + cmd.arg(reference.as_rev()); + } + _ => { + cmd.arg(reference.as_rev()); + } + }, + ReferenceOrOid::Oid(_) => { + cmd.arg(reference.as_rev()); + } + } + + let output = cmd.exec_with_output()?; + let stdout = str::from_utf8(&output.stdout)?; + + for line in stdout.lines() { + let mut parts = line.split_whitespace(); + if let (Some(oid_str), Some(ref_str)) = (parts.next(), parts.next()) { + if ref_str == reference.as_rev() { + let oid: GitOid = oid_str.parse()?; + return Ok(Some(oid)); + } + } + } + + Ok(None) +} + /// Attempts to fetch the given git `reference` for a Git repository. /// /// This is the main entry for git clone/fetch. It does the following: diff --git a/crates/uv-git/src/resolver.rs b/crates/uv-git/src/resolver.rs index 70593ed73..be4e1ec13 100644 --- a/crates/uv-git/src/resolver.rs +++ b/crates/uv-git/src/resolver.rs @@ -140,6 +140,44 @@ impl GitResolver { Ok(Some(precise)) } + /// Resolve a Git URL to a specific commit via `git ls-remote`. + /// + /// Returns a [`GitOid`] if the URL has already been resolved (i.e., is available in the cache), + /// or if it can be fetched via `git ls-remote`. Otherwise, returns `None`. + pub async fn ls_remote( + &self, + url: &GitUrl, + disable_ssl: bool, + offline: bool, + cache: PathBuf, + ) -> Result, GitResolverError> { + // If the URL is already precise or we know the precise commit, return it. + if let Some(precise) = self.get_precise(url) { + return Ok(Some(precise)); + } + + let source = GitSource::new(url.clone(), cache, offline); + + // If necessary, disable SSL. + let source = if disable_ssl { + source.dangerous() + } else { + source + }; + + let precise = tokio::task::spawn_blocking(move || source.ls_remote()) + .await? + .map_err(GitResolverError::Git)?; + + // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches + // resolve to the same precise commit. + if let Some(precise) = precise { + self.insert(RepositoryReference::from(url), precise); + } + + Ok(precise) + } + /// Fetch a remote Git repository. pub async fn fetch( &self, diff --git a/crates/uv-git/src/source.rs b/crates/uv-git/src/source.rs index 037c58828..fd23bd871 100644 --- a/crates/uv-git/src/source.rs +++ b/crates/uv-git/src/source.rs @@ -60,6 +60,28 @@ impl GitSource { } } + /// Resolve the OID of a reference or a revision from the Git repository. + #[instrument(skip(self), fields(repository = %self.git.repository(), rev = ?self.git.precise()))] + pub fn ls_remote(&self) -> Result> { + // Compute the canonical URL for the repository. + let canonical = RepositoryUrl::new(self.git.repository()); + + // Authenticate the URL, if necessary. + let remote = if let Some(credentials) = GIT_STORE.get(&canonical) { + Cow::Owned(credentials.apply(self.git.repository().clone())) + } else { + Cow::Borrowed(self.git.repository()) + }; + + let git_remote = GitRemote::new(&remote); + git_remote.ls( + self.git.reference(), + self.git.precise(), + self.disable_ssl, + self.offline, + ) + } + /// Fetch the underlying Git repository at the given revision. #[instrument(skip(self), fields(repository = %self.git.repository(), rev = ?self.git.precise()))] pub fn fetch(self) -> Result {