Use git ls-remote to resolve Git SHAs

This commit is contained in:
Charlie Marsh 2025-11-08 23:07:55 -05:00
parent 1b7faafd7a
commit ff837e5a62
5 changed files with 188 additions and 62 deletions

View File

@ -523,6 +523,9 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
source: &BuildableSource<'_>, source: &BuildableSource<'_>,
hashes: HashPolicy<'_>, hashes: HashPolicy<'_>,
) -> Result<ArchiveMetadata, Error> { ) -> Result<ArchiveMetadata, Error> {
// Resolve the source distribution to a precise revision (i.e., a specific Git commit).
self.builder.resolve_revision(source, &self.client).await?;
// If the metadata was provided by the user directly, prefer it. // If the metadata was provided by the user directly, prefer it.
if let Some(dist) = source.as_dist() { if let Some(dist) = source.as_dist() {
if let Some(metadata) = self if let Some(metadata) = self
@ -530,10 +533,6 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
.dependency_metadata() .dependency_metadata()
.get(dist.name(), dist.version()) .get(dist.name(), dist.version())
{ {
// If we skipped the build, we should still resolve any Git dependencies to precise
// commits.
self.builder.resolve_revision(source, &self.client).await?;
return Ok(ArchiveMetadata::from_metadata23(metadata.clone())); return Ok(ArchiveMetadata::from_metadata23(metadata.clone()));
} }
} }

View File

@ -1670,68 +1670,49 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.as_ref() .as_ref()
.is_some_and(|cache_shard| cache_shard.is_dir()) .is_some_and(|cache_shard| cache_shard.is_dir())
{ {
debug!("Skipping GitHub fast path for: {source} (shard exists)"); debug!("Skipping GitHub `pyproject.toml` fast path for: {source} (shard exists)");
} else { } else {
debug!("Attempting GitHub fast path for: {source}"); debug!("Attempting GitHub `pyproject.toml` fast path: {source}");
// If this is GitHub URL, attempt to resolve to a precise commit using the GitHub API. if let Some(precise) = self.build_context.git().get_precise(resource.git) {
match self // If this is GitHub URL, attempt to fetch the `pyproject.toml` directly.
.build_context //
.git() // There's no need to check the cache, since we can't use cached metadata if there
.github_fast_path( // are sources, and we can't know if there are sources without fetching the
resource.git, // `pyproject.toml`.
client //
.unmanaged // For the same reason, there's no need to write to the cache, since we won't be
.uncached_client(resource.git.repository()) // able to use it on subsequent runs.
.raw_client(), //
) // TODO(charlie): Skip this fetch if the GitHub commit resolution fast path failed
.await // with a 404 or similar.
{ match self
Ok(Some(precise)) => { .github_metadata(precise, source, resource, client)
// There's no need to check the cache, since we can't use cached metadata if there are .await
// sources, and we can't know if there are sources without fetching the {
// `pyproject.toml`. Ok(Some(metadata)) => {
// // Validate the metadata, but ignore it if the metadata doesn't match.
// For the same reason, there's no need to write to the cache, since we won't be able to match validate_metadata(source, &metadata) {
// use it on subsequent runs. Ok(()) => {
match self debug!("Found static metadata via GitHub fast path for: {source}");
.github_metadata(precise, source, resource, client) return Ok(ArchiveMetadata {
.await metadata: Metadata::from_metadata23(metadata),
{ hashes: HashDigests::empty(),
Ok(Some(metadata)) => { });
// Validate the metadata, but ignore it if the metadata doesn't match. }
match validate_metadata(source, &metadata) { Err(err) => {
Ok(()) => { debug!("Ignoring `pyproject.toml` from GitHub for {source}: {err}");
debug!(
"Found static metadata via GitHub fast path for: {source}"
);
return Ok(ArchiveMetadata {
metadata: Metadata::from_metadata23(metadata),
hashes: HashDigests::empty(),
});
}
Err(err) => {
debug!(
"Ignoring `pyproject.toml` from GitHub for {source}: {err}"
);
}
} }
} }
Ok(None) => {
// Nothing to do.
}
Err(err) => {
debug!(
"Failed to fetch `pyproject.toml` via GitHub fast path for: {source} ({err})"
);
}
} }
} Ok(None) => {
Ok(None) => { // Nothing to do.
// Nothing to do. }
} Err(err) => {
Err(err) => { debug!(
debug!("Failed to resolve commit via GitHub fast path for: {source} ({err})"); "Failed to fetch `pyproject.toml` via GitHub fast path for: {source} ({err})"
);
}
} }
} }
} }
@ -1978,7 +1959,23 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
) )
.await? .await?
{ {
debug!("Resolved to precise commit via GitHub fast path: {source}"); debug!("Resolved to a precise commit via GitHub fast path: {source}");
return Ok(Some(precise));
}
// Otherwise, attempt to resolve using `git ls-remote`.
if let Some(precise) = self
.build_context
.git()
.ls_remote(
git,
client.unmanaged.disable_ssl(git.repository()),
client.unmanaged.connectivity() == Connectivity::Offline,
self.build_context.cache().bucket(CacheBucket::Git),
)
.await?
{
debug!("Resolved to a precise commit via `git ls-remote`: {source}");
return Ok(Some(precise)); return Ok(Some(precise));
} }

View File

@ -286,6 +286,21 @@ impl GitRemote {
let repo = GitRepository::open(db_path)?; let repo = GitRepository::open(db_path)?;
Ok(GitDatabase { repo }) Ok(GitDatabase { repo })
} }
/// Resolve the OID of a reference or a revision from this remote.
///
/// If `locked_rev` is provided, that OID is what gets looked up; otherwise the symbolic
/// `reference` is used. The lookup itself is delegated to `ls_remote` against this remote's URL,
/// with `disable_ssl` and `offline` forwarded unchanged.
pub(crate) fn ls(
    &self,
    reference: &GitReference,
    locked_rev: Option<GitOid>,
    disable_ssl: bool,
    offline: bool,
) -> Result<Option<GitOid>> {
    // A locked revision takes precedence over the symbolic reference.
    let target = match locked_rev {
        Some(oid) => ReferenceOrOid::Oid(oid),
        None => ReferenceOrOid::Reference(reference),
    };
    ls_remote(&self.url, target, disable_ssl, offline)
}
} }
impl GitDatabase { impl GitDatabase {
@ -428,6 +443,61 @@ impl GitCheckout {
} }
} }
/// Perform a `git ls-remote` operation to resolve a reference or revision to an OID.
///
/// Runs `git ls-remote [--heads | --tags] <remote_url> <pattern>...` and scans the advertised
/// `<oid> <ref>` lines for the requested reference:
///
/// - Branches are accepted as `refs/heads/<rev>`.
/// - Tags are accepted as `refs/tags/<rev>`. If the remote also advertises the peeled entry
///   (`refs/tags/<rev>^{}`, which Git emits for annotated tags), the peeled OID is preferred, as
///   it names the tagged object rather than the tag object itself.
/// - Any other reference, and any OID, is only accepted on an exact name match.
///
/// An exact match against `reference.as_rev()` is always accepted as well, so a caller that
/// passes an already-qualified name keeps working.
///
/// Returns `Ok(None)` when no advertised ref matches.
///
/// # Errors
///
/// Returns an error if the `GIT` lookup failed, if `exec_with_output` fails, if the output is not
/// valid UTF-8, or if the selected OID cannot be parsed.
fn ls_remote(
    remote_url: &Url,
    reference: ReferenceOrOid<'_>,
    disable_ssl: bool,
    offline: bool,
) -> Result<Option<GitOid>> {
    debug!("Performing a Git ls-remote for: {remote_url}");

    let rev = reference.as_rev();

    // For each kind of reference, determine:
    //   - the `ls-remote` filter flag (if any),
    //   - the patterns to request from the remote,
    //   - the advertised names we accept, in priority order (earlier entries win).
    //
    // NOTE(review): this assumes `as_rev()` yields the short name (e.g., `main`, `v1.0`) for
    // branches and tags, while `git ls-remote` prints fully-qualified names -- confirm.
    let (filter, patterns, accepted): (Option<&str>, Vec<String>, Vec<String>) = match reference {
        ReferenceOrOid::Reference(GitReference::Branch(_)) => (
            Some("--heads"),
            vec![rev.to_string()],
            vec![rev.to_string(), format!("refs/heads/{rev}")],
        ),
        ReferenceOrOid::Reference(GitReference::Tag(_)) => (
            Some("--tags"),
            // Explicitly request the peeled entry, so that annotated tags resolve to the tagged
            // object instead of the tag object.
            vec![rev.to_string(), format!("{rev}^{{}}")],
            vec![
                format!("refs/tags/{rev}^{{}}"),
                rev.to_string(),
                format!("refs/tags/{rev}"),
            ],
        ),
        _ => (None, vec![rev.to_string()], vec![rev.to_string()]),
    };

    let mut cmd = ProcessBuilder::new(GIT.as_ref()?);
    cmd.arg("ls-remote");

    if disable_ssl {
        debug!("Disabling SSL verification for Git ls-remote via `GIT_SSL_NO_VERIFY`");
        cmd.env(EnvVars::GIT_SSL_NO_VERIFY, "true");
    }
    if offline {
        debug!("Disabling remote protocols for Git ls-remote via `GIT_ALLOW_PROTOCOL=file`");
        cmd.env(EnvVars::GIT_ALLOW_PROTOCOL, "file");
    }

    // Options must precede the repository: everything after `<repository>` is treated as a
    // pattern, so a trailing `--heads` or `--tags` would not filter anything.
    if let Some(filter) = filter {
        cmd.arg(filter);
    }
    cmd.arg(remote_url.as_str());
    for pattern in &patterns {
        cmd.arg(pattern.as_str());
    }

    let output = cmd.exec_with_output()?;
    let stdout = str::from_utf8(&output.stdout)?;

    // Each line is `<oid><whitespace><ref>`. Track the match with the best (lowest) priority
    // rank rather than the first one, since the peeled entry is listed after the tag itself.
    let mut best: Option<(usize, &str)> = None;
    for line in stdout.lines() {
        let mut fields = line.split_whitespace();
        let (Some(oid_str), Some(ref_str)) = (fields.next(), fields.next()) else {
            continue;
        };
        let Some(rank) = accepted
            .iter()
            .position(|candidate| candidate.as_str() == ref_str)
        else {
            continue;
        };
        if best.is_some_and(|(current, _)| current <= rank) {
            continue;
        }
        best = Some((rank, oid_str));
    }

    match best {
        Some((_, oid_str)) => {
            let oid: GitOid = oid_str.parse()?;
            Ok(Some(oid))
        }
        None => Ok(None),
    }
}
/// Attempts to fetch the given git `reference` for a Git repository. /// Attempts to fetch the given git `reference` for a Git repository.
/// ///
/// This is the main entry for git clone/fetch. It does the following: /// This is the main entry for git clone/fetch. It does the following:

View File

@ -140,6 +140,44 @@ impl GitResolver {
Ok(Some(precise)) Ok(Some(precise))
} }
/// Resolve a Git URL to a specific commit via `git ls-remote`.
///
/// If a precise commit is already known for `url` (per [`Self::get_precise`]), it is returned
/// without running Git. Otherwise, a [`GitSource`] is built from `url`, `cache`, and `offline`
/// (marked `dangerous()` when `disable_ssl` is set), and its `ls_remote` is run on a blocking
/// task. A successfully resolved commit is recorded via `insert` before being returned.
///
/// Returns `Ok(None)` if `git ls-remote` did not yield a commit for the URL.
pub async fn ls_remote(
    &self,
    url: &GitUrl,
    disable_ssl: bool,
    offline: bool,
    cache: PathBuf,
) -> Result<Option<GitOid>, GitResolverError> {
    // Nothing to do if the commit is already pinned or was resolved earlier.
    if let Some(known) = self.get_precise(url) {
        return Ok(Some(known));
    }

    // Build the source, opting out of SSL verification only when requested.
    let mut source = GitSource::new(url.clone(), cache, offline);
    if disable_ssl {
        source = source.dangerous();
    }

    // `GitSource::ls_remote` is synchronous, so run it off the async executor.
    let task = tokio::task::spawn_blocking(move || source.ls_remote());
    let resolved = task.await?.map_err(GitResolverError::Git)?;

    // Remember the resolved commit in memory, so that subsequent fetches of the same
    // reference resolve to the same precise commit.
    if let Some(oid) = resolved {
        self.insert(RepositoryReference::from(url), oid);
    }

    Ok(resolved)
}
/// Fetch a remote Git repository. /// Fetch a remote Git repository.
pub async fn fetch( pub async fn fetch(
&self, &self,

View File

@ -60,6 +60,28 @@ impl GitSource {
} }
} }
/// Resolve the OID of a reference or a revision from the Git repository.
///
/// If `GIT_STORE` holds credentials for the repository's canonical URL, they are applied to a
/// copy of the URL; otherwise the URL is used as-is. The lookup is then delegated to
/// `GitRemote::ls` with this source's reference, precise revision (if any), and its
/// `disable_ssl` and `offline` settings.
#[instrument(skip(self), fields(repository = %self.git.repository(), rev = ?self.git.precise()))]
pub fn ls_remote(&self) -> Result<Option<GitOid>> {
    let repository = self.git.repository();

    // Credentials are keyed by the canonical form of the repository URL.
    let key = RepositoryUrl::new(repository);

    // Only allocate a new URL when there are credentials to apply.
    let url = match GIT_STORE.get(&key) {
        Some(credentials) => Cow::Owned(credentials.apply(repository.clone())),
        None => Cow::Borrowed(repository),
    };

    let remote = GitRemote::new(&url);
    remote.ls(
        self.git.reference(),
        self.git.precise(),
        self.disable_ssl,
        self.offline,
    )
}
/// Fetch the underlying Git repository at the given revision. /// Fetch the underlying Git repository at the given revision.
#[instrument(skip(self), fields(repository = %self.git.repository(), rev = ?self.git.precise()))] #[instrument(skip(self), fields(repository = %self.git.repository(), rev = ?self.git.precise()))]
pub fn fetch(self) -> Result<Fetch> { pub fn fetch(self) -> Result<Fetch> {