Use git ls-remote to resolve Git SHAs

This commit is contained in:
Charlie Marsh 2025-11-08 23:07:55 -05:00
parent 1b7faafd7a
commit ff837e5a62
5 changed files with 188 additions and 62 deletions

View File

@ -523,6 +523,9 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
source: &BuildableSource<'_>,
hashes: HashPolicy<'_>,
) -> Result<ArchiveMetadata, Error> {
// Resolve the source distribution to a precise revision (i.e., a specific Git commit).
self.builder.resolve_revision(source, &self.client).await?;
// If the metadata was provided by the user directly, prefer it.
if let Some(dist) = source.as_dist() {
if let Some(metadata) = self
@ -530,10 +533,6 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
.dependency_metadata()
.get(dist.name(), dist.version())
{
// If we skipped the build, we should still resolve any Git dependencies to precise
// commits.
self.builder.resolve_revision(source, &self.client).await?;
return Ok(ArchiveMetadata::from_metadata23(metadata.clone()));
}
}

View File

@ -1670,68 +1670,49 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.as_ref()
.is_some_and(|cache_shard| cache_shard.is_dir())
{
debug!("Skipping GitHub fast path for: {source} (shard exists)");
debug!("Skipping GitHub `pyproject.toml` fast path for: {source} (shard exists)");
} else {
debug!("Attempting GitHub fast path for: {source}");
debug!("Attempting GitHub `pyproject.toml` fast path: {source}");
// If this is a GitHub URL, attempt to resolve to a precise commit using the GitHub API.
match self
.build_context
.git()
.github_fast_path(
resource.git,
client
.unmanaged
.uncached_client(resource.git.repository())
.raw_client(),
)
.await
{
Ok(Some(precise)) => {
// There's no need to check the cache, since we can't use cached metadata if there are
// sources, and we can't know if there are sources without fetching the
// `pyproject.toml`.
//
// For the same reason, there's no need to write to the cache, since we won't be able to
// use it on subsequent runs.
match self
.github_metadata(precise, source, resource, client)
.await
{
Ok(Some(metadata)) => {
// Validate the metadata, but ignore it if the metadata doesn't match.
match validate_metadata(source, &metadata) {
Ok(()) => {
debug!(
"Found static metadata via GitHub fast path for: {source}"
);
return Ok(ArchiveMetadata {
metadata: Metadata::from_metadata23(metadata),
hashes: HashDigests::empty(),
});
}
Err(err) => {
debug!(
"Ignoring `pyproject.toml` from GitHub for {source}: {err}"
);
}
if let Some(precise) = self.build_context.git().get_precise(resource.git) {
// If this is a GitHub URL, attempt to fetch the `pyproject.toml` directly.
//
// There's no need to check the cache, since we can't use cached metadata if there
// are sources, and we can't know if there are sources without fetching the
// `pyproject.toml`.
//
// For the same reason, there's no need to write to the cache, since we won't be
// able to use it on subsequent runs.
//
// TODO(charlie): Skip this fetch if the GitHub commit resolution fast path failed
// with a 404 or similar.
match self
.github_metadata(precise, source, resource, client)
.await
{
Ok(Some(metadata)) => {
// Validate the metadata, but ignore it if the metadata doesn't match.
match validate_metadata(source, &metadata) {
Ok(()) => {
debug!("Found static metadata via GitHub fast path for: {source}");
return Ok(ArchiveMetadata {
metadata: Metadata::from_metadata23(metadata),
hashes: HashDigests::empty(),
});
}
Err(err) => {
debug!("Ignoring `pyproject.toml` from GitHub for {source}: {err}");
}
}
Ok(None) => {
// Nothing to do.
}
Err(err) => {
debug!(
"Failed to fetch `pyproject.toml` via GitHub fast path for: {source} ({err})"
);
}
}
}
Ok(None) => {
// Nothing to do.
}
Err(err) => {
debug!("Failed to resolve commit via GitHub fast path for: {source} ({err})");
Ok(None) => {
// Nothing to do.
}
Err(err) => {
debug!(
"Failed to fetch `pyproject.toml` via GitHub fast path for: {source} ({err})"
);
}
}
}
}
@ -1978,7 +1959,23 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
)
.await?
{
debug!("Resolved to precise commit via GitHub fast path: {source}");
debug!("Resolved to a precise commit via GitHub fast path: {source}");
return Ok(Some(precise));
}
// Otherwise, attempt to resolve using `git ls-remote`.
if let Some(precise) = self
.build_context
.git()
.ls_remote(
git,
client.unmanaged.disable_ssl(git.repository()),
client.unmanaged.connectivity() == Connectivity::Offline,
self.build_context.cache().bucket(CacheBucket::Git),
)
.await?
{
debug!("Resolved to a precise commit via `git ls-remote`: {source}");
return Ok(Some(precise));
}

View File

@ -286,6 +286,21 @@ impl GitRemote {
let repo = GitRepository::open(db_path)?;
Ok(GitDatabase { repo })
}
/// Look up the OID for a reference (or an already-locked revision) on this remote.
///
/// When `locked_rev` is provided it is queried in place of `reference`; otherwise the
/// `reference` itself is queried. The lookup is delegated to [`ls_remote`] against this
/// remote's URL, forwarding the `disable_ssl` and `offline` flags unchanged.
///
/// Returns `Ok(None)` when the lookup does not produce a matching OID.
pub(crate) fn ls(
    &self,
    reference: &GitReference,
    locked_rev: Option<GitOid>,
    disable_ssl: bool,
    offline: bool,
) -> Result<Option<GitOid>> {
    // A locked revision takes priority over the symbolic reference.
    let target = match locked_rev {
        Some(oid) => ReferenceOrOid::Oid(oid),
        None => ReferenceOrOid::Reference(reference),
    };
    ls_remote(&self.url, target, disable_ssl, offline)
}
}
impl GitDatabase {
@ -428,6 +443,61 @@ impl GitCheckout {
}
}
/// Perform a `git ls-remote` operation to resolve a reference or revision to an OID.
///
/// Runs `git ls-remote [--heads | --tags] <remote_url> <rev>`, where `<rev>` is
/// `reference.as_rev()`, and scans the output for the line naming the requested ref.
///
/// - `disable_ssl`: sets `GIT_SSL_NO_VERIFY=true` for the child process.
/// - `offline`: sets `GIT_ALLOW_PROTOCOL=file` for the child process, so that only the `file`
///   protocol is permitted.
///
/// Returns `Ok(Some(oid))` for the first output line that names the requested ref, and
/// `Ok(None)` if no line does.
///
/// # Errors
///
/// Returns an error if the Git executable is unavailable, if running the command fails, if the
/// output is not valid UTF-8, or if the OID on the matching line cannot be parsed.
fn ls_remote(
    remote_url: &Url,
    reference: ReferenceOrOid<'_>,
    disable_ssl: bool,
    offline: bool,
) -> Result<Option<GitOid>> {
    debug!("Performing a Git ls-remote for: {remote_url}");

    let mut cmd = ProcessBuilder::new(GIT.as_ref()?);
    cmd.arg("ls-remote");

    if disable_ssl {
        debug!("Disabling SSL verification for Git ls-remote via `GIT_SSL_NO_VERIFY`");
        cmd.env(EnvVars::GIT_SSL_NO_VERIFY, "true");
    }
    if offline {
        debug!("Disabling remote protocols for Git ls-remote via `GIT_ALLOW_PROTOCOL=file`");
        cmd.env(EnvVars::GIT_ALLOW_PROTOCOL, "file");
    }

    let rev = reference.as_rev();

    // `git ls-remote` expects its options *before* the repository; anything after the
    // repository is interpreted as a ref pattern. Emit the filter flag first so that it is
    // actually applied, rather than being passed along as a (never-matching) pattern.
    let qualified_prefix = match &reference {
        ReferenceOrOid::Reference(GitReference::Branch(_)) => {
            cmd.arg("--heads");
            // `git ls-remote` prints fully-qualified names, e.g. `refs/heads/main`.
            Some("refs/heads/")
        }
        ReferenceOrOid::Reference(GitReference::Tag(_)) => {
            cmd.arg("--tags");
            // NOTE(review): tags are deliberately not matched by their `refs/tags/` name here.
            // For an annotated tag, that line presumably carries the tag object's ID rather
            // than the commit's (the commit is on the peeled `^{}` line) -- confirm before
            // widening the match for tags.
            None
        }
        _ => None,
    };
    cmd.arg(remote_url.as_str());
    cmd.arg(rev);

    let output = cmd.exec_with_output()?;
    let stdout = str::from_utf8(&output.stdout)?;

    // Each output line has the form `<oid> <ref-name>`.
    for line in stdout.lines() {
        let mut fields = line.split_whitespace();
        if let (Some(oid_str), Some(ref_name)) = (fields.next(), fields.next()) {
            // Accept the rev verbatim (e.g., `HEAD`, or an already-qualified name), or, for
            // branches, the fully-qualified form of the rev. Comparing the remainder after the
            // prefix for equality avoids false positives such as `refs/heads/feature/main`
            // when resolving `main`.
            let names_requested_ref = ref_name == rev
                || qualified_prefix.and_then(|prefix| ref_name.strip_prefix(prefix)) == Some(rev);
            if names_requested_ref {
                let oid: GitOid = oid_str.parse()?;
                return Ok(Some(oid));
            }
        }
    }

    Ok(None)
}
/// Attempts to fetch the given git `reference` for a Git repository.
///
/// This is the main entry for git clone/fetch. It does the following:

View File

@ -140,6 +140,44 @@ impl GitResolver {
Ok(Some(precise))
}
/// Resolve a Git URL to a specific commit via `git ls-remote`.
///
/// If a precise commit is already known for `url` (per [`Self::get_precise`]), it is returned
/// immediately without touching the remote. Otherwise, a [`GitSource`] is built for the URL
/// and its `ls_remote` lookup is run on a blocking task. A successfully resolved commit is
/// recorded in the resolver so that subsequent lookups for the same repository reference
/// observe the same commit.
///
/// Returns `Ok(None)` if the remote lookup did not yield a commit.
///
/// # Errors
///
/// Returns an error if the blocking task fails to complete, or if the underlying Git operation
/// fails (surfaced as [`GitResolverError::Git`]).
pub async fn ls_remote(
    &self,
    url: &GitUrl,
    disable_ssl: bool,
    offline: bool,
    cache: PathBuf,
) -> Result<Option<GitOid>, GitResolverError> {
    // Fast path: the URL is already precise, or the precise commit is already known.
    if let Some(known) = self.get_precise(url) {
        return Ok(Some(known));
    }

    // Build the source, disabling SSL if requested.
    let mut source = GitSource::new(url.clone(), cache, offline);
    if disable_ssl {
        source = source.dangerous();
    }

    // The lookup is synchronous, so run it off of the async executor.
    let resolved = tokio::task::spawn_blocking(move || source.ls_remote())
        .await?
        .map_err(GitResolverError::Git)?;

    // Remember the resolved commit, so that subsequent fetches resolve to the same one.
    if let Some(oid) = resolved {
        self.insert(RepositoryReference::from(url), oid);
    }

    Ok(resolved)
}
/// Fetch a remote Git repository.
pub async fn fetch(
&self,

View File

@ -60,6 +60,28 @@ impl GitSource {
}
}
/// Look up the OID of this source's reference (or precise revision) on the remote repository.
///
/// If credentials are stored for the repository's canonical URL, they are applied to a copy of
/// the repository URL before querying; otherwise, the repository URL is used as-is. The query
/// itself is delegated to [`GitRemote::ls`], using this source's reference, precise revision
/// (if any), and its `disable_ssl` and `offline` settings.
///
/// Returns `Ok(None)` if the remote lookup does not produce a matching OID.
#[instrument(skip(self), fields(repository = %self.git.repository(), rev = ?self.git.precise()))]
pub fn ls_remote(&self) -> Result<Option<GitOid>> {
    let repository = self.git.repository();

    // Credentials are keyed by the canonical form of the repository URL.
    let canonical = RepositoryUrl::new(repository);

    // Only allocate an authenticated copy of the URL when credentials are available.
    let remote = match GIT_STORE.get(&canonical) {
        Some(credentials) => Cow::Owned(credentials.apply(repository.clone())),
        None => Cow::Borrowed(repository),
    };

    GitRemote::new(&remote).ls(
        self.git.reference(),
        self.git.precise(),
        self.disable_ssl,
        self.offline,
    )
}
/// Fetch the underlying Git repository at the given revision.
#[instrument(skip(self), fields(repository = %self.git.repository(), rev = ?self.git.precise()))]
pub fn fetch(self) -> Result<Fetch> {