mirror of https://github.com/astral-sh/uv
Fetch `pyproject.toml` from GitHub API (#10765)
## Summary

When resolving Git metadata, we may be able to fetch the metadata from GitHub directly in some cases. This is _way_ faster, since we don't need to perform many Git operations and, in particular, don't need to clone the repo.

This only works in the following cases:

- The Git repository is public. Otherwise, I believe you need an access token, which we don't have.
- The `pyproject.toml` has static metadata.
- The `pyproject.toml` has no `tool.uv.sources`. Otherwise, we need to lower them... And, if there are any paths or workspace sources, that requires an install path (i.e., we need the content on-disk).
- The project is in the repo root. If it's in a subdirectory, it could be a workspace member. And if it's a workspace member, there could be sources defined in the workspace root. But we can't know without fetching the workspace root -- and we need the workspace in order to find the root...

Closes #10568.
This commit is contained in:
parent
b2d06f01cc
commit
5ee4cf6ff5
|
|
@ -4972,6 +4972,7 @@ dependencies = [
|
||||||
"thiserror 2.0.11",
|
"thiserror 2.0.11",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-util",
|
"tokio-util",
|
||||||
|
"toml",
|
||||||
"tracing",
|
"tracing",
|
||||||
"url",
|
"url",
|
||||||
"uv-cache",
|
"uv-cache",
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,7 @@ tempfile = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
tokio = { workspace = true }
|
tokio = { workspace = true }
|
||||||
tokio-util = { workspace = true, features = ["compat"] }
|
tokio-util = { workspace = true, features = ["compat"] }
|
||||||
|
toml = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
url = { workspace = true }
|
url = { workspace = true }
|
||||||
walkdir = { workspace = true }
|
walkdir = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ use crate::source::revision::Revision;
|
||||||
use crate::{Reporter, RequiresDist};
|
use crate::{Reporter, RequiresDist};
|
||||||
use fs_err::tokio as fs;
|
use fs_err::tokio as fs;
|
||||||
use futures::{FutureExt, TryStreamExt};
|
use futures::{FutureExt, TryStreamExt};
|
||||||
use reqwest::Response;
|
use reqwest::{Response, StatusCode};
|
||||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||||
use tracing::{debug, info_span, instrument, warn, Instrument};
|
use tracing::{debug, info_span, instrument, warn, Instrument};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
@ -40,12 +40,14 @@ use uv_distribution_types::{
|
||||||
};
|
};
|
||||||
use uv_extract::hash::Hasher;
|
use uv_extract::hash::Hasher;
|
||||||
use uv_fs::{rename_with_retry, write_atomic, LockedFile};
|
use uv_fs::{rename_with_retry, write_atomic, LockedFile};
|
||||||
|
use uv_git::{GitHubRepository, GitSha};
|
||||||
use uv_metadata::read_archive_metadata;
|
use uv_metadata::read_archive_metadata;
|
||||||
use uv_normalize::PackageName;
|
use uv_normalize::PackageName;
|
||||||
use uv_pep440::{release_specifiers_to_ranges, Version};
|
use uv_pep440::{release_specifiers_to_ranges, Version};
|
||||||
use uv_platform_tags::Tags;
|
use uv_platform_tags::Tags;
|
||||||
use uv_pypi_types::{HashAlgorithm, HashDigest, Metadata12, RequiresTxt, ResolutionMetadata};
|
use uv_pypi_types::{HashAlgorithm, HashDigest, Metadata12, RequiresTxt, ResolutionMetadata};
|
||||||
use uv_types::{BuildContext, BuildStack, SourceBuildTrait};
|
use uv_types::{BuildContext, BuildStack, SourceBuildTrait};
|
||||||
|
use uv_workspace::pyproject::ToolUvSources;
|
||||||
use zip::ZipArchive;
|
use zip::ZipArchive;
|
||||||
|
|
||||||
mod built_wheel_metadata;
|
mod built_wheel_metadata;
|
||||||
|
|
@ -1496,6 +1498,34 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
|
||||||
return Err(Error::HashesNotSupportedGit(source.to_string()));
|
return Err(Error::HashesNotSupportedGit(source.to_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If this is a GitHub URL, attempt to resolve to a precise commit using the GitHub API.
|
||||||
|
if let Some(precise) = self
|
||||||
|
.build_context
|
||||||
|
.git()
|
||||||
|
.github_fast_path(
|
||||||
|
resource.git,
|
||||||
|
client.unmanaged.uncached_client(resource.url).clone(),
|
||||||
|
)
|
||||||
|
.await?
|
||||||
|
{
|
||||||
|
// There's no need to check the cache, since we can't use cached metadata if there are
|
||||||
|
// sources, and we can't know if there are sources without fetching the
|
||||||
|
// `pyproject.toml`.
|
||||||
|
//
|
||||||
|
// For the same reason, there's no need to write to the cache, since we won't be able to
|
||||||
|
// use it on subsequent runs.
|
||||||
|
if let Some(metadata) = self
|
||||||
|
.github_metadata(precise, source, resource, client)
|
||||||
|
.await?
|
||||||
|
{
|
||||||
|
debug!("Found static metadata via GitHub fast path for: {source}");
|
||||||
|
return Ok(ArchiveMetadata {
|
||||||
|
metadata: Metadata::from_metadata23(metadata),
|
||||||
|
hashes: vec![],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Fetch the Git repository.
|
// Fetch the Git repository.
|
||||||
let fetch = self
|
let fetch = self
|
||||||
.build_context
|
.build_context
|
||||||
|
|
@ -1698,38 +1728,139 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
|
||||||
source: &BuildableSource<'_>,
|
source: &BuildableSource<'_>,
|
||||||
client: &ManagedClient<'_>,
|
client: &ManagedClient<'_>,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
match source {
|
let git = match source {
|
||||||
BuildableSource::Dist(SourceDist::Git(source)) => {
|
BuildableSource::Dist(SourceDist::Git(source)) => &*source.git,
|
||||||
self.build_context
|
BuildableSource::Url(SourceUrl::Git(source)) => source.git,
|
||||||
.git()
|
_ => {
|
||||||
.fetch(
|
return Ok(());
|
||||||
&source.git,
|
|
||||||
client.unmanaged.uncached_client(&source.url).clone(),
|
|
||||||
self.build_context.cache().bucket(CacheBucket::Git),
|
|
||||||
self.reporter
|
|
||||||
.clone()
|
|
||||||
.map(|reporter| reporter.into_git_reporter()),
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
}
|
}
|
||||||
BuildableSource::Url(SourceUrl::Git(source)) => {
|
};
|
||||||
self.build_context
|
|
||||||
.git()
|
// If this is a GitHub URL, attempt to resolve to a precise commit using the GitHub API.
|
||||||
.fetch(
|
if self
|
||||||
source.git,
|
.build_context
|
||||||
client.unmanaged.uncached_client(source.url).clone(),
|
.git()
|
||||||
self.build_context.cache().bucket(CacheBucket::Git),
|
.github_fast_path(
|
||||||
self.reporter
|
git,
|
||||||
.clone()
|
client.unmanaged.uncached_client(git.repository()).clone(),
|
||||||
.map(|reporter| reporter.into_git_reporter()),
|
)
|
||||||
)
|
.await?
|
||||||
.await?;
|
.is_some()
|
||||||
}
|
{
|
||||||
_ => {}
|
debug!("Resolved to precise commit via GitHub fast path: {source}");
|
||||||
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Otherwise, fetch the Git repository.
|
||||||
|
self.build_context
|
||||||
|
.git()
|
||||||
|
.fetch(
|
||||||
|
git,
|
||||||
|
client.unmanaged.uncached_client(git.repository()).clone(),
|
||||||
|
self.build_context.cache().bucket(CacheBucket::Git),
|
||||||
|
self.reporter
|
||||||
|
.clone()
|
||||||
|
.map(|reporter| reporter.into_git_reporter()),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fetch static [`ResolutionMetadata`] from a GitHub repository, if possible.
|
||||||
|
///
|
||||||
|
/// Attempts to fetch the `pyproject.toml` from the resolved commit using the GitHub API.
|
||||||
|
async fn github_metadata(
|
||||||
|
&self,
|
||||||
|
commit: GitSha,
|
||||||
|
source: &BuildableSource<'_>,
|
||||||
|
resource: &GitSourceUrl<'_>,
|
||||||
|
client: &ManagedClient<'_>,
|
||||||
|
) -> Result<Option<ResolutionMetadata>, Error> {
|
||||||
|
let GitSourceUrl {
|
||||||
|
git, subdirectory, ..
|
||||||
|
} = resource;
|
||||||
|
|
||||||
|
// The fast path isn't available for subdirectories. If a `pyproject.toml` is in a
|
||||||
|
// subdirectory, it could be part of a workspace; and if it's part of a workspace, it could
|
||||||
|
// have `tool.uv.sources` entries that it inherits from the workspace root.
|
||||||
|
if subdirectory.is_some() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(GitHubRepository { owner, repo }) = GitHubRepository::parse(git.repository())
|
||||||
|
else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fetch the `pyproject.toml` from the resolved commit.
|
||||||
|
let url =
|
||||||
|
format!("https://raw.githubusercontent.com/{owner}/{repo}/{commit}/pyproject.toml");
|
||||||
|
|
||||||
|
debug!("Attempting to fetch `pyproject.toml` from: {url}");
|
||||||
|
|
||||||
|
let content = client
|
||||||
|
.managed(|client| async {
|
||||||
|
let response = client
|
||||||
|
.uncached_client(git.repository())
|
||||||
|
.get(&url)
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// If the `pyproject.toml` does not exist, the raw content request will return a 404.
|
||||||
|
if response.status() == StatusCode::NOT_FOUND {
|
||||||
|
return Ok::<Option<String>, Error>(None);
|
||||||
|
}
|
||||||
|
response.error_for_status_ref()?;
|
||||||
|
|
||||||
|
let content = response.text().await?;
|
||||||
|
Ok::<Option<String>, Error>(Some(content))
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let Some(content) = content else {
|
||||||
|
debug!("GitHub API returned a 404 for: {url}");
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Parse the metadata.
|
||||||
|
let metadata = match ResolutionMetadata::parse_pyproject_toml(&content, source.version()) {
|
||||||
|
Ok(metadata) => metadata,
|
||||||
|
Err(
|
||||||
|
uv_pypi_types::MetadataError::Pep508Error(_)
|
||||||
|
| uv_pypi_types::MetadataError::DynamicField(_)
|
||||||
|
| uv_pypi_types::MetadataError::FieldNotFound(_)
|
||||||
|
| uv_pypi_types::MetadataError::PoetrySyntax,
|
||||||
|
) => {
|
||||||
|
debug!("Failed to extract static metadata from GitHub API for: {url}");
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
Err(err) => return Err(err.into()),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Determine whether the project has `tool.uv.sources`. If the project has sources, it must
|
||||||
|
// be lowered, which requires access to the workspace. For example, it could have workspace
|
||||||
|
// members that need to be translated to concrete paths on disk.
|
||||||
|
//
|
||||||
|
// TODO(charlie): We could still use the `pyproject.toml` if the sources are all `git` or
|
||||||
|
// `url` sources; this is only applicable to `workspace` and `path` sources. It's awkward,
|
||||||
|
// though, because we'd need to pass a path into the lowering routine, and that path would
|
||||||
|
// be incorrect (we'd just be relying on it not being used).
|
||||||
|
match has_sources(&content) {
|
||||||
|
Ok(false) => {}
|
||||||
|
Ok(true) => {
|
||||||
|
debug!("Skipping GitHub fast path; `pyproject.toml` has sources: {url}");
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
debug!("Failed to parse `tool.uv.sources` from GitHub API for: {url} ({err})");
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Some(metadata))
|
||||||
|
}
|
||||||
|
|
||||||
/// Heal a [`Revision`] for a local archive.
|
/// Heal a [`Revision`] for a local archive.
|
||||||
async fn heal_archive_revision(
|
async fn heal_archive_revision(
|
||||||
&self,
|
&self,
|
||||||
|
|
@ -2341,6 +2472,37 @@ impl StaticMetadata {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if a `pyproject.toml` has a non-empty `tool.uv.sources` table.
|
||||||
|
fn has_sources(content: &str) -> Result<bool, toml::de::Error> {
|
||||||
|
#[derive(serde::Deserialize)]
|
||||||
|
struct PyProjectToml {
|
||||||
|
tool: Option<Tool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Deserialize)]
|
||||||
|
struct Tool {
|
||||||
|
uv: Option<ToolUv>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Deserialize)]
|
||||||
|
struct ToolUv {
|
||||||
|
sources: Option<ToolUvSources>,
|
||||||
|
}
|
||||||
|
|
||||||
|
let PyProjectToml { tool } = toml::from_str(content)?;
|
||||||
|
if let Some(tool) = tool {
|
||||||
|
if let Some(uv) = tool.uv {
|
||||||
|
if let Some(sources) = uv.sources {
|
||||||
|
if !sources.inner().is_empty() {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
|
||||||
/// Validate that the source distribution matches the built metadata.
|
/// Validate that the source distribution matches the built metadata.
|
||||||
fn validate_metadata(
|
fn validate_metadata(
|
||||||
source: &BuildableSource<'_>,
|
source: &BuildableSource<'_>,
|
||||||
|
|
|
||||||
|
|
@ -102,7 +102,7 @@ impl GitReference {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts the [`GitReference`] to a `str` that can be used as a revision.
|
/// Converts the [`GitReference`] to a `str` that can be used as a revision.
|
||||||
pub(crate) fn as_rev(&self) -> &str {
|
pub fn as_rev(&self) -> &str {
|
||||||
match self {
|
match self {
|
||||||
Self::Tag(rev) => rev,
|
Self::Tag(rev) => rev,
|
||||||
Self::Branch(rev) => rev,
|
Self::Branch(rev) => rev,
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
@ -11,7 +12,7 @@ use reqwest_middleware::ClientWithMiddleware;
|
||||||
use uv_cache_key::{cache_digest, RepositoryUrl};
|
use uv_cache_key::{cache_digest, RepositoryUrl};
|
||||||
use uv_fs::LockedFile;
|
use uv_fs::LockedFile;
|
||||||
|
|
||||||
use crate::{Fetch, GitReference, GitSha, GitSource, GitUrl, Reporter};
|
use crate::{Fetch, GitHubRepository, GitReference, GitSha, GitSource, GitUrl, Reporter};
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub enum GitResolverError {
|
pub enum GitResolverError {
|
||||||
|
|
@ -21,6 +22,10 @@ pub enum GitResolverError {
|
||||||
Join(#[from] tokio::task::JoinError),
|
Join(#[from] tokio::task::JoinError),
|
||||||
#[error("Git operation failed")]
|
#[error("Git operation failed")]
|
||||||
Git(#[source] anyhow::Error),
|
Git(#[source] anyhow::Error),
|
||||||
|
#[error(transparent)]
|
||||||
|
Reqwest(#[from] reqwest::Error),
|
||||||
|
#[error(transparent)]
|
||||||
|
ReqwestMiddleware(#[from] reqwest_middleware::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A resolver for Git repositories.
|
/// A resolver for Git repositories.
|
||||||
|
|
@ -38,6 +43,61 @@ impl GitResolver {
|
||||||
self.0.get(reference)
|
self.0.get(reference)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve a Git URL to a specific commit without performing any Git operations.
|
||||||
|
///
|
||||||
|
/// Returns a [`GitSha`] if the URL has already been resolved (i.e., is available in the cache),
|
||||||
|
/// or if it can be fetched via the GitHub API. Otherwise, returns `None`.
|
||||||
|
pub async fn github_fast_path(
|
||||||
|
&self,
|
||||||
|
url: &GitUrl,
|
||||||
|
client: ClientWithMiddleware,
|
||||||
|
) -> Result<Option<GitSha>, GitResolverError> {
|
||||||
|
let reference = RepositoryReference::from(url);
|
||||||
|
|
||||||
|
// If we know the precise commit already, return it.
|
||||||
|
if let Some(precise) = self.get(&reference) {
|
||||||
|
return Ok(Some(*precise));
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the URL is a GitHub URL, attempt to resolve it via the GitHub API.
|
||||||
|
let Some(GitHubRepository { owner, repo }) = GitHubRepository::parse(url.repository())
|
||||||
|
else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Determine the Git reference.
|
||||||
|
let rev = url.reference().as_rev();
|
||||||
|
|
||||||
|
let url = format!("https://api.github.com/repos/{owner}/{repo}/commits/{rev}");
|
||||||
|
|
||||||
|
debug!("Attempting GitHub fast path for: {url}");
|
||||||
|
let mut request = client.get(&url);
|
||||||
|
request = request.header("Accept", "application/vnd.github.3.sha");
|
||||||
|
request = request.header("User-Agent", "uv");
|
||||||
|
|
||||||
|
let response = request.send().await?;
|
||||||
|
if !response.status().is_success() {
|
||||||
|
// Returns a 404 if the repository does not exist, and a 422 if GitHub is unable to
|
||||||
|
// resolve the requested rev.
|
||||||
|
debug!(
|
||||||
|
"GitHub API request failed for: {url} ({})",
|
||||||
|
response.status()
|
||||||
|
);
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the response as a Git SHA.
|
||||||
|
let precise = response.text().await?;
|
||||||
|
let precise =
|
||||||
|
GitSha::from_str(&precise).map_err(|err| GitResolverError::Git(err.into()))?;
|
||||||
|
|
||||||
|
// Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches
|
||||||
|
// resolve to the same precise commit.
|
||||||
|
self.insert(reference, precise);
|
||||||
|
|
||||||
|
Ok(Some(precise))
|
||||||
|
}
|
||||||
|
|
||||||
/// Fetch a remote Git repository.
|
/// Fetch a remote Git repository.
|
||||||
pub async fn fetch(
|
pub async fn fetch(
|
||||||
&self,
|
&self,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue