Add parsed URL to `PubGrubPackage` (#3426)

Avoid re-parsing URLs during resolution by storing the parsed components alongside the
verbatim URL on `PubGrubPackage`.

Part 1 of #3408
konsti 2024-05-14 02:55:21 +02:00 committed by GitHub
parent 5132c6a6e2
commit 0010954ca7
17 changed files with 194 additions and 79 deletions
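
The pattern introduced here is easiest to see in isolation: parse each requirement URL once, then carry the parsed form and the user's verbatim spelling together so that later resolution passes never re-run a URL parser. A minimal, self-contained sketch of that idea (simplified stand-in types, not uv's actual definitions):

```rust
use std::collections::HashMap;

/// Simplified stand-in for uv's `ParsedUrl`: the structured form the resolver works with.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum ParsedUrl {
    Archive { location: String, subdirectory: Option<String> },
    Git { repository: String, reference: Option<String> },
    LocalFile { path: String },
}

/// Simplified stand-in for `VerbatimParsedUrl`: the parsed form plus the string the user wrote.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct VerbatimParsedUrl {
    parsed_url: ParsedUrl,
    verbatim: String,
}

/// Parse once, up front (deliberately naive parsing). Everything downstream reads
/// `parsed_url` instead of re-parsing `verbatim`.
fn lower(verbatim: &str) -> VerbatimParsedUrl {
    let parsed_url = if let Some(rest) = verbatim.strip_prefix("git+") {
        let (repository, reference) = match rest.split_once('@') {
            Some((repo, rev)) => (repo.to_string(), Some(rev.to_string())),
            None => (rest.to_string(), None),
        };
        ParsedUrl::Git { repository, reference }
    } else if let Some(path) = verbatim.strip_prefix("file://") {
        ParsedUrl::LocalFile { path: path.to_string() }
    } else {
        ParsedUrl::Archive { location: verbatim.to_string(), subdirectory: None }
    };
    VerbatimParsedUrl { parsed_url, verbatim: verbatim.to_string() }
}

fn main() {
    // A map of package names to their required URLs, as the resolver keeps (cf. `Urls`).
    let mut urls: HashMap<String, VerbatimParsedUrl> = HashMap::new();
    urls.insert("flask".to_string(), lower("git+https://github.com/pallets/flask@main"));

    // Later passes reuse the parsed parts without touching a URL parser again.
    if let Some(url) = urls.get("flask") {
        if let ParsedUrl::Git { repository, reference } = &url.parsed_url {
            println!("{repository} @ {reference:?} (written as `{}`)", url.verbatim);
        }
    }
}
```

The real types live in `distribution-types` (`ParsedUrl`, `VerbatimParsedUrl`) and are threaded through `PubGrubPackage`, as the diffs below show.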

Cargo.lock (generated)

@@ -5036,6 +5036,7 @@ dependencies = [
  "uv-client",
  "uv-configuration",
  "uv-distribution",
+ "uv-git",
  "uv-interpreter",
  "uv-normalize",
  "uv-types",


@@ -341,19 +341,19 @@ impl Dist {
     // TODO(konsti): We should carry the parsed URL through the codebase.
     /// Create a [`Dist`] for a URL-based distribution.
-    pub fn from_url(name: PackageName, url: VerbatimUrl) -> Result<Self, Error> {
-        match Scheme::parse(url.scheme()) {
-            Some(Scheme::Http | Scheme::Https) => Self::from_http_url(name, url),
-            Some(Scheme::File) => Self::from_file_url(name, url, false),
-            Some(Scheme::GitSsh | Scheme::GitHttps) => Self::from_git_url(name, url),
+    pub fn from_url(name: PackageName, url: VerbatimParsedUrl) -> Result<Self, Error> {
+        match Scheme::parse(url.verbatim.scheme()) {
+            Some(Scheme::Http | Scheme::Https) => Self::from_http_url(name, url.verbatim),
+            Some(Scheme::File) => Self::from_file_url(name, url.verbatim, false),
+            Some(Scheme::GitSsh | Scheme::GitHttps) => Self::from_git_url(name, url.verbatim),
             Some(Scheme::GitGit | Scheme::GitHttp) => Err(Error::UnsupportedScheme(
-                url.scheme().to_owned(),
-                url.verbatim().to_string(),
+                url.verbatim.scheme().to_owned(),
+                url.verbatim.verbatim().to_string(),
                 "insecure Git protocol; use `git+https` or `git+ssh` instead".to_string(),
             )),
             Some(Scheme::GitFile) => Err(Error::UnsupportedScheme(
-                url.scheme().to_owned(),
-                url.verbatim().to_string(),
+                url.verbatim.scheme().to_owned(),
+                url.verbatim.verbatim().to_string(),
                 "local Git protocol".to_string(),
             )),
             Some(
@@ -365,8 +365,8 @@ impl Dist {
                 | Scheme::BzrLp
                 | Scheme::BzrFile,
             ) => Err(Error::UnsupportedScheme(
-                url.scheme().to_owned(),
-                url.verbatim().to_string(),
+                url.verbatim.scheme().to_owned(),
+                url.verbatim.verbatim().to_string(),
                 "Bazaar is not supported".to_string(),
             )),
             Some(
@@ -376,8 +376,8 @@ impl Dist {
                 | Scheme::HgSsh
                 | Scheme::HgStaticHttp,
             ) => Err(Error::UnsupportedScheme(
-                url.scheme().to_owned(),
-                url.verbatim().to_string(),
+                url.verbatim.scheme().to_owned(),
+                url.verbatim.verbatim().to_string(),
                 "Mercurial is not supported".to_string(),
             )),
             Some(
@@ -387,13 +387,13 @@ impl Dist {
                 | Scheme::SvnSvn
                 | Scheme::SvnFile,
             ) => Err(Error::UnsupportedScheme(
-                url.scheme().to_owned(),
-                url.verbatim().to_string(),
+                url.verbatim.scheme().to_owned(),
+                url.verbatim.verbatim().to_string(),
                 "Subversion is not supported".to_string(),
             )),
             None => Err(Error::UnsupportedScheme(
-                url.scheme().to_owned(),
-                url.verbatim().to_string(),
+                url.verbatim.scheme().to_owned(),
+                url.verbatim.verbatim().to_string(),
                 "unknown scheme".to_string(),
             )),
         }


@@ -4,6 +4,7 @@ use anyhow::{Error, Result};
 use thiserror::Error;
 use url::Url;
 
+use pep508_rs::VerbatimUrl;
 use uv_git::{GitSha, GitUrl};
 
 #[derive(Debug, Error)]
@@ -22,13 +23,19 @@ pub enum ParsedUrlError {
     UrlParse(String, #[source] url::ParseError),
 }
 
+#[derive(Debug, Clone, Hash, PartialEq, Eq)]
+pub struct VerbatimParsedUrl {
+    pub parsed_url: ParsedUrl,
+    pub verbatim: VerbatimUrl,
+}
+
 /// We support three types of URLs for distributions:
 /// * The path to a file or directory (`file://`)
 /// * A Git repository (`git+https://` or `git+ssh://`), optionally with a subdirectory and/or
 ///   string to checkout.
 /// * A remote archive (`https://`), optional with a subdirectory (source dist only)
 /// A URL in a requirement `foo @ <url>` must be one of the above.
-#[derive(Debug)]
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
 pub enum ParsedUrl {
     /// The direct URL is a path to a local directory or file.
     LocalFile(ParsedLocalFileUrl),
@@ -42,7 +49,7 @@ pub enum ParsedUrl {
 ///
 /// Examples:
 /// * `file:///home/ferris/my_project`
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
 pub struct ParsedLocalFileUrl {
     pub url: Url,
     pub path: PathBuf,
@@ -54,7 +61,7 @@ pub struct ParsedLocalFileUrl {
 /// Examples:
 /// * `git+https://git.example.com/MyProject.git`
 /// * `git+https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
 pub struct ParsedGitUrl {
     pub url: GitUrl,
     pub subdirectory: Option<PathBuf>,
@@ -87,7 +94,7 @@ impl TryFrom<Url> for ParsedGitUrl {
 /// * wheel: `https://download.pytorch.org/whl/torch-2.0.1-cp39-cp39-manylinux2014_aarch64.whl#sha256=423e0ae257b756bb45a4b49072046772d1ad0c592265c5080070e0767da4e490`
 /// * source dist, correctly named: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz`
 /// * source dist, only extension recognizable: `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
 pub struct ParsedArchiveUrl {
     pub url: Url,
     pub subdirectory: Option<PathBuf>,
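
Besides the new struct, the noteworthy change in this file is the extra derives: `ParsedUrl` and its variants gain `Clone` and `Hash` (and keep `Eq`), which is what allows `VerbatimParsedUrl` to be embedded in resolver-side values such as `PubGrubPackage` that get cloned, compared, and hashed. A tiny sketch of why those bounds matter (illustrative types only, not the real ones):

```rust
use std::collections::HashSet;

// Without `Clone`/`Eq`/`Hash` on the inner type, the outer type could not derive them either,
// and it could not be stored in hash-based collections or duplicated across resolver state.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct ParsedGitUrl {
    repository: String,
    reference: Option<String>,
    subdirectory: Option<String>,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct VerbatimParsedUrl {
    parsed_url: ParsedGitUrl,
    verbatim: String,
}

fn main() {
    let url = VerbatimParsedUrl {
        parsed_url: ParsedGitUrl {
            repository: "https://git.example.com/MyProject.git".to_string(),
            reference: Some("v1.0".to_string()),
            subdirectory: Some("pkg_dir".to_string()),
        },
        verbatim: "git+https://git.example.com/MyProject.git@v1.0#subdirectory=pkg_dir".to_string(),
    };

    // Cloning and hashing both require the new derives; deduplication is one place they pay off.
    let mut seen = HashSet::new();
    assert!(seen.insert(url.clone()));
    assert!(!seen.insert(url));
    println!("ok: {} unique URL(s)", seen.len());
}
```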


@@ -155,15 +155,11 @@ pub(crate) async fn resolve_precise(
 ///
 /// This method will only return precise URLs for URLs that have already been resolved via
 /// [`resolve_precise`].
-pub fn to_precise(url: &Url) -> Option<Url> {
-    let ParsedGitUrl { url, subdirectory } = ParsedGitUrl::try_from(url.clone()).ok()?;
+pub fn git_url_to_precise(url: GitUrl) -> Option<GitUrl> {
     let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
     let reference = RepositoryReference::new(&url);
     let precise = resolved_git_refs.get(&reference)?;
-    Some(Url::from(ParsedGitUrl {
-        url: url.with_precise(*precise),
-        subdirectory,
-    }))
+    Some(url.with_precise(*precise))
 }
 
 /// Returns `true` if the URLs refer to the same Git commit.
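
`git_url_to_precise` replaces the old `to_precise`: instead of round-tripping through a `Url`, it takes a `GitUrl` directly, looks the repository up in the process-wide map of already-resolved references (`RESOLVED_GIT_REFS`, keyed by `RepositoryReference`), and returns a copy pinned to the exact commit. A simplified, self-contained sketch of that cache shape, with plain strings standing in for uv's `GitUrl` and commit types:

```rust
use std::collections::HashMap;
use std::sync::{Mutex, OnceLock};

/// Simplified stand-in for uv's `GitUrl`: a repository plus the reference requested by the user.
#[derive(Debug, Clone)]
struct GitUrl {
    repository: String,
    reference: String,
    precise: Option<String>,
}

impl GitUrl {
    /// Return a copy of this URL pinned to an exact commit (cf. `GitUrl::with_precise`).
    fn with_precise(mut self, sha: &str) -> Self {
        self.precise = Some(sha.to_string());
        self
    }
}

/// Process-wide map of (repository, reference) pairs that have already been fetched and resolved
/// (cf. `RESOLVED_GIT_REFS` keyed by `RepositoryReference`).
static RESOLVED_GIT_REFS: OnceLock<Mutex<HashMap<(String, String), String>>> = OnceLock::new();

fn resolved_git_refs() -> &'static Mutex<HashMap<(String, String), String>> {
    RESOLVED_GIT_REFS.get_or_init(|| Mutex::new(HashMap::new()))
}

/// Record the commit a (repository, reference) pair resolved to.
fn record_precise(url: &GitUrl, sha: &str) {
    resolved_git_refs()
        .lock()
        .unwrap()
        .insert((url.repository.clone(), url.reference.clone()), sha.to_string());
}

/// Look up the previously resolved commit; `None` if this URL has not been fetched yet.
fn git_url_to_precise(url: GitUrl) -> Option<GitUrl> {
    let refs = resolved_git_refs().lock().unwrap();
    let sha = refs.get(&(url.repository.clone(), url.reference.clone()))?;
    Some(url.with_precise(sha))
}

fn main() {
    let url = GitUrl {
        repository: "https://github.com/pallets/flask".to_string(),
        reference: "main".to_string(),
        precise: None,
    };
    // Placeholder SHA for illustration only.
    record_precise(&url, "0123456789abcdef0123456789abcdef01234567");
    let pinned = git_url_to_precise(url).expect("resolved earlier");
    println!("pinned to {:?}", pinned.precise);
}
```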


@@ -2,7 +2,7 @@ pub use archive::Archive;
 pub use distribution_database::{DistributionDatabase, HttpArchivePointer, LocalArchivePointer};
 pub use download::LocalWheel;
 pub use error::Error;
-pub use git::{is_same_reference, to_precise};
+pub use git::{git_url_to_precise, is_same_reference};
 pub use index::{BuiltWheelIndex, RegistryWheelIndex};
 use pypi_types::{HashDigest, Metadata23};
 pub use reporter::Reporter;


@@ -54,7 +54,7 @@ enum RefspecStrategy {
 impl GitReference {
     /// Creates a [`GitReference`] from an arbitrary revision string, which could represent a
     /// branch, tag, commit, or named ref.
-    pub(crate) fn from_rev(rev: &str) -> Self {
+    pub fn from_rev(rev: &str) -> Self {
         if rev.starts_with("refs/") {
             Self::NamedRef(rev.to_owned())
         } else if looks_like_commit_hash(rev) {


@@ -25,12 +25,13 @@ pypi-types = { workspace = true }
 requirements-txt = { workspace = true }
 uv-cache = { workspace = true }
 uv-client = { workspace = true }
+uv-configuration = { workspace = true }
 uv-distribution = { workspace = true }
+uv-git = { workspace = true }
 uv-interpreter = { workspace = true }
 uv-normalize = { workspace = true }
 uv-types = { workspace = true }
 uv-warnings = { workspace = true }
-uv-configuration = { workspace = true }
 
 anstream = { workspace = true }
 anyhow = { workspace = true }


@@ -235,10 +235,10 @@ impl PubGrubRequirement {
                 ));
             };
 
-            if !Urls::is_allowed(expected, url) {
+            if !Urls::is_allowed(&expected.verbatim, url) {
                 return Err(ResolveError::ConflictingUrlsTransitive(
                     requirement.name.clone(),
-                    expected.verbatim().to_string(),
+                    expected.verbatim.verbatim().to_string(),
                     url.verbatim().to_string(),
                 ));
             }
@@ -260,10 +260,10 @@ impl PubGrubRequirement {
                 ));
             };
 
-            if !Urls::is_allowed(expected, url) {
+            if !Urls::is_allowed(&expected.verbatim, url) {
                 return Err(ResolveError::ConflictingUrlsTransitive(
                     requirement.name.clone(),
-                    expected.verbatim().to_string(),
+                    expected.verbatim.verbatim().to_string(),
                     url.verbatim().to_string(),
                 ));
             }
@@ -285,10 +285,10 @@
                 ));
             };
 
-            if !Urls::is_allowed(expected, url) {
+            if !Urls::is_allowed(&expected.verbatim, url) {
                 return Err(ResolveError::ConflictingUrlsTransitive(
                     requirement.name.clone(),
-                    expected.verbatim().to_string(),
+                    expected.verbatim.verbatim().to_string(),
                     url.verbatim().to_string(),
                 ));
             }


@@ -1,12 +1,11 @@
-use distribution_types::{DistributionMetadata, Name, VersionOrUrlRef};
+use distribution_types::{DistributionMetadata, Name, VerbatimParsedUrl, VersionOrUrlRef};
 use pep440_rs::Version;
-use pep508_rs::VerbatimUrl;
 use uv_normalize::PackageName;
 
 #[derive(Debug)]
 pub(crate) enum PubGrubDistribution<'a> {
     Registry(&'a PackageName, &'a Version),
-    Url(&'a PackageName, &'a VerbatimUrl),
+    Url(&'a PackageName, &'a VerbatimParsedUrl),
 }
 
 impl<'a> PubGrubDistribution<'a> {
@@ -14,7 +13,7 @@ impl<'a> PubGrubDistribution<'a> {
         Self::Registry(name, version)
     }
 
-    pub(crate) fn from_url(name: &'a PackageName, url: &'a VerbatimUrl) -> Self {
+    pub(crate) fn from_url(name: &'a PackageName, url: &'a VerbatimParsedUrl) -> Self {
         Self::Url(name, url)
     }
 }
@@ -32,7 +31,7 @@ impl DistributionMetadata for PubGrubDistribution<'_> {
     fn version_or_url(&self) -> VersionOrUrlRef {
         match self {
             Self::Registry(_, version) => VersionOrUrlRef::Version(version),
-            Self::Url(_, url) => VersionOrUrlRef::Url(url),
+            Self::Url(_, url) => VersionOrUrlRef::Url(&url.verbatim),
         }
     }
 }


@@ -1,6 +1,6 @@
 use derivative::Derivative;
 
-use pep508_rs::VerbatimUrl;
+use distribution_types::VerbatimParsedUrl;
 use uv_normalize::{ExtraName, PackageName};
 
 use crate::resolver::Urls;
@@ -59,7 +59,7 @@ pub enum PubGrubPackage {
         /// we're going to have a dependency that's provided as a URL, we _need_ to visit the URL
         /// version before the registry version. So we could just error if we visit a URL variant
         /// _after_ a registry variant.
-        Option<VerbatimUrl>,
+        Option<VerbatimParsedUrl>,
     ),
     /// A proxy package to represent a dependency with an extra (e.g., `black[colorama]`).
     ///
@@ -74,7 +74,7 @@ pub enum PubGrubPackage {
     /// the exact same version of the base variant. Without the proxy package, then when provided
     /// requirements like `black==23.0.1` and `black[colorama]`, PubGrub may attempt to retrieve
    /// metadata for `black[colorama]` versions other than `23.0.1`.
-    Extra(PackageName, ExtraName, Option<VerbatimUrl>),
+    Extra(PackageName, ExtraName, Option<VerbatimParsedUrl>),
 }
 
 impl PubGrubPackage {


@@ -1,10 +1,13 @@
 use url::Url;
 
+use distribution_types::{ParsedGitUrl, ParsedUrl, VerbatimParsedUrl};
 use pep508_rs::VerbatimUrl;
+use uv_distribution::git_url_to_precise;
+use uv_git::{GitReference, GitUrl};
 
 /// Given a [`VerbatimUrl`] and a redirect, apply the redirect to the URL while preserving as much
 /// of the verbatim representation as possible.
-pub(crate) fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
+fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
     let redirect = VerbatimUrl::from_url(redirect);
 
     // The redirect should be the "same" URL, but with a specific commit hash added after the `@`.
@@ -36,9 +39,53 @@ pub(crate) fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
     redirect
 }
 
+pub(crate) fn url_to_precise(url: VerbatimParsedUrl) -> VerbatimParsedUrl {
+    let ParsedUrl::Git(ParsedGitUrl {
+        url: git_url,
+        subdirectory,
+    }) = url.parsed_url.clone()
+    else {
+        return url;
+    };
+
+    // TODO(konsti): Remove once we carry more context on the `Dist`s.
+    let lowered_git_ref = git_url
+        .reference()
+        .as_str()
+        .map_or(GitReference::DefaultBranch, |rev| {
+            GitReference::from_rev(rev)
+        });
+    let git_url = GitUrl::new(git_url.repository().clone(), lowered_git_ref);
+
+    let Some(new_git_url) = git_url_to_precise(git_url.clone()) else {
+        debug_assert!(
+            matches!(git_url.reference(), GitReference::FullCommit(_)),
+            "Unseen git url: {}, {:?}",
+            url.verbatim,
+            git_url
+        );
+        return url;
+    };
+
+    let new_parsed_url = ParsedGitUrl {
+        url: new_git_url,
+        subdirectory,
+    };
+    let new_url = Url::from(new_parsed_url.clone());
+    let new_verbatim_url = apply_redirect(&url.verbatim, new_url);
+    VerbatimParsedUrl {
+        parsed_url: ParsedUrl::Git(new_parsed_url),
+        verbatim: new_verbatim_url,
+    }
+}
+
 #[cfg(test)]
 mod tests {
+    use super::*;
+
     use url::Url;
 
     use pep508_rs::VerbatimUrl;
 
     use crate::redirect::apply_redirect;
 
     #[test]
     fn test_apply_redirect() -> Result<(), url::ParseError> {
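
The new `url_to_precise` ties the pieces together: it pulls the `GitUrl` out of the parsed form, asks the resolved-refs cache for the exact commit, and then runs `apply_redirect` so the user's verbatim spelling is kept and only the reference after `@` is swapped for the pinned SHA. A deliberately naive, string-level illustration of that last step (the real `apply_redirect` works on `VerbatimUrl`s and is more careful about missing `@` suffixes and non-Git URLs):

```rust
/// Replace the reference after the final `@` with a pinned commit, keeping everything the user
/// wrote before it. This is a simplified version of what `apply_redirect` achieves.
fn pin_verbatim_to_commit(verbatim: &str, precise: &str) -> String {
    match verbatim.rsplit_once('@') {
        // `git+https://github.com/owner/repo@main` -> `git+https://github.com/owner/repo@<sha>`
        Some((prefix, _reference)) => format!("{prefix}@{precise}"),
        // No reference was written; append the pinned commit.
        None => format!("{verbatim}@{precise}"),
    }
}

fn main() {
    let verbatim = "git+https://github.com/pallets/flask@main";
    // Placeholder SHA for illustration only.
    let precise = "0123456789abcdef0123456789abcdef01234567";
    assert_eq!(
        pin_verbatim_to_commit(verbatim, precise),
        format!("git+https://github.com/pallets/flask@{precise}")
    );
    println!("{}", pin_verbatim_to_commit(verbatim, precise));
}
```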


@@ -21,7 +21,6 @@ use once_map::OnceMap;
 use pep440_rs::Version;
 use pep508_rs::MarkerEnvironment;
 use pypi_types::HashDigest;
-use uv_distribution::to_precise;
 use uv_normalize::{ExtraName, PackageName};
@@ -30,7 +29,7 @@ use crate::lock::{self, Lock, LockError};
 use crate::pins::FilePins;
 use crate::preferences::Preferences;
 use crate::pubgrub::{PubGrubDistribution, PubGrubPackage};
-use crate::redirect::apply_redirect;
+use crate::redirect::url_to_precise;
 use crate::resolver::{InMemoryIndex, MetadataResponse, VersionsResponse};
 use crate::{Manifest, ResolveError};
@@ -129,9 +128,7 @@
                 {
                     Dist::from_editable(package_name.clone(), editable.clone())?
                 } else {
-                    let url = to_precise(url)
-                        .map_or_else(|| url.clone(), |precise| apply_redirect(url, precise));
-                    Dist::from_url(package_name.clone(), url)?
+                    Dist::from_url(package_name.clone(), url_to_precise(url.clone()))?
                 };
 
                 // Add its hashes to the index, preserving those that were already present in
@@ -249,11 +246,8 @@
                         .or_insert_with(Vec::new)
                         .push(extra.clone());
                 } else {
-                    let url = to_precise(url).map_or_else(
-                        || url.clone(),
-                        |precise| apply_redirect(url, precise),
-                    );
-                    let pinned_package = Dist::from_url(package_name.clone(), url)?;
+                    let pinned_package =
+                        Dist::from_url(package_name.clone(), url_to_precise(url.clone()))?;
 
                     diagnostics.push(Diagnostic::MissingExtra {
                         dist: pinned_package.into(),


@@ -612,7 +612,7 @@ impl<'a, Provider: ResolverProvider, InstalledPackages: InstalledPackagesProvide
             }
             PubGrubPackage::Package(name, _extra, Some(url)) => {
                 // Verify that the package is allowed under the hash-checking policy.
-                if !self.hasher.allows_url(url) {
+                if !self.hasher.allows_url(&url.verbatim) {
                     return Err(ResolveError::UnhashedPackage(name.clone()));
                 }
@@ -685,7 +685,10 @@
             PubGrubPackage::Extra(package_name, _, Some(url))
             | PubGrubPackage::Package(package_name, _, Some(url)) => {
-                debug!("Searching for a compatible version of {package} @ {url} ({range})");
+                debug!(
+                    "Searching for a compatible version of {package} @ {} ({range})",
+                    url.verbatim
+                );
 
                 // If the dist is an editable, return the version from the editable metadata.
                 if let Some((_local, metadata, _)) = self.editables.get(package_name) {
@@ -1373,7 +1376,7 @@
             PubGrubPackage::Python(_) => {}
             PubGrubPackage::Extra(_, _, _) => {}
             PubGrubPackage::Package(package_name, _extra, Some(url)) => {
-                reporter.on_progress(package_name, &VersionOrUrlRef::Url(url));
+                reporter.on_progress(package_name, &VersionOrUrlRef::Url(&url.verbatim));
             }
             PubGrubPackage::Package(package_name, _extra, None) => {
                 reporter.on_progress(package_name, &VersionOrUrlRef::Version(version));


@@ -1,16 +1,20 @@
 use rustc_hash::FxHashMap;
 use tracing::debug;
 
-use distribution_types::{RequirementSource, Verbatim};
+use distribution_types::{
+    ParsedArchiveUrl, ParsedGitUrl, ParsedLocalFileUrl, ParsedUrl, RequirementSource, Verbatim,
+    VerbatimParsedUrl,
+};
 use pep508_rs::{MarkerEnvironment, VerbatimUrl};
 use uv_distribution::is_same_reference;
+use uv_git::GitUrl;
 use uv_normalize::PackageName;
 
 use crate::{DependencyMode, Manifest, ResolveError};
 
 /// A map of package names to their associated, required URLs.
 #[derive(Debug, Default)]
-pub(crate) struct Urls(FxHashMap<PackageName, VerbatimUrl>);
+pub(crate) struct Urls(FxHashMap<PackageName, VerbatimParsedUrl>);
 
 impl Urls {
     pub(crate) fn from_manifest(
@@ -18,19 +22,30 @@ impl Urls {
         markers: Option<&MarkerEnvironment>,
         dependencies: DependencyMode,
     ) -> Result<Self, ResolveError> {
-        let mut urls: FxHashMap<PackageName, VerbatimUrl> = FxHashMap::default();
+        let mut urls: FxHashMap<PackageName, VerbatimParsedUrl> = FxHashMap::default();
 
         // Add the editables themselves to the list of required URLs.
         for (editable, metadata, _) in &manifest.editables {
-            if let Some(previous) = urls.insert(metadata.name.clone(), editable.url.clone()) {
-                if !is_equal(&previous, &editable.url) {
-                    if is_same_reference(&previous, &editable.url) {
-                        debug!("Allowing {} as a variant of {previous}", editable.url);
+            let editable_url = VerbatimParsedUrl {
+                parsed_url: ParsedUrl::LocalFile(ParsedLocalFileUrl {
+                    url: editable.url.to_url(),
+                    path: editable.path.clone(),
+                    editable: true,
+                }),
+                verbatim: editable.url.clone(),
+            };
+            if let Some(previous) = urls.insert(metadata.name.clone(), editable_url.clone()) {
+                if !is_equal(&previous.verbatim, &editable_url.verbatim) {
+                    if is_same_reference(&previous.verbatim, &editable_url.verbatim) {
+                        debug!(
+                            "Allowing {} as a variant of {}",
+                            editable_url.verbatim, previous.verbatim
+                        );
                     } else {
                         return Err(ResolveError::ConflictingUrlsDirect(
                             metadata.name.clone(),
-                            previous.verbatim().to_string(),
-                            editable.verbatim().to_string(),
+                            previous.verbatim.verbatim().to_string(),
+                            editable_url.verbatim.verbatim().to_string(),
                         ));
                     }
                 }
@@ -41,27 +56,76 @@ impl Urls {
         for requirement in manifest.requirements(markers, dependencies) {
             match &requirement.source {
                 RequirementSource::Registry { .. } => {}
-                RequirementSource::Url { url, .. } | RequirementSource::Path { url, .. } => {
+                RequirementSource::Url {
+                    subdirectory,
+                    location,
+                    url,
+                } => {
+                    let url = VerbatimParsedUrl {
+                        parsed_url: ParsedUrl::Archive(ParsedArchiveUrl {
+                            url: location.clone(),
+                            subdirectory: subdirectory.clone(),
+                        }),
+                        verbatim: url.clone(),
+                    };
                     if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) {
-                        if !is_equal(&previous, url) {
+                        if !is_equal(&previous.verbatim, &url.verbatim) {
                             return Err(ResolveError::ConflictingUrlsDirect(
                                 requirement.name.clone(),
-                                previous.verbatim().to_string(),
-                                url.verbatim().to_string(),
+                                previous.verbatim.verbatim().to_string(),
+                                url.verbatim.verbatim().to_string(),
                             ));
                         }
                     }
                 }
-                RequirementSource::Git { url, .. } => {
+                RequirementSource::Path {
+                    path,
+                    editable,
+                    url,
+                } => {
+                    let url = VerbatimParsedUrl {
+                        parsed_url: ParsedUrl::LocalFile(ParsedLocalFileUrl {
+                            url: url.to_url(),
+                            path: path.clone(),
+                            editable: (*editable).unwrap_or_default(),
+                        }),
+                        verbatim: url.clone(),
+                    };
                     if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) {
-                        if !is_equal(&previous, url) {
-                            if is_same_reference(&previous, url) {
-                                debug!("Allowing {url} as a variant of {previous}");
+                        if !is_equal(&previous.verbatim, &url.verbatim) {
+                            return Err(ResolveError::ConflictingUrlsDirect(
+                                requirement.name.clone(),
+                                previous.verbatim.verbatim().to_string(),
+                                url.verbatim.verbatim().to_string(),
+                            ));
+                        }
+                    }
+                }
+                RequirementSource::Git {
+                    repository,
+                    reference,
+                    subdirectory,
+                    url,
+                } => {
+                    let url = VerbatimParsedUrl {
+                        parsed_url: ParsedUrl::Git(ParsedGitUrl {
+                            url: GitUrl::new(repository.clone(), reference.clone()),
+                            subdirectory: subdirectory.clone(),
+                        }),
+                        verbatim: url.clone(),
+                    };
+                    if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) {
+                        if !is_equal(&previous.verbatim, &url.verbatim) {
+                            if is_same_reference(&previous.verbatim, &url.verbatim) {
+                                debug!(
+                                    "Allowing {} as a variant of {}",
+                                    &url.verbatim, previous.verbatim
+                                );
                             } else {
                                 return Err(ResolveError::ConflictingUrlsDirect(
                                     requirement.name.clone(),
-                                    previous.verbatim().to_string(),
-                                    url.verbatim().to_string(),
+                                    previous.verbatim.verbatim().to_string(),
+                                    url.verbatim.verbatim().to_string(),
                                 ));
                             }
                         }
@@ -74,7 +138,7 @@ impl Urls {
     }
 
     /// Return the [`VerbatimUrl`] associated with the given package name, if any.
-    pub(crate) fn get(&self, package: &PackageName) -> Option<&VerbatimUrl> {
+    pub(crate) fn get(&self, package: &PackageName) -> Option<&VerbatimParsedUrl> {
         self.0.get(package)
     }
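
`Urls::from_manifest` now lowers each requirement source into a `VerbatimParsedUrl` up front, but the invariant it enforces is unchanged: a package may be required from at most one URL, except that two Git URLs naming the same reference are tolerated as variants. A compact sketch of that insert-and-compare check, with strings in place of the real URL types and a stand-in for `is_same_reference` (the real one compares resolved commits):

```rust
use std::collections::HashMap;

/// Stand-in for uv's `is_same_reference`: the real one checks whether two Git URLs resolve to
/// the same commit; here we just treat URLs for the same repository as equivalent so the
/// example stays self-contained.
fn is_same_reference(a: &str, b: &str) -> bool {
    let repo = |url: &str| url.split_once('@').map_or(url, |(repo, _)| repo).to_string();
    a.starts_with("git+") && b.starts_with("git+") && repo(a) == repo(b)
}

/// Record the URL a package is required from, rejecting conflicts (cf. `Urls::from_manifest`).
fn insert_url(urls: &mut HashMap<String, String>, package: &str, url: &str) -> Result<(), String> {
    if let Some(previous) = urls.insert(package.to_string(), url.to_string()) {
        if previous != url {
            if is_same_reference(&previous, url) {
                eprintln!("Allowing {url} as a variant of {previous}");
            } else {
                return Err(format!(
                    "conflicting URLs for `{package}`: `{previous}` vs. `{url}`"
                ));
            }
        }
    }
    Ok(())
}

fn main() {
    let mut urls = HashMap::new();
    insert_url(&mut urls, "flask", "git+https://github.com/pallets/flask@main").unwrap();
    // Same repository, different reference: allowed as a variant (logged, not fatal).
    insert_url(&mut urls, "flask", "git+https://github.com/pallets/flask@2.3.0").unwrap();
    // A completely different URL for the same package is a hard error.
    let err = insert_url(&mut urls, "flask", "https://example.com/flask-2.3.0.tar.gz");
    assert!(err.is_err());
    println!("{err:?}");
}
```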


@@ -3592,6 +3592,7 @@ fn compile_html() -> Result<()> {
         .arg("https://download.pytorch.org/whl")
         .env("VIRTUAL_ENV", context.venv.as_os_str())
         .env("UV_NO_WRAP", "1")
+        .env("UV_STACK_SIZE", (2 * 1024 * 1024).to_string())
         .current_dir(context.temp_dir.path()), @r###"
     success: true
     exit_code: 0


@@ -315,6 +315,7 @@ fn link() -> Result<()> {
         .arg(context1.cache_dir.path())
         .env("VIRTUAL_ENV", context2.venv.as_os_str())
         .env("UV_NO_WRAP", "1")
+        .env("UV_STACK_SIZE", (2 * 1024 * 1024).to_string())
         .current_dir(&context2.temp_dir);
 
     if cfg!(all(windows, debug_assertions)) {


@@ -54,6 +54,7 @@ impl VenvTestContext {
             .arg(EXCLUDE_NEWER)
             .env("UV_TEST_PYTHON_PATH", self.python_path.clone())
             .env("UV_NO_WRAP", "1")
+            .env("UV_STACK_SIZE", (2 * 1024 * 1024).to_string())
             .current_dir(self.temp_dir.path());
         command
     }