diff --git a/crates/uv-resolver/src/lock.rs b/crates/uv-resolver/src/lock.rs index eeba08d01..f08e6a581 100644 --- a/crates/uv-resolver/src/lock.rs +++ b/crates/uv-resolver/src/lock.rs @@ -1,27 +1,133 @@ // Temporarily allowed because this module is still in a state of flux // as we build out universal locking. -#![allow(dead_code, unreachable_code)] +#![allow(dead_code, unreachable_code, unused_variables)] +use std::collections::VecDeque; + +use distribution_filename::WheelFilename; use distribution_types::{ - BuiltDist, DirectUrlBuiltDist, DirectUrlSourceDist, Dist, DistributionMetadata, GitSourceDist, - IndexUrl, Name, PathBuiltDist, PathSourceDist, RegistryBuiltDist, RegistrySourceDist, - ResolvedDist, ToUrlError, VersionOrUrl, + BuiltDist, DirectUrlBuiltDist, DirectUrlSourceDist, Dist, DistributionMetadata, FileLocation, + GitSourceDist, IndexUrl, Name, PathBuiltDist, PathSourceDist, RegistryBuiltDist, + RegistrySourceDist, Resolution, ResolvedDist, ToUrlError, VersionOrUrl, }; use pep440_rs::Version; +use pep508_rs::{MarkerEnvironment, VerbatimUrl}; +use platform_tags::{TagCompatibility, TagPriority, Tags}; use pypi_types::HashDigest; +use rustc_hash::FxHashMap; use url::Url; +use uv_normalize::PackageName; #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +#[cfg_attr(feature = "serde", serde(into = "LockWire", try_from = "LockWire"))] pub struct Lock { + version: u32, + distributions: Vec, + /// A map from distribution ID to index in `distributions`. + /// + /// This can be used to quickly lookup the full distribution for any ID + /// in this lock. For example, the dependencies for each distribution are + /// listed as distributions IDs. This map can be used to find the full + /// distribution for each such dependency. + /// + /// It is guaranteed that every distribution in this lock has an entry in + /// this map, and that every dependency for every distribution has an ID + /// that exists in this map. That is, there are no dependencies that don't + /// have a corresponding locked distribution entry in the same lock file. + by_id: FxHashMap, +} + +impl Lock { + pub(crate) fn new(distributions: Vec) -> Result { + let wire = LockWire { + version: 1, + distributions, + }; + Lock::try_from(wire) + } + + pub fn to_resolution( + &self, + marker_env: &MarkerEnvironment, + tags: &Tags, + root_name: &PackageName, + ) -> Resolution { + let root = self + .find_by_name(root_name) + // TODO: In the future, we should derive the root distribution + // from the pyproject.toml, but I don't think the infrastructure + // for that is in place yet. For now, we ask the caller to specify + // the root package name explicitly, and we assume here that it is + // correct. + .expect("found too many distributions matching root") + .expect("could not find root"); + let mut queue: VecDeque<&Distribution> = VecDeque::new(); + queue.push_back(root); + + let mut map = FxHashMap::default(); + while let Some(dist) = queue.pop_front() { + for dep in &dist.dependencies { + let dep_dist = self.find_by_id(&dep.id); + queue.push_back(dep_dist); + } + let name = PackageName::new(dist.id.name.to_string()).unwrap(); + let resolved_dist = ResolvedDist::Installable(dist.to_dist(marker_env, tags)); + map.insert(name, resolved_dist); + } + Resolution::new(map) + } + + /// Returns the distribution with the given name. If there are multiple + /// matching distributions, then an error is returned. If there are no + /// matching distributions, then `Ok(None)` is returned. + fn find_by_name(&self, name: &PackageName) -> Result, String> { + let mut found_dist = None; + for dist in &self.distributions { + if &dist.id.name == name { + if found_dist.is_some() { + return Err(format!("found multiple distributions matching `{name}`")); + } + found_dist = Some(dist); + } + } + Ok(found_dist) + } + + fn find_by_id(&self, id: &DistributionId) -> &Distribution { + let index = *self.by_id.get(id).expect("locked distribution for ID"); + let dist = self + .distributions + .get(index) + .expect("valid index for distribution"); + dist + } +} + +#[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +struct LockWire { version: u32, #[cfg_attr(feature = "serde", serde(rename = "distribution"))] distributions: Vec, } -impl Lock { - pub(crate) fn new(mut distributions: Vec) -> Result { - for dist in &mut distributions { +impl From for LockWire { + fn from(lock: Lock) -> LockWire { + LockWire { + version: lock.version, + distributions: lock.distributions, + } + } +} + +impl TryFrom for Lock { + type Error = LockError; + + fn try_from(mut wire: LockWire) -> Result { + // Put all dependencies for each distribution in a canonical order and + // check for duplicates. + for dist in &mut wire.distributions { dist.dependencies.sort(); for windows in dist.dependencies.windows(2) { let (dep1, dep2) = (&windows[0], &windows[1]); @@ -33,16 +139,34 @@ impl Lock { } } } - distributions.sort_by(|dist1, dist2| dist1.id.cmp(&dist2.id)); - for window in distributions.windows(2) { - let (dist1, dist2) = (&window[0], &window[1]); - if dist1.id == dist2.id { - return Err(LockError::duplicate_distribution(dist1.id.clone())); + wire.distributions + .sort_by(|dist1, dist2| dist1.id.cmp(&dist2.id)); + + // Check for duplicate distribution IDs and also build up the map for + // distributions keyed by their ID. + let mut by_id = FxHashMap::default(); + for (i, dist) in wire.distributions.iter().enumerate() { + if by_id.insert(dist.id.clone(), i).is_some() { + return Err(LockError::duplicate_distribution(dist.id.clone())); + } + } + // Check that every dependency has an entry in `by_id`. If any don't, + // it implies we somehow have a dependency with no corresponding locked + // distribution. + for dist in &wire.distributions { + for dep in &dist.dependencies { + if !by_id.contains_key(&dep.id) { + return Err(LockError::unrecognized_dependency( + dist.id.clone(), + dep.id.clone(), + )); + } } } Ok(Lock { - version: 1, - distributions, + version: wire.version, + distributions: wire.distributions, + by_id, }) } } @@ -52,14 +176,19 @@ impl Lock { pub(crate) struct Distribution { #[cfg_attr(feature = "serde", serde(flatten))] pub(crate) id: DistributionId, + #[cfg_attr(feature = "serde", serde(default))] pub(crate) marker: Option, + #[cfg_attr(feature = "serde", serde(default))] pub(crate) sourcedist: Option, #[cfg_attr( feature = "serde", - serde(rename = "wheel", skip_serializing_if = "Vec::is_empty") + serde(default, rename = "wheel", skip_serializing_if = "Vec::is_empty") )] pub(crate) wheels: Vec, - #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Vec::is_empty"))] + #[cfg_attr( + feature = "serde", + serde(default, skip_serializing_if = "Vec::is_empty") + )] pub(crate) dependencies: Vec, } @@ -90,19 +219,73 @@ impl Distribution { self.dependencies .push(Dependency::from_resolved_dist(resolved_dist)); } + + fn to_dist(&self, _marker_env: &MarkerEnvironment, tags: &Tags) -> Dist { + if let Some(wheel) = self.find_best_wheel(tags) { + return match self.id.source.kind { + SourceKind::Registry => { + let filename: WheelFilename = wheel.filename.clone(); + let file = Box::new(distribution_types::File { + dist_info_metadata: false, + filename: filename.to_string(), + hashes: vec![], + requires_python: None, + size: None, + upload_time_utc_ms: None, + url: FileLocation::AbsoluteUrl(wheel.url.to_string()), + yanked: None, + }); + let index = IndexUrl::Pypi(VerbatimUrl::from_url(self.id.source.url.clone())); + let reg_dist = RegistryBuiltDist { + filename, + file, + index, + }; + let built_dist = BuiltDist::Registry(reg_dist); + Dist::Built(built_dist) + } + // TODO: Handle other kinds of sources. + _ => todo!(), + }; + } + // TODO: Handle source dists. + + // TODO: Convert this to a deserialization error. + panic!("invalid lock distribution") + } + + fn find_best_wheel(&self, tags: &Tags) -> Option<&Wheel> { + let mut best: Option<(TagPriority, &Wheel)> = None; + for wheel in &self.wheels { + let TagCompatibility::Compatible(priority) = wheel.filename.compatibility(tags) else { + continue; + }; + match best { + None => { + best = Some((priority, wheel)); + } + Some((best_priority, _)) => { + if priority > best_priority { + best = Some((priority, wheel)); + } + } + } + } + best.map(|(_, wheel)| wheel) + } } -#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub(crate) struct DistributionId { - pub(crate) name: String, + pub(crate) name: PackageName, pub(crate) version: Version, pub(crate) source: Source, } impl DistributionId { fn from_resolved_dist(resolved_dist: &ResolvedDist) -> DistributionId { - let name = resolved_dist.name().to_string(); + let name = resolved_dist.name().clone(); let version = match resolved_dist.version_or_url() { VersionOrUrl::Version(v) => v.clone(), // TODO: We need a way to thread the version number for these @@ -125,7 +308,7 @@ impl std::fmt::Display for DistributionId { } } -#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] pub(crate) struct Source { kind: SourceKind, url: Url, @@ -298,7 +481,7 @@ impl<'de> serde::Deserialize<'de> for Source { /// variants should be added without changing the relative ordering of other /// variants. Otherwise, this could cause the lock file to have a different /// canonical ordering of distributions. -#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] pub(crate) enum SourceKind { @@ -323,7 +506,7 @@ impl SourceKind { /// variants should be added without changing the relative ordering of other /// variants. Otherwise, this could cause the lock file to have a different /// canonical ordering of distributions. -#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub(crate) struct GitSource { precise: Option, @@ -353,7 +536,7 @@ impl GitSource { } } -#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] enum GitSourceKind { Tag(String), @@ -451,6 +634,7 @@ impl SourceDist { /// Inspired by: #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +#[cfg_attr(feature = "serde", serde(into = "WheelWire", try_from = "WheelWire"))] pub(crate) struct Wheel { /// A URL or file path (via `file://`) where the wheel that was locked /// against was found. The location does not need to exist in the future, @@ -459,13 +643,13 @@ pub(crate) struct Wheel { url: Url, /// A hash of the source distribution. hash: Hash, - // THOUGHT: Would it be better to include a more structured representation - // of the wheel's filename in the lock file itself? e.g., All of the wheel - // tags. This would avoid needing to parse the wheel tags out of the URL, - // which is a potentially fallible operation. But, I think it is nice to - // have just the URL which is more succinct and doesn't result in encoding - // the same information twice. Probably the best thing to do here is to add - // the wheel tags fields here, but don't serialize them. + /// The filename of the wheel. + /// + /// This isn't part of the wire format since it's redundant with the + /// URL. But we do use it for various things, and thus compute it at + /// deserialization time. Not being able to extract a wheel filename from a + /// wheel URL is thus a deserialization error. + filename: WheelFilename, } impl Wheel { @@ -496,13 +680,18 @@ impl Wheel { fn from_registry_dist(reg_dist: &RegistryBuiltDist) -> Result { // FIXME: Is it guaranteed that there is at least one hash? // If not, we probably need to make this fallible. + let filename = reg_dist.filename.clone(); let url = reg_dist .file .url .to_url() .map_err(LockError::invalid_file_url)?; let hash = Hash::from(reg_dist.file.hashes[0].clone()); - Ok(Wheel { url, hash }) + Ok(Wheel { + url, + hash, + filename, + }) } fn from_direct_dist(direct_dist: &DirectUrlBuiltDist) -> Wheel { @@ -510,6 +699,7 @@ impl Wheel { url: direct_dist.url.to_url(), // TODO: We want a hash for the artifact at the URL. hash: todo!(), + filename: direct_dist.filename.clone(), } } @@ -518,10 +708,53 @@ impl Wheel { url: path_dist.url.to_url(), // TODO: We want a hash for the artifact at the URL. hash: todo!(), + filename: path_dist.filename.clone(), } } } +#[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +struct WheelWire { + /// A URL or file path (via `file://`) where the wheel that was locked + /// against was found. The location does not need to exist in the future, + /// so this should be treated as only a hint to where to look and/or + /// recording where the wheel file originally came from. + url: Url, + /// A hash of the source distribution. + hash: Hash, +} + +impl From for WheelWire { + fn from(wheel: Wheel) -> WheelWire { + WheelWire { + url: wheel.url, + hash: wheel.hash, + } + } +} + +impl TryFrom for Wheel { + type Error = String; + + fn try_from(wire: WheelWire) -> Result { + let path_segments = wire + .url + .path_segments() + .ok_or_else(|| format!("could not extract path from URL `{}`", wire.url))?; + // This is guaranteed by the contract of Url::path_segments. + let last = path_segments.last().expect("path segments is non-empty"); + let filename = last + .parse::() + .map_err(|err| format!("failed to parse `{last}` as wheel filename: {err}"))?; + Ok(Wheel { + url: wire.url, + hash: wire.hash, + filename, + }) + } +} + /// A single dependency of a distribution in a lock file. #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] @@ -626,6 +859,14 @@ impl LockError { kind: Box::new(kind), } } + + fn unrecognized_dependency(id: DistributionId, dependency_id: DistributionId) -> LockError { + let err = UnrecognizedDependencyError { id, dependency_id }; + let kind = LockErrorKind::UnrecognizedDependency { err }; + LockError { + kind: Box::new(kind), + } + } } impl std::error::Error for LockError { @@ -634,6 +875,7 @@ impl std::error::Error for LockError { LockErrorKind::DuplicateDistribution { .. } => None, LockErrorKind::DuplicateDependency { .. } => None, LockErrorKind::InvalidFileUrl { ref err } => Some(err), + LockErrorKind::UnrecognizedDependency { ref err } => Some(err), } } } @@ -656,6 +898,9 @@ impl std::fmt::Display for LockError { LockErrorKind::InvalidFileUrl { .. } => { write!(f, "failed to parse wheel or source dist URL") } + LockErrorKind::UnrecognizedDependency { .. } => { + write!(f, "found unrecognized dependency") + } } } } @@ -684,6 +929,40 @@ enum LockErrorKind { /// errant URL in its error message. err: ToUrlError, }, + /// An error that occurs when the caller provides a distribution with a + /// dependency that doesn't correspond to any other distribution in the + /// lock file. + UnrecognizedDependency { + /// The actual error. + err: UnrecognizedDependencyError, + }, +} + +/// An error that occurs when there's an unrecognized dependency. +/// +/// That is, a dependency for a distribution that isn't in the lock file. +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct UnrecognizedDependencyError { + /// The ID of the distribution that has an unrecognized dependency. + id: DistributionId, + /// The ID of the dependency that doesn't have a corresponding distribution + /// entry. + dependency_id: DistributionId, +} + +impl std::error::Error for UnrecognizedDependencyError {} + +impl std::fmt::Display for UnrecognizedDependencyError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let UnrecognizedDependencyError { + ref id, + ref dependency_id, + } = *self; + write!( + f, + "found dependency `{dependency_id}` for `{id}` with no locked distribution" + ) + } } /// An error that occurs when a source string could not be parsed. diff --git a/crates/uv/src/cli.rs b/crates/uv/src/cli.rs index 2ea9d46ca..4a3372c05 100644 --- a/crates/uv/src/cli.rs +++ b/crates/uv/src/cli.rs @@ -1329,6 +1329,9 @@ pub(crate) struct PipInstallArgs { /// print the resulting plan. #[arg(long)] pub(crate) dry_run: bool, + + #[arg(long, hide = true)] + pub(crate) unstable_uv_lock_file: Option, } #[derive(Args)] diff --git a/crates/uv/src/commands/pip_install.rs b/crates/uv/src/commands/pip_install.rs index a9b7bcfac..5fab569f7 100644 --- a/crates/uv/src/commands/pip_install.rs +++ b/crates/uv/src/commands/pip_install.rs @@ -4,6 +4,7 @@ use std::path::Path; use anstream::eprint; use anyhow::{anyhow, Context, Result}; +use fs_err as fs; use itertools::Itertools; use owo_colors::OwoColorize; use tempfile::tempdir_in; @@ -40,7 +41,7 @@ use uv_requirements::{ RequirementsSpecification, SourceTreeResolver, }; use uv_resolver::{ - DependencyMode, ExcludeNewer, Exclusions, FlatIndex, InMemoryIndex, Manifest, Options, + DependencyMode, ExcludeNewer, Exclusions, FlatIndex, InMemoryIndex, Lock, Manifest, Options, OptionsBuilder, PreReleaseMode, Preference, ResolutionGraph, ResolutionMode, Resolver, }; use uv_types::{BuildIsolation, HashStrategy, InFlight}; @@ -84,6 +85,7 @@ pub(crate) async fn pip_install( system: bool, break_system_packages: bool, target: Option, + uv_lock: Option, native_tls: bool, cache: Cache, dry_run: bool, @@ -329,47 +331,6 @@ pub(crate) async fn pip_install( ) .with_options(OptionsBuilder::new().exclude_newer(exclude_newer).build()); - // Resolve the requirements from the provided sources. - let requirements = { - // Convert from unnamed to named requirements. - let mut requirements = NamedRequirementsResolver::new( - requirements, - &hasher, - &resolve_dispatch, - &client, - &index, - ) - .with_reporter(ResolverReporter::from(printer)) - .resolve() - .await?; - - // Resolve any source trees into requirements. - if !source_trees.is_empty() { - requirements.extend( - SourceTreeResolver::new( - source_trees, - extras, - &hasher, - &resolve_dispatch, - &client, - &index, - ) - .with_reporter(ResolverReporter::from(printer)) - .resolve() - .await?, - ); - } - - requirements - }; - - // Resolve the overrides from the provided sources. - let overrides = - NamedRequirementsResolver::new(overrides, &hasher, &resolve_dispatch, &client, &index) - .with_reporter(ResolverReporter::from(printer)) - .resolve() - .await?; - // Build all editable distributions. The editables are shared between resolution and // installation, and should live for the duration of the command. If an editable is already // installed in the environment, we'll still re-build it here. @@ -392,45 +353,93 @@ pub(crate) async fn pip_install( .await? }; - let options = OptionsBuilder::new() - .resolution_mode(resolution_mode) - .prerelease_mode(prerelease_mode) - .dependency_mode(dependency_mode) - .exclude_newer(exclude_newer) - .index_strategy(index_strategy) - .build(); - // Resolve the requirements. - let resolution = match resolve( - requirements, - constraints, - overrides, - project, - &editables, - &hasher, - &site_packages, - &reinstall, - &upgrade, - &interpreter, - &tags, - &markers, - &client, - &flat_index, - &index, - &resolve_dispatch, - options, - printer, - ) - .await - { - Ok(resolution) => Resolution::from(resolution), - Err(Error::Resolve(uv_resolver::ResolveError::NoSolution(err))) => { - let report = miette::Report::msg(format!("{err}")) - .context("No solution found when resolving dependencies:"); - eprint!("{report:?}"); - return Ok(ExitStatus::Failure); + let resolution = if let Some(ref root) = uv_lock { + let root = PackageName::new(root.to_string())?; + let encoded = fs::tokio::read_to_string("uv.lock").await?; + let lock: Lock = toml::from_str(&encoded)?; + lock.to_resolution(&markers, &tags, &root) + } else { + // Resolve the requirements from the provided sources. + let requirements = { + // Convert from unnamed to named requirements. + let mut requirements = NamedRequirementsResolver::new( + requirements, + &hasher, + &resolve_dispatch, + &client, + &index, + ) + .with_reporter(ResolverReporter::from(printer)) + .resolve() + .await?; + + // Resolve any source trees into requirements. + if !source_trees.is_empty() { + requirements.extend( + SourceTreeResolver::new( + source_trees, + extras, + &hasher, + &resolve_dispatch, + &client, + &index, + ) + .with_reporter(ResolverReporter::from(printer)) + .resolve() + .await?, + ); + } + + requirements + }; + + // Resolve the overrides from the provided sources. + let overrides = + NamedRequirementsResolver::new(overrides, &hasher, &resolve_dispatch, &client, &index) + .with_reporter(ResolverReporter::from(printer)) + .resolve() + .await?; + + let options = OptionsBuilder::new() + .resolution_mode(resolution_mode) + .prerelease_mode(prerelease_mode) + .dependency_mode(dependency_mode) + .exclude_newer(exclude_newer) + .index_strategy(index_strategy) + .build(); + + match resolve( + requirements, + constraints, + overrides, + project, + &editables, + &hasher, + &site_packages, + &reinstall, + &upgrade, + &interpreter, + &tags, + &markers, + &client, + &flat_index, + &index, + &resolve_dispatch, + options, + printer, + ) + .await + { + Ok(resolution) => Resolution::from(resolution), + Err(Error::Resolve(uv_resolver::ResolveError::NoSolution(err))) => { + let report = miette::Report::msg(format!("{err}")) + .context("No solution found when resolving dependencies:"); + eprint!("{report:?}"); + return Ok(ExitStatus::Failure); + } + Err(err) => return Err(err.into()), } - Err(err) => return Err(err.into()), }; // Re-initialize the in-flight map. diff --git a/crates/uv/src/main.rs b/crates/uv/src/main.rs index 8cd50bb50..a210a8658 100644 --- a/crates/uv/src/main.rs +++ b/crates/uv/src/main.rs @@ -339,6 +339,7 @@ async fn run() -> Result { args.shared.system, args.shared.break_system_packages, args.shared.target, + args.uv_lock, globals.native_tls, cache, args.dry_run, diff --git a/crates/uv/src/settings.rs b/crates/uv/src/settings.rs index fb7562a55..4a771b0ae 100644 --- a/crates/uv/src/settings.rs +++ b/crates/uv/src/settings.rs @@ -390,6 +390,7 @@ pub(crate) struct PipInstallSettings { pub(crate) reinstall: Reinstall, pub(crate) refresh: Refresh, pub(crate) dry_run: bool, + pub(crate) uv_lock: Option, // Shared settings. pub(crate) shared: PipSharedSettings, } @@ -454,6 +455,7 @@ impl PipInstallSettings { no_strict, exclude_newer, dry_run, + unstable_uv_lock_file, } = args; Self { @@ -470,6 +472,7 @@ impl PipInstallSettings { reinstall: Reinstall::from_args(flag(reinstall, no_reinstall), reinstall_package), refresh: Refresh::from_args(flag(refresh, no_refresh), refresh_package), dry_run, + uv_lock: unstable_uv_lock_file, // Shared settings. shared: PipSharedSettings::combine(