From 4fb2e0955ee7e4a46813198d67e0ca667eae45db Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 12 Dec 2023 12:43:12 -0500 Subject: [PATCH] Add a fast-path to skip resolution when installation is complete (#613) For a very large resolution (a few hundred packages), I see 13ms vs. 400ms for a no-op. It's worth optimizing this case, in my opinion. --- crates/distribution-types/src/installed.rs | 14 +++- crates/puffin-cli/src/commands/pip_install.rs | 26 +++++++- crates/puffin-cli/tests/pip_install.rs | 6 +- crates/puffin-installer/src/plan.rs | 5 ++ crates/puffin-installer/src/site_packages.rs | 65 +++++++++++++++++-- 5 files changed, 104 insertions(+), 12 deletions(-) diff --git a/crates/distribution-types/src/installed.rs b/crates/distribution-types/src/installed.rs index 0d65c31c8..ed470723e 100644 --- a/crates/distribution-types/src/installed.rs +++ b/crates/distribution-types/src/installed.rs @@ -1,11 +1,12 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; +use fs_err as fs; use pep440_rs::Version; use puffin_normalize::PackageName; -use pypi_types::DirectUrl; +use pypi_types::{DirectUrl, Metadata21}; use crate::{Metadata, VersionOrUrl}; @@ -114,6 +115,7 @@ impl InstalledDist { } } + /// Return the [`Version`] of the distribution. pub fn version(&self) -> &Version { match self { Self::Registry(dist) => &dist.version, @@ -130,4 +132,12 @@ impl InstalledDist { let direct_url = serde_json::from_reader::(file)?; Ok(Some(direct_url)) } + + /// Read the `METADATA` file from a `.dist-info` directory. + pub fn metadata(&self) -> Result { + let path = self.path().join("METADATA"); + let contents = fs::read(&path)?; + Metadata21::parse(&contents) + .with_context(|| format!("Failed to parse METADATA file at: {}", path.display())) + } } diff --git a/crates/puffin-cli/src/commands/pip_install.rs b/crates/puffin-cli/src/commands/pip_install.rs index eb26ad646..27016a352 100644 --- a/crates/puffin-cli/src/commands/pip_install.rs +++ b/crates/puffin-cli/src/commands/pip_install.rs @@ -42,7 +42,7 @@ pub(crate) async fn pip_install( no_build: bool, exclude_newer: Option>, cache: Cache, - printer: Printer, + mut printer: Printer, ) -> Result { miette::set_hook(Box::new(|_| { Box::new( @@ -54,6 +54,8 @@ pub(crate) async fn pip_install( ) }))?; + let start = std::time::Instant::now(); + // Determine the requirements. let spec = specification(requirements, constraints, extras)?; @@ -65,6 +67,23 @@ pub(crate) async fn pip_install( venv.python_executable().display() ); + // If the requirements are already satisfied, we're done. Ideally, the resolver would be fast + // enough to let us remove this check. But right now, for large environments, it's an order of + // magnitude faster to validate the environment than to resolve the requirements. + if reinstall.is_none() && satisfied(&spec, &venv)? { + writeln!( + printer, + "{}", + format!( + "Audited {} in {}", + format!("{} package{}", spec.requirements.len(), "s").bold(), + elapsed(start.elapsed()) + ) + .dimmed() + )?; + return Ok(ExitStatus::Success); + } + // Resolve the requirements. let resolution = resolve( spec, @@ -140,6 +159,11 @@ fn specification( Ok(spec) } +/// Returns `true` if the requirements are already satisfied. +fn satisfied(spec: &RequirementsSpecification, venv: &Virtualenv) -> Result { + SitePackages::try_from_executable(venv)?.satisfies(&spec.requirements, &spec.constraints) +} + /// Resolve a set of requirements, similar to running `pip-compile`. #[allow(clippy::too_many_arguments)] async fn resolve( diff --git a/crates/puffin-cli/tests/pip_install.rs b/crates/puffin-cli/tests/pip_install.rs index 85908516d..83abd7e55 100644 --- a/crates/puffin-cli/tests/pip_install.rs +++ b/crates/puffin-cli/tests/pip_install.rs @@ -161,8 +161,7 @@ fn install_requirements_txt() -> Result<()> { ----- stdout ----- ----- stderr ----- - Resolved 2 packages in [TIME] - Audited 2 packages in [TIME] + Audited 1 packages in [TIME] "###); }); @@ -239,8 +238,7 @@ fn respect_installed() -> Result<()> { ----- stdout ----- ----- stderr ----- - Resolved 7 packages in [TIME] - Audited 7 packages in [TIME] + Audited 1 packages in [TIME] "###); }); diff --git a/crates/puffin-installer/src/plan.rs b/crates/puffin-installer/src/plan.rs index a7096ddc9..a3db22f71 100644 --- a/crates/puffin-installer/src/plan.rs +++ b/crates/puffin-installer/src/plan.rs @@ -307,4 +307,9 @@ impl Reinstall { Self::None } } + + /// Returns `true` if no packages should be reinstalled. + pub fn is_none(&self) -> bool { + matches!(self, Self::None) + } } diff --git a/crates/puffin-installer/src/site_packages.rs b/crates/puffin-installer/src/site_packages.rs index 2659f3e40..da89f90e6 100644 --- a/crates/puffin-installer/src/site_packages.rs +++ b/crates/puffin-installer/src/site_packages.rs @@ -1,14 +1,15 @@ use std::collections::BTreeMap; +use std::hash::BuildHasherDefault; use anyhow::{Context, Result}; use fs_err as fs; +use rustc_hash::FxHashSet; use distribution_types::{InstalledDist, Metadata, VersionOrUrl}; use pep440_rs::Version; use pep508_rs::Requirement; use puffin_interpreter::Virtualenv; use puffin_normalize::PackageName; -use pypi_types::Metadata21; #[derive(Debug)] pub struct SitePackages<'a> { @@ -92,10 +93,7 @@ impl<'a> SitePackages<'a> { for (package, distribution) in &self.index { // Determine the dependencies for the given package. - let path = distribution.path().join("METADATA"); - let contents = fs::read(&path)?; - let metadata = Metadata21::parse(&contents) - .with_context(|| format!("Failed to parse METADATA file at: {}", path.display()))?; + let metadata = distribution.metadata()?; // Verify that the dependencies are installed. for requirement in &metadata.requires_dist { @@ -130,6 +128,63 @@ impl<'a> SitePackages<'a> { Ok(diagnostics) } + + /// Returns `true` if the installed packages satisfy the given requirements. + pub fn satisfies( + &self, + requirements: &[Requirement], + constraints: &[Requirement], + ) -> Result { + let mut requirements = requirements.to_vec(); + let mut seen = + FxHashSet::with_capacity_and_hasher(requirements.len(), BuildHasherDefault::default()); + + while let Some(requirement) = requirements.pop() { + if !requirement.evaluate_markers(self.venv.interpreter().markers(), &[]) { + continue; + } + + let Some(distribution) = self.index.get(&requirement.name) else { + // The package isn't installed. + return Ok(false); + }; + + // Validate that the installed version matches the requirement. + match &requirement.version_or_url { + None | Some(pep508_rs::VersionOrUrl::Url(_)) => {} + Some(pep508_rs::VersionOrUrl::VersionSpecifier(version_specifier)) => { + // The installed version doesn't satisfy the requirement. + if !version_specifier.contains(distribution.version()) { + return Ok(false); + } + } + } + + // Validate that the installed version satisfies the constraints. + for constraint in constraints { + if !constraint.evaluate_markers(self.venv.interpreter().markers(), &[]) { + continue; + } + + match &constraint.version_or_url { + None | Some(pep508_rs::VersionOrUrl::Url(_)) => {} + Some(pep508_rs::VersionOrUrl::VersionSpecifier(version_specifier)) => { + // The installed version doesn't satisfy the constraint. + if !version_specifier.contains(distribution.version()) { + return Ok(false); + } + } + } + } + + // Recurse into the dependencies. + if seen.insert(requirement) { + requirements.extend(distribution.metadata()?.requires_dist); + } + } + + Ok(true) + } } impl IntoIterator for SitePackages<'_> {