Add a fast-path to skip resolution when installation is complete (#613)

For a very large resolution (a few hundred packages), I see 13ms vs.
400ms for a no-op. It's worth optimizing this case, in my opinion.
This commit is contained in:
Charlie Marsh 2023-12-12 12:43:12 -05:00 committed by GitHub
parent 3aaab32a9d
commit 4fb2e0955e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 104 additions and 12 deletions

View File

@ -1,11 +1,12 @@
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::str::FromStr; use std::str::FromStr;
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Context, Result};
use fs_err as fs;
use pep440_rs::Version; use pep440_rs::Version;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use pypi_types::DirectUrl; use pypi_types::{DirectUrl, Metadata21};
use crate::{Metadata, VersionOrUrl}; use crate::{Metadata, VersionOrUrl};
@ -114,6 +115,7 @@ impl InstalledDist {
} }
} }
/// Return the [`Version`] of the distribution.
pub fn version(&self) -> &Version { pub fn version(&self) -> &Version {
match self { match self {
Self::Registry(dist) => &dist.version, Self::Registry(dist) => &dist.version,
@ -130,4 +132,12 @@ impl InstalledDist {
let direct_url = serde_json::from_reader::<fs_err::File, DirectUrl>(file)?; let direct_url = serde_json::from_reader::<fs_err::File, DirectUrl>(file)?;
Ok(Some(direct_url)) Ok(Some(direct_url))
} }
/// Read and parse the `METADATA` file from this distribution's `.dist-info` directory.
pub fn metadata(&self) -> Result<Metadata21> {
    let metadata_path = self.path().join("METADATA");
    let bytes = fs::read(&metadata_path)?;
    Metadata21::parse(&bytes).with_context(|| {
        format!(
            "Failed to parse METADATA file at: {}",
            metadata_path.display()
        )
    })
}
} }

View File

@ -42,7 +42,7 @@ pub(crate) async fn pip_install(
no_build: bool, no_build: bool,
exclude_newer: Option<DateTime<Utc>>, exclude_newer: Option<DateTime<Utc>>,
cache: Cache, cache: Cache,
printer: Printer, mut printer: Printer,
) -> Result<ExitStatus> { ) -> Result<ExitStatus> {
miette::set_hook(Box::new(|_| { miette::set_hook(Box::new(|_| {
Box::new( Box::new(
@ -54,6 +54,8 @@ pub(crate) async fn pip_install(
) )
}))?; }))?;
let start = std::time::Instant::now();
// Determine the requirements. // Determine the requirements.
let spec = specification(requirements, constraints, extras)?; let spec = specification(requirements, constraints, extras)?;
@ -65,6 +67,23 @@ pub(crate) async fn pip_install(
venv.python_executable().display() venv.python_executable().display()
); );
// If the requirements are already satisfied, we're done. Ideally, the resolver would be fast
// enough to let us remove this check. But right now, for large environments, it's an order of
// magnitude faster to validate the environment than to resolve the requirements.
if reinstall.is_none() && satisfied(&spec, &venv)? {
writeln!(
printer,
"{}",
format!(
"Audited {} in {}",
format!("{} package{}", spec.requirements.len(), "s").bold(),
elapsed(start.elapsed())
)
.dimmed()
)?;
return Ok(ExitStatus::Success);
}
// Resolve the requirements. // Resolve the requirements.
let resolution = resolve( let resolution = resolve(
spec, spec,
@ -140,6 +159,11 @@ fn specification(
Ok(spec) Ok(spec)
} }
/// Returns `true` if the virtual environment already satisfies the requirements
/// (and constraints) in the given specification.
fn satisfied(spec: &RequirementsSpecification, venv: &Virtualenv) -> Result<bool> {
    let site_packages = SitePackages::try_from_executable(venv)?;
    site_packages.satisfies(&spec.requirements, &spec.constraints)
}
/// Resolve a set of requirements, similar to running `pip-compile`. /// Resolve a set of requirements, similar to running `pip-compile`.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
async fn resolve( async fn resolve(

View File

@ -161,8 +161,7 @@ fn install_requirements_txt() -> Result<()> {
----- stdout ----- ----- stdout -----
----- stderr ----- ----- stderr -----
Resolved 2 packages in [TIME] Audited 1 packages in [TIME]
Audited 2 packages in [TIME]
"###); "###);
}); });
@ -239,8 +238,7 @@ fn respect_installed() -> Result<()> {
----- stdout ----- ----- stdout -----
----- stderr ----- ----- stderr -----
Resolved 7 packages in [TIME] Audited 1 packages in [TIME]
Audited 7 packages in [TIME]
"###); "###);
}); });

View File

@ -307,4 +307,9 @@ impl Reinstall {
Self::None Self::None
} }
} }
/// Returns `true` if no packages should be reinstalled.
pub fn is_none(&self) -> bool {
    match self {
        Self::None => true,
        _ => false,
    }
}
} }

View File

@ -1,14 +1,15 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::hash::BuildHasherDefault;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use fs_err as fs; use fs_err as fs;
use rustc_hash::FxHashSet;
use distribution_types::{InstalledDist, Metadata, VersionOrUrl}; use distribution_types::{InstalledDist, Metadata, VersionOrUrl};
use pep440_rs::Version; use pep440_rs::Version;
use pep508_rs::Requirement; use pep508_rs::Requirement;
use puffin_interpreter::Virtualenv; use puffin_interpreter::Virtualenv;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use pypi_types::Metadata21;
#[derive(Debug)] #[derive(Debug)]
pub struct SitePackages<'a> { pub struct SitePackages<'a> {
@ -92,10 +93,7 @@ impl<'a> SitePackages<'a> {
for (package, distribution) in &self.index { for (package, distribution) in &self.index {
// Determine the dependencies for the given package. // Determine the dependencies for the given package.
let path = distribution.path().join("METADATA"); let metadata = distribution.metadata()?;
let contents = fs::read(&path)?;
let metadata = Metadata21::parse(&contents)
.with_context(|| format!("Failed to parse METADATA file at: {}", path.display()))?;
// Verify that the dependencies are installed. // Verify that the dependencies are installed.
for requirement in &metadata.requires_dist { for requirement in &metadata.requires_dist {
@ -130,6 +128,63 @@ impl<'a> SitePackages<'a> {
Ok(diagnostics) Ok(diagnostics)
} }
/// Returns `true` if the installed packages satisfy the given requirements
/// (and their transitive dependencies), subject to the given constraints.
///
/// Returns `Ok(false)` as soon as any (transitively) required package is
/// missing or installed at a version that violates its requirement or an
/// applicable constraint.
pub fn satisfies(
    &self,
    requirements: &[Requirement],
    constraints: &[Requirement],
) -> Result<bool> {
    // Use the requirement list as a work queue so we can walk transitive
    // dependencies iteratively; `seen` guards against cycles and duplicates.
    let mut requirements = requirements.to_vec();
    let mut seen =
        FxHashSet::with_capacity_and_hasher(requirements.len(), BuildHasherDefault::default());

    while let Some(requirement) = requirements.pop() {
        // Skip requirements whose environment markers don't apply here.
        if !requirement.evaluate_markers(self.venv.interpreter().markers(), &[]) {
            continue;
        }

        let Some(distribution) = self.index.get(&requirement.name) else {
            // The package isn't installed.
            return Ok(false);
        };

        // Validate that the installed version matches the requirement.
        match &requirement.version_or_url {
            // URL requirements can't be validated against an installed version here.
            None | Some(pep508_rs::VersionOrUrl::Url(_)) => {}
            Some(pep508_rs::VersionOrUrl::VersionSpecifier(version_specifier)) => {
                if !version_specifier.contains(distribution.version()) {
                    // The installed version doesn't satisfy the requirement.
                    return Ok(false);
                }
            }
        }

        // Validate that the installed version satisfies the constraints.
        for constraint in constraints {
            // A constraint only applies to the package it names; previously every
            // constraint was checked against every distribution, which could
            // wrongly report unrelated packages as unsatisfied.
            if constraint.name != requirement.name {
                continue;
            }
            if !constraint.evaluate_markers(self.venv.interpreter().markers(), &[]) {
                continue;
            }
            match &constraint.version_or_url {
                None | Some(pep508_rs::VersionOrUrl::Url(_)) => {}
                Some(pep508_rs::VersionOrUrl::VersionSpecifier(version_specifier)) => {
                    if !version_specifier.contains(distribution.version()) {
                        // The installed version doesn't satisfy the constraint.
                        return Ok(false);
                    }
                }
            }
        }

        // Recurse into the dependencies, visiting each requirement only once.
        if seen.insert(requirement) {
            requirements.extend(distribution.metadata()?.requires_dist);
        }
    }

    Ok(true)
}
} }
impl IntoIterator for SitePackages<'_> { impl IntoIterator for SitePackages<'_> {