Invalidate interpreter marker cache (#529)

In a refactor, we lost the cache invalidation behavior for interpreter
markers, leading to stale interpreter errors for me when creating
environments with different Python versions. Specifically, the
modification timestamp used to be part of the _cache key_ when we used
`cacache`. Now it's not -- but it's stored within the cache. So we need
to validate the key after-the-fact.
This commit is contained in:
Charlie Marsh 2023-12-03 17:44:43 -05:00 committed by GitHub
parent ee2fca3a48
commit 2613382747
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 25 deletions

View File

@ -35,7 +35,7 @@ static MISSING_HEADER_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r".*\.(c|c..|h|h..):\d+:\d+: fatal error: (?<header>.*\.(h|h..)): No such file or directory"
)
.unwrap()
.unwrap()
});
#[derive(Error, Debug)]
@ -133,6 +133,7 @@ pub struct PyProjectToml {
}
/// `[build-backend]` from pyproject.toml
#[derive(Debug)]
struct Pep517Backend {
/// The build backend string such as `setuptools.build_meta:__legacy__` or `maturin` from
/// `build-backend.backend` in pyproject.toml
@ -252,14 +253,15 @@ impl SourceBuild {
let venv = gourgeist::create_venv(&temp_dir.path().join(".venv"), interpreter)?;
// There are packages such as DTLSSocket 0.1.16 that say
// There are packages such as DTLSSocket 0.1.16 that say:
// ```toml
// [build-system]
// requires = ["Cython<3", "setuptools", "wheel"]
// ```
// In this case we need to install requires PEP 517 style but then call setup.py in the
// legacy way
let requirements = if let Some(build_system) = &build_system {
// In this case, we need to install requires PEP 517 style but then call setup.py in the
// legacy way.
let requirements = if let Some(build_system) = build_system.as_ref() {
// .filter(|build_system| build_system.build_backend.is_some()) {
let resolved_requirements = build_context
.resolve(&build_system.requires)
.await
@ -324,13 +326,11 @@ impl SourceBuild {
source_dist,
)
.await?;
} else {
if !source_tree.join("setup.py").is_file() {
return Err(Error::InvalidSourceDist(
"The archive contains neither a pyproject.toml or a setup.py at the top level"
.to_string(),
));
}
} else if !source_tree.join("setup.py").is_file() {
return Err(Error::InvalidSourceDist(
"The archive contains neither a `pyproject.toml` nor a `setup.py` file at the top level"
.to_string(),
));
}
Ok(Self {
@ -381,7 +381,7 @@ impl SourceBuild {
return Err(Error::from_command_output(
"Build backend failed to determine metadata through `prepare_metadata_for_build_wheel`".to_string(),
&output,
&self.package_id));
&self.package_id));
}
let message = output
.stdout
@ -445,7 +445,7 @@ impl SourceBuild {
"Expected exactly wheel in `dist/` after invoking setup.py, found {dist_dir:?}"
),
&output,
&self.package_id));
&self.package_id));
};
// TODO(konstin): Faster copy such as reflink? Or maybe don't really let the user pick the target dir
let wheel = wheel_dir.join(dist_wheel.file_name());
@ -598,8 +598,9 @@ async fn create_pep517_build_environment(
}
Ok(())
}
/// Returns the directory with the `pyproject.toml`/`setup.py`
#[instrument(skip_all, fields(sdist = ?sdist.file_name().unwrap_or(sdist.as_os_str())))]
#[instrument(skip_all, fields(sdist = ? sdist.file_name().unwrap_or(sdist.as_os_str())))]
fn extract_archive(sdist: &Path, extracted: &PathBuf) -> Result<PathBuf, Error> {
if sdist
.extension()

View File

@ -103,7 +103,7 @@ impl Interpreter {
}
}
#[derive(Deserialize, Serialize, Clone)]
#[derive(Debug, Deserialize, Serialize, Clone)]
pub(crate) struct InterpreterQueryResult {
pub(crate) markers: MarkerEnvironment,
pub(crate) base_exec_prefix: PathBuf,
@ -166,24 +166,31 @@ impl InterpreterQueryResult {
let cache_dir = cache.bucket(CacheBucket::Interpreter);
let cache_path = cache_dir.join(format!("{}.json", digest(&executable_bytes)));
let modified = fs_err::metadata(executable)?
// Note: This is infallible on windows and unix (i.e., all platforms we support).
.modified()?
.duration_since(UNIX_EPOCH)
.map_err(|err| Error::SystemTime(executable.to_path_buf(), err))?;
// Read from the cache.
if let Ok(data) = fs::read(&cache_path) {
if let Ok(cached) = serde_json::from_slice::<CachedByTimestamp<Self>>(&data) {
debug!("Using cached markers for {}", executable.display());
return Ok(cached.data);
if cached.timestamp == modified.as_millis() {
debug!("Using cached markers for: {}", executable.display());
return Ok(cached.data);
}
debug!(
"Ignoring stale cached markers for: {}",
executable.display()
);
}
}
// Otherwise, run the Python script.
debug!("Detecting markers for {}", executable.display());
debug!("Detecting markers for: {}", executable.display());
let info = Self::query(executable)?;
let modified = fs_err::metadata(executable)?
// Note: This is infallible on windows and unix (i.e. all platforms we support)
.modified()?
.duration_since(UNIX_EPOCH)
.map_err(|err| Error::SystemTime(executable.to_path_buf(), err))?;
// If `executable` is a pyenv shim, a bash script that redirects to the activated
// python executable at another path, we're not allowed to cache the interpreter info
if executable == info.sys_executable {