Allow environment variables to be included in cache keys (#10170)

## Summary

Closes https://github.com/astral-sh/uv/issues/8130.
This commit is contained in:
Charlie Marsh 2024-12-26 10:31:49 -05:00 committed by GitHub
parent e6126ce0dc
commit 0b5c0220b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 125 additions and 8 deletions

View File

@ -1,11 +1,13 @@
use crate::git_info::{Commit, Tags};
use crate::timestamp::Timestamp;
use std::cmp::max;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use serde::Deserialize;
use std::cmp::max;
use std::path::{Path, PathBuf};
use tracing::{debug, warn};
use crate::git_info::{Commit, Tags};
use crate::timestamp::Timestamp;
#[derive(Debug, thiserror::Error)]
pub enum CacheInfoError {
#[error("Failed to parse glob patterns for `cache-keys`: {0}")]
@ -28,6 +30,9 @@ pub struct CacheInfo {
commit: Option<Commit>,
/// The Git tags present at the time of the build.
tags: Option<Tags>,
/// Environment variables to include in the cache key.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
env: BTreeMap<String, Option<String>>,
}
impl CacheInfo {
@ -54,6 +59,7 @@ impl CacheInfo {
let mut commit = None;
let mut tags = None;
let mut timestamp = None;
let mut env = BTreeMap::new();
// Read the cache keys.
let cache_keys =
@ -81,7 +87,7 @@ impl CacheInfo {
// Incorporate timestamps from any direct filepaths.
let mut globs = vec![];
for cache_key in &cache_keys {
for cache_key in cache_keys {
match cache_key {
CacheKey::Path(file) | CacheKey::File { file } => {
if file.chars().any(|c| matches!(c, '*' | '?' | '[' | '{')) {
@ -91,7 +97,7 @@ impl CacheInfo {
}
// Treat the path as a file.
let path = directory.join(file);
let path = directory.join(&file);
let metadata = match path.metadata() {
Ok(metadata) => metadata,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
@ -142,6 +148,10 @@ impl CacheInfo {
CacheKey::Git {
git: GitPattern::Bool(false),
} => {}
CacheKey::Environment { env: var } => {
let value = std::env::var(&var).ok();
env.insert(var, value);
}
}
}
@ -180,6 +190,7 @@ impl CacheInfo {
timestamp,
commit,
tags,
env,
})
}
@ -194,8 +205,12 @@ impl CacheInfo {
})
}
/// Returns `true` if the cache info is empty.
pub fn is_empty(&self) -> bool {
self.timestamp.is_none() && self.commit.is_none() && self.tags.is_none()
self.timestamp.is_none()
&& self.commit.is_none()
&& self.tags.is_none()
&& self.env.is_empty()
}
}
@ -228,6 +243,8 @@ pub enum CacheKey {
File { file: String },
/// Ex) `{ git = true }` or `{ git = { commit = true, tags = false } }`
Git { git: GitPattern },
/// Ex) `{ env = "UV_CACHE_INFO" }`
Environment { env: String },
}
#[derive(Debug, Clone, serde::Deserialize)]

View File

@ -79,6 +79,11 @@ pub struct Options {
/// to include the current Git commit hash in the cache key (in addition to the
/// `pyproject.toml`). Git tags are also supported via `cache-keys = [{ git = { commit = true, tags = true } }]`.
///
/// Cache keys can also include environment variables. For example, if a project relies on
/// `MACOSX_DEPLOYMENT_TARGET` or other environment variables to determine its behavior, you can
/// specify `cache-keys = [{ env = "MACOSX_DEPLOYMENT_TARGET" }]` to invalidate the cache
/// whenever the environment variable changes.
///
/// Cache keys only affect the project defined by the `pyproject.toml` in which they're
/// specified (as opposed to, e.g., affecting all members in a workspace), and all paths and
/// globs are interpreted as relative to the project directory.

View File

@ -3826,6 +3826,75 @@ fn invalidate_path_on_commit() -> Result<()> {
Ok(())
}
/// Install a local package whose `tool.uv.cache-keys` contains `{ env = "FOO" }`, and check
/// that the package is left alone while `FOO` is unchanged but is updated once `FOO` is set
/// to a new value.
#[test]
fn invalidate_path_on_env_var() -> Result<()> {
let context = TestContext::new("3.12");
// Create a local package whose cache keys include the `FOO` environment variable.
context.temp_dir.child("pyproject.toml").write_str(
r#"[project]
name = "example"
version = "0.0.0"
dependencies = ["anyio==4.0.0"]
requires-python = ">=3.8"
[tool.uv]
cache-keys = [{ env = "FOO" }]
"#,
)?;
// Install the package with `FOO` removed from the environment.
uv_snapshot!(context.filters(), context.pip_install()
.arg(".")
.env_remove("FOO"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 4 packages in [TIME]
Prepared 4 packages in [TIME]
Installed 4 packages in [TIME]
+ anyio==4.0.0
+ example==0.0.0 (from file://[TEMP_DIR]/)
+ idna==3.6
+ sniffio==1.3.1
"###
);
// Installing again with `FOO` still unset should be a no-op: the package is only audited.
uv_snapshot!(context.filters(), context.pip_install()
.arg(".")
.env_remove("FOO"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Audited 1 package in [TIME]
"###
);
// Installing again with `FOO` set to a value should update the package: `example` is
// prepared, uninstalled, and installed again, while its dependencies are untouched.
uv_snapshot!(context.filters(), context.pip_install()
.arg(".")
.env("FOO", "BAR"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 4 packages in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
~ example==0.0.0 (from file://[TEMP_DIR]/)
"###
);
Ok(())
}
/// Install from a direct path (wheel) with changed versions in the file name.
#[test]
fn path_name_version_change() {

View File

@ -75,6 +75,14 @@ cache-keys = [{ file = "**/*.toml" }]
The use of globs can be expensive, as uv may need to walk the filesystem to determine whether any files have changed.
This may, in turn, require traversal of large or deeply nested directories.
Similarly, if a project relies on an environment variable, you can add the following to the
project's `pyproject.toml` to invalidate the cache whenever the environment variable changes:
```toml title="pyproject.toml"
[tool.uv]
cache-keys = [{ env = "MY_ENV_VAR" }]
```
As an escape hatch, if a project uses `dynamic` metadata that isn't covered by `tool.uv.cache-keys`,
you can instruct uv to _always_ rebuild and reinstall it by adding the project to the
`tool.uv.reinstall-package` list:

View File

@ -433,6 +433,11 @@ Cache keys can also include version control information. For example, if a proje
to include the current Git commit hash in the cache key (in addition to the
`pyproject.toml`). Git tags are also supported via `cache-keys = [{ git = { commit = true, tags = true } }]`.
Cache keys can also include environment variables. For example, if a project relies on
`MACOSX_DEPLOYMENT_TARGET` or other environment variables to determine its behavior, you can
specify `cache-keys = [{ env = "MACOSX_DEPLOYMENT_TARGET" }]` to invalidate the cache
whenever the environment variable changes.
Cache keys only affect the project defined by the `pyproject.toml` in which they're
specified (as opposed to, e.g., affecting all members in a workspace), and all paths and
globs are interpreted as relative to the project directory.

15
uv.schema.json generated
View File

@ -22,7 +22,7 @@
]
},
"cache-keys": {
"description": "The keys to consider when caching builds for the project.\n\nCache keys enable you to specify the files or directories that should trigger a rebuild when modified. By default, uv will rebuild a project whenever the `pyproject.toml`, `setup.py`, or `setup.cfg` files in the project directory are modified, i.e.:\n\n```toml cache-keys = [{ file = \"pyproject.toml\" }, { file = \"setup.py\" }, { file = \"setup.cfg\" }] ```\n\nAs an example: if a project uses dynamic metadata to read its dependencies from a `requirements.txt` file, you can specify `cache-keys = [{ file = \"requirements.txt\" }, { file = \"pyproject.toml\" }]` to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in addition to watching the `pyproject.toml`).\n\nGlobs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) crate. For example, to invalidate the cache whenever a `.toml` file in the project directory or any of its subdirectories is modified, you can specify `cache-keys = [{ file = \"**/*.toml\" }]`. Note that the use of globs can be expensive, as uv may need to walk the filesystem to determine whether any files have changed.\n\nCache keys can also include version control information. For example, if a project uses `setuptools_scm` to read its version from a Git commit, you can specify `cache-keys = [{ git = { commit = true }, { file = \"pyproject.toml\" }]` to include the current Git commit hash in the cache key (in addition to the `pyproject.toml`). Git tags are also supported via `cache-keys = [{ git = { commit = true, tags = true } }]`.\n\nCache keys only affect the project defined by the `pyproject.toml` in which they're specified (as opposed to, e.g., affecting all members in a workspace), and all paths and globs are interpreted as relative to the project directory.",
"description": "The keys to consider when caching builds for the project.\n\nCache keys enable you to specify the files or directories that should trigger a rebuild when modified. By default, uv will rebuild a project whenever the `pyproject.toml`, `setup.py`, or `setup.cfg` files in the project directory are modified, i.e.:\n\n```toml cache-keys = [{ file = \"pyproject.toml\" }, { file = \"setup.py\" }, { file = \"setup.cfg\" }] ```\n\nAs an example: if a project uses dynamic metadata to read its dependencies from a `requirements.txt` file, you can specify `cache-keys = [{ file = \"requirements.txt\" }, { file = \"pyproject.toml\" }]` to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in addition to watching the `pyproject.toml`).\n\nGlobs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) crate. For example, to invalidate the cache whenever a `.toml` file in the project directory or any of its subdirectories is modified, you can specify `cache-keys = [{ file = \"**/*.toml\" }]`. Note that the use of globs can be expensive, as uv may need to walk the filesystem to determine whether any files have changed.\n\nCache keys can also include version control information. For example, if a project uses `setuptools_scm` to read its version from a Git commit, you can specify `cache-keys = [{ git = { commit = true }, { file = \"pyproject.toml\" }]` to include the current Git commit hash in the cache key (in addition to the `pyproject.toml`). Git tags are also supported via `cache-keys = [{ git = { commit = true, tags = true } }]`.\n\nCache keys can also include environment variables. 
For example, if a project relies on `MACOSX_DEPLOYMENT_TARGET` or other environment variables to determine its behavior, you can specify `cache-keys = [{ env = \"MACOSX_DEPLOYMENT_TARGET\" }]` to invalidate the cache whenever the environment variable changes.\n\nCache keys only affect the project defined by the `pyproject.toml` in which they're specified (as opposed to, e.g., affecting all members in a workspace), and all paths and globs are interpreted as relative to the project directory.",
"type": [
"array",
"null"
@ -556,6 +556,19 @@
}
},
"additionalProperties": false
},
{
"description": "Ex) `{ env = \"UV_CACHE_INFO\" }`",
"type": "object",
"required": [
"env"
],
"properties": {
"env": {
"type": "string"
}
},
"additionalProperties": false
}
]
},