use std::cmp::max; use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use serde::Deserialize; use tracing::{debug, warn}; use crate::git_info::{Commit, Tags}; use crate::timestamp::Timestamp; #[derive(Debug, thiserror::Error)] pub enum CacheInfoError { #[error("Failed to parse glob patterns for `cache-keys`: {0}")] Glob(#[from] globwalk::GlobError), #[error(transparent)] Io(#[from] std::io::Error), } /// The information used to determine whether a built distribution is up-to-date, based on the /// timestamps of relevant files, the current commit of a repository, etc. #[derive(Default, Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] #[serde(rename_all = "kebab-case")] pub struct CacheInfo { /// The timestamp of the most recent `ctime` of any relevant files, at the time of the build. /// The timestamp will typically be the maximum of the `ctime` values of the `pyproject.toml`, /// `setup.py`, and `setup.cfg` files, if they exist; however, users can provide additional /// files to timestamp via the `cache-keys` field. timestamp: Option, /// The commit at which the distribution was built. commit: Option, /// The Git tags present at the time of the build. tags: Option, /// Environment variables to include in the cache key. #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] env: BTreeMap>, } impl CacheInfo { /// Return the [`CacheInfo`] for a given timestamp. pub fn from_timestamp(timestamp: Timestamp) -> Self { Self { timestamp: Some(timestamp), ..Self::default() } } /// Compute the cache info for a given path, which may be a file or a directory. pub fn from_path(path: &Path) -> Result { let metadata = fs_err::metadata(path)?; if metadata.is_file() { Ok(Self::from_file(path)?) } else { Self::from_directory(path) } } /// Compute the cache info for a given directory. pub fn from_directory(directory: &Path) -> Result { let mut commit = None; let mut tags = None; let mut timestamp = None; let mut env = BTreeMap::new(); // Read the cache keys. let cache_keys = if let Ok(contents) = fs_err::read_to_string(directory.join("pyproject.toml")) { if let Ok(pyproject_toml) = toml::from_str::(&contents) { pyproject_toml .tool .and_then(|tool| tool.uv) .and_then(|tool_uv| tool_uv.cache_keys) } else { None } } else { None }; // If no cache keys were defined, use the defaults. let cache_keys = cache_keys.unwrap_or_else(|| { vec![ CacheKey::Path("pyproject.toml".to_string()), CacheKey::Path("setup.py".to_string()), CacheKey::Path("setup.cfg".to_string()), ] }); // Incorporate timestamps from any direct filepaths. let mut globs = vec![]; for cache_key in cache_keys { match cache_key { CacheKey::Path(file) | CacheKey::File { file } => { if file.chars().any(|c| matches!(c, '*' | '?' | '[' | '{')) { // Defer globs to a separate pass. globs.push(file); continue; } // Treat the path as a file. let path = directory.join(&file); let metadata = match path.metadata() { Ok(metadata) => metadata, Err(err) if err.kind() == std::io::ErrorKind::NotFound => { continue; } Err(err) => { warn!("Failed to read metadata for file: {err}"); continue; } }; if !metadata.is_file() { warn!( "Expected file for cache key, but found directory: `{}`", path.display() ); continue; } timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata))); } CacheKey::Git { git: GitPattern::Bool(true), } => match Commit::from_repository(directory) { Ok(commit_info) => commit = Some(commit_info), Err(err) => { debug!("Failed to read the current commit: {err}"); } }, CacheKey::Git { git: GitPattern::Set(set), } => { if set.commit.unwrap_or(false) { match Commit::from_repository(directory) { Ok(commit_info) => commit = Some(commit_info), Err(err) => { debug!("Failed to read the current commit: {err}"); } } } if set.tags.unwrap_or(false) { match Tags::from_repository(directory) { Ok(tags_info) => tags = Some(tags_info), Err(err) => { debug!("Failed to read the current tags: {err}"); } } } } CacheKey::Git { git: GitPattern::Bool(false), } => {} CacheKey::Environment { env: var } => { let value = std::env::var(&var).ok(); env.insert(var, value); } } } // If we have any globs, process them in a single pass. if !globs.is_empty() { let walker = globwalk::GlobWalkerBuilder::from_patterns(directory, &globs) .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK) .build()?; for entry in walker { let entry = match entry { Ok(entry) => entry, Err(err) => { warn!("Failed to read glob entry: {err}"); continue; } }; let metadata = match entry.metadata() { Ok(metadata) => metadata, Err(err) => { warn!("Failed to read metadata for glob entry: {err}"); continue; } }; if !metadata.is_file() { warn!( "Expected file for cache key, but found directory: `{}`", entry.path().display() ); continue; } timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata))); } } Ok(Self { timestamp, commit, tags, env, }) } /// Compute the cache info for a given file, assumed to be a binary or source distribution /// represented as (e.g.) a `.whl` or `.tar.gz` archive. pub fn from_file(path: impl AsRef) -> std::io::Result { let metadata = fs_err::metadata(path.as_ref())?; let timestamp = Timestamp::from_metadata(&metadata); Ok(Self { timestamp: Some(timestamp), ..Self::default() }) } /// Returns `true` if the cache info is empty. pub fn is_empty(&self) -> bool { self.timestamp.is_none() && self.commit.is_none() && self.tags.is_none() && self.env.is_empty() } } /// A `pyproject.toml` with an (optional) `[tool.uv]` section. #[derive(Debug, Deserialize)] #[serde(rename_all = "kebab-case")] struct PyProjectToml { tool: Option, } #[derive(Debug, Deserialize)] #[serde(rename_all = "kebab-case")] struct Tool { uv: Option, } #[derive(Debug, Deserialize)] #[serde(rename_all = "kebab-case")] struct ToolUv { cache_keys: Option>, } #[derive(Debug, Clone, serde::Deserialize)] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] #[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] pub enum CacheKey { /// Ex) `"Cargo.lock"` or `"**/*.toml"` Path(String), /// Ex) `{ file = "Cargo.lock" }` or `{ file = "**/*.toml" }` File { file: String }, /// Ex) `{ git = true }` or `{ git = { commit = true, tags = false } }` Git { git: GitPattern }, /// Ex) `{ env = "UV_CACHE_INFO" }` Environment { env: String }, } #[derive(Debug, Clone, serde::Deserialize)] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] #[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] pub enum GitPattern { Bool(bool), Set(GitSet), } #[derive(Debug, Clone, serde::Deserialize)] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] #[serde(rename_all = "kebab-case", deny_unknown_fields)] pub struct GitSet { commit: Option, tags: Option, } pub enum FilePattern { Glob(String), Path(PathBuf), }