mirror of https://github.com/astral-sh/ruff
Keep track of when files are last seen in the cache (#5214)
## Summary And remove cached files that we haven't seen for a certain period of time, currently 30 days. For the last seen timestamp we actually use an `u64`, it's smaller on disk than `SystemTime` (which size is OS dependent) and fits in an `AtomicU64` which we can use to update it without locks. ## Test Plan Added a new unit test, run by `cargo test`.
This commit is contained in:
parent
2dfa6ff58d
commit
1c638264b2
|
|
@ -0,0 +1,4 @@
|
||||||
|
# NOTE: sync with cache::invalidation test
|
||||||
|
a = 1
|
||||||
|
|
||||||
|
__all__ = list(["a", "b"])
|
||||||
|
|
@ -3,8 +3,9 @@ use std::fs::{self, File};
|
||||||
use std::hash::Hasher;
|
use std::hash::Hasher;
|
||||||
use std::io::{self, BufReader, BufWriter, Write};
|
use std::io::{self, BufReader, BufWriter, Write};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
use std::time::SystemTime;
|
use std::time::{Duration, SystemTime};
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
@ -20,6 +21,9 @@ use ruff_text_size::{TextRange, TextSize};
|
||||||
|
|
||||||
use crate::diagnostics::Diagnostics;
|
use crate::diagnostics::Diagnostics;
|
||||||
|
|
||||||
|
/// Maximum duration for which we keep a file in cache that hasn't been seen.
|
||||||
|
const MAX_LAST_SEEN: Duration = Duration::from_secs(30 * 24 * 60 * 60); // 30 days.
|
||||||
|
|
||||||
/// [`Path`] that is relative to the package root in [`PackageCache`].
|
/// [`Path`] that is relative to the package root in [`PackageCache`].
|
||||||
pub(crate) type RelativePath = Path;
|
pub(crate) type RelativePath = Path;
|
||||||
/// [`PathBuf`] that is relative to the package root in [`PackageCache`].
|
/// [`PathBuf`] that is relative to the package root in [`PackageCache`].
|
||||||
|
|
@ -33,6 +37,7 @@ pub(crate) type RelativePathBuf = PathBuf;
|
||||||
///
|
///
|
||||||
/// This type manages the cache file, reading it from disk and writing it back
|
/// This type manages the cache file, reading it from disk and writing it back
|
||||||
/// to disk (if required).
|
/// to disk (if required).
|
||||||
|
#[derive(Debug)]
|
||||||
pub(crate) struct Cache {
|
pub(crate) struct Cache {
|
||||||
/// Location of the cache.
|
/// Location of the cache.
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
|
|
@ -44,6 +49,9 @@ pub(crate) struct Cache {
|
||||||
/// `package.files` but are outdated. This gets merged with `package.files`
|
/// `package.files` but are outdated. This gets merged with `package.files`
|
||||||
/// when the cache is written back to disk in [`Cache::store`].
|
/// when the cache is written back to disk in [`Cache::store`].
|
||||||
new_files: Mutex<HashMap<RelativePathBuf, FileCache>>,
|
new_files: Mutex<HashMap<RelativePathBuf, FileCache>>,
|
||||||
|
/// The "current" timestamp used as cache for the updates of
|
||||||
|
/// [`FileCache::last_seen`]
|
||||||
|
last_seen_cache: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Cache {
|
impl Cache {
|
||||||
|
|
@ -94,26 +102,32 @@ impl Cache {
|
||||||
);
|
);
|
||||||
package.files.clear();
|
package.files.clear();
|
||||||
}
|
}
|
||||||
Cache {
|
Cache::new(path, package)
|
||||||
path,
|
|
||||||
package,
|
|
||||||
new_files: Mutex::new(HashMap::new()),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create an empty `Cache`.
|
/// Create an empty `Cache`.
|
||||||
fn empty(path: PathBuf, package_root: PathBuf) -> Cache {
|
fn empty(path: PathBuf, package_root: PathBuf) -> Cache {
|
||||||
|
let package = PackageCache {
|
||||||
|
package_root,
|
||||||
|
files: HashMap::new(),
|
||||||
|
};
|
||||||
|
Cache::new(path, package)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::cast_possible_truncation)]
|
||||||
|
fn new(path: PathBuf, package: PackageCache) -> Cache {
|
||||||
Cache {
|
Cache {
|
||||||
path,
|
path,
|
||||||
package: PackageCache {
|
package,
|
||||||
package_root,
|
|
||||||
files: HashMap::new(),
|
|
||||||
},
|
|
||||||
new_files: Mutex::new(HashMap::new()),
|
new_files: Mutex::new(HashMap::new()),
|
||||||
|
// SAFETY: this will be truncated to the year ~2554 (so don't use
|
||||||
|
// this code after that!).
|
||||||
|
last_seen_cache: SystemTime::UNIX_EPOCH.elapsed().unwrap().as_millis() as u64,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Store the cache to disk, if it has been changed.
|
/// Store the cache to disk, if it has been changed.
|
||||||
|
#[allow(clippy::cast_possible_truncation)]
|
||||||
pub(crate) fn store(mut self) -> Result<()> {
|
pub(crate) fn store(mut self) -> Result<()> {
|
||||||
let new_files = self.new_files.into_inner().unwrap();
|
let new_files = self.new_files.into_inner().unwrap();
|
||||||
if new_files.is_empty() {
|
if new_files.is_empty() {
|
||||||
|
|
@ -122,7 +136,14 @@ impl Cache {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add/overwrite the changes made.
|
// Remove cached files that we haven't seen in a while.
|
||||||
|
let now = self.last_seen_cache;
|
||||||
|
self.package.files.retain(|_, file| {
|
||||||
|
// SAFETY: this will be truncated to the year ~2554.
|
||||||
|
(now - *file.last_seen.get_mut()) <= MAX_LAST_SEEN.as_millis() as u64
|
||||||
|
});
|
||||||
|
|
||||||
|
// Apply any changes made and keep track of when we last saw files.
|
||||||
self.package.files.extend(new_files);
|
self.package.files.extend(new_files);
|
||||||
|
|
||||||
let file = File::create(&self.path)
|
let file = File::create(&self.path)
|
||||||
|
|
@ -161,51 +182,19 @@ impl Cache {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
file.last_seen.store(self.last_seen_cache, Ordering::SeqCst);
|
||||||
|
|
||||||
Some(file)
|
Some(file)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add or update a file cache at `path` relative to the package root.
|
/// Add or update a file cache at `path` relative to the package root.
|
||||||
pub(crate) fn update(&self, path: RelativePathBuf, file: FileCache) {
|
pub(crate) fn update(
|
||||||
self.new_files.lock().unwrap().insert(path, file);
|
&self,
|
||||||
}
|
path: RelativePathBuf,
|
||||||
}
|
|
||||||
|
|
||||||
/// On disk representation of a cache of a package.
|
|
||||||
#[derive(Deserialize, Debug, Serialize)]
|
|
||||||
struct PackageCache {
|
|
||||||
/// Path to the root of the package.
|
|
||||||
///
|
|
||||||
/// Usually this is a directory, but it can also be a single file in case of
|
|
||||||
/// single file "packages", e.g. scripts.
|
|
||||||
package_root: PathBuf,
|
|
||||||
/// Mapping of source file path to it's cached data.
|
|
||||||
files: HashMap<RelativePathBuf, FileCache>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// On disk representation of the cache per source file.
|
|
||||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
|
||||||
pub(crate) struct FileCache {
|
|
||||||
/// Timestamp when the file was last modified before the (cached) check.
|
|
||||||
last_modified: SystemTime,
|
|
||||||
/// Imports made.
|
|
||||||
imports: ImportMap,
|
|
||||||
/// Diagnostic messages.
|
|
||||||
messages: Vec<CacheMessage>,
|
|
||||||
/// Source code of the file.
|
|
||||||
///
|
|
||||||
/// # Notes
|
|
||||||
///
|
|
||||||
/// This will be empty if `messages` is empty.
|
|
||||||
source: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FileCache {
|
|
||||||
/// Create a new source file cache.
|
|
||||||
pub(crate) fn new(
|
|
||||||
last_modified: SystemTime,
|
last_modified: SystemTime,
|
||||||
messages: &[Message],
|
messages: &[Message],
|
||||||
imports: &ImportMap,
|
imports: &ImportMap,
|
||||||
) -> FileCache {
|
) {
|
||||||
let source = if let Some(msg) = messages.first() {
|
let source = if let Some(msg) = messages.first() {
|
||||||
msg.file.source_text().to_owned()
|
msg.file.source_text().to_owned()
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -229,14 +218,52 @@ impl FileCache {
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
FileCache {
|
let file = FileCache {
|
||||||
last_modified,
|
last_modified,
|
||||||
|
last_seen: AtomicU64::new(self.last_seen_cache),
|
||||||
imports: imports.clone(),
|
imports: imports.clone(),
|
||||||
messages,
|
messages,
|
||||||
source,
|
source,
|
||||||
}
|
};
|
||||||
|
self.new_files.lock().unwrap().insert(path, file);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// On disk representation of a cache of a package.
|
||||||
|
#[derive(Deserialize, Debug, Serialize)]
|
||||||
|
struct PackageCache {
|
||||||
|
/// Path to the root of the package.
|
||||||
|
///
|
||||||
|
/// Usually this is a directory, but it can also be a single file in case of
|
||||||
|
/// single file "packages", e.g. scripts.
|
||||||
|
package_root: PathBuf,
|
||||||
|
/// Mapping of source file path to it's cached data.
|
||||||
|
files: HashMap<RelativePathBuf, FileCache>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// On disk representation of the cache per source file.
|
||||||
|
#[derive(Deserialize, Debug, Serialize)]
|
||||||
|
pub(crate) struct FileCache {
|
||||||
|
/// Timestamp when the file was last modified before the (cached) check.
|
||||||
|
last_modified: SystemTime,
|
||||||
|
/// Timestamp when we last linted this file.
|
||||||
|
///
|
||||||
|
/// Represented as the number of milliseconds since Unix epoch. This will
|
||||||
|
/// break in 1970 + ~584 years (~2554).
|
||||||
|
last_seen: AtomicU64,
|
||||||
|
/// Imports made.
|
||||||
|
imports: ImportMap,
|
||||||
|
/// Diagnostic messages.
|
||||||
|
messages: Vec<CacheMessage>,
|
||||||
|
/// Source code of the file.
|
||||||
|
///
|
||||||
|
/// # Notes
|
||||||
|
///
|
||||||
|
/// This will be empty if `messages` is empty.
|
||||||
|
source: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileCache {
|
||||||
/// Convert the file cache into `Diagnostics`, using `path` as file name.
|
/// Convert the file cache into `Diagnostics`, using `path` as file name.
|
||||||
pub(crate) fn as_diagnostics(&self, path: &Path) -> Diagnostics {
|
pub(crate) fn as_diagnostics(&self, path: &Path) -> Diagnostics {
|
||||||
let messages = if self.messages.is_empty() {
|
let messages = if self.messages.is_empty() {
|
||||||
|
|
@ -259,7 +286,7 @@ impl FileCache {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// On disk representation of a diagnostic message.
|
/// On disk representation of a diagnostic message.
|
||||||
#[derive(Clone, Deserialize, Debug, Serialize)]
|
#[derive(Deserialize, Debug, Serialize)]
|
||||||
struct CacheMessage {
|
struct CacheMessage {
|
||||||
kind: DiagnosticKind,
|
kind: DiagnosticKind,
|
||||||
/// Range into the message's [`FileCache::source`].
|
/// Range into the message's [`FileCache::source`].
|
||||||
|
|
@ -303,12 +330,15 @@ mod tests {
|
||||||
use std::env::temp_dir;
|
use std::env::temp_dir;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
use std::path::Path;
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::atomic::AtomicU64;
|
||||||
|
use std::time::SystemTime;
|
||||||
|
|
||||||
use ruff::settings::{flags, AllSettings};
|
use ruff::settings::{flags, AllSettings};
|
||||||
use ruff_cache::CACHE_DIR_NAME;
|
use ruff_cache::CACHE_DIR_NAME;
|
||||||
|
use ruff_python_ast::imports::ImportMap;
|
||||||
|
|
||||||
use crate::cache::{self, Cache};
|
use crate::cache::{self, Cache, FileCache};
|
||||||
use crate::diagnostics::{lint_path, Diagnostics};
|
use crate::diagnostics::{lint_path, Diagnostics};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -497,4 +527,54 @@ mod tests {
|
||||||
assert!(expected_diagnostics == got_diagnostics);
|
assert!(expected_diagnostics == got_diagnostics);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn remove_old_files() {
|
||||||
|
let mut cache_dir = temp_dir();
|
||||||
|
cache_dir.push("ruff_tests/cache_remove_old_files");
|
||||||
|
let _ = fs::remove_dir_all(&cache_dir);
|
||||||
|
cache::init(&cache_dir).unwrap();
|
||||||
|
|
||||||
|
let settings = AllSettings::default();
|
||||||
|
let package_root =
|
||||||
|
fs::canonicalize("resources/test/fixtures/cache_remove_old_files").unwrap();
|
||||||
|
let mut cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
|
||||||
|
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
|
||||||
|
|
||||||
|
// Add a file to the cache that hasn't been linted or seen since the
|
||||||
|
// '70s!
|
||||||
|
cache.package.files.insert(
|
||||||
|
PathBuf::from("old.py"),
|
||||||
|
FileCache {
|
||||||
|
last_modified: SystemTime::UNIX_EPOCH,
|
||||||
|
last_seen: AtomicU64::new(123),
|
||||||
|
imports: ImportMap::new(),
|
||||||
|
messages: Vec::new(),
|
||||||
|
source: String::new(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
// Now actually lint a file.
|
||||||
|
let path = package_root.join("source.py");
|
||||||
|
lint_path(
|
||||||
|
&path,
|
||||||
|
Some(&package_root),
|
||||||
|
&settings,
|
||||||
|
Some(&cache),
|
||||||
|
flags::Noqa::Enabled,
|
||||||
|
flags::FixMode::Generate,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Storing the cache should remove the old (`old.py`) file.
|
||||||
|
cache.store().unwrap();
|
||||||
|
// So we when we open the cache again it shouldn't contain `old.py`.
|
||||||
|
let cache = Cache::open(&cache_dir, package_root, &settings.lib);
|
||||||
|
|
||||||
|
assert_eq!(cache.package.files.len(), 1, "didn't remove the old file");
|
||||||
|
assert!(
|
||||||
|
!cache.package.files.contains_key(&path),
|
||||||
|
"removed the wrong file"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ use ruff_python_ast::imports::ImportMap;
|
||||||
use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder};
|
use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder};
|
||||||
use ruff_python_stdlib::path::is_project_toml;
|
use ruff_python_stdlib::path::is_project_toml;
|
||||||
|
|
||||||
use crate::cache::{Cache, FileCache};
|
use crate::cache::Cache;
|
||||||
|
|
||||||
#[derive(Debug, Default, PartialEq)]
|
#[derive(Debug, Default, PartialEq)]
|
||||||
pub(crate) struct Diagnostics {
|
pub(crate) struct Diagnostics {
|
||||||
|
|
@ -208,10 +208,14 @@ pub(crate) fn lint_path(
|
||||||
let imports = imports.unwrap_or_default();
|
let imports = imports.unwrap_or_default();
|
||||||
|
|
||||||
if let Some((cache, relative_path, file_last_modified)) = caching {
|
if let Some((cache, relative_path, file_last_modified)) = caching {
|
||||||
|
// We don't cache parsing errors.
|
||||||
if parse_error.is_none() {
|
if parse_error.is_none() {
|
||||||
// We don't cache parsing error.
|
cache.update(
|
||||||
let file_cache = FileCache::new(file_last_modified, &messages, &imports);
|
relative_path.to_owned(),
|
||||||
cache.update(relative_path.to_owned(), file_cache);
|
file_last_modified,
|
||||||
|
&messages,
|
||||||
|
&imports,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue