Include file permissions in key for cached files (#5901)

Reimplements https://github.com/astral-sh/ruff/pull/3104
Closes https://github.com/astral-sh/ruff/issues/5726

Note that we will generate the hash for a cache key twice in normal
operation. Once to check for the cached item and again to update the
cache. We could optimize this by generating the hash once in
`diagnostics::lint_file` and passing the `u64` into `get` and `update`.
We'd probably want to wrap it in a `CacheKeyHash` enum for type safety.

## Test plan

Unit tests for Windows and Unix.

Manual test with case from issue

```
❯ touch fake.py
❯ chmod +x fake.py
❯ ./target/debug/ruff --select EXE fake.py
fake.py:1:1: EXE002 The file is executable but no shebang is present
Found 1 error.
❯ chmod -x fake.py
❯ ./target/debug/ruff --select EXE fake.py
```
This commit is contained in:
Zanie Blue 2023-07-25 12:06:47 -05:00 committed by GitHub
parent cbf6085375
commit 3000a47fe8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 284 additions and 122 deletions

1
Cargo.lock generated
View File

@ -1995,6 +1995,7 @@ dependencies = [
"ruff", "ruff",
"ruff_cache", "ruff_cache",
"ruff_diagnostics", "ruff_diagnostics",
"ruff_macros",
"ruff_python_ast", "ruff_python_ast",
"ruff_python_formatter", "ruff_python_formatter",
"ruff_python_stdlib", "ruff_python_stdlib",

View File

@ -30,6 +30,7 @@ ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_formatter = { path = "../ruff_python_formatter" } ruff_python_formatter = { path = "../ruff_python_formatter" }
ruff_text_size = { workspace = true } ruff_text_size = { workspace = true }
ruff_textwrap = { path = "../ruff_textwrap" } ruff_textwrap = { path = "../ruff_textwrap" }
ruff_macros = { path = "../ruff_macros" }
annotate-snippets = { version = "0.9.1", features = ["color"] } annotate-snippets = { version = "0.9.1", features = ["color"] }
anyhow = { workspace = true } anyhow = { workspace = true }

View File

@ -1,4 +1,3 @@
# NOTE: sync with cache::invalidation test
a = 1 a = 1
__all__ = list(["a", "b"]) __all__ = list(["a", "b"])

View File

@ -164,21 +164,20 @@ impl Cache {
path.strip_prefix(&self.package.package_root).ok() path.strip_prefix(&self.package.package_root).ok()
} }
/// Get the cached results for a single file at relative `path`. This uses /// Get the cached results for a single file at relative `path`. This
/// `file_last_modified` to determine if the results are still accurate /// uses `key` to determine if the results are still accurate.
/// (i.e. if the file hasn't been modified since the cached run). /// (i.e. if the file hasn't been modified since the cached run).
/// ///
/// This returns `None` if `file_last_modified` differs from the cached /// This returns `None` if `key` differs from the cached key or if the
/// timestamp or if the cache doesn't contain results for the file. /// cache doesn't contain results for the file.
pub(crate) fn get( pub(crate) fn get<T: CacheKey>(&self, path: &RelativePath, key: &T) -> Option<&FileCache> {
&self,
path: &RelativePath,
file_last_modified: SystemTime,
) -> Option<&FileCache> {
let file = self.package.files.get(path)?; let file = self.package.files.get(path)?;
let mut hasher = CacheKeyHasher::new();
key.cache_key(&mut hasher);
// Make sure the file hasn't changed since the cached run. // Make sure the file hasn't changed since the cached run.
if file.last_modified != file_last_modified { if file.key != hasher.finish() {
return None; return None;
} }
@ -188,10 +187,10 @@ impl Cache {
} }
/// Add or update a file cache at `path` relative to the package root. /// Add or update a file cache at `path` relative to the package root.
pub(crate) fn update( pub(crate) fn update<T: CacheKey>(
&self, &self,
path: RelativePathBuf, path: RelativePathBuf,
last_modified: SystemTime, key: T,
messages: &[Message], messages: &[Message],
imports: &ImportMap, imports: &ImportMap,
) { ) {
@ -218,8 +217,11 @@ impl Cache {
}) })
.collect(); .collect();
let mut hasher = CacheKeyHasher::new();
key.cache_key(&mut hasher);
let file = FileCache { let file = FileCache {
last_modified, key: hasher.finish(),
last_seen: AtomicU64::new(self.last_seen_cache), last_seen: AtomicU64::new(self.last_seen_cache),
imports: imports.clone(), imports: imports.clone(),
messages, messages,
@ -244,8 +246,8 @@ struct PackageCache {
/// On disk representation of the cache per source file. /// On disk representation of the cache per source file.
#[derive(Deserialize, Debug, Serialize)] #[derive(Deserialize, Debug, Serialize)]
pub(crate) struct FileCache { pub(crate) struct FileCache {
/// Timestamp when the file was last modified before the (cached) check. /// Key that determines if the cached item is still valid.
last_modified: SystemTime, key: u64,
/// Timestamp when we last linted this file. /// Timestamp when we last linted this file.
/// ///
/// Represented as the number of milliseconds since Unix epoch. This will /// Represented as the number of milliseconds since Unix epoch. This will
@ -327,20 +329,27 @@ pub(crate) fn init(path: &Path) -> Result<()> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use filetime::{set_file_mtime, FileTime};
use std::env::temp_dir; use std::env::temp_dir;
use std::fs; use std::fs;
use std::io::{self, Write}; use std::io;
use std::io::Write;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicU64;
use std::time::SystemTime; use std::time::SystemTime;
use itertools::Itertools;
use ruff::settings::{flags, AllSettings}; use ruff::settings::{flags, AllSettings};
use ruff_cache::CACHE_DIR_NAME; use ruff_cache::CACHE_DIR_NAME;
use ruff_python_ast::imports::ImportMap;
use crate::cache::RelativePathBuf;
use crate::cache::{self, Cache, FileCache}; use crate::cache::{self, Cache, FileCache};
use crate::diagnostics::{lint_path, Diagnostics}; use crate::diagnostics::{lint_path, Diagnostics};
use std::sync::atomic::AtomicU64;
use anyhow::Result;
use ruff_python_ast::imports::ImportMap;
#[test] #[test]
fn same_results() { fn same_results() {
let mut cache_dir = temp_dir(); let mut cache_dir = temp_dir();
@ -442,52 +451,99 @@ mod tests {
} }
#[test] #[test]
fn invalidation() { fn cache_adds_file_on_lint() {
// NOTE: keep in sync with actual file. let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
const SOURCE: &[u8] = b"# NOTE: sync with cache::invalidation test\na = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let mut cache_dir = temp_dir(); let test_cache = TestCache::new("cache_adds_file_on_lint");
cache_dir.push("ruff_tests/cache_invalidation"); let cache = test_cache.open();
let _ = fs::remove_dir_all(&cache_dir); test_cache.write_source_file("source.py", source);
cache::init(&cache_dir).unwrap();
let settings = AllSettings::default();
let package_root = fs::canonicalize("resources/test/fixtures/cache_mutable").unwrap();
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
assert_eq!(cache.new_files.lock().unwrap().len(), 0); assert_eq!(cache.new_files.lock().unwrap().len(), 0);
let path = package_root.join("source.py"); cache.store().unwrap();
let mut expected_diagnostics = lint_path( let cache = test_cache.open();
&path,
Some(&package_root), test_cache
&settings, .lint_file_with_cache("source.py", &cache)
Some(&cache), .expect("Failed to lint test file");
flags::Noqa::Enabled, assert_eq!(
flags::FixMode::Generate, cache.new_files.lock().unwrap().len(),
) 1,
.unwrap(); "A single new file should be added to the cache"
assert_eq!(cache.new_files.lock().unwrap().len(), 1); );
cache.store().unwrap(); cache.store().unwrap();
}
let tests = [ #[test]
// File change. fn cache_adds_files_on_lint() {
(|path| { let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let mut file = fs::OpenOptions::new()
.write(true) let test_cache = TestCache::new("cache_adds_files_on_lint");
.truncate(true) let cache = test_cache.open();
.open(path)?; test_cache.write_source_file("source_1.py", source);
file.write_all(SOURCE)?; test_cache.write_source_file("source_2.py", source);
file.sync_data()?; assert_eq!(cache.new_files.lock().unwrap().len(), 0);
Ok(|_| Ok(()))
}) as fn(&Path) -> io::Result<fn(&Path) -> io::Result<()>>, cache.store().unwrap();
// Regression for issue #3086. let cache = test_cache.open();
#[cfg(unix)]
|path| { test_cache
flip_execute_permission_bit(path)?; .lint_file_with_cache("source_1.py", &cache)
Ok(flip_execute_permission_bit) .expect("Failed to lint test file");
}, test_cache
]; .lint_file_with_cache("source_2.py", &cache)
.expect("Failed to lint test file");
assert_eq!(
cache.new_files.lock().unwrap().len(),
2,
"Both files should be added to the cache"
);
cache.store().unwrap();
}
#[test]
fn cache_invalidated_on_file_modified_time() {
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let test_cache = TestCache::new("cache_invalidated_on_file_modified_time");
let cache = test_cache.open();
let source_path = test_cache.write_source_file("source.py", source);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
let expected_diagnostics = test_cache
.lint_file_with_cache("source.py", &cache)
.expect("Failed to lint test file");
cache.store().unwrap();
let cache = test_cache.open();
// Update the modified time of the file to a time in the future
set_file_mtime(
source_path,
FileTime::from_system_time(SystemTime::now() + std::time::Duration::from_secs(1)),
)
.unwrap();
let got_diagnostics = test_cache
.lint_file_with_cache("source.py", &cache)
.expect("Failed to lint test file");
assert_eq!(
cache.new_files.lock().unwrap().len(),
1,
"Cache should not be used, the file should be treated as new and added to the cache"
);
assert_eq!(
expected_diagnostics, got_diagnostics,
"The diagnostics should not change"
);
}
#[test]
fn cache_invalidated_on_permission_change() {
// Regression test for issue #3086.
#[cfg(unix)] #[cfg(unix)]
#[allow(clippy::items_after_statements)] #[allow(clippy::items_after_statements)]
@ -498,55 +554,62 @@ mod tests {
file.set_permissions(PermissionsExt::from_mode(perms.mode() ^ 0o111)) file.set_permissions(PermissionsExt::from_mode(perms.mode() ^ 0o111))
} }
for change_file in tests { #[cfg(windows)]
let cleanup = change_file(&path).unwrap(); #[allow(clippy::items_after_statements)]
fn flip_read_only_permission(path: &Path) -> io::Result<()> {
let file = fs::OpenOptions::new().write(true).open(path)?;
let mut perms = file.metadata()?.permissions();
perms.set_readonly(!perms.readonly());
file.set_permissions(perms)
}
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib); let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let mut got_diagnostics = lint_path( let test_cache = TestCache::new("cache_invalidated_on_permission_change");
&path, let cache = test_cache.open();
Some(&package_root), let path = test_cache.write_source_file("source.py", source);
&settings, assert_eq!(cache.new_files.lock().unwrap().len(), 0);
Some(&cache),
flags::Noqa::Enabled, let expected_diagnostics = test_cache
flags::FixMode::Generate, .lint_file_with_cache("source.py", &cache)
)
.unwrap(); .unwrap();
cleanup(&path).unwrap(); cache.store().unwrap();
let cache = test_cache.open();
assert_eq!( // Flip the permissions on the file
cache.new_files.lock().unwrap().len(), #[cfg(unix)]
1, flip_execute_permission_bit(&path).unwrap();
"cache must not be used" #[cfg(windows)]
); flip_read_only_permission(&path).unwrap();
// Not store in the cache. let got_diagnostics = test_cache
expected_diagnostics.source_kind.clear(); .lint_file_with_cache("source.py", &cache)
got_diagnostics.source_kind.clear(); .unwrap();
assert!(expected_diagnostics == got_diagnostics);
} assert_eq!(
cache.new_files.lock().unwrap().len(),
1,
"Cache should not be used, the file should be treated as new and added to the cache"
);
assert_eq!(
expected_diagnostics, got_diagnostics,
"The diagnostics should not change"
);
} }
#[test] #[test]
fn remove_old_files() { fn cache_removes_stale_files_on_store() {
let mut cache_dir = temp_dir(); let test_cache = TestCache::new("cache_removes_stale_files_on_store");
cache_dir.push("ruff_tests/cache_remove_old_files"); let mut cache = test_cache.open();
let _ = fs::remove_dir_all(&cache_dir);
cache::init(&cache_dir).unwrap();
let settings = AllSettings::default(); // Add a file to the cache that hasn't been linted or seen since the '70s!
let package_root = let old_path_key = RelativePathBuf::from("old.py");
fs::canonicalize("resources/test/fixtures/cache_remove_old_files").unwrap();
let mut cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
// Add a file to the cache that hasn't been linted or seen since the
// '70s!
cache.package.files.insert( cache.package.files.insert(
PathBuf::from("old.py"), old_path_key,
FileCache { FileCache {
last_modified: SystemTime::UNIX_EPOCH, key: 123,
last_seen: AtomicU64::new(123), last_seen: AtomicU64::new(123),
imports: ImportMap::new(), imports: ImportMap::new(),
messages: Vec::new(), messages: Vec::new(),
@ -555,26 +618,97 @@ mod tests {
); );
// Now actually lint a file. // Now actually lint a file.
let path = package_root.join("source.py"); let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
lint_path( test_cache.write_source_file("new.py", source);
&path, let new_path_key = RelativePathBuf::from("new.py");
Some(&package_root), assert_eq!(cache.new_files.lock().unwrap().len(), 0);
&settings,
Some(&cache), test_cache
flags::Noqa::Enabled, .lint_file_with_cache("new.py", &cache)
flags::FixMode::Generate, .expect("Failed to lint test file");
)
.unwrap();
// Storing the cache should remove the old (`old.py`) file. // Storing the cache should remove the old (`old.py`) file.
cache.store().unwrap(); cache.store().unwrap();
// So we when we open the cache again it shouldn't contain `old.py`. // So we when we open the cache again it shouldn't contain `old.py`.
let cache = Cache::open(&cache_dir, package_root, &settings.lib); let cache = test_cache.open();
assert_eq!(cache.package.files.len(), 1, "didn't remove the old file");
assert!( assert!(
!cache.package.files.contains_key(&path), cache.package.files.keys().collect_vec() == vec![&new_path_key],
"removed the wrong file" "Only the new file should be present"
); );
} }
struct TestCache {
cache_dir: PathBuf,
package_root: PathBuf,
settings: AllSettings,
}
impl TestCache {
fn new(name: &str) -> Self {
// Build a new cache directory and clear it
let mut test_dir = temp_dir();
test_dir.push("ruff_tests/cache");
test_dir.push(name);
let _ = fs::remove_dir_all(&test_dir);
// Create separate directories for the cache and the test package
let cache_dir = test_dir.join("cache");
let package_root = test_dir.join("package");
cache::init(&cache_dir).unwrap();
fs::create_dir(package_root.clone()).unwrap();
let settings = AllSettings::default();
Self {
cache_dir,
package_root,
settings,
}
}
fn write_source_file(&self, path: &str, contents: &[u8]) -> PathBuf {
let path = self.package_root.join(path);
let mut file = fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&*path)
.unwrap();
file.write_all(contents).unwrap();
file.sync_data().unwrap();
path
}
fn open(&self) -> Cache {
Cache::open(
&self.cache_dir,
self.package_root.clone(),
&self.settings.lib,
)
}
fn lint_file_with_cache(
&self,
path: &str,
cache: &Cache,
) -> Result<Diagnostics, anyhow::Error> {
lint_path(
&self.package_root.join(path),
Some(&self.package_root),
&self.settings,
Some(cache),
flags::Noqa::Enabled,
flags::FixMode::Generate,
)
}
}
impl Drop for TestCache {
fn drop(&mut self) {
let _ = fs::remove_dir_all(&self.cache_dir);
}
}
} }

View File

@ -6,13 +6,17 @@ use std::io::Write;
use std::ops::AddAssign; use std::ops::AddAssign;
use std::path::Path; use std::path::Path;
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Context, Result};
use colored::Colorize; use colored::Colorize;
use filetime::FileTime;
use log::{debug, error, warn}; use log::{debug, error, warn};
use ruff_text_size::{TextRange, TextSize}; use ruff_text_size::{TextRange, TextSize};
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use similar::TextDiff; use similar::TextDiff;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
use crate::cache::Cache;
use ruff::jupyter::Notebook; use ruff::jupyter::Notebook;
use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult}; use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
use ruff::logging::DisplayParseError; use ruff::logging::DisplayParseError;
@ -23,11 +27,35 @@ use ruff::settings::{flags, AllSettings, Settings};
use ruff::source_kind::SourceKind; use ruff::source_kind::SourceKind;
use ruff::{fs, IOError}; use ruff::{fs, IOError};
use ruff_diagnostics::Diagnostic; use ruff_diagnostics::Diagnostic;
use ruff_macros::CacheKey;
use ruff_python_ast::imports::ImportMap; use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder}; use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder};
use ruff_python_stdlib::path::{is_jupyter_notebook, is_project_toml}; use ruff_python_stdlib::path::{is_jupyter_notebook, is_project_toml};
use crate::cache::Cache; #[derive(CacheKey)]
pub(crate) struct FileCacheKey {
/// Timestamp when the file was last modified before the (cached) check.
file_last_modified: FileTime,
/// Permissions of the file before the (cached) check.
file_permissions_mode: u32,
}
impl FileCacheKey {
fn from_path(path: &Path) -> io::Result<FileCacheKey> {
// Construct a cache key for the file
let metadata = path.metadata()?;
#[cfg(unix)]
let permissions = metadata.permissions().mode();
#[cfg(windows)]
let permissions: u32 = metadata.permissions().readonly().into();
Ok(FileCacheKey {
file_last_modified: FileTime::from_last_modification_time(&metadata),
file_permissions_mode: permissions,
})
}
}
#[derive(Debug, Default, PartialEq)] #[derive(Debug, Default, PartialEq)]
pub(crate) struct Diagnostics { pub(crate) struct Diagnostics {
@ -117,12 +145,16 @@ pub(crate) fn lint_path(
let relative_path = cache let relative_path = cache
.relative_path(path) .relative_path(path)
.expect("wrong package cache for file"); .expect("wrong package cache for file");
let last_modified = path.metadata()?.modified()?;
if let Some(cache) = cache.get(relative_path, last_modified) { let cache_key = FileCacheKey::from_path(path).context("Failed to create cache key")?;
if let Some(cache) = cache.get(relative_path, &cache_key) {
return Ok(cache.as_diagnostics(path)); return Ok(cache.as_diagnostics(path));
} }
Some((cache, relative_path, last_modified)) // Stash the file metadata for later so when we update the cache it reflects the prerun
// information
Some((cache, relative_path, cache_key))
} }
_ => None, _ => None,
}; };
@ -269,15 +301,10 @@ pub(crate) fn lint_path(
let imports = imports.unwrap_or_default(); let imports = imports.unwrap_or_default();
if let Some((cache, relative_path, file_last_modified)) = caching { if let Some((cache, relative_path, key)) = caching {
// We don't cache parsing errors. // We don't cache parsing errors.
if parse_error.is_none() { if parse_error.is_none() {
cache.update( cache.update(relative_path.to_owned(), key, &messages, &imports);
relative_path.to_owned(),
file_last_modified,
&messages,
&imports,
);
} }
} }