diff --git a/Cargo.lock b/Cargo.lock index baeeb3ac3e..3d4fd1f216 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1982,6 +1982,7 @@ dependencies = [ "path-absolutize", "regex", "result-like", + "ruff_cache", "ruff_macros", "ruff_python", "ruff_rustpython", @@ -2005,6 +2006,17 @@ dependencies = [ "wasm-bindgen-test", ] +[[package]] +name = "ruff_cache" +version = "0.0.0" +dependencies = [ + "filetime", + "globset", + "itertools", + "regex", + "ruff_macros", +] + [[package]] name = "ruff_cli" version = "0.0.253" @@ -2033,6 +2045,7 @@ dependencies = [ "rayon", "regex", "ruff", + "ruff_cache", "rustc-hash", "serde", "serde_json", diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index cd0597148e..420d48f18f 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -19,6 +19,7 @@ doctest = false ruff_macros = { path = "../ruff_macros" } ruff_python = { path = "../ruff_python" } ruff_rustpython = { path = "../ruff_rustpython" } +ruff_cache = { path = "../ruff_cache" } anyhow = { workspace = true } bisection = { version = "0.1.0" } diff --git a/crates/ruff/src/fs.rs b/crates/ruff/src/fs.rs index 7750a124a7..b7f3a4e86e 100644 --- a/crates/ruff/src/fs.rs +++ b/crates/ruff/src/fs.rs @@ -1,13 +1,12 @@ -use std::ops::Deref; use std::path::{Path, PathBuf}; use anyhow::{anyhow, Result}; +use globset::GlobMatcher; use log::debug; use path_absolutize::{path_dedot, Absolutize}; use rustc_hash::FxHashSet; use crate::registry::Rule; -use crate::settings::hashable::{HashableGlobMatcher, HashableHashSet}; /// Extract the absolute path and basename (as strings) from a Path. pub fn extract_path_names(path: &Path) -> Result<(&str, &str)> { @@ -25,11 +24,7 @@ pub fn extract_path_names(path: &Path) -> Result<(&str, &str)> { /// Create a set with codes matching the pattern/code pairs. pub(crate) fn ignores_from_path<'a>( path: &Path, - pattern_code_pairs: &'a [( - HashableGlobMatcher, - HashableGlobMatcher, - HashableHashSet, - )], + pattern_code_pairs: &'a [(GlobMatcher, GlobMatcher, FxHashSet)], ) -> FxHashSet<&'a Rule> { let (file_path, file_basename) = extract_path_names(path).expect("Unable to parse filename"); pattern_code_pairs @@ -39,8 +34,8 @@ pub(crate) fn ignores_from_path<'a>( debug!( "Adding per-file ignores for {:?} due to basename match on {:?}: {:?}", path, - basename.deref().glob().regex(), - &**codes + basename.glob().regex(), + codes ); return Some(codes.iter()); } @@ -48,8 +43,8 @@ pub(crate) fn ignores_from_path<'a>( debug!( "Adding per-file ignores for {:?} due to absolute match on {:?}: {:?}", path, - absolute.deref().glob().regex(), - &**codes + absolute.glob().regex(), + codes ); return Some(codes.iter()); } diff --git a/crates/ruff/src/lib.rs b/crates/ruff/src/lib.rs index df85cd0838..1d0de8ec31 100644 --- a/crates/ruff/src/lib.rs +++ b/crates/ruff/src/lib.rs @@ -13,7 +13,6 @@ pub use violation::{AutofixKind, Availability as AutofixAvailability}; mod ast; mod autofix; -pub mod cache; mod checkers; mod codes; mod cst; diff --git a/crates/ruff/src/rules/flake8_annotations/settings.rs b/crates/ruff/src/rules/flake8_annotations/settings.rs index d7b89827ce..e02769d4c0 100644 --- a/crates/ruff/src/rules/flake8_annotations/settings.rs +++ b/crates/ruff/src/rules/flake8_annotations/settings.rs @@ -1,5 +1,6 @@ //! Settings for the `flake-annotations` plugin. 
+use ruff_macros::CacheKey; use ruff_macros::ConfigurationOptions; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -60,7 +61,7 @@ pub struct Options { pub ignore_fully_untyped: Option, } -#[derive(Debug, Default, Hash)] +#[derive(Debug, Default, CacheKey)] #[allow(clippy::struct_excessive_bools)] pub struct Settings { pub mypy_init_return: bool, diff --git a/crates/ruff/src/rules/flake8_bandit/settings.rs b/crates/ruff/src/rules/flake8_bandit/settings.rs index 9b0d64d062..79f0d76e86 100644 --- a/crates/ruff/src/rules/flake8_bandit/settings.rs +++ b/crates/ruff/src/rules/flake8_bandit/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-bandit` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -45,7 +45,7 @@ pub struct Options { pub check_typed_exception: Option, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { pub hardcoded_tmp_directory: Vec, pub check_typed_exception: bool, diff --git a/crates/ruff/src/rules/flake8_bugbear/settings.rs b/crates/ruff/src/rules/flake8_bugbear/settings.rs index 266f7048bf..962331b337 100644 --- a/crates/ruff/src/rules/flake8_bugbear/settings.rs +++ b/crates/ruff/src/rules/flake8_bugbear/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-bugbear` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -26,7 +26,7 @@ pub struct Options { pub extend_immutable_calls: Option>, } -#[derive(Debug, Default, Hash)] +#[derive(Debug, Default, CacheKey)] pub struct Settings { pub extend_immutable_calls: Vec, } diff --git a/crates/ruff/src/rules/flake8_builtins/settings.rs b/crates/ruff/src/rules/flake8_builtins/settings.rs index 243145586a..41ec8aa434 100644 --- a/crates/ruff/src/rules/flake8_builtins/settings.rs +++ b/crates/ruff/src/rules/flake8_builtins/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-builtins` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -22,7 +22,7 @@ pub struct Options { pub builtins_ignorelist: Option>, } -#[derive(Debug, Default, Hash)] +#[derive(Debug, Default, CacheKey)] pub struct Settings { pub builtins_ignorelist: Vec, } diff --git a/crates/ruff/src/rules/flake8_comprehensions/settings.rs b/crates/ruff/src/rules/flake8_comprehensions/settings.rs index 17a5398e08..7fac6da1c4 100644 --- a/crates/ruff/src/rules/flake8_comprehensions/settings.rs +++ b/crates/ruff/src/rules/flake8_comprehensions/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-comprehensions` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -22,7 +22,7 @@ pub struct Options { pub allow_dict_calls_with_keyword_arguments: Option, } -#[derive(Debug, Default, Hash)] +#[derive(Debug, Default, CacheKey)] pub struct Settings { pub allow_dict_calls_with_keyword_arguments: bool, } diff --git a/crates/ruff/src/rules/flake8_errmsg/settings.rs b/crates/ruff/src/rules/flake8_errmsg/settings.rs index 54f37d3779..20b71a29a1 100644 --- a/crates/ruff/src/rules/flake8_errmsg/settings.rs +++ b/crates/ruff/src/rules/flake8_errmsg/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-errmsg` plugin. 
-use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -18,7 +18,7 @@ pub struct Options { pub max_string_length: Option, } -#[derive(Debug, Default, Hash)] +#[derive(Debug, Default, CacheKey)] pub struct Settings { pub max_string_length: usize, } diff --git a/crates/ruff/src/rules/flake8_implicit_str_concat/settings.rs b/crates/ruff/src/rules/flake8_implicit_str_concat/settings.rs index 59aadc996d..ab66b192b5 100644 --- a/crates/ruff/src/rules/flake8_implicit_str_concat/settings.rs +++ b/crates/ruff/src/rules/flake8_implicit_str_concat/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-implicit-str-concat` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -32,7 +32,7 @@ pub struct Options { pub allow_multiline: Option, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { pub allow_multiline: bool, } diff --git a/crates/ruff/src/rules/flake8_import_conventions/settings.rs b/crates/ruff/src/rules/flake8_import_conventions/settings.rs index e2f5f1aa00..e4a8a93d09 100644 --- a/crates/ruff/src/rules/flake8_import_conventions/settings.rs +++ b/crates/ruff/src/rules/flake8_import_conventions/settings.rs @@ -1,14 +1,10 @@ //! Settings for import conventions. -use std::hash::Hash; - -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use rustc_hash::FxHashMap; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use crate::settings::hashable::HashableHashMap; - const CONVENTIONAL_ALIASES: &[(&str, &str)] = &[ ("altair", "alt"), ("matplotlib", "mpl"), @@ -64,9 +60,9 @@ pub struct Options { pub extend_aliases: Option>, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { - pub aliases: HashableHashMap, + pub aliases: FxHashMap, } fn default_aliases() -> FxHashMap { @@ -90,7 +86,7 @@ fn resolve_aliases(options: Options) -> FxHashMap { impl Default for Settings { fn default() -> Self { Self { - aliases: default_aliases().into(), + aliases: default_aliases(), } } } @@ -98,7 +94,7 @@ impl Default for Settings { impl From for Settings { fn from(options: Options) -> Self { Self { - aliases: resolve_aliases(options).into(), + aliases: resolve_aliases(options), } } } @@ -106,7 +102,7 @@ impl From for Settings { impl From for Options { fn from(settings: Settings) -> Self { Self { - aliases: Some(settings.aliases.into()), + aliases: Some(settings.aliases), extend_aliases: None, } } diff --git a/crates/ruff/src/rules/flake8_pytest_style/settings.rs b/crates/ruff/src/rules/flake8_pytest_style/settings.rs index 60ccdd4fc2..d7969ccf8e 100644 --- a/crates/ruff/src/rules/flake8_pytest_style/settings.rs +++ b/crates/ruff/src/rules/flake8_pytest_style/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-pytest-style` plugin. 
-use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -113,7 +113,7 @@ pub struct Options { pub mark_parentheses: Option, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { pub fixture_parentheses: bool, pub parametrize_names_type: types::ParametrizeNameType, diff --git a/crates/ruff/src/rules/flake8_pytest_style/types.rs b/crates/ruff/src/rules/flake8_pytest_style/types.rs index 0b5aa044c7..cf310957b2 100644 --- a/crates/ruff/src/rules/flake8_pytest_style/types.rs +++ b/crates/ruff/src/rules/flake8_pytest_style/types.rs @@ -1,9 +1,10 @@ use std::fmt::{Display, Formatter}; +use ruff_macros::CacheKey; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[derive(Clone, Copy, Debug, CacheKey, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] pub enum ParametrizeNameType { #[serde(rename = "csv")] Csv, @@ -29,7 +30,7 @@ impl Display for ParametrizeNameType { } } -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[derive(Clone, Copy, Debug, CacheKey, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] pub enum ParametrizeValuesType { #[serde(rename = "tuple")] Tuple, @@ -52,7 +53,7 @@ impl Display for ParametrizeValuesType { } } -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[derive(Clone, Copy, Debug, CacheKey, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] pub enum ParametrizeValuesRowType { #[serde(rename = "tuple")] Tuple, diff --git a/crates/ruff/src/rules/flake8_quotes/settings.rs b/crates/ruff/src/rules/flake8_quotes/settings.rs index ba82d47caf..3050b35966 100644 --- a/crates/ruff/src/rules/flake8_quotes/settings.rs +++ b/crates/ruff/src/rules/flake8_quotes/settings.rs @@ -1,10 +1,10 @@ //! Settings for the `flake8-quotes` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash, JsonSchema)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, CacheKey, JsonSchema)] #[serde(deny_unknown_fields, rename_all = "kebab-case")] pub enum Quote { /// Use single quotes. @@ -71,7 +71,7 @@ pub struct Options { pub avoid_escape: Option, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { pub inline_quotes: Quote, pub multiline_quotes: Quote, diff --git a/crates/ruff/src/rules/flake8_self/settings.rs b/crates/ruff/src/rules/flake8_self/settings.rs index aa62600662..a2492f3cfd 100644 --- a/crates/ruff/src/rules/flake8_self/settings.rs +++ b/crates/ruff/src/rules/flake8_self/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-self` plugin. 
-use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -28,7 +28,7 @@ pub struct Options { pub ignore_names: Option>, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { pub ignore_names: Vec, } diff --git a/crates/ruff/src/rules/flake8_tidy_imports/banned_api.rs b/crates/ruff/src/rules/flake8_tidy_imports/banned_api.rs index 16e236e173..0125427eab 100644 --- a/crates/ruff/src/rules/flake8_tidy_imports/banned_api.rs +++ b/crates/ruff/src/rules/flake8_tidy_imports/banned_api.rs @@ -1,4 +1,4 @@ -use ruff_macros::{define_violation, derive_message_formats}; +use ruff_macros::{define_violation, derive_message_formats, CacheKey}; use rustc_hash::FxHashMap; use rustpython_parser::ast::{Alias, Expr, Located}; use schemars::JsonSchema; @@ -7,12 +7,11 @@ use serde::{Deserialize, Serialize}; use crate::ast::types::{CallPath, Range}; use crate::checkers::ast::Checker; use crate::registry::Diagnostic; -use crate::settings::hashable::HashableHashMap; use crate::violation::Violation; -pub type Settings = HashableHashMap; +pub type Settings = FxHashMap; -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash, JsonSchema)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, CacheKey, JsonSchema)] #[serde(deny_unknown_fields, rename_all = "kebab-case")] pub struct ApiBan { /// The message to display when the API is used. @@ -147,8 +146,7 @@ mod tests { msg: "Use typing_extensions.TypedDict instead.".to_string(), }, ), - ]) - .into(), + ]), ..Default::default() }, ..Settings::for_rules(vec![Rule::BannedApi]) diff --git a/crates/ruff/src/rules/flake8_tidy_imports/mod.rs b/crates/ruff/src/rules/flake8_tidy_imports/mod.rs index 2b58e26d7e..f9dc34868e 100644 --- a/crates/ruff/src/rules/flake8_tidy_imports/mod.rs +++ b/crates/ruff/src/rules/flake8_tidy_imports/mod.rs @@ -1,10 +1,12 @@ //! Rules from [flake8-tidy-imports](https://pypi.org/project/flake8-tidy-imports/). 
+use ruff_macros::CacheKey; + pub mod options; pub mod banned_api; pub mod relative_imports; -#[derive(Debug, Hash, Default)] +#[derive(Debug, CacheKey, Default)] pub struct Settings { pub ban_relative_imports: relative_imports::Settings, pub banned_api: banned_api::Settings, diff --git a/crates/ruff/src/rules/flake8_tidy_imports/options.rs b/crates/ruff/src/rules/flake8_tidy_imports/options.rs index 0be3e4c41f..d5d3db8531 100644 --- a/crates/ruff/src/rules/flake8_tidy_imports/options.rs +++ b/crates/ruff/src/rules/flake8_tidy_imports/options.rs @@ -48,7 +48,7 @@ impl From for Settings { fn from(options: Options) -> Self { Self { ban_relative_imports: options.ban_relative_imports.unwrap_or(Strictness::Parents), - banned_api: options.banned_api.unwrap_or_default().into(), + banned_api: options.banned_api.unwrap_or_default(), } } } @@ -57,7 +57,7 @@ impl From for Options { fn from(settings: Settings) -> Self { Self { ban_relative_imports: Some(settings.ban_relative_imports), - banned_api: Some(settings.banned_api.into()), + banned_api: Some(settings.banned_api), } } } diff --git a/crates/ruff/src/rules/flake8_tidy_imports/relative_imports.rs b/crates/ruff/src/rules/flake8_tidy_imports/relative_imports.rs index 5ea48b1e04..3658f74355 100644 --- a/crates/ruff/src/rules/flake8_tidy_imports/relative_imports.rs +++ b/crates/ruff/src/rules/flake8_tidy_imports/relative_imports.rs @@ -2,7 +2,7 @@ use rustpython_parser::ast::{Stmt, StmtKind}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use ruff_macros::{define_violation, derive_message_formats}; +use ruff_macros::{define_violation, derive_message_formats, CacheKey}; use ruff_python::identifiers::is_module_name; use crate::ast::helpers::{create_stmt, from_relative_import, unparse_stmt}; @@ -15,7 +15,7 @@ use crate::violation::{AutofixKind, Availability, Violation}; pub type Settings = Strictness; -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash, JsonSchema, Default)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, CacheKey, JsonSchema, Default)] #[serde(deny_unknown_fields, rename_all = "kebab-case")] pub enum Strictness { /// Ban imports that extend into the parent module or beyond. diff --git a/crates/ruff/src/rules/flake8_type_checking/settings.rs b/crates/ruff/src/rules/flake8_type_checking/settings.rs index 49dcc7ffcb..81345377fa 100644 --- a/crates/ruff/src/rules/flake8_type_checking/settings.rs +++ b/crates/ruff/src/rules/flake8_type_checking/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-type-checking` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -36,7 +36,7 @@ pub struct Options { pub exempt_modules: Option>, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { pub strict: bool, pub exempt_modules: Vec, diff --git a/crates/ruff/src/rules/flake8_unused_arguments/settings.rs b/crates/ruff/src/rules/flake8_unused_arguments/settings.rs index 91aa86cde4..66e5983740 100644 --- a/crates/ruff/src/rules/flake8_unused_arguments/settings.rs +++ b/crates/ruff/src/rules/flake8_unused_arguments/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `flake8-unused-arguments` plugin. 
-use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -22,7 +22,7 @@ pub struct Options { pub ignore_variadic_names: Option, } -#[derive(Debug, Default, Hash)] +#[derive(Debug, Default, CacheKey)] pub struct Settings { pub ignore_variadic_names: bool, } diff --git a/crates/ruff/src/rules/isort/categorize.rs b/crates/ruff/src/rules/isort/categorize.rs index c25ff18e11..a71f80d9d3 100644 --- a/crates/ruff/src/rules/isort/categorize.rs +++ b/crates/ruff/src/rules/isort/categorize.rs @@ -3,6 +3,7 @@ use std::fs; use std::path::{Path, PathBuf}; use log::debug; +use ruff_macros::CacheKey; use ruff_python::sys::KNOWN_STANDARD_LIBRARY; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -12,7 +13,17 @@ use super::types::{ImportBlock, Importable}; use crate::settings::types::PythonVersion; #[derive( - Debug, PartialOrd, Ord, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema, Hash, EnumIter, + Debug, + PartialOrd, + Ord, + PartialEq, + Eq, + Clone, + Serialize, + Deserialize, + JsonSchema, + CacheKey, + EnumIter, )] #[serde(deny_unknown_fields, rename_all = "kebab-case")] pub enum ImportType { diff --git a/crates/ruff/src/rules/isort/settings.rs b/crates/ruff/src/rules/isort/settings.rs index 24d6c328a5..dba9034846 100644 --- a/crates/ruff/src/rules/isort/settings.rs +++ b/crates/ruff/src/rules/isort/settings.rs @@ -2,13 +2,13 @@ use std::collections::BTreeSet; -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::categorize::ImportType; -#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Hash, JsonSchema)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, CacheKey, JsonSchema)] #[serde(deny_unknown_fields, rename_all = "kebab-case")] pub enum RelativeImportsOrder { /// Place "closer" imports (fewer `.` characters, most local) before @@ -265,7 +265,7 @@ pub struct Options { pub forced_separate: Option>, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] #[allow(clippy::struct_excessive_bools)] pub struct Settings { pub required_imports: BTreeSet, diff --git a/crates/ruff/src/rules/mccabe/settings.rs b/crates/ruff/src/rules/mccabe/settings.rs index 95a1b63172..bea840a5de 100644 --- a/crates/ruff/src/rules/mccabe/settings.rs +++ b/crates/ruff/src/rules/mccabe/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `mccabe` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -25,7 +25,7 @@ pub struct Options { pub max_complexity: Option, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { pub max_complexity: usize, } diff --git a/crates/ruff/src/rules/pep8_naming/settings.rs b/crates/ruff/src/rules/pep8_naming/settings.rs index e70fe73596..fcd1dc08f7 100644 --- a/crates/ruff/src/rules/pep8_naming/settings.rs +++ b/crates/ruff/src/rules/pep8_naming/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `pep8-naming` plugin. 
-use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -69,7 +69,7 @@ pub struct Options { pub staticmethod_decorators: Option>, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { pub ignore_names: Vec, pub classmethod_decorators: Vec, diff --git a/crates/ruff/src/rules/pycodestyle/settings.rs b/crates/ruff/src/rules/pycodestyle/settings.rs index 5ef57e11d0..2b6e16d55f 100644 --- a/crates/ruff/src/rules/pycodestyle/settings.rs +++ b/crates/ruff/src/rules/pycodestyle/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `pycodestyle` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -32,7 +32,7 @@ pub struct Options { pub ignore_overlong_task_comments: Option, } -#[derive(Debug, Default, Hash)] +#[derive(Debug, Default, CacheKey)] pub struct Settings { pub max_doc_length: Option, pub ignore_overlong_task_comments: bool, diff --git a/crates/ruff/src/rules/pydocstyle/settings.rs b/crates/ruff/src/rules/pydocstyle/settings.rs index 423f077786..3dc0b1bf7a 100644 --- a/crates/ruff/src/rules/pydocstyle/settings.rs +++ b/crates/ruff/src/rules/pydocstyle/settings.rs @@ -2,13 +2,13 @@ use std::collections::BTreeSet; -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use crate::registry::Rule; -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash, JsonSchema)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, CacheKey)] #[serde(deny_unknown_fields, rename_all = "kebab-case")] pub enum Convention { /// Use Google-style docstrings. 
@@ -112,7 +112,7 @@ pub struct Options { pub property_decorators: Option>, } -#[derive(Debug, Default, Hash)] +#[derive(Debug, Default, CacheKey)] pub struct Settings { pub convention: Option, pub ignore_decorators: BTreeSet, diff --git a/crates/ruff/src/rules/pyflakes/mod.rs b/crates/ruff/src/rules/pyflakes/mod.rs index 56d1e75ab2..fcaa7888f3 100644 --- a/crates/ruff/src/rules/pyflakes/mod.rs +++ b/crates/ruff/src/rules/pyflakes/mod.rs @@ -129,7 +129,7 @@ mod tests { let diagnostics = test_path( Path::new("pyflakes/F841_0.py"), &settings::Settings { - dummy_variable_rgx: Regex::new(r"^z$").unwrap().into(), + dummy_variable_rgx: Regex::new(r"^z$").unwrap(), ..settings::Settings::for_rule(Rule::UnusedVariable) }, )?; diff --git a/crates/ruff/src/rules/pylint/mod.rs b/crates/ruff/src/rules/pylint/mod.rs index 631312f717..6a65fe3b63 100644 --- a/crates/ruff/src/rules/pylint/mod.rs +++ b/crates/ruff/src/rules/pylint/mod.rs @@ -99,7 +99,7 @@ mod tests { let diagnostics = test_path( Path::new("pylint/too_many_arguments_params.py"), &Settings { - dummy_variable_rgx: Regex::new(r"skip_.*").unwrap().into(), + dummy_variable_rgx: Regex::new(r"skip_.*").unwrap(), ..Settings::for_rules(vec![Rule::TooManyArguments]) }, )?; diff --git a/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs b/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs index abf7b5224e..55e399fa36 100644 --- a/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs +++ b/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs @@ -1,3 +1,4 @@ +use regex::Regex; use std::{fmt, iter}; use rustpython_parser::ast::{Expr, ExprContext, ExprKind, Stmt, StmtKind, Withitem}; @@ -12,7 +13,6 @@ use crate::ast::visitor; use crate::ast::visitor::Visitor; use crate::checkers::ast::Checker; use crate::registry::Diagnostic; -use crate::settings::hashable::HashableRegex; use crate::violation::Violation; #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Copy)] @@ -142,7 +142,7 @@ struct ExprWithInnerBindingKind<'a> { } struct InnerForWithAssignTargetsVisitor<'a> { - dummy_variable_rgx: &'a HashableRegex, + dummy_variable_rgx: &'a Regex, assignment_targets: Vec>, } @@ -213,7 +213,7 @@ where fn assignment_targets_from_expr<'a, U>( expr: &'a Expr, - dummy_variable_rgx: &'a HashableRegex, + dummy_variable_rgx: &'a Regex, ) -> Box> + 'a> { // The Box is necessary to ensure the match arms have the same return type - we can't use // a cast to "impl Iterator", since at the time of writing that is only allowed for @@ -266,7 +266,7 @@ fn assignment_targets_from_expr<'a, U>( fn assignment_targets_from_with_items<'a, U>( items: &'a [Withitem], - dummy_variable_rgx: &'a HashableRegex, + dummy_variable_rgx: &'a Regex, ) -> impl Iterator> + 'a { items .iter() @@ -280,7 +280,7 @@ fn assignment_targets_from_with_items<'a, U>( fn assignment_targets_from_assign_targets<'a, U>( targets: &'a [Expr], - dummy_variable_rgx: &'a HashableRegex, + dummy_variable_rgx: &'a Regex, ) -> impl Iterator> + 'a { targets .iter() diff --git a/crates/ruff/src/rules/pylint/settings.rs b/crates/ruff/src/rules/pylint/settings.rs index 1aa8027ba9..569a8e3dc0 100644 --- a/crates/ruff/src/rules/pylint/settings.rs +++ b/crates/ruff/src/rules/pylint/settings.rs @@ -1,13 +1,11 @@ //! Settings for the `pylint` plugin. 
-use std::hash::Hash; - use anyhow::anyhow; -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use rustpython_parser::ast::Constant; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Hash, JsonSchema)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, CacheKey, JsonSchema)] #[serde(deny_unknown_fields, rename_all = "kebab-case")] pub enum ConstantType { Bytes, @@ -72,7 +70,7 @@ pub struct Options { pub max_statements: Option, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct Settings { pub allow_magic_value_types: Vec, pub max_args: usize, diff --git a/crates/ruff/src/rules/pyupgrade/settings.rs b/crates/ruff/src/rules/pyupgrade/settings.rs index 767f7e1499..ceb5ec4bee 100644 --- a/crates/ruff/src/rules/pyupgrade/settings.rs +++ b/crates/ruff/src/rules/pyupgrade/settings.rs @@ -1,6 +1,6 @@ //! Settings for the `pyupgrade` plugin. -use ruff_macros::ConfigurationOptions; +use ruff_macros::{CacheKey, ConfigurationOptions}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -31,7 +31,7 @@ pub struct Options { pub keep_runtime_typing: Option, } -#[derive(Debug, Default, Hash)] +#[derive(Debug, Default, CacheKey)] pub struct Settings { pub keep_runtime_typing: bool, } diff --git a/crates/ruff/src/rules/ruff/mod.rs b/crates/ruff/src/rules/ruff/mod.rs index 3c950f6031..0f5d48bde4 100644 --- a/crates/ruff/src/rules/ruff/mod.rs +++ b/crates/ruff/src/rules/ruff/mod.rs @@ -34,7 +34,7 @@ mod tests { let diagnostics = test_path( Path::new("ruff/confusables.py"), &settings::Settings { - allowed_confusables: FxHashSet::from_iter(['−', 'ρ', '∗']).into(), + allowed_confusables: FxHashSet::from_iter(['−', 'ρ', '∗']), ..settings::Settings::for_rules(vec![ Rule::AmbiguousUnicodeCharacterString, Rule::AmbiguousUnicodeCharacterDocstring, diff --git a/crates/ruff/src/settings/defaults.rs b/crates/ruff/src/settings/defaults.rs index df1f556766..cd1bbacdaa 100644 --- a/crates/ruff/src/settings/defaults.rs +++ b/crates/ruff/src/settings/defaults.rs @@ -2,8 +2,8 @@ use once_cell::sync::Lazy; use path_absolutize::path_dedot; use regex::Regex; use rustc_hash::FxHashSet; +use std::collections::HashSet; -use super::hashable::{HashableGlobSet, HashableHashSet}; use super::types::{FilePattern, PythonVersion}; use super::Settings; use crate::codes::{self, RuleCodePrefix}; @@ -15,6 +15,7 @@ use crate::rules::{ flake8_quotes, flake8_self, flake8_tidy_imports, flake8_type_checking, flake8_unused_arguments, isort, mccabe, pep8_naming, pycodestyle, pydocstyle, pylint, pyupgrade, }; +use crate::settings::types::FilePatternSet; pub const PREFIXES: &[RuleSelector] = &[ prefix_to_selector(RuleCodePrefix::Pycodestyle(codes::Pycodestyle::E)), @@ -59,12 +60,12 @@ impl Default for Settings { fn default() -> Self { Self { rules: PREFIXES.iter().flat_map(IntoIterator::into_iter).into(), - allowed_confusables: FxHashSet::from_iter([]).into(), + allowed_confusables: FxHashSet::from_iter([]), builtins: vec![], - dummy_variable_rgx: DUMMY_VARIABLE_RGX.clone().into(), - exclude: HashableGlobSet::new(EXCLUDE.clone()).unwrap(), - extend_exclude: HashableGlobSet::empty(), - external: HashableHashSet::default(), + dummy_variable_rgx: DUMMY_VARIABLE_RGX.clone(), + exclude: FilePatternSet::try_from_vec(EXCLUDE.clone()).unwrap(), + extend_exclude: FilePatternSet::default(), + external: HashSet::default(), force_exclude: false, ignore_init_module_imports: false, line_length: 
LINE_LENGTH, diff --git a/crates/ruff/src/settings/flags.rs b/crates/ruff/src/settings/flags.rs index eeab044057..28dd602088 100644 --- a/crates/ruff/src/settings/flags.rs +++ b/crates/ruff/src/settings/flags.rs @@ -1,6 +1,7 @@ use crate::fix; +use ruff_macros::CacheKey; -#[derive(Debug, Copy, Clone, Hash, result_like::BoolLike)] +#[derive(Debug, Copy, Clone, CacheKey, result_like::BoolLike)] pub enum Autofix { Enabled, Disabled, diff --git a/crates/ruff/src/settings/hashable.rs b/crates/ruff/src/settings/hashable.rs deleted file mode 100644 index 504ca9bb7e..0000000000 --- a/crates/ruff/src/settings/hashable.rs +++ /dev/null @@ -1,179 +0,0 @@ -use derivative::Derivative; -use std::hash::{Hash, Hasher}; -use std::ops::{Deref, DerefMut}; - -use globset::{GlobMatcher, GlobSet}; -use itertools::Itertools; -use regex::Regex; -use rustc_hash::{FxHashMap, FxHashSet}; - -use super::types::FilePattern; - -#[derive(Debug)] -pub struct HashableRegex(Regex); - -impl Hash for HashableRegex { - fn hash(&self, state: &mut H) { - self.0.as_str().hash(state); - } -} - -impl From for HashableRegex { - fn from(regex: Regex) -> Self { - Self(regex) - } -} - -impl Deref for HashableRegex { - type Target = Regex; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -#[derive(Debug)] -pub struct HashableGlobMatcher(GlobMatcher); - -impl From for HashableGlobMatcher { - fn from(matcher: GlobMatcher) -> Self { - Self(matcher) - } -} - -impl Deref for HashableGlobMatcher { - type Target = GlobMatcher; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl Hash for HashableGlobMatcher { - fn hash(&self, state: &mut H) { - self.0.glob().hash(state); - } -} - -#[derive(Derivative)] -#[derivative(Debug)] -pub struct HashableGlobSet { - patterns: Vec, - #[derivative(Debug = "ignore")] - globset: GlobSet, -} - -impl HashableGlobSet { - pub fn new(patterns: Vec) -> anyhow::Result { - let mut builder = globset::GlobSetBuilder::new(); - for pattern in &patterns { - pattern.clone().add_to(&mut builder)?; - } - let globset = builder.build()?; - Ok(HashableGlobSet { patterns, globset }) - } - - pub fn empty() -> Self { - Self { - patterns: Vec::new(), - globset: GlobSet::empty(), - } - } -} - -impl Deref for HashableGlobSet { - type Target = GlobSet; - - fn deref(&self) -> &Self::Target { - &self.globset - } -} - -impl Hash for HashableGlobSet { - fn hash(&self, state: &mut H) { - for pattern in self.patterns.iter().sorted() { - pattern.hash(state); - } - } -} - -#[derive(Debug, Clone)] -pub struct HashableHashSet(FxHashSet); - -impl Hash for HashableHashSet { - fn hash(&self, state: &mut H) { - for v in self.0.iter().sorted() { - v.hash(state); - } - } -} - -impl Default for HashableHashSet { - fn default() -> Self { - Self(FxHashSet::default()) - } -} - -impl From> for HashableHashSet { - fn from(set: FxHashSet) -> Self { - Self(set) - } -} - -impl From> for FxHashSet { - fn from(set: HashableHashSet) -> Self { - set.0 - } -} - -impl Deref for HashableHashSet { - type Target = FxHashSet; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -#[derive(Debug, Clone)] -pub struct HashableHashMap(FxHashMap); - -impl Hash for HashableHashMap { - fn hash(&self, state: &mut H) { - for key in self.0.keys().sorted() { - key.hash(state); - self.0[key].hash(state); - } - } -} - -impl Default for HashableHashMap { - fn default() -> Self { - Self(FxHashMap::default()) - } -} - -impl From> for HashableHashMap { - fn from(map: FxHashMap) -> Self { - Self(map) - } -} - -impl From> for FxHashMap { - fn from(map: 
HashableHashMap) -> Self { - map.0 - } -} - -impl Deref for HashableHashMap { - type Target = FxHashMap; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for HashableHashMap { - fn deref_mut(&mut self) -> &mut ::Target { - &mut self.0 - } -} diff --git a/crates/ruff/src/settings/mod.rs b/crates/ruff/src/settings/mod.rs index b046fc0a8a..8f9ae8246e 100644 --- a/crates/ruff/src/settings/mod.rs +++ b/crates/ruff/src/settings/mod.rs @@ -5,13 +5,13 @@ use std::path::{Path, PathBuf}; use anyhow::{anyhow, Result}; -use globset::Glob; +use globset::{Glob, GlobMatcher}; +use regex::Regex; +use ruff_cache::cache_dir; use rustc_hash::{FxHashMap, FxHashSet}; use strum::IntoEnumIterator; -use self::hashable::{HashableGlobMatcher, HashableGlobSet, HashableHashSet, HashableRegex}; use self::rule_table::RuleTable; -use crate::cache::cache_dir; use crate::registry::{Rule, RuleNamespace, INCOMPATIBLE_CODES}; use crate::rule_selector::{RuleSelector, Specificity}; use crate::rules::{ @@ -21,13 +21,13 @@ use crate::rules::{ isort, mccabe, pep8_naming, pycodestyle, pydocstyle, pylint, pyupgrade, }; use crate::settings::configuration::Configuration; -use crate::settings::types::{PerFileIgnore, PythonVersion, SerializationFormat}; +use crate::settings::types::{FilePatternSet, PerFileIgnore, PythonVersion, SerializationFormat}; use crate::warn_user_once; +use ruff_macros::CacheKey; pub mod configuration; pub mod defaults; pub mod flags; -pub mod hashable; pub mod options; pub mod options_base; pub mod pyproject; @@ -74,31 +74,27 @@ pub struct CliSettings { pub update_check: bool, } -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] #[allow(clippy::struct_excessive_bools)] pub struct Settings { pub rules: RuleTable, - pub per_file_ignores: Vec<( - HashableGlobMatcher, - HashableGlobMatcher, - HashableHashSet, - )>, + pub per_file_ignores: Vec<(GlobMatcher, GlobMatcher, FxHashSet)>, pub show_source: bool, pub target_version: PythonVersion, // Resolver settings - pub exclude: HashableGlobSet, - pub extend_exclude: HashableGlobSet, + pub exclude: FilePatternSet, + pub extend_exclude: FilePatternSet, pub force_exclude: bool, pub respect_gitignore: bool, pub project_root: PathBuf, // Rule-specific settings - pub allowed_confusables: HashableHashSet, + pub allowed_confusables: FxHashSet, pub builtins: Vec, - pub dummy_variable_rgx: HashableRegex, - pub external: HashableHashSet, + pub dummy_variable_rgx: Regex, + pub external: FxHashSet, pub ignore_init_module_imports: bool, pub line_length: usize, pub namespace_packages: Vec, @@ -146,18 +142,16 @@ impl Settings { allowed_confusables: config .allowed_confusables .map(FxHashSet::from_iter) - .unwrap_or_default() - .into(), + .unwrap_or_default(), builtins: config.builtins.unwrap_or_default(), dummy_variable_rgx: config .dummy_variable_rgx - .unwrap_or_else(|| defaults::DUMMY_VARIABLE_RGX.clone()) - .into(), - exclude: HashableGlobSet::new( + .unwrap_or_else(|| defaults::DUMMY_VARIABLE_RGX.clone()), + exclude: FilePatternSet::try_from_vec( config.exclude.unwrap_or_else(|| defaults::EXCLUDE.clone()), )?, - extend_exclude: HashableGlobSet::new(config.extend_exclude)?, - external: FxHashSet::from_iter(config.external.unwrap_or_default()).into(), + extend_exclude: FilePatternSet::try_from_vec(config.extend_exclude)?, + external: FxHashSet::from_iter(config.external.unwrap_or_default()), force_exclude: config.force_exclude.unwrap_or(false), @@ -414,13 +408,7 @@ impl From<&Configuration> for RuleTable { /// Given a list of patterns, create a `GlobSet`. 
pub fn resolve_per_file_ignores( per_file_ignores: Vec, -) -> Result< - Vec<( - HashableGlobMatcher, - HashableGlobMatcher, - HashableHashSet, - )>, -> { +) -> Result)>> { per_file_ignores .into_iter() .map(|per_file_ignore| { @@ -431,7 +419,7 @@ pub fn resolve_per_file_ignores( // Construct basename matcher. let basename = Glob::new(&per_file_ignore.basename)?.compile_matcher(); - Ok((absolute.into(), basename.into(), per_file_ignore.rules)) + Ok((absolute, basename, per_file_ignore.rules)) }) .collect() } diff --git a/crates/ruff/src/settings/rule_table.rs b/crates/ruff/src/settings/rule_table.rs index 455d26c689..d1cc0e23e9 100644 --- a/crates/ruff/src/settings/rule_table.rs +++ b/crates/ruff/src/settings/rule_table.rs @@ -1,23 +1,23 @@ -use std::collections::hash_map; +use std::collections::{hash_map, HashMap}; +use ruff_macros::CacheKey; use rustc_hash::FxHashMap; -use super::hashable::HashableHashMap; use crate::registry::Rule; /// A table to keep track of which rules are enabled /// and Whether they should be autofixed. -#[derive(Debug, Hash)] +#[derive(Debug, CacheKey)] pub struct RuleTable { /// Maps rule codes to a boolean indicating if the rule should be autofixed. - enabled: HashableHashMap, + enabled: FxHashMap, } impl RuleTable { /// Creates a new empty rule table. pub fn empty() -> Self { Self { - enabled: HashableHashMap::default(), + enabled: HashMap::default(), } } @@ -53,8 +53,6 @@ impl> From for RuleTable { for code in codes { enabled.insert(code, true); } - Self { - enabled: enabled.into(), - } + Self { enabled } } } diff --git a/crates/ruff/src/settings/types.rs b/crates/ruff/src/settings/types.rs index 59755e12f8..2c9d643919 100644 --- a/crates/ruff/src/settings/types.rs +++ b/crates/ruff/src/settings/types.rs @@ -1,22 +1,23 @@ -use std::hash::Hash; +use std::hash::{Hash, Hasher}; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::str::FromStr; use anyhow::{anyhow, bail, Result}; use clap::ValueEnum; -use globset::{Glob, GlobSetBuilder}; +use globset::{Glob, GlobSet, GlobSetBuilder}; +use ruff_cache::{CacheKey, CacheKeyHasher}; +use ruff_macros::CacheKey; use rustc_hash::FxHashSet; use schemars::JsonSchema; use serde::{de, Deserialize, Deserializer, Serialize}; -use super::hashable::HashableHashSet; use crate::registry::Rule; use crate::rule_selector::RuleSelector; use crate::{fs, warn_user_once}; #[derive( - Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize, Hash, JsonSchema, + Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq, Serialize, Deserialize, JsonSchema, CacheKey, )] #[serde(rename_all = "lowercase")] pub enum PythonVersion { @@ -61,7 +62,7 @@ impl PythonVersion { } } -#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Eq, Ord)] +#[derive(Debug, Clone, CacheKey, PartialEq, PartialOrd, Eq, Ord)] pub enum FilePattern { Builtin(&'static str), User(String, PathBuf), @@ -97,11 +98,51 @@ impl FromStr for FilePattern { } } +#[derive(Debug, Clone, Default)] +pub struct FilePatternSet { + set: GlobSet, + cache_key: u64, +} + +impl FilePatternSet { + pub fn try_from_vec(patterns: Vec) -> Result { + let mut builder = GlobSetBuilder::new(); + let mut hasher = CacheKeyHasher::new(); + + for pattern in patterns { + pattern.cache_key(&mut hasher); + pattern.add_to(&mut builder)?; + } + + let set = builder.build()?; + + Ok(FilePatternSet { + set, + cache_key: hasher.finish(), + }) + } +} + +impl Deref for FilePatternSet { + type Target = GlobSet; + + fn deref(&self) -> &Self::Target { + &self.set + } +} + +impl CacheKey for 
FilePatternSet { + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_usize(self.set.len()); + state.write_u64(self.cache_key); + } +} + #[derive(Debug, Clone)] pub struct PerFileIgnore { pub(crate) basename: String, pub(crate) absolute: PathBuf, - pub(crate) rules: HashableHashSet, + pub(crate) rules: FxHashSet, } impl PerFileIgnore { @@ -116,7 +157,7 @@ impl PerFileIgnore { Self { basename: pattern, absolute, - rules: rules.into(), + rules, } } } diff --git a/crates/ruff_cache/Cargo.toml b/crates/ruff_cache/Cargo.toml new file mode 100644 index 0000000000..faf06f72f5 --- /dev/null +++ b/crates/ruff_cache/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "ruff_cache" +version = "0.0.0" +publish = false +edition = { workspace = true } +rust-version = { workspace = true } + +[dependencies] +itertools = { workspace = true } +globset = { version = "0.4.9" } +regex = { workspace = true } +filetime = { version = "0.2.17" } + +[dev-dependencies] +ruff_macros = { path = "../ruff_macros" } diff --git a/crates/ruff_cache/src/cache_key.rs b/crates/ruff_cache/src/cache_key.rs new file mode 100644 index 0000000000..6b24ba82c4 --- /dev/null +++ b/crates/ruff_cache/src/cache_key.rs @@ -0,0 +1,376 @@ +use itertools::Itertools; +use regex::Regex; +use std::borrow::Cow; +use std::collections::hash_map::DefaultHasher; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::hash::{Hash, Hasher}; +use std::ops::{Deref, DerefMut}; +use std::path::{Path, PathBuf}; + +#[derive(Clone, Debug, Default)] +pub struct CacheKeyHasher { + inner: DefaultHasher, +} + +impl CacheKeyHasher { + pub fn new() -> Self { + Self { + inner: DefaultHasher::new(), + } + } +} + +impl Deref for CacheKeyHasher { + type Target = DefaultHasher; + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for CacheKeyHasher { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +/// A type that be used as part of a cache key. +/// +/// A cache looks up artefacts by a cache key. Many cache keys are composed of sub-keys. For example, +/// caching the lint results of a file depend at least on the file content, the user settings, and linter version. +/// Types implementing the [`CacheKey`] trait can be used as part of a cache key by which artefacts are queried. +/// +/// ## Implementing `CacheKey` +/// +/// You can derive [`CacheKey`] with `#[derive(CacheKey)]` if all fields implement [`CacheKey`]. The resulting +/// cache key will be the combination of the values from calling `cache_key` on each field. +/// +/// ``` +/// # use ruff_macros::CacheKey; +/// +/// #[derive(CacheKey)] +/// struct Test { +/// name: String, +/// version: u32, +/// } +/// ``` +/// +/// If you need more control over computing the cache key, you can of course implement the [`CacheKey]` yourself: +/// +/// ``` +/// use ruff_cache::{CacheKey, CacheKeyHasher}; +/// +/// struct Test { +/// name: String, +/// version: u32, +/// other: String +/// } +/// +/// impl CacheKey for Test { +/// fn cache_key(&self, state: &mut CacheKeyHasher) { +/// self.name.cache_key(state); +/// self.version.cache_key(state); +/// } +/// } +/// ``` +/// +/// ## Portability +/// +/// Ideally, the cache key is portable across platforms but this is not yet a strict requirement. +/// +/// ## Using [`Hash`] +/// +/// You can defer to the [`Hash`] implementation for non-composite types. +/// Be aware, that the [`Hash`] implementation may not be portable. +/// +/// ## Why a new trait rather than reusing [`Hash`]? 
+/// The main reason is that hashes and cache keys have different constraints: +/// +/// * Cache keys are less performance sensitive: Hashes must be super fast to compute for performant hashed-collections. That's +/// why some standard types don't implement [`Hash`] where it would be safe to to implement [`CacheKey`], e.g. `HashSet` +/// * Cache keys must be deterministic where hash keys do not have this constraint. That's why pointers don't implement [`CacheKey`] but they implement [`Hash`]. +/// * Ideally, cache keys are portable +/// +/// [`Hash`](std::hash::Hash) +pub trait CacheKey { + fn cache_key(&self, state: &mut CacheKeyHasher); + + fn cache_key_slice(data: &[Self], state: &mut CacheKeyHasher) + where + Self: Sized, + { + for piece in data { + piece.cache_key(state); + } + } +} + +impl CacheKey for bool { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_u8(u8::from(*self)); + } +} + +impl CacheKey for char { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_u32(*self as u32); + } +} + +impl CacheKey for usize { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_usize(*self); + } +} + +impl CacheKey for u128 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_u128(*self); + } +} + +impl CacheKey for u64 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_u64(*self); + } +} + +impl CacheKey for u32 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_u32(*self); + } +} + +impl CacheKey for u16 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_u16(*self); + } +} + +impl CacheKey for u8 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_u8(*self); + } +} + +impl CacheKey for isize { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_isize(*self); + } +} + +impl CacheKey for i128 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_i128(*self); + } +} + +impl CacheKey for i64 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_i64(*self); + } +} + +impl CacheKey for i32 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_i32(*self); + } +} + +impl CacheKey for i16 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_i16(*self); + } +} + +impl CacheKey for i8 { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_i8(*self); + } +} + +macro_rules! impl_cache_key_tuple { + () => ( + impl CacheKey for () { + #[inline] + fn cache_key(&self, _state: &mut CacheKeyHasher) {} + } + ); + + ( $($name:ident)+) => ( + impl<$($name: CacheKey),+> CacheKey for ($($name,)+) where last_type!($($name,)+): ?Sized { + #[allow(non_snake_case)] + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + let ($(ref $name,)+) = *self; + $($name.cache_key(state);)+ + } + } + ); +} + +macro_rules! last_type { + ($a:ident,) => { $a }; + ($a:ident, $($rest_a:ident,)+) => { last_type!($($rest_a,)+) }; +} + +impl_cache_key_tuple! {} +impl_cache_key_tuple! { T } +impl_cache_key_tuple! { T B } +impl_cache_key_tuple! { T B C } +impl_cache_key_tuple! { T B C D } +impl_cache_key_tuple! { T B C D E } +impl_cache_key_tuple! { T B C D E F } +impl_cache_key_tuple! { T B C D E F G } +impl_cache_key_tuple! { T B C D E F G H } +impl_cache_key_tuple! { T B C D E F G H I } +impl_cache_key_tuple! { T B C D E F G H I J } +impl_cache_key_tuple! 
{ T B C D E F G H I J K } +impl_cache_key_tuple! { T B C D E F G H I J K L } + +impl CacheKey for str { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + self.hash(&mut **state); + } +} + +impl CacheKey for String { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + self.hash(&mut **state); + } +} + +impl CacheKey for Option { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + match self { + None => state.write_usize(0), + Some(value) => { + state.write_usize(1); + value.cache_key(state); + } + } + } +} + +impl CacheKey for [T] { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_usize(self.len()); + CacheKey::cache_key_slice(self, state); + } +} + +impl CacheKey for &T { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + (**self).cache_key(state); + } +} + +impl CacheKey for &mut T { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + (**self).cache_key(state); + } +} + +impl CacheKey for Vec +where + T: CacheKey, +{ + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_usize(self.len()); + CacheKey::cache_key_slice(self, state); + } +} + +impl CacheKey for HashMap +where + K: CacheKey + Ord, + V: CacheKey, +{ + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_usize(self.len()); + for (key, value) in self + .iter() + .sorted_by(|(left, _), (right, _)| left.cmp(right)) + { + key.cache_key(state); + value.cache_key(state); + } + } +} + +impl CacheKey for HashSet { + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_usize(self.len()); + for value in self.iter().sorted() { + value.cache_key(state); + } + } +} + +impl CacheKey for BTreeSet { + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_usize(self.len()); + for item in self { + item.cache_key(state); + } + } +} + +impl CacheKey for BTreeMap { + fn cache_key(&self, state: &mut CacheKeyHasher) { + state.write_usize(self.len()); + + for (key, value) in self { + key.cache_key(state); + value.cache_key(state); + } + } +} + +impl CacheKey for Path { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + self.hash(&mut **state); + } +} + +impl CacheKey for PathBuf { + #[inline] + fn cache_key(&self, state: &mut CacheKeyHasher) { + self.hash(&mut **state); + } +} + +impl CacheKey for Cow<'_, V> +where + V: CacheKey + ToOwned, +{ + fn cache_key(&self, state: &mut CacheKeyHasher) { + (**self).cache_key(state); + } +} + +impl CacheKey for Regex { + fn cache_key(&self, state: &mut CacheKeyHasher) { + self.as_str().cache_key(state); + } +} diff --git a/crates/ruff_cache/src/filetime.rs b/crates/ruff_cache/src/filetime.rs new file mode 100644 index 0000000000..17e8eb551e --- /dev/null +++ b/crates/ruff_cache/src/filetime.rs @@ -0,0 +1,9 @@ +use crate::{CacheKey, CacheKeyHasher}; +use filetime::FileTime; +use std::hash::Hash; + +impl CacheKey for FileTime { + fn cache_key(&self, state: &mut CacheKeyHasher) { + self.hash(&mut **state); + } +} diff --git a/crates/ruff_cache/src/globset.rs b/crates/ruff_cache/src/globset.rs new file mode 100644 index 0000000000..fbaf79ca05 --- /dev/null +++ b/crates/ruff_cache/src/globset.rs @@ -0,0 +1,14 @@ +use crate::{CacheKey, CacheKeyHasher}; +use globset::{Glob, GlobMatcher}; + +impl CacheKey for GlobMatcher { + fn cache_key(&self, state: &mut CacheKeyHasher) { + self.glob().cache_key(state); + } +} + +impl CacheKey for Glob { + fn cache_key(&self, state: &mut CacheKeyHasher) { + self.glob().cache_key(state); + } +} diff --git 
a/crates/ruff/src/cache.rs b/crates/ruff_cache/src/lib.rs similarity index 74% rename from crates/ruff/src/cache.rs rename to crates/ruff_cache/src/lib.rs index 5ef86e16dc..ddde67db1f 100644 --- a/crates/ruff/src/cache.rs +++ b/crates/ruff_cache/src/lib.rs @@ -1,3 +1,9 @@ +mod cache_key; +pub mod filetime; +pub mod globset; + +pub use cache_key::{CacheKey, CacheKeyHasher}; + use std::path::{Path, PathBuf}; pub const CACHE_DIR_NAME: &str = ".ruff_cache"; diff --git a/crates/ruff_cache/tests/cache_key.rs b/crates/ruff_cache/tests/cache_key.rs new file mode 100644 index 0000000000..0041afd253 --- /dev/null +++ b/crates/ruff_cache/tests/cache_key.rs @@ -0,0 +1,108 @@ +use ruff_cache::{CacheKey, CacheKeyHasher}; +use ruff_macros::CacheKey; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; + +#[derive(CacheKey, Hash)] +struct UnitStruct; + +#[derive(CacheKey, Hash)] +struct NamedFieldsStruct { + a: String, + b: String, +} + +#[derive(CacheKey, Hash)] +struct UnnamedFieldsStruct(String, String); + +#[derive(CacheKey, Hash)] +enum Enum { + Unit, + UnnamedFields(String, String), + NamedFields { a: String, b: String }, +} + +#[test] +fn unit_struct_cache_key() { + let mut key = CacheKeyHasher::new(); + + UnitStruct.cache_key(&mut key); + + let mut hash = DefaultHasher::new(); + UnitStruct.hash(&mut hash); + + assert_eq!(hash.finish(), key.finish()); +} + +#[test] +fn named_field_struct() { + let mut key = CacheKeyHasher::new(); + + let named_fields = NamedFieldsStruct { + a: "Hello".into(), + b: "World".into(), + }; + + named_fields.cache_key(&mut key); + + let mut hash = DefaultHasher::new(); + named_fields.hash(&mut hash); + + assert_eq!(hash.finish(), key.finish()); +} + +#[test] +fn unnamed_field_struct() { + let mut key = CacheKeyHasher::new(); + + let unnamed_fields = UnnamedFieldsStruct("Hello".into(), "World".into()); + + unnamed_fields.cache_key(&mut key); + + let mut hash = DefaultHasher::new(); + unnamed_fields.hash(&mut hash); + + assert_eq!(hash.finish(), key.finish()); +} + +#[test] +fn enum_unit_variant() { + let mut key = CacheKeyHasher::new(); + + let variant = Enum::Unit; + variant.cache_key(&mut key); + + let mut hash = DefaultHasher::new(); + variant.hash(&mut hash); + + assert_eq!(hash.finish(), key.finish()); +} + +#[test] +fn enum_named_fields_variant() { + let mut key = CacheKeyHasher::new(); + + let variant = Enum::NamedFields { + a: "Hello".to_string(), + b: "World".to_string(), + }; + variant.cache_key(&mut key); + + let mut hash = DefaultHasher::new(); + variant.hash(&mut hash); + + assert_eq!(hash.finish(), key.finish()); +} + +#[test] +fn enum_unnamed_fields_variant() { + let mut key = CacheKeyHasher::new(); + + let variant = Enum::UnnamedFields("Hello".to_string(), "World".to_string()); + variant.cache_key(&mut key); + + let mut hash = DefaultHasher::new(); + variant.hash(&mut hash); + + assert_eq!(hash.finish(), key.finish()); +} diff --git a/crates/ruff_cli/Cargo.toml b/crates/ruff_cli/Cargo.toml index 1cb83d03a3..faa473bdb5 100644 --- a/crates/ruff_cli/Cargo.toml +++ b/crates/ruff_cli/Cargo.toml @@ -25,6 +25,7 @@ doc = false [dependencies] ruff = { path = "../ruff" } +ruff_cache = { path = "../ruff_cache" } annotate-snippets = { version = "0.9.1", features = ["color"] } anyhow = { workspace = true } diff --git a/crates/ruff_cli/src/cache.rs b/crates/ruff_cli/src/cache.rs index 63fb9ecd20..3ea5e795bb 100644 --- a/crates/ruff_cli/src/cache.rs +++ b/crates/ruff_cli/src/cache.rs @@ -1,6 +1,5 @@ -use 
std::collections::hash_map::DefaultHasher; use std::fs; -use std::hash::{Hash, Hasher}; +use std::hash::Hasher; use std::io::Write; use std::path::Path; @@ -10,6 +9,7 @@ use log::error; use path_absolutize::Absolutize; use ruff::message::Message; use ruff::settings::{flags, AllSettings, Settings}; +use ruff_cache::{CacheKey, CacheKeyHasher}; use serde::{Deserialize, Serialize}; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; @@ -37,18 +37,18 @@ fn cache_key>( settings: &Settings, autofix: flags::Autofix, ) -> u64 { - let mut hasher = DefaultHasher::new(); - CARGO_PKG_VERSION.hash(&mut hasher); - path.as_ref().absolutize().unwrap().hash(&mut hasher); + let mut hasher = CacheKeyHasher::new(); + CARGO_PKG_VERSION.cache_key(&mut hasher); + path.as_ref().absolutize().unwrap().cache_key(&mut hasher); package .as_ref() .map(|path| path.as_ref().absolutize().unwrap()) - .hash(&mut hasher); - FileTime::from_last_modification_time(metadata).hash(&mut hasher); + .cache_key(&mut hasher); + FileTime::from_last_modification_time(metadata).cache_key(&mut hasher); #[cfg(unix)] - metadata.permissions().mode().hash(&mut hasher); - settings.hash(&mut hasher); - autofix.hash(&mut hasher); + metadata.permissions().mode().cache_key(&mut hasher); + settings.cache_key(&mut hasher); + autofix.cache_key(&mut hasher); hasher.finish() } diff --git a/crates/ruff_cli/src/commands/clean.rs b/crates/ruff_cli/src/commands/clean.rs index dee19f7dc4..0a00290920 100644 --- a/crates/ruff_cli/src/commands/clean.rs +++ b/crates/ruff_cli/src/commands/clean.rs @@ -6,9 +6,9 @@ use colored::Colorize; use path_absolutize::path_dedot; use walkdir::WalkDir; -use ruff::cache::CACHE_DIR_NAME; use ruff::fs; use ruff::logging::LogLevel; +use ruff_cache::CACHE_DIR_NAME; /// Clear any caches in the current directory or any subdirectories. pub fn clean(level: LogLevel) -> Result<()> { diff --git a/crates/ruff_macros/Cargo.toml b/crates/ruff_macros/Cargo.toml index 99136a6874..e48c4e45d4 100644 --- a/crates/ruff_macros/Cargo.toml +++ b/crates/ruff_macros/Cargo.toml @@ -15,3 +15,4 @@ quote = { version = "1.0.21" } syn = { version = "1.0.103", features = ["derive", "parsing", "extra-traits"] } textwrap = { version = "0.16.0" } itertools = { workspace = true } + diff --git a/crates/ruff_macros/src/cache_key.rs b/crates/ruff_macros/src/cache_key.rs new file mode 100644 index 0000000000..85087f392f --- /dev/null +++ b/crates/ruff_macros/src/cache_key.rs @@ -0,0 +1,103 @@ +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; +use syn::spanned::Spanned; +use syn::{Data, DeriveInput, Error, Fields}; + +pub fn derive_cache_key(item: &DeriveInput) -> syn::Result { + let fields = match &item.data { + Data::Enum(item_enum) => { + let arms = item_enum.variants.iter().enumerate().map(|(i, variant)| { + let variant_name = &variant.ident; + + match &variant.fields { + Fields::Named(fields) => { + let field_names: Vec<_> = fields + .named + .iter() + .map(|field| field.ident.clone().unwrap()) + .collect(); + + let fields_code = field_names + .iter() + .map(|field| quote!(#field.cache_key(key);)); + + quote! { + Self::#variant_name{#(#field_names),*} => { + key.write_usize(#i); + #(#fields_code)* + } + } + } + Fields::Unnamed(fields) => { + let field_names: Vec<_> = fields + .unnamed + .iter() + .enumerate() + .map(|(i, _)| format_ident!("field_{i}")) + .collect(); + + let fields_code = field_names + .iter() + .map(|field| quote!(#field.cache_key(key);)); + + quote! 
{ + Self::#variant_name(#(#field_names),*) => { + key.write_usize(#i); + #(#fields_code)* + } + } + } + Fields::Unit => { + quote! { + Self::#variant_name => { + key.write_usize(#i); + } + } + } + } + }); + + quote! { + match self { + #(#arms)* + } + } + } + + Data::Struct(item_struct) => { + let fields = item_struct.fields.iter().enumerate().map(|(i, field)| { + let field_attr = match &field.ident { + Some(ident) => quote!(self.#ident), + None => { + let index = syn::Index::from(i); + quote!(self.#index) + } + }; + + quote!(#field_attr.cache_key(key);) + }); + + quote! {#(#fields)*} + } + + Data::Union(_) => { + return Err(Error::new( + item.span(), + "CacheKey does not support unions. Only structs and enums are supported", + )) + } + }; + + let name = &item.ident; + let (impl_generics, ty_generics, where_clause) = &item.generics.split_for_impl(); + + Ok(quote!( + impl #impl_generics ruff_cache::CacheKey for #name #ty_generics #where_clause { + fn cache_key(&self, key: &mut ruff_cache::CacheKeyHasher) { + use std::hash::Hasher; + use ruff_cache::CacheKey; + #fields + } + } + )) +} diff --git a/crates/ruff_macros/src/lib.rs b/crates/ruff_macros/src/lib.rs index 505ea40918..6c6811ba45 100644 --- a/crates/ruff_macros/src/lib.rs +++ b/crates/ruff_macros/src/lib.rs @@ -1,8 +1,10 @@ //! This crate implements internal macros for the `ruff` library. +use crate::cache_key::derive_cache_key; use proc_macro::TokenStream; use syn::{parse_macro_input, DeriveInput, ItemFn}; +mod cache_key; mod config; mod define_violation; mod derive_message_formats; @@ -20,6 +22,16 @@ pub fn derive_config(input: proc_macro::TokenStream) -> proc_macro::TokenStream .into() } +#[proc_macro_derive(CacheKey)] +pub fn cache_key(input: TokenStream) -> TokenStream { + let item = parse_macro_input!(input as DeriveInput); + + let result = derive_cache_key(&item); + let stream = result.unwrap_or_else(|err| err.to_compile_error()); + + TokenStream::from(stream) +} + #[proc_macro] pub fn register_rules(item: proc_macro::TokenStream) -> proc_macro::TokenStream { let mapping = parse_macro_input!(item as register_rules::Input); diff --git a/crates/ruff_macros/src/register_rules.rs b/crates/ruff_macros/src/register_rules.rs index 08a0372f28..97fcad548d 100644 --- a/crates/ruff_macros/src/register_rules.rs +++ b/crates/ruff_macros/src/register_rules.rs @@ -56,6 +56,7 @@ pub fn register_rules(input: &Input) -> proc_macro2::TokenStream { Hash, PartialOrd, Ord, + ::ruff_macros::CacheKey, AsRefStr, ::strum_macros::IntoStaticStr, )]
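
For context on how the pieces introduced above fit together: settings types now derive `CacheKey` (or implement it by hand, as `FilePatternSet` does), and the cache in `ruff_cli` feeds them into a `CacheKeyHasher` to obtain a stable `u64` fingerprint. The sketch below mirrors the tests added in `crates/ruff_cache/tests/cache_key.rs`; it is illustrative only and not part of the patch. The `LinterSettings` type and the `settings_fingerprint` helper are hypothetical names, and the example assumes `ruff_cache` and `ruff_macros` are available as path dependencies, as in that test crate.

```rust
use std::hash::Hasher;

use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_macros::CacheKey;

// Hypothetical settings type: every field implements `CacheKey`,
// so the derive composes their sub-keys in declaration order.
#[derive(Debug, Default, CacheKey)]
struct LinterSettings {
    max_complexity: usize,
    ignore_names: Vec<String>,
}

// Reduce a settings value to a single cache-key fingerprint, the way
// `ruff_cli`'s `cache_key` function does for `Settings` and `Autofix`.
fn settings_fingerprint(settings: &LinterSettings) -> u64 {
    let mut hasher = CacheKeyHasher::new();
    settings.cache_key(&mut hasher);
    // `CacheKeyHasher` derefs to `DefaultHasher`, so `finish` is available
    // once `std::hash::Hasher` is in scope.
    hasher.finish()
}

fn main() {
    let settings = LinterSettings {
        max_complexity: 10,
        ignore_names: vec!["setUp".to_string()],
    };
    println!("cache key: {}", settings_fingerprint(&settings));
}
```

Unlike the old `Hashable*` wrappers, nothing here needs `.into()` conversions at construction sites: collections such as `Vec`, `FxHashMap`, and `FxHashSet` get deterministic keys directly from the `CacheKey` implementations in `ruff_cache`, which sort unordered containers before hashing.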