refactor: Decouple Rule from linter prefixes

543865c96b introduced
RuleCode::origin() -> RuleOrigin generation via a macro. While that
signature has since been renamed to Rule::origin() -> Linter, we actually
want to get rid of it, since rules and linters shouldn't be this tightly
coupled (one rule can exist in multiple linters).

Another disadvantage of the previous approach was that the prefixes
had to be defined in ruff_macros/src/prefixes.rs, which was easy to
miss when defining new linters in src/*. Case in point:
INP001 => violations::ImplicitNamespacePackage was in the meantime
added without ruff_macros/src/prefixes.rs being updated accordingly,
which resulted in `ruff --explain INP001` mistakenly reporting that the
rule belongs to isort (since INP001 starts with the isort prefix "I").
The derive proc macro introduced in this commit requires every variant
to have at least one #[prefix = "..."] attribute, eliminating such mistakes.
This commit is contained in:
Martin Fischer 2023-01-20 18:44:38 +01:00 committed by Charlie Marsh
parent b19258a243
commit 4e4643aa5d
7 changed files with 122 additions and 81 deletions

View File

@ -15,7 +15,7 @@ use ruff::cache::CACHE_DIR_NAME;
use ruff::linter::add_noqa_to_path; use ruff::linter::add_noqa_to_path;
use ruff::logging::LogLevel; use ruff::logging::LogLevel;
use ruff::message::{Location, Message}; use ruff::message::{Location, Message};
use ruff::registry::Rule; use ruff::registry::{Linter, ParseCode, Rule};
use ruff::resolver::{FileDiscovery, PyprojectDiscovery}; use ruff::resolver::{FileDiscovery, PyprojectDiscovery};
use ruff::settings::flags; use ruff::settings::flags;
use ruff::settings::types::SerializationFormat; use ruff::settings::types::SerializationFormat;
@ -291,10 +291,11 @@ struct Explanation<'a> {
/// Explain a `Rule` to the user. /// Explain a `Rule` to the user.
pub fn explain(rule: &Rule, format: SerializationFormat) -> Result<()> { pub fn explain(rule: &Rule, format: SerializationFormat) -> Result<()> {
let (linter, _) = Linter::parse_code(rule.code()).unwrap();
match format { match format {
SerializationFormat::Text | SerializationFormat::Grouped => { SerializationFormat::Text | SerializationFormat::Grouped => {
println!("{}\n", rule.as_ref()); println!("{}\n", rule.as_ref());
println!("Code: {} ({})\n", rule.code(), rule.origin().name()); println!("Code: {} ({})\n", rule.code(), linter.name());
if let Some(autofix) = rule.autofixable() { if let Some(autofix) = rule.autofixable() {
println!( println!(
@ -315,7 +316,7 @@ pub fn explain(rule: &Rule, format: SerializationFormat) -> Result<()> {
"{}", "{}",
serde_json::to_string_pretty(&Explanation { serde_json::to_string_pretty(&Explanation {
code: rule.code(), code: rule.code(),
linter: rule.origin().name(), linter: linter.name(),
summary: rule.message_formats()[0], summary: rule.message_formats()[0],
})? })?
); );

View File

@ -10,7 +10,6 @@ pub fn define_rule_mapping(mapping: &Mapping) -> proc_macro2::TokenStream {
let mut diagkind_variants = quote!(); let mut diagkind_variants = quote!();
let mut rule_message_formats_match_arms = quote!(); let mut rule_message_formats_match_arms = quote!();
let mut rule_autofixable_match_arms = quote!(); let mut rule_autofixable_match_arms = quote!();
let mut rule_origin_match_arms = quote!();
let mut rule_code_match_arms = quote!(); let mut rule_code_match_arms = quote!();
let mut rule_from_code_match_arms = quote!(); let mut rule_from_code_match_arms = quote!();
let mut diagkind_code_match_arms = quote!(); let mut diagkind_code_match_arms = quote!();
@ -29,8 +28,6 @@ pub fn define_rule_mapping(mapping: &Mapping) -> proc_macro2::TokenStream {
rule_message_formats_match_arms rule_message_formats_match_arms
.extend(quote! {Self::#name => <#path as Violation>::message_formats(),}); .extend(quote! {Self::#name => <#path as Violation>::message_formats(),});
rule_autofixable_match_arms.extend(quote! {Self::#name => <#path as Violation>::AUTOFIX,}); rule_autofixable_match_arms.extend(quote! {Self::#name => <#path as Violation>::AUTOFIX,});
let origin = get_origin(code);
rule_origin_match_arms.extend(quote! {Self::#name => Linter::#origin,});
rule_code_match_arms.extend(quote! {Self::#name => #code_str,}); rule_code_match_arms.extend(quote! {Self::#name => #code_str,});
rule_from_code_match_arms.extend(quote! {#code_str => Ok(&Rule::#name), }); rule_from_code_match_arms.extend(quote! {#code_str => Ok(&Rule::#name), });
diagkind_code_match_arms.extend(quote! {Self::#name(..) => &Rule::#name, }); diagkind_code_match_arms.extend(quote! {Self::#name(..) => &Rule::#name, });
@ -95,10 +92,6 @@ pub fn define_rule_mapping(mapping: &Mapping) -> proc_macro2::TokenStream {
match self { #rule_autofixable_match_arms } match self { #rule_autofixable_match_arms }
} }
pub fn origin(&self) -> Linter {
match self { #rule_origin_match_arms }
}
pub fn code(&self) -> &'static str { pub fn code(&self) -> &'static str {
match self { #rule_code_match_arms } match self { #rule_code_match_arms }
} }
@ -140,19 +133,6 @@ pub fn define_rule_mapping(mapping: &Mapping) -> proc_macro2::TokenStream {
} }
} }
fn get_origin(ident: &Ident) -> Ident {
let ident = ident.to_string();
let mut iter = crate::prefixes::PREFIX_TO_LINTER.iter();
let linter = loop {
let (prefix, linter) = iter
.next()
.unwrap_or_else(|| panic!("code doesn't start with any recognized prefix: {ident}"));
if ident.starts_with(prefix) {
break linter;
}
};
Ident::new(linter, Span::call_site())
}
pub struct Mapping { pub struct Mapping {
entries: Vec<(Ident, Path, Ident)>, entries: Vec<(Ident, Path, Ident)>,
} }

View File

@ -19,7 +19,7 @@ use syn::{parse_macro_input, DeriveInput, ItemFn};
mod config; mod config;
mod define_rule_mapping; mod define_rule_mapping;
mod derive_message_formats; mod derive_message_formats;
mod prefixes; mod parse_code;
mod rule_code_prefix; mod rule_code_prefix;
#[proc_macro_derive(ConfigurationOptions, attributes(option, doc, option_group))] #[proc_macro_derive(ConfigurationOptions, attributes(option, doc, option_group))]
@ -37,6 +37,15 @@ pub fn define_rule_mapping(item: proc_macro::TokenStream) -> proc_macro::TokenSt
define_rule_mapping::define_rule_mapping(&mapping).into() define_rule_mapping::define_rule_mapping(&mapping).into()
} }
/// Entry point for `#[derive(ParseCode)]`.
///
/// Delegates to `parse_code::derive_impl` and turns any `syn::Error` into a
/// `compile_error!` invocation so the failure surfaces as a normal compiler
/// diagnostic at the offending item.
///
/// NOTE(review): the function is named `derive_rule_code_prefix` but derives
/// `ParseCode` — presumably a leftover from an earlier name; confirm before
/// renaming (the exported derive name is what users see, not this fn name).
#[proc_macro_derive(ParseCode, attributes(prefix))]
pub fn derive_rule_code_prefix(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
    let input = parse_macro_input!(input as DeriveInput);
    parse_code::derive_impl(input)
        .unwrap_or_else(syn::Error::into_compile_error)
        .into()
}
#[proc_macro_attribute] #[proc_macro_attribute]
pub fn derive_message_formats(_attr: TokenStream, item: TokenStream) -> TokenStream { pub fn derive_message_formats(_attr: TokenStream, item: TokenStream) -> TokenStream {
let func = parse_macro_input!(item as ItemFn); let func = parse_macro_input!(item as ItemFn);

View File

@ -0,0 +1,54 @@
use quote::quote;
use syn::spanned::Spanned;
use syn::{Data, DataEnum, DeriveInput, Error, Lit, Meta, MetaNameValue};
pub fn derive_impl(input: DeriveInput) -> syn::Result<proc_macro2::TokenStream> {
let DeriveInput { ident, data: Data::Enum(DataEnum {
variants, ..
}), .. } = input else {
return Err(Error::new(input.ident.span(), "only named fields are supported"));
};
let mut parsed = Vec::new();
for variant in variants {
let prefix_attrs: Vec<_> = variant
.attrs
.iter()
.filter(|a| a.path.is_ident("prefix"))
.collect();
if prefix_attrs.is_empty() {
return Err(Error::new(
variant.span(),
r#"Missing [#prefix = "..."] attribute"#,
));
}
for attr in prefix_attrs {
let Ok(Meta::NameValue(MetaNameValue{lit: Lit::Str(lit), ..})) = attr.parse_meta() else {
return Err(Error::new(attr.span(), r#"expected attribute to be in the form of [#prefix = "..."]"#))
};
parsed.push((lit, variant.ident.clone()));
}
}
parsed.sort_by_key(|(prefix, _)| prefix.value().len());
let mut if_statements = quote!();
for (prefix, field) in parsed {
if_statements.extend(quote! {if let Some(rest) = code.strip_prefix(#prefix) {
return Some((#ident::#field, rest));
}});
}
Ok(quote! {
impl crate::registry::ParseCode for #ident {
fn parse_code(code: &str) -> Option<(Self, &str)> {
#if_statements
None
}
}
})
}

View File

@ -1,54 +0,0 @@
// Longer prefixes should come first so that you can find a linter for a code
// by simply picking the first entry that starts with the given prefix.
// NOTE(review): a linter can own more than one prefix — "E" and "W" both map
// to Pycodestyle below — so this is a prefix->linter table, not a bijection.
pub const PREFIX_TO_LINTER: &[(&str, &str)] = &[
("ANN", "Flake8Annotations"),
("ARG", "Flake8UnusedArguments"),
("A", "Flake8Builtins"),
("BLE", "Flake8BlindExcept"),
("B", "Flake8Bugbear"),
("C4", "Flake8Comprehensions"),
("C9", "McCabe"),
("COM", "Flake8Commas"),
("DTZ", "Flake8Datetimez"),
("D", "Pydocstyle"),
("ERA", "Eradicate"),
("EM", "Flake8ErrMsg"),
("E", "Pycodestyle"),
("FBT", "Flake8BooleanTrap"),
("F", "Pyflakes"),
("ICN", "Flake8ImportConventions"),
("ISC", "Flake8ImplicitStrConcat"),
("I", "Isort"),
("N", "PEP8Naming"),
("PD", "PandasVet"),
("PGH", "PygrepHooks"),
("PL", "Pylint"),
("PT", "Flake8PytestStyle"),
("Q", "Flake8Quotes"),
("RET", "Flake8Return"),
("SIM", "Flake8Simplify"),
("S", "Flake8Bandit"),
("T10", "Flake8Debugger"),
("T20", "Flake8Print"),
("TID", "Flake8TidyImports"),
("UP", "Pyupgrade"),
("W", "Pycodestyle"),
("YTT", "Flake82020"),
// NOTE(review): "PIE" and "RUF" break the otherwise alphabetical grouping —
// presumably appended later; ordering is still valid since no shorter entry
// above is a prefix of them (the `order` test enforces this invariant).
("PIE", "Flake8Pie"),
("RUF", "Ruff"),
];
#[cfg(test)]
mod tests {
    use super::PREFIX_TO_LINTER;

    /// No entry may be preceded by a shorter prefix of itself; otherwise
    /// first-match lookup would resolve the longer code to the wrong linter.
    #[test]
    fn order() {
        for (idx, (prefix, _)) in PREFIX_TO_LINTER.iter().enumerate() {
            let shadowed = PREFIX_TO_LINTER[..idx]
                .iter()
                .any(|(prior, _)| prefix.starts_with(prior));
            assert!(!shadowed, "{prefix} is shadowed by an earlier entry");
        }
    }
}

View File

@ -74,7 +74,8 @@ mod tests {
fp.write(f"{indent}// {plugin}") fp.write(f"{indent}// {plugin}")
fp.write("\n") fp.write("\n")
elif line.strip() == "Ruff,": elif line.strip() == '#[prefix = "RUF"]':
fp.write(f'{indent}#[prefix = "TODO"]\n')
fp.write(f"{indent}{pascal_case(plugin)},") fp.write(f"{indent}{pascal_case(plugin)},")
fp.write("\n") fp.write("\n")

View File

@ -2,6 +2,7 @@
use itertools::Itertools; use itertools::Itertools;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use ruff_macros::ParseCode;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use rustpython_parser::ast::Location; use rustpython_parser::ast::Location;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -432,46 +433,87 @@ ruff_macros::define_rule_mapping!(
RUF100 => violations::UnusedNOQA, RUF100 => violations::UnusedNOQA,
); );
#[derive(EnumIter, Debug, PartialEq, Eq)] #[derive(EnumIter, Debug, PartialEq, Eq, ParseCode)]
pub enum Linter { pub enum Linter {
#[prefix = "F"]
Pyflakes, Pyflakes,
#[prefix = "E"]
#[prefix = "W"]
Pycodestyle, Pycodestyle,
#[prefix = "C9"]
McCabe, McCabe,
#[prefix = "I"]
Isort, Isort,
#[prefix = "D"]
Pydocstyle, Pydocstyle,
#[prefix = "UP"]
Pyupgrade, Pyupgrade,
#[prefix = "N"]
PEP8Naming, PEP8Naming,
#[prefix = "YTT"]
Flake82020, Flake82020,
#[prefix = "ANN"]
Flake8Annotations, Flake8Annotations,
#[prefix = "S"]
Flake8Bandit, Flake8Bandit,
#[prefix = "BLE"]
Flake8BlindExcept, Flake8BlindExcept,
#[prefix = "FBT"]
Flake8BooleanTrap, Flake8BooleanTrap,
#[prefix = "B"]
Flake8Bugbear, Flake8Bugbear,
#[prefix = "A"]
Flake8Builtins, Flake8Builtins,
#[prefix = "C4"]
Flake8Comprehensions, Flake8Comprehensions,
#[prefix = "T10"]
Flake8Debugger, Flake8Debugger,
#[prefix = "EM"]
Flake8ErrMsg, Flake8ErrMsg,
#[prefix = "ISC"]
Flake8ImplicitStrConcat, Flake8ImplicitStrConcat,
#[prefix = "ICN"]
Flake8ImportConventions, Flake8ImportConventions,
#[prefix = "T20"]
Flake8Print, Flake8Print,
#[prefix = "PT"]
Flake8PytestStyle, Flake8PytestStyle,
#[prefix = "Q"]
Flake8Quotes, Flake8Quotes,
#[prefix = "RET"]
Flake8Return, Flake8Return,
#[prefix = "SIM"]
Flake8Simplify, Flake8Simplify,
#[prefix = "TID"]
Flake8TidyImports, Flake8TidyImports,
#[prefix = "ARG"]
Flake8UnusedArguments, Flake8UnusedArguments,
#[prefix = "DTZ"]
Flake8Datetimez, Flake8Datetimez,
#[prefix = "ERA"]
Eradicate, Eradicate,
#[prefix = "PD"]
PandasVet, PandasVet,
#[prefix = "PGH"]
PygrepHooks, PygrepHooks,
#[prefix = "PL"]
Pylint, Pylint,
#[prefix = "PIE"]
Flake8Pie, Flake8Pie,
#[prefix = "COM"]
Flake8Commas, Flake8Commas,
#[prefix = "INP"]
Flake8NoPep420, Flake8NoPep420,
#[prefix = "EXE"]
Flake8Executable, Flake8Executable,
#[prefix = "RUF"]
Ruff, Ruff,
} }
/// Parsing of a rule code by its registered prefix; implementations are
/// generated by `#[derive(ParseCode)]` from `#[prefix = "..."]` attributes.
pub trait ParseCode: Sized {
    /// Splits `code` at its longest matching prefix, returning the owning
    /// variant and the remaining suffix (e.g. "E501" -> (Pycodestyle, "501")),
    /// or `None` if no declared prefix matches.
    fn parse_code(code: &str) -> Option<(Self, &str)>;
}
pub enum Prefixes { pub enum Prefixes {
Single(RuleSelector), Single(RuleSelector),
Multiple(Vec<(RuleSelector, &'static str)>), Multiple(Vec<(RuleSelector, &'static str)>),
@ -688,7 +730,7 @@ pub static CODE_REDIRECTS: Lazy<FxHashMap<&'static str, Rule>> = Lazy::new(|| {
mod tests { mod tests {
use strum::IntoEnumIterator; use strum::IntoEnumIterator;
use crate::registry::Rule; use super::{Linter, ParseCode, Rule};
#[test] #[test]
fn check_code_serialization() { fn check_code_serialization() {
@ -699,4 +741,12 @@ mod tests {
); );
} }
} }
#[test]
fn test_linter_prefixes() {
    // Every registered rule code must resolve to some linter's prefix;
    // this catches rules added without a matching #[prefix] declaration.
    for rule in Rule::iter() {
        let code = rule.code();
        assert!(Linter::parse_code(code).is_some(), "couldn't parse {code:?}");
    }
}
} }