From 4e4643aa5d1587c024ce98f0c51363a4830b491d Mon Sep 17 00:00:00 2001
From: Martin Fischer <martin@push-f.com>
Date: Fri, 20 Jan 2023 18:44:38 +0100
Subject: [PATCH] refactor: Decouple Rule from linter prefixes

543865c96b29a9d3e8d086883cc126950e6a9968 introduced
RuleCode::origin() -> RuleOrigin generation via a macro, while that
signature now has been renamed to Rule::origin() -> Linter we actually
want to get rid of it since rules and linters shouldn't be this tightly
coupled (since one rule can exist in multiple linters).

Another disadvantage of the previous approach was that the prefixes
had to be defined in ruff_macros/src/prefixes.rs, which was easy to
miss when defining new linters in src/*, case in point
INP001 => violations::ImplicitNamespacePackage has in the meantime been
added without ruff_macros/src/prefixes.rs being updated accordingly
which resulted in `ruff --explain INP001` mistakenly reporting that the
rule belongs to isort (since INP001 starts with the isort prefix "I").
The derive proc macro introduced in this commit requires every variant
to have at least one #[prefix = "..."], eliminating such mistakes.
---
 ruff_cli/src/commands.rs               |  7 ++--
 ruff_macros/src/define_rule_mapping.rs | 20 ----------
 ruff_macros/src/lib.rs                 | 11 +++++-
 ruff_macros/src/parse_code.rs          | 54 ++++++++++++++++++++++++++
 ruff_macros/src/prefixes.rs            | 54 --------------------------
 scripts/add_plugin.py                  |  3 +-
 src/registry.rs                        | 54 +++++++++++++++++++++++++-
 7 files changed, 122 insertions(+), 81 deletions(-)
 create mode 100644 ruff_macros/src/parse_code.rs
 delete mode 100644 ruff_macros/src/prefixes.rs

diff --git a/ruff_cli/src/commands.rs b/ruff_cli/src/commands.rs
index 3f3f51ec42..08d28d944a 100644
--- a/ruff_cli/src/commands.rs
+++ b/ruff_cli/src/commands.rs
@@ -15,7 +15,7 @@ use ruff::cache::CACHE_DIR_NAME;
 use ruff::linter::add_noqa_to_path;
 use ruff::logging::LogLevel;
 use ruff::message::{Location, Message};
-use ruff::registry::Rule;
+use ruff::registry::{Linter, ParseCode, Rule};
 use ruff::resolver::{FileDiscovery, PyprojectDiscovery};
 use ruff::settings::flags;
 use ruff::settings::types::SerializationFormat;
@@ -291,10 +291,11 @@ struct Explanation<'a> {
 
 /// Explain a `Rule` to the user.
 pub fn explain(rule: &Rule, format: SerializationFormat) -> Result<()> {
+    let (linter, _) = Linter::parse_code(rule.code()).unwrap();
     match format {
         SerializationFormat::Text | SerializationFormat::Grouped => {
             println!("{}\n", rule.as_ref());
-            println!("Code: {} ({})\n", rule.code(), rule.origin().name());
+            println!("Code: {} ({})\n", rule.code(), linter.name());
 
             if let Some(autofix) = rule.autofixable() {
                 println!(
@@ -315,7 +316,7 @@ pub fn explain(rule: &Rule, format: SerializationFormat) -> Result<()> {
                 "{}",
                 serde_json::to_string_pretty(&Explanation {
                     code: rule.code(),
-                    linter: rule.origin().name(),
+                    linter: linter.name(),
                     summary: rule.message_formats()[0],
                 })?
             );
diff --git a/ruff_macros/src/define_rule_mapping.rs b/ruff_macros/src/define_rule_mapping.rs
index d142fcb3b6..c96fb5d671 100644
--- a/ruff_macros/src/define_rule_mapping.rs
+++ b/ruff_macros/src/define_rule_mapping.rs
@@ -10,7 +10,6 @@ pub fn define_rule_mapping(mapping: &Mapping) -> proc_macro2::TokenStream {
     let mut diagkind_variants = quote!();
     let mut rule_message_formats_match_arms = quote!();
     let mut rule_autofixable_match_arms = quote!();
-    let mut rule_origin_match_arms = quote!();
     let mut rule_code_match_arms = quote!();
     let mut rule_from_code_match_arms = quote!();
     let mut diagkind_code_match_arms = quote!();
@@ -29,8 +28,6 @@ pub fn define_rule_mapping(mapping: &Mapping) -> proc_macro2::TokenStream {
         rule_message_formats_match_arms
             .extend(quote! {Self::#name => <#path as Violation>::message_formats(),});
         rule_autofixable_match_arms.extend(quote! {Self::#name => <#path as Violation>::AUTOFIX,});
-        let origin = get_origin(code);
-        rule_origin_match_arms.extend(quote! {Self::#name => Linter::#origin,});
         rule_code_match_arms.extend(quote! {Self::#name => #code_str,});
         rule_from_code_match_arms.extend(quote! {#code_str => Ok(&Rule::#name), });
         diagkind_code_match_arms.extend(quote! {Self::#name(..) => &Rule::#name, });
@@ -95,10 +92,6 @@ pub fn define_rule_mapping(mapping: &Mapping) -> proc_macro2::TokenStream {
                 match self { #rule_autofixable_match_arms }
             }
 
-            pub fn origin(&self) -> Linter {
-                match self { #rule_origin_match_arms }
-            }
-
             pub fn code(&self) -> &'static str {
                 match self { #rule_code_match_arms }
             }
@@ -140,19 +133,6 @@ pub fn define_rule_mapping(mapping: &Mapping) -> proc_macro2::TokenStream {
     }
 }
 
-fn get_origin(ident: &Ident) -> Ident {
-    let ident = ident.to_string();
-    let mut iter = crate::prefixes::PREFIX_TO_LINTER.iter();
-    let linter = loop {
-        let (prefix, linter) = iter
-            .next()
-            .unwrap_or_else(|| panic!("code doesn't start with any recognized prefix: {ident}"));
-        if ident.starts_with(prefix) {
-            break linter;
-        }
-    };
-    Ident::new(linter, Span::call_site())
-}
 pub struct Mapping {
     entries: Vec<(Ident, Path, Ident)>,
 }
diff --git a/ruff_macros/src/lib.rs b/ruff_macros/src/lib.rs
index d4b19dc521..2aa086b316 100644
--- a/ruff_macros/src/lib.rs
+++ b/ruff_macros/src/lib.rs
@@ -19,7 +19,7 @@ use syn::{parse_macro_input, DeriveInput, ItemFn};
 mod config;
 mod define_rule_mapping;
 mod derive_message_formats;
-mod prefixes;
+mod parse_code;
 mod rule_code_prefix;
 
 #[proc_macro_derive(ConfigurationOptions, attributes(option, doc, option_group))]
@@ -37,6 +37,15 @@ pub fn define_rule_mapping(item: proc_macro::TokenStream) -> proc_macro::TokenSt
     define_rule_mapping::define_rule_mapping(&mapping).into()
 }
 
+#[proc_macro_derive(ParseCode, attributes(prefix))]
+pub fn derive_rule_code_prefix(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
+    let input = parse_macro_input!(input as DeriveInput);
+
+    parse_code::derive_impl(input)
+        .unwrap_or_else(syn::Error::into_compile_error)
+        .into()
+}
+
 #[proc_macro_attribute]
 pub fn derive_message_formats(_attr: TokenStream, item: TokenStream) -> TokenStream {
     let func = parse_macro_input!(item as ItemFn);
diff --git a/ruff_macros/src/parse_code.rs b/ruff_macros/src/parse_code.rs
new file mode 100644
index 0000000000..50c1f8058c
--- /dev/null
+++ b/ruff_macros/src/parse_code.rs
@@ -0,0 +1,54 @@
+use quote::quote;
+use syn::spanned::Spanned;
+use syn::{Data, DataEnum, DeriveInput, Error, Lit, Meta, MetaNameValue};
+
+pub fn derive_impl(input: DeriveInput) -> syn::Result<proc_macro2::TokenStream> {
+    let DeriveInput { ident, data: Data::Enum(DataEnum {
+        variants, ..
+    }), .. } = input else {
+        return Err(Error::new(input.ident.span(), "only named fields are supported"));
+    };
+
+    let mut parsed = Vec::new();
+
+    for variant in variants {
+        let prefix_attrs: Vec<_> = variant
+            .attrs
+            .iter()
+            .filter(|a| a.path.is_ident("prefix"))
+            .collect();
+
+        if prefix_attrs.is_empty() {
+            return Err(Error::new(
+                variant.span(),
+                r#"Missing [#prefix = "..."] attribute"#,
+            ));
+        }
+
+        for attr in prefix_attrs {
+            let Ok(Meta::NameValue(MetaNameValue{lit: Lit::Str(lit), ..})) = attr.parse_meta() else {
+                return Err(Error::new(attr.span(), r#"expected attribute to be in the form of [#prefix = "..."]"#))
+            };
+            parsed.push((lit, variant.ident.clone()));
+        }
+    }
+
+    parsed.sort_by_key(|(prefix, _)| prefix.value().len());
+
+    let mut if_statements = quote!();
+
+    for (prefix, field) in parsed {
+        if_statements.extend(quote! {if let Some(rest) = code.strip_prefix(#prefix) {
+            return Some((#ident::#field, rest));
+        }});
+    }
+
+    Ok(quote! {
+        impl crate::registry::ParseCode for #ident {
+            fn parse_code(code: &str) -> Option<(Self, &str)> {
+                #if_statements
+                None
+            }
+        }
+    })
+}
diff --git a/ruff_macros/src/prefixes.rs b/ruff_macros/src/prefixes.rs
deleted file mode 100644
index 7a096d2d9a..0000000000
--- a/ruff_macros/src/prefixes.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-// Longer prefixes should come first so that you can find a linter for a code
-// by simply picking the first entry that starts with the given prefix.
-
-pub const PREFIX_TO_LINTER: &[(&str, &str)] = &[
-    ("ANN", "Flake8Annotations"),
-    ("ARG", "Flake8UnusedArguments"),
-    ("A", "Flake8Builtins"),
-    ("BLE", "Flake8BlindExcept"),
-    ("B", "Flake8Bugbear"),
-    ("C4", "Flake8Comprehensions"),
-    ("C9", "McCabe"),
-    ("COM", "Flake8Commas"),
-    ("DTZ", "Flake8Datetimez"),
-    ("D", "Pydocstyle"),
-    ("ERA", "Eradicate"),
-    ("EM", "Flake8ErrMsg"),
-    ("E", "Pycodestyle"),
-    ("FBT", "Flake8BooleanTrap"),
-    ("F", "Pyflakes"),
-    ("ICN", "Flake8ImportConventions"),
-    ("ISC", "Flake8ImplicitStrConcat"),
-    ("I", "Isort"),
-    ("N", "PEP8Naming"),
-    ("PD", "PandasVet"),
-    ("PGH", "PygrepHooks"),
-    ("PL", "Pylint"),
-    ("PT", "Flake8PytestStyle"),
-    ("Q", "Flake8Quotes"),
-    ("RET", "Flake8Return"),
-    ("SIM", "Flake8Simplify"),
-    ("S", "Flake8Bandit"),
-    ("T10", "Flake8Debugger"),
-    ("T20", "Flake8Print"),
-    ("TID", "Flake8TidyImports"),
-    ("UP", "Pyupgrade"),
-    ("W", "Pycodestyle"),
-    ("YTT", "Flake82020"),
-    ("PIE", "Flake8Pie"),
-    ("RUF", "Ruff"),
-];
-
-#[cfg(test)]
-mod tests {
-    use super::PREFIX_TO_LINTER;
-
-    #[test]
-    fn order() {
-        for (idx, (prefix, _)) in PREFIX_TO_LINTER.iter().enumerate() {
-            for (prior_prefix, _) in PREFIX_TO_LINTER[..idx].iter() {
-                assert!(!prefix.starts_with(prior_prefix));
-            }
-        }
-    }
-}
diff --git a/scripts/add_plugin.py b/scripts/add_plugin.py
index 7b9e851b0a..3a7cc80fbc 100755
--- a/scripts/add_plugin.py
+++ b/scripts/add_plugin.py
@@ -74,7 +74,8 @@ mod tests {
                 fp.write(f"{indent}// {plugin}")
                 fp.write("\n")
 
-            elif line.strip() == "Ruff,":
+            elif line.strip() == '#[prefix = "RUF"]':
+                fp.write(f'{indent}#[prefix = "TODO"]\n')
                 fp.write(f"{indent}{pascal_case(plugin)},")
                 fp.write("\n")
 
diff --git a/src/registry.rs b/src/registry.rs
index a68e38b96c..189c6854bd 100644
--- a/src/registry.rs
+++ b/src/registry.rs
@@ -2,6 +2,7 @@
 
 use itertools::Itertools;
 use once_cell::sync::Lazy;
+use ruff_macros::ParseCode;
 use rustc_hash::FxHashMap;
 use rustpython_parser::ast::Location;
 use serde::{Deserialize, Serialize};
@@ -432,46 +433,87 @@ ruff_macros::define_rule_mapping!(
     RUF100 => violations::UnusedNOQA,
 );
 
-#[derive(EnumIter, Debug, PartialEq, Eq)]
+#[derive(EnumIter, Debug, PartialEq, Eq, ParseCode)]
 pub enum Linter {
+    #[prefix = "F"]
     Pyflakes,
+    #[prefix = "E"]
+    #[prefix = "W"]
     Pycodestyle,
+    #[prefix = "C9"]
     McCabe,
+    #[prefix = "I"]
     Isort,
+    #[prefix = "D"]
     Pydocstyle,
+    #[prefix = "UP"]
     Pyupgrade,
+    #[prefix = "N"]
     PEP8Naming,
+    #[prefix = "YTT"]
     Flake82020,
+    #[prefix = "ANN"]
     Flake8Annotations,
+    #[prefix = "S"]
     Flake8Bandit,
+    #[prefix = "BLE"]
     Flake8BlindExcept,
+    #[prefix = "FBT"]
     Flake8BooleanTrap,
+    #[prefix = "B"]
     Flake8Bugbear,
+    #[prefix = "A"]
     Flake8Builtins,
+    #[prefix = "C4"]
     Flake8Comprehensions,
+    #[prefix = "T10"]
     Flake8Debugger,
+    #[prefix = "EM"]
     Flake8ErrMsg,
+    #[prefix = "ISC"]
     Flake8ImplicitStrConcat,
+    #[prefix = "ICN"]
     Flake8ImportConventions,
+    #[prefix = "T20"]
     Flake8Print,
+    #[prefix = "PT"]
     Flake8PytestStyle,
+    #[prefix = "Q"]
     Flake8Quotes,
+    #[prefix = "RET"]
     Flake8Return,
+    #[prefix = "SIM"]
     Flake8Simplify,
+    #[prefix = "TID"]
     Flake8TidyImports,
+    #[prefix = "ARG"]
     Flake8UnusedArguments,
+    #[prefix = "DTZ"]
     Flake8Datetimez,
+    #[prefix = "ERA"]
     Eradicate,
+    #[prefix = "PD"]
     PandasVet,
+    #[prefix = "PGH"]
     PygrepHooks,
+    #[prefix = "PL"]
     Pylint,
+    #[prefix = "PIE"]
     Flake8Pie,
+    #[prefix = "COM"]
     Flake8Commas,
+    #[prefix = "INP"]
     Flake8NoPep420,
+    #[prefix = "EXE"]
     Flake8Executable,
+    #[prefix = "RUF"]
     Ruff,
 }
 
+pub trait ParseCode: Sized {
+    fn parse_code(code: &str) -> Option<(Self, &str)>;
+}
+
 pub enum Prefixes {
     Single(RuleSelector),
     Multiple(Vec<(RuleSelector, &'static str)>),
@@ -688,7 +730,7 @@ pub static CODE_REDIRECTS: Lazy<FxHashMap<&'static str, Rule>> = Lazy::new(|| {
 mod tests {
     use strum::IntoEnumIterator;
 
-    use crate::registry::Rule;
+    use super::{Linter, ParseCode, Rule};
 
     #[test]
     fn check_code_serialization() {
@@ -699,4 +741,12 @@ mod tests {
             );
         }
     }
+
+    #[test]
+    fn test_linter_prefixes() {
+        for rule in Rule::iter() {
+            Linter::parse_code(rule.code())
+                .unwrap_or_else(|| panic!("couldn't parse {:?}", rule.code()));
+        }
+    }
 }