From 7e5e03fb153fc17af8f4e6392a8b0a1f324e7106 Mon Sep 17 00:00:00 2001
From: Charlie Marsh
Date: Mon, 31 Oct 2022 11:34:40 -0400
Subject: [PATCH] Add a Flake8-to-Ruff configuration conversion tool (#527)

---
 Cargo.lock                             |   7 +
 Cargo.toml                             |   1 +
 examples/flake8_to_ruff.rs             |  35 +++
 examples/generate_check_code_prefix.rs |   2 +-
 src/checks_gen.rs                      |   4 +-
 src/flake8_quotes/settings.rs          |   4 +-
 src/flake8_to_ruff/mod.rs              |  56 ++++
 src/flake8_to_ruff/parser.rs           | 360 +++++++++++++++++++++++++
 src/lib.rs                             |   1 +
 src/pep8_naming/settings.rs            |   4 +-
 src/settings/options.rs                |   4 +-
 src/settings/pyproject.rs              |  18 +-
 src/settings/types.rs                  |  12 +-
 13 files changed, 494 insertions(+), 14 deletions(-)
 create mode 100644 examples/flake8_to_ruff.rs
 create mode 100644 src/flake8_to_ruff/mod.rs
 create mode 100644 src/flake8_to_ruff/parser.rs

diff --git a/Cargo.lock b/Cargo.lock
index d1dcb22af7..4f7adc7487 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -571,6 +571,12 @@ dependencies = [
  "cache-padded",
 ]
 
+[[package]]
+name = "configparser"
+version = "3.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5458d9d1a587efaf5091602c59d299696a3877a439c8f6d461a2d3cce11df87a"
+
 [[package]]
 name = "console"
 version = "0.15.2"
@@ -2202,6 +2208,7 @@ dependencies = [
  "codegen",
  "colored",
  "common-path",
+ "configparser",
 "criterion",
 "dirs 4.0.0",
 "fern",
diff --git a/Cargo.toml b/Cargo.toml
index ae009716a5..bf290d20c2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,7 @@ chrono = { version = "0.4.21" }
 clap = { version = "4.0.1", features = ["derive"] }
 colored = { version = "2.0.0" }
 common-path = { version = "1.0.0" }
+configparser = { version = "3.0.2" }
 dirs = { version = "4.0.0" }
 fern = { version = "0.6.1" }
 filetime = { version = "0.2.17" }
diff --git a/examples/flake8_to_ruff.rs b/examples/flake8_to_ruff.rs
new file mode 100644
index 0000000000..74d4b328a7
--- /dev/null
+++ b/examples/flake8_to_ruff.rs
@@ -0,0 +1,35 @@
+//! Utility to generate Ruff's pyproject.toml section from a Flake8 INI file.
+
+use std::path::PathBuf;
+
+use anyhow::Result;
+use clap::Parser;
+use configparser::ini::Ini;
+
+use ruff::flake8_to_ruff;
+
+#[derive(Parser)]
+#[command(
+    about = "Convert an existing Flake8 configuration to Ruff.",
+    long_about = None
+)]
+struct Cli {
+    /// Path to the Flake8 configuration file (e.g., 'setup.cfg', 'tox.ini', or '.flake8').
+    #[arg(required = true)]
+    file: PathBuf,
+}
+
+fn main() -> Result<()> {
+    let cli = Cli::parse();
+
+    // Read the INI file.
+    let mut ini = Ini::new_cs();
+    ini.set_multiline(true);
+    let config = ini.load(cli.file).map_err(|msg| anyhow::anyhow!(msg))?;
+
+    // Create the pyproject.toml.
+    let pyproject = flake8_to_ruff::convert(config)?;
+    println!("{}", toml::to_string(&pyproject)?);
+
+    Ok(())
+}
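// Usage sketch for the example binary above (not part of this patch; the sample file
// below is an assumption). Given a Flake8 config such as:
//
//     [flake8]
//     extend-ignore = E501
//     exclude = docs/*,build/*
//
// running `cargo run --example flake8_to_ruff -- .flake8` parses the file with
// `configparser` into a nested `section -> key -> Option<value>` map and prints the
// generated `[tool.ruff]` table (here with `extend-ignore` and `exclude` set) to
// stdout, ready to paste into pyproject.toml. Keys the converter does not recognize
// are reported on stderr and skipped.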
diff --git a/examples/generate_check_code_prefix.rs b/examples/generate_check_code_prefix.rs
index 547d493d06..3a005f7755 100644
--- a/examples/generate_check_code_prefix.rs
+++ b/examples/generate_check_code_prefix.rs
@@ -96,7 +96,7 @@ fn main() {
     println!("//! File automatically generated by examples/generate_check_code_prefix.rs.");
     println!();
     println!("use serde::{{Deserialize, Serialize}};");
-    println!("use strum_macros::EnumString;");
+    println!("use strum_macros::{{AsRefStr, EnumString}};");
     println!();
     println!("use crate::checks::CheckCode;");
     println!();
diff --git a/src/checks_gen.rs b/src/checks_gen.rs
index 12ae884289..f4125b20b7 100644
--- a/src/checks_gen.rs
+++ b/src/checks_gen.rs
@@ -1,11 +1,11 @@
 //! File automatically generated by examples/generate_check_code_prefix.rs.
 
 use serde::{Deserialize, Serialize};
-use strum_macros::EnumString;
+use strum_macros::{AsRefStr, EnumString};
 
 use crate::checks::CheckCode;
 
-#[derive(EnumString, Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
+#[derive(AsRefStr, EnumString, Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
 pub enum CheckCodePrefix {
     A,
     A0,
diff --git a/src/flake8_quotes/settings.rs b/src/flake8_quotes/settings.rs
index 29ad1db65f..dab7e5076c 100644
--- a/src/flake8_quotes/settings.rs
+++ b/src/flake8_quotes/settings.rs
@@ -2,14 +2,14 @@
 
 use serde::{Deserialize, Serialize};
 
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(deny_unknown_fields, rename_all = "kebab-case")]
 pub enum Quote {
     Single,
     Double,
 }
 
-#[derive(Debug, PartialEq, Eq, Deserialize)]
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(deny_unknown_fields, rename_all = "kebab-case")]
 pub struct Options {
     pub inline_quotes: Option<Quote>,
diff --git a/src/flake8_to_ruff/mod.rs b/src/flake8_to_ruff/mod.rs
new file mode 100644
index 0000000000..05575d023f
--- /dev/null
+++ b/src/flake8_to_ruff/mod.rs
@@ -0,0 +1,56 @@
+//! Utility to generate Ruff's pyproject.toml section from a Flake8 INI file.
+
+use std::collections::HashMap;
+
+use anyhow::Result;
+
+use crate::settings::options::Options;
+use crate::settings::pyproject::Pyproject;
+
+mod parser;
+
+pub fn convert(config: HashMap<String, HashMap<String, Option<String>>>) -> Result<Pyproject> {
+    // Extract the Flake8 section.
+    let flake8 = config
+        .get("flake8")
+        .expect("Unable to find flake8 section in INI file.");
+
+    // Parse each supported option.
+    let mut options: Options = Default::default();
+    for (key, value) in flake8 {
+        match key.as_str() {
+            "line-length" | "line_length" => match value.clone().unwrap().parse::<usize>() {
+                Ok(line_length) => options.line_length = Some(line_length),
+                Err(e) => eprintln!("Unable to parse '{key}' property: {e}"),
+            },
+            "select" => {
+                options.select = Some(parser::parse_prefix_codes(value.as_ref().unwrap()));
+            }
+            "extend-select" | "extend_select" => {
+                options.extend_select = Some(parser::parse_prefix_codes(value.as_ref().unwrap()));
+            }
+            "ignore" => {
+                options.ignore = Some(parser::parse_prefix_codes(value.as_ref().unwrap()));
+            }
+            "extend-ignore" | "extend_ignore" => {
+                options.extend_ignore = Some(parser::parse_prefix_codes(value.as_ref().unwrap()));
+            }
+            "exclude" => {
+                options.exclude = Some(parser::parse_strings(value.as_ref().unwrap()));
+            }
+            "extend-exclude" | "extend_exclude" => {
+                options.extend_exclude = Some(parser::parse_strings(value.as_ref().unwrap()));
+            }
+            "per-file-ignores" | "per_file_ignores" => {
+                match parser::parse_files_to_codes_mapping(value.as_ref().unwrap()) {
+                    Ok(per_file_ignores) => options.per_file_ignores = Some(per_file_ignores),
+                    Err(e) => eprintln!("Unable to parse '{key}' property: {e}"),
+                }
+            }
+            _ => eprintln!("Skipping unsupported property: {key}"),
+        }
+    }
+
+    // Create the pyproject.toml.
+    Ok(Pyproject::new(options))
+}
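// Illustrative sketch (not part of this patch): calling `convert` directly with a
// hand-built map that mirrors what `configparser` produces for a `[flake8]` section.
// Both dashed and underscored key spellings are accepted; unknown keys are reported on
// stderr and skipped. The key/value choices below are assumptions for the example.
use std::collections::HashMap;

fn convert_demo() -> anyhow::Result<()> {
    let mut flake8: HashMap<String, Option<String>> = HashMap::new();
    flake8.insert("extend-select".to_string(), Some("F401, E501".to_string()));
    flake8.insert("per_file_ignores".to_string(), Some("docs/*: D".to_string()));

    let mut config = HashMap::new();
    config.insert("flake8".to_string(), flake8);

    let pyproject = ruff::flake8_to_ruff::convert(config)?;
    // Serializes to a `[tool.ruff]` table with `extend-select` and `per-file-ignores` set.
    println!("{}", toml::to_string(&pyproject)?);
    Ok(())
}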
diff --git a/src/flake8_to_ruff/parser.rs b/src/flake8_to_ruff/parser.rs
new file mode 100644
index 0000000000..e6cf91d79d
--- /dev/null
+++ b/src/flake8_to_ruff/parser.rs
@@ -0,0 +1,360 @@
+use std::str::FromStr;
+
+use anyhow::Result;
+use once_cell::sync::Lazy;
+use regex::Regex;
+
+use crate::checks_gen::CheckCodePrefix;
+use crate::settings::types::StrCheckCodePair;
+
+static COMMA_SEPARATED_LIST_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[,\s]").unwrap());
+
+/// Parse a comma-separated list of `CheckCodePrefix` values (e.g., "F401,E501").
+pub fn parse_prefix_codes(value: &str) -> Vec<CheckCodePrefix> {
+    let mut codes: Vec<CheckCodePrefix> = vec![];
+    for code in COMMA_SEPARATED_LIST_RE.split(value) {
+        let code = code.trim();
+        if code.is_empty() {
+            continue;
+        }
+        if let Ok(code) = CheckCodePrefix::from_str(code) {
+            codes.push(code);
+        } else {
+            eprintln!("Unsupported prefix code: {code}");
+        }
+    }
+    codes
+}
+
+/// Parse a comma-separated list of strings (e.g., "__init__.py,__main__.py").
+pub fn parse_strings(value: &str) -> Vec<String> {
+    COMMA_SEPARATED_LIST_RE
+        .split(value)
+        .map(|part| part.trim())
+        .filter(|part| !part.is_empty())
+        .map(String::from)
+        .collect()
+}
+
+#[derive(Debug)]
+struct Token {
+    token_name: TokenType,
+    src: String,
+}
+
+#[derive(Debug)]
+enum TokenType {
+    Code,
+    File,
+    Colon,
+    Comma,
+    Ws,
+    Eof,
+}
+
+struct State {
+    seen_sep: bool,
+    seen_colon: bool,
+    filenames: Vec<String>,
+    codes: Vec<String>,
+}
+
+impl State {
+    fn new() -> Self {
+        Self {
+            seen_sep: true,
+            seen_colon: false,
+            filenames: vec![],
+            codes: vec![],
+        }
+    }
+
+    /// Generate the list of `StrCheckCodePair` pairs for the current state.
+    fn parse(&self) -> Vec<StrCheckCodePair> {
+        let mut codes: Vec<StrCheckCodePair> = vec![];
+        for code in &self.codes {
+            match CheckCodePrefix::from_str(code) {
+                Ok(code) => {
+                    for filename in &self.filenames {
+                        codes.push(StrCheckCodePair {
+                            pattern: filename.clone(),
+                            code: code.clone(),
+                        });
+                    }
+                }
+                Err(_) => eprintln!("Skipping unrecognized prefix: {}", code),
+            }
+        }
+        codes
+    }
+}
+
+/// Tokenize the raw 'files-to-codes' mapping.
+fn tokenize_files_to_codes_mapping(value: &str) -> Vec<Token> {
+    let mut tokens = vec![];
+    let mut i = 0;
+    while i < value.len() {
+        for (token_re, token_name) in [
+            (
+                Regex::new(r"([A-Z]+[0-9]*)(?:$|\s|,)").unwrap(),
+                TokenType::Code,
+            ),
+            (Regex::new(r"([^\s:,]+)").unwrap(), TokenType::File),
+            (Regex::new(r"(\s*:\s*)").unwrap(), TokenType::Colon),
+            (Regex::new(r"(\s*,\s*)").unwrap(), TokenType::Comma),
+            (Regex::new(r"(\s+)").unwrap(), TokenType::Ws),
+        ] {
+            if let Some(cap) = token_re.captures(&value[i..]) {
+                let mat = cap.get(1).unwrap();
+                if mat.start() == 0 {
+                    tokens.push(Token {
+                        token_name,
+                        src: mat.as_str().to_string().trim().to_string(),
+                    });
+                    i += mat.end();
+                    break;
+                }
+            }
+        }
+    }
+    tokens.push(Token {
+        token_name: TokenType::Eof,
+        src: "".to_string(),
+    });
+    tokens
+}
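// Illustrative sketch (not part of this patch): a test along these lines, placed in this
// module, shows the token stream the tokenizer above is expected to produce for a single
// "file: code" entry. The exact expectation is an assumption derived from the regexes
// above, not something this patch asserts.
#[test]
fn it_tokenizes_a_single_entry() {
    let tokens = tokenize_files_to_codes_mapping("docs/*: F401");
    let srcs: Vec<&str> = tokens.iter().map(|token| token.src.as_str()).collect();
    // File("docs/*"), Colon(":"), Code("F401"), plus the trailing Eof token.
    assert_eq!(srcs, vec!["docs/*", ":", "F401", ""]);
}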
+
+/// Parse a 'files-to-codes' mapping, mimicking Flake8's internal logic.
+///
+/// See: https://github.com/PyCQA/flake8/blob/7dfe99616fc2f07c0017df2ba5fa884158f3ea8a/src/flake8/utils.py#L45
+pub fn parse_files_to_codes_mapping(value: &str) -> Result<Vec<StrCheckCodePair>> {
+    if value.trim().is_empty() {
+        return Ok(vec![]);
+    }
+    let mut codes: Vec<StrCheckCodePair> = vec![];
+    let mut state = State::new();
+    for token in tokenize_files_to_codes_mapping(value) {
+        if matches!(token.token_name, TokenType::Comma | TokenType::Ws) {
+            state.seen_sep = true;
+        } else if !state.seen_colon {
+            if matches!(token.token_name, TokenType::Colon) {
+                state.seen_colon = true;
+                state.seen_sep = true;
+            } else if state.seen_sep && matches!(token.token_name, TokenType::File) {
+                state.filenames.push(token.src);
+                state.seen_sep = false;
+            } else {
+                return Err(anyhow::anyhow!("Unexpected token: {:?}", token.token_name));
+            }
+        } else {
+            if matches!(token.token_name, TokenType::Eof) {
+                codes.extend(state.parse());
+                state = State::new();
+            } else if state.seen_sep && matches!(token.token_name, TokenType::Code) {
+                state.codes.push(token.src);
+                state.seen_sep = false;
+            } else if state.seen_sep && matches!(token.token_name, TokenType::File) {
+                codes.extend(state.parse());
+                state = State::new();
+                state.filenames.push(token.src);
+                state.seen_sep = false;
+            } else {
+                return Err(anyhow::anyhow!("Unexpected token: {:?}", token.token_name));
+            }
+        }
+    }
+    Ok(codes)
+}
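// Illustrative sketch (not part of this patch): the pairs returned above serialize back
// into Flake8's "pattern:code" notation via the `Serialize` impl added to
// `StrCheckCodePair` at the end of this patch; `as_ref()` is what the new `AsRefStr`
// derive on `CheckCodePrefix` provides. Placed in this module, no extra imports are needed.
fn per_file_ignore_to_string(pair: &StrCheckCodePair) -> String {
    format!("{}:{}", pair.pattern, pair.code.as_ref())
}
// e.g. a pair with pattern "docs/*" and code `CheckCodePrefix::D` becomes "docs/*:D".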
+
+#[cfg(test)]
+mod tests {
+    use anyhow::Result;
+
+    use crate::checks_gen::CheckCodePrefix;
+    use crate::flake8_to_ruff::parser::{
+        parse_files_to_codes_mapping, parse_prefix_codes, parse_strings,
+    };
+    use crate::settings::types::StrCheckCodePair;
+
+    #[test]
+    fn it_parses_prefix_codes() {
+        let actual = parse_prefix_codes("");
+        let expected: Vec<CheckCodePrefix> = vec![];
+        assert_eq!(actual, expected);
+
+        let actual = parse_prefix_codes(" ");
+        let expected: Vec<CheckCodePrefix> = vec![];
+        assert_eq!(actual, expected);
+
+        let actual = parse_prefix_codes("F401");
+        let expected = vec![CheckCodePrefix::F401];
+        assert_eq!(actual, expected);
+
+        let actual = parse_prefix_codes("F401,");
+        let expected = vec![CheckCodePrefix::F401];
+        assert_eq!(actual, expected);
+
+        let actual = parse_prefix_codes("F401,E501");
+        let expected = vec![CheckCodePrefix::F401, CheckCodePrefix::E501];
+        assert_eq!(actual, expected);
+
+        let actual = parse_prefix_codes("F401, E501");
+        let expected = vec![CheckCodePrefix::F401, CheckCodePrefix::E501];
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn it_parses_strings() {
+        let actual = parse_strings("");
+        let expected: Vec<String> = vec![];
+        assert_eq!(actual, expected);
+
+        let actual = parse_strings(" ");
+        let expected: Vec<String> = vec![];
+        assert_eq!(actual, expected);
+
+        let actual = parse_strings("__init__.py");
+        let expected = vec!["__init__.py".to_string()];
+        assert_eq!(actual, expected);
+
+        let actual = parse_strings("__init__.py,");
+        let expected = vec!["__init__.py".to_string()];
+        assert_eq!(actual, expected);
+
+        let actual = parse_strings("__init__.py,__main__.py");
+        let expected = vec!["__init__.py".to_string(), "__main__.py".to_string()];
+        assert_eq!(actual, expected);
+
+        let actual = parse_strings("__init__.py, __main__.py");
+        let expected = vec!["__init__.py".to_string(), "__main__.py".to_string()];
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn it_parse_files_to_codes_mapping() -> Result<()> {
+        let actual = parse_files_to_codes_mapping("")?;
+        let expected: Vec<StrCheckCodePair> = vec![];
+        assert_eq!(actual, expected);
+
+        let actual = parse_files_to_codes_mapping(" ")?;
+        let expected: Vec<StrCheckCodePair> = vec![];
+        assert_eq!(actual, expected);
+
+        // Ex) locust
+        let actual = parse_files_to_codes_mapping(
+            "per-file-ignores =
+            locust/test/*: F841
+            examples/*: F841
+            *.pyi: E302,E704"
+                .strip_prefix("per-file-ignores =")
+                .unwrap(),
+        )?;
+        let expected: Vec<StrCheckCodePair> = vec![
+            StrCheckCodePair {
+                pattern: "locust/test/*".to_string(),
+                code: CheckCodePrefix::F841,
+            },
+            StrCheckCodePair {
+                pattern: "examples/*".to_string(),
+                code: CheckCodePrefix::F841,
+            },
+        ];
+        assert_eq!(actual, expected);
+
+        // Ex) celery
+        let actual = parse_files_to_codes_mapping(
+            "per-file-ignores =
+            t/*,setup.py,examples/*,docs/*,extra/*:
+                D,"
+                .strip_prefix("per-file-ignores =")
+                .unwrap(),
+        )?;
+        let expected: Vec<StrCheckCodePair> = vec![
+            StrCheckCodePair {
+                pattern: "t/*".to_string(),
+                code: CheckCodePrefix::D,
+            },
+            StrCheckCodePair {
+                pattern: "setup.py".to_string(),
+                code: CheckCodePrefix::D,
+            },
+            StrCheckCodePair {
+                pattern: "examples/*".to_string(),
+                code: CheckCodePrefix::D,
+            },
+            StrCheckCodePair {
+                pattern: "docs/*".to_string(),
+                code: CheckCodePrefix::D,
+            },
+            StrCheckCodePair {
+                pattern: "extra/*".to_string(),
+                code: CheckCodePrefix::D,
+            },
+        ];
+        assert_eq!(actual, expected);
+
+        // Ex) scrapy
+        let actual = parse_files_to_codes_mapping(
+            "per-file-ignores =
+            scrapy/__init__.py:E402
+            scrapy/core/downloader/handlers/http.py:F401
+            scrapy/http/__init__.py:F401
+            scrapy/linkextractors/__init__.py:E402,F401
+            scrapy/selector/__init__.py:F401
+            scrapy/spiders/__init__.py:E402,F401
+            scrapy/utils/url.py:F403,F405
+            tests/test_loader.py:E741"
+                .strip_prefix("per-file-ignores =")
+                .unwrap(),
+        )?;
+        let expected: Vec<StrCheckCodePair> = vec![
+            StrCheckCodePair {
+                pattern: "scrapy/__init__.py".to_string(),
+                code: CheckCodePrefix::E402,
+            },
+            StrCheckCodePair {
+                pattern: "scrapy/core/downloader/handlers/http.py".to_string(),
+                code: CheckCodePrefix::F401,
+            },
+            StrCheckCodePair {
+                pattern: "scrapy/http/__init__.py".to_string(),
+                code: CheckCodePrefix::F401,
+            },
+            StrCheckCodePair {
+                pattern: "scrapy/linkextractors/__init__.py".to_string(),
+                code: CheckCodePrefix::E402,
+            },
+            StrCheckCodePair {
+                pattern: "scrapy/linkextractors/__init__.py".to_string(),
+                code: CheckCodePrefix::F401,
+            },
+            StrCheckCodePair {
+                pattern: "scrapy/selector/__init__.py".to_string(),
+                code: CheckCodePrefix::F401,
+            },
+            StrCheckCodePair {
+                pattern: "scrapy/spiders/__init__.py".to_string(),
+                code: CheckCodePrefix::E402,
+            },
+            StrCheckCodePair {
+                pattern: "scrapy/spiders/__init__.py".to_string(),
+                code: CheckCodePrefix::F401,
+            },
+            StrCheckCodePair {
+                pattern: "scrapy/utils/url.py".to_string(),
+                code: CheckCodePrefix::F403,
+            },
+            StrCheckCodePair {
+                pattern: "scrapy/utils/url.py".to_string(),
+                code: CheckCodePrefix::F405,
+            },
+            StrCheckCodePair {
+                pattern: "tests/test_loader.py".to_string(),
+                code: CheckCodePrefix::E741,
+            },
+        ];
+        assert_eq!(actual, expected);
+
+        Ok(())
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 74c01b72e1..5519e7552f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,6 +31,7 @@ mod flake8_builtins;
 mod flake8_comprehensions;
 mod flake8_print;
 mod flake8_quotes;
+pub mod flake8_to_ruff;
 pub mod fs;
 pub mod linter;
 pub mod logging;
diff --git a/src/pep8_naming/settings.rs b/src/pep8_naming/settings.rs
index 2d917e21af..158cae4e19 100644
--- a/src/pep8_naming/settings.rs
+++ b/src/pep8_naming/settings.rs
@@ -1,6 +1,6 @@
 //! Settings for the `pep8-naming` plugin.
 
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 
 const IGNORE_NAMES: [&str; 12] = [
     "setUp",
@@ -21,7 +21,7 @@ const CLASSMETHOD_DECORATORS: [&str; 1] = ["classmethod"];
 
 const STATICMETHOD_DECORATORS: [&str; 1] = ["staticmethod"];
 
-#[derive(Debug, PartialEq, Eq, Deserialize)]
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(deny_unknown_fields, rename_all = "kebab-case")]
 pub struct Options {
     pub ignore_names: Option<Vec<String>>,
diff --git a/src/settings/options.rs b/src/settings/options.rs
index 0c14bdddcc..bb00bfdc80 100644
--- a/src/settings/options.rs
+++ b/src/settings/options.rs
@@ -1,12 +1,12 @@
 //! Options that the user can provide via pyproject.toml.
 
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 
 use crate::checks_gen::CheckCodePrefix;
 use crate::settings::types::{PythonVersion, StrCheckCodePair};
 use crate::{flake8_quotes, pep8_naming};
 
-#[derive(Debug, PartialEq, Eq, Deserialize, Default)]
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
 #[serde(deny_unknown_fields, rename_all = "kebab-case")]
 pub struct Options {
     pub line_length: Option<usize>,
diff --git a/src/settings/pyproject.rs b/src/settings/pyproject.rs
index 9ad68bd3c1..417e8f5bcb 100644
--- a/src/settings/pyproject.rs
+++ b/src/settings/pyproject.rs
@@ -6,21 +6,31 @@ use anyhow::Result;
 use common_path::common_path_all;
 use log::debug;
 use path_absolutize::Absolutize;
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 
 use crate::fs;
 use crate::settings::options::Options;
 
-#[derive(Debug, PartialEq, Eq, Deserialize)]
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
 struct Tools {
     ruff: Option<Options>,
 }
 
-#[derive(Debug, PartialEq, Eq, Deserialize)]
-struct Pyproject {
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
+pub struct Pyproject {
     tool: Option<Tools>,
 }
 
+impl Pyproject {
+    pub fn new(options: Options) -> Self {
+        Self {
+            tool: Some(Tools {
+                ruff: Some(options),
+            }),
+        }
+    }
+}
+
 fn parse_pyproject_toml(path: &Path) -> Result<Pyproject> {
     let contents = fs::read_file(path)?;
     toml::from_str(&contents).map_err(|e| e.into())
diff --git a/src/settings/types.rs b/src/settings/types.rs
index 8144d678b6..7b0847992c 100644
--- a/src/settings/types.rs
+++ b/src/settings/types.rs
@@ -5,7 +5,7 @@ use std::str::FromStr;
 
 use anyhow::{anyhow, Result};
 use glob::Pattern;
-use serde::{de, Deserialize, Deserializer, Serialize};
+use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
 
 use crate::checks::CheckCode;
 use crate::checks_gen::CheckCodePrefix;
@@ -107,6 +107,16 @@
     }
 }
 
+impl Serialize for StrCheckCodePair {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let as_str = format!("{}:{}", self.pattern, self.code.as_ref());
+        serializer.serialize_str(&as_str)
+    }
+}
+
 impl FromStr for StrCheckCodePair {
     type Err = anyhow::Error;