diff --git a/Cargo.lock b/Cargo.lock index 29f8c4a60c..2e3c77221f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1871,9 +1871,9 @@ dependencies = [ "ropey", "ruff_macros", "rustc-hash", - "rustpython-ast", - "rustpython-common", - "rustpython-parser", + "rustpython-ast 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c)", + "rustpython-common 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c)", + "rustpython-parser 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c)", "schemars", "semver", "serde", @@ -1939,9 +1939,9 @@ dependencies = [ "once_cell", "ruff", "ruff_cli", - "rustpython-ast", - "rustpython-common", - "rustpython-parser", + "rustpython-ast 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=ff90fe52eea578c8ebdd9d95e078cc041a5959fa)", + "rustpython-common 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=ff90fe52eea578c8ebdd9d95e078cc041a5959fa)", + "rustpython-parser 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=ff90fe52eea578c8ebdd9d95e078cc041a5959fa)", "schemars", "serde_json", "strum", @@ -2002,14 +2002,49 @@ dependencies = [ "webpki", ] +[[package]] +name = "rustpython-ast" +version = "0.2.0" +source = "git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c#62aa942bf506ea3d41ed0503b947b84141fdaa3c" +dependencies = [ + "num-bigint", + "rustpython-common 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c)", + "rustpython-compiler-core 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c)", +] + [[package]] name = "rustpython-ast" version = "0.2.0" source = "git+https://github.com/RustPython/RustPython.git?rev=ff90fe52eea578c8ebdd9d95e078cc041a5959fa#ff90fe52eea578c8ebdd9d95e078cc041a5959fa" dependencies = [ "num-bigint", - "rustpython-common", - "rustpython-compiler-core", + "rustpython-common 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=ff90fe52eea578c8ebdd9d95e078cc041a5959fa)", + "rustpython-compiler-core 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=ff90fe52eea578c8ebdd9d95e078cc041a5959fa)", +] + +[[package]] +name = "rustpython-common" +version = "0.2.0" +source = "git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c#62aa942bf506ea3d41ed0503b947b84141fdaa3c" +dependencies = [ + "ascii", + "bitflags", + "cfg-if", + "hexf-parse", + "itertools", + "lexical-parse-float", + "libc", + "lock_api", + "num-bigint", + "num-complex", + "num-traits", + "once_cell", + "radium", + "rand", + "siphasher", + "unic-ucd-category", + "volatile", + "widestring", ] [[package]] @@ -2037,6 +2072,23 @@ dependencies = [ "widestring", ] +[[package]] +name = "rustpython-compiler-core" +version = "0.2.0" +source = "git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c#62aa942bf506ea3d41ed0503b947b84141fdaa3c" +dependencies = [ + "bincode", + "bitflags", + "bstr 0.2.17", + "itertools", + "lz4_flex", + "num-bigint", + "num-complex", + "num_enum", + "serde", + "thiserror", +] + [[package]] name = "rustpython-compiler-core" version = "0.2.0" @@ -2054,6 +2106,31 @@ dependencies = [ "thiserror", ] +[[package]] +name = "rustpython-parser" +version = "0.2.0" +source = "git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c#62aa942bf506ea3d41ed0503b947b84141fdaa3c" +dependencies = [ + "ahash", + "anyhow", + "itertools", + "lalrpop", + "lalrpop-util", + "log", + "num-bigint", + "num-traits", + "phf 0.10.1", + "phf_codegen 0.10.0", + "rustc-hash", + "rustpython-ast 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c)", + "rustpython-compiler-core 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=62aa942bf506ea3d41ed0503b947b84141fdaa3c)", + "thiserror", + "tiny-keccak", + "unic-emoji-char", + "unic-ucd-ident", + "unicode_names2", +] + [[package]] name = "rustpython-parser" version = "0.2.0" @@ -2070,8 +2147,8 @@ dependencies = [ "phf 0.10.1", "phf_codegen 0.10.0", "rustc-hash", - "rustpython-ast", - "rustpython-compiler-core", + "rustpython-ast 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=ff90fe52eea578c8ebdd9d95e078cc041a5959fa)", + "rustpython-compiler-core 0.2.0 (git+https://github.com/RustPython/RustPython.git?rev=ff90fe52eea578c8ebdd9d95e078cc041a5959fa)", "thiserror", "tiny-keccak", "unic-emoji-char", diff --git a/Cargo.toml b/Cargo.toml index 9559bad0f2..8eef34c8a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,9 +49,9 @@ regex = { version = "1.6.0" } ropey = { version = "1.5.0", features = ["cr_lines", "simd"], default-features = false } ruff_macros = { version = "0.0.228", path = "ruff_macros" } rustc-hash = { version = "1.1.0" } -rustpython-ast = { features = ["unparse"], git = "https://github.com/RustPython/RustPython.git", rev = "ff90fe52eea578c8ebdd9d95e078cc041a5959fa" } -rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "ff90fe52eea578c8ebdd9d95e078cc041a5959fa" } -rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "ff90fe52eea578c8ebdd9d95e078cc041a5959fa" } +rustpython-ast = { features = ["unparse"], git = "https://github.com/RustPython/RustPython.git", rev = "62aa942bf506ea3d41ed0503b947b84141fdaa3c" } +rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "62aa942bf506ea3d41ed0503b947b84141fdaa3c" } +rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "62aa942bf506ea3d41ed0503b947b84141fdaa3c" } schemars = { version = "0.8.11" } semver = { version = "1.0.16" } serde = { version = "1.0.147", features = ["derive"] } diff --git a/README.md b/README.md index 4030a8b02e..75ed559530 100644 --- a/README.md +++ b/README.md @@ -727,6 +727,7 @@ For more, see [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/) on PyPI. | UP028 | rewrite-yield-from | Replace `yield` over `for` loop with `yield from` | 🛠 | | UP029 | unnecessary-builtin-import | Unnecessary builtin import: `{import}` | 🛠 | | UP030 | format-literals | Use implicit references for positional format fields | 🛠 | +| UP031 | printf-string-formatting | Use format specifiers instead of percent format | 🛠 | | UP032 | f-string | Use f-string instead of `format` call | 🛠 | | UP033 | functools-cache | Use `@functools.cache` instead of `@functools.lru_cache(maxsize=None)` | 🛠 | | UP034 | extraneous-parentheses | Avoid extraneous parentheses | 🛠 | diff --git a/resources/test/fixtures/pyupgrade/UP031_0.py b/resources/test/fixtures/pyupgrade/UP031_0.py new file mode 100644 index 0000000000..7ca4dfc39b --- /dev/null +++ b/resources/test/fixtures/pyupgrade/UP031_0.py @@ -0,0 +1,69 @@ +a, b, x, y = 1, 2, 3, 4 + +# UP031 +print('%s %s' % (a, b)) + +print('%s%s' % (a, b)) + +print("trivial" % ()) + +print("%s" % ("simple",)) + +print("%s" % ("%s" % ("nested",),)) + +print("%s%% percent" % (15,)) + +print("%f" % (15,)) + +print("%.f" % (15,)) + +print("%.3f" % (15,)) + +print("%3f" % (15,)) + +print("%-5f" % (5,)) + +print("%9f" % (5,)) + +print("%#o" % (123,)) + +print("brace {} %s" % (1,)) + +print( + "%s" % ( + "trailing comma", + ) +) + +print("foo %s " % (x,)) + +print("%(k)s" % {"k": "v"}) + +print("%(k)s" % { + "k": "v", + "i": "j" +}) + +print("%(to_list)s" % {"to_list": []}) + +print("%(k)s" % {"k": "v", "i": 1, "j": []}) + +print("%(ab)s" % {"a" "b": 1}) + +print("%(a)s" % {"a" : 1}) + +print(( + "foo %s " + "bar %s" % (x, y) +)) + +print( + "foo %(foo)s " + "bar %(bar)s" % {"foo": x, "bar": y} +) + +print("%s \N{snowman}" % (a,)) + +print("%(foo)s \N{snowman}" % {"foo": 1}) + +print(("foo %s " "bar %s") % (x, y)) diff --git a/resources/test/fixtures/pyupgrade/UP031_1.py b/resources/test/fixtures/pyupgrade/UP031_1.py new file mode 100644 index 0000000000..ba27262925 --- /dev/null +++ b/resources/test/fixtures/pyupgrade/UP031_1.py @@ -0,0 +1,59 @@ +# OK +"%s" % unknown_type + +b"%s" % (b"bytestring",) + +"%*s" % (5, "hi") + +"%d" % (flt,) + +"%c" % (some_string,) + +"%4%" % () + +"%.2r" % (1.25) + +i % 3 + +"%.*s" % (5, "hi") + +"%i" % (flt,) + +"%()s" % {"": "empty"} + +"%s" % {"k": "v"} + +"%(1)s" % {"1": "bar"} + +"%(a)s" % {"a": 1, "a": 2} + +pytest.param('"%8s" % (None,)', id="unsafe width-string conversion"), + +"%()s" % {"": "bar"} + +"%(1)s" % {1: 2, "1": 2} + +"%(and)s" % {"and": 2} + +# OK (arguably false negatives) +( + "foo %s " + "bar %s" +) % (x, y) + +( + "foo %(foo)s " + "bar %(bar)s" +) % {"foo": x, "bar": y} + +( + """foo %s""" + % (x,) +) + +( + """ + foo %s + """ + % (x,) +) diff --git a/ruff.schema.json b/ruff.schema.json index a231064a85..c712aa637a 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -1780,6 +1780,7 @@ "UP029", "UP03", "UP030", + "UP031", "UP032", "UP033", "UP034", diff --git a/setup.py b/setup.py index f9734c974a..aeef6f2ca3 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ Please use `python -m pip install .` instead. ) sys.exit(1) +"abc".isidentifier() # The below code will never execute, however GitHub is particularly # picky about where it finds Python packaging metadata. diff --git a/src/checkers/ast.rs b/src/checkers/ast.rs index 221fe367c8..055b652024 100644 --- a/src/checkers/ast.rs +++ b/src/checkers/ast.rs @@ -2627,7 +2627,7 @@ where .enabled(&Rule::PercentFormatUnsupportedFormatCharacter) { let location = Range::from_located(expr); - match pyflakes::cformat::CFormatSummary::try_from(value.as_ref()) { + match pyflakes::cformat::CFormatSummary::try_from(value.as_str()) { Err(CFormatError { typ: CFormatErrorType::UnsupportedFormatChar(c), .. @@ -2722,6 +2722,10 @@ where } } } + + if self.settings.rules.enabled(&Rule::PrintfStringFormatting) { + pyupgrade::rules::printf_string_formatting(self, expr, left, right); + } } } ExprKind::BinOp { diff --git a/src/python/identifiers.rs b/src/python/identifiers.rs index 34d366aab6..a93365c891 100644 --- a/src/python/identifiers.rs +++ b/src/python/identifiers.rs @@ -1,5 +1,15 @@ -use once_cell::sync::Lazy; -use regex::Regex; +/// Returns `true` if a string is a valid Python identifier (e.g., variable +/// name). +pub fn is_identifier(s: &str) -> bool { + // Is the first character a letter or underscore? + if !s + .chars() + .next() + .map_or(false, |c| c.is_alphabetic() || c == '_') + { + return false; + } -pub static IDENTIFIER_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()); + // Are the rest of the characters letters, digits, or underscores? + s.chars().skip(1).all(|c| c.is_alphanumeric() || c == '_') +} diff --git a/src/registry.rs b/src/registry.rs index 8710a1f78a..7ca27911d2 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -252,6 +252,7 @@ ruff_macros::define_rule_mapping!( UP028 => violations::RewriteYieldFrom, UP029 => violations::UnnecessaryBuiltinImport, UP030 => violations::FormatLiterals, + UP031 => violations::PrintfStringFormatting, UP032 => violations::FString, UP033 => violations::FunctoolsCache, UP034 => violations::ExtraneousParentheses, diff --git a/src/rules/flake8_bugbear/rules/getattr_with_constant.rs b/src/rules/flake8_bugbear/rules/getattr_with_constant.rs index e7a9e9501c..a96895b829 100644 --- a/src/rules/flake8_bugbear/rules/getattr_with_constant.rs +++ b/src/rules/flake8_bugbear/rules/getattr_with_constant.rs @@ -3,7 +3,7 @@ use rustpython_ast::{Constant, Expr, ExprContext, ExprKind, Location}; use crate::ast::types::Range; use crate::checkers::ast::Checker; use crate::fix::Fix; -use crate::python::identifiers::IDENTIFIER_REGEX; +use crate::python::identifiers::is_identifier; use crate::python::keyword::KWLIST; use crate::registry::Diagnostic; use crate::source_code::Generator; @@ -38,7 +38,7 @@ pub fn getattr_with_constant(checker: &mut Checker, expr: &Expr, func: &Expr, ar } = &arg.node else { return; }; - if !IDENTIFIER_REGEX.is_match(value) { + if !is_identifier(value) { return; } if KWLIST.contains(&value.as_str()) { diff --git a/src/rules/flake8_bugbear/rules/setattr_with_constant.rs b/src/rules/flake8_bugbear/rules/setattr_with_constant.rs index 5111994638..179e35562a 100644 --- a/src/rules/flake8_bugbear/rules/setattr_with_constant.rs +++ b/src/rules/flake8_bugbear/rules/setattr_with_constant.rs @@ -3,7 +3,7 @@ use rustpython_ast::{Constant, Expr, ExprContext, ExprKind, Location, Stmt, Stmt use crate::ast::types::Range; use crate::checkers::ast::Checker; use crate::fix::Fix; -use crate::python::identifiers::IDENTIFIER_REGEX; +use crate::python::identifiers::is_identifier; use crate::python::keyword::KWLIST; use crate::registry::Diagnostic; use crate::source_code::{Generator, Stylist}; @@ -49,7 +49,7 @@ pub fn setattr_with_constant(checker: &mut Checker, expr: &Expr, func: &Expr, ar } = &name.node else { return; }; - if !IDENTIFIER_REGEX.is_match(name) { + if !is_identifier(name) { return; } if KWLIST.contains(&name.as_str()) { diff --git a/src/rules/pyflakes/cformat.rs b/src/rules/pyflakes/cformat.rs index cfbf019558..ceb56ad69c 100644 --- a/src/rules/pyflakes/cformat.rs +++ b/src/rules/pyflakes/cformat.rs @@ -4,7 +4,7 @@ use std::str::FromStr; use rustc_hash::FxHashSet; use rustpython_common::cformat::{ - CFormatError, CFormatPart, CFormatQuantity, CFormatSpec, CFormatString, + CFormatError, CFormatPart, CFormatPrecision, CFormatQuantity, CFormatSpec, CFormatString, }; pub(crate) struct CFormatSummary { @@ -13,12 +13,8 @@ pub(crate) struct CFormatSummary { pub keywords: FxHashSet, } -impl TryFrom<&str> for CFormatSummary { - type Error = CFormatError; - - fn try_from(literal: &str) -> Result { - let format_string = CFormatString::from_str(literal)?; - +impl From<&CFormatString> for CFormatSummary { + fn from(format_string: &CFormatString) -> Self { let mut starred = false; let mut num_positional = 0; let mut keywords = FxHashSet::default(); @@ -45,17 +41,26 @@ impl TryFrom<&str> for CFormatSummary { num_positional += 1; starred = true; } - if precision == &Some(CFormatQuantity::FromValuesTuple) { + if precision == &Some(CFormatPrecision::Quantity(CFormatQuantity::FromValuesTuple)) { num_positional += 1; starred = true; } } - Ok(CFormatSummary { + Self { starred, num_positional, keywords, - }) + } + } +} + +impl TryFrom<&str> for CFormatSummary { + type Error = CFormatError; + + fn try_from(literal: &str) -> Result { + let format_string = CFormatString::from_str(literal)?; + Ok(Self::from(&format_string)) } } diff --git a/src/rules/pyupgrade/helpers.rs b/src/rules/pyupgrade/helpers.rs new file mode 100644 index 0000000000..99081f8498 --- /dev/null +++ b/src/rules/pyupgrade/helpers.rs @@ -0,0 +1,21 @@ +use once_cell::sync::Lazy; +use regex::{Captures, Regex}; + +static CURLY_ESCAPE: Lazy = Lazy::new(|| Regex::new(r"(\\N\{[^}]+})|([{}])").unwrap()); + +pub fn curly_escape(text: &str) -> String { + // We don't support emojis right now. + CURLY_ESCAPE + .replace_all(text, |caps: &Captures| { + if let Some(match_) = caps.get(1) { + match_.as_str().to_string() + } else { + if &caps[2] == "{" { + "{{".to_string() + } else { + "}}".to_string() + } + } + }) + .to_string() +} diff --git a/src/rules/pyupgrade/mod.rs b/src/rules/pyupgrade/mod.rs index 2dceb37c14..0d218117bd 100644 --- a/src/rules/pyupgrade/mod.rs +++ b/src/rules/pyupgrade/mod.rs @@ -1,5 +1,6 @@ //! Rules from [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/). mod fixes; +mod helpers; pub(crate) mod rules; pub mod settings; pub(crate) mod types; @@ -52,6 +53,8 @@ mod tests { #[test_case(Rule::UnnecessaryBuiltinImport, Path::new("UP029.py"); "UP029")] #[test_case(Rule::FormatLiterals, Path::new("UP030_0.py"); "UP030_0")] #[test_case(Rule::FormatLiterals, Path::new("UP030_1.py"); "UP030_1")] + #[test_case(Rule::PrintfStringFormatting, Path::new("UP031_0.py"); "UP031_0")] + #[test_case(Rule::PrintfStringFormatting, Path::new("UP031_1.py"); "UP031_1")] #[test_case(Rule::FString, Path::new("UP032.py"); "UP032")] #[test_case(Rule::FunctoolsCache, Path::new("UP033.py"); "UP033")] #[test_case(Rule::ExtraneousParentheses, Path::new("UP034.py"); "UP034")] diff --git a/src/rules/pyupgrade/rules/convert_named_tuple_functional_to_class.rs b/src/rules/pyupgrade/rules/convert_named_tuple_functional_to_class.rs index c61afccf3a..23833c6228 100644 --- a/src/rules/pyupgrade/rules/convert_named_tuple_functional_to_class.rs +++ b/src/rules/pyupgrade/rules/convert_named_tuple_functional_to_class.rs @@ -6,7 +6,7 @@ use crate::ast::helpers::{create_expr, create_stmt, unparse_stmt}; use crate::ast::types::Range; use crate::checkers::ast::Checker; use crate::fix::Fix; -use crate::python::identifiers::IDENTIFIER_REGEX; +use crate::python::identifiers::is_identifier; use crate::python::keyword::KWLIST; use crate::registry::Diagnostic; use crate::source_code::Stylist; @@ -104,7 +104,7 @@ fn create_properties_from_args(args: &[Expr], defaults: &[Expr]) -> Result Result { - if IDENTIFIER_REGEX.is_match(property) && !KWLIST.contains(&property.as_str()) { + if is_identifier(property) && !KWLIST.contains(&property.as_str()) { Ok(create_property_assignment_stmt(property, &value.node)) } else { bail!("Property name is not valid identifier: {}", property) diff --git a/src/rules/pyupgrade/rules/mod.rs b/src/rules/pyupgrade/rules/mod.rs index 8920b6afa7..c878a310b7 100644 --- a/src/rules/pyupgrade/rules/mod.rs +++ b/src/rules/pyupgrade/rules/mod.rs @@ -11,6 +11,7 @@ pub(crate) use native_literals::native_literals; use once_cell::sync::Lazy; pub(crate) use open_alias::open_alias; pub(crate) use os_error_alias::os_error_alias; +pub(crate) use printf_string_formatting::printf_string_formatting; pub(crate) use redundant_open_modes::redundant_open_modes; use regex::Regex; pub(crate) use remove_six_compat::remove_six_compat; @@ -52,6 +53,7 @@ mod lru_cache_without_parameters; mod native_literals; mod open_alias; mod os_error_alias; +mod printf_string_formatting; mod redundant_open_modes; mod remove_six_compat; mod replace_stdout_stderr; diff --git a/src/rules/pyupgrade/rules/printf_string_formatting.rs b/src/rules/pyupgrade/rules/printf_string_formatting.rs new file mode 100644 index 0000000000..a071cafda2 --- /dev/null +++ b/src/rules/pyupgrade/rules/printf_string_formatting.rs @@ -0,0 +1,458 @@ +use std::str::FromStr; + +use rustpython_ast::Location; +use rustpython_common::cformat::{ + CConversionFlags, CFormatPart, CFormatPrecision, CFormatQuantity, CFormatString, +}; +use rustpython_parser::ast::{Constant, Expr, ExprKind}; +use rustpython_parser::lexer; +use rustpython_parser::lexer::Tok; + +use crate::ast::types::Range; +use crate::ast::whitespace::indentation; +use crate::checkers::ast::Checker; +use crate::fix::Fix; +use crate::python::identifiers::is_identifier; +use crate::python::keyword::KWLIST; +use crate::registry::{Diagnostic, Rule}; +use crate::rules::pydocstyle::helpers::{leading_quote, trailing_quote}; +use crate::rules::pyupgrade::helpers::curly_escape; +use crate::violations; + +fn simplify_conversion_flag(flags: CConversionFlags) -> String { + let mut flag_string = String::new(); + if flags.contains(CConversionFlags::LEFT_ADJUST) { + flag_string.push('<'); + } + if flags.contains(CConversionFlags::SIGN_CHAR) { + flag_string.push('+'); + } + if flags.contains(CConversionFlags::ALTERNATE_FORM) { + flag_string.push('#'); + } + if flags.contains(CConversionFlags::BLANK_SIGN) { + if !flags.contains(CConversionFlags::SIGN_CHAR) { + flag_string.push(' '); + } + } + if flags.contains(CConversionFlags::ZERO_PAD) { + if !flags.contains(CConversionFlags::LEFT_ADJUST) { + flag_string.push('0'); + } + } + flag_string +} + +/// Convert a [`PercentFormat`] struct into a `String`. +fn handle_part(part: &CFormatPart) -> String { + match part { + CFormatPart::Literal(item) => curly_escape(item), + CFormatPart::Spec(spec) => { + let mut format_string = String::new(); + + // TODO(charlie): What case is this? + if spec.format_char == '%' { + format_string.push('%'); + return format_string; + } + + format_string.push('{'); + + // Ex) `{foo}` + if let Some(key_item) = &spec.mapping_key { + format_string.push_str(key_item); + } + + if !spec.flags.is_empty() + || spec.min_field_width.is_some() + || spec.precision.is_some() + || (spec.format_char != 's' && spec.format_char != 'r' && spec.format_char != 'a') + { + format_string.push(':'); + + if !spec.flags.is_empty() { + format_string.push_str(&simplify_conversion_flag(spec.flags)); + } + + if let Some(width) = &spec.min_field_width { + let amount = match width { + CFormatQuantity::Amount(amount) => amount, + CFormatQuantity::FromValuesTuple => { + unreachable!("FromValuesTuple is unsupported") + } + }; + format_string.push_str(&amount.to_string()); + } + + if let Some(precision) = &spec.precision { + match precision { + CFormatPrecision::Quantity(quantity) => match quantity { + CFormatQuantity::Amount(amount) => { + format_string.push('.'); + format_string.push_str(&amount.to_string()); + } + CFormatQuantity::FromValuesTuple => { + unreachable!("Width should be a usize") + } + }, + CFormatPrecision::Dot => { + format_string.push('.'); + format_string.push('0'); + } + } + } + } + if spec.format_char != 's' && spec.format_char != 'r' && spec.format_char != 'a' { + format_string.push(spec.format_char); + } + if spec.format_char == 'r' || spec.format_char == 'a' { + format_string.push('!'); + format_string.push(spec.format_char); + } + format_string.push('}'); + format_string + } + } +} + +/// Convert a [`CFormatString`] into a `String`. +fn percent_to_format(format_string: &CFormatString) -> String { + let mut contents = String::new(); + for (.., format_part) in format_string.iter() { + contents.push_str(&handle_part(format_part)); + } + contents +} + +/// If a tuple has one argument, remove the comma; otherwise, return it as-is. +fn clean_params_tuple(checker: &mut Checker, right: &Expr) -> String { + let mut contents = checker + .locator + .slice_source_code_range(&Range::from_located(right)) + .to_string(); + if let ExprKind::Tuple { elts, .. } = &right.node { + if elts.len() == 1 { + if right.location.row() == right.end_location.unwrap().row() { + for (i, character) in contents.chars().rev().enumerate() { + if character == ',' { + let correct_index = contents.len() - i - 1; + contents.remove(correct_index); + break; + } + } + } + } + } + contents +} + +/// Converts a dictionary to a function call while preserving as much styling as +/// possible. +fn clean_params_dictionary(checker: &mut Checker, right: &Expr) -> Option { + let is_multi_line = right.location.row() < right.end_location.unwrap().row(); + let mut contents = String::new(); + if let ExprKind::Dict { keys, values } = &right.node { + let mut arguments: Vec = vec![]; + let mut seen: Vec<&str> = vec![]; + let mut indent = None; + for (key, value) in keys.iter().zip(values.iter()) { + if let ExprKind::Constant { + value: Constant::Str(key_string), + .. + } = &key.node + { + // If the dictionary key is not a valid variable name, abort. + if !is_identifier(key_string) { + return None; + } + // If the key is a Python keyword, abort. + if KWLIST.contains(&key_string.as_str()) { + return None; + } + // If there are multiple entries of the same key, abort. + if seen.contains(&key_string.as_str()) { + return None; + } + seen.push(key_string); + let mut contents = String::new(); + if is_multi_line { + if indent.is_none() { + indent = indentation(checker.locator, key); + } + } + + let value_string = checker + .locator + .slice_source_code_range(&Range::from_located(value)); + contents.push_str(key_string); + contents.push('='); + contents.push_str(&value_string); + arguments.push(contents); + } else { + // If there are any non-string keys, abort. + return None; + } + } + // If we couldn't parse out key values, abort. + if arguments.is_empty() { + return None; + } + contents.push('('); + if is_multi_line { + let Some(indent) = indent else { + return None; + }; + + for item in &arguments { + contents.push('\n'); + contents.push_str(&indent); + contents.push_str(item); + contents.push(','); + } + + contents.push('\n'); + + // For the ending parentheses, go back one indent. + let default_indent: &str = checker.stylist.indentation(); + if let Some(ident) = indent.strip_prefix(default_indent) { + contents.push_str(ident); + } else { + contents.push_str(&indent); + } + } else { + contents.push_str(&arguments.join(", ")); + } + contents.push(')'); + } + Some(contents) +} + +/// Returns `true` if the sequence of [`PercentFormatPart`] indicate that an +/// [`Expr`] can be converted. +fn convertible(format_string: &CFormatString, params: &Expr) -> bool { + for (.., format_part) in format_string.iter() { + let CFormatPart::Spec(ref fmt) = format_part else { + continue; + }; + + // These require out-of-order parameter consumption. + if matches!(fmt.min_field_width, Some(CFormatQuantity::FromValuesTuple)) { + return false; + } + if matches!( + fmt.precision, + Some(CFormatPrecision::Quantity(CFormatQuantity::FromValuesTuple)) + ) { + return false; + } + + // These conversions require modification of parameters. + if fmt.format_char == 'd' + || fmt.format_char == 'i' + || fmt.format_char == 'u' + || fmt.format_char == 'c' + { + return false; + } + + // No equivalent in format. + if fmt.mapping_key.as_ref().map_or(false, String::is_empty) { + return false; + } + + let is_nontrivial = + !fmt.flags.is_empty() || fmt.min_field_width.is_some() || fmt.precision.is_some(); + + // Conversion is subject to modifiers. + if is_nontrivial && fmt.format_char == '%' { + return false; + } + + // No equivalent in `format`. + if is_nontrivial && (fmt.format_char == 'a' || fmt.format_char == 'r') { + return false; + } + + // "%s" with None and width is not supported. + if fmt.min_field_width.is_some() && fmt.format_char == 's' { + return false; + } + + // All dict substitutions must be named. + if let ExprKind::Dict { .. } = ¶ms.node { + if fmt.mapping_key.is_none() { + return false; + } + } + } + true +} + +/// UP031 +pub(crate) fn printf_string_formatting( + checker: &mut Checker, + expr: &Expr, + left: &Expr, + right: &Expr, +) { + // If the modulo symbol is on a separate line, abort. + if right.location.row() != left.end_location.unwrap().row() { + return; + } + + // Grab each string segment (in case there's an implicit concatenation). + let mut strings: Vec<(Location, Location)> = vec![]; + let mut extension = None; + for (start, tok, end) in lexer::make_tokenizer_located( + &checker + .locator + .slice_source_code_range(&Range::from_located(expr)), + expr.location, + ) + .flatten() + { + if matches!(tok, Tok::String { .. }) { + strings.push((start, end)); + } else if matches!(tok, Tok::Rpar) { + // If we hit a right paren, we have to preserve it. + extension = Some((start, end)); + } else if matches!(tok, Tok::Percent) { + // Break as soon as we find the modulo symbol. + break; + } + } + + // If there are no string segments, abort. + if strings.is_empty() { + return; + } + + // Parse each string segment. + let mut format_strings = vec![]; + for (start, end) in &strings { + let string = checker + .locator + .slice_source_code_range(&Range::new(*start, *end)); + let (Some(leader), Some(trailer)) = (leading_quote(&string), trailing_quote(&string)) else { + return; + }; + let string = &string[leader.len()..string.len() - trailer.len()]; + + // Parse the format string (e.g. `"%s"`) into a list of `PercentFormat`. + let Ok(format_string) = CFormatString::from_str(string) else { + return; + }; + if !convertible(&format_string, right) { + return; + } + + let format_string = percent_to_format(&format_string); + format_strings.push(format!("{leader}{format_string}{trailer}")); + } + + // Parse the parameters. + let params_string = match right.node { + ExprKind::Tuple { .. } => clean_params_tuple(checker, right), + ExprKind::Dict { .. } => { + if let Some(params_string) = clean_params_dictionary(checker, right) { + params_string + } else { + return; + } + } + _ => return, + }; + + // Reconstruct the string. + let mut contents = String::new(); + let mut prev = None; + for ((start, end), format_string) in strings.iter().zip(format_strings) { + // Add the content before the string segment. + match prev { + None => { + contents.push_str( + &checker + .locator + .slice_source_code_range(&Range::new(expr.location, *start)), + ); + } + Some(prev) => { + contents.push_str( + &checker + .locator + .slice_source_code_range(&Range::new(prev, *start)), + ); + } + } + // Add the string itself. + contents.push_str(&format_string); + prev = Some(*end); + } + + if let Some((.., end)) = extension { + contents.push_str( + &checker + .locator + .slice_source_code_range(&Range::new(prev.unwrap(), end)), + ); + } + + // Add the `.format` call. + contents.push_str(&format!(".format{params_string}")); + + let mut diagnostic = Diagnostic::new( + violations::PrintfStringFormatting, + Range::from_located(expr), + ); + if checker.patch(&Rule::PrintfStringFormatting) { + diagnostic.amend(Fix::replacement( + contents, + expr.location, + expr.end_location.unwrap(), + )); + } + checker.diagnostics.push(diagnostic); +} + +#[cfg(test)] +mod test { + use test_case::test_case; + + use super::*; + + #[test_case("\"%s\"", "\"{}\""; "simple string")] + #[test_case("\"%%%s\"", "\"%{}\""; "three percents")] + #[test_case("\"%(foo)s\"", "\"{foo}\""; "word in string")] + #[test_case("\"%2f\"", "\"{:2f}\""; "formatting in string")] + #[test_case("\"%r\"", "\"{!r}\""; "format an r")] + #[test_case("\"%a\"", "\"{!a}\""; "format an a")] + fn test_percent_to_format(sample: &str, expected: &str) { + let format_string = CFormatString::from_str(sample).unwrap(); + let actual = percent_to_format(&format_string); + assert_eq!(actual, expected); + } + + #[test] + fn preserve_blanks() { + assert_eq!( + simplify_conversion_flag(CConversionFlags::empty()), + String::new() + ); + } + + #[test] + fn preserve_space() { + assert_eq!( + simplify_conversion_flag(CConversionFlags::BLANK_SIGN), + " ".to_string() + ); + } + + #[test] + fn complex_format() { + assert_eq!( + simplify_conversion_flag(CConversionFlags::all()), + "<+#".to_string() + ); + } +} diff --git a/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP031_UP031_0.py.snap b/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP031_UP031_0.py.snap new file mode 100644 index 0000000000..17e6e2b60e --- /dev/null +++ b/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP031_UP031_0.py.snap @@ -0,0 +1,481 @@ +--- +source: src/rules/pyupgrade/mod.rs +expression: diagnostics +--- +- kind: + PrintfStringFormatting: ~ + location: + row: 4 + column: 6 + end_location: + row: 4 + column: 22 + fix: + content: "'{} {}'.format(a, b)" + location: + row: 4 + column: 6 + end_location: + row: 4 + column: 22 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 6 + column: 6 + end_location: + row: 6 + column: 21 + fix: + content: "'{}{}'.format(a, b)" + location: + row: 6 + column: 6 + end_location: + row: 6 + column: 21 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 8 + column: 6 + end_location: + row: 8 + column: 20 + fix: + content: "\"trivial\".format()" + location: + row: 8 + column: 6 + end_location: + row: 8 + column: 20 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 10 + column: 6 + end_location: + row: 10 + column: 24 + fix: + content: "\"{}\".format(\"simple\")" + location: + row: 10 + column: 6 + end_location: + row: 10 + column: 24 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 12 + column: 6 + end_location: + row: 12 + column: 34 + fix: + content: "\"{}\".format(\"%s\" % (\"nested\",))" + location: + row: 12 + column: 6 + end_location: + row: 12 + column: 34 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 12 + column: 14 + end_location: + row: 12 + column: 32 + fix: + content: "\"{}\".format(\"nested\")" + location: + row: 12 + column: 14 + end_location: + row: 12 + column: 32 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 14 + column: 6 + end_location: + row: 14 + column: 28 + fix: + content: "\"{}% percent\".format(15)" + location: + row: 14 + column: 6 + end_location: + row: 14 + column: 28 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 16 + column: 6 + end_location: + row: 16 + column: 18 + fix: + content: "\"{:f}\".format(15)" + location: + row: 16 + column: 6 + end_location: + row: 16 + column: 18 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 18 + column: 6 + end_location: + row: 18 + column: 19 + fix: + content: "\"{:.0f}\".format(15)" + location: + row: 18 + column: 6 + end_location: + row: 18 + column: 19 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 20 + column: 6 + end_location: + row: 20 + column: 20 + fix: + content: "\"{:.3f}\".format(15)" + location: + row: 20 + column: 6 + end_location: + row: 20 + column: 20 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 22 + column: 6 + end_location: + row: 22 + column: 19 + fix: + content: "\"{:3f}\".format(15)" + location: + row: 22 + column: 6 + end_location: + row: 22 + column: 19 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 24 + column: 6 + end_location: + row: 24 + column: 19 + fix: + content: "\"{:<5f}\".format(5)" + location: + row: 24 + column: 6 + end_location: + row: 24 + column: 19 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 26 + column: 6 + end_location: + row: 26 + column: 18 + fix: + content: "\"{:9f}\".format(5)" + location: + row: 26 + column: 6 + end_location: + row: 26 + column: 18 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 28 + column: 6 + end_location: + row: 28 + column: 20 + fix: + content: "\"{:#o}\".format(123)" + location: + row: 28 + column: 6 + end_location: + row: 28 + column: 20 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 30 + column: 6 + end_location: + row: 30 + column: 26 + fix: + content: "\"brace {{}} {}\".format(1)" + location: + row: 30 + column: 6 + end_location: + row: 30 + column: 26 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 33 + column: 2 + end_location: + row: 35 + column: 9 + fix: + content: "\"{}\".format(\n \"trailing comma\",\n )" + location: + row: 33 + column: 2 + end_location: + row: 35 + column: 9 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 38 + column: 6 + end_location: + row: 38 + column: 22 + fix: + content: "\"foo {} \".format(x)" + location: + row: 38 + column: 6 + end_location: + row: 38 + column: 22 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 40 + column: 6 + end_location: + row: 40 + column: 26 + fix: + content: "\"{k}\".format(k=\"v\")" + location: + row: 40 + column: 6 + end_location: + row: 40 + column: 26 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 42 + column: 6 + end_location: + row: 45 + column: 1 + fix: + content: "\"{k}\".format(\n k=\"v\",\n i=\"j\",\n)" + location: + row: 42 + column: 6 + end_location: + row: 45 + column: 1 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 47 + column: 6 + end_location: + row: 47 + column: 37 + fix: + content: "\"{to_list}\".format(to_list=[])" + location: + row: 47 + column: 6 + end_location: + row: 47 + column: 37 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 49 + column: 6 + end_location: + row: 49 + column: 43 + fix: + content: "\"{k}\".format(k=\"v\", i=1, j=[])" + location: + row: 49 + column: 6 + end_location: + row: 49 + column: 43 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 51 + column: 6 + end_location: + row: 51 + column: 29 + fix: + content: "\"{ab}\".format(ab=1)" + location: + row: 51 + column: 6 + end_location: + row: 51 + column: 29 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 53 + column: 6 + end_location: + row: 53 + column: 27 + fix: + content: "\"{a}\".format(a=1)" + location: + row: 53 + column: 6 + end_location: + row: 53 + column: 27 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 56 + column: 4 + end_location: + row: 57 + column: 21 + fix: + content: "\"foo {} \"\n \"bar {}\".format(x, y)" + location: + row: 56 + column: 4 + end_location: + row: 57 + column: 21 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 61 + column: 4 + end_location: + row: 62 + column: 40 + fix: + content: "\"foo {foo} \"\n \"bar {bar}\".format(foo=x, bar=y)" + location: + row: 61 + column: 4 + end_location: + row: 62 + column: 40 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 65 + column: 6 + end_location: + row: 65 + column: 29 + fix: + content: "\"{} \\N{snowman}\".format(a)" + location: + row: 65 + column: 6 + end_location: + row: 65 + column: 29 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 67 + column: 6 + end_location: + row: 67 + column: 40 + fix: + content: "\"{foo} \\N{snowman}\".format(foo=1)" + location: + row: 67 + column: 6 + end_location: + row: 67 + column: 40 + parent: ~ +- kind: + PrintfStringFormatting: ~ + location: + row: 69 + column: 6 + end_location: + row: 69 + column: 35 + fix: + content: "(\"foo {} \" \"bar {}\").format(x, y)" + location: + row: 69 + column: 6 + end_location: + row: 69 + column: 35 + parent: ~ + diff --git a/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP031_UP031_1.py.snap b/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP031_UP031_1.py.snap new file mode 100644 index 0000000000..ca9e52f5c5 --- /dev/null +++ b/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP031_UP031_1.py.snap @@ -0,0 +1,6 @@ +--- +source: src/rules/pyupgrade/mod.rs +expression: diagnostics +--- +[] + diff --git a/src/violations.rs b/src/violations.rs index ff53ffff71..5c3cdf00d6 100644 --- a/src/violations.rs +++ b/src/violations.rs @@ -3015,6 +3015,20 @@ impl AlwaysAutofixableViolation for ReplaceUniversalNewlines { } } +define_violation!( + pub struct PrintfStringFormatting; +); +impl AlwaysAutofixableViolation for PrintfStringFormatting { + #[derive_message_formats] + fn message(&self) -> String { + format!("Use format specifiers instead of percent format") + } + + fn autofix_title(&self) -> String { + "Replace with format specifiers".to_string() + } +} + define_violation!( pub struct ReplaceStdoutStderr; );