diff --git a/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py b/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py new file mode 100644 index 0000000000..4d668418d4 --- /dev/null +++ b/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py @@ -0,0 +1,29 @@ +# pylint: disable=missing-docstring,consider-using-f-string, pointless-statement + +## Old style formatting + +"%s %z" % ("hello", "world") # [bad-format-character] + +"%s" "%z" % ("hello", "world") # [bad-format-character] + +"""%s %z""" % ("hello", "world") # [bad-format-character] + +"""%s""" """%z""" % ("hello", "world") # [bad-format-character] + +## New style formatting + +"{:s} {:y}".format("hello", "world") # [bad-format-character] + +"{:*^30s}".format("centered") + +## f-strings + +H, W = "hello", "world" +f"{H} {W}" +f"{H:s} {W:z}" # [bad-format-character] + +f"{1:z}" # [bad-format-character] + +## False negatives + +print(("%" "z") % 1) diff --git a/crates/ruff/src/checkers/ast/analyze/expression.rs b/crates/ruff/src/checkers/ast/analyze/expression.rs index 38a2638d84..922ff248f2 100644 --- a/crates/ruff/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff/src/checkers/ast/analyze/expression.rs @@ -419,6 +419,14 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { } } } + + if checker.enabled(Rule::BadStringFormatCharacter) { + pylint::rules::bad_string_format_character::call( + checker, + val.as_str(), + location, + ); + } } } } @@ -1025,6 +1033,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { checker.locator, ); } + if checker.enabled(Rule::BadStringFormatCharacter) { + pylint::rules::bad_string_format_character::percent(checker, expr); + } if checker.enabled(Rule::BadStringFormatType) { pylint::rules::bad_string_format_type(checker, expr, right); } diff --git a/crates/ruff/src/codes.rs b/crates/ruff/src/codes.rs index 64e795cde8..4bbc0cbc9f 100644 --- a/crates/ruff/src/codes.rs +++ b/crates/ruff/src/codes.rs @@ -191,6 +191,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "E1142") => (RuleGroup::Unspecified, rules::pylint::rules::AwaitOutsideAsync), (Pylint, "E1205") => (RuleGroup::Unspecified, rules::pylint::rules::LoggingTooManyArgs), (Pylint, "E1206") => (RuleGroup::Unspecified, rules::pylint::rules::LoggingTooFewArgs), + (Pylint, "E1300") => (RuleGroup::Unspecified, rules::pylint::rules::BadStringFormatCharacter), (Pylint, "E1307") => (RuleGroup::Unspecified, rules::pylint::rules::BadStringFormatType), (Pylint, "E1310") => (RuleGroup::Unspecified, rules::pylint::rules::BadStrStripCall), (Pylint, "E1507") => (RuleGroup::Unspecified, rules::pylint::rules::InvalidEnvvarValue), diff --git a/crates/ruff/src/rules/pylint/mod.rs b/crates/ruff/src/rules/pylint/mod.rs index 4e0a8f7e1b..fbe0c1abe3 100644 --- a/crates/ruff/src/rules/pylint/mod.rs +++ b/crates/ruff/src/rules/pylint/mod.rs @@ -18,8 +18,12 @@ mod tests { use crate::settings::Settings; use crate::test::test_path; - #[test_case(Rule::AwaitOutsideAsync, Path::new("await_outside_async.py"))] #[test_case(Rule::AssertOnStringLiteral, Path::new("assert_on_string_literal.py"))] + #[test_case(Rule::AwaitOutsideAsync, Path::new("await_outside_async.py"))] + #[test_case( + Rule::BadStringFormatCharacter, + Path::new("bad_string_format_character.py") + )] #[test_case(Rule::BadStrStripCall, Path::new("bad_str_strip_call.py"))] #[test_case(Rule::BadStringFormatType, Path::new("bad_string_format_type.py"))] #[test_case(Rule::BidirectionalUnicode, Path::new("bidirectional_unicode.py"))] diff --git a/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs new file mode 100644 index 0000000000..39fd1c4b34 --- /dev/null +++ b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs @@ -0,0 +1,108 @@ +use std::str::FromStr; + +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::str::{leading_quote, trailing_quote}; +use ruff_python_ast::{Expr, Ranged}; +use ruff_python_literal::{ + cformat::{CFormatErrorType, CFormatString}, + format::FormatPart, + format::FromTemplate, + format::{FormatSpec, FormatSpecError, FormatString}, +}; +use ruff_python_parser::{lexer, Mode}; +use ruff_text_size::TextRange; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for unsupported format types in format strings. +/// +/// ## Why is this bad? +/// The format string is not checked at compile time, so it is easy to +/// introduce bugs by mistyping the format string. +/// +/// ## Example +/// ```python +/// # `z` is not a valid format type. +/// print("%z" % "1") +/// +/// print("{:z}".format("1")) +/// ``` +#[violation] +pub struct BadStringFormatCharacter { + format_char: char, +} + +impl Violation for BadStringFormatCharacter { + #[derive_message_formats] + fn message(&self) -> String { + format!("Unsupported format character '{}'", self.format_char) + } +} + +/// Ex) `"{:z}".format("1")` +pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) { + if let Ok(format_string) = FormatString::from_str(string) { + for part in &format_string.format_parts { + let FormatPart::Field { format_spec, .. } = part else { + continue; + }; + + if let Err(FormatSpecError::InvalidFormatType) = FormatSpec::parse(format_spec) { + checker.diagnostics.push(Diagnostic::new( + BadStringFormatCharacter { + // The format type character is always the last one. + // More info in the official spec: + // https://docs.python.org/3/library/string.html#format-specification-mini-language + format_char: format_spec.chars().last().unwrap(), + }, + range, + )); + } + } + } +} + +/// Ex) `"%z" % "1"` +pub(crate) fn percent(checker: &mut Checker, expr: &Expr) { + // Grab each string segment (in case there's an implicit concatenation). + let mut strings: Vec = vec![]; + for (tok, range) in lexer::lex_starts_at( + checker.locator().slice(expr.range()), + Mode::Module, + expr.start(), + ) + .flatten() + { + if tok.is_string() { + strings.push(range); + } else if tok.is_percent() { + // Break as soon as we find the modulo symbol. + break; + } + } + + // If there are no string segments, abort. + if strings.is_empty() { + return; + } + + for range in &strings { + let string = checker.locator().slice(*range); + let (Some(leader), Some(trailer)) = (leading_quote(string), trailing_quote(string)) else { + return; + }; + let string = &string[leader.len()..string.len() - trailer.len()]; + + // Parse the format string (e.g. `"%s"`) into a list of `PercentFormat`. + if let Err(format_error) = CFormatString::from_str(string) { + if let CFormatErrorType::UnsupportedFormatChar(format_char) = format_error.typ { + checker.diagnostics.push(Diagnostic::new( + BadStringFormatCharacter { format_char }, + expr.range(), + )); + } + }; + } +} diff --git a/crates/ruff/src/rules/pylint/rules/mod.rs b/crates/ruff/src/rules/pylint/rules/mod.rs index 1cc36b2293..bfd44d030e 100644 --- a/crates/ruff/src/rules/pylint/rules/mod.rs +++ b/crates/ruff/src/rules/pylint/rules/mod.rs @@ -1,6 +1,7 @@ pub(crate) use assert_on_string_literal::*; pub(crate) use await_outside_async::*; pub(crate) use bad_str_strip_call::*; +pub(crate) use bad_string_format_character::BadStringFormatCharacter; pub(crate) use bad_string_format_type::*; pub(crate) use bidirectional_unicode::*; pub(crate) use binary_op_exception::*; @@ -55,6 +56,7 @@ pub(crate) use yield_in_init::*; mod assert_on_string_literal; mod await_outside_async; mod bad_str_strip_call; +pub(crate) mod bad_string_format_character; mod bad_string_format_type; mod bidirectional_unicode; mod binary_op_exception; diff --git a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap new file mode 100644 index 0000000000..fc8bb36435 --- /dev/null +++ b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap @@ -0,0 +1,44 @@ +--- +source: crates/ruff/src/rules/pylint/mod.rs +--- +bad_string_format_character.py:5:1: PLE1300 Unsupported format character 'z' + | +3 | ## Old style formatting +4 | +5 | "%s %z" % ("hello", "world") # [bad-format-character] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 +6 | +7 | "%s" "%z" % ("hello", "world") # [bad-format-character] + | + +bad_string_format_character.py:7:1: PLE1300 Unsupported format character 'z' + | +5 | "%s %z" % ("hello", "world") # [bad-format-character] +6 | +7 | "%s" "%z" % ("hello", "world") # [bad-format-character] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 +8 | +9 | """%s %z""" % ("hello", "world") # [bad-format-character] + | + +bad_string_format_character.py:9:1: PLE1300 Unsupported format character 'z' + | + 7 | "%s" "%z" % ("hello", "world") # [bad-format-character] + 8 | + 9 | """%s %z""" % ("hello", "world") # [bad-format-character] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 +10 | +11 | """%s""" """%z""" % ("hello", "world") # [bad-format-character] + | + +bad_string_format_character.py:11:1: PLE1300 Unsupported format character 'z' + | + 9 | """%s %z""" % ("hello", "world") # [bad-format-character] +10 | +11 | """%s""" """%z""" % ("hello", "world") # [bad-format-character] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 +12 | +13 | ## New style formatting + | + + diff --git a/crates/ruff_python_literal/src/format.rs b/crates/ruff_python_literal/src/format.rs index 7ea57c05b9..43565966f2 100644 --- a/crates/ruff_python_literal/src/format.rs +++ b/crates/ruff_python_literal/src/format.rs @@ -180,6 +180,7 @@ impl FormatParse for FormatType { Some('g') => (Some(Self::GeneralFormat(Case::Lower)), chars.as_str()), Some('G') => (Some(Self::GeneralFormat(Case::Upper)), chars.as_str()), Some('%') => (Some(Self::Percentage), chars.as_str()), + Some(_) => (None, chars.as_str()), _ => (None, text), } } @@ -283,10 +284,20 @@ impl FormatSpec { let (width, text) = parse_number(text)?; let (grouping_option, text) = FormatGrouping::parse(text); let (precision, text) = parse_precision(text)?; - let (format_type, text) = FormatType::parse(text); - if !text.is_empty() { - return Err(FormatSpecError::InvalidFormatSpecifier); - } + let (format_type, _text) = if text.is_empty() { + (None, text) + } else { + // If there's any remaining text, we should yield a valid format type and consume it + // all. + let (format_type, text) = FormatType::parse(text); + if format_type.is_none() { + return Err(FormatSpecError::InvalidFormatType); + } + if !text.is_empty() { + return Err(FormatSpecError::InvalidFormatSpecifier); + } + (format_type, text) + }; if zero && fill.is_none() { fill.replace('0'); @@ -724,6 +735,7 @@ pub enum FormatSpecError { DecimalDigitsTooMany, PrecisionTooBig, InvalidFormatSpecifier, + InvalidFormatType, UnspecifiedFormat(char, char), UnknownFormatCode(char, &'static str), PrecisionNotAllowed, @@ -1275,6 +1287,10 @@ mod tests { FormatSpec::parse("d "), Err(FormatSpecError::InvalidFormatSpecifier) ); + assert_eq!( + FormatSpec::parse("z"), + Err(FormatSpecError::InvalidFormatType) + ); } #[test] diff --git a/ruff.schema.json b/ruff.schema.json index 96ff8fe0a8..802011030a 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2203,6 +2203,7 @@ "PLE1206", "PLE13", "PLE130", + "PLE1300", "PLE1307", "PLE131", "PLE1310",