From 100904adb99cf38f4d9985d46b641997fca4419d Mon Sep 17 00:00:00 2001 From: Zanie Blue Date: Fri, 25 Aug 2023 12:42:57 -0500 Subject: [PATCH] Avoid parsing other parts of a format specification if replacements are present (#6858) Closes #6767 Replaces https://github.com/astral-sh/ruff/pull/6773 (this cherry-picks some parts from there) Alternative to the approach introduced in #6616 which added support for placeholders in format specifications while retaining parsing of other format specification parts. The idea is that if there are placeholders in a format specification we will not attempt to glean semantic meaning from the other parts of the format specification we'll just extract all of the placeholders ignoring other characters. The dynamic content of placeholders can drastically change the meaning of the format specification in ways unknowable by static analysis. This change prevents false analysis and will ensure safety if we build other rules on top of this at the cost of missing detection of some bad specifications. Minor note: I've use "replacements" and "placeholders" interchangeably but am trying to go with "placeholder" as I think it's a better term for the static analysis concept here --- .../pylint/bad_string_format_character.py | 8 +- .../rules/bad_string_format_character.rs | 9 +- ...LE1300_bad_string_format_character.py.snap | 12 +- crates/ruff_python_literal/src/format.rs | 157 ++++++++---------- 4 files changed, 87 insertions(+), 99 deletions(-) diff --git a/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py b/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py index 4b2a194abb..cffe53d723 100644 --- a/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py +++ b/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py @@ -15,9 +15,11 @@ "{:s} {:y}".format("hello", "world") # [bad-format-character] "{:*^30s}".format("centered") # OK -"{:{s}}".format("hello", s="s") # OK (nested replacement value not checked) - -"{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked) +"{:{s}}".format("hello", s="s") # OK (nested placeholder value not checked) +"{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested placeholder format spec checked) +"{0:.{prec}g}".format(1.23, prec=15) # OK (cannot validate after nested placeholder) +"{0:.{foo}{bar}{foobar}y}".format(...) # OK (cannot validate after nested placeholders) +"{0:.{foo}x{bar}y{foobar}g}".format(...) # OK (all nested placeholders are consumed without considering in between chars) ## f-strings diff --git a/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs index fdf221d10a..cd8002bca0 100644 --- a/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs +++ b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs @@ -63,13 +63,14 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) { )); } Err(_) => {} - Ok(format_spec) => { - for replacement in format_spec.replacements() { - let FormatPart::Field { format_spec, .. } = replacement else { + Ok(FormatSpec::Static(_)) => {} + Ok(FormatSpec::Dynamic(format_spec)) => { + for placeholder in format_spec.placeholders { + let FormatPart::Field { format_spec, .. } = placeholder else { continue; }; if let Err(FormatSpecError::InvalidFormatType) = - FormatSpec::parse(format_spec) + FormatSpec::parse(&format_spec) { checker.diagnostics.push(Diagnostic::new( BadStringFormatCharacter { diff --git a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap index f34f9cc876..709c8ec814 100644 --- a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap +++ b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap @@ -51,14 +51,14 @@ bad_string_format_character.py:15:1: PLE1300 Unsupported format character 'y' 17 | "{:*^30s}".format("centered") # OK | -bad_string_format_character.py:20:1: PLE1300 Unsupported format character 'y' +bad_string_format_character.py:19:1: PLE1300 Unsupported format character 'y' | -18 | "{:{s}}".format("hello", s="s") # OK (nested replacement value not checked) -19 | -20 | "{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked) +17 | "{:*^30s}".format("centered") # OK +18 | "{:{s}}".format("hello", s="s") # OK (nested placeholder value not checked) +19 | "{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested placeholder format spec checked) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 -21 | -22 | ## f-strings +20 | "{0:.{prec}g}".format(1.23, prec=15) # OK (cannot validate after nested placeholder) +21 | "{0:.{foo}{bar}{foobar}y}".format(...) # OK (cannot validate after nested placeholders) | diff --git a/crates/ruff_python_literal/src/format.rs b/crates/ruff_python_literal/src/format.rs index 2e2364e218..01f9858763 100644 --- a/crates/ruff_python_literal/src/format.rs +++ b/crates/ruff_python_literal/src/format.rs @@ -190,7 +190,7 @@ impl FormatParse for FormatType { /// "hello {name:<20}".format(name="test") /// ``` /// -/// Format specifications allow nested replacements for dynamic formatting. +/// Format specifications allow nested placeholders for dynamic formatting. /// For example, the following statements are equivalent: /// ```python /// "hello {name:{fmt}}".format(name="test", fmt="<20") @@ -198,22 +198,27 @@ impl FormatParse for FormatType { /// "hello {name:<20{empty}>}".format(name="test", empty="") /// ``` /// -/// Nested replacements can include additional format specifiers. +/// Nested placeholders can include additional format specifiers. /// ```python /// "hello {name:{fmt:*>}}".format(name="test", fmt="<20") /// ``` /// -/// However, replacements can only be singly nested (preserving our sanity). +/// However, placeholders can only be singly nested (preserving our sanity). /// A [`FormatSpecError::PlaceholderRecursionExceeded`] will be raised while parsing in this case. /// ```python /// "hello {name:{fmt:{not_allowed}}}".format(name="test", fmt="<20") # Syntax error /// ``` /// -/// When replacements are present in a format specification, we will parse them and -/// store them in [`FormatSpec`] but will otherwise ignore them if they would introduce -/// an invalid format specification at runtime. +/// When placeholders are present in a format specification, parsing will return a [`DynamicFormatSpec`] +/// and avoid attempting to parse any of the clauses. Otherwise, a [`StaticFormatSpec`] will be used. #[derive(Debug, PartialEq)] -pub struct FormatSpec { +pub enum FormatSpec { + Static(StaticFormatSpec), + Dynamic(DynamicFormatSpec), +} + +#[derive(Debug, PartialEq)] +pub struct StaticFormatSpec { // Ex) `!s` in `'{!s}'` conversion: Option, // Ex) `*` in `'{:*^30}'` @@ -232,8 +237,12 @@ pub struct FormatSpec { precision: Option, // Ex) `f` in `'{:+f}'` format_type: Option, +} + +#[derive(Debug, PartialEq)] +pub struct DynamicFormatSpec { // Ex) `x` and `y` in `'{:*{x},{y}b}'` - replacements: Vec, + pub placeholders: Vec, } #[derive(Copy, Clone, Debug, PartialEq, Default)] @@ -318,43 +327,46 @@ fn parse_precision(text: &str) -> Result<(Option, &str), FormatSpecError> }) } -/// Parses a format part within a format spec -fn parse_nested_placeholder<'a>( - parts: &mut Vec, - text: &'a str, -) -> Result<&'a str, FormatSpecError> { +/// Parses a placeholder format part within a format specification +fn parse_nested_placeholder(text: &str) -> Result, FormatSpecError> { match FormatString::parse_spec(text, AllowPlaceholderNesting::No) { - // Not a nested replacement, OK - Err(FormatParseError::MissingStartBracket) => Ok(text), + // Not a nested placeholder, OK + Err(FormatParseError::MissingStartBracket) => Ok(None), Err(err) => Err(FormatSpecError::InvalidPlaceholder(err)), - Ok((format_part, text)) => { - parts.push(format_part); - Ok(text) + Ok((format_part, text)) => Ok(Some((format_part, text))), + } +} + +/// Parse all placeholders in a format specification +/// If no placeholders are present, an empty vector will be returned +fn parse_nested_placeholders(mut text: &str) -> Result, FormatSpecError> { + let mut placeholders = vec![]; + while let Some(bracket) = text.find('{') { + if let Some((format_part, rest)) = parse_nested_placeholder(&text[bracket..])? { + text = rest; + placeholders.push(format_part); + } else { + text = &text[bracket + 1..]; } } + Ok(placeholders) } impl FormatSpec { pub fn parse(text: &str) -> Result { - let mut replacements = vec![]; - // get_integer in CPython - let text = parse_nested_placeholder(&mut replacements, text)?; + let placeholders = parse_nested_placeholders(text)?; + if !placeholders.is_empty() { + return Ok(FormatSpec::Dynamic(DynamicFormatSpec { placeholders })); + } + let (conversion, text) = FormatConversion::parse(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (mut fill, mut align, text) = parse_fill_and_align(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (sign, text) = FormatSign::parse(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (alternate_form, text) = parse_alternate_form(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (zero, text) = parse_zero(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (width, text) = parse_number(text)?; - let text = parse_nested_placeholder(&mut replacements, text)?; let (grouping_option, text) = FormatGrouping::parse(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (precision, text) = parse_precision(text)?; - let text = parse_nested_placeholder(&mut replacements, text)?; let (format_type, _text) = if text.is_empty() { (None, text) @@ -376,7 +388,7 @@ impl FormatSpec { align = align.or(Some(FormatAlign::AfterSign)); } - Ok(FormatSpec { + Ok(FormatSpec::Static(StaticFormatSpec { conversion, fill, align, @@ -386,12 +398,7 @@ impl FormatSpec { grouping_option, precision, format_type, - replacements, - }) - } - - pub fn replacements(&self) -> &[FormatPart] { - return self.replacements.as_slice(); + })) } } @@ -437,7 +444,7 @@ impl std::fmt::Display for FormatParseError { std::write!(fmt, "unescaped start bracket in literal") } Self::PlaceholderRecursionExceeded => { - std::write!(fmt, "multiply nested replacement not allowed") + std::write!(fmt, "multiply nested placeholder not allowed") } Self::UnknownConversion => { std::write!(fmt, "unknown conversion") @@ -730,7 +737,7 @@ mod tests { #[test] fn test_width_only() { - let expected = Ok(FormatSpec { + let expected = Ok(FormatSpec::Static(StaticFormatSpec { conversion: None, fill: None, align: None, @@ -740,14 +747,13 @@ mod tests { grouping_option: None, precision: None, format_type: None, - replacements: vec![], - }); + })); assert_eq!(FormatSpec::parse("33"), expected); } #[test] fn test_fill_and_width() { - let expected = Ok(FormatSpec { + let expected = Ok(FormatSpec::Static(StaticFormatSpec { conversion: None, fill: Some('<'), align: Some(FormatAlign::Right), @@ -757,45 +763,26 @@ mod tests { grouping_option: None, precision: None, format_type: None, - replacements: vec![], - }); + })); assert_eq!(FormatSpec::parse("<>33"), expected); } #[test] fn test_format_part() { - let expected = Ok(FormatSpec { - conversion: None, - fill: None, - align: None, - sign: None, - alternate_form: false, - width: None, - grouping_option: None, - precision: None, - format_type: None, - replacements: vec![FormatPart::Field { + let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec { + placeholders: vec![FormatPart::Field { field_name: "x".to_string(), conversion_spec: None, format_spec: String::new(), }], - }); + })); assert_eq!(FormatSpec::parse("{x}"), expected); } #[test] - fn test_format_parts() { - let expected = Ok(FormatSpec { - conversion: None, - fill: None, - align: None, - sign: None, - alternate_form: false, - width: None, - grouping_option: None, - precision: None, - format_type: None, - replacements: vec![ + fn test_dynamic_format_spec() { + let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec { + placeholders: vec![ FormatPart::Field { field_name: "x".to_string(), conversion_spec: None, @@ -812,34 +799,25 @@ mod tests { format_spec: String::new(), }, ], - }); + })); assert_eq!(FormatSpec::parse("{x}{y:<2}{z}"), expected); } #[test] - fn test_format_part_with_others() { - let expected = Ok(FormatSpec { - conversion: None, - fill: None, - align: Some(FormatAlign::Left), - sign: None, - alternate_form: false, - width: Some(20), - grouping_option: None, - precision: None, - format_type: Some(FormatType::Binary), - replacements: vec![FormatPart::Field { + fn test_dynamic_format_spec_with_others() { + let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec { + placeholders: vec![FormatPart::Field { field_name: "x".to_string(), conversion_spec: None, format_spec: String::new(), }], - }); + })); assert_eq!(FormatSpec::parse("<{x}20b"), expected); } #[test] fn test_all() { - let expected = Ok(FormatSpec { + let expected = Ok(FormatSpec::Static(StaticFormatSpec { conversion: None, fill: Some('<'), align: Some(FormatAlign::Right), @@ -849,8 +827,7 @@ mod tests { grouping_option: Some(FormatGrouping::Comma), precision: Some(11), format_type: Some(FormatType::Binary), - replacements: vec![], - }); + })); assert_eq!(FormatSpec::parse("<>-#23,.11b"), expected); } @@ -877,7 +854,7 @@ mod tests { } #[test] - fn test_format_parse_nested_replacement() { + fn test_format_parse_nested_placeholder() { let expected = Ok(FormatString { format_parts: vec![ FormatPart::Literal("abcd".to_owned()), @@ -966,7 +943,15 @@ mod tests { ); assert_eq!( FormatSpec::parse("{}}"), - Err(FormatSpecError::InvalidFormatType) + // Note this should be an `InvalidFormatType` but we give up + // on all other parsing validation when we see a placeholder + Ok(FormatSpec::Dynamic(DynamicFormatSpec { + placeholders: vec![FormatPart::Field { + field_name: String::new(), + conversion_spec: None, + format_spec: String::new() + }] + })) ); assert_eq!( FormatSpec::parse("{{x}}"),