From 33184dc6a4019ab5dabc8542e060540188faaddc Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 14 Feb 2024 17:14:28 +0100 Subject: [PATCH] Extract `AnyString` nodes from `string/mod` --- .../ruff_python_formatter/src/string/any.rs | 210 ++++++++++++++++++ .../ruff_python_formatter/src/string/mod.rs | 210 +----------------- 2 files changed, 214 insertions(+), 206 deletions(-) create mode 100644 crates/ruff_python_formatter/src/string/any.rs diff --git a/crates/ruff_python_formatter/src/string/any.rs b/crates/ruff_python_formatter/src/string/any.rs new file mode 100644 index 0000000000..c4034c43e6 --- /dev/null +++ b/crates/ruff_python_formatter/src/string/any.rs @@ -0,0 +1,210 @@ +use crate::expression::expr_f_string::f_string_quoting; +use crate::other::f_string::FormatFString; +use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind}; +use crate::string::{Quoting, StringPrefix, StringQuotes}; +use crate::{AsFormat, PyFormatContext, PyFormatter}; +use memchr::memchr2; +use ruff_formatter::{Format, FormatResult}; +use ruff_python_ast::{ + self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef, + StringLiteral, +}; +use ruff_source_file::Locator; +use ruff_text_size::{Ranged, TextLen, TextRange}; +use std::iter::FusedIterator; + +/// Represents any kind of string expression. This could be either a string, +/// bytes or f-string. +#[derive(Copy, Clone, Debug)] +pub(crate) enum AnyString<'a> { + String(&'a ExprStringLiteral), + Bytes(&'a ExprBytesLiteral), + FString(&'a ExprFString), +} + +impl<'a> AnyString<'a> { + /// Creates a new [`AnyString`] from the given [`Expr`]. + /// + /// Returns `None` if the expression is not either a string, bytes or f-string. 
+ pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> { + match expression { + Expr::StringLiteral(string) => Some(AnyString::String(string)), + Expr::BytesLiteral(bytes) => Some(AnyString::Bytes(bytes)), + Expr::FString(fstring) => Some(AnyString::FString(fstring)), + _ => None, + } + } + + /// Returns `true` if the string is implicitly concatenated. + pub(crate) fn is_implicit_concatenated(self) -> bool { + match self { + Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(), + Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(), + Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(), + } + } + + /// Returns the quoting to be used for this string. + pub(super) fn quoting(self, locator: &Locator<'_>) -> Quoting { + match self { + Self::String(_) | Self::Bytes(_) => Quoting::CanChange, + Self::FString(f_string) => f_string_quoting(f_string, locator), + } + } + + /// Returns a vector of all the [`AnyStringPart`] of this string. + pub(super) fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> { + match self { + Self::String(ExprStringLiteral { value, .. }) => { + AnyStringPartsIter::String(value.iter()) + } + Self::Bytes(ExprBytesLiteral { value, .. }) => AnyStringPartsIter::Bytes(value.iter()), + Self::FString(ExprFString { value, .. 
}) => { + AnyStringPartsIter::FString(value.iter(), quoting) + } + } + } + + pub(crate) fn is_multiline(self, source: &str) -> bool { + match self { + AnyString::String(_) | AnyString::Bytes(_) => { + let contents = &source[self.range()]; + let prefix = StringPrefix::parse(contents); + let quotes = StringQuotes::parse( + &contents[TextRange::new(prefix.text_len(), contents.text_len())], + ); + + quotes.is_some_and(StringQuotes::is_triple) + && memchr2(b'\n', b'\r', contents.as_bytes()).is_some() + } + AnyString::FString(fstring) => { + memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some() + } + } + } +} + +impl Ranged for AnyString<'_> { + fn range(&self) -> TextRange { + match self { + Self::String(expr) => expr.range(), + Self::Bytes(expr) => expr.range(), + Self::FString(expr) => expr.range(), + } + } +} + +impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> { + fn from(value: &AnyString<'a>) -> Self { + match value { + AnyString::String(expr) => AnyNodeRef::ExprStringLiteral(expr), + AnyString::Bytes(expr) => AnyNodeRef::ExprBytesLiteral(expr), + AnyString::FString(expr) => AnyNodeRef::ExprFString(expr), + } + } +} + +impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> { + fn from(value: AnyString<'a>) -> Self { + AnyNodeRef::from(&value) + } +} + +impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> { + fn from(value: &AnyString<'a>) -> Self { + match value { + AnyString::String(expr) => ExpressionRef::StringLiteral(expr), + AnyString::Bytes(expr) => ExpressionRef::BytesLiteral(expr), + AnyString::FString(expr) => ExpressionRef::FString(expr), + } + } +} + +pub(super) enum AnyStringPartsIter<'a> { + String(std::slice::Iter<'a, StringLiteral>), + Bytes(std::slice::Iter<'a, ast::BytesLiteral>), + FString(std::slice::Iter<'a, ast::FStringPart>, Quoting), +} + +impl<'a> Iterator for AnyStringPartsIter<'a> { + type Item = AnyStringPart<'a>; + + fn next(&mut self) -> Option<Self::Item> { + let part = match self { + Self::String(inner) => { + let part = inner.next()?; + 
AnyStringPart::String { + part, + layout: StringLiteralKind::String, + } + } + Self::Bytes(inner) => AnyStringPart::Bytes(inner.next()?), + Self::FString(inner, quoting) => { + let part = inner.next()?; + match part { + ast::FStringPart::Literal(string_literal) => AnyStringPart::String { + part: string_literal, + layout: StringLiteralKind::InImplicitlyConcatenatedFString(*quoting), + }, + ast::FStringPart::FString(f_string) => AnyStringPart::FString { + part: f_string, + quoting: *quoting, + }, + } + } + }; + + Some(part) + } +} + +impl FusedIterator for AnyStringPartsIter<'_> {} + +/// Represents any kind of string which is part of an implicitly concatenated +/// string. This could be either a string, bytes or f-string. +/// +/// This is constructed from the [`AnyString::parts`] method on [`AnyString`]. +#[derive(Clone, Debug)] +pub(super) enum AnyStringPart<'a> { + String { + part: &'a ast::StringLiteral, + layout: StringLiteralKind, + }, + Bytes(&'a ast::BytesLiteral), + FString { + part: &'a ast::FString, + quoting: Quoting, + }, +} + +impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> { + fn from(value: &AnyStringPart<'a>) -> Self { + match value { + AnyStringPart::String { part, .. } => AnyNodeRef::StringLiteral(part), + AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part), + AnyStringPart::FString { part, .. } => AnyNodeRef::FString(part), + } + } +} + +impl Ranged for AnyStringPart<'_> { + fn range(&self) -> TextRange { + match self { + Self::String { part, .. } => part.range(), + Self::Bytes(part) => part.range(), + Self::FString { part, .. 
} => part.range(), + } + } +} + +impl Format<PyFormatContext<'_>> for AnyStringPart<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + match self { + AnyStringPart::String { part, layout } => { + FormatStringLiteral::new(part, *layout).fmt(f) + } + AnyStringPart::Bytes(bytes_literal) => bytes_literal.format().fmt(f), + AnyStringPart::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f), + } + } +} diff --git a/crates/ruff_python_formatter/src/string/mod.rs b/crates/ruff_python_formatter/src/string/mod.rs index 047ae7cd36..a8b34bb6b0 100644 --- a/crates/ruff_python_formatter/src/string/mod.rs +++ b/crates/ruff_python_formatter/src/string/mod.rs @@ -1,26 +1,19 @@ use std::borrow::Cow; -use std::iter::FusedIterator; use bitflags::bitflags; -use memchr::memchr2; +pub(crate) use any::AnyString; use ruff_formatter::{format_args, write}; -use ruff_python_ast::{ - self as ast, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef, -}; -use ruff_python_ast::{AnyNodeRef, StringLiteral}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::comments::{leading_comments, trailing_comments}; -use crate::expression::expr_f_string::f_string_quoting; use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space; -use crate::other::f_string::FormatFString; -use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind}; use crate::prelude::*; use crate::preview::is_hex_codes_in_unicode_sequences_enabled; use crate::QuoteStyle; +mod any; pub(crate) mod docstring; #[derive(Copy, Clone, Debug, Default)] @@ -30,202 +23,6 @@ pub(crate) enum Quoting { Preserve, } -/// Represents any kind of string expression. This could be either a string, -/// bytes or f-string. 
-#[derive(Copy, Clone, Debug)] -pub(crate) enum AnyString<'a> { - String(&'a ExprStringLiteral), - Bytes(&'a ExprBytesLiteral), - FString(&'a ExprFString), -} - -impl<'a> AnyString<'a> { - /// Creates a new [`AnyString`] from the given [`Expr`]. - /// - /// Returns `None` if the expression is not either a string, bytes or f-string. - pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> { - match expression { - Expr::StringLiteral(string) => Some(AnyString::String(string)), - Expr::BytesLiteral(bytes) => Some(AnyString::Bytes(bytes)), - Expr::FString(fstring) => Some(AnyString::FString(fstring)), - _ => None, - } - } - - /// Returns `true` if the string is implicitly concatenated. - pub(crate) fn is_implicit_concatenated(self) -> bool { - match self { - Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(), - Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(), - Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(), - } - } - - /// Returns the quoting to be used for this string. - fn quoting(self, locator: &Locator<'_>) -> Quoting { - match self { - Self::String(_) | Self::Bytes(_) => Quoting::CanChange, - Self::FString(f_string) => f_string_quoting(f_string, locator), - } - } - - /// Returns a vector of all the [`AnyStringPart`] of this string. - fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> { - match self { - Self::String(ExprStringLiteral { value, .. }) => { - AnyStringPartsIter::String(value.iter()) - } - Self::Bytes(ExprBytesLiteral { value, .. }) => AnyStringPartsIter::Bytes(value.iter()), - Self::FString(ExprFString { value, .. 
}) => { - AnyStringPartsIter::FString(value.iter(), quoting) - } - } - } - - pub(crate) fn is_multiline(self, source: &str) -> bool { - match self { - AnyString::String(_) | AnyString::Bytes(_) => { - let contents = &source[self.range()]; - let prefix = StringPrefix::parse(contents); - let quotes = StringQuotes::parse( - &contents[TextRange::new(prefix.text_len(), contents.text_len())], - ); - - quotes.is_some_and(StringQuotes::is_triple) - && memchr2(b'\n', b'\r', contents.as_bytes()).is_some() - } - AnyString::FString(fstring) => { - memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some() - } - } - } -} - -impl Ranged for AnyString<'_> { - fn range(&self) -> TextRange { - match self { - Self::String(expr) => expr.range(), - Self::Bytes(expr) => expr.range(), - Self::FString(expr) => expr.range(), - } - } -} - -impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> { - fn from(value: &AnyString<'a>) -> Self { - match value { - AnyString::String(expr) => AnyNodeRef::ExprStringLiteral(expr), - AnyString::Bytes(expr) => AnyNodeRef::ExprBytesLiteral(expr), - AnyString::FString(expr) => AnyNodeRef::ExprFString(expr), - } - } -} - -impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> { - fn from(value: AnyString<'a>) -> Self { - AnyNodeRef::from(&value) - } -} - -impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> { - fn from(value: &AnyString<'a>) -> Self { - match value { - AnyString::String(expr) => ExpressionRef::StringLiteral(expr), - AnyString::Bytes(expr) => ExpressionRef::BytesLiteral(expr), - AnyString::FString(expr) => ExpressionRef::FString(expr), - } - } -} - -enum AnyStringPartsIter<'a> { - String(std::slice::Iter<'a, StringLiteral>), - Bytes(std::slice::Iter<'a, ast::BytesLiteral>), - FString(std::slice::Iter<'a, ast::FStringPart>, Quoting), -} - -impl<'a> Iterator for AnyStringPartsIter<'a> { - type Item = AnyStringPart<'a>; - - fn next(&mut self) -> Option<Self::Item> { - let part = match self { - Self::String(inner) => { - let part = inner.next()?; - AnyStringPart::String { - 
part, - layout: StringLiteralKind::String, - } - } - Self::Bytes(inner) => AnyStringPart::Bytes(inner.next()?), - Self::FString(inner, quoting) => { - let part = inner.next()?; - match part { - ast::FStringPart::Literal(string_literal) => AnyStringPart::String { - part: string_literal, - layout: StringLiteralKind::InImplicitlyConcatenatedFString(*quoting), - }, - ast::FStringPart::FString(f_string) => AnyStringPart::FString { - part: f_string, - quoting: *quoting, - }, - } - } - }; - - Some(part) - } -} - -impl FusedIterator for AnyStringPartsIter<'_> {} - -/// Represents any kind of string which is part of an implicitly concatenated -/// string. This could be either a string, bytes or f-string. -/// -/// This is constructed from the [`AnyString::parts`] method on [`AnyString`]. -#[derive(Clone, Debug)] -enum AnyStringPart<'a> { - String { - part: &'a ast::StringLiteral, - layout: StringLiteralKind, - }, - Bytes(&'a ast::BytesLiteral), - FString { - part: &'a ast::FString, - quoting: Quoting, - }, -} - -impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> { - fn from(value: &AnyStringPart<'a>) -> Self { - match value { - AnyStringPart::String { part, .. } => AnyNodeRef::StringLiteral(part), - AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part), - AnyStringPart::FString { part, .. } => AnyNodeRef::FString(part), - } - } -} - -impl Ranged for AnyStringPart<'_> { - fn range(&self) -> TextRange { - match self { - Self::String { part, .. } => part.range(), - Self::Bytes(part) => part.range(), - Self::FString { part, .. 
} => part.range(), - } - } -} - -impl Format<PyFormatContext<'_>> for AnyStringPart<'_> { - fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { - match self { - AnyStringPart::String { part, layout } => { - FormatStringLiteral::new(part, *layout).fmt(f) - } - AnyStringPart::Bytes(bytes_literal) => bytes_literal.format().fmt(f), - AnyStringPart::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f), - } - } -} - /// Formats any implicitly concatenated string. This could be any valid combination /// of string, bytes or f-string literals. pub(crate) struct FormatStringContinuation<'a> { @@ -1057,9 +854,10 @@ impl UnicodeEscape { #[cfg(test)] mod tests { - use crate::string::{normalize_string, QuoteChar, StringPrefix, StringQuotes, UnicodeEscape}; use std::borrow::Cow; + use crate::string::{normalize_string, QuoteChar, StringPrefix, StringQuotes, UnicodeEscape}; + #[test] fn normalize_32_escape() { let escape_sequence = UnicodeEscape::new('U', true).unwrap();