Files
ruff/src/vendored/format.rs
2022-11-24 18:09:36 -05:00

361 lines
11 KiB
Rust

//! Vendored from [format.rs in rustpython-vm](https://github.com/RustPython/RustPython/blob/f54b5556e28256763c5506813ea977c9e1445af0/vm/src/format.rs).
//! The only changes we make are to remove dead code and code involving the vm.
use itertools::{Itertools, PeekingNext};
#[derive(Debug, PartialEq)]
pub(crate) enum FormatParseError {
UnmatchedBracket,
MissingStartBracket,
UnescapedStartBracketInLiteral,
InvalidFormatSpecifier,
UnknownConversion,
EmptyAttribute,
MissingRightBracket,
InvalidCharacterAfterRightBracket,
}
#[derive(Debug, PartialEq)]
pub(crate) enum FieldNamePart {
Attribute(String),
Index(usize),
StringIndex(String),
}
impl FieldNamePart {
fn parse_part(
chars: &mut impl PeekingNext<Item = char>,
) -> Result<Option<FieldNamePart>, FormatParseError> {
chars
.next()
.map(|ch| match ch {
'.' => {
let mut attribute = String::new();
for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') {
attribute.push(ch);
}
if attribute.is_empty() {
Err(FormatParseError::EmptyAttribute)
} else {
Ok(FieldNamePart::Attribute(attribute))
}
}
'[' => {
let mut index = String::new();
for ch in chars {
if ch == ']' {
return if index.is_empty() {
Err(FormatParseError::EmptyAttribute)
} else if let Ok(index) = index.parse::<usize>() {
Ok(FieldNamePart::Index(index))
} else {
Ok(FieldNamePart::StringIndex(index))
};
}
index.push(ch);
}
Err(FormatParseError::MissingRightBracket)
}
_ => Err(FormatParseError::InvalidCharacterAfterRightBracket),
})
.transpose()
}
}
#[derive(Debug, PartialEq)]
pub(crate) enum FieldType {
Auto,
Index(usize),
Keyword(String),
}
#[derive(Debug, PartialEq)]
pub(crate) struct FieldName {
pub field_type: FieldType,
pub parts: Vec<FieldNamePart>,
}
impl FieldName {
pub(crate) fn parse(text: &str) -> Result<FieldName, FormatParseError> {
let mut chars = text.chars().peekable();
let mut first = String::new();
for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') {
first.push(ch);
}
let field_type = if first.is_empty() {
FieldType::Auto
} else if let Ok(index) = first.parse::<usize>() {
FieldType::Index(index)
} else {
FieldType::Keyword(first)
};
let mut parts = Vec::new();
while let Some(part) = FieldNamePart::parse_part(&mut chars)? {
parts.push(part);
}
Ok(FieldName { field_type, parts })
}
}
#[derive(Debug, PartialEq)]
pub(crate) enum FormatPart {
Field {
field_name: String,
preconversion_spec: Option<char>,
format_spec: String,
},
Literal(String),
}
#[derive(Debug, PartialEq)]
pub(crate) struct FormatString {
pub format_parts: Vec<FormatPart>,
}
impl FormatString {
fn parse_literal_single(text: &str) -> Result<(char, &str), FormatParseError> {
let mut chars = text.chars();
// This should never be called with an empty str
let first_char = chars.next().unwrap();
// isn't this detectable only with bytes operation?
if first_char == '{' || first_char == '}' {
let maybe_next_char = chars.next();
// if we see a bracket, it has to be escaped by doubling up to be in a literal
return if maybe_next_char.is_none() || maybe_next_char.unwrap() != first_char {
Err(FormatParseError::UnescapedStartBracketInLiteral)
} else {
Ok((first_char, chars.as_str()))
};
}
Ok((first_char, chars.as_str()))
}
fn parse_literal(text: &str) -> Result<(FormatPart, &str), FormatParseError> {
let mut cur_text = text;
let mut result_string = String::new();
while !cur_text.is_empty() {
match FormatString::parse_literal_single(cur_text) {
Ok((next_char, remaining)) => {
result_string.push(next_char);
cur_text = remaining;
}
Err(err) => {
return if result_string.is_empty() {
Err(err)
} else {
Ok((FormatPart::Literal(result_string), cur_text))
};
}
}
}
Ok((FormatPart::Literal(result_string), ""))
}
fn parse_part_in_brackets(text: &str) -> Result<FormatPart, FormatParseError> {
let parts: Vec<&str> = text.splitn(2, ':').collect();
// before the comma is a keyword or arg index, after the comma is maybe a spec.
let arg_part = parts[0];
let format_spec = if parts.len() > 1 {
parts[1].to_owned()
} else {
String::new()
};
// On parts[0] can still be the preconversor (!r, !s, !a)
let parts: Vec<&str> = arg_part.splitn(2, '!').collect();
// before the bang is a keyword or arg index, after the comma is maybe a
// conversor spec.
let arg_part = parts[0];
let preconversion_spec = parts
.get(1)
.map(|conversion| {
// conversions are only every one character
conversion
.chars()
.exactly_one()
.map_err(|_| FormatParseError::UnknownConversion)
})
.transpose()?;
Ok(FormatPart::Field {
field_name: arg_part.to_owned(),
preconversion_spec,
format_spec,
})
}
fn parse_spec(text: &str) -> Result<(FormatPart, &str), FormatParseError> {
let mut nested = false;
let mut end_bracket_pos = None;
let mut left = String::new();
// There may be one layer nesting brackets in spec
for (idx, c) in text.chars().enumerate() {
if idx == 0 {
if c != '{' {
return Err(FormatParseError::MissingStartBracket);
}
} else if c == '{' {
if nested {
return Err(FormatParseError::InvalidFormatSpecifier);
}
nested = true;
left.push(c);
continue;
} else if c == '}' {
if nested {
nested = false;
left.push(c);
continue;
}
end_bracket_pos = Some(idx);
break;
} else {
left.push(c);
}
}
if let Some(pos) = end_bracket_pos {
let (_, right) = text.split_at(pos);
let format_part = FormatString::parse_part_in_brackets(&left)?;
Ok((format_part, &right[1..]))
} else {
Err(FormatParseError::UnmatchedBracket)
}
}
}
pub(crate) trait FromTemplate<'a>: Sized {
type Err;
fn from_str(s: &'a str) -> Result<Self, Self::Err>;
}
impl<'a> FromTemplate<'a> for FormatString {
type Err = FormatParseError;
fn from_str(text: &'a str) -> Result<Self, Self::Err> {
let mut cur_text: &str = text;
let mut parts: Vec<FormatPart> = Vec::new();
while !cur_text.is_empty() {
// Try to parse both literals and bracketed format parts until we
// run out of text
cur_text = FormatString::parse_literal(cur_text)
.or_else(|_| FormatString::parse_spec(cur_text))
.map(|(part, new_text)| {
parts.push(part);
new_text
})?;
}
Ok(FormatString {
format_parts: parts,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_format_parse() {
let expected = Ok(FormatString {
format_parts: vec![
FormatPart::Literal("abcd".to_owned()),
FormatPart::Field {
field_name: "1".to_owned(),
preconversion_spec: None,
format_spec: String::new(),
},
FormatPart::Literal(":".to_owned()),
FormatPart::Field {
field_name: "key".to_owned(),
preconversion_spec: None,
format_spec: String::new(),
},
],
});
assert_eq!(FormatString::from_str("abcd{1}:{key}"), expected);
}
#[test]
fn test_format_parse_fail() {
assert_eq!(
FormatString::from_str("{s"),
Err(FormatParseError::UnmatchedBracket)
);
}
#[test]
fn test_format_parse_escape() {
let expected = Ok(FormatString {
format_parts: vec![
FormatPart::Literal("{".to_owned()),
FormatPart::Field {
field_name: "key".to_owned(),
preconversion_spec: None,
format_spec: String::new(),
},
FormatPart::Literal("}ddfe".to_owned()),
],
});
assert_eq!(FormatString::from_str("{{{key}}}ddfe"), expected);
}
#[test]
fn test_parse_field_name() {
assert_eq!(
FieldName::parse(""),
Ok(FieldName {
field_type: FieldType::Auto,
parts: Vec::new(),
})
);
assert_eq!(
FieldName::parse("0"),
Ok(FieldName {
field_type: FieldType::Index(0),
parts: Vec::new(),
})
);
assert_eq!(
FieldName::parse("key"),
Ok(FieldName {
field_type: FieldType::Keyword("key".to_owned()),
parts: Vec::new(),
})
);
assert_eq!(
FieldName::parse("key.attr[0][string]"),
Ok(FieldName {
field_type: FieldType::Keyword("key".to_owned()),
parts: vec![
FieldNamePart::Attribute("attr".to_owned()),
FieldNamePart::Index(0),
FieldNamePart::StringIndex("string".to_owned())
],
})
);
assert_eq!(
FieldName::parse("key.."),
Err(FormatParseError::EmptyAttribute)
);
assert_eq!(
FieldName::parse("key[]"),
Err(FormatParseError::EmptyAttribute)
);
assert_eq!(
FieldName::parse("key["),
Err(FormatParseError::MissingRightBracket)
);
assert_eq!(
FieldName::parse("key[0]after"),
Err(FormatParseError::InvalidCharacterAfterRightBracket)
);
}
}