mirror of https://github.com/astral-sh/uv
Avoid lookaheads in `sysconfig` parser (#9879)
## Summary Based on some review feedback from https://github.com/astral-sh/uv/pull/9857.
This commit is contained in:
parent
08ea79cc33
commit
53dfe0de52
|
|
@ -4,7 +4,7 @@ use std::str::FromStr;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::ser::PrettyFormatter;
|
use serde_json::ser::PrettyFormatter;
|
||||||
|
|
||||||
use crate::sysconfig::cursor::{Cursor, EOF_CHAR};
|
use crate::sysconfig::cursor::Cursor;
|
||||||
|
|
||||||
/// A value in the [`SysconfigData`] map.
|
/// A value in the [`SysconfigData`] map.
|
||||||
///
|
///
|
||||||
|
|
@ -106,7 +106,11 @@ impl FromStr for SysconfigData {
|
||||||
|
|
||||||
let mut map = BTreeMap::new();
|
let mut map = BTreeMap::new();
|
||||||
loop {
|
loop {
|
||||||
match cursor.first() {
|
let Some(next) = cursor.bump() else {
|
||||||
|
return Err(Error::UnexpectedEof);
|
||||||
|
};
|
||||||
|
|
||||||
|
match next {
|
||||||
'\'' | '"' => {
|
'\'' | '"' => {
|
||||||
// Parse key.
|
// Parse key.
|
||||||
let key = parse_string(&mut cursor)?;
|
let key = parse_string(&mut cursor)?;
|
||||||
|
|
@ -136,13 +140,10 @@ impl FromStr for SysconfigData {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip whitespace.
|
// Skip whitespace.
|
||||||
' ' | '\n' | '\r' | '\t' => {
|
' ' | '\n' | '\r' | '\t' => {}
|
||||||
cursor.bump();
|
|
||||||
}
|
|
||||||
|
|
||||||
// When we see a closing brace, we're done.
|
// When we see a closing brace, we're done.
|
||||||
'}' => {
|
'}' => {
|
||||||
cursor.bump();
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -155,23 +156,24 @@ impl FromStr for SysconfigData {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a Python string literal.
|
/// Parse a Python string literal.
|
||||||
|
///
|
||||||
|
/// Expects the previous character to be the opening quote character.
|
||||||
fn parse_string(cursor: &mut Cursor) -> Result<String, Error> {
|
fn parse_string(cursor: &mut Cursor) -> Result<String, Error> {
|
||||||
let quote = cursor.bump().expect("Expected opening quote");
|
let quote = cursor.previous();
|
||||||
assert!(quote == '\'' || quote == '"', "Invalid quote character");
|
assert!(quote == '\'' || quote == '"', "Invalid quote character");
|
||||||
|
|
||||||
let mut result = String::new();
|
let mut result = String::new();
|
||||||
loop {
|
loop {
|
||||||
if cursor.first() == EOF_CHAR {
|
let Some(c) = cursor.bump() else {
|
||||||
return Err(Error::UnexpectedCharacter(EOF_CHAR));
|
return Err(Error::UnexpectedEof);
|
||||||
}
|
};
|
||||||
|
match c {
|
||||||
|
'\\' => {
|
||||||
// Handle escaped quotes.
|
// Handle escaped quotes.
|
||||||
if cursor.first() == '\\' {
|
if cursor.first() == quote {
|
||||||
// Consume the backslash.
|
// Consume the backslash.
|
||||||
cursor.bump();
|
cursor.bump();
|
||||||
if cursor.first() == quote {
|
|
||||||
result.push(quote);
|
result.push(quote);
|
||||||
cursor.bump();
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -179,53 +181,56 @@ fn parse_string(cursor: &mut Cursor) -> Result<String, Error> {
|
||||||
result.push('\\');
|
result.push('\\');
|
||||||
result.push(cursor.first());
|
result.push(cursor.first());
|
||||||
cursor.bump();
|
cursor.bump();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consume closing quote.
|
// Consume closing quote.
|
||||||
if cursor.first() == quote {
|
c if c == quote => {
|
||||||
cursor.bump();
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
result.push(cursor.first());
|
c => {
|
||||||
cursor.bump();
|
result.push(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a Python string, which may be a concatenation of multiple string literals.
|
/// Parse a Python string, which may be a concatenation of multiple string literals.
|
||||||
|
///
|
||||||
|
/// Expects the cursor to start at an opening quote character.
|
||||||
fn parse_concatenated_string(cursor: &mut Cursor) -> Result<String, Error> {
|
fn parse_concatenated_string(cursor: &mut Cursor) -> Result<String, Error> {
|
||||||
let mut result = String::new();
|
let mut result = String::new();
|
||||||
loop {
|
loop {
|
||||||
let c = cursor.first();
|
let Some(c) = cursor.bump() else {
|
||||||
if c == EOF_CHAR {
|
return Err(Error::UnexpectedEof);
|
||||||
break;
|
};
|
||||||
}
|
match c {
|
||||||
if c == '\'' || c == '"' {
|
'\'' | '"' => {
|
||||||
// Parse a new string fragment and append it.
|
// Parse a new string fragment and append it.
|
||||||
result.push_str(&parse_string(cursor)?);
|
result.push_str(&parse_string(cursor)?);
|
||||||
} else if is_python_whitespace(c) {
|
}
|
||||||
|
c if is_python_whitespace(c) => {
|
||||||
// Skip whitespace between fragments
|
// Skip whitespace between fragments
|
||||||
cursor.bump();
|
}
|
||||||
} else if c == ',' || c == '}' {
|
c => return Err(Error::UnexpectedCharacter(c)),
|
||||||
// End of value.
|
}
|
||||||
|
|
||||||
|
// Lookahead to the end of the string.
|
||||||
|
if matches!(cursor.first(), ',' | '}') {
|
||||||
break;
|
break;
|
||||||
} else {
|
|
||||||
return Err(Error::UnexpectedCharacter(c));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse an integer literal.
|
/// Parse an integer literal.
|
||||||
|
///
|
||||||
|
/// Expects the cursor to start at the first digit of the integer.
|
||||||
fn parse_int(cursor: &mut Cursor) -> Result<i32, std::num::ParseIntError> {
|
fn parse_int(cursor: &mut Cursor) -> Result<i32, std::num::ParseIntError> {
|
||||||
let mut result = String::new();
|
let mut result = String::new();
|
||||||
loop {
|
loop {
|
||||||
let c = cursor.first();
|
let c = cursor.first();
|
||||||
if c == EOF_CHAR {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if !c.is_ascii_digit() {
|
if !c.is_ascii_digit() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -251,6 +256,8 @@ pub enum Error {
|
||||||
MissingOpenBrace,
|
MissingOpenBrace,
|
||||||
#[error("Unexpected character: {0}")]
|
#[error("Unexpected character: {0}")]
|
||||||
UnexpectedCharacter(char),
|
UnexpectedCharacter(char),
|
||||||
|
#[error("Unexpected end of file")]
|
||||||
|
UnexpectedEof,
|
||||||
#[error("Failed to parse integer")]
|
#[error("Failed to parse integer")]
|
||||||
ParseInt(#[from] std::num::ParseIntError),
|
ParseInt(#[from] std::num::ParseIntError),
|
||||||
#[error("`_sysconfigdata_` is missing a header comment")]
|
#[error("`_sysconfigdata_` is missing a header comment")]
|
||||||
|
|
@ -289,6 +296,32 @@ mod tests {
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_trailing_comma() {
|
||||||
|
let input = indoc::indoc!(
|
||||||
|
r#"
|
||||||
|
# system configuration generated and used by the sysconfig module
|
||||||
|
build_time_vars = {
|
||||||
|
"key1": "value1",
|
||||||
|
"key2": 42,
|
||||||
|
"key3": "multi-part" " string",
|
||||||
|
}
|
||||||
|
"#
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = input.parse::<SysconfigData>().expect("Parsing failed");
|
||||||
|
let snapshot = result.to_string_pretty().unwrap();
|
||||||
|
|
||||||
|
insta::assert_snapshot!(snapshot, @r###"
|
||||||
|
# system configuration generated and used by the sysconfig module
|
||||||
|
build_time_vars = {
|
||||||
|
"key1": "value1",
|
||||||
|
"key2": 42,
|
||||||
|
"key3": "multi-part string"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parse_integer_values() {
|
fn test_parse_integer_values() {
|
||||||
let input = indoc::indoc!(
|
let input = indoc::indoc!(
|
||||||
|
|
@ -407,4 +440,39 @@ mod tests {
|
||||||
"Expected parsing to fail due to unexpected character"
|
"Expected parsing to fail due to unexpected character"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_unexpected_eof() {
|
||||||
|
let input = indoc::indoc!(
|
||||||
|
r#"
|
||||||
|
# system configuration generated and used by the sysconfig module
|
||||||
|
build_time_vars = {
|
||||||
|
"key1": 123
|
||||||
|
"#
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = input.parse::<SysconfigData>();
|
||||||
|
assert!(
|
||||||
|
result.is_err(),
|
||||||
|
"Expected parsing to fail due to unexpected character"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_unexpected_comma() {
|
||||||
|
let input = indoc::indoc!(
|
||||||
|
r#"
|
||||||
|
# system configuration generated and used by the sysconfig module
|
||||||
|
build_time_vars = {
|
||||||
|
"key1": 123,,
|
||||||
|
}
|
||||||
|
"#
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = input.parse::<SysconfigData>();
|
||||||
|
assert!(
|
||||||
|
result.is_err(),
|
||||||
|
"Expected parsing to fail due to unexpected character"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue