Misc. edits to script parsing (#5999)

This commit is contained in:
Charlie Marsh 2024-08-10 22:07:05 -04:00 committed by GitHub
parent 2d53e35e39
commit 5c44937742
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 59 additions and 39 deletions

View File

@ -23,10 +23,10 @@ pub struct Pep723Script {
pub path: PathBuf, pub path: PathBuf,
/// The parsed [`Pep723Metadata`] table from the script. /// The parsed [`Pep723Metadata`] table from the script.
pub metadata: Pep723Metadata, pub metadata: Pep723Metadata,
/// The content of the script after the metadata table.
pub raw: String,
/// The content of the script before the metadata table. /// The content of the script before the metadata table.
pub prelude: String, pub prelude: String,
/// The content of the script after the metadata table.
pub postlude: String,
} }
impl Pep723Script { impl Pep723Script {
@ -41,18 +41,23 @@ impl Pep723Script {
}; };
// Extract the `script` tag. // Extract the `script` tag.
let Some(script_tag) = ScriptTag::parse(&contents)? else { let Some(ScriptTag {
prelude,
metadata,
postlude,
}) = ScriptTag::parse(&contents)?
else {
return Ok(None); return Ok(None);
}; };
// Parse the metadata. // Parse the metadata.
let metadata = Pep723Metadata::from_str(&script_tag.metadata)?; let metadata = Pep723Metadata::from_str(&metadata)?;
Ok(Some(Self { Ok(Some(Self {
path: file.as_ref().to_path_buf(), path: file.as_ref().to_path_buf(),
metadata, metadata,
raw: script_tag.script, prelude,
prelude: script_tag.prelude, postlude,
})) }))
} }
@ -63,29 +68,25 @@ impl Pep723Script {
file: impl AsRef<Path>, file: impl AsRef<Path>,
requires_python: &VersionSpecifiers, requires_python: &VersionSpecifiers,
) -> Result<Self, Pep723Error> { ) -> Result<Self, Pep723Error> {
let contents = match fs_err::tokio::read(&file).await { let contents = fs_err::tokio::read(&file).await?;
Ok(contents) => contents,
Err(err) => return Err(err.into()),
};
// Extract the `script` tag. // Define the default metadata.
let default_metadata = indoc::formatdoc! {r#" let default_metadata = indoc::formatdoc! {r#"
requires-python = "{requires_python}" requires-python = "{requires_python}"
dependencies = [] dependencies = []
"#, "#,
requires_python = requires_python, requires_python = requires_python,
}; };
let (prelude, raw) = extract_shebang(&contents)?;
// Parse the metadata.
let metadata = Pep723Metadata::from_str(&default_metadata)?; let metadata = Pep723Metadata::from_str(&default_metadata)?;
// Extract the shebang and script content.
let (prelude, postlude) = extract_shebang(&contents)?;
Ok(Self { Ok(Self {
path: file.as_ref().to_path_buf(), path: file.as_ref().to_path_buf(),
prelude: prelude.unwrap_or_default(), prelude,
metadata, metadata,
raw, postlude,
}) })
} }
@ -99,7 +100,7 @@ impl Pep723Script {
format!("{}\n", self.prelude) format!("{}\n", self.prelude)
}, },
serialize_metadata(metadata), serialize_metadata(metadata),
self.raw self.postlude
); );
Ok(fs_err::tokio::write(&self.path, content).await?) Ok(fs_err::tokio::write(&self.path, content).await?)
@ -126,7 +127,7 @@ impl FromStr for Pep723Metadata {
/// Parse `Pep723Metadata` from a raw TOML string. /// Parse `Pep723Metadata` from a raw TOML string.
fn from_str(raw: &str) -> Result<Self, Self::Err> { fn from_str(raw: &str) -> Result<Self, Self::Err> {
let metadata = toml::from_str(raw)?; let metadata = toml::from_str(raw)?;
Ok(Pep723Metadata { Ok(Self {
raw: raw.to_string(), raw: raw.to_string(),
..metadata ..metadata
}) })
@ -166,7 +167,7 @@ struct ScriptTag {
/// The metadata block. /// The metadata block.
metadata: String, metadata: String,
/// The content of the script after the metadata block. /// The content of the script after the metadata block.
script: String, postlude: String,
} }
impl ScriptTag { impl ScriptTag {
@ -195,7 +196,7 @@ impl ScriptTag {
/// ///
/// - Preamble: `#!/usr/bin/env python3\n` /// - Preamble: `#!/usr/bin/env python3\n`
/// - Metadata: `requires-python = '>=3.11'\ndependencies = [\n 'requests<3',\n 'rich',\n]` /// - Metadata: `requires-python = '>=3.11'\ndependencies = [\n 'requests<3',\n 'rich',\n]`
/// - Script: `import requests\n\nprint("Hello, World!")\n` /// - Postlude: `import requests\n\nprint("Hello, World!")\n`
/// ///
/// See: <https://peps.python.org/pep-0723/> /// See: <https://peps.python.org/pep-0723/>
fn parse(contents: &[u8]) -> Result<Option<Self>, Pep723Error> { fn parse(contents: &[u8]) -> Result<Option<Self>, Pep723Error> {
@ -292,40 +293,58 @@ impl ScriptTag {
// Join the lines into a single string. // Join the lines into a single string.
let prelude = prelude.to_string(); let prelude = prelude.to_string();
let metadata = toml.join("\n") + "\n"; let metadata = toml.join("\n") + "\n";
let script = python_script.join("\n") + "\n"; let postlude = python_script.join("\n") + "\n";
Ok(Some(Self { Ok(Some(Self {
prelude, prelude,
metadata, metadata,
script, postlude,
})) }))
} }
} }
/// Extracts the shebang line from the given file contents and returns it along with the remaining /// Extracts the shebang line from the given file contents and returns it along with the remaining
/// content. /// content.
fn extract_shebang(contents: &[u8]) -> Result<(Option<String>, String), Pep723Error> { fn extract_shebang(contents: &[u8]) -> Result<(String, String), Pep723Error> {
let contents = std::str::from_utf8(contents)?; let contents = std::str::from_utf8(contents)?;
let mut lines = contents.lines(); if contents.starts_with("#!") {
// Find the first newline.
let bytes = contents.as_bytes();
let index = bytes
.iter()
.position(|&b| b == b'\r' || b == b'\n')
.unwrap_or(bytes.len());
// Check the first line for a shebang // Support `\r`, `\n`, and `\r\n` line endings.
if let Some(first_line) = lines.next() { let width = match bytes.get(index) {
if first_line.starts_with("#!") { Some(b'\r') => {
let shebang = first_line.to_string(); if bytes.get(index + 1) == Some(&b'\n') {
let remaining_content: String = lines.collect::<Vec<&str>>().join("\n"); 2
return Ok((Some(shebang), remaining_content)); } else {
} 1
}
}
Some(b'\n') => 1,
_ => 0,
};
// Extract the shebang line.
let shebang = contents[..index].to_string();
let script = contents[index + width..].to_string();
Ok((shebang, script))
} else {
Ok((String::new(), contents.to_string()))
} }
Ok((None, contents.to_string()))
} }
/// Formats the provided metadata by prefixing each line with `#` and wrapping it with script markers. /// Formats the provided metadata by prefixing each line with `#` and wrapping it with script markers.
fn serialize_metadata(metadata: &str) -> String { fn serialize_metadata(metadata: &str) -> String {
let mut output = String::with_capacity(metadata.len() + 2); let mut output = String::with_capacity(metadata.len() + 32);
output.push_str("# /// script\n"); output.push_str("# /// script");
output.push('\n');
for line in metadata.lines() { for line in metadata.lines() {
if line.is_empty() { if line.is_empty() {
@ -337,7 +356,8 @@ fn serialize_metadata(metadata: &str) -> String {
} }
} }
output.push_str("# ///\n"); output.push_str("# ///");
output.push('\n');
output output
} }
@ -427,7 +447,7 @@ mod tests {
assert_eq!(actual.prelude, String::new()); assert_eq!(actual.prelude, String::new());
assert_eq!(actual.metadata, expected_metadata); assert_eq!(actual.metadata, expected_metadata);
assert_eq!(actual.script, expected_data); assert_eq!(actual.postlude, expected_data);
} }
#[test] #[test]
@ -470,7 +490,7 @@ mod tests {
assert_eq!(actual.prelude, "#!/usr/bin/env python3\n".to_string()); assert_eq!(actual.prelude, "#!/usr/bin/env python3\n".to_string());
assert_eq!(actual.metadata, expected_metadata); assert_eq!(actual.metadata, expected_metadata);
assert_eq!(actual.script, expected_data); assert_eq!(actual.postlude, expected_data);
} }
#[test] #[test]
fn embedded_comment() { fn embedded_comment() {