[red-knot] Reduce allocations when normalizing `VendoredPath`s (#11992)

This commit is contained in:
Alex Waygood 2024-06-24 13:08:01 +01:00 committed by GitHub
parent e2e98d005c
commit cd2af3be73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 63 additions and 31 deletions

View File

@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::fmt::{self, Debug}; use std::fmt::{self, Debug};
@ -30,7 +31,7 @@ impl VendoredFileSystem {
} }
pub fn exists(&self, path: &VendoredPath) -> bool { pub fn exists(&self, path: &VendoredPath) -> bool {
let normalized = normalize_vendored_path(path); let normalized = NormalizedVendoredPath::from(path);
let inner_locked = self.inner.lock(); let inner_locked = self.inner.lock();
let mut archive = inner_locked.borrow_mut(); let mut archive = inner_locked.borrow_mut();
@ -45,7 +46,7 @@ impl VendoredFileSystem {
} }
pub fn metadata(&self, path: &VendoredPath) -> Option<Metadata> { pub fn metadata(&self, path: &VendoredPath) -> Option<Metadata> {
let normalized = normalize_vendored_path(path); let normalized = NormalizedVendoredPath::from(path);
let inner_locked = self.inner.lock(); let inner_locked = self.inner.lock();
// Must probe the zipfile twice, as "stdlib" and "stdlib/" are considered // Must probe the zipfile twice, as "stdlib" and "stdlib/" are considered
@ -72,7 +73,7 @@ impl VendoredFileSystem {
pub fn read(&self, path: &VendoredPath) -> Result<String> { pub fn read(&self, path: &VendoredPath) -> Result<String> {
let inner_locked = self.inner.lock(); let inner_locked = self.inner.lock();
let mut archive = inner_locked.borrow_mut(); let mut archive = inner_locked.borrow_mut();
let mut zip_file = archive.lookup_path(&normalize_vendored_path(path))?; let mut zip_file = archive.lookup_path(&NormalizedVendoredPath::from(path))?;
let mut buffer = String::new(); let mut buffer = String::new();
zip_file.read_to_string(&mut buffer)?; zip_file.read_to_string(&mut buffer)?;
Ok(buffer) Ok(buffer)
@ -240,45 +241,76 @@ impl VendoredZipArchive {
/// but trailing slashes are crucial for distinguishing between /// but trailing slashes are crucial for distinguishing between
/// files and directories inside zip archives. /// files and directories inside zip archives.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
struct NormalizedVendoredPath(String); struct NormalizedVendoredPath<'a>(Cow<'a, str>);
impl NormalizedVendoredPath { impl<'a> NormalizedVendoredPath<'a> {
fn with_trailing_slash(mut self) -> Self { fn with_trailing_slash(self) -> Self {
debug_assert!(!self.0.ends_with('/')); debug_assert!(!self.0.ends_with('/'));
self.0.push('/'); let mut data = self.0.into_owned();
self data.push('/');
Self(Cow::Owned(data))
} }
fn as_str(&self) -> &str { fn as_str(&self) -> &str {
self.0.as_str() &self.0
} }
} }
/// Normalizes the path by removing `.` and `..` components. impl<'a> From<&'a VendoredPath> for NormalizedVendoredPath<'a> {
/// /// Normalize the path.
/// ## Panics: ///
/// If a path with an unsupported component for vendored paths is passed. /// The normalizations are:
/// Unsupported components are path prefixes, /// - Remove `.` and `..` components
/// and path root directories appearing anywhere except at the start of the path. /// - Strip trailing slashes
fn normalize_vendored_path(path: &VendoredPath) -> NormalizedVendoredPath { /// - Normalize `\\` separators to `/`
// Allow the `RootDir` component, but only if it is at the very start of the string. /// - Validate that the path does not have any unsupported components
let mut components = path.components().peekable(); ///
if let Some(camino::Utf8Component::RootDir) = components.peek() { /// ## Panics:
components.next(); /// If a path with an unsupported component for vendored paths is passed.
} /// Unsupported components are path prefixes and path root directories.
fn from(path: &'a VendoredPath) -> Self {
/// Remove `.` and `..` components, and validate that unsupported components are not present.
///
/// This inner routine also strips trailing slashes,
/// and normalizes paths to use Unix `/` separators.
/// However, it always allocates, so avoid calling it if possible.
/// In most cases, the path should already be normalized.
fn normalize_unnormalized_path(path: &VendoredPath) -> String {
let mut normalized_parts = Vec::new(); let mut normalized_parts = Vec::new();
for component in components { for component in path.components() {
match component { match component {
camino::Utf8Component::Normal(part) => normalized_parts.push(part), camino::Utf8Component::Normal(part) => normalized_parts.push(part),
camino::Utf8Component::CurDir => continue, camino::Utf8Component::CurDir => continue,
camino::Utf8Component::ParentDir => { camino::Utf8Component::ParentDir => {
// `VendoredPath("")`, `VendoredPath("..")` and `VendoredPath("../..")`
// all resolve to the same path relative to the zip archive
// (see https://github.com/astral-sh/ruff/pull/11991#issuecomment-2185278014)
normalized_parts.pop(); normalized_parts.pop();
} }
unsupported => panic!("Unsupported component in a vendored path: {unsupported}"), unsupported => {
panic!("Unsupported component in a vendored path: {unsupported}")
}
}
}
normalized_parts.join("/")
}
let path_str = path.as_str();
if std::path::MAIN_SEPARATOR == '\\' && path_str.contains('\\') {
// Normalize paths so that they always use Unix path separators
NormalizedVendoredPath(Cow::Owned(normalize_unnormalized_path(path)))
} else if !path
.components()
.all(|component| matches!(component, camino::Utf8Component::Normal(_)))
{
// Remove non-`Normal` components
NormalizedVendoredPath(Cow::Owned(normalize_unnormalized_path(path)))
} else {
// Strip trailing slashes from the path
NormalizedVendoredPath(Cow::Borrowed(path_str.trim_end_matches('/')))
} }
} }
NormalizedVendoredPath(normalized_parts.join("/"))
} }
#[cfg(test)] #[cfg(test)]