diff --git a/crates/uv-build-backend/src/metadata.rs b/crates/uv-build-backend/src/metadata.rs index 9436c5c19..29dfc23af 100644 --- a/crates/uv-build-backend/src/metadata.rs +++ b/crates/uv-build-backend/src/metadata.rs @@ -12,7 +12,7 @@ use version_ranges::Ranges; use walkdir::WalkDir; use uv_fs::Simplified; -use uv_globfilter::{parse_portable_glob, GlobDirFilter}; +use uv_globfilter::{GlobDirFilter, PortableGlobParser}; use uv_normalize::{ExtraName, PackageName}; use uv_pep440::{Version, VersionSpecifiers}; use uv_pep508::{ @@ -395,11 +395,12 @@ impl PyProjectToml { let mut license_files = Vec::new(); let mut license_globs_parsed = Vec::new(); for license_glob in license_globs { - let pep639_glob = - parse_portable_glob(license_glob).map_err(|err| Error::PortableGlob { + let pep639_glob = PortableGlobParser.parse(license_glob).map_err(|err| { + Error::PortableGlob { field: license_glob.to_string(), source: err, - })?; + } + })?; license_globs_parsed.push(pep639_glob); } let license_globs = diff --git a/crates/uv-build-backend/src/source_dist.rs b/crates/uv-build-backend/src/source_dist.rs index ba519e89b..56fe92e29 100644 --- a/crates/uv-build-backend/src/source_dist.rs +++ b/crates/uv-build-backend/src/source_dist.rs @@ -15,7 +15,7 @@ use tar::{EntryType, Header}; use tracing::{debug, trace}; use uv_distribution_filename::{SourceDistExtension, SourceDistFilename}; use uv_fs::Simplified; -use uv_globfilter::{parse_portable_glob, GlobDirFilter}; +use uv_globfilter::{GlobDirFilter, PortableGlobParser}; use uv_pypi_types::Identifier; use uv_warnings::warn_user_once; use walkdir::WalkDir; @@ -88,10 +88,12 @@ fn source_dist_matcher( .to_string(); includes.push(format!("{}/**", globset::escape(import_path))); for include in includes { - let glob = parse_portable_glob(&include).map_err(|err| Error::PortableGlob { - field: "tool.uv.build-backend.source-include".to_string(), - source: err, - })?; + let glob = PortableGlobParser + .parse(&include) + .map_err(|err| Error::PortableGlob { + field: "tool.uv.build-backend.source-include".to_string(), + source: err, + })?; include_globs.push(glob.clone()); } @@ -111,21 +113,22 @@ fn source_dist_matcher( // Include the license files for license_files in pyproject_toml.license_files_source_dist() { trace!("Including license files at: `{license_files}`"); - let glob = parse_portable_glob(license_files).map_err(|err| Error::PortableGlob { - field: "project.license-files".to_string(), - source: err, - })?; + let glob = PortableGlobParser + .parse(license_files) + .map_err(|err| Error::PortableGlob { + field: "project.license-files".to_string(), + source: err, + })?; include_globs.push(glob); } // Include the data files for (name, directory) in settings.data.iter() { - let glob = - parse_portable_glob(&format!("{}/**", globset::escape(directory))).map_err(|err| { - Error::PortableGlob { - field: format!("tool.uv.build-backend.data.{name}"), - source: err, - } + let glob = PortableGlobParser + .parse(&format!("{}/**", globset::escape(directory))) + .map_err(|err| Error::PortableGlob { + field: format!("tool.uv.build-backend.data.{name}"), + source: err, })?; trace!("Including data ({name}) at: `{directory}`"); include_globs.push(glob); diff --git a/crates/uv-build-backend/src/wheel.rs b/crates/uv-build-backend/src/wheel.rs index 4fa0ca73c..c3ae7c432 100644 --- a/crates/uv-build-backend/src/wheel.rs +++ b/crates/uv-build-backend/src/wheel.rs @@ -12,7 +12,7 @@ use zip::{CompressionMethod, ZipWriter}; use uv_distribution_filename::WheelFilename; use uv_fs::Simplified; -use uv_globfilter::{parse_portable_glob, GlobDirFilter}; +use uv_globfilter::{GlobDirFilter, PortableGlobParser}; use uv_platform_tags::{AbiTag, LanguageTag, PlatformTag}; use uv_pypi_types::Identifier; use uv_warnings::warn_user_once; @@ -432,10 +432,12 @@ pub(crate) fn build_exclude_matcher( } else { format!("**/{exclude}").to_string() }; - let glob = parse_portable_glob(&exclude).map_err(|err| Error::PortableGlob { - field: "tool.uv.build-backend.*-exclude".to_string(), - source: err, - })?; + let glob = PortableGlobParser + .parse(&exclude) + .map_err(|err| Error::PortableGlob { + field: "tool.uv.build-backend.*-exclude".to_string(), + source: err, + })?; exclude_builder.add(glob); } let exclude_matcher = exclude_builder @@ -467,7 +469,7 @@ fn wheel_subdir_from_globs( src.user_display(), license_files ); - parse_portable_glob(license_files) + PortableGlobParser.parse(license_files) }) .collect::>() .map_err(|err| Error::PortableGlob { diff --git a/crates/uv-globfilter/src/glob_dir_filter.rs b/crates/uv-globfilter/src/glob_dir_filter.rs index 9ed022faf..08945af16 100644 --- a/crates/uv-globfilter/src/glob_dir_filter.rs +++ b/crates/uv-globfilter/src/glob_dir_filter.rs @@ -124,7 +124,7 @@ impl GlobDirFilter { #[cfg(test)] mod tests { use crate::glob_dir_filter::GlobDirFilter; - use crate::portable_glob::parse_portable_glob; + use crate::portable_glob::PortableGlobParser; use std::path::{Path, MAIN_SEPARATOR}; use tempfile::tempdir; use walkdir::WalkDir; @@ -152,7 +152,7 @@ mod tests { #[test] fn match_directory() { - let patterns = PATTERNS.map(|pattern| parse_portable_glob(pattern).unwrap()); + let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap()); let matcher = GlobDirFilter::from_globs(&patterns).unwrap(); assert!(matcher.match_directory(&Path::new("path1").join("dir1"))); assert!(matcher.match_directory(&Path::new("path2").join("dir2"))); @@ -170,7 +170,7 @@ mod tests { fs_err::create_dir_all(file.parent().unwrap()).unwrap(); fs_err::File::create(file).unwrap(); } - let patterns = PATTERNS.map(|pattern| parse_portable_glob(pattern).unwrap()); + let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap()); let matcher = GlobDirFilter::from_globs(&patterns).unwrap(); // Test the prefix filtering @@ -228,7 +228,7 @@ mod tests { fs_err::create_dir_all(file.parent().unwrap()).unwrap(); fs_err::File::create(file).unwrap(); } - let patterns = PATTERNS.map(|pattern| parse_portable_glob(pattern).unwrap()); + let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap()); let include_matcher = GlobDirFilter::from_globs(&patterns).unwrap(); diff --git a/crates/uv-globfilter/src/lib.rs b/crates/uv-globfilter/src/lib.rs index 3f18b15f0..6fc386ee3 100644 --- a/crates/uv-globfilter/src/lib.rs +++ b/crates/uv-globfilter/src/lib.rs @@ -7,4 +7,4 @@ mod glob_dir_filter; mod portable_glob; pub use glob_dir_filter::GlobDirFilter; -pub use portable_glob::{check_portable_glob, parse_portable_glob, PortableGlobError}; +pub use portable_glob::{PortableGlobError, PortableGlobParser}; diff --git a/crates/uv-globfilter/src/main.rs b/crates/uv-globfilter/src/main.rs index 32174d780..77866fe47 100644 --- a/crates/uv-globfilter/src/main.rs +++ b/crates/uv-globfilter/src/main.rs @@ -3,7 +3,7 @@ use globset::GlobSetBuilder; use std::env::args; use tracing::trace; -use uv_globfilter::{parse_portable_glob, GlobDirFilter}; +use uv_globfilter::{GlobDirFilter, PortableGlobParser}; use walkdir::WalkDir; fn main() { @@ -12,7 +12,7 @@ fn main() { let mut include_globs = Vec::new(); for include in includes { - let glob = parse_portable_glob(include).unwrap(); + let glob = PortableGlobParser.parse(include).unwrap(); include_globs.push(glob.clone()); } let include_matcher = GlobDirFilter::from_globs(&include_globs).unwrap(); @@ -25,7 +25,7 @@ fn main() { } else { format!("**/{exclude}").to_string() }; - let glob = parse_portable_glob(&exclude).unwrap(); + let glob = PortableGlobParser.parse(&exclude).unwrap(); exclude_builder.add(glob); } // https://github.com/BurntSushi/ripgrep/discussions/2927 diff --git a/crates/uv-globfilter/src/portable_glob.rs b/crates/uv-globfilter/src/portable_glob.rs index 20c62c68d..7496938d4 100644 --- a/crates/uv-globfilter/src/portable_glob.rs +++ b/crates/uv-globfilter/src/portable_glob.rs @@ -1,4 +1,5 @@ -//! Cross-language glob syntax from [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/). +//! Cross-language glob syntax from +//! [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/). use globset::{Glob, GlobBuilder}; use thiserror::Error; @@ -28,99 +29,106 @@ pub enum PortableGlobError { TooManyStars { glob: String, pos: usize }, } -/// Parse cross-language glob syntax from [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/): -/// -/// - Alphanumeric characters, underscores (`_`), hyphens (`-`) and dots (`.`) are matched verbatim. -/// - The special glob characters are: -/// - `*`: Matches any number of characters except path separators -/// - `?`: Matches a single character except the path separator -/// - `**`: Matches any number of characters including path separators -/// - `[]`, containing only the verbatim matched characters: Matches a single of the characters contained. Within -/// `[...]`, the hyphen indicates a locale-agnostic range (e.g. `a-z`, order based on Unicode code points). Hyphens at -/// the start or end are matched literally. -/// - The path separator is the forward slash character (`/`). Patterns are relative to the given directory, a leading slash -/// character for absolute paths is not supported. -/// - Parent directory indicators (`..`) are not allowed. -/// -/// These rules mean that matching the backslash (`\`) is forbidden, which avoid collisions with the windows path separator. -pub fn parse_portable_glob(glob: &str) -> Result { - check_portable_glob(glob)?; - Ok(GlobBuilder::new(glob).literal_separator(true).build()?) -} +/// Cross-language glob parser with the glob syntax from +/// [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/). +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct PortableGlobParser; -/// See [`parse_portable_glob`]. -pub fn check_portable_glob(glob: &str) -> Result<(), PortableGlobError> { - let mut chars = glob.chars().enumerate().peekable(); - // A `..` is on a parent directory indicator at the start of the string or after a directory - // separator. - let mut start_or_slash = true; - // The number of consecutive stars before the current character. - while let Some((pos, c)) = chars.next() { - // `***` or `**literals` can be correctly represented with less stars. They are banned by - // `glob`, they are allowed by `globset` and PEP 639 is ambiguous, so we're filtering them - // out. - if c == '*' { - let mut star_run = 1; - while let Some((_, c)) = chars.peek() { - if *c == '*' { - star_run += 1; - chars.next(); - } else { - break; +impl PortableGlobParser { + /// Parse cross-language glob syntax from [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/): + /// + /// - Alphanumeric characters, underscores (`_`), hyphens (`-`) and dots (`.`) are matched verbatim. + /// - The special glob characters are: + /// - `*`: Matches any number of characters except path separators + /// - `?`: Matches a single character except the path separator + /// - `**`: Matches any number of characters including path separators + /// - `[]`, containing only the verbatim matched characters: Matches a single of the characters contained. Within + /// `[...]`, the hyphen indicates a locale-agnostic range (e.g. `a-z`, order based on Unicode code points). Hyphens at + /// the start or end are matched literally. + /// - The path separator is the forward slash character (`/`). Patterns are relative to the given directory, a leading slash + /// character for absolute paths is not supported. + /// - Parent directory indicators (`..`) are not allowed. + /// + /// These rules mean that matching the backslash (`\`) is forbidden, which avoid collisions with the windows path separator. + pub fn parse(&self, glob: &str) -> Result { + self.check(glob)?; + Ok(GlobBuilder::new(glob).literal_separator(true).build()?) + } + + /// See [`Self::parse`]. + pub fn check(&self, glob: &str) -> Result<(), PortableGlobError> { + let mut chars = glob.chars().enumerate().peekable(); + // A `..` is on a parent directory indicator at the start of the string or after a directory + // separator. + let mut start_or_slash = true; + // The number of consecutive stars before the current character. + while let Some((pos, c)) = chars.next() { + // `***` or `**literals` can be correctly represented with less stars. They are banned by + // `glob`, they are allowed by `globset` and PEP 639 is ambiguous, so we're filtering them + // out. + if c == '*' { + let mut star_run = 1; + while let Some((_, c)) = chars.peek() { + if *c == '*' { + star_run += 1; + chars.next(); + } else { + break; + } } - } - if star_run >= 3 { - return Err(PortableGlobError::TooManyStars { - glob: glob.to_string(), - // We don't update pos for the stars. - pos, - }); - } else if star_run == 2 { - if chars.peek().is_some_and(|(_, c)| *c != '/') { + if star_run >= 3 { return Err(PortableGlobError::TooManyStars { glob: glob.to_string(), // We don't update pos for the stars. pos, }); + } else if star_run == 2 { + if chars.peek().is_some_and(|(_, c)| *c != '/') { + return Err(PortableGlobError::TooManyStars { + glob: glob.to_string(), + // We don't update pos for the stars. + pos, + }); + } } - } - start_or_slash = false; - } else if c.is_alphanumeric() || matches!(c, '_' | '-' | '?') { - start_or_slash = false; - } else if c == '.' { - if start_or_slash && matches!(chars.peek(), Some((_, '.'))) { - return Err(PortableGlobError::ParentDirectory { - pos, - glob: glob.to_string(), - }); - } - start_or_slash = false; - } else if c == '/' { - start_or_slash = true; - } else if c == '[' { - for (pos, c) in chars.by_ref() { - if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') { - // Allowed. - } else if c == ']' { - break; - } else { - return Err(PortableGlobError::InvalidCharacterRange { - glob: glob.to_string(), + start_or_slash = false; + } else if c.is_alphanumeric() || matches!(c, '_' | '-' | '?') { + start_or_slash = false; + } else if c == '.' { + if start_or_slash && matches!(chars.peek(), Some((_, '.'))) { + return Err(PortableGlobError::ParentDirectory { pos, - invalid: c, + glob: glob.to_string(), }); } + start_or_slash = false; + } else if c == '/' { + start_or_slash = true; + } else if c == '[' { + for (pos, c) in chars.by_ref() { + if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') { + // Allowed. + } else if c == ']' { + break; + } else { + return Err(PortableGlobError::InvalidCharacterRange { + glob: glob.to_string(), + pos, + invalid: c, + }); + } + } + start_or_slash = false; + } else { + return Err(PortableGlobError::InvalidCharacter { + glob: glob.to_string(), + pos, + invalid: c, + }); } - start_or_slash = false; - } else { - return Err(PortableGlobError::InvalidCharacter { - glob: glob.to_string(), - pos, - invalid: c, - }); } + Ok(()) } - Ok(()) } #[cfg(test)] @@ -130,7 +138,7 @@ mod tests { #[test] fn test_error() { - let parse_err = |glob| parse_portable_glob(glob).unwrap_err().to_string(); + let parse_err = |glob| PortableGlobParser.parse(glob).unwrap_err().to_string(); assert_snapshot!( parse_err(".."), @"The parent directory operator (`..`) at position 0 is not allowed in glob: `..`" @@ -188,7 +196,7 @@ mod tests { "src/**", ]; for case in cases { - parse_portable_glob(case).unwrap(); + PortableGlobParser.parse(case).unwrap(); } } }