diff --git a/Cargo.lock b/Cargo.lock index 8d329c2d5..26bf8aef8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5258,9 +5258,11 @@ dependencies = [ name = "uv-globfilter" version = "0.1.0" dependencies = [ + "anstream", "fs-err 3.1.0", "globset", "insta", + "owo-colors", "regex", "regex-automata 0.4.9", "tempfile", diff --git a/crates/uv-build-backend/src/metadata.rs b/crates/uv-build-backend/src/metadata.rs index b91a2342c..36d2319fb 100644 --- a/crates/uv-build-backend/src/metadata.rs +++ b/crates/uv-build-backend/src/metadata.rs @@ -395,12 +395,13 @@ impl PyProjectToml { let mut license_files = Vec::new(); let mut license_globs_parsed = Vec::new(); for license_glob in license_globs { - let pep639_glob = PortableGlobParser.parse(license_glob).map_err(|err| { - Error::PortableGlob { - field: license_glob.to_string(), - source: err, - } - })?; + let pep639_glob = + PortableGlobParser::Pep639 + .parse(license_glob) + .map_err(|err| Error::PortableGlob { + field: license_glob.to_string(), + source: err, + })?; license_globs_parsed.push(pep639_glob); } let license_globs = diff --git a/crates/uv-build-backend/src/source_dist.rs b/crates/uv-build-backend/src/source_dist.rs index 03489275d..da26e02ad 100644 --- a/crates/uv-build-backend/src/source_dist.rs +++ b/crates/uv-build-backend/src/source_dist.rs @@ -86,7 +86,7 @@ fn source_dist_matcher( .to_string(); includes.push(format!("{}/**", globset::escape(import_path))); for include in includes { - let glob = PortableGlobParser + let glob = PortableGlobParser::Uv .parse(&include) .map_err(|err| Error::PortableGlob { field: "tool.uv.build-backend.source-include".to_string(), @@ -111,7 +111,7 @@ fn source_dist_matcher( // Include the license files for license_files in pyproject_toml.license_files_source_dist() { trace!("Including license files at: `{license_files}`"); - let glob = PortableGlobParser + let glob = PortableGlobParser::Pep639 .parse(license_files) .map_err(|err| Error::PortableGlob { field: 
"project.license-files".to_string(), @@ -122,7 +122,7 @@ fn source_dist_matcher( // Include the data files for (name, directory) in settings.data.iter() { - let glob = PortableGlobParser + let glob = PortableGlobParser::Uv .parse(&format!("{}/**", globset::escape(directory))) .map_err(|err| Error::PortableGlob { field: format!("tool.uv.build-backend.data.{name}"), diff --git a/crates/uv-build-backend/src/wheel.rs b/crates/uv-build-backend/src/wheel.rs index 6ec36dae4..d3126f88a 100644 --- a/crates/uv-build-backend/src/wheel.rs +++ b/crates/uv-build-backend/src/wheel.rs @@ -431,7 +431,7 @@ pub(crate) fn build_exclude_matcher( } else { format!("**/{exclude}").to_string() }; - let glob = PortableGlobParser + let glob = PortableGlobParser::Uv .parse(&exclude) .map_err(|err| Error::PortableGlob { field: "tool.uv.build-backend.*-exclude".to_string(), @@ -468,7 +468,7 @@ fn wheel_subdir_from_globs( src.user_display(), license_files ); - PortableGlobParser.parse(license_files) + PortableGlobParser::Pep639.parse(license_files) }) .collect::>() .map_err(|err| Error::PortableGlob { diff --git a/crates/uv-globfilter/Cargo.toml b/crates/uv-globfilter/Cargo.toml index 70ba3373a..ca45a92f6 100644 --- a/crates/uv-globfilter/Cargo.toml +++ b/crates/uv-globfilter/Cargo.toml @@ -12,6 +12,7 @@ license.workspace = true [dependencies] globset = { workspace = true } +owo-colors = { workspace = true } regex = { workspace = true } regex-automata = { workspace = true } thiserror = { workspace = true } @@ -19,6 +20,7 @@ tracing = { workspace = true } walkdir = { workspace = true } [dev-dependencies] +anstream = { workspace = true } fs-err = { workspace = true } insta = "1.41.1" tempfile = { workspace = true } diff --git a/crates/uv-globfilter/src/glob_dir_filter.rs b/crates/uv-globfilter/src/glob_dir_filter.rs index e8bece135..fd14bffff 100644 --- a/crates/uv-globfilter/src/glob_dir_filter.rs +++ b/crates/uv-globfilter/src/glob_dir_filter.rs @@ -124,7 +124,7 @@ impl GlobDirFilter { 
#[cfg(test)] mod tests { use crate::glob_dir_filter::GlobDirFilter; - use crate::portable_glob::PortableGlobParser; + use crate::PortableGlobParser; use std::path::{Path, MAIN_SEPARATOR}; use tempfile::tempdir; use walkdir::WalkDir; @@ -152,7 +152,7 @@ mod tests { #[test] fn match_directory() { - let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap()); + let patterns = PATTERNS.map(|pattern| PortableGlobParser::Pep639.parse(pattern).unwrap()); let matcher = GlobDirFilter::from_globs(&patterns).unwrap(); assert!(matcher.match_directory(&Path::new("path1").join("dir1"))); assert!(matcher.match_directory(&Path::new("path2").join("dir2"))); @@ -170,7 +170,7 @@ mod tests { fs_err::create_dir_all(file.parent().unwrap()).unwrap(); fs_err::File::create(file).unwrap(); } - let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap()); + let patterns = PATTERNS.map(|pattern| PortableGlobParser::Pep639.parse(pattern).unwrap()); let matcher = GlobDirFilter::from_globs(&patterns).unwrap(); // Test the prefix filtering @@ -228,7 +228,7 @@ mod tests { fs_err::create_dir_all(file.parent().unwrap()).unwrap(); fs_err::File::create(file).unwrap(); } - let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap()); + let patterns = PATTERNS.map(|pattern| PortableGlobParser::Pep639.parse(pattern).unwrap()); let include_matcher = GlobDirFilter::from_globs(&patterns).unwrap(); diff --git a/crates/uv-globfilter/src/main.rs b/crates/uv-globfilter/src/main.rs index 602a46e0e..0f54d75ad 100644 --- a/crates/uv-globfilter/src/main.rs +++ b/crates/uv-globfilter/src/main.rs @@ -12,7 +12,7 @@ fn main() { let mut include_globs = Vec::new(); for include in includes { - let glob = PortableGlobParser.parse(include).unwrap(); + let glob = PortableGlobParser::Pep639.parse(include).unwrap(); include_globs.push(glob.clone()); } let include_matcher = GlobDirFilter::from_globs(&include_globs).unwrap(); @@ -25,7 +25,7 @@ fn main() { } else { 
format!("**/{exclude}").to_string() }; - let glob = PortableGlobParser.parse(&exclude).unwrap(); + let glob = PortableGlobParser::Pep639.parse(&exclude).unwrap(); exclude_builder.add(glob); } // https://github.com/BurntSushi/ripgrep/discussions/2927 diff --git a/crates/uv-globfilter/src/portable_glob.rs b/crates/uv-globfilter/src/portable_glob.rs index 7496938d4..367b7db0f 100644 --- a/crates/uv-globfilter/src/portable_glob.rs +++ b/crates/uv-globfilter/src/portable_glob.rs @@ -2,6 +2,7 @@ //! [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/). use globset::{Glob, GlobBuilder}; +use owo_colors::OwoColorize; use thiserror::Error; #[derive(Debug, Error)] @@ -19,6 +20,24 @@ pub enum PortableGlobError { pos: usize, invalid: char, }, + #[error( + "Invalid character `{invalid}` at position {pos} in glob: `{glob}`. {}{} Characters can be escaped with a backslash", + "hint".bold().cyan(), + ":".bold() + )] + InvalidCharacterUv { + glob: String, + pos: usize, + invalid: char, + }, + #[error( + "Only forward slashes are allowed as path separator, invalid character at position {pos} in glob: `{glob}`" + )] + InvalidBackslash { glob: String, pos: usize }, + #[error( + "Path separators can't be escaped, invalid character at position {pos} in glob: `{glob}`" + )] + InvalidEscapee { glob: String, pos: usize }, #[error("Invalid character `{invalid}` in range at position {pos} in glob: `{glob}`")] InvalidCharacterRange { glob: String, @@ -27,15 +46,35 @@ pub enum PortableGlobError { }, #[error("Too many at stars at position {pos} in glob: `{glob}`")] TooManyStars { glob: String, pos: usize }, + #[error("Trailing backslash at position {pos} in glob: `{glob}`")] + TrailingEscape { glob: String, pos: usize }, } -/// Cross-language glob parser with the glob syntax from +/// Cross-language glob syntax from /// [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/). 
+/// +/// The variant determines whether the parser strictly adheres to PEP 639 rules or allows extensions +/// such as backslash escapes. #[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub struct PortableGlobParser; +pub enum PortableGlobParser { + /// Follow the PEP 639 rules strictly. + Pep639, + /// In addition to the PEP 639 syntax, allow escaping characters with backslashes. + /// + /// For cross-platform compatibility, escaping path separators is not allowed, i.e., forward + /// slashes and backslashes can't be escaped. + Uv, +} impl PortableGlobParser { - /// Parse cross-language glob syntax from [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/): + fn backslash_escape(self) -> bool { + match self { + PortableGlobParser::Pep639 => false, + PortableGlobParser::Uv => true, + } + } + + /// Parse cross-language glob syntax based on [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/): /// /// - Alphanumeric characters, underscores (`_`), hyphens (`-`) and dots (`.`) are matched verbatim. /// - The special glob characters are: @@ -45,6 +84,7 @@ impl PortableGlobParser { /// - `[]`, containing only the verbatim matched characters: Matches a single of the characters contained. Within /// `[...]`, the hyphen indicates a locale-agnostic range (e.g. `a-z`, order based on Unicode code points). Hyphens at /// the start or end are matched literally. + /// - `\`: Disallowed in PEP 639 mode. In uv mode, it escapes the following character to be matched verbatim. /// - The path separator is the forward slash character (`/`). Patterns are relative to the given directory, a leading slash /// character for absolute paths is not supported. /// - Parent directory indicators (`..`) are not allowed. @@ -52,10 +92,13 @@ impl PortableGlobParser { /// These rules mean that matching the backslash (`\`) is forbidden, which avoid collisions with the windows path separator. 
pub fn parse(&self, glob: &str) -> Result { self.check(glob)?; - Ok(GlobBuilder::new(glob).literal_separator(true).build()?) + Ok(GlobBuilder::new(glob) + .literal_separator(true) + .backslash_escape(self.backslash_escape()) + .build()?) } - /// See [`Self::parse`]. + /// Validates `glob` against the syntax documented on [`Self::parse`] without building it. pub fn check(&self, glob: &str) -> Result<(), PortableGlobError> { let mut chars = glob.chars().enumerate().peekable(); // A `..` is on a parent directory indicator at the start of the string or after a directory @@ -119,12 +162,50 @@ impl PortableGlobParser { } } start_or_slash = false; + } else if c == '\\' { + match *self { + PortableGlobParser::Pep639 => { + return Err(PortableGlobError::InvalidBackslash { + glob: glob.to_string(), + pos, + }); + } + PortableGlobParser::Uv => { + match chars.next() { + Some((pos, '/' | '\\')) => { + // For cross-platform compatibility, we don't allow forward slashes or + // backslashes to be escaped. + return Err(PortableGlobError::InvalidEscapee { + glob: glob.to_string(), + pos, + }); + } + Some(_) => { + // Escaped character + } + None => { + return Err(PortableGlobError::TrailingEscape { + glob: glob.to_string(), + pos, + }); + } + } + } + } } else { - return Err(PortableGlobError::InvalidCharacter { - glob: glob.to_string(), - pos, - invalid: c, - }); + let err = match *self { + PortableGlobParser::Pep639 => PortableGlobError::InvalidCharacter { + glob: glob.to_string(), + pos, + invalid: c, + }, + PortableGlobParser::Uv => PortableGlobError::InvalidCharacterUv { + glob: glob.to_string(), + pos, + invalid: c, + }, + }; + return Err(err); } } Ok(()) @@ -138,7 +219,10 @@ mod tests { #[test] fn test_error() { - let parse_err = |glob| PortableGlobParser.parse(glob).unwrap_err().to_string(); + let parse_err = |glob| { + let error = PortableGlobParser::Pep639.parse(glob).unwrap_err(); + anstream::adapter::strip_str(&error.to_string()).to_string() + }; assert_snapshot!( parse_err(".."), @"The parent directory operator (`..`) at 
position 0 is not allowed in glob: `..`" @@ -173,30 +257,64 @@ mod tests { ); assert_snapshot!( parse_err(r"licenses\eula.txt"), - @r"Invalid character `\` at position 8 in glob: `licenses\eula.txt`" + @r"Only forward slashes are allowed as path separator, invalid character at position 8 in glob: `licenses\eula.txt`" + ); + assert_snapshot!( + parse_err(r"**/@test"), + @"Invalid character `@` at position 3 in glob: `**/@test`" + ); + // Strict PEP 639 mode has no escapes; this glob is already rejected at the space (position 6) + assert_snapshot!( + parse_err(r"public domain/Gulliver\\’s Travels.txt"), + @r"Invalid character ` ` at position 6 in glob: `public domain/Gulliver\\’s Travels.txt`" + ); + let parse_err_uv = |glob| { + let error = PortableGlobParser::Uv.parse(glob).unwrap_err(); + anstream::adapter::strip_str(&error.to_string()).to_string() + }; + assert_snapshot!( + parse_err_uv(r"**/@test"), + @"Invalid character `@` at position 3 in glob: `**/@test`. hint: Characters can be escaped with a backslash" + ); + // Escaping slashes is not allowed. 
+ assert_snapshot!( + parse_err_uv(r"licenses\\MIT.txt"), + @r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\\MIT.txt`" + ); + assert_snapshot!( + parse_err_uv(r"licenses\/MIT.txt"), + @r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\/MIT.txt`" ); } #[test] fn test_valid() { let cases = [ - "licenses/*.txt", - "licenses/**/*.txt", - "LICEN[CS]E.txt", - "LICEN?E.txt", - "[a-z].txt", - "[a-z._-].txt", - "*/**", - "LICENSE..txt", - "LICENSE_file-1.txt", + r"licenses/*.txt", + r"licenses/**/*.txt", + r"LICEN[CS]E.txt", + r"LICEN?E.txt", + r"[a-z].txt", + r"[a-z._-].txt", + r"*/**", + r"LICENSE..txt", + r"LICENSE_file-1.txt", // (google translate) - "licenses/라이센스*.txt", - "licenses/ライセンス*.txt", - "licenses/执照*.txt", - "src/**", + r"licenses/라이센스*.txt", + r"licenses/ライセンス*.txt", + r"licenses/执照*.txt", + r"src/**", + ]; + let cases_uv = [ + r"public-domain/Gulliver\’s\ Travels.txt", + // https://github.com/astral-sh/uv/issues/13280 + r"**/\@test", ]; for case in cases { - PortableGlobParser.parse(case).unwrap(); + PortableGlobParser::Pep639.parse(case).unwrap(); + } + for case in cases.iter().chain(cases_uv.iter()) { + PortableGlobParser::Uv.parse(case).unwrap(); } } } diff --git a/docs/configuration/build-backend.md b/docs/configuration/build-backend.md index 6825919f0..b05856bdd 100644 --- a/docs/configuration/build-backend.md +++ b/docs/configuration/build-backend.md @@ -19,7 +19,7 @@ existing project, add it to the `[build-system]` section in your `pyproject.toml ```toml [build-system] -requires = ["uv_build>=0.6.13,<0.7"] +requires = ["uv_build>=0.7.2,<0.8.0"] build-backend = "uv_build" ``` @@ -89,4 +89,5 @@ Excludes are not anchored, which means that `__pycache__` excludes all directori exclude only `/dist`. All fields accepting patterns use the reduced portable glob syntax from -[PEP 639](https://peps.python.org/pep-0639/#add-license-FILES-key). 
+[PEP 639](https://peps.python.org/pep-0639/#add-license-FILES-key), with the addition that +characters other than path separators can be escaped with a backslash.