diff --git a/Cargo.lock b/Cargo.lock index ddd116f14..0f738bb09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5387,6 +5387,7 @@ dependencies = [ "anyhow", "hashbrown 0.15.2", "indexmap", + "insta", "itertools 0.14.0", "jiff", "mailparse", diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs index 6aa0bb860..4f038256d 100644 --- a/crates/uv-build-backend/src/lib.rs +++ b/crates/uv-build-backend/src/lib.rs @@ -15,6 +15,7 @@ use thiserror::Error; use tracing::debug; use uv_fs::Simplified; use uv_globfilter::PortableGlobError; +use uv_pypi_types::IdentifierParseError; #[derive(Debug, Error)] pub enum Error { @@ -24,6 +25,8 @@ pub enum Error { Toml(#[from] toml::de::Error), #[error("Invalid pyproject.toml")] Validation(#[from] ValidationError), + #[error(transparent)] + Identifier(#[from] IdentifierParseError), #[error("Unsupported glob expression in: `{field}`")] PortableGlob { field: String, diff --git a/crates/uv-build-backend/src/metadata.rs b/crates/uv-build-backend/src/metadata.rs index 19676d655..50de430af 100644 --- a/crates/uv-build-backend/src/metadata.rs +++ b/crates/uv-build-backend/src/metadata.rs @@ -17,7 +17,7 @@ use uv_pep440::{Version, VersionSpecifiers}; use uv_pep508::{ ExtraOperator, MarkerExpression, MarkerTree, MarkerValueExtra, Requirement, VersionOrUrl, }; -use uv_pypi_types::{Metadata23, VerbatimParsedUrl}; +use uv_pypi_types::{Identifier, Metadata23, VerbatimParsedUrl}; use crate::serde_verbatim::SerdeVerbatim; use crate::Error; @@ -803,7 +803,7 @@ pub(crate) struct ToolUv { /// When building the source distribution, the following files and directories are included: /// * `pyproject.toml` /// * The module under `tool.uv.build-backend.module-root`, by default -/// `src//**`. +/// `src//**`. /// * `project.license-files` and `project.readme`. /// * All directories under `tool.uv.build-backend.data`. /// * All patterns from `tool.uv.build-backend.source-include`. @@ -812,7 +812,7 @@ pub(crate) struct ToolUv { /// /// When building the wheel, the following files and directories are included: /// * The module under `tool.uv.build-backend.module-root`, by default -/// `src//**`. +/// `src//**`. /// * `project.license-files` and `project.readme`, as part of the project metadata. /// * Each directory under `tool.uv.build-backend.data`, as data directories. /// @@ -846,6 +846,15 @@ pub(crate) struct BuildBackendSettings { /// using the flat layout over the src layout. pub(crate) module_root: PathBuf, + /// The name of the module directory inside `module-root`. + /// + /// The default module name is the package name with dots and dashes replaced by underscores. + /// + /// Note that using this option runs the risk of creating two packages with different names but + /// the same module names. Installing such packages together leads to unspecified behavior, + /// often with corrupted files or directory trees. + pub(crate) module_name: Option, + /// Glob expressions which files and directories to additionally include in the source /// distribution. /// @@ -877,6 +886,7 @@ impl Default for BuildBackendSettings { fn default() -> Self { Self { module_root: PathBuf::from("src"), + module_name: None, source_include: Vec::new(), default_excludes: true, source_exclude: Vec::new(), diff --git a/crates/uv-build-backend/src/source_dist.rs b/crates/uv-build-backend/src/source_dist.rs index fa40840d2..3449ee4e0 100644 --- a/crates/uv-build-backend/src/source_dist.rs +++ b/crates/uv-build-backend/src/source_dist.rs @@ -8,11 +8,13 @@ use globset::{Glob, GlobSet}; use std::io; use std::io::{BufReader, Cursor}; use std::path::{Path, PathBuf}; +use std::str::FromStr; use tar::{EntryType, Header}; use tracing::{debug, trace}; use uv_distribution_filename::{SourceDistExtension, SourceDistFilename}; use uv_fs::Simplified; use uv_globfilter::{parse_portable_glob, GlobDirFilter}; +use uv_pypi_types::Identifier; use uv_warnings::warn_user_once; use walkdir::WalkDir; @@ -65,11 +67,21 @@ fn source_dist_matcher( let mut includes: Vec = settings.source_include; // pyproject.toml is always included. includes.push(globset::escape("pyproject.toml")); + + let module_name = if let Some(module_name) = settings.module_name { + module_name + } else { + // Should never error, the rules for package names (in dist-info formatting) are stricter + // than those for identifiers + Identifier::from_str(pyproject_toml.name().as_dist_info_name().as_ref())? + }; + debug!("Module name: `{:?}`", module_name); + // The wheel must not include any files included by the source distribution (at least until we // have files generated in the source dist -> wheel build step). let import_path = &settings .module_root - .join(pyproject_toml.name().as_dist_info_name().as_ref()) + .join(module_name.as_ref()) .portable_display() .to_string(); includes.push(format!("{}/**", globset::escape(import_path))); diff --git a/crates/uv-build-backend/src/wheel.rs b/crates/uv-build-backend/src/wheel.rs index 806700665..f14cdc526 100644 --- a/crates/uv-build-backend/src/wheel.rs +++ b/crates/uv-build-backend/src/wheel.rs @@ -1,11 +1,11 @@ -use std::io::{BufReader, Read, Write}; -use std::path::{Path, PathBuf}; -use std::{io, mem}; - use fs_err::File; use globset::{GlobSet, GlobSetBuilder}; use itertools::Itertools; use sha2::{Digest, Sha256}; +use std::io::{BufReader, Read, Write}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::{io, mem}; use tracing::{debug, trace}; use walkdir::WalkDir; use zip::{CompressionMethod, ZipWriter}; @@ -14,6 +14,7 @@ use uv_distribution_filename::WheelFilename; use uv_fs::Simplified; use uv_globfilter::{parse_portable_glob, GlobDirFilter}; use uv_platform_tags::{AbiTag, LanguageTag, PlatformTag}; +use uv_pypi_types::Identifier; use uv_warnings::warn_user_once; use crate::metadata::{BuildBackendSettings, DEFAULT_EXCLUDES}; @@ -128,7 +129,17 @@ fn write_wheel( return Err(Error::AbsoluteModuleRoot(settings.module_root.clone())); } let strip_root = source_tree.join(settings.module_root); - let module_root = strip_root.join(pyproject_toml.name().as_dist_info_name().as_ref()); + + let module_name = if let Some(module_name) = settings.module_name { + module_name + } else { + // Should never error, the rules for package names (in dist-info formatting) are stricter + // than those for identifiers + Identifier::from_str(pyproject_toml.name().as_dist_info_name().as_ref())? + }; + debug!("Module name: `{:?}`", module_name); + + let module_root = strip_root.join(module_name.as_ref()); if !module_root.join("__init__.py").is_file() { return Err(Error::MissingModule(module_root)); } diff --git a/crates/uv-pypi-types/Cargo.toml b/crates/uv-pypi-types/Cargo.toml index fe97710d7..db515b86a 100644 --- a/crates/uv-pypi-types/Cargo.toml +++ b/crates/uv-pypi-types/Cargo.toml @@ -32,7 +32,7 @@ mailparse = { workspace = true } regex = { workspace = true } rkyv = { workspace = true } schemars = { workspace = true, optional = true } -serde = { workspace = true, optional = true } +serde = { workspace = true } serde-untagged = { workspace = true } thiserror = { workspace = true } toml = { workspace = true } @@ -42,6 +42,7 @@ url = { workspace = true } [dev-dependencies] anyhow = { workspace = true } +insta = { version = "1.40.0" } [features] schemars = ["dep:schemars", "uv-normalize/schemars"] diff --git a/crates/uv-pypi-types/src/identifier.rs b/crates/uv-pypi-types/src/identifier.rs new file mode 100644 index 000000000..42bda8175 --- /dev/null +++ b/crates/uv-pypi-types/src/identifier.rs @@ -0,0 +1,160 @@ +use std::fmt::Display; +use std::str::FromStr; +use thiserror::Error; + +/// Simplified Python identifier. +/// +/// We don't match Python's identifier rules +/// () exactly +/// (we just use Rust's `is_alphabetic`) and we don't convert to NFKC, but it's good enough +/// for our validation purposes. +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct Identifier(Box); + +#[derive(Debug, Clone, Error)] +pub enum IdentifierParseError { + #[error("An identifier must not be empty")] + Empty, + #[error( + "Invalid first character `{first}` for identifier `{identifier}`, expected an underscore or an alphabetic character" + )] + InvalidFirstChar { first: char, identifier: Box }, + #[error( + "Invalid character `{invalid_char}` at position {pos} for identifier `{identifier}`, \ + expected an underscore or an alphanumeric character" + )] + InvalidChar { + pos: usize, + invalid_char: char, + identifier: Box, + }, +} + +impl Identifier { + pub fn new(identifier: impl Into>) -> Result { + let identifier = identifier.into(); + let mut chars = identifier.chars().enumerate(); + let (_, first_char) = chars.next().ok_or(IdentifierParseError::Empty)?; + if first_char != '_' && !first_char.is_alphabetic() { + return Err(IdentifierParseError::InvalidFirstChar { + first: first_char, + identifier, + }); + } + + for (pos, current_char) in chars { + if current_char != '_' && !current_char.is_alphanumeric() { + return Err(IdentifierParseError::InvalidChar { + // Make the position 1-indexed + pos: pos + 1, + invalid_char: current_char, + identifier, + }); + } + } + + Ok(Self(identifier)) + } +} + +impl FromStr for Identifier { + type Err = IdentifierParseError; + + fn from_str(identifier: &str) -> Result { + Self::new(identifier.to_string()) + } +} + +impl Display for Identifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl AsRef for Identifier { + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl<'de> serde::de::Deserialize<'de> for Identifier { + fn deserialize(deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Identifier::from_str(&s).map_err(serde::de::Error::custom) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use insta::assert_snapshot; + + #[test] + fn valid() { + let valid_ids = vec![ + "abc", + "_abc", + "a_bc", + "a123", + "snake_case", + "camelCase", + "PascalCase", + // A single character is valid + "_", + "a", + // Unicode + "α", + "férrîs", + "안녕하세요", + ]; + + for valid_id in valid_ids { + assert!(Identifier::from_str(valid_id).is_ok(), "{}", valid_id); + } + } + + #[test] + fn empty() { + assert_snapshot!(Identifier::from_str("").unwrap_err(), @"An identifier must not be empty"); + } + + #[test] + fn invalid_first_char() { + assert_snapshot!( + Identifier::from_str("1foo").unwrap_err(), + @"Invalid first character `1` for identifier `1foo`, expected an underscore or an alphabetic character" + ); + assert_snapshot!( + Identifier::from_str("$foo").unwrap_err(), + @"Invalid first character `$` for identifier `$foo`, expected an underscore or an alphabetic character" + ); + assert_snapshot!( + Identifier::from_str(".foo").unwrap_err(), + @"Invalid first character `.` for identifier `.foo`, expected an underscore or an alphabetic character" + ); + } + + #[test] + fn invalid_char() { + // A dot in module names equals a path separator, which is a separate problem. + assert_snapshot!( + Identifier::from_str("foo.bar").unwrap_err(), + @"Invalid character `.` at position 4 for identifier `foo.bar`, expected an underscore or an alphanumeric character" + ); + assert_snapshot!( + Identifier::from_str("foo-bar").unwrap_err(), + @"Invalid character `-` at position 4 for identifier `foo-bar`, expected an underscore or an alphanumeric character" + ); + assert_snapshot!( + Identifier::from_str("foo_bar$").unwrap_err(), + @"Invalid character `$` at position 8 for identifier `foo_bar$`, expected an underscore or an alphanumeric character" + ); + assert_snapshot!( + Identifier::from_str("foo🦀bar").unwrap_err(), + @"Invalid character `🦀` at position 4 for identifier `foo🦀bar`, expected an underscore or an alphanumeric character" + ); + } +} diff --git a/crates/uv-pypi-types/src/lib.rs b/crates/uv-pypi-types/src/lib.rs index a3b2b6e05..a5c9ec8a4 100644 --- a/crates/uv-pypi-types/src/lib.rs +++ b/crates/uv-pypi-types/src/lib.rs @@ -2,6 +2,7 @@ pub use base_url::*; pub use conflicts::*; pub use dependency_groups::*; pub use direct_url::*; +pub use identifier::*; pub use lenient_requirement::*; pub use marker_environment::*; pub use metadata::*; @@ -15,6 +16,7 @@ mod base_url; mod conflicts; mod dependency_groups; mod direct_url; +mod identifier; mod lenient_requirement; mod marker_environment; mod metadata; diff --git a/crates/uv-workspace/Cargo.toml b/crates/uv-workspace/Cargo.toml index 43f4a53a9..0ba386acc 100644 --- a/crates/uv-workspace/Cargo.toml +++ b/crates/uv-workspace/Cargo.toml @@ -25,7 +25,7 @@ uv-normalize = { workspace = true } uv-options-metadata = { workspace = true } uv-pep440 = { workspace = true } uv-pep508 = { workspace = true } -uv-pypi-types = { workspace = true, features = ["serde"] } +uv-pypi-types = { workspace = true } uv-static = { workspace = true } uv-warnings = { workspace = true } @@ -38,8 +38,8 @@ schemars = { workspace = true, optional = true } serde = { workspace = true, features = ["derive"] } thiserror = { workspace = true } tokio = { workspace = true } -toml = { workspace = true } -toml_edit = { workspace = true } +toml = { workspace = true } +toml_edit = { workspace = true } tracing = { workspace = true } url = { workspace = true } diff --git a/crates/uv/tests/it/build_backend.rs b/crates/uv/tests/it/build_backend.rs index 7c9320532..731bcdfaf 100644 --- a/crates/uv/tests/it/build_backend.rs +++ b/crates/uv/tests/it/build_backend.rs @@ -1,6 +1,7 @@ use crate::common::{uv_snapshot, venv_bin_path, TestContext}; use anyhow::Result; use assert_cmd::assert::OutputAssertExt; +use assert_fs::fixture::{FileWriteStr, PathChild}; use flate2::bufread::GzDecoder; use fs_err::File; use indoc::indoc; @@ -278,3 +279,92 @@ fn preserve_executable_bit() -> Result<()> { Ok(()) } + +/// Test `tool.uv.build-backend.module-name`. +/// +/// We include only the module specified by `module-name`, ignoring the project name and all other +/// potential modules. +#[test] +fn rename_module() -> Result<()> { + let context = TestContext::new("3.12"); + let temp_dir = TempDir::new()?; + + context + .temp_dir + .child("pyproject.toml") + .write_str(indoc! {r#" + [project] + name = "foo" + version = "1.0.0" + + [tool.uv.build-backend] + module-name = "bar" + + [build-system] + requires = ["uv_build>=0.5,<0.7"] + build-backend = "uv_build" + "#})?; + + // This is the module we would usually include, but due to the renaming by `module-name` must + // ignore. + context + .temp_dir + .child("src/foo/__init__.py") + .write_str(r#"print("Hi from foo")"#)?; + // This module would be ignored from just `project.name`, but is selected due to the renaming. + context + .temp_dir + .child("src/bar/__init__.py") + .write_str(r#"print("Hi from bar")"#)?; + + uv_snapshot!(context + .build_backend() + .arg("build-wheel") + .arg(temp_dir.path()) + .env("UV_PREVIEW", "1"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + foo-1.0.0-py3-none-any.whl + + ----- stderr ----- + "###); + + context + .pip_install() + .arg(temp_dir.path().join("foo-1.0.0-py3-none-any.whl")) + .assert() + .success(); + + // Importing the module with the `module-name` name succeeds. + uv_snapshot!(Command::new(context.interpreter()) + .arg("-c") + .arg("import bar") + // Python on windows + .env(EnvVars::PYTHONUTF8, "1"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + Hi from bar + + ----- stderr ----- + "###); + + // Importing the package name fails, it was overridden by `module-name`. + uv_snapshot!(Command::new(context.interpreter()) + .arg("-c") + .arg("import foo") + // Python on windows + .env(EnvVars::PYTHONUTF8, "1"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + Traceback (most recent call last): + File "", line 1, in + ModuleNotFoundError: No module named 'foo' + "###); + + Ok(()) +}