From 2688a12609ee286f173442ec4d53777213bf8fe2 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 2 Dec 2025 22:21:54 -0800 Subject: [PATCH] Always treat paths after -e as directories --- crates/uv-pep508/src/lib.rs | 2 +- crates/uv-pep508/src/unnamed.rs | 87 +++++++++++++++++-- crates/uv-pypi-types/src/parsed_url.rs | 32 ++++++- crates/uv-requirements-txt/src/lib.rs | 5 +- crates/uv-requirements-txt/src/requirement.rs | 19 +++- ...s_txt__test__parse-unix-semicolon.txt.snap | 76 ++++++++++++---- 6 files changed, 191 insertions(+), 30 deletions(-) diff --git a/crates/uv-pep508/src/lib.rs b/crates/uv-pep508/src/lib.rs index 30c7b9362..c0735b41c 100644 --- a/crates/uv-pep508/src/lib.rs +++ b/crates/uv-pep508/src/lib.rs @@ -40,7 +40,7 @@ pub use crate::marker::{ }; pub use crate::origin::RequirementOrigin; #[cfg(feature = "non-pep508-extensions")] -pub use crate::unnamed::{UnnamedRequirement, UnnamedRequirementUrl}; +pub use crate::unnamed::{PathHint, UnnamedRequirement, UnnamedRequirementUrl}; pub use crate::verbatim_url::{ Scheme, VerbatimUrl, VerbatimUrlError, expand_env_vars, looks_like_git_repository, split_scheme, strip_host, diff --git a/crates/uv-pep508/src/unnamed.rs b/crates/uv-pep508/src/unnamed.rs index 9dbd854de..d7ac2b66b 100644 --- a/crates/uv-pep508/src/unnamed.rs +++ b/crates/uv-pep508/src/unnamed.rs @@ -13,6 +13,15 @@ use crate::{ parse_extras_cursor, split_extras, split_scheme, strip_host, }; +/// A hint about whether a path refers to a file or directory. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PathHint { + /// The path is known to be a file (e.g., a wheel or source distribution). + File, + /// The path is known to be a directory (e.g., an editable install). + Directory, +} + /// An extension over [`Pep508Url`] that also supports parsing unnamed requirements, namely paths. /// /// The error type is fixed to the same as the [`Pep508Url`] impl error. @@ -21,9 +30,40 @@ pub trait UnnamedRequirementUrl: Pep508Url { fn parse_path(path: impl AsRef, working_dir: impl AsRef) -> Result; + /// Parse a URL from a relative or absolute path, with a hint about whether the path is a file + /// or directory. + /// + /// The hint is used to determine whether the path should be parsed as a file or directory. If + /// no such hint is provided, we'll query for the path kind; if the path doesn't exist, it'll be + /// treated as a directory if and only if it is extensionless. + fn parse_path_with_hint( + path: impl AsRef, + working_dir: impl AsRef, + hint: Option, + ) -> Result { + // Default implementation ignores the hint. + let _ = hint; + Self::parse_path(path, working_dir) + } + /// Parse a URL from an absolute path. fn parse_absolute_path(path: impl AsRef) -> Result; + /// Parse a URL from an absolute path, with a hint about whether the path is a file or + /// directory. + /// + /// The hint is used to determine whether the path should be parsed as a file or directory. If + /// no such hint is provided, we'll query for the path kind; if the path doesn't exist, it'll be + /// treated as a directory if and only if it is extensionless. + fn parse_absolute_path_with_hint( + path: impl AsRef, + hint: Option, + ) -> Result { + // Default implementation ignores the hint. + let _ = hint; + Self::parse_absolute_path(path) + } + /// Parse a URL from a string. fn parse_unnamed_url(given: impl AsRef) -> Result; @@ -110,10 +150,26 @@ impl UnnamedRequirement { working_dir: impl AsRef, reporter: &mut impl Reporter, ) -> Result> { - parse_unnamed_requirement( + Self::parse_with_hint(input, working_dir, reporter, None) + } + + /// Parse a PEP 508-like direct URL requirement without a package name, with a hint about + /// whether the path is a file or directory. + /// + /// The hint is used to determine whether the path should be parsed as a file or directory. If + /// no such hint is provided, we'll query for the path kind; if the path doesn't exist, it'll be + /// treated as a directory if and only if it is extensionless. + pub fn parse_with_hint( + input: &str, + working_dir: impl AsRef, + reporter: &mut impl Reporter, + hint: Option, + ) -> Result> { + parse_unnamed_requirement_with_hint( &mut Cursor::new(input), Some(working_dir.as_ref()), reporter, + hint, ) } } @@ -155,11 +211,22 @@ fn parse_unnamed_requirement( cursor: &mut Cursor, working_dir: Option<&Path>, reporter: &mut impl Reporter, +) -> Result, Pep508Error> { + parse_unnamed_requirement_with_hint(cursor, working_dir, reporter, None) +} + +/// Parse a PEP 508-like direct URL specifier without a package name, with a hint about whether +/// the path is a file or directory. +fn parse_unnamed_requirement_with_hint( + cursor: &mut Cursor, + working_dir: Option<&Path>, + reporter: &mut impl Reporter, + hint: Option, ) -> Result, Pep508Error> { cursor.eat_whitespace(); // Parse the URL itself, along with any extras. - let (url, extras) = parse_unnamed_url::(cursor, working_dir)?; + let (url, extras) = parse_unnamed_url::(cursor, working_dir, hint)?; // wsp* cursor.eat_whitespace(); @@ -213,6 +280,7 @@ fn preprocess_unnamed_url( cursor: &Cursor, start: usize, len: usize, + #[cfg_attr(not(feature = "non-pep508-extensions"), allow(unused))] hint: Option, ) -> Result<(Url, Vec), Pep508Error> { // Split extras _before_ expanding the URL. We assume that the extras are not environment // variables. If we parsed the extras after expanding the URL, then the verbatim representation @@ -251,7 +319,7 @@ fn preprocess_unnamed_url( #[cfg(feature = "non-pep508-extensions")] if let Some(working_dir) = working_dir { - let url = Url::parse_path(path.as_ref(), working_dir) + let url = Url::parse_path_with_hint(path.as_ref(), working_dir, hint) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, @@ -262,7 +330,7 @@ fn preprocess_unnamed_url( return Ok((url, extras)); } - let url = Url::parse_absolute_path(path.as_ref()) + let url = Url::parse_absolute_path_with_hint(path.as_ref(), hint) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, @@ -289,7 +357,7 @@ fn preprocess_unnamed_url( // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` _ => { if let Some(working_dir) = working_dir { - let url = Url::parse_path(expanded.as_ref(), working_dir) + let url = Url::parse_path_with_hint(expanded.as_ref(), working_dir, hint) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, @@ -300,7 +368,7 @@ fn preprocess_unnamed_url( return Ok((url, extras)); } - let url = Url::parse_absolute_path(expanded.as_ref()) + let url = Url::parse_absolute_path_with_hint(expanded.as_ref(), hint) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, @@ -314,7 +382,7 @@ fn preprocess_unnamed_url( } else { // Ex) `../editable/` if let Some(working_dir) = working_dir { - let url = Url::parse_path(expanded.as_ref(), working_dir) + let url = Url::parse_path_with_hint(expanded.as_ref(), working_dir, hint) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, @@ -325,7 +393,7 @@ fn preprocess_unnamed_url( return Ok((url, extras)); } - let url = Url::parse_absolute_path(expanded.as_ref()) + let url = Url::parse_absolute_path_with_hint(expanded.as_ref(), hint) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, @@ -357,6 +425,7 @@ fn preprocess_unnamed_url( fn parse_unnamed_url( cursor: &mut Cursor, working_dir: Option<&Path>, + hint: Option, ) -> Result<(Url, Vec), Pep508Error> { // wsp* cursor.eat_whitespace(); @@ -413,7 +482,7 @@ fn parse_unnamed_url( }); } - let url = preprocess_unnamed_url(url, working_dir, cursor, start, len)?; + let url = preprocess_unnamed_url(url, working_dir, cursor, start, len, hint)?; Ok(url) } diff --git a/crates/uv-pypi-types/src/parsed_url.rs b/crates/uv-pypi-types/src/parsed_url.rs index 0f324e8a2..a838ab179 100644 --- a/crates/uv-pypi-types/src/parsed_url.rs +++ b/crates/uv-pypi-types/src/parsed_url.rs @@ -8,7 +8,8 @@ use uv_cache_key::{CacheKey, CacheKeyHasher}; use uv_distribution_filename::{DistExtension, ExtensionError}; use uv_git_types::{GitUrl, GitUrlParseError}; use uv_pep508::{ - Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError, looks_like_git_repository, + PathHint, Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError, + looks_like_git_repository, }; use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; @@ -79,11 +80,26 @@ impl UnnamedRequirementUrl for VerbatimParsedUrl { fn parse_path( path: impl AsRef, working_dir: impl AsRef, + ) -> Result { + Self::parse_path_with_hint(path, working_dir, None) + } + + fn parse_path_with_hint( + path: impl AsRef, + working_dir: impl AsRef, + hint: Option, ) -> Result { let verbatim = VerbatimUrl::from_path(&path, &working_dir)?; let verbatim_path = verbatim.as_path()?; let is_dir = if let Ok(metadata) = verbatim_path.metadata() { metadata.is_dir() + } else if DistExtension::from_path(&path).is_ok() { + // If the path has a recognized distribution extension (like `.whl` or `.tar.gz`), + // treat it as a file regardless of the hint. + false + } else if let Some(hint) = hint { + // Use the hint for ambiguous paths (paths with unrecognized extensions like `.bar`). + hint == PathHint::Directory } else { verbatim_path.extension().is_none() }; @@ -112,10 +128,24 @@ impl UnnamedRequirementUrl for VerbatimParsedUrl { } fn parse_absolute_path(path: impl AsRef) -> Result { + Self::parse_absolute_path_with_hint(path, None) + } + + fn parse_absolute_path_with_hint( + path: impl AsRef, + hint: Option, + ) -> Result { let verbatim = VerbatimUrl::from_absolute_path(&path)?; let verbatim_path = verbatim.as_path()?; let is_dir = if let Ok(metadata) = verbatim_path.metadata() { metadata.is_dir() + } else if DistExtension::from_path(&path).is_ok() { + // If the path has a recognized distribution extension (like `.whl` or `.tar.gz`), + // treat it as a file regardless of the hint. + false + } else if let Some(hint) = hint { + // Use the hint for ambiguous paths (paths with unrecognized extensions like `.bar`). + hint == PathHint::Directory } else { verbatim_path.extension().is_none() }; diff --git a/crates/uv-requirements-txt/src/lib.rs b/crates/uv-requirements-txt/src/lib.rs index d48c21386..9df3680a8 100644 --- a/crates/uv-requirements-txt/src/lib.rs +++ b/crates/uv-requirements-txt/src/lib.rs @@ -1643,6 +1643,10 @@ mod test { #[cfg(unix)] #[test_case(Path::new("bare-url.txt"))] #[test_case(Path::new("editable.txt"))] + // Note: `semicolon.txt` contains a syntax error (missing whitespace before `;`), but since + // it's an editable requirement, we parse it as a directory path. The error will occur later + // when the path doesn't exist. + #[test_case(Path::new("semicolon.txt"))] #[tokio::test] async fn parse_unix(path: &Path) { let working_dir = workspace_test_data_dir().join("requirements-txt"); @@ -1662,7 +1666,6 @@ mod test { } #[cfg(unix)] - #[test_case(Path::new("semicolon.txt"))] #[test_case(Path::new("hash.txt"))] #[tokio::test] async fn parse_err(path: &Path) { diff --git a/crates/uv-requirements-txt/src/requirement.rs b/crates/uv-requirements-txt/src/requirement.rs index 849f82327..1780a5ded 100644 --- a/crates/uv-requirements-txt/src/requirement.rs +++ b/crates/uv-requirements-txt/src/requirement.rs @@ -3,7 +3,8 @@ use std::path::Path; use uv_normalize::PackageName; use uv_pep508::{ - Pep508Error, Pep508ErrorSource, RequirementOrigin, TracingReporter, UnnamedRequirement, + PathHint, Pep508Error, Pep508ErrorSource, RequirementOrigin, TracingReporter, + UnnamedRequirement, }; use uv_pypi_types::{ParsedDirectoryUrl, ParsedUrl, VerbatimParsedUrl}; @@ -132,15 +133,26 @@ impl RequirementsTxtRequirement { working_dir: impl AsRef, editable: bool, ) -> Result>> { + // When parsing an editable requirement, hint that the path is a directory. Editable + // requirements always refer to local directories (containing a `pyproject.toml` or + // `setup.py`), so when the path doesn't exist, we should assume it's a directory rather + // than a file with an unrecognized extension (like `foo.bar`). + let hint = if editable { + Some(PathHint::Directory) + } else { + None + }; + // Attempt to parse as a PEP 508-compliant requirement. match uv_pep508::Requirement::parse(input, &working_dir) { Ok(requirement) => { // As a special-case, interpret `dagster` as `./dagster` if we're in editable mode. if editable && requirement.version_or_url.is_none() { - Ok(Self::Unnamed(UnnamedRequirement::parse( + Ok(Self::Unnamed(UnnamedRequirement::parse_with_hint( input, &working_dir, &mut TracingReporter, + hint, )?)) } else { Ok(Self::Named(requirement)) @@ -149,10 +161,11 @@ impl RequirementsTxtRequirement { Err(err) => match err.message { Pep508ErrorSource::UnsupportedRequirement(_) => { // If that fails, attempt to parse as a direct URL requirement. - Ok(Self::Unnamed(UnnamedRequirement::parse( + Ok(Self::Unnamed(UnnamedRequirement::parse_with_hint( input, &working_dir, &mut TracingReporter, + hint, )?)) } _ => Err(err), diff --git a/crates/uv-requirements-txt/src/snapshots/uv_requirements_txt__test__parse-unix-semicolon.txt.snap b/crates/uv-requirements-txt/src/snapshots/uv_requirements_txt__test__parse-unix-semicolon.txt.snap index 310807dad..6c24dba43 100644 --- a/crates/uv-requirements-txt/src/snapshots/uv_requirements_txt__test__parse-unix-semicolon.txt.snap +++ b/crates/uv-requirements-txt/src/snapshots/uv_requirements_txt__test__parse-unix-semicolon.txt.snap @@ -2,21 +2,67 @@ source: crates/uv-requirements-txt/src/lib.rs expression: actual --- -RequirementsTxtFileError { - file: "/semicolon.txt", - error: Pep508 { - source: Pep508Error { - message: UrlError( - MissingExtensionPath( - "./editable;python_version >= \"3.9\" and os_name == \"posix\"", - Dist, - ), +RequirementsTxt { + requirements: [], + constraints: [], + editables: [ + RequirementEntry { + requirement: Unnamed( + UnnamedRequirement { + url: VerbatimParsedUrl { + parsed_url: Directory( + ParsedDirectoryUrl { + url: DisplaySafeUrl { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/editable;python_version%20%3E=%20%223.9%22%20and%20os_name%20==%20%22posix%22", + query: None, + fragment: None, + }, + install_path: "/editable;python_version >= \"3.9\" and os_name == \"posix\"", + editable: Some( + true, + ), + virtual: None, + }, + ), + verbatim: VerbatimUrl { + url: DisplaySafeUrl { + scheme: "file", + cannot_be_a_base: false, + username: "", + password: None, + host: None, + port: None, + path: "/editable;python_version%20%3E=%20%223.9%22%20and%20os_name%20==%20%22posix%22", + query: None, + fragment: None, + }, + given: Some( + "./editable;python_version >= \"3.9\" and os_name == \"posix\"", + ), + }, + }, + extras: [], + marker: true, + origin: Some( + File( + "/semicolon.txt", + ), + ), + }, ), - start: 0, - len: 57, - input: "./editable;python_version >= \"3.9\" and os_name == \"posix\"", + hashes: [], }, - start: 50, - end: 107, - }, + ], + index_url: None, + extra_index_urls: [], + find_links: [], + no_index: false, + no_binary: None, + only_binary: None, }