From ce38163bc4a3ae3b2d764d4c7630c78b284411f8 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 30 Oct 2025 15:16:38 -0400 Subject: [PATCH] Tweaks --- Cargo.lock | 1 - crates/uv-requirements-txt/Cargo.toml | 1 - crates/uv-requirements-txt/src/lib.rs | 390 +++++++++--------- ...test__parse-options-comprehensive.txt.snap | 72 +--- .../options-comprehensive.txt | 25 +- crates/uv/tests/it/pip_install.rs | 30 ++ req.txt | 5 - 7 files changed, 224 insertions(+), 300 deletions(-) delete mode 100644 req.txt diff --git a/Cargo.lock b/Cargo.lock index 9aac5cad3..7709575e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6397,7 +6397,6 @@ dependencies = [ "indoc", "insta", "itertools 0.14.0", - "memchr", "regex", "reqwest", "reqwest-middleware", diff --git a/crates/uv-requirements-txt/Cargo.toml b/crates/uv-requirements-txt/Cargo.toml index 1d18e9199..648431a15 100644 --- a/crates/uv-requirements-txt/Cargo.toml +++ b/crates/uv-requirements-txt/Cargo.toml @@ -27,7 +27,6 @@ uv-redacted = { workspace = true } uv-warnings = { workspace = true } fs-err = { workspace = true } -memchr = { workspace = true } reqwest = { workspace = true, optional = true } reqwest-middleware = { workspace = true, optional = true } rustc-hash = { workspace = true } diff --git a/crates/uv-requirements-txt/src/lib.rs b/crates/uv-requirements-txt/src/lib.rs index 4a2155a09..f528e53b3 100644 --- a/crates/uv-requirements-txt/src/lib.rs +++ b/crates/uv-requirements-txt/src/lib.rs @@ -592,7 +592,7 @@ fn parse_entry( let start = s.cursor(); Ok(Some(if s.eat_if("-r") || s.eat_if("--requirement") { - let filename = parse_value("--requirement", content, s, requirements_txt)?; + let filename = parse_option("--requirement", content, s, requirements_txt)?; let end = s.cursor(); RequirementsTxtStatement::Requirements { filename, @@ -600,7 +600,7 @@ fn parse_entry( end, } } else if s.eat_if("-c") || s.eat_if("--constraint") { - let filename = parse_value("--constraint", content, s, requirements_txt)?; + let filename = parse_option("--constraint", content, s, requirements_txt)?; let end = s.cursor(); RequirementsTxtStatement::Constraint { filename, @@ -643,7 +643,7 @@ fn parse_entry( hashes, }) } else if s.eat_if("-i") || s.eat_if("--index-url") { - let given = parse_value("--index-url", content, s, requirements_txt)?; + let given = parse_option("--index-url", content, s, requirements_txt)?; let expanded = expand_env_vars(&given); let url = if let Some(path) = std::path::absolute(expanded.as_ref()) .ok() @@ -669,7 +669,7 @@ fn parse_entry( }; RequirementsTxtStatement::IndexUrl(url.with_given(given)) } else if s.eat_if("--extra-index-url") { - let given = parse_value("--extra-index-url", content, s, requirements_txt)?; + let given = parse_option("--extra-index-url", content, s, requirements_txt)?; let expanded = expand_env_vars(&given); let url = if let Some(path) = std::path::absolute(expanded.as_ref()) .ok() @@ -697,7 +697,7 @@ fn parse_entry( } else if s.eat_if("--no-index") { RequirementsTxtStatement::NoIndex } else if s.eat_if("--find-links") || s.eat_if("-f") { - let given = parse_value("--find-links", content, s, requirements_txt)?; + let given = parse_option("--find-links", content, s, requirements_txt)?; let expanded = expand_env_vars(&given); let url = if let Some(path) = std::path::absolute(expanded.as_ref()) .ok() @@ -723,7 +723,7 @@ fn parse_entry( }; RequirementsTxtStatement::FindLinks(url.with_given(given)) } else if s.eat_if("--no-binary") { - let given = parse_value("--no-binary", content, s, requirements_txt)?; + let given = parse_option("--no-binary", content, s, requirements_txt)?; let specifier = PackageNameSpecifier::from_str(&given).map_err(|err| { RequirementsTxtParserError::NoBinary { source: err, @@ -734,7 +734,7 @@ fn parse_entry( })?; RequirementsTxtStatement::NoBinary(NoBinary::from_pip_arg(specifier)) } else if s.eat_if("--only-binary") { - let given = parse_value("--only-binary", content, s, requirements_txt)?; + let given = parse_option("--only-binary", content, s, requirements_txt)?; let specifier = PackageNameSpecifier::from_str(&given).map_err(|err| { RequirementsTxtParserError::NoBinary { source: err, @@ -928,13 +928,13 @@ fn parse_hashes(content: &str, s: &mut Scanner) -> Result, Requireme Ok(hashes) } -/// Parse an option value (for --index-url, --find-links, etc.). +/// Parse an option value (for `--index-url`, `--find-links`, etc.). /// /// This function: -/// - Handles quoting (single/double quotes with POSIX shell escaping) -/// - Consumes and strips markers (` ; ` or `; ` followed by marker expression) -/// - Returns the unquoted, unescaped value -fn parse_value( +/// - Handles quoting (single/double quotes with POSIX shell escaping). +/// - Consumes and strips markers (` ; ` or `; ` followed by marker expression). +/// - Returns the unquoted, unescaped value. +fn parse_option( option: &str, content: &str, s: &mut Scanner, @@ -955,7 +955,179 @@ fn parse_value( }); } - parse_quoted_value_with_markers(option, content, s, requirements_txt) + let start = s.cursor(); + let mut result = String::with_capacity(option.len()); + let mut in_single_quote = false; + let mut in_double_quote = false; + let mut escape_next = false; + let mut marker_start = None; + let mut marker_text = String::new(); + + loop { + let Some(ch) = s.peek() else { + break; + }; + + // Check for terminal characters (always, even when quoted). + if !escape_next && matches!(ch, '\n' | '\r' | '#') { + break; + } + + // Check for marker syntax: ` ; ` or `; `. + if !escape_next && !in_single_quote && !in_double_quote { + if ch == ';' { + // If the next character is whitespace, this is a marker. + if s.after().chars().nth(1).is_some_and(char::is_whitespace) { + marker_start = Some(result.len()); + marker_text.push(s.eat().unwrap()); + + // Consume until we find the closing quote or end of line. + while let Some(c) = s.peek() { + if matches!(c, '\n' | '\r' | '#') { + break; + } + let c = s.eat().unwrap(); + marker_text.push(c); + + // Track quote state to avoid unterminated quote errors. + if !escape_next { + if c == '\'' && !in_double_quote { + in_single_quote = !in_single_quote; + } else if c == '"' && !in_single_quote { + in_double_quote = !in_double_quote; + } else if c == '\\' && !in_single_quote { + escape_next = true; + } + } else { + escape_next = false; + } + } + break; + } + } else if ch == ' ' { + // If the next character is a semicolon, this is a marker. + if s.after().chars().nth(1) == Some(';') { + marker_start = Some(result.len()); + + // Consume until we find the closing quote or end of line. + while let Some(c) = s.peek() { + if matches!(c, '\n' | '\r' | '#') { + break; + } + let c = s.eat().unwrap(); + marker_text.push(c); + + // Track quote state to avoid unterminated quote errors. + if !escape_next { + if c == '\'' && !in_double_quote { + in_single_quote = !in_single_quote; + } else if c == '"' && !in_single_quote { + in_double_quote = !in_double_quote; + } else if c == '\\' && !in_single_quote { + escape_next = true; + } + } else { + escape_next = false; + } + } + break; + } + } + } + + // Consume the character + let ch = s.eat().unwrap(); + + if escape_next { + escape_next = false; + if in_double_quote { + match ch { + // Inside double quotes, only specific characters are escaped. + '"' | '\\' | '$' | '`' => result.push(ch), + // Escaped newline is stripped (continuation). + '\n' => {} + // Unknown escape (preserve backslash and character). + _ => { + result.push('\\'); + result.push(ch); + } + } + } else { + if ch != '\n' { + // Escaped newline is stripped; everything else is a literal. + result.push(ch); + } + } + continue; + } + + match ch { + '\\' if !in_single_quote => { + // Start an escape sequence. + escape_next = true; + } + '\'' if !in_double_quote => { + // Toggle single quotes. + in_single_quote = !in_single_quote; + } + '"' if !in_single_quote => { + // Toggle double quotes. + in_double_quote = !in_double_quote; + } + _ => { + // Regular character + result.push(ch); + } + } + } + + if in_single_quote { + let (line, column) = calculate_row_column(content, start); + return Err(RequirementsTxtParserError::Parser { + message: "Unterminated single quote".to_string(), + line, + column, + }); + } + if in_double_quote { + let (line, column) = calculate_row_column(content, start); + return Err(RequirementsTxtParserError::Parser { + message: "Unterminated double quote".to_string(), + line, + column, + }); + } + + // If we found a marker, truncate the result. + if let Some(trim_at) = marker_start { + result.truncate(trim_at); + + let (line, _) = calculate_row_column(content, start); + let marker_display = marker_text.trim(); + if requirements_txt == Path::new("-") { + uv_warnings::warn_user!( + "Ignoring environment marker on `{option}` in stdin at line {line}: `{marker_display}`" + ); + } else { + uv_warnings::warn_user!( + "Ignoring environment marker on `{option}` in `{path}` at line {line}: `{marker_display}`", + path = requirements_txt.user_display().cyan() + ); + } + } + + let result = result.trim().to_string(); + + if result.is_empty() { + let (line, column) = calculate_row_column(content, s.cursor()); + return Err(RequirementsTxtParserError::Parser { + message: format!("`{option}` must be followed by an argument"), + line, + column, + }); + } + + Ok(result) } /// Parse a hash value (for --hash). @@ -996,198 +1168,6 @@ fn parse_hash_value( Ok(value.to_string()) } -/// Parse a quoted value that may contain markers. -/// -/// This handles: -/// - Quoted values (single/double quotes with POSIX shell escaping) -/// - Markers - consumes the entire line but returns only the part before the marker -fn parse_quoted_value_with_markers( - option: &str, - content: &str, - s: &mut Scanner, - requirements_txt: &Path, -) -> Result { - let start = s.cursor(); - let mut result = String::new(); - let mut in_single_quote = false; - let mut in_double_quote = false; - let mut escape_next = false; - let mut marker_start = None; - let mut marker_text = String::new(); - - loop { - let Some(ch) = s.peek() else { - // End of input - break; - }; - - // Check for terminal characters (always, even when quoted) - if !escape_next && matches!(ch, '\n' | '\r' | '#') { - // Terminal character - stop parsing - break; - } - - // Check for marker syntax: ` ; ` or `; ` - // Only check for markers when NOT inside quotes - if !escape_next && !in_single_quote && !in_double_quote { - if ch == ';' { - let rest = s.after(); - if rest.len() > 1 && rest.chars().nth(1).is_some_and(char::is_whitespace) { - // Found "; " - this is a marker - marker_start = Some(result.len()); - marker_text.push(s.eat().unwrap()); // consume ';' - // Consume until we find the closing quote or end of line - while let Some(c) = s.peek() { - if matches!(c, '\n' | '\r' | '#') { - break; - } - let c = s.eat().unwrap(); - marker_text.push(c); - // Track quote state to avoid unterminated quote errors - if !escape_next { - if c == '\'' && !in_double_quote { - in_single_quote = !in_single_quote; - } else if c == '"' && !in_single_quote { - in_double_quote = !in_double_quote; - } else if c == '\\' && !in_single_quote { - escape_next = true; - } - } else { - escape_next = false; - } - } - break; - } - } else if ch == ' ' { - let rest = s.after(); - if rest[1..].trim_start().starts_with(';') { - // Found " ;" - this is a marker - marker_start = Some(result.len()); - // Consume until we find the closing quote or end of line - while let Some(c) = s.peek() { - if matches!(c, '\n' | '\r' | '#') { - break; - } - let c = s.eat().unwrap(); - marker_text.push(c); - // Track quote state to avoid unterminated quote errors - if !escape_next { - if c == '\'' && !in_double_quote { - in_single_quote = !in_single_quote; - } else if c == '"' && !in_single_quote { - in_double_quote = !in_double_quote; - } else if c == '\\' && !in_single_quote { - escape_next = true; - } - } else { - escape_next = false; - } - } - break; - } - } - } - - // Consume the character - let ch = s.eat().unwrap(); - - if escape_next { - escape_next = false; - if in_double_quote { - // Inside double quotes, only specific characters are escaped - match ch { - '"' | '\\' | '$' | '`' => result.push(ch), - '\n' => { - // Escaped newline is stripped (continuation) - } - _ => { - // Unknown escape - preserve backslash and character - result.push('\\'); - result.push(ch); - } - } - } else { - // Outside quotes - if ch != '\n' { - // Escaped newline is stripped, everything else is literal - result.push(ch); - } - } - continue; - } - - match ch { - '\\' if !in_single_quote => { - // Start escape sequence (not in single quotes) - escape_next = true; - } - '\'' if !in_double_quote => { - // Toggle single quote mode - in_single_quote = !in_single_quote; - } - '"' if !in_single_quote => { - // Toggle double quote mode - in_double_quote = !in_double_quote; - } - _ => { - // Regular character - result.push(ch); - } - } - } - - // Check for unterminated quotes - if in_single_quote { - let (line, column) = calculate_row_column(content, start); - return Err(RequirementsTxtParserError::Parser { - message: "Unterminated single quote".to_string(), - line, - column, - }); - } - if in_double_quote { - let (line, column) = calculate_row_column(content, start); - return Err(RequirementsTxtParserError::Parser { - message: "Unterminated double quote".to_string(), - line, - column, - }); - } - - // If we found a marker, truncate the result there and warn the user - if let Some(trim_at) = marker_start { - result.truncate(trim_at); - - // Warn the user that we're ignoring the marker, showing the marker text - let (line, _) = calculate_row_column(content, start); - let marker_display = marker_text.trim(); - if requirements_txt == Path::new("-") { - uv_warnings::warn_user!( - "Ignoring environment marker on `{option}` in stdin at line {line}: `{marker_display}` (environment markers are not supported for requirements file options)" - ); - } else { - uv_warnings::warn_user!( - "Ignoring environment marker on `{option}` in `{path}` at line {line}: `{marker_display}` (environment markers are not supported for requirements file options)", - path = requirements_txt.user_display().cyan() - ); - } - } - - // Trim trailing whitespace - let result = result.trim_end().to_string(); - - if result.is_empty() { - let (line, column) = calculate_row_column(content, s.cursor()); - return Err(RequirementsTxtParserError::Parser { - message: format!("`{option}` must be followed by an argument"), - line, - column, - }); - } - - Ok(result) -} - /// Fetch the contents of a URL and return them as a string. #[cfg(feature = "http")] async fn read_url_to_string( diff --git a/crates/uv-requirements-txt/src/snapshots/uv_requirements_txt__test__parse-options-comprehensive.txt.snap b/crates/uv-requirements-txt/src/snapshots/uv_requirements_txt__test__parse-options-comprehensive.txt.snap index d52243a4a..7fe6833b1 100644 --- a/crates/uv-requirements-txt/src/snapshots/uv_requirements_txt__test__parse-options-comprehensive.txt.snap +++ b/crates/uv-requirements-txt/src/snapshots/uv_requirements_txt__test__parse-options-comprehensive.txt.snap @@ -3,37 +3,7 @@ source: crates/uv-requirements-txt/src/lib.rs expression: actual --- RequirementsTxt { - requirements: [ - RequirementEntry { - requirement: Named( - Requirement { - name: PackageName( - "requests", - ), - extras: [], - version_or_url: Some( - VersionSpecifier( - VersionSpecifiers( - [ - VersionSpecifier { - operator: GreaterThanEqual, - version: "2.28.0", - }, - ], - ), - ), - ), - marker: true, - origin: Some( - File( - "/options-comprehensive.txt", - ), - ), - }, - ), - hashes: [], - }, - ], + requirements: [], constraints: [], editables: [], index_url: Some( @@ -99,46 +69,6 @@ RequirementsTxt { "https://example.com/simple", ), }, - VerbatimUrl { - url: DisplaySafeUrl { - scheme: "https", - cannot_be_a_base: false, - username: "", - password: None, - host: Some( - Domain( - "example1.com", - ), - ), - port: None, - path: "/simple", - query: None, - fragment: None, - }, - given: Some( - "https://example1.com/simple", - ), - }, - VerbatimUrl { - url: DisplaySafeUrl { - scheme: "https", - cannot_be_a_base: false, - username: "", - password: None, - host: Some( - Domain( - "example2.com", - ), - ), - port: None, - path: "/simple", - query: None, - fragment: None, - }, - given: Some( - "https://example2.com/simple", - ), - }, VerbatimUrl { url: DisplaySafeUrl { scheme: "https", diff --git a/crates/uv-requirements-txt/test-data/requirements-txt/options-comprehensive.txt b/crates/uv-requirements-txt/test-data/requirements-txt/options-comprehensive.txt index 74c75f0f0..d63e2e634 100644 --- a/crates/uv-requirements-txt/test-data/requirements-txt/options-comprehensive.txt +++ b/crates/uv-requirements-txt/test-data/requirements-txt/options-comprehensive.txt @@ -1,34 +1,25 @@ -# Comprehensive test for option parsing with various combinations - -# 1. Options with markers (space before semicolon) +# Options with markers (space before semicolon). --index-url https://pypi.org/simple/ ; python_version > "3.8" -# 2. Options with markers (no space before semicolon) +# Options with markers (no space before semicolon). --extra-index-url=https://download.pytorch.org/whl/cpu; sys_platform == "linux" -# 3. Options with quoted values containing spaces +# Options with quoted values containing spaces. --find-links "file:///path/with spaces/packages" -# 4. Options with quoted values and markers +# Options with quoted values and markers. --find-links="https://example.com/simple" ; python_version >= "3.9" -# 5. Options with escaped quotes +# Options with escaped quotes. --extra-index-url='https://example.com/simple' -# 6. Options with equals sign vs space separator +# Options with equals sign vs. space separator. --no-binary :all: --only-binary=numpy -# 7. Options with markers on --no-binary and --only-binary (NOTE: semicolon with quotes can be confusing) +# Options with markers on `--no-binary` and `--only-binary`. --no-binary :all:; sys_platform == "win32" --only-binary scipy; python_version < "3.10" -# 8. Multiple options on separate lines ---extra-index-url https://example1.com/simple ---extra-index-url https://example2.com/simple - -# 9. Options with comments after them (not markers) +# Options with comments after them (not markers). --extra-index-url https://test.pypi.org/simple/ # Test PyPI - -# Actual requirements -requests>=2.28.0 diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index 825de07a0..e5795108a 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -717,6 +717,36 @@ fn install_unsupported_flag() -> Result<()> { Ok(()) } +#[test] +fn install_option_with_marker() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(indoc! {r#" + --index-url="https://pypi.org/simple" ; python_version >= "3.9" + iniconfig + "#})?; + + uv_snapshot!(context.filters(), context.pip_install() + .arg("-r") + .arg("requirements.txt") + .arg("--strict"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Ignoring environment marker on `--index-url` in `requirements.txt` at line 1: `; python_version >= "3.9"` + Resolved 1 package in [TIME] + Prepared 1 package in [TIME] + Installed 1 package in [TIME] + + iniconfig==2.0.0 + "### + ); + + Ok(()) +} + /// Install a requirements file with pins that conflict /// /// This is likely to occur in the real world when compiled on one platform then installed on another. diff --git a/req.txt b/req.txt deleted file mode 100644 index 8a2d59f88..000000000 --- a/req.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url="https://download.pytorch.org/whl/cu129; sys_platform != 'darwin'" -torch==2.9.0 ; sys_platform != 'darwin' -torch ; sys_platform == 'darwin' -torchvision -torchaudio