//! Parses a subset of requirement.txt syntax
//!
//! <https://pip.pypa.io/en/stable/reference/requirements-file-format/>
//!
//! Supported:
//! * [PEP 508 requirements](https://packaging.python.org/en/latest/specifications/dependency-specifiers/)
//! * `-r`
//! * `-c`
//! * `--hash` (postfix)
//! * `-e`
//!
//! Unsupported:
//! * `<path>`. TBD
//! * `<archive url/path>`. TBD
//! * Options without a requirement, such as `--find-links` or `--index-url`
//!
//! Grammar as implemented:
//!
//! ```text
//! file = (statement | empty ('#' any*)? '\n')*
//! empty = whitespace*
//! statement = constraint_include | requirements_include | editable_requirement | requirement
//! constraint_include = '-c' ('=' | wrappable_whitespaces) filepath
//! requirements_include = '-r' ('=' | wrappable_whitespaces) filepath
//! editable_requirement = '-e' ('=' | wrappable_whitespaces) requirement
//! # We check whether the line starts with a letter or a number, in that case we assume it's a
//! # PEP 508 requirement
//! # https://packaging.python.org/en/latest/specifications/name-normalization/#valid-non-normalized-names
//! # This does not (yet?) support plain files or urls, we use a letter or a number as first
//! # character to assume a PEP 508 requirement
//! requirement = [a-zA-Z0-9] pep508_grammar_tail wrappable_whitespaces hashes
//! hashes = ('--hash' ('=' | wrappable_whitespaces) [a-zA-Z0-9-_]+ ':' [a-zA-Z0-9-_]+ wrappable_whitespaces+)*
//! # This should indicate a single backslash before a newline
//! wrappable_whitespaces = whitespace ('\\\n' | whitespace)*
//! ```
use std::borrow::Cow;
use std::fmt::{Display, Formatter};
use std::io;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use rustc_hash::FxHashSet;
use tracing::instrument;
use unscanny::Scanner;
use url::Url;
#[cfg(feature = "http")]
use uv_client::BaseClient;
use uv_client::BaseClientBuilder;
use uv_configuration::{NoBinary, NoBuild, PackageNameSpecifier};
use uv_distribution_types::{
Requirement, UnresolvedRequirement, UnresolvedRequirementSpecification,
};
use uv_fs::Simplified;
use uv_pep508::{Pep508Error, RequirementOrigin, VerbatimUrl, expand_env_vars};
use uv_pypi_types::VerbatimParsedUrl;
#[cfg(feature = "http")]
use uv_redacted::DisplaySafeUrl;
use crate::requirement::EditableError;
pub use crate::requirement::RequirementsTxtRequirement;
mod requirement;
/// We emit one of those for each `requirements.txt` entry.
///
/// The `start`/`end` fields are byte offsets into the source file, carried along so that error
/// messages can point at the statement that produced them.
enum RequirementsTxtStatement {
/// `-r` inclusion filename
Requirements {
filename: String,
start: usize,
end: usize,
},
/// `-c` inclusion filename
Constraint {
filename: String,
start: usize,
end: usize,
},
/// PEP 508 requirement plus metadata
RequirementEntry(RequirementEntry),
/// `-e` editable requirement plus metadata
EditableRequirementEntry(RequirementEntry),
/// `--index-url`
IndexUrl(VerbatimUrl),
/// `--extra-index-url`
ExtraIndexUrl(VerbatimUrl),
/// `--find-links`
FindLinks(VerbatimUrl),
/// `--no-index`
NoIndex,
/// `--no-binary`
NoBinary(NoBinary),
/// `--only-binary`
OnlyBinary(NoBuild),
/// An unsupported option (e.g., `--trusted-host`); parsed but only warned about.
UnsupportedOption(UnsupportedOption),
}
/// A [Requirement] with additional metadata from the `requirements.txt`, currently only hashes but in
/// the future also editable and similar information.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct RequirementEntry {
/// The actual PEP 508 requirement.
pub requirement: RequirementsTxtRequirement,
/// Hashes of the downloadable packages.
pub hashes: Vec,
}
// We place the impl here instead of next to `UnresolvedRequirementSpecification` because
// `UnresolvedRequirementSpecification` is defined in `distribution-types` and `requirements-txt`
// depends on `distribution-types`.
impl From for UnresolvedRequirementSpecification {
fn from(value: RequirementEntry) -> Self {
Self {
requirement: match value.requirement {
RequirementsTxtRequirement::Named(named) => {
UnresolvedRequirement::Named(Requirement::from(named))
}
RequirementsTxtRequirement::Unnamed(unnamed) => {
UnresolvedRequirement::Unnamed(unnamed)
}
},
hashes: value.hashes,
}
}
}
impl From for UnresolvedRequirementSpecification {
fn from(value: RequirementsTxtRequirement) -> Self {
Self::from(RequirementEntry {
requirement: value,
hashes: vec![],
})
}
}
/// Parsed and flattened requirements.txt with requirements and constraints
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RequirementsTxt {
/// The actual requirements with the hashes.
pub requirements: Vec,
/// Constraints included with `-c`.
pub constraints: Vec>,
/// Editables with `-e`.
pub editables: Vec,
/// The index URL, specified with `--index-url`.
pub index_url: Option,
/// The extra index URLs, specified with `--extra-index-url`.
pub extra_index_urls: Vec,
/// The find links locations, specified with `--find-links`.
pub find_links: Vec,
/// Whether to ignore the index, specified with `--no-index`.
pub no_index: bool,
/// Whether to disallow wheels, specified with `--no-binary`.
pub no_binary: NoBinary,
/// Whether to allow only wheels, specified with `--only-binary`.
pub only_binary: NoBuild,
}
impl RequirementsTxt {
/// See module level documentation
#[instrument(
skip_all,
fields(requirements_txt = requirements_txt.as_ref().as_os_str().to_str())
)]
pub async fn parse(
requirements_txt: impl AsRef,
working_dir: impl AsRef,
client_builder: &BaseClientBuilder<'_>,
) -> Result {
let mut visited = VisitedFiles::Requirements {
requirements: &mut FxHashSet::default(),
constraints: &mut FxHashSet::default(),
};
Self::parse_impl(requirements_txt, working_dir, client_builder, &mut visited).await
}
/// See module level documentation
#[instrument(
skip_all,
fields(requirements_txt = requirements_txt.as_ref().as_os_str().to_str())
)]
async fn parse_impl(
requirements_txt: impl AsRef,
working_dir: impl AsRef,
client_builder: &BaseClientBuilder<'_>,
visited: &mut VisitedFiles<'_>,
) -> Result {
let requirements_txt = requirements_txt.as_ref();
let working_dir = working_dir.as_ref();
let content =
if requirements_txt.starts_with("http://") | requirements_txt.starts_with("https://") {
#[cfg(not(feature = "http"))]
{
return Err(RequirementsTxtFileError {
file: requirements_txt.to_path_buf(),
error: RequirementsTxtParserError::Io(io::Error::new(
io::ErrorKind::InvalidInput,
"Remote file not supported without `http` feature",
)),
});
}
#[cfg(feature = "http")]
{
// Avoid constructing a client if network is disabled already
if client_builder.is_offline() {
return Err(RequirementsTxtFileError {
file: requirements_txt.to_path_buf(),
error: RequirementsTxtParserError::Io(io::Error::new(
io::ErrorKind::InvalidInput,
format!("Network connectivity is disabled, but a remote requirements file was requested: {}", requirements_txt.display()),
)),
});
}
let client = client_builder.build();
read_url_to_string(&requirements_txt, client).await
}
} else {
// Ex) `file:///home/ferris/project/requirements.txt`
uv_fs::read_to_string_transcode(&requirements_txt)
.await
.map_err(RequirementsTxtParserError::Io)
}
.map_err(|err| RequirementsTxtFileError {
file: requirements_txt.to_path_buf(),
error: err,
})?;
let requirements_dir = requirements_txt.parent().unwrap_or(working_dir);
let data = Self::parse_inner(
&content,
working_dir,
requirements_dir,
client_builder,
requirements_txt,
visited,
)
.await
.map_err(|err| RequirementsTxtFileError {
file: requirements_txt.to_path_buf(),
error: err,
})?;
Ok(data)
}
/// See module level documentation.
///
/// When parsing, relative paths to requirements (e.g., `-e ../editable/`) are resolved against
/// the current working directory. However, relative paths to sub-files (e.g., `-r ../requirements.txt`)
/// are resolved against the directory of the containing `requirements.txt` file, to match
/// `pip`'s behavior.
async fn parse_inner(
content: &str,
working_dir: &Path,
requirements_dir: &Path,
client_builder: &BaseClientBuilder<'_>,
requirements_txt: &Path,
visited: &mut VisitedFiles<'_>,
) -> Result {
let mut s = Scanner::new(content);
let mut data = Self::default();
while let Some(statement) = parse_entry(&mut s, content, working_dir, requirements_txt)? {
match statement {
RequirementsTxtStatement::Requirements {
filename,
start,
end,
} => {
let filename = expand_env_vars(&filename);
let sub_file =
if filename.starts_with("http://") || filename.starts_with("https://") {
PathBuf::from(filename.as_ref())
} else if filename.starts_with("file://") {
requirements_txt.join(
Url::parse(filename.as_ref())
.map_err(|err| RequirementsTxtParserError::Url {
source: err,
url: filename.to_string(),
start,
end,
})?
.to_file_path()
.map_err(|()| RequirementsTxtParserError::FileUrl {
url: filename.to_string(),
start,
end,
})?,
)
} else {
requirements_dir.join(filename.as_ref())
};
match visited {
VisitedFiles::Requirements { requirements, .. } => {
if !requirements.insert(sub_file.clone()) {
continue;
}
}
// Treat any nested requirements or constraints as constraints. This differs
// from `pip`, which seems to treat `-r` requirements in constraints files as
// _requirements_, but we don't want to support that.
VisitedFiles::Constraints { constraints } => {
if !constraints.insert(sub_file.clone()) {
continue;
}
}
}
let sub_requirements = Box::pin(Self::parse_impl(
&sub_file,
working_dir,
client_builder,
visited,
))
.await
.map_err(|err| RequirementsTxtParserError::Subfile {
source: Box::new(err),
start,
end,
})?;
// Disallow conflicting `--index-url` in nested `requirements` files.
if sub_requirements.index_url.is_some()
&& data.index_url.is_some()
&& sub_requirements.index_url != data.index_url
{
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser {
message:
"Nested `requirements` file contains conflicting `--index-url`"
.to_string(),
line,
column,
});
}
// Add each to the correct category.
data.update_from(sub_requirements);
}
RequirementsTxtStatement::Constraint {
filename,
start,
end,
} => {
let filename = expand_env_vars(&filename);
let sub_file =
if filename.starts_with("http://") || filename.starts_with("https://") {
PathBuf::from(filename.as_ref())
} else if filename.starts_with("file://") {
requirements_txt.join(
Url::parse(filename.as_ref())
.map_err(|err| RequirementsTxtParserError::Url {
source: err,
url: filename.to_string(),
start,
end,
})?
.to_file_path()
.map_err(|()| RequirementsTxtParserError::FileUrl {
url: filename.to_string(),
start,
end,
})?,
)
} else {
requirements_dir.join(filename.as_ref())
};
// Switch to constraints mode, if we aren't in it already.
let mut visited = match visited {
VisitedFiles::Requirements { constraints, .. } => {
if !constraints.insert(sub_file.clone()) {
continue;
}
VisitedFiles::Constraints { constraints }
}
VisitedFiles::Constraints { constraints } => {
if !constraints.insert(sub_file.clone()) {
continue;
}
VisitedFiles::Constraints { constraints }
}
};
let sub_constraints = Box::pin(Self::parse_impl(
&sub_file,
working_dir,
client_builder,
&mut visited,
))
.await
.map_err(|err| RequirementsTxtParserError::Subfile {
source: Box::new(err),
start,
end,
})?;
// Treat any nested requirements or constraints as constraints. This differs
// from `pip`, which seems to treat `-r` requirements in constraints files as
// _requirements_, but we don't want to support that.
for entry in sub_constraints.requirements {
match entry.requirement {
RequirementsTxtRequirement::Named(requirement) => {
data.constraints.push(requirement);
}
RequirementsTxtRequirement::Unnamed(_) => {
return Err(RequirementsTxtParserError::UnnamedConstraint {
start,
end,
});
}
}
}
for constraint in sub_constraints.constraints {
data.constraints.push(constraint);
}
}
RequirementsTxtStatement::RequirementEntry(requirement_entry) => {
data.requirements.push(requirement_entry);
}
RequirementsTxtStatement::EditableRequirementEntry(editable) => {
data.editables.push(editable);
}
RequirementsTxtStatement::IndexUrl(url) => {
if data.index_url.is_some() {
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser {
message: "Multiple `--index-url` values provided".to_string(),
line,
column,
});
}
data.index_url = Some(url);
}
RequirementsTxtStatement::ExtraIndexUrl(url) => {
data.extra_index_urls.push(url);
}
RequirementsTxtStatement::FindLinks(url) => {
data.find_links.push(url);
}
RequirementsTxtStatement::NoIndex => {
data.no_index = true;
}
RequirementsTxtStatement::NoBinary(no_binary) => {
data.no_binary.extend(no_binary);
}
RequirementsTxtStatement::OnlyBinary(only_binary) => {
data.only_binary.extend(only_binary);
}
RequirementsTxtStatement::UnsupportedOption(flag) => {
if requirements_txt == Path::new("-") {
if flag.cli() {
uv_warnings::warn_user!(
"Ignoring unsupported option from stdin: `{flag}` (hint: pass `{flag}` on the command line instead)",
flag = flag.green()
);
} else {
uv_warnings::warn_user!(
"Ignoring unsupported option from stdin: `{flag}`",
flag = flag.green()
);
}
} else {
if flag.cli() {
uv_warnings::warn_user!(
"Ignoring unsupported option in `{path}`: `{flag}` (hint: pass `{flag}` on the command line instead)",
path = requirements_txt.user_display().cyan(),
flag = flag.green()
);
} else {
uv_warnings::warn_user!(
"Ignoring unsupported option in `{path}`: `{flag}`",
path = requirements_txt.user_display().cyan(),
flag = flag.green()
);
}
}
}
}
}
Ok(data)
}
/// Merge the data from a nested `requirements` file (`other`) into this one.
///
/// Collection-valued fields are appended; `index_url` keeps the outer file's value when one is
/// already set; the boolean and binary-policy fields accumulate.
pub fn update_from(&mut self, other: Self) {
    self.requirements.extend(other.requirements);
    self.constraints.extend(other.constraints);
    self.editables.extend(other.editables);
    // An outer `--index-url` takes precedence over a nested one.
    self.index_url = self.index_url.take().or(other.index_url);
    self.extra_index_urls.extend(other.extra_index_urls);
    self.find_links.extend(other.find_links);
    self.no_index |= other.no_index;
    self.no_binary.extend(other.no_binary);
    self.only_binary.extend(other.only_binary);
}
}
/// An unsupported option (e.g., `--trusted-host`).
///
/// See: <https://pip.pypa.io/en/stable/reference/requirements-file-format/>
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum UnsupportedOption {
    /// `--prefer-binary`
    PreferBinary,
    /// `--require-hashes`
    RequireHashes,
    /// `--pre`
    Pre,
    /// `--trusted-host`
    TrustedHost,
    /// `--use-feature`
    UseFeature,
}

impl UnsupportedOption {
    /// The name of the unsupported option, as spelled in a `requirements.txt` file.
    fn name(self) -> &'static str {
        match self {
            Self::PreferBinary => "--prefer-binary",
            Self::RequireHashes => "--require-hashes",
            Self::Pre => "--pre",
            Self::TrustedHost => "--trusted-host",
            Self::UseFeature => "--use-feature",
        }
    }

    /// Returns `true` if the option is supported on the CLI.
    fn cli(self) -> bool {
        match self {
            Self::PreferBinary => false,
            Self::RequireHashes => true,
            Self::Pre => true,
            Self::TrustedHost => true,
            Self::UseFeature => false,
        }
    }

    /// Returns an iterator over all unsupported options.
    // Fix: the return type's associated-type binding (`<Item = Self>`) had been stripped,
    // leaving `impl Iterator- {`, which does not parse.
    fn iter() -> impl Iterator<Item = Self> {
        [
            Self::PreferBinary,
            Self::RequireHashes,
            Self::Pre,
            Self::TrustedHost,
            Self::UseFeature,
        ]
        .iter()
        .copied()
    }
}

impl Display for UnsupportedOption {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.name())
    }
}
/// Returns `true` if the character ends the meaningful part of a line:
/// a line break (`\n`/`\r`) or the start of a comment (`#`).
const fn is_terminal(c: char) -> bool {
    c == '\n' || c == '\r' || c == '#'
}
/// Parse a single entry, that is a requirement, an inclusion or a comment line.
///
/// Consumes all preceding trivia (whitespace and comments). If it returns `None`, we've reached
/// the end of file.
fn parse_entry(
s: &mut Scanner,
content: &str,
working_dir: &Path,
requirements_txt: &Path,
) -> Result