mirror of https://github.com/astral-sh/uv
Merge commit from fork
* feat: reject ZIP archives with improbable filenames Signed-off-by: William Woodruff <william@astral.sh> * use my PR for async_zip temporarily Signed-off-by: William Woodruff <william@astral.sh> * update snapshot Signed-off-by: William Woodruff <william@astral.sh> * two more tests Signed-off-by: William Woodruff <william@astral.sh> * update rev Signed-off-by: William Woodruff <william@astral.sh> --------- Signed-off-by: William Woodruff <william@astral.sh>
This commit is contained in:
parent
41cd3d1926
commit
da659fee48
|
|
@ -299,7 +299,7 @@ dependencies = [
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async_zip"
|
name = "async_zip"
|
||||||
version = "0.0.17"
|
version = "0.0.17"
|
||||||
source = "git+https://github.com/astral-sh/rs-async-zip?rev=285e48742b74ab109887d62e1ae79e7c15fd4878#285e48742b74ab109887d62e1ae79e7c15fd4878"
|
source = "git+https://github.com/astral-sh/rs-async-zip?rev=f6a41d32866003c868d03ed791a89c794f61b703#f6a41d32866003c868d03ed791a89c794f61b703"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-compression",
|
"async-compression",
|
||||||
"crc32fast",
|
"crc32fast",
|
||||||
|
|
@ -5867,6 +5867,7 @@ dependencies = [
|
||||||
"futures",
|
"futures",
|
||||||
"md-5",
|
"md-5",
|
||||||
"rayon",
|
"rayon",
|
||||||
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
"sha2",
|
"sha2",
|
||||||
|
|
|
||||||
|
|
@ -88,7 +88,7 @@ async-channel = { version = "2.3.1" }
|
||||||
async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] }
|
async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] }
|
||||||
async-trait = { version = "0.1.82" }
|
async-trait = { version = "0.1.82" }
|
||||||
async_http_range_reader = { version = "0.9.1" }
|
async_http_range_reader = { version = "0.9.1" }
|
||||||
async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "285e48742b74ab109887d62e1ae79e7c15fd4878", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] }
|
async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "f6a41d32866003c868d03ed791a89c794f61b703", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] }
|
||||||
axoupdater = { version = "0.9.0", default-features = false }
|
axoupdater = { version = "0.9.0", default-features = false }
|
||||||
backon = { version = "1.3.0" }
|
backon = { version = "1.3.0" }
|
||||||
base64 = { version = "0.22.1" }
|
base64 = { version = "0.22.1" }
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ fs-err = { workspace = true, features = ["tokio"] }
|
||||||
futures = { workspace = true }
|
futures = { workspace = true }
|
||||||
md-5 = { workspace = true }
|
md-5 = { workspace = true }
|
||||||
rayon = { workspace = true }
|
rayon = { workspace = true }
|
||||||
|
regex = { workspace = true }
|
||||||
reqwest = { workspace = true }
|
reqwest = { workspace = true }
|
||||||
rustc-hash = { workspace = true }
|
rustc-hash = { workspace = true }
|
||||||
sha2 = { workspace = true }
|
sha2 = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,10 @@ pub enum Error {
|
||||||
ExtensibleData,
|
ExtensibleData,
|
||||||
#[error("ZIP file end-of-central-directory record contains multiple entries with the same path, but conflicting modes: {}", path.display())]
|
#[error("ZIP file end-of-central-directory record contains multiple entries with the same path, but conflicting modes: {}", path.display())]
|
||||||
DuplicateExecutableFileHeader { path: PathBuf },
|
DuplicateExecutableFileHeader { path: PathBuf },
|
||||||
|
#[error("Archive contains a file with an empty filename")]
|
||||||
|
EmptyFilename,
|
||||||
|
#[error("Archive contains unacceptable filename: {filename}")]
|
||||||
|
UnacceptableFilename { filename: String },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Error {
|
impl Error {
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,101 @@
|
||||||
|
use std::sync::LazyLock;
|
||||||
|
|
||||||
pub use error::Error;
|
pub use error::Error;
|
||||||
|
use regex::Regex;
|
||||||
pub use sync::*;
|
pub use sync::*;
|
||||||
|
use uv_static::EnvVars;
|
||||||
|
|
||||||
mod error;
|
mod error;
|
||||||
pub mod hash;
|
pub mod hash;
|
||||||
pub mod stream;
|
pub mod stream;
|
||||||
mod sync;
|
mod sync;
|
||||||
mod vendor;
|
mod vendor;
|
||||||
|
|
||||||
|
static CONTROL_CHARACTERS_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\p{C}").unwrap());
|
||||||
|
static REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}";
|
||||||
|
|
||||||
|
/// Validate that a given filename (e.g. reported by a ZIP archive's
|
||||||
|
/// local file entries or central directory entries) is "safe" to use.
|
||||||
|
///
|
||||||
|
/// "Safe" in this context doesn't refer to directory traversal
|
||||||
|
/// risk, but whether we believe that other ZIP implementations
|
||||||
|
/// handle the name correctly and consistently.
|
||||||
|
///
|
||||||
|
/// Specifically, we want to avoid names that:
|
||||||
|
///
|
||||||
|
/// - Contain *any* non-printable characters
|
||||||
|
/// - Are empty
|
||||||
|
///
|
||||||
|
/// In the future, we may also want to check for names that contain
|
||||||
|
/// leading/trailing whitespace, or names that are exceedingly long.
|
||||||
|
pub(crate) fn validate_archive_member_name(name: &str) -> Result<(), Error> {
|
||||||
|
if name.is_empty() {
|
||||||
|
return Err(Error::EmptyFilename);
|
||||||
|
}
|
||||||
|
|
||||||
|
match CONTROL_CHARACTERS_RE.replace_all(name, REPLACEMENT_CHARACTER) {
|
||||||
|
// No replacements mean no control characters.
|
||||||
|
std::borrow::Cow::Borrowed(_) => Ok(()),
|
||||||
|
std::borrow::Cow::Owned(sanitized) => Err(Error::UnacceptableFilename {
|
||||||
|
filename: sanitized,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if ZIP validation is disabled.
|
||||||
|
pub(crate) fn insecure_no_validate() -> bool {
|
||||||
|
// TODO(charlie) Parse this in `EnvironmentOptions`.
|
||||||
|
let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
let Some(value) = value.to_str() else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
matches!(
|
||||||
|
value.to_lowercase().as_str(),
|
||||||
|
"y" | "yes" | "t" | "true" | "on" | "1"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
#[test]
|
||||||
|
fn test_validate_archive_member_name() {
|
||||||
|
for (testcase, ok) in &[
|
||||||
|
// Valid cases.
|
||||||
|
("normal.txt", true),
|
||||||
|
("__init__.py", true),
|
||||||
|
("fine i guess.py", true),
|
||||||
|
("🌈.py", true),
|
||||||
|
// Invalid cases.
|
||||||
|
("", false),
|
||||||
|
("new\nline.py", false),
|
||||||
|
("carriage\rreturn.py", false),
|
||||||
|
("tab\tcharacter.py", false),
|
||||||
|
("null\0byte.py", false),
|
||||||
|
("control\x01code.py", false),
|
||||||
|
("control\x02code.py", false),
|
||||||
|
("control\x03code.py", false),
|
||||||
|
("control\x04code.py", false),
|
||||||
|
("backspace\x08code.py", false),
|
||||||
|
("delete\x7fcode.py", false),
|
||||||
|
] {
|
||||||
|
assert_eq!(
|
||||||
|
super::validate_archive_member_name(testcase).is_ok(),
|
||||||
|
*ok,
|
||||||
|
"testcase: {testcase}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_unacceptable_filename_error_replaces_control_characters() {
|
||||||
|
let err = super::validate_archive_member_name("bad\nname").unwrap_err();
|
||||||
|
match err {
|
||||||
|
super::Error::UnacceptableFilename { filename } => {
|
||||||
|
assert_eq!(filename, "bad<EFBFBD>name");
|
||||||
|
}
|
||||||
|
_ => panic!("expected UnacceptableFilename error"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,9 +9,8 @@ use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
|
||||||
use tracing::{debug, warn};
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
use uv_distribution_filename::SourceDistExtension;
|
use uv_distribution_filename::SourceDistExtension;
|
||||||
use uv_static::EnvVars;
|
|
||||||
|
|
||||||
use crate::Error;
|
use crate::{Error, insecure_no_validate, validate_archive_member_name};
|
||||||
|
|
||||||
const DEFAULT_BUF_SIZE: usize = 128 * 1024;
|
const DEFAULT_BUF_SIZE: usize = 128 * 1024;
|
||||||
|
|
||||||
|
|
@ -39,21 +38,6 @@ struct ComputedEntry {
|
||||||
compressed_size: u64,
|
compressed_size: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns `true` if ZIP validation is disabled.
|
|
||||||
fn insecure_no_validate() -> bool {
|
|
||||||
// TODO(charlie) Parse this in `EnvironmentOptions`.
|
|
||||||
let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
let Some(value) = value.to_str() else {
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
matches!(
|
|
||||||
value.to_lowercase().as_str(),
|
|
||||||
"y" | "yes" | "t" | "true" | "on" | "1"
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Unpack a `.zip` archive into the target directory, without requiring `Seek`.
|
/// Unpack a `.zip` archive into the target directory, without requiring `Seek`.
|
||||||
///
|
///
|
||||||
/// This is useful for unzipping files as they're being downloaded. If the archive
|
/// This is useful for unzipping files as they're being downloaded. If the archive
|
||||||
|
|
@ -102,6 +86,13 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
|
||||||
Err(err) => return Err(err.into()),
|
Err(err) => return Err(err.into()),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Apply sanity checks to the file names in local headers.
|
||||||
|
if let Err(e) = validate_archive_member_name(path) {
|
||||||
|
if !skip_validation {
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Sanitize the file name to prevent directory traversal attacks.
|
// Sanitize the file name to prevent directory traversal attacks.
|
||||||
let Some(relpath) = enclosed_name(path) else {
|
let Some(relpath) = enclosed_name(path) else {
|
||||||
warn!("Skipping unsafe file name: {path}");
|
warn!("Skipping unsafe file name: {path}");
|
||||||
|
|
@ -373,6 +364,13 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
|
||||||
Err(err) => return Err(err.into()),
|
Err(err) => return Err(err.into()),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Apply sanity checks to the file names in CD headers.
|
||||||
|
if let Err(e) = validate_archive_member_name(path) {
|
||||||
|
if !skip_validation {
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Sanitize the file name to prevent directory traversal attacks.
|
// Sanitize the file name to prevent directory traversal attacks.
|
||||||
let Some(relpath) = enclosed_name(path) else {
|
let Some(relpath) = enclosed_name(path) else {
|
||||||
continue;
|
continue;
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::{LazyLock, Mutex};
|
use std::sync::{LazyLock, Mutex};
|
||||||
|
|
||||||
use crate::Error;
|
|
||||||
use crate::vendor::{CloneableSeekableReader, HasLength};
|
use crate::vendor::{CloneableSeekableReader, HasLength};
|
||||||
|
use crate::{Error, insecure_no_validate, validate_archive_member_name};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use rustc_hash::FxHashSet;
|
use rustc_hash::FxHashSet;
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
@ -18,6 +18,7 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
|
||||||
let reader = std::io::BufReader::new(reader);
|
let reader = std::io::BufReader::new(reader);
|
||||||
let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
|
let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
|
||||||
let directories = Mutex::new(FxHashSet::default());
|
let directories = Mutex::new(FxHashSet::default());
|
||||||
|
let skip_validation = insecure_no_validate();
|
||||||
// Initialize the threadpool with the user settings.
|
// Initialize the threadpool with the user settings.
|
||||||
LazyLock::force(&RAYON_INITIALIZE);
|
LazyLock::force(&RAYON_INITIALIZE);
|
||||||
(0..archive.len())
|
(0..archive.len())
|
||||||
|
|
@ -26,6 +27,12 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
|
||||||
let mut archive = archive.clone();
|
let mut archive = archive.clone();
|
||||||
let mut file = archive.by_index(file_number)?;
|
let mut file = archive.by_index(file_number)?;
|
||||||
|
|
||||||
|
if let Err(e) = validate_archive_member_name(file.name()) {
|
||||||
|
if !skip_validation {
|
||||||
|
return Err(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Determine the path of the file within the wheel.
|
// Determine the path of the file within the wheel.
|
||||||
let Some(enclosed_name) = file.enclosed_name() else {
|
let Some(enclosed_name) = file.enclosed_name() else {
|
||||||
warn!("Skipping unsafe file name: {}", file.name());
|
warn!("Skipping unsafe file name: {}", file.name());
|
||||||
|
|
|
||||||
|
|
@ -12189,6 +12189,25 @@ fn config_settings_package() -> Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn reject_invalid_archive_member_names() {
|
||||||
|
let context = TestContext::new("3.12").with_exclude_newer("2025-10-07T00:00:00Z");
|
||||||
|
|
||||||
|
uv_snapshot!(context.filters(), context.pip_install()
|
||||||
|
.arg("cbwheeldiff2==0.0.1"), @r"
|
||||||
|
success: false
|
||||||
|
exit_code: 1
|
||||||
|
----- stdout -----
|
||||||
|
|
||||||
|
----- stderr -----
|
||||||
|
Resolved 1 package in [TIME]
|
||||||
|
× Failed to download `cbwheeldiff2==0.0.1`
|
||||||
|
├─▶ Failed to extract archive: cbwheeldiff2-0.0.1-py2.py3-none-any.whl
|
||||||
|
╰─▶ Archive contains unacceptable filename: cbwheeldiff2-0.0.1.dist-info/RECORD<EFBFBD>
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn reject_invalid_streaming_zip() {
|
fn reject_invalid_streaming_zip() {
|
||||||
let context = TestContext::new("3.12").with_exclude_newer("2025-07-10T00:00:00Z");
|
let context = TestContext::new("3.12").with_exclude_newer("2025-07-10T00:00:00Z");
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue