From 4d34b286574a2230b5127a2b58e5f8c1708a0522 Mon Sep 17 00:00:00 2001 From: konsti Date: Wed, 16 Apr 2025 09:27:46 +0200 Subject: [PATCH] Set 4MB stack size for all threads, introduce `UV_STACK_SIZE` (#12839) See #12769 for the motivation. We set the 4MB stack size not only for the main thread, but also for all tokio and rayon threads to fix a stack overflow while unpacking wheels in production on Windows. There are two variables for setting the stack size: A new `UV_STACK_SIZE` that takes precedence, and the existing `RUST_MIN_STACK`. When setting the stack size, `UV_STACK_SIZE` should be preferred, since `RUST_MIN_STACK` affects all Rust applications, including build backends we call (e.g., maturin). The minimum stack size is set to 1MB, the lowest stack size we observed on a platform (Windows main thread). Fixes #12769 ## Test Plan Tested manually with the example from #12769 --- crates/uv-configuration/src/lib.rs | 4 +- crates/uv-configuration/src/rayon.rs | 21 -------- crates/uv-configuration/src/threading.rs | 69 ++++++++++++++++++++++++ crates/uv-static/src/env_vars.rs | 20 ++++++- crates/uv/src/lib.rs | 29 ++-------- docs/configuration/environment.md | 21 +++++++- 6 files changed, 113 insertions(+), 51 deletions(-) delete mode 100644 crates/uv-configuration/src/rayon.rs create mode 100644 crates/uv-configuration/src/threading.rs diff --git a/crates/uv-configuration/src/lib.rs b/crates/uv-configuration/src/lib.rs index 7878693e5..ffc15c2d3 100644 --- a/crates/uv-configuration/src/lib.rs +++ b/crates/uv-configuration/src/lib.rs @@ -15,10 +15,10 @@ pub use overrides::*; pub use package_options::*; pub use preview::*; pub use project_build_backend::*; -pub use rayon::*; pub use required_version::*; pub use sources::*; pub use target_triple::*; +pub use threading::*; pub use trusted_host::*; pub use trusted_publishing::*; pub use vcs::*; @@ -40,10 +40,10 @@ mod overrides; mod package_options; mod preview; mod project_build_backend; -mod rayon; mod required_version; mod 
sources; mod target_triple; +mod threading; mod trusted_host; mod trusted_publishing; mod vcs; diff --git a/crates/uv-configuration/src/rayon.rs b/crates/uv-configuration/src/rayon.rs deleted file mode 100644 index d00877b8c..000000000 --- a/crates/uv-configuration/src/rayon.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Initialize the rayon threadpool once, before we need it. -//! -//! The `uv` crate sets [`RAYON_PARALLELISM`] from the user settings, and the extract and install -//! code initialize the threadpool lazily only if they are actually used by calling -//! `LazyLock::force(&RAYON_INITIALIZE)`. - -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::LazyLock; - -/// The number of threads for the rayon threadpool. -/// -/// The default of 0 makes rayon use its default. -pub static RAYON_PARALLELISM: AtomicUsize = AtomicUsize::new(0); - -/// Initialize the threadpool lazily. Always call before using rayon the potentially first time. -pub static RAYON_INITIALIZE: LazyLock<()> = LazyLock::new(|| { - rayon::ThreadPoolBuilder::new() - .num_threads(RAYON_PARALLELISM.load(Ordering::SeqCst)) - .build_global() - .expect("failed to initialize global rayon pool"); -}); diff --git a/crates/uv-configuration/src/threading.rs b/crates/uv-configuration/src/threading.rs new file mode 100644 index 000000000..ee11702b5 --- /dev/null +++ b/crates/uv-configuration/src/threading.rs @@ -0,0 +1,69 @@ +//! Configure rayon and determine thread stack sizes. + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::LazyLock; +use uv_static::EnvVars; + +/// The default minimum stack size for uv threads. +pub const UV_DEFAULT_STACK_SIZE: usize = 4 * 1024 * 1024; +/// We don't allow setting a smaller stack size than 1MB. +#[allow(clippy::identity_op)] +pub const UV_MIN_STACK_SIZE: usize = 1 * 1024 * 1024; + +/// Running out of stack has been an issue for us. We box types and futures in various places +/// to mitigate this. 
+/// +/// Main thread stack-size has a BIG variety here across platforms and it's harder to control +/// (which is why Rust doesn't by default). Notably on macOS and Linux you will typically get 8MB +/// main thread, while on Windows you will typically get 1MB, which is *tiny*: +/// <https://learn.microsoft.com/en-us/cpp/build/reference/stack-stack-allocations?view=msvc-170> +/// +/// To normalize this we just spawn a new thread called main2 with a size we can set +/// ourselves. 2MB is typically too small (especially for our debug builds), while 4MB +/// seems fine. This value can be changed with `UV_STACK_SIZE`, with a fallback to reading +/// `RUST_MIN_STACK`, to allow checking a larger or smaller stack size. There is a hardcoded stack +/// size minimum of 1MB, which is the lowest platform default we observed. +/// +/// Non-main threads should all have 2MB, as Rust forces platform consistency there, +/// but even then stack overflows can occur in release mode +/// (<https://github.com/astral-sh/uv/issues/12769>), so rayon and tokio get the same stack size, +/// with the 4MB default. +pub fn min_stack_size() -> usize { + let stack_size = if let Some(uv_stack_size) = std::env::var(EnvVars::UV_STACK_SIZE) + .ok() + .and_then(|var| var.parse::<usize>().ok()) + { + uv_stack_size + } else if let Some(uv_stack_size) = std::env::var(EnvVars::RUST_MIN_STACK) + .ok() + .and_then(|var| var.parse::<usize>().ok()) + { + uv_stack_size + } else { + UV_DEFAULT_STACK_SIZE + }; + + if stack_size < UV_MIN_STACK_SIZE { + return UV_DEFAULT_STACK_SIZE; + } + + stack_size +} + +/// The number of threads for the rayon threadpool. +/// +/// The default of 0 makes rayon use its default. +pub static RAYON_PARALLELISM: AtomicUsize = AtomicUsize::new(0); + +/// Initialize the threadpool lazily. Always call before using rayon the potentially first time. +/// +/// The `uv` crate sets [`RAYON_PARALLELISM`] from the user settings, and the extract and install +/// code initialize the threadpool lazily only if they are actually used by calling +/// `LazyLock::force(&RAYON_INITIALIZE)`. 
+pub static RAYON_INITIALIZE: LazyLock<()> = LazyLock::new(|| { + rayon::ThreadPoolBuilder::new() + .num_threads(RAYON_PARALLELISM.load(Ordering::SeqCst)) + .stack_size(min_stack_size()) + .build_global() + .expect("failed to initialize global rayon pool"); +}); diff --git a/crates/uv-static/src/env_vars.rs b/crates/uv-static/src/env_vars.rs index e66c6581c..388e934ae 100644 --- a/crates/uv-static/src/env_vars.rs +++ b/crates/uv-static/src/env_vars.rs @@ -598,11 +598,27 @@ impl EnvVars { /// Use to set the stack size used by uv. /// - /// The value is in bytes, and the default is typically 2MB (2097152). + /// The value is in bytes, and if both `UV_STACK_SIZE` and `RUST_MIN_STACK` are unset, uv uses a 4MB + /// (4194304) stack. `UV_STACK_SIZE` takes precedence over `RUST_MIN_STACK`. + /// /// Unlike the normal `RUST_MIN_STACK` semantics, this can affect main thread /// stack size, because we actually spawn our own main2 thread to work around /// the fact that Windows' real main thread is only 1MB. That thread has size - /// `max(RUST_MIN_STACK, 4MB)`. + /// `max(UV_STACK_SIZE, 1MB)`. + pub const UV_STACK_SIZE: &'static str = "UV_STACK_SIZE"; + + /// Use to set the stack size used by uv. + /// + /// The value is in bytes, and if both `UV_STACK_SIZE` and `RUST_MIN_STACK` are unset, uv uses a 4MB + /// (4194304) stack. `UV_STACK_SIZE` takes precedence over `RUST_MIN_STACK`. + /// + /// Prefer setting `UV_STACK_SIZE`, since `RUST_MIN_STACK` also affects subprocesses, such as + /// build backends that use Rust code. + /// + /// Unlike the normal `RUST_MIN_STACK` semantics, this can affect main thread + /// stack size, because we actually spawn our own main2 thread to work around + /// the fact that Windows' real main thread is only 1MB. That thread has size + /// `max(RUST_MIN_STACK, 1MB)`. pub const RUST_MIN_STACK: &'static str = "RUST_MIN_STACK"; /// The directory containing the `Cargo.toml` manifest for a package. 
diff --git a/crates/uv/src/lib.rs b/crates/uv/src/lib.rs index 203e6db56..b9c01b002 100644 --- a/crates/uv/src/lib.rs +++ b/crates/uv/src/lib.rs @@ -26,6 +26,7 @@ use uv_cli::{ use uv_cli::{PythonCommand, PythonNamespace, ToolCommand, ToolNamespace, TopLevelArgs}; #[cfg(feature = "self-update")] use uv_cli::{SelfCommand, SelfNamespace, SelfUpdateArgs}; +use uv_configuration::min_stack_size; use uv_fs::{Simplified, CWD}; use uv_pep508::VersionOrUrl; use uv_pypi_types::{ParsedDirectoryUrl, ParsedUrl}; @@ -2036,32 +2037,12 @@ where } }; - // Running out of stack has been an issue for us. We box types and futures in various places - // to mitigate this, with this being an especially important case. - // - // Non-main threads should all have 2MB, as Rust forces platform consistency there, - // but that can be overridden with the RUST_MIN_STACK environment variable if you need more. - // - // Main thread stack-size is the real issue. There's BIG variety here across platforms - // and it's harder to control (which is why Rust doesn't by default). Notably - // on macOS and Linux you will typically get 8MB main thread, while on Windows you will - // typically get 1MB, which is *tiny*: - // https://learn.microsoft.com/en-us/cpp/build/reference/stack-stack-allocations?view=msvc-170 - // - // To normalize this we just spawn a new thread called main2 with a size we can set - // ourselves. 2MB is typically too small (especially for our debug builds), while 4MB - // seems fine. Also we still try to respect RUST_MIN_STACK if it's set, in case useful, - // but don't let it ask for a smaller stack to avoid messy misconfiguration since we - // know we use quite a bit of main stack space. 
- let main_stack_size = std::env::var(EnvVars::RUST_MIN_STACK) - .ok() - .and_then(|var| var.parse::<usize>().ok()) - .unwrap_or(0) - .max(4 * 1024 * 1024); - + // See `min_stack_size` doc comment about `main2` + let min_stack_size = min_stack_size(); let main2 = move || { let runtime = tokio::runtime::Builder::new_current_thread() .enable_all() + .thread_stack_size(min_stack_size) .build() .expect("Failed building the Runtime"); // Box the large main future to avoid stack overflows. @@ -2076,7 +2057,7 @@ where }; let result = std::thread::Builder::new() .name("main2".to_owned()) - .stack_size(main_stack_size) + .stack_size(min_stack_size) .spawn(main2) .expect("Tokio executor failed, was there a panic?") .join() diff --git a/docs/configuration/environment.md b/docs/configuration/environment.md index b93016c5d..08a9d5adf 100644 --- a/docs/configuration/environment.md +++ b/docs/configuration/environment.md @@ -386,6 +386,18 @@ uv will require that all dependencies have a hash specified in the requirements Equivalent to the `--resolution` command-line argument. For example, if set to `lowest-direct`, uv will install the lowest compatible versions of all direct dependencies. +### `UV_STACK_SIZE` + +Use to set the stack size used by uv. + +The value is in bytes, and if both `UV_STACK_SIZE` and `RUST_MIN_STACK` are unset, uv uses a 4MB +(4194304) stack. `UV_STACK_SIZE` takes precedence over `RUST_MIN_STACK`. + +Unlike the normal `RUST_MIN_STACK` semantics, this can affect main thread +stack size, because we actually spawn our own main2 thread to work around +the fact that Windows' real main thread is only 1MB. That thread has size +`max(UV_STACK_SIZE, 1MB)`. + ### `UV_SYSTEM_PYTHON` Equivalent to the `--system` command-line argument. If set to `true`, uv will @@ -574,11 +586,16 @@ for more. Use to set the stack size used by uv. -The value is in bytes, and the default is typically 2MB (2097152). 
+The value is in bytes, and if both `UV_STACK_SIZE` and `RUST_MIN_STACK` are unset, uv uses a 4MB +(4194304) stack. `UV_STACK_SIZE` takes precedence over `RUST_MIN_STACK`. + +Prefer setting `UV_STACK_SIZE`, since `RUST_MIN_STACK` also affects subprocesses, such as +build backends that use Rust code. + Unlike the normal `RUST_MIN_STACK` semantics, this can affect main thread stack size, because we actually spawn our own main2 thread to work around the fact that Windows' real main thread is only 1MB. That thread has size -`max(RUST_MIN_STACK, 4MB)`. +`max(RUST_MIN_STACK, 1MB)`. ### `SHELL`