From 03f8808d11dd7acec91c83b0cc76dcdb5f4a3f33 Mon Sep 17 00:00:00 2001 From: Jp Date: Sun, 1 Dec 2024 23:57:09 +0100 Subject: [PATCH] Handle Windows AV/EDR file locks during script installations (#9543) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #9531 ## Context While working with [uv](https://github.com/astral-sh/uv), I encountered issues with a python dependency, [httpx](https://www.python-httpx.org/) unable to be installed because of a **os error 5 permission denied**. The error occur when we try to persist a **.exe file** from a temporary folder into a persistent one. I only reproduce the issue in an enterprise **Windows** Jenkins Runner. In my virtual machines, I don't have any issues. So I think this is most probably coming from the system configuration. This windows runner **contains an AV/EDR**. And the fact that the file locked occured only once for an executable make me think that it's most probably the cause. While doing some research and speaking with some colleagues (hi @vmeurisse), it seems that the issue is a very recurrent one on Windows. In the Javascript ecosystem, there is this package, created by the @isaacs, `npm` inventor: https://www.npmjs.com/package/graceful-fs, used inside `npm`, allowing its package installations to be more resilient to filesystem errors: > The improvements are meant to normalize behavior across different platforms and environments, and to make filesystem access more resilient to errors. One of its core feature is this one: > On Windows, it retries renaming a file for up to one second if EACCESS or EPERM error occurs, likely because antivirus software has locked the directory. So I tried to implement the same algorithm on `uv`, **and it fixed my issue**! I can finally install `httpx`. Then, [as I mentionned in this issue](https://github.com/astral-sh/uv/issues/9531#issuecomment-2508981316), I saw that you already implemented exactly the same algorithm in an asynchronous function for renames 😄 https://github.com/astral-sh/uv/blob/22fd9f7ff17adfbec6880c6d92c162e3acb6a41c/crates/uv-fs/src/lib.rs#L221 ## Summary of changes - I added a similar function for `persist` (was not easy to have the benediction of the borrow checker 😄) - I added a `sync` variant of `rename_with_retry` - I edited `install_script` to use the function including retries on Windows Let me know if I should change anything 🙂 Thanks!! ## Test Plan This pull-request should be totally iso-functional, so I think it should be covered by existing tests in case of regression. All tests are still passing on my side. Also, of course validated that my windows machines (windows 10 & windows 11) containing AV/EDR software are now able to install `httpx.exe` script. However, if you think any additional test is needed, feel free to tell me! --- crates/uv-fs/src/lib.rs | 124 +++++++++++++++++++++++++-- crates/uv-install-wheel/src/wheel.rs | 18 ++-- 2 files changed, 123 insertions(+), 19 deletions(-) diff --git a/crates/uv-fs/src/lib.rs b/crates/uv-fs/src/lib.rs index be4bbb869..b13486ec2 100644 --- a/crates/uv-fs/src/lib.rs +++ b/crates/uv-fs/src/lib.rs @@ -216,6 +216,13 @@ pub fn copy_atomic_sync(from: impl AsRef, to: impl AsRef) -> std::io Ok(()) } +fn backoff_file_move() -> backoff::ExponentialBackoff { + backoff::ExponentialBackoffBuilder::default() + .with_initial_interval(std::time::Duration::from_millis(10)) + .with_max_elapsed_time(Some(std::time::Duration::from_secs(10))) + .build() +} + /// Rename a file, retrying (on Windows) if it fails due to transient operating system errors. #[cfg(feature = "tokio")] pub async fn rename_with_retry( @@ -227,15 +234,11 @@ pub async fn rename_with_retry( // This is most common for DLLs, and the common suggestion is to retry the operation with // some backoff. // - // See: + // See: & let from = from.as_ref(); let to = to.as_ref(); - let backoff = backoff::ExponentialBackoffBuilder::default() - .with_initial_interval(std::time::Duration::from_millis(10)) - .with_max_elapsed_time(Some(std::time::Duration::from_secs(10))) - .build(); - + let backoff = backoff_file_move(); backoff::future::retry(backoff, || async move { match fs_err::rename(from, to) { Ok(()) => Ok(()), @@ -257,6 +260,115 @@ pub async fn rename_with_retry( } } +/// Rename a file, retrying (on Windows) if it fails due to transient operating system errors, in a synchronous context. +pub fn rename_with_retry_sync( + from: impl AsRef, + to: impl AsRef, +) -> Result<(), std::io::Error> { + if cfg!(windows) { + // On Windows, antivirus software can lock files temporarily, making them inaccessible. + // This is most common for DLLs, and the common suggestion is to retry the operation with + // some backoff. + // + // See: & + let from = from.as_ref(); + let to = to.as_ref(); + + let backoff = backoff_file_move(); + backoff::retry(backoff, || match fs_err::rename(from, to) { + Ok(()) => Ok(()), + Err(err) if err.kind() == std::io::ErrorKind::PermissionDenied => { + warn!( + "Retrying rename from {} to {} due to transient error: {}", + from.display(), + to.display(), + err + ); + Err(backoff::Error::transient(err)) + } + Err(err) => Err(backoff::Error::permanent(err)), + }) + .map_err(|err| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!( + "Failed to rename {} to {}: {}", + from.display(), + to.display(), + err + ), + ) + }) + } else { + fs_err::rename(from, to) + } +} + +/// Persist a `NamedTempFile`, retrying (on Windows) if it fails due to transient operating system errors, in a synchronous context. +pub fn persist_with_retry_sync( + from: NamedTempFile, + to: impl AsRef, +) -> Result<(), std::io::Error> { + if cfg!(windows) { + // On Windows, antivirus software can lock files temporarily, making them inaccessible. + // This is most common for DLLs, and the common suggestion is to retry the operation with + // some backoff. + // + // See: & + let to = to.as_ref(); + + // the `NamedTempFile` `persist` method consumes `self`, and returns it back inside the Error in case of `PersistError` + // https://docs.rs/tempfile/latest/tempfile/struct.NamedTempFile.html#method.persist + // So we will update the `from` optional value in safe and borrow-checker friendly way every retry + // Allows us to use the NamedTempFile inside a FnMut closure used for backoff::retry + let mut from = Some(from); + + let backoff = backoff_file_move(); + let persisted = backoff::retry(backoff, move || { + if let Some(file) = from.take() { + file.persist(to).map_err(|err| { + let error_message = err.to_string(); + warn!( + "Retrying to persist temporary file to {}: {}", + to.display(), + error_message + ); + + // Set back the NamedTempFile returned back by the Error + from = Some(err.file); + + backoff::Error::transient(std::io::Error::new( + std::io::ErrorKind::Other, + format!( + "Failed to persist temporary file to {}: {}", + to.display(), + error_message + ), + )) + }) + } else { + Err(backoff::Error::permanent(std::io::Error::new( + std::io::ErrorKind::Other, + format!( + "Failed to retrieve temporary file while trying to persist to {}", + to.display() + ), + ))) + } + }); + + match persisted { + Ok(_) => Ok(()), + Err(err) => Err(std::io::Error::new( + std::io::ErrorKind::Other, + format!("{err:?}"), + )), + } + } else { + fs_err::rename(from, to) + } +} + /// Iterate over the subdirectories of a directory. /// /// If the directory does not exist, returns an empty iterator. diff --git a/crates/uv-install-wheel/src/wheel.rs b/crates/uv-install-wheel/src/wheel.rs index f5fb9ab96..9b31c3278 100644 --- a/crates/uv-install-wheel/src/wheel.rs +++ b/crates/uv-install-wheel/src/wheel.rs @@ -13,7 +13,7 @@ use tracing::{instrument, warn}; use walkdir::WalkDir; use uv_cache_info::CacheInfo; -use uv_fs::{relative_to, Simplified}; +use uv_fs::{persist_with_retry_sync, relative_to, rename_with_retry_sync, Simplified}; use uv_normalize::PackageName; use uv_pypi_types::DirectUrl; use uv_shell::escape_posix_for_single_quotes; @@ -424,16 +424,8 @@ fn install_script( let mut target = uv_fs::tempfile_in(&layout.scheme.scripts)?; let size_and_encoded_hash = copy_and_hash(&mut start.chain(script), &mut target)?; - target.persist(&script_absolute).map_err(|err| { - io::Error::new( - io::ErrorKind::Other, - format!( - "Failed to persist temporary file to {}: {}", - path.user_display(), - err.error - ), - ) - })?; + + persist_with_retry_sync(target, &script_absolute)?; fs::remove_file(&path)?; // Make the script executable. We just created the file, so we can set permissions directly. @@ -467,7 +459,7 @@ fn install_script( if permissions.mode() & 0o111 == 0o111 { // If the permissions are already executable, we don't need to change them. - fs::rename(&path, &script_absolute)?; + rename_with_retry_sync(&path, &script_absolute)?; } else { // If we have to modify the permissions, copy the file, since we might not own it. warn!( @@ -488,7 +480,7 @@ fn install_script( #[cfg(not(unix))] { - fs::rename(&path, &script_absolute)?; + rename_with_retry_sync(&path, &script_absolute)?; } None