From ac145d8150987335307eb5433f3795d439560319 Mon Sep 17 00:00:00 2001 From: Zanie Blue Date: Thu, 3 Apr 2025 11:13:07 -0500 Subject: [PATCH] Skip repeated directories in `PATH` when searching for Python interpreters (#12367) Closes https://github.com/astral-sh/uv/issues/12302 The change is visible in [this commit](https://github.com/astral-sh/uv/pull/12367/commits/49be22dad9b091a39e1c013032fb740dba652abf). --- Cargo.lock | 1 + crates/uv-python/Cargo.toml | 1 + crates/uv-python/src/discovery.rs | 63 ++++++++++++++++++---------- crates/uv/tests/it/python_list.rs | 68 +++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a284b1093..70054639b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5526,6 +5526,7 @@ dependencies = [ "reqwest-middleware", "reqwest-retry", "rmp-serde", + "rustc-hash", "same-file", "schemars", "serde", diff --git a/crates/uv-python/Cargo.toml b/crates/uv-python/Cargo.toml index 0a26279f9..f6ab41193 100644 --- a/crates/uv-python/Cargo.toml +++ b/crates/uv-python/Cargo.toml @@ -47,6 +47,7 @@ reqwest = { workspace = true } reqwest-middleware = { workspace = true } reqwest-retry = { workspace = true } rmp-serde = { workspace = true } +rustc-hash = { workspace = true } same-file = { workspace = true } schemars = { workspace = true, optional = true } serde = { workspace = true, features = ["derive"] } diff --git a/crates/uv-python/src/discovery.rs b/crates/uv-python/src/discovery.rs index b5a4972d6..c406a249e 100644 --- a/crates/uv-python/src/discovery.rs +++ b/crates/uv-python/src/discovery.rs @@ -1,5 +1,6 @@ use itertools::{Either, Itertools}; use regex::Regex; +use rustc_hash::{FxBuildHasher, FxHashSet}; use same_file::is_same_file; use std::env::consts::EXE_SUFFIX; use std::fmt::{self, Debug, Formatter}; @@ -496,6 +497,7 @@ fn python_executables_from_search_path<'a>( // check multiple names per directory while respecting the search path order and python names // precedence. let search_dirs: Vec<_> = env::split_paths(&search_path).collect(); + let mut seen_dirs = FxHashSet::with_capacity_and_hasher(search_dirs.len(), FxBuildHasher); search_dirs .into_iter() .filter(|dir| dir.is_dir()) @@ -506,33 +508,50 @@ fn python_executables_from_search_path<'a>( "Checking `PATH` directory for interpreters: {}", dir.display() ); - possible_names - .clone() - .into_iter() - .flat_map(move |name| { - // Since we're just working with a single directory at a time, we collect to simplify ownership - which::which_in_global(&*name, Some(&dir)) - .into_iter() - .flatten() - // We have to collect since `which` requires that the regex outlives its - // parameters, and the dir is local while we return the iterator. - .collect::>() + same_file::Handle::from_path(&dir) + // Skip directories we've already seen, to avoid inspecting interpreters multiple + // times when directories are repeated or symlinked in the `PATH` + .map(|handle| seen_dirs.insert(handle)) + .inspect(|fresh_dir| { + if !fresh_dir { + trace!("Skipping already seen directory: {}", dir.display()); + } }) - .chain(find_all_minor(implementation, version, &dir_clone)) - .filter(|path| !is_windows_store_shim(path)) - .inspect(|path| trace!("Found possible Python executable: {}", path.display())) - .chain( - // TODO(zanieb): Consider moving `python.bat` into `possible_names` to avoid a chain - cfg!(windows) - .then(move || { - which::which_in_global("python.bat", Some(&dir_clone)) + // If we cannot determine if the directory is unique, we'll assume it is + .unwrap_or(true) + .then(|| { + possible_names + .clone() + .into_iter() + .flat_map(move |name| { + // Since we're just working with a single directory at a time, we collect to simplify ownership + which::which_in_global(&*name, Some(&dir)) .into_iter() .flatten() + // We have to collect since `which` requires that the regex outlives its + // parameters, and the dir is local while we return the iterator. .collect::>() }) - .into_iter() - .flatten(), - ) + .chain(find_all_minor(implementation, version, &dir_clone)) + .filter(|path| !is_windows_store_shim(path)) + .inspect(|path| { + trace!("Found possible Python executable: {}", path.display()); + }) + .chain( + // TODO(zanieb): Consider moving `python.bat` into `possible_names` to avoid a chain + cfg!(windows) + .then(move || { + which::which_in_global("python.bat", Some(&dir_clone)) + .into_iter() + .flatten() + .collect::>() + }) + .into_iter() + .flatten(), + ) + }) + .into_iter() + .flatten() }) } diff --git a/crates/uv/tests/it/python_list.rs b/crates/uv/tests/it/python_list.rs index 43a1090eb..347993934 100644 --- a/crates/uv/tests/it/python_list.rs +++ b/crates/uv/tests/it/python_list.rs @@ -280,3 +280,71 @@ fn python_list_unsupported_version() { error: Invalid version request: Python <3.13 does not support free-threading but 3.12t was requested. "); } + +#[test] +fn python_list_duplicate_path_entries() { + let context: TestContext = TestContext::new_with_versions(&["3.11", "3.12"]) + .with_filtered_python_symlinks() + .with_filtered_python_keys(); + + // Construct a `PATH` with all entries duplicated + let path = std::env::join_paths( + std::env::split_paths(&context.python_path()) + .chain(std::env::split_paths(&context.python_path())), + ) + .unwrap(); + + uv_snapshot!(context.filters(), context.python_list().env(EnvVars::UV_TEST_PYTHON_PATH, &path), @r" + success: true + exit_code: 0 + ----- stdout ----- + cpython-3.12.[X]-[PLATFORM] [PYTHON-3.12] + cpython-3.11.[X]-[PLATFORM] [PYTHON-3.11] + + ----- stderr ----- + "); + + #[cfg(unix)] + { + // Construct a `PATH` with symlinks + let path = std::env::join_paths(std::env::split_paths(&context.python_path()).chain( + std::env::split_paths(&context.python_path()).map(|path| { + let dst = format!("{}-link", path.display()); + fs_err::os::unix::fs::symlink(&path, &dst).unwrap(); + std::path::PathBuf::from(dst) + }), + )) + .unwrap(); + + uv_snapshot!(context.filters(), context.python_list().env(EnvVars::UV_TEST_PYTHON_PATH, &path), @r" + success: true + exit_code: 0 + ----- stdout ----- + cpython-3.12.[X]-[PLATFORM] [PYTHON-3.12] + cpython-3.11.[X]-[PLATFORM] [PYTHON-3.11] + + ----- stderr ----- + "); + + // Reverse the order so the symlinks are first + let path = std::env::join_paths( + { + let mut paths = std::env::split_paths(&path).collect::>(); + paths.reverse(); + paths + } + .iter(), + ) + .unwrap(); + + uv_snapshot!(context.filters(), context.python_list().env(EnvVars::UV_TEST_PYTHON_PATH, &path), @r" + success: true + exit_code: 0 + ----- stdout ----- + cpython-3.12.[X]-[PLATFORM] [PYTHON-3.12]-link/python3 + cpython-3.11.[X]-[PLATFORM] [PYTHON-3.11]-link/python3 + + ----- stderr ----- + "); + } +}