diff --git a/Cargo.lock b/Cargo.lock index 2898f7af2f..2da1e90d49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2005,6 +2005,7 @@ dependencies = [ "notify", "red_knot_module_resolver", "red_knot_python_semantic", + "ruff_cache", "ruff_db", "ruff_python_ast", "rustc-hash 2.0.0", diff --git a/crates/red_knot/tests/file_watching.rs b/crates/red_knot/tests/file_watching.rs index dad017280f..257c0baab7 100644 --- a/crates/red_knot/tests/file_watching.rs +++ b/crates/red_knot/tests/file_watching.rs @@ -1184,8 +1184,9 @@ mod unix { ModuleName::new_static("bar.baz").unwrap(), ) .expect("Expected bar.baz to exist in site-packages."); - let baz_site_packages = + let baz_site_packages_path = case.workspace_path(".venv/lib/python3.12/site-packages/bar/baz.py"); + let baz_site_packages = case.system_file(&baz_site_packages_path).unwrap(); let baz_original = case.root_path().join("site-packages/bar/baz.py"); let baz_original_file = case.system_file(&baz_original).unwrap(); @@ -1195,12 +1196,12 @@ mod unix { ); assert_eq!( - source_text(case.db(), baz.file()).as_str(), + source_text(case.db(), baz_site_packages).as_str(), "def baz(): ..." ); assert_eq!( baz.file().path(case.db()).as_system_path(), - Some(&*baz_site_packages) + Some(&*baz_original) ); // Write to the symlink target. @@ -1212,7 +1213,7 @@ mod unix { case.db_mut().apply_changes(changes); assert_eq!( - source_text(case.db(), baz.file()).as_str(), + source_text(case.db(), baz_original_file).as_str(), "def baz(): print('Version 2')" ); @@ -1224,7 +1225,7 @@ mod unix { // it doesn't seem worth doing considering that as prominent tools like PyCharm don't support it. // Pyright does support it, thanks to chokidar. assert_ne!( - source_text(case.db(), baz_original_file).as_str(), + source_text(case.db(), baz_site_packages).as_str(), "def baz(): print('Version 2')" ); diff --git a/crates/red_knot_module_resolver/src/path.rs b/crates/red_knot_module_resolver/src/path.rs index 232ee9d55b..ec58973495 100644 --- a/crates/red_knot_module_resolver/src/path.rs +++ b/crates/red_knot_module_resolver/src/path.rs @@ -382,22 +382,27 @@ enum SearchPathInner { pub(crate) struct SearchPath(Arc); impl SearchPath { + fn directory_path(system: &dyn System, root: SystemPathBuf) -> SearchPathResult { + let canonicalized = system.canonicalize_path(&root).unwrap_or(root); + if system.is_directory(&canonicalized) { + Ok(canonicalized) + } else { + Err(SearchPathValidationError::NotADirectory(canonicalized)) + } + } + /// Create a new "Extra" search path pub(crate) fn extra(system: &dyn System, root: SystemPathBuf) -> SearchPathResult { - if system.is_directory(&root) { - Ok(Self(Arc::new(SearchPathInner::Extra(root)))) - } else { - Err(SearchPathValidationError::NotADirectory(root)) - } + Ok(Self(Arc::new(SearchPathInner::Extra( + Self::directory_path(system, root)?, + )))) } /// Create a new first-party search path, pointing to the user code we were directly invoked on pub(crate) fn first_party(system: &dyn System, root: SystemPathBuf) -> SearchPathResult { - if system.is_directory(&root) { - Ok(Self(Arc::new(SearchPathInner::FirstParty(root)))) - } else { - Err(SearchPathValidationError::NotADirectory(root)) - } + Ok(Self(Arc::new(SearchPathInner::FirstParty( + Self::directory_path(system, root)?, + )))) } /// Create a new standard-library search path pointing to a custom directory on disk @@ -408,12 +413,13 @@ impl SearchPath { typeshed.to_path_buf(), )); } - let stdlib = typeshed.join("stdlib"); - if !system.is_directory(&stdlib) { - return Err(SearchPathValidationError::NoStdlibSubdirectory( - typeshed.to_path_buf(), - )); - } + let stdlib = + Self::directory_path(system, typeshed.join("stdlib")).map_err(|err| match err { + SearchPathValidationError::NotADirectory(path) => { + SearchPathValidationError::NoStdlibSubdirectory(path) + } + err => err, + })?; let typeshed_versions = system_path_to_file(db.upcast(), stdlib.join("VERSIONS")).map_err(|err| match err { FileError::NotFound => SearchPathValidationError::NoVersionsFile(typeshed), @@ -444,20 +450,16 @@ impl SearchPath { system: &dyn System, root: SystemPathBuf, ) -> SearchPathResult { - if system.is_directory(&root) { - Ok(Self(Arc::new(SearchPathInner::SitePackages(root)))) - } else { - Err(SearchPathValidationError::NotADirectory(root)) - } + Ok(Self(Arc::new(SearchPathInner::SitePackages( + Self::directory_path(system, root)?, + )))) } /// Create a new search path pointing to an editable installation pub(crate) fn editable(system: &dyn System, root: SystemPathBuf) -> SearchPathResult { - if system.is_directory(&root) { - Ok(Self(Arc::new(SearchPathInner::Editable(root)))) - } else { - Err(SearchPathValidationError::NotADirectory(root)) - } + Ok(Self(Arc::new(SearchPathInner::Editable( + Self::directory_path(system, root)?, + )))) } #[must_use] diff --git a/crates/red_knot_workspace/Cargo.toml b/crates/red_knot_workspace/Cargo.toml index 3bcb9688a5..35c8cb0efa 100644 --- a/crates/red_knot_workspace/Cargo.toml +++ b/crates/red_knot_workspace/Cargo.toml @@ -15,6 +15,7 @@ license.workspace = true red_knot_module_resolver = { workspace = true } red_knot_python_semantic = { workspace = true } +ruff_cache = { workspace = true } ruff_db = { workspace = true, features = ["os", "cache"] } ruff_python_ast = { workspace = true } diff --git a/crates/red_knot_workspace/src/watch/workspace_watcher.rs b/crates/red_knot_workspace/src/watch/workspace_watcher.rs index 7853c11201..bac78414fa 100644 --- a/crates/red_knot_workspace/src/watch/workspace_watcher.rs +++ b/crates/red_knot_workspace/src/watch/workspace_watcher.rs @@ -1,20 +1,28 @@ +use std::fmt::{Formatter, Write}; +use std::hash::Hasher; + +use tracing::info; + +use red_knot_module_resolver::system_module_search_paths; +use ruff_cache::{CacheKey, CacheKeyHasher}; +use ruff_db::system::{SystemPath, SystemPathBuf}; +use ruff_db::Upcast; + use crate::db::RootDatabase; use crate::watch::Watcher; -use ruff_db::system::SystemPathBuf; -use rustc_hash::FxHashSet; -use std::fmt::{Formatter, Write}; -use tracing::info; /// Wrapper around a [`Watcher`] that watches the relevant paths of a workspace. pub struct WorkspaceWatcher { watcher: Watcher, /// The paths that need to be watched. This includes paths for which setting up file watching failed. - watched_paths: FxHashSet, + watched_paths: Vec, - /// Paths that should be watched but setting up the watcher failed for some reason. - /// This should be rare. - errored_paths: Vec, + /// True if registering a watcher for any path failed. + has_errored_paths: bool, + + /// Cache key over the paths that need watching. It allows short-circuiting if the paths haven't changed. + cache_key: Option, } impl WorkspaceWatcher { @@ -22,8 +30,9 @@ impl WorkspaceWatcher { pub fn new(watcher: Watcher, db: &RootDatabase) -> Self { let mut watcher = Self { watcher, - watched_paths: FxHashSet::default(), - errored_paths: Vec::new(), + watched_paths: Vec::new(), + cache_key: None, + has_errored_paths: false, }; watcher.update(db); @@ -32,53 +41,83 @@ impl WorkspaceWatcher { } pub fn update(&mut self, db: &RootDatabase) { - let new_watch_paths = db.workspace().paths_to_watch(db); + let search_paths: Vec<_> = system_module_search_paths(db.upcast()).collect(); + let workspace_path = db.workspace().root(db).to_path_buf(); - let mut added_folders = new_watch_paths.difference(&self.watched_paths).peekable(); - let mut removed_folders = self.watched_paths.difference(&new_watch_paths).peekable(); + let new_cache_key = Self::compute_cache_key(&workspace_path, &search_paths); - if added_folders.peek().is_none() && removed_folders.peek().is_none() { + if self.cache_key == Some(new_cache_key) { return; } - for added_folder in added_folders { - // Log a warning. It's not worth aborting if registering a single folder fails because - // Ruff otherwise stills works as expected. - if let Err(error) = self.watcher.watch(added_folder) { - // TODO: Log a user-facing warning. - tracing::warn!("Failed to setup watcher for path '{added_folder}': {error}. You have to restart Ruff after making changes to files under this path or you might see stale results."); - self.errored_paths.push(added_folder.clone()); + // Unregister all watch paths because ordering is important for linux because + // it only emits an event for the last added watcher if a subtree is covered by multiple watchers. + // A path can be covered by multiple watchers if a subdirectory symlinks to a path that's covered by another watch path: + // ```text + // - bar + // - baz.py + // - workspace + // - bar -> /bar + // - foo.py + // ``` + for path in self.watched_paths.drain(..) { + if let Err(error) = self.watcher.unwatch(&path) { + info!("Failed to remove the file watcher for the path '{path}: {error}."); } } - for removed_path in removed_folders { - if let Some(index) = self - .errored_paths - .iter() - .position(|path| path == removed_path) - { - self.errored_paths.swap_remove(index); - continue; - } + self.has_errored_paths = false; - if let Err(error) = self.watcher.unwatch(removed_path) { - info!("Failed to remove the file watcher for the path '{removed_path}: {error}."); + let workspace_path = workspace_path + .as_utf8_path() + .canonicalize_utf8() + .map(SystemPathBuf::from_utf8_path_buf) + .unwrap_or(workspace_path); + + // Find the non-overlapping module search paths and filter out paths that are already covered by the workspace. + // Module search paths are already canonicalized. + let unique_module_paths = ruff_db::system::deduplicate_nested_paths( + search_paths + .into_iter() + .filter(|path| !path.starts_with(&workspace_path)), + ) + .map(SystemPath::to_path_buf); + + // Now add the new paths, first starting with the workspace path and then + // adding the library search paths. + for path in std::iter::once(workspace_path).chain(unique_module_paths) { + // Log a warning. It's not worth aborting if registering a single folder fails because + // Ruff otherwise stills works as expected. + if let Err(error) = self.watcher.watch(&path) { + // TODO: Log a user-facing warning. + tracing::warn!("Failed to setup watcher for path '{path}': {error}. You have to restart Ruff after making changes to files under this path or you might see stale results."); + self.has_errored_paths = true; + } else { + self.watched_paths.push(path); } } info!( "Set up file watchers for {}", DisplayWatchedPaths { - paths: &new_watch_paths + paths: &self.watched_paths } ); - self.watched_paths = new_watch_paths; + self.cache_key = Some(new_cache_key); + } + + fn compute_cache_key(workspace_root: &SystemPath, search_paths: &[&SystemPath]) -> u64 { + let mut cache_key_hasher = CacheKeyHasher::new(); + search_paths.cache_key(&mut cache_key_hasher); + workspace_root.cache_key(&mut cache_key_hasher); + + cache_key_hasher.finish() } /// Returns `true` if setting up watching for any path failed. pub fn has_errored_paths(&self) -> bool { - !self.errored_paths.is_empty() + self.has_errored_paths } pub fn flush(&self) { @@ -91,7 +130,7 @@ impl WorkspaceWatcher { } struct DisplayWatchedPaths<'a> { - paths: &'a FxHashSet, + paths: &'a [SystemPathBuf], } impl std::fmt::Display for DisplayWatchedPaths<'_> { diff --git a/crates/red_knot_workspace/src/workspace.rs b/crates/red_knot_workspace/src/workspace.rs index f07a55ee11..584eae83da 100644 --- a/crates/red_knot_workspace/src/workspace.rs +++ b/crates/red_knot_workspace/src/workspace.rs @@ -4,7 +4,6 @@ use std::{collections::BTreeMap, sync::Arc}; use rustc_hash::{FxBuildHasher, FxHashSet}; pub use metadata::{PackageMetadata, WorkspaceMetadata}; -use red_knot_module_resolver::system_module_search_paths; use ruff_db::{ files::{system_path_to_file, File}, system::{walk_directory::WalkState, SystemPath, SystemPathBuf}, @@ -242,17 +241,6 @@ impl Workspace { FxHashSet::default() } } - - /// Returns the paths that should be watched. - /// - /// The paths that require watching might change with every revision. - pub fn paths_to_watch(self, db: &dyn Db) -> FxHashSet { - ruff_db::system::deduplicate_nested_paths( - std::iter::once(self.root(db)).chain(system_module_search_paths(db.upcast())), - ) - .map(SystemPath::to_path_buf) - .collect() - } } #[salsa::tracked] diff --git a/crates/ruff_db/src/system.rs b/crates/ruff_db/src/system.rs index ed1cea552d..eee02c363a 100644 --- a/crates/ruff_db/src/system.rs +++ b/crates/ruff_db/src/system.rs @@ -43,6 +43,16 @@ pub trait System: Debug { /// This function will traverse symbolic links to query information about the destination file. fn path_metadata(&self, path: &SystemPath) -> Result; + /// Returns the canonical, absolute form of a path with all intermediate components normalized + /// and symbolic links resolved. + /// + /// # Errors + /// This function will return an error in the following situations, but is not limited to just these cases: + /// * `path` does not exist. + /// * A non-final component in `path` is not a directory. + /// * the symlink target path is not valid Unicode. + fn canonicalize_path(&self, path: &SystemPath) -> Result; + /// Reads the content of the file at `path` into a [`String`]. fn read_to_string(&self, path: &SystemPath) -> Result; diff --git a/crates/ruff_db/src/system/memory_fs.rs b/crates/ruff_db/src/system/memory_fs.rs index 3754a5b9c2..0194fb646d 100644 --- a/crates/ruff_db/src/system/memory_fs.rs +++ b/crates/ruff_db/src/system/memory_fs.rs @@ -94,6 +94,10 @@ impl MemoryFileSystem { metadata(self, path.as_ref()) } + pub fn canonicalize(&self, path: impl AsRef) -> SystemPathBuf { + SystemPathBuf::from_utf8_path_buf(self.normalize_path(path)) + } + pub fn is_file(&self, path: impl AsRef) -> bool { let by_path = self.inner.by_path.read().unwrap(); let normalized = self.normalize_path(path.as_ref()); diff --git a/crates/ruff_db/src/system/os.rs b/crates/ruff_db/src/system/os.rs index 0a0102d6c3..28678a7148 100644 --- a/crates/ruff_db/src/system/os.rs +++ b/crates/ruff_db/src/system/os.rs @@ -63,6 +63,12 @@ impl System for OsSystem { }) } + fn canonicalize_path(&self, path: &SystemPath) -> Result { + path.as_utf8_path() + .canonicalize_utf8() + .map(SystemPathBuf::from_utf8_path_buf) + } + fn read_to_string(&self, path: &SystemPath) -> Result { std::fs::read_to_string(path.as_std_path()) } diff --git a/crates/ruff_db/src/system/path.rs b/crates/ruff_db/src/system/path.rs index 16b257f9fc..df98280c1d 100644 --- a/crates/ruff_db/src/system/path.rs +++ b/crates/ruff_db/src/system/path.rs @@ -733,26 +733,30 @@ impl ruff_cache::CacheKey for SystemVirtualPathBuf { /// let paths = vec![SystemPath::new("/a/b/c"), SystemPath::new("/a/b"), SystemPath::new("/a/beta"), SystemPath::new("/a/b/c")]; /// assert_eq!(deduplicate_nested_paths(paths).collect::>(), &[SystemPath::new("/a/b"), SystemPath::new("/a/beta")]); /// ``` -pub fn deduplicate_nested_paths<'a, I>(paths: I) -> DeduplicatedNestedPathsIter<'a> +pub fn deduplicate_nested_paths(paths: I) -> DeduplicatedNestedPathsIter

where - I: IntoIterator, + I: IntoIterator, + P: AsRef, { DeduplicatedNestedPathsIter::new(paths) } -pub struct DeduplicatedNestedPathsIter<'a> { - inner: std::vec::IntoIter<&'a SystemPath>, - next: Option<&'a SystemPath>, +pub struct DeduplicatedNestedPathsIter

{ + inner: std::vec::IntoIter

, + next: Option

, } -impl<'a> DeduplicatedNestedPathsIter<'a> { +impl

DeduplicatedNestedPathsIter

+where + P: AsRef, +{ fn new(paths: I) -> Self where - I: IntoIterator, + I: IntoIterator, { let mut paths = paths.into_iter().collect::>(); // Sort the path to ensure that e.g. `/a/b/c`, comes right after `/a/b`. - paths.sort_unstable(); + paths.sort_unstable_by(|left, right| left.as_ref().cmp(right.as_ref())); let mut iter = paths.into_iter(); @@ -763,15 +767,18 @@ impl<'a> DeduplicatedNestedPathsIter<'a> { } } -impl<'a> Iterator for DeduplicatedNestedPathsIter<'a> { - type Item = &'a SystemPath; +impl

Iterator for DeduplicatedNestedPathsIter

+where + P: AsRef, +{ + type Item = P; fn next(&mut self) -> Option { let current = self.next.take()?; for next in self.inner.by_ref() { // Skip all paths that have the same prefix as the current path - if !next.starts_with(current) { + if !next.as_ref().starts_with(current.as_ref()) { self.next = Some(next); break; } diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index c5f6dd4060..6cb01c79c7 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -7,7 +7,8 @@ use ruff_python_trivia::textwrap; use crate::files::File; use crate::system::{ - DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath, SystemVirtualPath, + DirectoryEntry, MemoryFileSystem, Metadata, Result, System, SystemPath, SystemPathBuf, + SystemVirtualPath, }; use crate::Db; @@ -140,6 +141,13 @@ impl System for TestSystem { TestSystemInner::Stub(fs) => Ok(Box::new(fs.read_directory(path)?)), } } + + fn canonicalize_path(&self, path: &SystemPath) -> Result { + match &self.inner { + TestSystemInner::System(fs) => fs.canonicalize_path(path), + TestSystemInner::Stub(fs) => Ok(fs.canonicalize(path)), + } + } } /// Extension trait for databases that use [`TestSystem`].