diff --git a/Cargo.lock b/Cargo.lock index f3055c99e6..21a0e64998 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1866,6 +1866,7 @@ dependencies = [ "ruff_python_ast", "rustc-hash 2.0.0", "salsa", + "tempfile", "tracing", "tracing-subscriber", "tracing-tree", diff --git a/crates/red_knot/Cargo.toml b/crates/red_knot/Cargo.toml index f244b16bb5..a4c6166d60 100644 --- a/crates/red_knot/Cargo.toml +++ b/crates/red_knot/Cargo.toml @@ -31,6 +31,9 @@ tracing = { workspace = true } tracing-subscriber = { workspace = true } tracing-tree = { workspace = true } +[dev-dependencies] +tempfile = { workspace = true } + [lints] workspace = true diff --git a/crates/red_knot/src/db.rs b/crates/red_knot/src/db.rs index 23c1acc16a..f5c366c5d5 100644 --- a/crates/red_knot/src/db.rs +++ b/crates/red_knot/src/db.rs @@ -5,16 +5,17 @@ use salsa::{Cancelled, Database, DbWithJar}; use red_knot_module_resolver::{vendored_typeshed_stubs, Db as ResolverDb, Jar as ResolverJar}; use red_knot_python_semantic::{Db as SemanticDb, Jar as SemanticJar}; -use ruff_db::files::{system_path_to_file, File, Files}; +use ruff_db::files::{File, Files}; use ruff_db::program::{Program, ProgramSettings}; use ruff_db::system::System; use ruff_db::vendored::VendoredFileSystem; use ruff_db::{Db as SourceDb, Jar as SourceJar, Upcast}; use crate::lint::{lint_semantic, lint_syntax, unwind_if_cancelled, Diagnostics}; -use crate::watch::{FileChangeKind, FileWatcherChange}; use crate::workspace::{check_file, Package, Workspace, WorkspaceMetadata}; +mod changes; + pub trait Db: DbWithJar + SemanticDb + Upcast {} #[salsa::jar(db=Db)] @@ -59,58 +60,6 @@ impl RootDatabase { self.workspace.unwrap() } - #[tracing::instrument(level = "debug", skip(self, changes))] - pub fn apply_changes(&mut self, changes: Vec) { - let workspace = self.workspace(); - let workspace_path = workspace.root(self).to_path_buf(); - - // TODO: Optimize change tracking by only reloading a package if a file that is part of the package was changed. - let mut structural_change = false; - for change in changes { - if matches!( - change.path.file_name(), - Some(".gitignore" | ".ignore" | "ruff.toml" | ".ruff.toml" | "pyproject.toml") - ) { - // Changes to ignore files or settings can change the workspace structure or add/remove files - // from packages. - structural_change = true; - } else { - match change.kind { - FileChangeKind::Created => { - // Reload the package when a new file was added. This is necessary because the file might be excluded - // by a gitignore. - if workspace.package(self, &change.path).is_some() { - structural_change = true; - } - } - FileChangeKind::Modified => {} - FileChangeKind::Deleted => { - if let Some(package) = workspace.package(self, &change.path) { - if let Some(file) = system_path_to_file(self, &change.path) { - package.remove_file(self, file); - } - } - } - } - } - - File::touch_path(self, &change.path); - } - - if structural_change { - match WorkspaceMetadata::from_path(&workspace_path, self.system()) { - Ok(metadata) => { - tracing::debug!("Reload workspace after structural change."); - // TODO: Handle changes in the program settings. - workspace.reload(self, metadata); - } - Err(error) => { - tracing::error!("Failed to load workspace, keep old workspace: {error}"); - } - } - } - } - /// Checks all open files in the workspace and its dependencies. 
pub fn check(&self) -> Result, Cancelled> { self.with_db(|db| db.workspace().check(db)) @@ -152,18 +101,29 @@ impl Upcast for RootDatabase { fn upcast(&self) -> &(dyn SemanticDb + 'static) { self } + + fn upcast_mut(&mut self) -> &mut (dyn SemanticDb + 'static) { + self + } } impl Upcast for RootDatabase { fn upcast(&self) -> &(dyn SourceDb + 'static) { self } + + fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) { + self + } } impl Upcast for RootDatabase { fn upcast(&self) -> &(dyn ResolverDb + 'static) { self } + fn upcast_mut(&mut self) -> &mut (dyn ResolverDb + 'static) { + self + } } impl ResolverDb for RootDatabase {} diff --git a/crates/red_knot/src/db/changes.rs b/crates/red_knot/src/db/changes.rs new file mode 100644 index 0000000000..df527f68fd --- /dev/null +++ b/crates/red_knot/src/db/changes.rs @@ -0,0 +1,190 @@ +use rustc_hash::FxHashSet; + +use ruff_db::files::{system_path_to_file, File, Files}; +use ruff_db::system::walk_directory::WalkState; +use ruff_db::system::SystemPath; +use ruff_db::Db; + +use crate::db::RootDatabase; +use crate::watch; +use crate::watch::{CreatedKind, DeletedKind}; +use crate::workspace::WorkspaceMetadata; + +impl RootDatabase { + #[tracing::instrument(level = "debug", skip(self, changes))] + pub fn apply_changes(&mut self, changes: Vec) { + let workspace = self.workspace(); + let workspace_path = workspace.root(self).to_path_buf(); + + let mut workspace_change = false; + // Packages that need reloading + let mut changed_packages = FxHashSet::default(); + // Paths that were added + let mut added_paths = FxHashSet::default(); + + // Deduplicate the `sync` calls. Many file watchers emit multiple events for the same path. + let mut synced_files = FxHashSet::default(); + let mut synced_recursively = FxHashSet::default(); + + let mut sync_path = |db: &mut RootDatabase, path: &SystemPath| { + if synced_files.insert(path.to_path_buf()) { + File::sync_path(db, path); + } + }; + + let mut sync_recursively = |db: &mut RootDatabase, path: &SystemPath| { + if synced_recursively.insert(path.to_path_buf()) { + Files::sync_recursively(db, path); + } + }; + + for change in changes { + if let Some(path) = change.path() { + if matches!( + path.file_name(), + Some(".gitignore" | ".ignore" | "ruff.toml" | ".ruff.toml" | "pyproject.toml") + ) { + // Changes to ignore files or settings can change the workspace structure or add/remove files + // from packages. + if let Some(package) = workspace.package(self, path) { + changed_packages.insert(package); + } else { + workspace_change = true; + } + + continue; + } + } + + match change { + watch::ChangeEvent::Changed { path, kind: _ } => sync_path(self, &path), + + watch::ChangeEvent::Created { kind, path } => { + match kind { + CreatedKind::File => sync_path(self, &path), + CreatedKind::Directory | CreatedKind::Any => { + sync_recursively(self, &path); + } + } + + if self.system().is_file(&path) { + // Add the parent directory because `walkdir` always visits explicitly passed files + // even if they match an exclude filter. + added_paths.insert(path.parent().unwrap().to_path_buf()); + } else { + added_paths.insert(path); + } + } + + watch::ChangeEvent::Deleted { kind, path } => { + let is_file = match kind { + DeletedKind::File => true, + DeletedKind::Directory => { + // file watchers emit an event for every deleted file. No need to scan the entire dir. 
+ continue; + } + DeletedKind::Any => self + .files + .try_system(self, &path) + .is_some_and(|file| file.exists(self)), + }; + + if is_file { + sync_path(self, &path); + + if let Some(package) = workspace.package(self, &path) { + if let Some(file) = self.files().try_system(self, &path) { + package.remove_file(self, file); + } + } + } else { + sync_recursively(self, &path); + + // TODO: Remove after converting `package.files()` to a salsa query. + if let Some(package) = workspace.package(self, &path) { + changed_packages.insert(package); + } else { + workspace_change = true; + } + } + } + + watch::ChangeEvent::Rescan => { + workspace_change = true; + Files::sync_all(self); + break; + } + } + } + + if workspace_change { + match WorkspaceMetadata::from_path(&workspace_path, self.system()) { + Ok(metadata) => { + tracing::debug!("Reload workspace after structural change."); + // TODO: Handle changes in the program settings. + workspace.reload(self, metadata); + } + Err(error) => { + tracing::error!("Failed to load workspace, keep old workspace: {error}"); + } + } + + return; + } + + let mut added_paths = added_paths.into_iter().filter(|path| { + let Some(package) = workspace.package(self, path) else { + return false; + }; + + // Skip packages that need reloading + !changed_packages.contains(&package) + }); + + // Use directory walking to discover newly added files. + if let Some(path) = added_paths.next() { + let mut walker = self.system().walk_directory(&path); + + for extra_path in added_paths { + walker = walker.add(&extra_path); + } + + let added_paths = std::sync::Mutex::new(Vec::default()); + + walker.run(|| { + Box::new(|entry| { + let Ok(entry) = entry else { + return WalkState::Continue; + }; + + if !entry.file_type().is_file() { + return WalkState::Continue; + } + + let mut paths = added_paths.lock().unwrap(); + + paths.push(entry.into_path()); + + WalkState::Continue + }) + }); + + for path in added_paths.into_inner().unwrap() { + let package = workspace.package(self, &path); + let file = system_path_to_file(self, &path); + + if let (Some(package), Some(file)) = (package, file) { + package.add_file(self, file); + } + } + } + + // Reload + for package in changed_packages { + package.reload_files(self); + } + } +} + +#[cfg(test)] +mod tests {} diff --git a/crates/red_knot/src/main.rs b/crates/red_knot/src/main.rs index 35bfe23806..dac3e6fe6a 100644 --- a/crates/red_knot/src/main.rs +++ b/crates/red_knot/src/main.rs @@ -11,8 +11,8 @@ use tracing_subscriber::{Layer, Registry}; use tracing_tree::time::Uptime; use red_knot::db::RootDatabase; -use red_knot::watch::FileWatcher; -use red_knot::watch::FileWatcherChange; +use red_knot::watch; +use red_knot::watch::Watcher; use red_knot::workspace::WorkspaceMetadata; use ruff_db::program::{ProgramSettings, SearchPathSettings}; use ruff_db::system::{OsSystem, System, SystemPathBuf}; @@ -57,6 +57,13 @@ struct Args { #[clap(flatten)] verbosity: Verbosity, + + #[arg( + long, + help = "Run in watch mode by re-running whenever files change", + short = 'W' + )] + watch: bool, } #[allow( @@ -72,6 +79,7 @@ pub fn main() -> anyhow::Result<()> { extra_search_path: extra_paths, target_version, verbosity, + watch, } = Args::parse_from(std::env::args().collect::>()); let verbosity = verbosity.level(); @@ -117,125 +125,120 @@ pub fn main() -> anyhow::Result<()> { } })?; - let file_changes_notifier = main_loop.file_changes_notifier(); - - // Watch for file changes and re-trigger the analysis. 
- let mut file_watcher = FileWatcher::new(move |changes| { - file_changes_notifier.notify(changes); - })?; - - file_watcher.watch_folder(db.workspace().root(&db).as_std_path())?; - - main_loop.run(&mut db); - - println!("{}", countme::get_all()); + if watch { + main_loop.watch(&mut db)?; + } else { + main_loop.run(&mut db); + } Ok(()) } struct MainLoop { - verbosity: Option, - orchestrator: crossbeam_channel::Sender, + /// Sender that can be used to send messages to the main loop. + sender: crossbeam_channel::Sender, + + /// Receiver for the messages sent **to** the main loop. receiver: crossbeam_channel::Receiver, + + /// The file system watcher, if running in watch mode. + watcher: Option, + + verbosity: Option, } impl MainLoop { fn new(verbosity: Option) -> (Self, MainLoopCancellationToken) { - let (orchestrator_sender, orchestrator_receiver) = crossbeam_channel::bounded(1); - let (main_loop_sender, main_loop_receiver) = crossbeam_channel::bounded(1); - - let mut orchestrator = Orchestrator { - receiver: orchestrator_receiver, - main_loop: main_loop_sender.clone(), - revision: 0, - }; - - std::thread::spawn(move || { - orchestrator.run(); - }); + let (sender, receiver) = crossbeam_channel::bounded(10); ( Self { + sender: sender.clone(), + receiver, + watcher: None, verbosity, - orchestrator: orchestrator_sender, - receiver: main_loop_receiver, - }, - MainLoopCancellationToken { - sender: main_loop_sender, }, + MainLoopCancellationToken { sender }, ) } - fn file_changes_notifier(&self) -> FileChangesNotifier { - FileChangesNotifier { - sender: self.orchestrator.clone(), - } + fn watch(mut self, db: &mut RootDatabase) -> anyhow::Result<()> { + let sender = self.sender.clone(); + let mut watcher = watch::directory_watcher(move |event| { + sender.send(MainLoopMessage::ApplyChanges(event)).unwrap(); + })?; + + watcher.watch(db.workspace().root(db))?; + + self.watcher = Some(watcher); + + self.run(db); + + Ok(()) } #[allow(clippy::print_stderr)] fn run(self, db: &mut RootDatabase) { - self.orchestrator.send(OrchestratorMessage::Run).unwrap(); + // Schedule the first check. + self.sender.send(MainLoopMessage::CheckWorkspace).unwrap(); + let mut revision = 0usize; for message in &self.receiver { tracing::trace!("Main Loop: Tick"); match message { - MainLoopMessage::CheckWorkspace { revision } => { + MainLoopMessage::CheckWorkspace => { let db = db.snapshot(); - let orchestrator = self.orchestrator.clone(); + let sender = self.sender.clone(); // Spawn a new task that checks the workspace. This needs to be done in a separate thread // to prevent blocking the main loop here. rayon::spawn(move || { if let Ok(result) = db.check() { - orchestrator - .send(OrchestratorMessage::CheckCompleted { - diagnostics: result, - revision, - }) - .unwrap(); + // Send the result back to the main loop for printing. + sender + .send(MainLoopMessage::CheckCompleted { result, revision }) + .ok(); } }); } + + MainLoopMessage::CheckCompleted { + result, + revision: check_revision, + } => { + if check_revision == revision { + eprintln!("{}", result.join("\n")); + + if self.verbosity == Some(VerbosityLevel::Trace) { + eprintln!("{}", countme::get_all()); + } + } + + if self.watcher.is_none() { + return self.exit(); + } + } + MainLoopMessage::ApplyChanges(changes) => { + revision += 1; // Automatically cancels any pending queries and waits for them to complete. 
db.apply_changes(changes); - } - MainLoopMessage::CheckCompleted(diagnostics) => { - eprintln!("{}", diagnostics.join("\n")); - if self.verbosity == Some(VerbosityLevel::Trace) { - eprintln!("{}", countme::get_all()); - } + self.sender.send(MainLoopMessage::CheckWorkspace).unwrap(); } MainLoopMessage::Exit => { - if self.verbosity == Some(VerbosityLevel::Trace) { - eprintln!("{}", countme::get_all()); - } - return; + return self.exit(); } } } } -} -impl Drop for MainLoop { - fn drop(&mut self) { - self.orchestrator - .send(OrchestratorMessage::Shutdown) - .unwrap(); - } -} - -#[derive(Debug, Clone)] -struct FileChangesNotifier { - sender: crossbeam_channel::Sender, -} - -impl FileChangesNotifier { - fn notify(&self, changes: Vec) { - self.sender - .send(OrchestratorMessage::FileChanges(changes)) - .unwrap(); + #[allow(clippy::print_stderr, clippy::unused_self)] + fn exit(self) { + if self.verbosity == Some(VerbosityLevel::Trace) { + eprintln!("Exit"); + eprintln!("{}", countme::get_all()); + } } } @@ -250,115 +253,16 @@ impl MainLoopCancellationToken { } } -struct Orchestrator { - /// Sends messages to the main loop. - main_loop: crossbeam_channel::Sender, - /// Receives messages from the main loop. - receiver: crossbeam_channel::Receiver, - revision: usize, -} - -impl Orchestrator { - #[allow(clippy::print_stderr)] - fn run(&mut self) { - while let Ok(message) = self.receiver.recv() { - match message { - OrchestratorMessage::Run => { - self.main_loop - .send(MainLoopMessage::CheckWorkspace { - revision: self.revision, - }) - .unwrap(); - } - - OrchestratorMessage::CheckCompleted { - diagnostics, - revision, - } => { - // Only take the diagnostics if they are for the latest revision. - if self.revision == revision { - self.main_loop - .send(MainLoopMessage::CheckCompleted(diagnostics)) - .unwrap(); - } else { - tracing::debug!("Discarding diagnostics for outdated revision {revision} (current: {}).", self.revision); - } - } - - OrchestratorMessage::FileChanges(changes) => { - // Request cancellation, but wait until all analysis tasks have completed to - // avoid stale messages in the next main loop. - - self.revision += 1; - self.debounce_changes(changes); - } - OrchestratorMessage::Shutdown => { - return self.shutdown(); - } - } - } - } - - fn debounce_changes(&self, mut changes: Vec) { - loop { - // Consume possibly incoming file change messages before running a new analysis, but don't wait for more than 100ms. - crossbeam_channel::select! { - recv(self.receiver) -> message => { - match message { - Ok(OrchestratorMessage::Shutdown) => { - return self.shutdown(); - } - Ok(OrchestratorMessage::FileChanges(file_changes)) => { - changes.extend(file_changes); - } - - Ok(OrchestratorMessage::CheckCompleted { .. })=> { - // disregard any outdated completion message. - } - Ok(OrchestratorMessage::Run) => unreachable!("The orchestrator is already running."), - - Err(_) => { - // There are no more senders, no point in waiting for more messages - return; - } - } - }, - default(std::time::Duration::from_millis(10)) => { - // No more file changes after 10 ms, send the changes and schedule a new analysis - self.main_loop.send(MainLoopMessage::ApplyChanges(changes)).unwrap(); - self.main_loop.send(MainLoopMessage::CheckWorkspace { revision: self.revision}).unwrap(); - return; - } - } - } - } - - #[allow(clippy::unused_self)] - fn shutdown(&self) { - tracing::trace!("Shutting down orchestrator."); - } -} - /// Message sent from the orchestrator to the main loop. 
#[derive(Debug)] enum MainLoopMessage { - CheckWorkspace { revision: usize }, - CheckCompleted(Vec<String>), - ApplyChanges(Vec<FileWatcherChange>), - Exit, -} - -#[derive(Debug)] -enum OrchestratorMessage { - Run, - Shutdown, - + CheckWorkspace, CheckCompleted { - diagnostics: Vec<String>, + result: Vec<String>, revision: usize, }, - - FileChanges(Vec<FileWatcherChange>), + ApplyChanges(Vec<watch::ChangeEvent>), + Exit, } fn setup_tracing(verbosity: Option<VerbosityLevel>) { diff --git a/crates/red_knot/src/watch.rs b/crates/red_knot/src/watch.rs index 440db58690..f68da05338 100644 --- a/crates/red_knot/src/watch.rs +++ b/crates/red_knot/src/watch.rs @@ -1,111 +1,92 @@ -use std::path::Path; - -use anyhow::Context; -use notify::event::{CreateKind, ModifyKind, RemoveKind}; -use notify::{recommended_watcher, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher}; - use ruff_db::system::{SystemPath, SystemPathBuf}; +pub use watcher::{directory_watcher, EventHandler, Watcher}; -pub struct FileWatcher { - watcher: RecommendedWatcher, +mod watcher; + +/// Classification of a file system change event. +/// +/// ## Renaming a path +/// Renaming a path creates a [`ChangeEvent::Deleted`] event for the old path and/or a [`ChangeEvent::Created`] for the new location. +/// Whether both events are created or just one of them depends on where the path was moved from and to: +/// +/// * Inside the watched directory: Both events are created. +/// * From a watched directory to a non-watched directory: Only a [`ChangeEvent::Deleted`] event is created. +/// * From a non-watched directory to a watched directory: Only a [`ChangeEvent::Created`] event is created. +/// +/// ## Renaming a directory +/// It's up to the file watcher implementation to aggregate the rename events for a directory into a single rename +/// event instead of emitting an event for each file or subdirectory in that path. +#[derive(Debug, PartialEq, Eq)] +pub enum ChangeEvent { + /// A new path was created. + Created { + path: SystemPathBuf, + kind: CreatedKind, + }, + + /// The content or metadata of a path was changed. + Changed { + path: SystemPathBuf, + kind: ChangedKind, + }, + + /// A path was deleted. + Deleted { + path: SystemPathBuf, + kind: DeletedKind, + }, + + /// The file watcher failed to observe some changes and now is out of sync with the file system. + /// + /// This can happen if many files are changed at once. The consumer should rescan all files to catch up + /// with the file system. + Rescan, } -pub trait EventHandler: Send + 'static { - fn handle(&self, changes: Vec<FileWatcherChange>); -} +impl ChangeEvent { + pub fn file_name(&self) -> Option<&str> { + self.path().and_then(|path| path.file_name()) + } -impl<F> EventHandler for F -where - F: Fn(Vec<FileWatcherChange>) + Send + 'static, -{ - fn handle(&self, changes: Vec<FileWatcherChange>) { - let f = self; - f(changes); + pub fn path(&self) -> Option<&SystemPath> { + match self { + ChangeEvent::Created { path, .. } + | ChangeEvent::Changed { path, .. } + | ChangeEvent::Deleted { path, .. } => Some(path), + ChangeEvent::Rescan => None, + } } } -impl FileWatcher { - pub fn new<E>(handler: E) -> anyhow::Result<Self> - where - E: EventHandler, - { - Self::from_handler(Box::new(handler)) - } +/// Classification of an event that creates a new path. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum CreatedKind { + /// A file was created.
+ File, - fn from_handler(handler: Box) -> anyhow::Result { - let watcher = recommended_watcher(move |event: notify::Result| { - match event { - Ok(event) => { - // TODO verify that this handles all events correctly - let change_kind = match event.kind { - EventKind::Create(CreateKind::File) => FileChangeKind::Created, - EventKind::Modify(ModifyKind::Name(notify::event::RenameMode::From)) => { - FileChangeKind::Deleted - } - EventKind::Modify(ModifyKind::Name(notify::event::RenameMode::To)) => { - FileChangeKind::Created - } - EventKind::Modify(ModifyKind::Name(notify::event::RenameMode::Any)) => { - // TODO Introduce a better catch all event for cases that we don't understand. - FileChangeKind::Created - } - EventKind::Modify(ModifyKind::Name(notify::event::RenameMode::Both)) => { - todo!("Handle both create and delete event."); - } - EventKind::Modify(_) => FileChangeKind::Modified, - EventKind::Remove(RemoveKind::File) => FileChangeKind::Deleted, - _ => { - return; - } - }; + /// A directory was created. + Directory, - let mut changes = Vec::new(); - - for path in event.paths { - if let Some(fs_path) = SystemPath::from_std_path(&path) { - changes - .push(FileWatcherChange::new(fs_path.to_path_buf(), change_kind)); - } - } - - if !changes.is_empty() { - handler.handle(changes); - } - } - // TODO proper error handling - Err(err) => { - panic!("Error: {err}"); - } - } - }) - .context("Failed to create file watcher.")?; - - Ok(Self { watcher }) - } - - pub fn watch_folder(&mut self, path: &Path) -> anyhow::Result<()> { - self.watcher.watch(path, RecursiveMode::Recursive)?; - - Ok(()) - } + /// A file, directory, or any other kind of path was created. + Any, } -#[derive(Clone, Debug)] -pub struct FileWatcherChange { - pub path: SystemPathBuf, - #[allow(unused)] - pub kind: FileChangeKind, -} +/// Classification of an event related to a content or metadata change. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ChangedKind { + /// The content of a file was changed. + FileContent, -impl FileWatcherChange { - pub fn new(path: SystemPathBuf, kind: FileChangeKind) -> Self { - Self { path, kind } - } + /// The metadata of a file was changed. + FileMetadata, + + /// Either the content or metadata of a path was changed. + Any, } #[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum FileChangeKind { - Created, - Modified, - Deleted, +pub enum DeletedKind { + File, + Directory, + Any, } diff --git a/crates/red_knot/src/watch/watcher.rs b/crates/red_knot/src/watch/watcher.rs new file mode 100644 index 0000000000..6e9f712302 --- /dev/null +++ b/crates/red_knot/src/watch/watcher.rs @@ -0,0 +1,393 @@ +use notify::event::{CreateKind, MetadataKind, ModifyKind, RemoveKind, RenameMode}; +use notify::{recommended_watcher, EventKind, RecommendedWatcher, RecursiveMode, Watcher as _}; + +use ruff_db::system::{SystemPath, SystemPathBuf}; + +use crate::watch::{ChangeEvent, ChangedKind, CreatedKind, DeletedKind}; + +/// Creates a new watcher observing file system changes. +/// +/// The watcher debounces events, but guarantees to send all changes eventually (even if the file system keeps changing). 
+pub fn directory_watcher<H>(handler: H) -> notify::Result<Watcher> +where + H: EventHandler, +{ + let (sender, receiver) = crossbeam::channel::bounded(20); + + let debouncer = std::thread::Builder::new() + .name("watcher::debouncer".to_string()) + .spawn(move || { + // Wait for the next set of changes + for message in &receiver { + let event = match message { + DebouncerMessage::Event(event) => event, + DebouncerMessage::Flush => { + continue; + } + DebouncerMessage::Exit => { + return; + } + }; + + let mut debouncer = Debouncer::default(); + + debouncer.add_result(event); + + // Debounce any new incoming changes: + // * Take any new incoming change events and merge them with the previous change events + // * If there are no new incoming change events after 10 ms, flush the changes and wait for the next notify event. + // * Flush no later than after 3s. + loop { + let start = std::time::Instant::now(); + + crossbeam::select! { + recv(receiver) -> message => { + match message { + Ok(DebouncerMessage::Event(event)) => { + debouncer.add_result(event); + + // Ensure that we flush the changes eventually. + if start.elapsed() > std::time::Duration::from_secs(3) { + break; + } + } + Ok(DebouncerMessage::Flush) => { + break; + } + + Ok(DebouncerMessage::Exit) => { + return; + }, + + Err(_) => { + // There are no more senders. There's no point in waiting for more messages + return; + } + } + }, + default(std::time::Duration::from_millis(10)) => { + break; + } + } + } + + // No more file changes after 10 ms; send the accumulated changes to the handler. + let events = debouncer.into_events(); + + if !events.is_empty() { + handler.handle(events); + } + } + }) + .unwrap(); + + let debouncer_sender = sender.clone(); + let watcher = + recommended_watcher(move |event| sender.send(DebouncerMessage::Event(event)).unwrap())?; + + Ok(Watcher { + watcher, + debouncer_sender, + debouncer_thread: Some(debouncer), + }) +} + +#[derive(Debug)] +enum DebouncerMessage { + /// A new file system event. + Event(notify::Result<notify::Event>), + + Flush, + + /// Exit the debouncer thread. + Exit, +} + +pub struct Watcher { + watcher: RecommendedWatcher, + debouncer_sender: crossbeam::channel::Sender<DebouncerMessage>, + debouncer_thread: Option<std::thread::JoinHandle<()>>, +} + +impl Watcher { + /// Sets up file watching for `path`. + pub fn watch(&mut self, path: &SystemPath) -> notify::Result<()> { + self.watcher + .watch(path.as_std_path(), RecursiveMode::Recursive) + } + + /// Stops file watching for `path`. + pub fn unwatch(&mut self, path: &SystemPath) -> notify::Result<()> { + self.watcher.unwatch(path.as_std_path()) + } + + /// Stops the file watcher. + /// + /// Pending events will be discarded. + /// + /// The call blocks until the watcher has stopped. + pub fn stop(mut self) { + self.set_stop(); + if let Some(debouncer) = self.debouncer_thread.take() { + debouncer.join().unwrap(); + } + } + + /// Flushes any pending events.
+ pub fn flush(&self) { + self.debouncer_sender.send(DebouncerMessage::Flush).unwrap(); + } + + fn set_stop(&mut self) { + self.debouncer_sender.send(DebouncerMessage::Exit).ok(); + } +} + +impl Drop for Watcher { + fn drop(&mut self) { + self.set_stop(); + } +} + +#[derive(Default)] +struct Debouncer { + events: Vec<ChangeEvent>, + rescan_event: Option<ChangeEvent>, +} + +impl Debouncer { + #[tracing::instrument(level = "trace", skip(self))] + fn add_result(&mut self, result: notify::Result<notify::Event>) { + match result { + Ok(event) => self.add_event(event), + Err(error) => self.add_error(error), + } + } + + #[allow(clippy::unused_self, clippy::needless_pass_by_value)] + fn add_error(&mut self, error: notify::Error) { + // Micha: I skimmed through some of notify's source code and it seems the most common errors + // are IO errors. All other errors should really only happen when adding or removing watched folders. + // It's not clear what an upstream handler should do in the case of an IOError (other than logging it). + // That's what we do for now as well. + tracing::warn!("File watcher error: {error:?}."); + } + + fn add_event(&mut self, event: notify::Event) { + if self.rescan_event.is_some() { + // We're already in a rescan state, ignore all other events + return; + } + + // If the file watcher is out of sync or we observed too many changes, trigger a full rescan + if event.need_rescan() || self.events.len() > 10000 { + self.events = Vec::new(); + self.rescan_event = Some(ChangeEvent::Rescan); + + return; + } + + let kind = event.kind; + let path = match SystemPathBuf::from_path_buf(event.paths.into_iter().next().unwrap()) { + Ok(path) => path, + Err(path) => { + tracing::debug!( + "Ignore change to non-UTF8 path '{path}': {kind:?}", + path = path.display() + ); + + // Ignore non-UTF8 paths because they aren't handled by the rest of the system. + return; + } + }; + + let event = match kind { + EventKind::Create(create) => { + let kind = match create { + CreateKind::File => CreatedKind::File, + CreateKind::Folder => CreatedKind::Directory, + CreateKind::Any | CreateKind::Other => { + CreatedKind::from(FileType::from_path(&path)) + } + }; + + ChangeEvent::Created { path, kind } + } + + EventKind::Modify(modify) => match modify { + ModifyKind::Metadata(metadata) => { + if FileType::from_path(&path) != FileType::File { + // Only interested in file metadata events. + return; + } + + match metadata { + MetadataKind::Any | MetadataKind::Permissions | MetadataKind::Other => { + ChangeEvent::Changed { + path, + kind: ChangedKind::FileMetadata, + } + } + + MetadataKind::AccessTime + | MetadataKind::WriteTime + | MetadataKind::Ownership + | MetadataKind::Extended => { + // We're not interested in these metadata changes + return; + } + } + } + + ModifyKind::Data(_) => ChangeEvent::Changed { + kind: ChangedKind::FileContent, + path, + }, + + ModifyKind::Name(rename) => match rename { + RenameMode::From => { + // TODO: notify_debouncer_full matches the `RenameMode::From` and `RenameMode::To` events. + // Matching the from and to event would have the added advantage that we know the + // type of the path that was renamed, allowing `apply_changes` to avoid traversing the + // entire package.
+ // https://github.com/notify-rs/notify/blob/128bf6230c03d39dbb7f301ff7b20e594e34c3a2/notify-debouncer-full/src/lib.rs#L293-L297 + ChangeEvent::Deleted { + kind: DeletedKind::Any, + path, + } + } + + RenameMode::To => ChangeEvent::Created { + kind: CreatedKind::from(FileType::from_path(&path)), + path, + }, + + RenameMode::Both => { + // Both is only emitted when moving a path from within a watched directory + // to another watched directory. The event is not emitted if the `to` or `from` path + // lies outside the watched directory. However, the `To` and `From` events are always emitted. + // That's why we ignore `Both` and instead rely on `To` and `From`. + return; + } + + RenameMode::Other => { + // Skip over any other rename events + return; + } + + RenameMode::Any => { + // Guess the action based on the current file system state + if path.as_std_path().exists() { + let file_type = FileType::from_path(&path); + + ChangeEvent::Created { + kind: file_type.into(), + path, + } + } else { + ChangeEvent::Deleted { + kind: DeletedKind::Any, + path, + } + } + } + }, + ModifyKind::Other => { + // Skip other modification events that are not content or metadata related + return; + } + ModifyKind::Any => { + if !path.as_std_path().is_file() { + return; + } + + ChangeEvent::Changed { + path, + kind: ChangedKind::Any, + } + } + }, + + EventKind::Access(_) => { + // We're not interested in any access events + return; + } + + EventKind::Remove(kind) => { + let kind = match kind { + RemoveKind::File => DeletedKind::File, + RemoveKind::Folder => DeletedKind::Directory, + RemoveKind::Any | RemoveKind::Other => DeletedKind::Any, + }; + + ChangeEvent::Deleted { path, kind } + } + + EventKind::Other => { + // Skip over meta events + return; + } + + EventKind::Any => { + tracing::debug!("Skip any FS event for {path}."); + return; + } + }; + + self.events.push(event); + } + + fn into_events(self) -> Vec<ChangeEvent> { + if let Some(rescan_event) = self.rescan_event { + vec![rescan_event] + } else { + self.events + } + } +} + +pub trait EventHandler: Send + 'static { + fn handle(&self, changes: Vec<ChangeEvent>); +} + +impl<F> EventHandler for F +where + F: Fn(Vec<ChangeEvent>) + Send + 'static, +{ + fn handle(&self, changes: Vec<ChangeEvent>) { + let f = self; + f(changes); + } +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum FileType { + /// The event is related to a file. + File, + + /// The event is related to a directory. + Directory, + + /// It's unknown whether the event is related to a file or a directory or if it is any other file type.
+ Any, +} + +impl FileType { + fn from_path(path: &SystemPath) -> FileType { + match path.as_std_path().metadata() { + Ok(metadata) if metadata.is_file() => FileType::File, + Ok(metadata) if metadata.is_dir() => FileType::Directory, + Ok(_) | Err(_) => FileType::Any, + } + } +} + +impl From for CreatedKind { + fn from(value: FileType) -> Self { + match value { + FileType::File => Self::File, + FileType::Directory => Self::Directory, + FileType::Any => Self::Any, + } + } +} diff --git a/crates/red_knot/src/workspace.rs b/crates/red_knot/src/workspace.rs index 3f8f71956a..bd5e411a93 100644 --- a/crates/red_knot/src/workspace.rs +++ b/crates/red_knot/src/workspace.rs @@ -117,6 +117,7 @@ impl Workspace { self.package_tree(db).values().copied() } + #[tracing::instrument(skip_all)] pub fn reload(self, db: &mut dyn Db, metadata: WorkspaceMetadata) { assert_eq!(self.root(db), metadata.root()); @@ -139,6 +140,7 @@ impl Workspace { self.set_package_tree(db).to(new_packages); } + #[tracing::instrument(level = "debug", skip_all)] pub fn update_package(self, db: &mut dyn Db, metadata: PackageMetadata) -> anyhow::Result<()> { let path = metadata.root().to_path_buf(); @@ -157,7 +159,7 @@ impl Workspace { pub fn package(self, db: &dyn Db, path: &SystemPath) -> Option { let packages = self.package_tree(db); - let (package_path, package) = packages.range(..path.to_path_buf()).next_back()?; + let (package_path, package) = packages.range(..=path.to_path_buf()).next_back()?; if path.starts_with(package_path) { Some(*package) @@ -252,6 +254,7 @@ impl Package { self.file_set(db) } + #[tracing::instrument(level = "debug", skip(db))] pub fn remove_file(self, db: &mut dyn Db, file: File) -> bool { let mut files_arc = self.file_set(db).clone(); @@ -266,6 +269,22 @@ impl Package { removed } + #[tracing::instrument(level = "debug", skip(db))] + pub fn add_file(self, db: &mut dyn Db, file: File) -> bool { + let mut files_arc = self.file_set(db).clone(); + + // Set a dummy value. Salsa will cancel any pending queries and remove its own reference to `files` + // so that the reference counter to `files` now drops to 1. 
+ self.set_file_set(db).to(Arc::new(FxHashSet::default())); + + let files = Arc::get_mut(&mut files_arc).unwrap(); + let added = files.insert(file); + self.set_file_set(db).to(files_arc); + + added + } + + #[tracing::instrument(level = "debug", skip(db))] pub(crate) fn check(self, db: &dyn Db) -> Vec { let mut result = Vec::new(); for file in self.files(db) { @@ -286,9 +305,14 @@ impl Package { let root = self.root(db); assert_eq!(root, metadata.root()); - let files = discover_package_files(db, root); - + self.reload_files(db); self.set_name(db).to(metadata.name); + } + + #[tracing::instrument(level = "debug", skip(db))] + pub fn reload_files(self, db: &mut dyn Db) { + let files = discover_package_files(db, self.root(db)); + self.set_file_set(db).to(Arc::new(files)); } } diff --git a/crates/red_knot/tests/file_watching.rs b/crates/red_knot/tests/file_watching.rs new file mode 100644 index 0000000000..dbd66bab15 --- /dev/null +++ b/crates/red_knot/tests/file_watching.rs @@ -0,0 +1,590 @@ +#![allow(clippy::disallowed_names)] + +use std::time::Duration; + +use anyhow::{anyhow, Context}; + +use red_knot::db::RootDatabase; +use red_knot::watch; +use red_knot::watch::{directory_watcher, Watcher}; +use red_knot::workspace::WorkspaceMetadata; +use red_knot_module_resolver::{resolve_module, ModuleName}; +use ruff_db::files::system_path_to_file; +use ruff_db::program::{ProgramSettings, SearchPathSettings, TargetVersion}; +use ruff_db::source::source_text; +use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf}; +use ruff_db::Upcast; + +struct TestCase { + db: RootDatabase, + watcher: Option, + changes_receiver: crossbeam::channel::Receiver>, + temp_dir: tempfile::TempDir, +} + +impl TestCase { + fn workspace_path(&self, relative: impl AsRef) -> SystemPathBuf { + SystemPath::absolute(relative, self.db.workspace().root(&self.db)) + } + + fn root_path(&self) -> &SystemPath { + SystemPath::from_std_path(self.temp_dir.path()).unwrap() + } + + fn db(&self) -> &RootDatabase { + &self.db + } + + fn db_mut(&mut self) -> &mut RootDatabase { + &mut self.db + } + + fn stop_watch(&mut self) -> Vec { + if let Some(watcher) = self.watcher.take() { + // Give the watcher some time to catch up. 
+ std::thread::sleep(Duration::from_millis(10)); + watcher.flush(); + watcher.stop(); + } + + let mut all_events = Vec::new(); + for events in &self.changes_receiver { + all_events.extend(events); + } + + all_events + } +} + +fn setup<P, I>(workspace_files: I) -> anyhow::Result<TestCase> +where + I: IntoIterator<Item = (P, &'static str)>, + P: AsRef<SystemPath>, +{ + let temp_dir = tempfile::tempdir()?; + + let workspace_path = temp_dir.path().join("workspace"); + + std::fs::create_dir_all(&workspace_path).with_context(|| { + format!( + "Failed to create workspace directory '{}'", + workspace_path.display() + ) + })?; + + let workspace_path = SystemPath::from_std_path(&workspace_path).ok_or_else(|| { + anyhow!( + "Workspace root '{}' in temp directory is not a valid UTF-8 path.", + workspace_path.display() + ) + })?; + + let workspace_path = SystemPathBuf::from_utf8_path_buf( + workspace_path + .as_utf8_path() + .canonicalize_utf8() + .with_context(|| "Failed to canonicalize workspace path.")?, + ); + + for (relative_path, content) in workspace_files { + let relative_path = relative_path.as_ref(); + let absolute_path = workspace_path.join(relative_path); + if let Some(parent) = absolute_path.parent() { + std::fs::create_dir_all(parent).with_context(|| { + format!("Failed to create parent directory for file '{relative_path}'.",) + })?; + } + + std::fs::write(absolute_path.as_std_path(), content) + .with_context(|| format!("Failed to write file '{relative_path}'"))?; + } + + let system = OsSystem::new(&workspace_path); + + let workspace = WorkspaceMetadata::from_path(&workspace_path, &system)?; + let settings = ProgramSettings { + target_version: TargetVersion::default(), + search_paths: SearchPathSettings { + extra_paths: vec![], + workspace_root: workspace.root().to_path_buf(), + custom_typeshed: None, + site_packages: None, + }, + }; + + let db = RootDatabase::new(workspace, settings, system); + + let (sender, receiver) = crossbeam::channel::unbounded(); + let mut watcher = directory_watcher(move |events| sender.send(events).unwrap()) + .with_context(|| "Failed to create directory watcher")?; + + watcher + .watch(&workspace_path) + .with_context(|| "Failed to set up watcher for workspace directory.")?; + + let test_case = TestCase { + db, + changes_receiver: receiver, + watcher: Some(watcher), + temp_dir, + }; + + Ok(test_case) +} + +#[test] +fn new_file() -> anyhow::Result<()> { + let mut case = setup([("bar.py", "")])?; + let foo_path = case.workspace_path("foo.py"); + + assert_eq!(system_path_to_file(case.db(), &foo_path), None); + + std::fs::write(foo_path.as_std_path(), "print('Hello')")?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + let foo = system_path_to_file(case.db(), &foo_path).expect("foo.py to exist."); + + let package = case + .db() + .workspace() + .package(case.db(), &foo_path) + .expect("foo.py to belong to a package."); + + assert!(package.contains_file(case.db(), foo)); + + Ok(()) +} + +#[test] +fn new_ignored_file() -> anyhow::Result<()> { + let mut case = setup([("bar.py", ""), (".ignore", "foo.py")])?; + let foo_path = case.workspace_path("foo.py"); + + assert_eq!(system_path_to_file(case.db(), &foo_path), None); + + std::fs::write(foo_path.as_std_path(), "print('Hello')")?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + let foo = system_path_to_file(case.db(), &foo_path).expect("foo.py to exist."); + + let package = case + .db() + .workspace() + .package(case.db(), &foo_path) + .expect("foo.py to belong to a package."); +
assert!(!package.contains_file(case.db(), foo)); + + Ok(()) +} + +#[test] +fn changed_file() -> anyhow::Result<()> { + let foo_source = "print('Hello, world!')"; + let mut case = setup([("foo.py", foo_source)])?; + let foo_path = case.workspace_path("foo.py"); + + let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?; + assert_eq!(source_text(case.db(), foo).as_str(), foo_source); + + std::fs::write(foo_path.as_std_path(), "print('Version 2')")?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + assert_eq!(source_text(case.db(), foo).as_str(), "print('Version 2')"); + + Ok(()) +} + +#[cfg(unix)] +#[test] +fn changed_metadata() -> anyhow::Result<()> { + use std::os::unix::fs::PermissionsExt; + + let mut case = setup([("foo.py", "")])?; + let foo_path = case.workspace_path("foo.py"); + + let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?; + assert_eq!( + foo.permissions(case.db()), + Some( + std::fs::metadata(foo_path.as_std_path()) + .unwrap() + .permissions() + .mode() + ) + ); + + std::fs::set_permissions( + foo_path.as_std_path(), + std::fs::Permissions::from_mode(0o777), + ) + .with_context(|| "Failed to set file permissions.")?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + assert_eq!( + foo.permissions(case.db()), + Some( + std::fs::metadata(foo_path.as_std_path()) + .unwrap() + .permissions() + .mode() + ) + ); + + Ok(()) +} + +#[test] +fn deleted_file() -> anyhow::Result<()> { + let foo_source = "print('Hello, world!')"; + let mut case = setup([("foo.py", foo_source)])?; + let foo_path = case.workspace_path("foo.py"); + + let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?; + + let Some(package) = case.db().workspace().package(case.db(), &foo_path) else { + panic!("Expected foo.py to belong to a package."); + }; + + assert!(foo.exists(case.db())); + assert!(package.contains_file(case.db(), foo)); + + std::fs::remove_file(foo_path.as_std_path())?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + assert!(!foo.exists(case.db())); + assert!(!package.contains_file(case.db(), foo)); + + Ok(()) +} + +/// Tests the case where a file is moved from inside a watched directory to a directory that is not watched. +/// +/// This matches the behavior of deleting a file in VS code. +#[test] +fn move_file_to_trash() -> anyhow::Result<()> { + let foo_source = "print('Hello, world!')"; + let mut case = setup([("foo.py", foo_source)])?; + let foo_path = case.workspace_path("foo.py"); + + let trash_path = case.root_path().join(".trash"); + std::fs::create_dir_all(trash_path.as_std_path())?; + + let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?; + + let Some(package) = case.db().workspace().package(case.db(), &foo_path) else { + panic!("Expected foo.py to belong to a package."); + }; + + assert!(foo.exists(case.db())); + assert!(package.contains_file(case.db(), foo)); + + std::fs::rename( + foo_path.as_std_path(), + trash_path.join("foo.py").as_std_path(), + )?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + assert!(!foo.exists(case.db())); + assert!(!package.contains_file(case.db(), foo)); + + Ok(()) +} + +/// Move a file from a non-workspace (non-watched) location into the workspace. 
+#[test] +fn move_file_to_workspace() -> anyhow::Result<()> { + let mut case = setup([("bar.py", "")])?; + let foo_path = case.root_path().join("foo.py"); + std::fs::write(foo_path.as_std_path(), "")?; + + let foo_in_workspace_path = case.workspace_path("foo.py"); + + assert!(system_path_to_file(case.db(), &foo_path).is_some()); + + assert!(case + .db() + .workspace() + .package(case.db(), &foo_path) + .is_none()); + + std::fs::rename(foo_path.as_std_path(), foo_in_workspace_path.as_std_path())?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + let foo_in_workspace = system_path_to_file(case.db(), &foo_in_workspace_path) + .ok_or_else(|| anyhow!("Foo not found"))?; + + let Some(package) = case + .db() + .workspace() + .package(case.db(), &foo_in_workspace_path) + else { + panic!("Expected foo.py to belong to a package."); + }; + + assert!(foo_in_workspace.exists(case.db())); + assert!(package.contains_file(case.db(), foo_in_workspace)); + + Ok(()) +} + +/// Rename a workspace file. +#[test] +fn rename_file() -> anyhow::Result<()> { + let mut case = setup([("foo.py", "")])?; + let foo_path = case.workspace_path("foo.py"); + let bar_path = case.workspace_path("bar.py"); + + let foo = system_path_to_file(case.db(), &foo_path).ok_or_else(|| anyhow!("Foo not found"))?; + + let Some(package) = case.db().workspace().package(case.db(), &foo_path) else { + panic!("Expected foo.py to belong to a package."); + }; + + std::fs::rename(foo_path.as_std_path(), bar_path.as_std_path())?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + assert!(!foo.exists(case.db())); + assert!(!package.contains_file(case.db(), foo)); + + let bar = system_path_to_file(case.db(), &bar_path).ok_or_else(|| anyhow!("Bar not found"))?; + + let Some(package) = case.db().workspace().package(case.db(), &bar_path) else { + panic!("Expected bar.py to belong to a package."); + }; + + assert!(bar.exists(case.db())); + assert!(package.contains_file(case.db(), bar)); + + Ok(()) +} + +#[test] +fn directory_moved_to_workspace() -> anyhow::Result<()> { + let mut case = setup([("bar.py", "import sub.a")])?; + + let sub_original_path = case.root_path().join("sub"); + let init_original_path = sub_original_path.join("__init__.py"); + let a_original_path = sub_original_path.join("a.py"); + + std::fs::create_dir(sub_original_path.as_std_path()) + .with_context(|| "Failed to create sub directory")?; + std::fs::write(init_original_path.as_std_path(), "") + .with_context(|| "Failed to create __init__.py")?; + std::fs::write(a_original_path.as_std_path(), "").with_context(|| "Failed to create a.py")?; + + let sub_a_module = resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()); + + assert_eq!(sub_a_module, None); + + let sub_new_path = case.workspace_path("sub"); + std::fs::rename(sub_original_path.as_std_path(), sub_new_path.as_std_path()) + .with_context(|| "Failed to move sub directory")?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + let init_file = system_path_to_file(case.db(), sub_new_path.join("__init__.py")) + .expect("__init__.py to exist"); + let a_file = system_path_to_file(case.db(), sub_new_path.join("a.py")).expect("a.py to exist"); + + // `import sub.a` should now resolve + assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_some()); + + let package = case + .db() + .workspace() + .package(case.db(), &sub_new_path) + .expect("sub to belong to a package"); + + 
assert!(package.contains_file(case.db(), init_file)); + assert!(package.contains_file(case.db(), a_file)); + + Ok(()) +} + +#[test] +fn directory_moved_to_trash() -> anyhow::Result<()> { + let mut case = setup([ + ("bar.py", "import sub.a"), + ("sub/__init__.py", ""), + ("sub/a.py", ""), + ])?; + + assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_some(),); + + let sub_path = case.workspace_path("sub"); + + let package = case + .db() + .workspace() + .package(case.db(), &sub_path) + .expect("sub to belong to a package"); + + let init_file = + system_path_to_file(case.db(), sub_path.join("__init__.py")).expect("__init__.py to exist"); + let a_file = system_path_to_file(case.db(), sub_path.join("a.py")).expect("a.py to exist"); + + assert!(package.contains_file(case.db(), init_file)); + assert!(package.contains_file(case.db(), a_file)); + + std::fs::create_dir(case.root_path().join(".trash").as_std_path())?; + let trashed_sub = case.root_path().join(".trash/sub"); + std::fs::rename(sub_path.as_std_path(), trashed_sub.as_std_path()) + .with_context(|| "Failed to move the sub directory to the trash")?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + // `import sub.a` should no longer resolve + assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_none()); + + assert!(!init_file.exists(case.db())); + assert!(!a_file.exists(case.db())); + + assert!(!package.contains_file(case.db(), init_file)); + assert!(!package.contains_file(case.db(), a_file)); + + Ok(()) +} + +#[test] +fn directory_renamed() -> anyhow::Result<()> { + let mut case = setup([ + ("bar.py", "import sub.a"), + ("sub/__init__.py", ""), + ("sub/a.py", ""), + ])?; + + assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_some()); + assert!(resolve_module( + case.db().upcast(), + ModuleName::new_static("foo.baz").unwrap() + ) + .is_none()); + + let sub_path = case.workspace_path("sub"); + + let package = case + .db() + .workspace() + .package(case.db(), &sub_path) + .expect("sub to belong to a package"); + + let sub_init = + system_path_to_file(case.db(), sub_path.join("__init__.py")).expect("__init__.py to exist"); + let sub_a = system_path_to_file(case.db(), sub_path.join("a.py")).expect("a.py to exist"); + + assert!(package.contains_file(case.db(), sub_init)); + assert!(package.contains_file(case.db(), sub_a)); + + let foo_baz = case.workspace_path("foo/baz"); + + std::fs::create_dir(case.workspace_path("foo").as_std_path())?; + std::fs::rename(sub_path.as_std_path(), foo_baz.as_std_path()) + .with_context(|| "Failed to move the sub directory")?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + // `import sub.a` should no longer resolve + assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_none()); + // `import foo.baz` should now resolve + assert!(resolve_module( + case.db().upcast(), + ModuleName::new_static("foo.baz").unwrap() + ) + .is_some()); + + // The old paths are no longer tracked + assert!(!sub_init.exists(case.db())); + assert!(!sub_a.exists(case.db())); + + assert!(!package.contains_file(case.db(), sub_init)); + assert!(!package.contains_file(case.db(), sub_a)); + + let foo_baz_init = + system_path_to_file(case.db(), foo_baz.join("__init__.py")).expect("__init__.py to exist"); + let foo_baz_a = system_path_to_file(case.db(), foo_baz.join("a.py")).expect("a.py to exist"); + + // The new paths are synced + + 
assert!(foo_baz_init.exists(case.db())); + assert!(foo_baz_a.exists(case.db())); + + assert!(package.contains_file(case.db(), foo_baz_init)); + assert!(package.contains_file(case.db(), foo_baz_a)); + + Ok(()) +} + +#[test] +fn directory_deleted() -> anyhow::Result<()> { + let mut case = setup([ + ("bar.py", "import sub.a"), + ("sub/__init__.py", ""), + ("sub/a.py", ""), + ])?; + + assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_some(),); + + let sub_path = case.workspace_path("sub"); + + let package = case + .db() + .workspace() + .package(case.db(), &sub_path) + .expect("sub to belong to a package"); + + let init_file = + system_path_to_file(case.db(), sub_path.join("__init__.py")).expect("__init__.py to exist"); + let a_file = system_path_to_file(case.db(), sub_path.join("a.py")).expect("a.py to exist"); + + assert!(package.contains_file(case.db(), init_file)); + assert!(package.contains_file(case.db(), a_file)); + + std::fs::remove_dir_all(sub_path.as_std_path()) + .with_context(|| "Failed to remove the sub directory")?; + + let changes = case.stop_watch(); + + case.db_mut().apply_changes(changes); + + // `import sub.a` should no longer resolve + assert!(resolve_module(case.db().upcast(), ModuleName::new_static("sub.a").unwrap()).is_none()); + + assert!(!init_file.exists(case.db())); + assert!(!a_file.exists(case.db())); + + assert!(!package.contains_file(case.db(), init_file)); + assert!(!package.contains_file(case.db(), a_file)); + + Ok(()) +} diff --git a/crates/red_knot_module_resolver/src/db.rs b/crates/red_knot_module_resolver/src/db.rs index 327a2036a0..3ea9247df9 100644 --- a/crates/red_knot_module_resolver/src/db.rs +++ b/crates/red_knot_module_resolver/src/db.rs @@ -76,6 +76,9 @@ pub(crate) mod tests { fn upcast(&self) -> &(dyn ruff_db::Db + 'static) { self } + fn upcast_mut(&mut self) -> &mut (dyn ruff_db::Db + 'static) { + self + } } impl ruff_db::Db for TestDb { diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 86319b9502..8849b73ee3 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -30,9 +30,8 @@ pub(crate) fn resolve_module_query<'db>( db: &'db dyn Db, module_name: internal::ModuleNameIngredient<'db>, ) -> Option { - let _span = tracing::trace_span!("resolve_module", ?module_name).entered(); - let name = module_name.name(db); + let _span = tracing::trace_span!("resolve_module", %name).entered(); let (search_path, module_file, kind) = resolve_name(db, name)?; @@ -1225,7 +1224,7 @@ mod tests { // Delete `bar.py` db.memory_file_system().remove_file(&bar_path).unwrap(); - bar.touch(&mut db); + bar.sync(&mut db); // Re-query the foo module. The foo module should still be cached because `bar.py` isn't relevant // for resolving `foo`. 
@@ -1277,7 +1276,7 @@ mod tests { db.memory_file_system().remove_file(&foo_init_path)?; db.memory_file_system() .remove_directory(foo_init_path.parent().unwrap())?; - File::touch_path(&mut db, &foo_init_path); + File::sync_path(&mut db, &foo_init_path); let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve"); assert_eq!(&src.join("foo.py"), foo_module.file().path(&db)); @@ -1405,7 +1404,7 @@ mod tests { db.memory_file_system() .remove_file(&src_functools_path) .unwrap(); - File::touch_path(&mut db, &src_functools_path); + File::sync_path(&mut db, &src_functools_path); let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap(); assert_eq!(functools_module.search_path(), &stdlib); assert_eq!( @@ -1617,7 +1616,7 @@ not_a_directory // Salsa file forces a new revision. // // TODO: get rid of the `.report_untracked_read()` call... - File::touch_path(&mut db, SystemPath::new("/x/src/foo.py")); + File::sync_path(&mut db, SystemPath::new("/x/src/foo.py")); assert_eq!(resolve_module(&db, foo_module_name.clone()), None); } @@ -1645,8 +1644,8 @@ not_a_directory .remove_file(src_path.join("foo.py")) .unwrap(); db.memory_file_system().remove_directory(&src_path).unwrap(); - File::touch_path(&mut db, &src_path.join("foo.py")); - File::touch_path(&mut db, &src_path); + File::sync_path(&mut db, &src_path.join("foo.py")); + File::sync_path(&mut db, &src_path); assert_eq!(resolve_module(&db, foo_module_name.clone()), None); } diff --git a/crates/red_knot_python_semantic/src/db.rs b/crates/red_knot_python_semantic/src/db.rs index 9704dcba19..1ba9208b32 100644 --- a/crates/red_knot_python_semantic/src/db.rs +++ b/crates/red_knot_python_semantic/src/db.rs @@ -33,10 +33,7 @@ pub struct Jar( ); /// Database giving access to semantic information about a Python program. 
-pub trait Db: - SourceDb + ResolverDb + DbWithJar + Upcast + Upcast -{ -} +pub trait Db: SourceDb + ResolverDb + DbWithJar + Upcast {} #[cfg(test)] pub(crate) mod tests { @@ -120,12 +117,18 @@ pub(crate) mod tests { fn upcast(&self) -> &(dyn SourceDb + 'static) { self } + fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) { + self + } } impl Upcast for TestDb { fn upcast(&self) -> &(dyn ResolverDb + 'static) { self } + fn upcast_mut(&mut self) -> &mut (dyn ResolverDb + 'static) { + self + } } impl red_knot_module_resolver::Db for TestDb {} diff --git a/crates/ruff_benchmark/benches/red_knot.rs b/crates/ruff_benchmark/benches/red_knot.rs index 11d2b23245..0b6bdea0cc 100644 --- a/crates/ruff_benchmark/benches/red_knot.rs +++ b/crates/ruff_benchmark/benches/red_knot.rs @@ -144,7 +144,7 @@ fn benchmark_incremental(criterion: &mut Criterion) { ) .unwrap(); - case.bar.touch(&mut case.db); + case.bar.sync(&mut case.db); case }, |case| { diff --git a/crates/ruff_db/src/files.rs b/crates/ruff_db/src/files.rs index 86b8620b35..6a928e2e9b 100644 --- a/crates/ruff_db/src/files.rs +++ b/crates/ruff_db/src/files.rs @@ -5,8 +5,8 @@ use dashmap::mapref::entry::Entry; use crate::file_revision::FileRevision; use crate::files::private::FileStatus; -use crate::system::SystemPath; -use crate::vendored::VendoredPath; +use crate::system::{SystemPath, SystemPathBuf}; +use crate::vendored::{VendoredPath, VendoredPathBuf}; use crate::{Db, FxDashMap}; pub use path::FilePath; use ruff_notebook::{Notebook, NotebookError}; @@ -24,10 +24,7 @@ pub fn system_path_to_file(db: &dyn Db, path: impl AsRef) -> Option< // exist anymore so that Salsa can track that the caller of this function depends on the existence of // that file. This function filters out files that don't exist, but Salsa will know that it must // re-run the calling query whenever the `file`'s status changes (because of the `.status` call here). - match file.status(db) { - FileStatus::Exists => Some(file), - FileStatus::Deleted => None, - } + file.exists(db).then_some(file) } /// Interns a vendored file path. Returns `Some` if the vendored file for `path` exists and `None` otherwise. @@ -44,11 +41,14 @@ pub struct Files { #[derive(Default)] struct FilesInner { - /// Lookup table that maps [`FilePath`]s to salsa interned [`File`] instances. + /// Lookup table that maps [`SystemPathBuf`]s to salsa interned [`File`] instances. /// /// The map also stores entries for files that don't exist on the file system. This is necessary /// so that queries that depend on the existence of a file are re-executed when the file is created. - files_by_path: FxDashMap, + system_by_path: FxDashMap, + + /// Lookup table that maps vendored files to the salsa [`File`] ingredients. 
+    vendored_by_path: FxDashMap<VendoredPathBuf, File>,
 }

 impl Files {
@@ -61,11 +61,10 @@ impl Files {
     #[tracing::instrument(level = "trace", skip(self, db), ret)]
     fn system(&self, db: &dyn Db, path: &SystemPath) -> File {
         let absolute = SystemPath::absolute(path, db.system().current_directory());
-        let absolute = FilePath::System(absolute);

         *self
             .inner
-            .files_by_path
+            .system_by_path
             .entry(absolute.clone())
             .or_insert_with(|| {
                 let metadata = db.system().path_metadata(path);
@@ -73,7 +72,7 @@ impl Files {
                 match metadata {
                     Ok(metadata) if metadata.file_type().is_file() => File::new(
                         db,
-                        absolute,
+                        FilePath::System(absolute),
                         metadata.permissions(),
                         metadata.revision(),
                         FileStatus::Exists,
@@ -81,7 +80,7 @@ impl Files {
                     ),
                     _ => File::new(
                         db,
-                        absolute,
+                        FilePath::System(absolute),
                         None,
                         FileRevision::zero(),
                         FileStatus::Deleted,
@@ -92,11 +91,11 @@ impl Files {
     }

     /// Tries to look up the file for the given system path, returns `None` if no such file exists yet
-    fn try_system(&self, db: &dyn Db, path: &SystemPath) -> Option<File> {
+    pub fn try_system(&self, db: &dyn Db, path: &SystemPath) -> Option<File> {
         let absolute = SystemPath::absolute(path, db.system().current_directory());

         self.inner
-            .files_by_path
-            .get(&FilePath::System(absolute))
+            .system_by_path
+            .get(&absolute)
             .map(|entry| *entry.value())
     }

@@ -104,11 +103,7 @@ impl Files {
     /// exists and `None` otherwise.
     #[tracing::instrument(level = "trace", skip(self, db), ret)]
     fn vendored(&self, db: &dyn Db, path: &VendoredPath) -> Option<File> {
-        let file = match self
-            .inner
-            .files_by_path
-            .entry(FilePath::Vendored(path.to_path_buf()))
-        {
+        let file = match self.inner.vendored_by_path.entry(path.to_path_buf()) {
             Entry::Occupied(entry) => *entry.get(),
             Entry::Vacant(entry) => {
                 let metadata = db.vendored().metadata(path).ok()?;
@@ -131,6 +126,44 @@ impl Files {
         Some(file)
     }

+    /// Refreshes the state of all known files under `path` recursively.
+    ///
+    /// The most common use case is to update the [`Files`] state after removing or moving a directory.
+    ///
+    /// # Performance
+    /// Refreshing the state of every file under `path` is expensive. It requires iterating over all known files
+    /// and making system calls to get the latest status of each file in `path`.
+    /// That's why [`File::sync_path`] and [`File::sync`] are preferred if it is known that the path is a file.
+    #[tracing::instrument(level = "debug", skip(db))]
+    pub fn sync_recursively(db: &mut dyn Db, path: &SystemPath) {
+        let path = SystemPath::absolute(path, db.system().current_directory());
+
+        let inner = Arc::clone(&db.files().inner);
+        for entry in inner.system_by_path.iter_mut() {
+            if entry.key().starts_with(&path) {
+                let file = entry.value();
+                file.sync(db);
+            }
+        }
+    }
+
+    /// Refreshes the state of all known files.
+    ///
+    /// This is a last-resort method that should only be used when more granular updates aren't possible
+    /// (for example, because the file watcher failed to observe some changes). Use responsibly!
+    ///
+    /// # Performance
+    /// Refreshing the state of every file is expensive. It requires iterating over all known files and
+    /// issuing a system call to get the latest status of each file.
+    #[tracing::instrument(level = "debug", skip(db))]
+    pub fn sync_all(db: &mut dyn Db) {
+        let inner = Arc::clone(&db.files().inner);
+        for entry in inner.system_by_path.iter_mut() {
+            let file = entry.value();
+            file.sync(db);
+        }
+    }
+
     /// Creates a salsa like snapshot. The instances share
     /// the same path-to-file mapping.
     pub fn snapshot(&self) -> Self {
@@ -144,7 +177,7 @@ impl std::fmt::Debug for Files {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let mut map = f.debug_map();

-        for entry in self.inner.files_by_path.iter() {
+        for entry in self.inner.system_by_path.iter() {
             map.entry(entry.key(), entry.value());
         }
         map.finish()
@@ -219,18 +252,20 @@ impl File {
     }

     /// Refreshes the file metadata by querying the file system if needed.
-    /// TODO: The API should instead take all observed changes from the file system directly
-    /// and then apply the VfsFile status accordingly. But for now, this is sufficient.
-    pub fn touch_path(db: &mut dyn Db, path: &SystemPath) {
-        Self::touch_impl(db, path, None);
+    #[tracing::instrument(level = "debug", skip(db))]
+    pub fn sync_path(db: &mut dyn Db, path: &SystemPath) {
+        let absolute = SystemPath::absolute(path, db.system().current_directory());
+        Self::sync_impl(db, &absolute, None);
     }

-    pub fn touch(self, db: &mut dyn Db) {
+    /// Syncs the [`File`]'s state with the state of the file on the system.
+    #[tracing::instrument(level = "debug", skip(db))]
+    pub fn sync(self, db: &mut dyn Db) {
         let path = self.path(db).clone();

         match path {
             FilePath::System(system) => {
-                Self::touch_impl(db, &system, Some(self));
+                Self::sync_impl(db, &system, Some(self));
             }
             FilePath::Vendored(_) => {
                 // Readonly, can never be out of date.
@@ -238,23 +273,31 @@ impl File {
         }
     }

-    /// Private method providing the implementation for [`Self::touch_path`] and [`Self::touch`].
-    fn touch_impl(db: &mut dyn Db, path: &SystemPath, file: Option<File>) {
-        let metadata = db.system().path_metadata(path);
-
-        let (status, revision) = match metadata {
-            Ok(metadata) if metadata.file_type().is_file() => {
-                (FileStatus::Exists, metadata.revision())
-            }
-            _ => (FileStatus::Deleted, FileRevision::zero()),
-        };
-
+    /// Private method providing the implementation for [`Self::sync_path`] and [`Self::sync`].
+    fn sync_impl(db: &mut dyn Db, path: &SystemPath, file: Option<File>) {
         let Some(file) = file.or_else(|| db.files().try_system(db, path)) else {
             return;
         };

+        let metadata = db.system().path_metadata(path);
+
+        let (status, revision, permission) = match metadata {
+            Ok(metadata) if metadata.file_type().is_file() => (
+                FileStatus::Exists,
+                metadata.revision(),
+                metadata.permissions(),
+            ),
+            _ => (FileStatus::Deleted, FileRevision::zero(), None),
+        };
+
         file.set_status(db).to(status);
         file.set_revision(db).to(revision);
+        file.set_permissions(db).to(permission);
+    }
+
+    /// Returns `true` if the file exists.
+    pub fn exists(self, db: &dyn Db) -> bool {
+        self.status(db) == FileStatus::Exists
     }
 }

diff --git a/crates/ruff_db/src/lib.rs b/crates/ruff_db/src/lib.rs
index cb0b8b6321..d64b6d47d9 100644
--- a/crates/ruff_db/src/lib.rs
+++ b/crates/ruff_db/src/lib.rs
@@ -34,6 +34,7 @@ pub trait Db: DbWithJar<Jar> {
 /// Trait for upcasting a reference to a base trait object.
 pub trait Upcast<T: ?Sized> {
     fn upcast(&self) -> &T;
+    fn upcast_mut(&mut self) -> &mut T;
 }

 #[cfg(test)]
diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs
index e8f7383c21..24883f0601 100644
--- a/crates/ruff_db/src/system/test.rs
+++ b/crates/ruff_db/src/system/test.rs
@@ -145,7 +145,7 @@ pub trait DbWithTestSystem: Db + Sized {
             .write_file(path, content);

         if result.is_ok() {
-            File::sync_path(self, path);
+            File::sync_path(self, path);
         }

         result
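
Taken together, the ruff_db changes replace the old unconditional "touch" with a family of sync operations: a single file is refreshed from the system (status, revision, permissions), while `Files::sync_recursively` and `Files::sync_all` cover directory-level and catch-all refreshes after watcher events. The following is a minimal, self-contained Rust sketch of that pattern only; `MockSystem` and the toy `Files`/`FileStatus` types are invented for illustration and are not the crate's real API.

use std::collections::HashMap;

/// Toy stand-in for the system the database queries for metadata.
#[derive(Default)]
struct MockSystem {
    /// path -> revision; a missing entry means the file no longer exists.
    on_disk: HashMap<String, u64>,
}

#[derive(Debug, PartialEq, Clone, Copy)]
enum FileStatus {
    Exists(u64),
    Deleted,
}

/// Toy file table keyed by absolute path, mirroring the `system_by_path` map above.
#[derive(Default)]
struct Files {
    system_by_path: HashMap<String, FileStatus>,
}

impl Files {
    /// Record a file the first time it is looked up.
    fn insert(&mut self, path: &str, status: FileStatus) {
        self.system_by_path.insert(path.to_string(), status);
    }

    /// Like `sync_path`: refresh one known file from the system instead of blindly bumping it.
    fn sync_path(&mut self, system: &MockSystem, path: &str) {
        if let Some(status) = self.system_by_path.get_mut(path) {
            *status = match system.on_disk.get(path) {
                Some(revision) => FileStatus::Exists(*revision),
                None => FileStatus::Deleted,
            };
        }
    }

    /// Like `sync_recursively`: refresh every known file under `prefix`,
    /// e.g. after a directory was moved or deleted.
    fn sync_recursively(&mut self, system: &MockSystem, prefix: &str) {
        let paths: Vec<String> = self
            .system_by_path
            .keys()
            .filter(|path| path.starts_with(prefix))
            .cloned()
            .collect();
        for path in paths {
            self.sync_path(system, &path);
        }
    }
}

fn main() {
    let mut system = MockSystem::default();
    system.on_disk.insert("/src/foo.py".to_string(), 1);

    let mut files = Files::default();
    files.insert("/src/foo.py", FileStatus::Exists(1));

    // The watcher reports that `/src` was deleted: drop it from the "disk" and resync.
    system.on_disk.remove("/src/foo.py");
    files.sync_recursively(&system, "/src");

    assert_eq!(files.system_by_path["/src/foo.py"], FileStatus::Deleted);
}

The real implementation differs in that the per-file state lives in salsa inputs (`set_status`/`set_revision`/`set_permissions`), so dependent queries re-run automatically when a sync changes them, but the control flow is the same.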
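The `upcast_mut` addition exists because syncing is a write operation: `File::sync`, `File::sync_path`, and `Files::sync_recursively` all take `&mut dyn Db`, so a caller holding only the wrapper database needs a mutable view of the base trait object. Below is a small self-contained sketch of that pattern; the `SourceDb` and `RootDatabase` types here are toy stand-ins, not the real ruff_db traits.

use std::collections::HashMap;

trait SourceDb {
    fn file_revision(&self, path: &str) -> u64;
    fn set_file_revision(&mut self, path: &str, revision: u64);
}

trait Upcast<T: ?Sized> {
    fn upcast(&self) -> &T;
    fn upcast_mut(&mut self) -> &mut T;
}

struct RootDatabase {
    revisions: HashMap<String, u64>,
}

impl SourceDb for RootDatabase {
    fn file_revision(&self, path: &str) -> u64 {
        self.revisions.get(path).copied().unwrap_or(0)
    }

    fn set_file_revision(&mut self, path: &str, revision: u64) {
        self.revisions.insert(path.to_string(), revision);
    }
}

impl Upcast<dyn SourceDb> for RootDatabase {
    fn upcast(&self) -> &(dyn SourceDb + 'static) {
        self
    }

    fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
        self
    }
}

// A sync-style helper only needs the base trait, but it needs it mutably.
fn bump_revision(db: &mut dyn SourceDb, path: &str) {
    let next = db.file_revision(path) + 1;
    db.set_file_revision(path, next);
}

fn main() {
    let mut db = RootDatabase {
        revisions: HashMap::new(),
    };
    bump_revision(db.upcast_mut(), "src/foo.py");
    assert_eq!(db.upcast().file_revision("src/foo.py"), 1);
}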