diff --git a/Cargo.lock b/Cargo.lock index 2f6e9b46ec..0f9ecd2db3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2468,6 +2468,7 @@ dependencies = [ name = "ruff_python_semantic" version = "0.0.0" dependencies = [ + "anyhow", "bitflags 2.5.0", "is-macro", "ruff_db", @@ -2479,6 +2480,8 @@ dependencies = [ "ruff_text_size", "rustc-hash", "salsa-2022", + "smol_str", + "tempfile", "tracing", ] diff --git a/Cargo.toml b/Cargo.toml index 59f52a41c8..c0133b6cc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -117,6 +117,7 @@ serde_with = { version = "3.6.0", default-features = false, features = [ shellexpand = { version = "3.0.0" } similar = { version = "2.4.0", features = ["inline"] } smallvec = { version = "1.13.2" } +smol_str = { version = "0.2.2" } static_assertions = "1.1.0" strum = { version = "0.26.0", features = ["strum_macros"] } strum_macros = { version = "0.26.0" } diff --git a/crates/ruff_db/src/file_system.rs b/crates/ruff_db/src/file_system.rs index 2214322d13..84bbe4d5e4 100644 --- a/crates/ruff_db/src/file_system.rs +++ b/crates/ruff_db/src/file_system.rs @@ -1,6 +1,6 @@ use std::fmt::Formatter; use std::ops::Deref; -use std::path::Path; +use std::path::{Path, StripPrefixError}; use camino::{Utf8Path, Utf8PathBuf}; use filetime::FileTime; @@ -88,6 +88,245 @@ impl FileSystemPath { self.0.extension() } + /// Determines whether `base` is a prefix of `self`. + /// + /// Only considers whole path components to match. 
+ /// + /// # Examples + /// + /// ``` + /// use ruff_db::file_system::FileSystemPath; + /// + /// let path = FileSystemPath::new("/etc/passwd"); + /// + /// assert!(path.starts_with("/etc")); + /// assert!(path.starts_with("/etc/")); + /// assert!(path.starts_with("/etc/passwd")); + /// assert!(path.starts_with("/etc/passwd/")); // extra slash is okay + /// assert!(path.starts_with("/etc/passwd///")); // multiple extra slashes are okay + /// + /// assert!(!path.starts_with("/e")); + /// assert!(!path.starts_with("/etc/passwd.txt")); + /// + /// assert!(!FileSystemPath::new("/etc/foo.rs").starts_with("/etc/foo")); + /// ``` + #[inline] + #[must_use] + pub fn starts_with(&self, base: impl AsRef) -> bool { + self.0.starts_with(base.as_ref()) + } + + /// Determines whether `child` is a suffix of `self`. + /// + /// Only considers whole path components to match. + /// + /// # Examples + /// + /// ``` + /// use ruff_db::file_system::FileSystemPath; + /// + /// let path = FileSystemPath::new("/etc/resolv.conf"); + /// + /// assert!(path.ends_with("resolv.conf")); + /// assert!(path.ends_with("etc/resolv.conf")); + /// assert!(path.ends_with("/etc/resolv.conf")); + /// + /// assert!(!path.ends_with("/resolv.conf")); + /// assert!(!path.ends_with("conf")); // use .extension() instead + /// ``` + #[inline] + #[must_use] + pub fn ends_with(&self, child: impl AsRef) -> bool { + self.0.ends_with(child.as_ref()) + } + + /// Returns the `FileSystemPath` without its final component, if there is one. + /// + /// Returns [`None`] if the path terminates in a root or prefix. 
+ /// + /// # Examples + /// + /// ``` + /// use ruff_db::file_system::FileSystemPath; + /// + /// let path = FileSystemPath::new("/foo/bar"); + /// let parent = path.parent().unwrap(); + /// assert_eq!(parent, FileSystemPath::new("/foo")); + /// + /// let grand_parent = parent.parent().unwrap(); + /// assert_eq!(grand_parent, FileSystemPath::new("/")); + /// assert_eq!(grand_parent.parent(), None); + /// ``` + #[inline] + #[must_use] + pub fn parent(&self) -> Option<&FileSystemPath> { + self.0.parent().map(FileSystemPath::new) + } + + /// Produces an iterator over the [`camino::Utf8Component`]s of the path. + /// + /// When parsing the path, there is a small amount of normalization: + /// + /// * Repeated separators are ignored, so `a/b` and `a//b` both have + /// `a` and `b` as components. + /// + /// * Occurrences of `.` are normalized away, except if they are at the + /// beginning of the path. For example, `a/./b`, `a/b/`, `a/b/.` and + /// `a/b` all have `a` and `b` as components, but `./a/b` starts with + /// an additional [`CurDir`] component. + /// + /// * A trailing slash is normalized away, `/a/b` and `/a/b/` are equivalent. + /// + /// Note that no other normalization takes place; in particular, `a/c` + /// and `a/b/../c` are distinct, to account for the possibility that `b` + /// is a symbolic link (so its parent isn't `a`). 
+ /// + /// # Examples + /// + /// ``` + /// use camino::{Utf8Component}; + /// use ruff_db::file_system::FileSystemPath; + /// + /// let mut components = FileSystemPath::new("/tmp/foo.txt").components(); + /// + /// assert_eq!(components.next(), Some(Utf8Component::RootDir)); + /// assert_eq!(components.next(), Some(Utf8Component::Normal("tmp"))); + /// assert_eq!(components.next(), Some(Utf8Component::Normal("foo.txt"))); + /// assert_eq!(components.next(), None) + /// ``` + /// + /// [`CurDir`]: camino::Utf8Component::CurDir + #[inline] + pub fn components(&self) -> camino::Utf8Components { + self.0.components() + } + + /// Returns the final component of the `FileSystemPath`, if there is one. + /// + /// If the path is a normal file, this is the file name. If it's the path of a directory, this + /// is the directory name. + /// + /// Returns [`None`] if the path terminates in `..`. + /// + /// # Examples + /// + /// ``` + /// use camino::Utf8Path; + /// use ruff_db::file_system::FileSystemPath; + /// + /// assert_eq!(Some("bin"), FileSystemPath::new("/usr/bin/").file_name()); + /// assert_eq!(Some("foo.txt"), FileSystemPath::new("tmp/foo.txt").file_name()); + /// assert_eq!(Some("foo.txt"), FileSystemPath::new("foo.txt/.").file_name()); + /// assert_eq!(Some("foo.txt"), FileSystemPath::new("foo.txt/.//").file_name()); + /// assert_eq!(None, FileSystemPath::new("foo.txt/..").file_name()); + /// assert_eq!(None, FileSystemPath::new("/").file_name()); + /// ``` + #[inline] + #[must_use] + pub fn file_name(&self) -> Option<&str> { + self.0.file_name() + } + + /// Extracts the stem (non-extension) portion of [`self.file_name`]. 
+ /// + /// [`self.file_name`]: FileSystemPath::file_name + /// + /// The stem is: + /// + /// * [`None`], if there is no file name; + /// * The entire file name if there is no embedded `.`; + /// * The entire file name if the file name begins with `.` and has no other `.`s within; + /// * Otherwise, the portion of the file name before the final `.` + /// + /// # Examples + /// + /// ``` + /// use ruff_db::file_system::FileSystemPath; + /// + /// assert_eq!("foo", FileSystemPath::new("foo.rs").file_stem().unwrap()); + /// assert_eq!("foo.tar", FileSystemPath::new("foo.tar.gz").file_stem().unwrap()); + /// ``` + #[inline] + #[must_use] + pub fn file_stem(&self) -> Option<&str> { + self.0.file_stem() + } + + /// Returns a path that, when joined onto `base`, yields `self`. + /// + /// # Errors + /// + /// If `base` is not a prefix of `self` (i.e., [`starts_with`] + /// returns `false`), returns [`Err`]. + /// + /// [`starts_with`]: FileSystemPath::starts_with + /// + /// # Examples + /// + /// ``` + /// use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf}; + /// + /// let path = FileSystemPath::new("/test/haha/foo.txt"); + /// + /// assert_eq!(path.strip_prefix("/"), Ok(FileSystemPath::new("test/haha/foo.txt"))); + /// assert_eq!(path.strip_prefix("/test"), Ok(FileSystemPath::new("haha/foo.txt"))); + /// assert_eq!(path.strip_prefix("/test/"), Ok(FileSystemPath::new("haha/foo.txt"))); + /// assert_eq!(path.strip_prefix("/test/haha/foo.txt"), Ok(FileSystemPath::new(""))); + /// assert_eq!(path.strip_prefix("/test/haha/foo.txt/"), Ok(FileSystemPath::new(""))); + /// + /// assert!(path.strip_prefix("test").is_err()); + /// assert!(path.strip_prefix("/haha").is_err()); + /// + /// let prefix = FileSystemPathBuf::from("/test/"); + /// assert_eq!(path.strip_prefix(prefix), Ok(FileSystemPath::new("haha/foo.txt"))); + /// ``` + #[inline] + pub fn strip_prefix( + &self, + base: impl AsRef, + ) -> std::result::Result<&FileSystemPath, StripPrefixError> { + 
self.0.strip_prefix(base.as_ref()).map(FileSystemPath::new) + } + + /// Creates an owned [`FileSystemPathBuf`] with `path` adjoined to `self`. + /// + /// See [`std::path::PathBuf::push`] for more details on what it means to adjoin a path. + /// + /// # Examples + /// + /// ``` + /// use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf}; + /// + /// assert_eq!(FileSystemPath::new("/etc").join("passwd"), FileSystemPathBuf::from("/etc/passwd")); + /// ``` + #[inline] + #[must_use] + pub fn join(&self, path: impl AsRef) -> FileSystemPathBuf { + FileSystemPathBuf::from_utf8_path_buf(self.0.join(&path.as_ref().0)) + } + + /// Creates an owned [`FileSystemPathBuf`] like `self` but with the given extension. + /// + /// See [`std::path::PathBuf::set_extension`] for more details. + /// + /// # Examples + /// + /// ``` + /// use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf}; + /// + /// let path = FileSystemPath::new("foo.rs"); + /// assert_eq!(path.with_extension("txt"), FileSystemPathBuf::from("foo.txt")); + /// + /// let path = FileSystemPath::new("foo.tar.gz"); + /// assert_eq!(path.with_extension(""), FileSystemPathBuf::from("foo.tar")); + /// assert_eq!(path.with_extension("xz"), FileSystemPathBuf::from("foo.tar.xz")); + /// assert_eq!(path.with_extension("").with_extension("txt"), FileSystemPathBuf::from("foo.txt")); + /// ``` + #[inline] + pub fn with_extension(&self, extension: &str) -> FileSystemPathBuf { + FileSystemPathBuf::from_utf8_path_buf(self.0.with_extension(extension)) + } + /// Converts the path to an owned [`FileSystemPathBuf`]. pub fn to_path_buf(&self) -> FileSystemPathBuf { FileSystemPathBuf(self.0.to_path_buf()) @@ -104,6 +343,10 @@ impl FileSystemPath { pub fn as_std_path(&self) -> &Path { self.0.as_std_path() } + + pub fn from_std_path(path: &Path) -> Option<&FileSystemPath> { + Some(FileSystemPath::new(Utf8Path::from_path(path)?)) + } } /// Owned path to a file or directory stored in [`FileSystem`]. 
@@ -113,12 +356,6 @@ impl FileSystemPath { #[derive(Eq, PartialEq, Clone, Hash, PartialOrd, Ord)] pub struct FileSystemPathBuf(Utf8PathBuf); -impl Default for FileSystemPathBuf { - fn default() -> Self { - Self::new() - } -} - impl FileSystemPathBuf { pub fn new() -> Self { Self(Utf8PathBuf::new()) @@ -128,12 +365,66 @@ impl FileSystemPathBuf { Self(path) } + pub fn from_path_buf( + path: std::path::PathBuf, + ) -> std::result::Result { + Utf8PathBuf::from_path_buf(path).map(Self) + } + + /// Extends `self` with `path`. + /// + /// If `path` is absolute, it replaces the current path. + /// + /// On Windows: + /// + /// * if `path` has a root but no prefix (e.g., `\windows`), it + /// replaces everything except for the prefix (if any) of `self`. + /// * if `path` has a prefix but no root, it replaces `self`. + /// + /// # Examples + /// + /// Pushing a relative path extends the existing path: + /// + /// ``` + /// use ruff_db::file_system::FileSystemPathBuf; + /// + /// let mut path = FileSystemPathBuf::from("/tmp"); + /// path.push("file.bk"); + /// assert_eq!(path, FileSystemPathBuf::from("/tmp/file.bk")); + /// ``` + /// + /// Pushing an absolute path replaces the existing path: + /// + /// ``` + /// + /// use ruff_db::file_system::FileSystemPathBuf; + /// + /// let mut path = FileSystemPathBuf::from("/tmp"); + /// path.push("/etc"); + /// assert_eq!(path, FileSystemPathBuf::from("/etc")); + /// ``` + pub fn push(&mut self, path: impl AsRef) { + self.0.push(&path.as_ref().0); + } + #[inline] pub fn as_path(&self) -> &FileSystemPath { FileSystemPath::new(&self.0) } } +impl From<&str> for FileSystemPathBuf { + fn from(value: &str) -> Self { + FileSystemPathBuf::from_utf8_path_buf(Utf8PathBuf::from(value)) + } +} + +impl Default for FileSystemPathBuf { + fn default() -> Self { + Self::new() + } +} + impl AsRef for FileSystemPathBuf { #[inline] fn as_ref(&self) -> &FileSystemPath { diff --git a/crates/ruff_db/src/file_system/memory.rs 
b/crates/ruff_db/src/file_system/memory.rs index 2d8a77d882..096a14db7e 100644 --- a/crates/ruff_db/src/file_system/memory.rs +++ b/crates/ruff_db/src/file_system/memory.rs @@ -1,8 +1,8 @@ +use std::collections::BTreeMap; use std::sync::{Arc, RwLock, RwLockWriteGuard}; use camino::{Utf8Path, Utf8PathBuf}; use filetime::FileTime; -use rustc_hash::FxHashMap; use crate::file_system::{FileSystem, FileSystemPath, FileType, Metadata, Result}; @@ -41,7 +41,7 @@ impl MemoryFileSystem { let fs = Self { inner: Arc::new(MemoryFileSystemInner { - by_path: RwLock::new(FxHashMap::default()), + by_path: RwLock::new(BTreeMap::default()), cwd: cwd.clone(), }), }; @@ -80,16 +80,36 @@ impl MemoryFileSystem { /// The operation overrides the content for an existing file with the same normalized `path`. /// /// Enclosing directories are automatically created if they don't exist. - pub fn write_file(&self, path: impl AsRef, content: String) -> Result<()> { + pub fn write_file( + &self, + path: impl AsRef, + content: impl ToString, + ) -> Result<()> { let mut by_path = self.inner.by_path.write().unwrap(); let normalized = normalize_path(path.as_ref(), &self.inner.cwd); - get_or_create_file(&mut by_path, &normalized)?.content = content; + get_or_create_file(&mut by_path, &normalized)?.content = content.to_string(); Ok(()) } + pub fn remove_file(&self, path: impl AsRef) -> Result<()> { + let mut by_path = self.inner.by_path.write().unwrap(); + let normalized = normalize_path(path.as_ref(), &self.inner.cwd); + + match by_path.entry(normalized) { + std::collections::btree_map::Entry::Occupied(entry) => match entry.get() { + Entry::File(_) => { + entry.remove(); + Ok(()) + } + Entry::Directory(_) => Err(is_a_directory()), + }, + std::collections::btree_map::Entry::Vacant(_) => Err(not_found()), + } + } + /// Sets the last modified timestamp of the file stored at `path` to now. /// /// Creates a new file if the file at `path` doesn't exist. 
@@ -109,6 +129,38 @@ impl MemoryFileSystem { create_dir_all(&mut by_path, &normalized) } + + /// Deletes the directory at `path`. + /// + /// ## Errors + /// * If the directory is not empty + /// * The `path` is not a directory + /// * The `path` does not exist + pub fn remove_directory(&self, path: impl AsRef) -> Result<()> { + let mut by_path = self.inner.by_path.write().unwrap(); + let normalized = normalize_path(path.as_ref(), &self.inner.cwd); + + // Test if the directory is empty + // Skip the directory path itself + for (maybe_child, _) in by_path.range(normalized.clone()..).skip(1) { + if maybe_child.starts_with(&normalized) { + return Err(directory_not_empty()); + } else if !maybe_child.as_str().starts_with(normalized.as_str()) { + break; + } + } + + match by_path.entry(normalized.clone()) { + std::collections::btree_map::Entry::Occupied(entry) => match entry.get() { + Entry::Directory(_) => { + entry.remove(); + Ok(()) + } + Entry::File(_) => Err(not_a_directory()), + }, + std::collections::btree_map::Entry::Vacant(_) => Err(not_found()), + } + } } impl FileSystem for MemoryFileSystem { @@ -169,7 +221,7 @@ impl std::fmt::Debug for MemoryFileSystem { } struct MemoryFileSystemInner { - by_path: RwLock>, + by_path: RwLock>, cwd: Utf8PathBuf, } @@ -212,6 +264,10 @@ fn not_a_directory() -> std::io::Error { std::io::Error::new(std::io::ErrorKind::Other, "Not a directory") } +fn directory_not_empty() -> std::io::Error { + std::io::Error::new(std::io::ErrorKind::Other, "directory not empty") +} + /// Normalizes the path by removing `.` and `..` components and transform the path into an absolute path. 
/// /// Adapted from https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61 @@ -249,7 +305,7 @@ fn normalize_path(path: &FileSystemPath, cwd: &Utf8Path) -> Utf8PathBuf { } fn create_dir_all( - paths: &mut RwLockWriteGuard>, + paths: &mut RwLockWriteGuard>, normalized: &Utf8Path, ) -> Result<()> { let mut path = Utf8PathBuf::new(); @@ -271,7 +327,7 @@ fn create_dir_all( } fn get_or_create_file<'a>( - paths: &'a mut RwLockWriteGuard>, + paths: &'a mut RwLockWriteGuard>, normalized: &Utf8Path, ) -> Result<&'a mut File> { if let Some(parent) = normalized.parent() { @@ -293,10 +349,11 @@ fn get_or_create_file<'a>( #[cfg(test)] mod tests { - use crate::file_system::{FileSystem, FileSystemPath, MemoryFileSystem, Result}; use std::io::ErrorKind; use std::time::Duration; + use crate::file_system::{FileSystem, FileSystemPath, MemoryFileSystem, Result}; + /// Creates a file system with the given files. /// /// The content of all files will be empty. @@ -470,4 +527,94 @@ mod tests { Ok(()) } + + #[test] + fn remove_file() -> Result<()> { + let fs = with_files(["a/a.py", "b.py"]); + + fs.remove_file("a/a.py")?; + + assert!(!fs.exists(FileSystemPath::new("a/a.py"))); + + // It doesn't delete the enclosing directories + assert!(fs.exists(FileSystemPath::new("a"))); + + // It doesn't delete unrelated files. 
+ assert!(fs.exists(FileSystemPath::new("b.py"))); + + Ok(()) + } + + #[test] + fn remove_non_existing_file() { + let fs = with_files(["b.py"]); + + let error = fs.remove_file("a.py").unwrap_err(); + + assert_eq!(error.kind(), ErrorKind::NotFound); + } + + #[test] + fn remove_file_that_is_a_directory() -> Result<()> { + let fs = MemoryFileSystem::new(); + fs.create_directory_all("a")?; + + let error = fs.remove_file("a").unwrap_err(); + assert_eq!(error.kind(), ErrorKind::Other); + + Ok(()) + } + + #[test] + fn remove_directory() -> Result<()> { + let fs = with_files(["b.py"]); + fs.create_directory_all("a")?; + + fs.remove_directory("a")?; + + assert!(!fs.exists(FileSystemPath::new("a"))); + + // It doesn't delete unrelated files. + assert!(fs.exists(FileSystemPath::new("b.py"))); + + Ok(()) + } + + #[test] + fn remove_non_empty_directory() { + let fs = with_files(["a/a.py"]); + + let error = fs.remove_directory("a").unwrap_err(); + assert_eq!(error.kind(), ErrorKind::Other); + } + + #[test] + fn remove_directory_with_files_that_start_with_the_same_string() -> Result<()> { + let fs = with_files(["foo_bar.py", "foob.py"]); + fs.create_directory_all("foo")?; + + fs.remove_directory("foo").unwrap(); + + assert!(!fs.exists(FileSystemPath::new("foo"))); + assert!(fs.exists(FileSystemPath::new("foo_bar.py"))); + assert!(fs.exists(FileSystemPath::new("foob.py"))); + + Ok(()) + } + + #[test] + fn remove_non_existing_directory() { + let fs = MemoryFileSystem::new(); + + let error = fs.remove_directory("a").unwrap_err(); + assert_eq!(error.kind(), ErrorKind::NotFound); + } + + #[test] + fn remove_directory_that_is_a_file() { + let fs = with_files(["a"]); + + let error = fs.remove_directory("a").unwrap_err(); + assert_eq!(error.kind(), ErrorKind::Other); + } } diff --git a/crates/ruff_db/src/file_system/os.rs b/crates/ruff_db/src/file_system/os.rs index 417e06b248..cdf7ceb25a 100644 --- a/crates/ruff_db/src/file_system/os.rs +++ b/crates/ruff_db/src/file_system/os.rs @@ -1,6 
+1,7 @@ -use crate::file_system::{FileSystem, FileSystemPath, FileType, Metadata, Result}; use filetime::FileTime; +use crate::file_system::{FileSystem, FileSystemPath, FileType, Metadata, Result}; + pub struct OsFileSystem; impl OsFileSystem { @@ -15,6 +16,10 @@ impl OsFileSystem { fn permissions(_metadata: &std::fs::Metadata) -> Option { None } + + pub fn snapshot(&self) -> Self { + Self + } } impl FileSystem for OsFileSystem { diff --git a/crates/ruff_db/src/source.rs b/crates/ruff_db/src/source.rs index a4b0c6ffac..f7cd153d25 100644 --- a/crates/ruff_db/src/source.rs +++ b/crates/ruff_db/src/source.rs @@ -54,7 +54,6 @@ impl std::fmt::Debug for SourceText { #[cfg(test)] mod tests { - use filetime::FileTime; use salsa::EventKind; use ruff_source_file::OneIndexed; @@ -80,7 +79,7 @@ mod tests { db.file_system_mut() .write_file(path, "x = 20".to_string()) .unwrap(); - file.set_revision(&mut db).to(FileTime::now().into()); + file.touch(&mut db); assert_eq!(&*source_text(&db, file), "x = 20"); diff --git a/crates/ruff_db/src/vfs.rs b/crates/ruff_db/src/vfs.rs index 4faf8e72f6..be275609f3 100644 --- a/crates/ruff_db/src/vfs.rs +++ b/crates/ruff_db/src/vfs.rs @@ -259,6 +259,41 @@ impl VfsFile { db.vfs().read(db, path) } + + /// Refreshes the file metadata by querying the file system if needed. + /// TODO: The API should instead take all observed changes from the file system directly + /// and then apply the VfsFile status accordingly. But for now, this is sufficient. + pub fn touch_path(db: &mut dyn Db, path: &VfsPath) { + Self::touch_impl(db, path, None); + } + + pub fn touch(self, db: &mut dyn Db) { + let path = self.path(db).clone(); + Self::touch_impl(db, &path, Some(self)); + } + + /// Private method providing the implementation for [`Self::touch_path`] and [`Self::touch`]. 
+ fn touch_impl(db: &mut dyn Db, path: &VfsPath, file: Option) { + match path { + VfsPath::FileSystem(path) => { + let metadata = db.file_system().metadata(path); + + let (status, revision) = match metadata { + Ok(metadata) if metadata.file_type().is_file() => { + (FileStatus::Exists, metadata.revision()) + } + _ => (FileStatus::Deleted, FileRevision::zero()), + }; + + let file = file.unwrap_or_else(|| db.vfs().file_system(db, path)); + file.set_status(db).to(status); + file.set_revision(db).to(revision); + } + VfsPath::Vendored(_) => { + // Readonly, can never be out of date. + } + } + } } #[derive(Default, Debug)] diff --git a/crates/ruff_db/src/vfs/path.rs b/crates/ruff_db/src/vfs/path.rs index 9febc542b9..173eb60252 100644 --- a/crates/ruff_db/src/vfs/path.rs +++ b/crates/ruff_db/src/vfs/path.rs @@ -111,6 +111,7 @@ impl VfsPath { /// Returns `Some` if the path is a file system path that points to a path on disk. #[must_use] + #[inline] pub fn into_file_system_path_buf(self) -> Option { match self { VfsPath::FileSystem(path) => Some(path), @@ -118,12 +119,38 @@ impl VfsPath { } } + #[must_use] + #[inline] + pub fn as_file_system_path(&self) -> Option<&FileSystemPath> { + match self { + VfsPath::FileSystem(path) => Some(path.as_path()), + VfsPath::Vendored(_) => None, + } + } + /// Returns `true` if the path is a file system path that points to a path on disk. #[must_use] + #[inline] pub const fn is_file_system_path(&self) -> bool { matches!(self, VfsPath::FileSystem(_)) } + /// Returns `true` if the path is a vendored path. + #[must_use] + #[inline] + pub const fn is_vendored_path(&self) -> bool { + matches!(self, VfsPath::Vendored(_)) + } + + #[must_use] + #[inline] + pub fn as_vendored_path(&self) -> Option<&VendoredPath> { + match self { + VfsPath::Vendored(path) => Some(path.as_path()), + VfsPath::FileSystem(_) => None, + } + } + /// Yields the underlying [`str`] slice. 
pub fn as_str(&self) -> &str { match self { @@ -138,3 +165,84 @@ impl AsRef for VfsPath { self.as_str() } } + +impl From for VfsPath { + fn from(value: FileSystemPathBuf) -> Self { + Self::FileSystem(value) + } +} + +impl From<&FileSystemPath> for VfsPath { + fn from(value: &FileSystemPath) -> Self { + VfsPath::FileSystem(value.to_path_buf()) + } +} + +impl From for VfsPath { + fn from(value: VendoredPathBuf) -> Self { + Self::Vendored(value) + } +} + +impl From<&VendoredPath> for VfsPath { + fn from(value: &VendoredPath) -> Self { + Self::Vendored(value.to_path_buf()) + } +} + +impl PartialEq for VfsPath { + #[inline] + fn eq(&self, other: &FileSystemPath) -> bool { + self.as_file_system_path() + .is_some_and(|self_path| self_path == other) + } +} + +impl PartialEq for FileSystemPath { + #[inline] + fn eq(&self, other: &VfsPath) -> bool { + other == self + } +} + +impl PartialEq for VfsPath { + #[inline] + fn eq(&self, other: &FileSystemPathBuf) -> bool { + self == other.as_path() + } +} + +impl PartialEq for FileSystemPathBuf { + fn eq(&self, other: &VfsPath) -> bool { + other == self + } +} + +impl PartialEq for VfsPath { + #[inline] + fn eq(&self, other: &VendoredPath) -> bool { + self.as_vendored_path() + .is_some_and(|self_path| self_path == other) + } +} + +impl PartialEq for VendoredPath { + #[inline] + fn eq(&self, other: &VfsPath) -> bool { + other == self + } +} + +impl PartialEq for VfsPath { + #[inline] + fn eq(&self, other: &VendoredPathBuf) -> bool { + other.as_path() == self + } +} + +impl PartialEq for VendoredPathBuf { + #[inline] + fn eq(&self, other: &VfsPath) -> bool { + other == self + } +} diff --git a/crates/ruff_python_semantic/Cargo.toml b/crates/ruff_python_semantic/Cargo.toml index b694f2408e..b0650195e0 100644 --- a/crates/ruff_python_semantic/Cargo.toml +++ b/crates/ruff_python_semantic/Cargo.toml @@ -10,9 +10,6 @@ documentation = { workspace = true } repository = { workspace = true } license = { workspace = true } -[lib] -doctest = 
false - [dependencies] ruff_db = { workspace = true } ruff_index = { workspace = true } @@ -24,14 +21,17 @@ ruff_text_size = { workspace = true } bitflags = { workspace = true } is-macro = { workspace = true } salsa = { workspace = true, optional = true } +smol_str = { workspace = true, optional = true } tracing = { workspace = true, optional = true } rustc-hash = { workspace = true } [dev-dependencies] +anyhow = { workspace = true } ruff_python_parser = { workspace = true } +tempfile = { workspace = true } [lints] workspace = true [features] -red_knot = ["dep:salsa", "dep:tracing"] +red_knot = ["dep:salsa", "dep:smol_str", "dep:tracing"] diff --git a/crates/ruff_python_semantic/src/db.rs b/crates/ruff_python_semantic/src/db.rs index f66bd0e3ff..4765eb5c48 100644 --- a/crates/ruff_python_semantic/src/db.rs +++ b/crates/ruff_python_semantic/src/db.rs @@ -1,60 +1,101 @@ +use crate::module::resolver::{ + file_to_module, internal::ModuleNameIngredient, internal::ModuleResolverSearchPaths, + resolve_module_query, +}; use ruff_db::{Db as SourceDb, Upcast}; use salsa::DbWithJar; -// Salsa doesn't support a struct without fields, so allow the clippy lint for now. -#[allow(clippy::empty_structs_with_brackets)] #[salsa::jar(db=Db)] -pub struct Jar(); +pub struct Jar( + ModuleNameIngredient, + ModuleResolverSearchPaths, + resolve_module_query, + file_to_module, +); /// Database giving access to semantic information about a Python program. 
pub trait Db: SourceDb + DbWithJar + Upcast {} #[cfg(test)] -mod tests { +pub(crate) mod tests { use super::{Db, Jar}; - use ruff_db::file_system::{FileSystem, MemoryFileSystem}; + use ruff_db::file_system::{FileSystem, MemoryFileSystem, OsFileSystem}; use ruff_db::vfs::Vfs; use ruff_db::{Db as SourceDb, Jar as SourceJar, Upcast}; use salsa::DebugWithDb; + use std::sync::Arc; #[salsa::db(Jar, SourceJar)] pub(crate) struct TestDb { storage: salsa::Storage, vfs: Vfs, - file_system: MemoryFileSystem, + file_system: TestFileSystem, events: std::sync::Arc>>, } impl TestDb { - #[allow(unused)] pub(crate) fn new() -> Self { Self { storage: salsa::Storage::default(), - file_system: MemoryFileSystem::default(), + file_system: TestFileSystem::Memory(MemoryFileSystem::default()), events: std::sync::Arc::default(), vfs: Vfs::with_stubbed_vendored(), } } - #[allow(unused)] + /// Returns the memory file system. + /// + /// ## Panics + /// If this test db isn't using a memory file system. pub(crate) fn memory_file_system(&self) -> &MemoryFileSystem { - &self.file_system + if let TestFileSystem::Memory(fs) = &self.file_system { + fs + } else { + panic!("The test db is not using a memory file system"); + } } + /// Uses the real file system instead of the memory file system. + /// + /// This is useful for testing advanced file system features like permissions, symlinks, etc. + /// + /// Note that any files written to the memory file system won't be copied over. #[allow(unused)] - pub(crate) fn memory_file_system_mut(&mut self) -> &mut MemoryFileSystem { - &mut self.file_system + pub(crate) fn with_os_file_system(&mut self) { + self.file_system = TestFileSystem::Os(OsFileSystem); } #[allow(unused)] pub(crate) fn vfs_mut(&mut self) -> &mut Vfs { &mut self.vfs } + + /// Takes the salsa events. + /// + /// ## Panics + /// If there are any pending salsa snapshots. 
+ pub(crate) fn take_sale_events(&mut self) -> Vec { + let inner = Arc::get_mut(&mut self.events).expect("no pending salsa snapshots"); + + let events = inner.get_mut().unwrap(); + std::mem::take(&mut *events) + } + + /// Clears the salsa events. + /// + /// ## Panics + /// If there are any pending salsa snapshots. + pub(crate) fn clear_salsa_events(&mut self) { + self.take_sale_events(); + } } impl SourceDb for TestDb { fn file_system(&self) -> &dyn FileSystem { - &self.file_system + match &self.file_system { + TestFileSystem::Memory(fs) => fs, + TestFileSystem::Os(fs) => fs, + } } fn vfs(&self) -> &Vfs { @@ -83,9 +124,18 @@ mod tests { salsa::Snapshot::new(Self { storage: self.storage.snapshot(), vfs: self.vfs.snapshot(), - file_system: self.file_system.snapshot(), + file_system: match &self.file_system { + TestFileSystem::Memory(memory) => TestFileSystem::Memory(memory.snapshot()), + TestFileSystem::Os(fs) => TestFileSystem::Os(fs.snapshot()), + }, events: self.events.clone(), }) } } + + enum TestFileSystem { + Memory(MemoryFileSystem), + #[allow(unused)] + Os(OsFileSystem), + } } diff --git a/crates/ruff_python_semantic/src/lib.rs b/crates/ruff_python_semantic/src/lib.rs index 4a6be79c98..65f5ae1b3c 100644 --- a/crates/ruff_python_semantic/src/lib.rs +++ b/crates/ruff_python_semantic/src/lib.rs @@ -7,6 +7,8 @@ mod db; mod definition; mod globals; mod model; +#[cfg(feature = "red_knot")] +pub mod module; mod nodes; mod reference; mod scope; diff --git a/crates/ruff_python_semantic/src/module.rs b/crates/ruff_python_semantic/src/module.rs new file mode 100644 index 0000000000..26c7aef904 --- /dev/null +++ b/crates/ruff_python_semantic/src/module.rs @@ -0,0 +1,332 @@ +use std::fmt::Formatter; +use std::ops::Deref; +use std::sync::Arc; + +use ruff_db::file_system::FileSystemPath; +use ruff_db::vfs::{VfsFile, VfsPath}; +use ruff_python_stdlib::identifiers::is_identifier; + +use crate::Db; + +pub mod resolver; + +/// A module name, e.g. `foo.bar`. 
+/// +/// Always normalized to the absolute form (never a relative module name, i.e., never `.foo`). +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct ModuleName(smol_str::SmolStr); + +impl ModuleName { + /// Creates a new module name for `name`. Returns `Some` if `name` is a valid, absolute + /// module name and `None` otherwise. + /// + /// The module name is invalid if: + /// + /// * The name is empty + /// * The name is relative + /// * The name ends with a `.` + /// * The name contains a sequence of multiple dots + /// * A component of a name (the part between two dots) isn't a valid python identifier. + #[inline] + pub fn new(name: &str) -> Option { + Self::new_from_smol(smol_str::SmolStr::new(name)) + } + + /// Creates a new module name for `name` where `name` is a static string. + /// Returns `Some` if `name` is a valid, absolute module name and `None` otherwise. + /// + /// The module name is invalid if: + /// + /// * The name is empty + /// * The name is relative + /// * The name ends with a `.` + /// * The name contains a sequence of multiple dots + /// * A component of a name (the part between two dots) isn't a valid python identifier. 
+ /// + /// ## Examples + /// + /// ``` + /// use ruff_python_semantic::module::ModuleName; + /// + /// assert_eq!(ModuleName::new_static("foo.bar").as_deref(), Some("foo.bar")); + /// assert_eq!(ModuleName::new_static(""), None); + /// assert_eq!(ModuleName::new_static("..foo"), None); + /// assert_eq!(ModuleName::new_static(".foo"), None); + /// assert_eq!(ModuleName::new_static("foo."), None); + /// assert_eq!(ModuleName::new_static("foo..bar"), None); + /// assert_eq!(ModuleName::new_static("2000"), None); + /// ``` + #[inline] + pub fn new_static(name: &'static str) -> Option { + Self::new_from_smol(smol_str::SmolStr::new_static(name)) + } + + fn new_from_smol(name: smol_str::SmolStr) -> Option { + if name.is_empty() { + return None; + } + + if name.split('.').all(is_identifier) { + Some(Self(name)) + } else { + None + } + } + + /// An iterator over the components of the module name: + /// + /// # Examples + /// + /// ``` + /// use ruff_python_semantic::module::ModuleName; + /// + /// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().components().collect::>(), vec!["foo", "bar", "baz"]); + /// ``` + pub fn components(&self) -> impl DoubleEndedIterator { + self.0.split('.') + } + + /// The name of this module's immediate parent, if it has a parent. + /// + /// # Examples + /// + /// ``` + /// use ruff_python_semantic::module::ModuleName; + /// + /// assert_eq!(ModuleName::new_static("foo.bar").unwrap().parent(), Some(ModuleName::new_static("foo").unwrap())); + /// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().parent(), Some(ModuleName::new_static("foo.bar").unwrap())); + /// assert_eq!(ModuleName::new_static("root").unwrap().parent(), None); + /// ``` + pub fn parent(&self) -> Option { + let (parent, _) = self.0.rsplit_once('.')?; + + Some(Self(smol_str::SmolStr::new(parent))) + } + + /// Returns `true` if the name starts with `other`. + /// + /// This is equivalent to checking if `self` is a sub-module of `other`. 
+ /// + /// # Examples + /// + /// ``` + /// use ruff_python_semantic::module::ModuleName; + /// + /// assert!(ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap())); + /// + /// assert!(!ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("bar").unwrap())); + /// assert!(!ModuleName::new_static("foo_bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap())); + /// ``` + pub fn starts_with(&self, other: &ModuleName) -> bool { + let mut self_components = self.components(); + let other_components = other.components(); + + for other_component in other_components { + if self_components.next() != Some(other_component) { + return false; + } + } + + true + } + + #[inline] + pub fn as_str(&self) -> &str { + &self.0 + } + + fn from_relative_path(path: &FileSystemPath) -> Option { + let path = if path.ends_with("__init__.py") || path.ends_with("__init__.pyi") { + path.parent()? + } else { + path + }; + + let name = if let Some(parent) = path.parent() { + let mut name = String::with_capacity(path.as_str().len()); + + for component in parent.components() { + name.push_str(component.as_os_str().to_str()?); + name.push('.'); + } + + // SAFETY: Unwrap is safe here or `parent` would have returned `None`. + name.push_str(path.file_stem().unwrap()); + + smol_str::SmolStr::from(name) + } else { + smol_str::SmolStr::new(path.file_stem()?) + }; + + Some(Self(name)) + } +} + +impl Deref for ModuleName { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl PartialEq for ModuleName { + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq for str { + fn eq(&self, other: &ModuleName) -> bool { + self == other.as_str() + } +} + +impl std::fmt::Display for ModuleName { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + +/// Representation of a Python module. 
+#[derive(Clone, PartialEq, Eq)] +pub struct Module { + inner: Arc, +} + +impl Module { + /// The absolute name of the module (e.g. `foo.bar`) + pub fn name(&self) -> &ModuleName { + &self.inner.name + } + + /// The file to the source code that defines this module + pub fn file(&self) -> VfsFile { + self.inner.file + } + + /// The search path from which the module was resolved. + pub fn search_path(&self) -> &ModuleSearchPath { + &self.inner.search_path + } + + /// Determine whether this module is a single-file module or a package + pub fn kind(&self) -> ModuleKind { + self.inner.kind + } +} + +impl std::fmt::Debug for Module { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Module") + .field("name", &self.name()) + .field("kind", &self.kind()) + .field("file", &self.file()) + .field("search_path", &self.search_path()) + .finish() + } +} + +impl salsa::DebugWithDb for Module { + fn fmt(&self, f: &mut Formatter<'_>, db: &dyn Db) -> std::fmt::Result { + f.debug_struct("Module") + .field("name", &self.name()) + .field("kind", &self.kind()) + .field("file", &self.file().debug(db.upcast())) + .field("search_path", &self.search_path()) + .finish() + } +} + +#[derive(PartialEq, Eq)] +struct ModuleInner { + name: ModuleName, + kind: ModuleKind, + search_path: ModuleSearchPath, + file: VfsFile, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub enum ModuleKind { + /// A single-file module (e.g. `foo.py` or `foo.pyi`) + Module, + + /// A python package (`foo/__init__.py` or `foo/__init__.pyi`) + Package, +} + +/// A search path in which to search modules. +/// Corresponds to a path in [`sys.path`](https://docs.python.org/3/library/sys_path_init.html) at runtime. +/// +/// Cloning a search path is cheap because it's an `Arc`. +#[derive(Clone, PartialEq, Eq)] +pub struct ModuleSearchPath { + inner: Arc, +} + +impl ModuleSearchPath { + pub fn new

(path: P, kind: ModuleSearchPathKind) -> Self + where + P: Into, + { + Self { + inner: Arc::new(ModuleSearchPathInner { + path: path.into(), + kind, + }), + } + } + + /// Determine whether this is a first-party, third-party or standard-library search path + pub fn kind(&self) -> ModuleSearchPathKind { + self.inner.kind + } + + /// Return the location of the search path on the file system + pub fn path(&self) -> &VfsPath { + &self.inner.path + } +} + +impl std::fmt::Debug for ModuleSearchPath { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ModuleSearchPath") + .field("path", &self.inner.path) + .field("kind", &self.kind()) + .finish() + } +} + +#[derive(Eq, PartialEq)] +struct ModuleSearchPathInner { + path: VfsPath, + kind: ModuleSearchPathKind, +} + +/// Enumeration of the different kinds of search paths type checkers are expected to support. +/// +/// N.B. Although we don't implement `Ord` for this enum, they are ordered in terms of the +/// priority that we want to give these modules when resolving them. +/// This is roughly [the order given in the typing spec], but typeshed's stubs +/// for the standard library are moved higher up to match Python's semantics at runtime. +/// +/// [the order given in the typing spec]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, is_macro::Is)] +pub enum ModuleSearchPathKind { + /// "Extra" paths provided by the user in a config file, env var or CLI flag. + /// E.g. 
mypy's `MYPYPATH` env var, or pyright's `stubPath` configuration setting + Extra, + + /// Files in the project we're directly being invoked on + FirstParty, + + /// The `stdlib` directory of typeshed (either vendored or custom) + StandardLibrary, + + /// Stubs or runtime modules installed in site-packages + SitePackagesThirdParty, + + /// Vendored third-party stubs from typeshed + VendoredThirdParty, +} diff --git a/crates/ruff_python_semantic/src/module/resolver.rs b/crates/ruff_python_semantic/src/module/resolver.rs new file mode 100644 index 0000000000..1113b97591 --- /dev/null +++ b/crates/ruff_python_semantic/src/module/resolver.rs @@ -0,0 +1,944 @@ +use std::ops::Deref; +use std::sync::Arc; + +use ruff_db::file_system::{FileSystem, FileSystemPath, FileSystemPathBuf}; +use ruff_db::vfs::{system_path_to_file, vfs_path_to_file, VfsFile, VfsPath}; + +use crate::module::resolver::internal::ModuleResolverSearchPaths; +use crate::module::{ + Module, ModuleInner, ModuleKind, ModuleName, ModuleSearchPath, ModuleSearchPathKind, +}; +use crate::Db; + +const TYPESHED_STDLIB_DIRECTORY: &str = "stdlib"; + +/// Configures the module search paths for the module resolver. +/// +/// Must be called before calling any other module resolution functions. +pub fn set_module_resolution_settings(db: &mut dyn Db, config: ModuleResolutionSettings) { + // There's no concurrency issue here because we hold a `&mut dyn Db` reference. No other + // thread can mutate the `Db` while we're in this call, so using `try_get` to test if + // the settings have already been set is safe. + if let Some(existing) = ModuleResolverSearchPaths::try_get(db) { + existing + .set_search_paths(db) + .to(config.into_ordered_search_paths()); + } else { + ModuleResolverSearchPaths::new(db, config.into_ordered_search_paths()); + } +} + +/// Resolves a module name to a module. 
+#[tracing::instrument(level = "debug", skip(db))] +pub fn resolve_module(db: &dyn Db, module_name: ModuleName) -> Option { + let interned_name = internal::ModuleNameIngredient::new(db, module_name); + + resolve_module_query(db, interned_name) +} + +/// Salsa query that resolves an interned [`ModuleNameIngredient`] to a module. +/// +/// This query should not be called directly. Instead, use [`resolve_module`]. It only exists +/// because Salsa requires the module name to be an ingredient. +#[salsa::tracked] +pub(crate) fn resolve_module_query( + db: &dyn Db, + module_name: internal::ModuleNameIngredient, +) -> Option { + let name = module_name.name(db); + + let (search_path, module_file, kind) = resolve_name(db, name)?; + + let module = Module { + inner: Arc::new(ModuleInner { + name: name.clone(), + kind, + search_path, + file: module_file, + }), + }; + + Some(module) +} + +/// Resolves the module for the given path. +/// +/// Returns `None` if the path is not a module locatable via `sys.path`. +#[tracing::instrument(level = "debug", skip(db))] +pub fn path_to_module(db: &dyn Db, path: &VfsPath) -> Option { + // It's not entirely clear on first sight why this method calls `file_to_module` instead of + // it being the other way round, considering that the first thing that `file_to_module` does + // is to retrieve the file's path. + // + // The reason is that `file_to_module` is a tracked Salsa query and salsa queries require that + // all arguments are Salsa ingredients (something stored in Salsa). `Path`s aren't salsa ingredients but + // `VfsFile` is. So what we do here is to retrieve the `path`'s `VfsFile` so that we can make + // use of Salsa's caching and invalidation. + let file = vfs_path_to_file(db.upcast(), path)?; + file_to_module(db, file) +} + +/// Resolves the module for the file with the given id. +/// +/// Returns `None` if the file is not a module locatable via `sys.path`. 
+#[salsa::tracked] +#[tracing::instrument(level = "debug", skip(db))] +pub fn file_to_module(db: &dyn Db, file: VfsFile) -> Option { + let path = file.path(db.upcast()); + + let search_paths = module_search_paths(db); + + let relative_path = search_paths + .iter() + .find_map(|root| match (root.path(), path) { + (VfsPath::FileSystem(root_path), VfsPath::FileSystem(path)) => { + let relative_path = path.strip_prefix(root_path).ok()?; + Some(relative_path) + } + (VfsPath::Vendored(_), VfsPath::Vendored(_)) => { + todo!("Add support for vendored modules") + } + (VfsPath::Vendored(_), VfsPath::FileSystem(_)) + | (VfsPath::FileSystem(_), VfsPath::Vendored(_)) => None, + })?; + + let module_name = ModuleName::from_relative_path(relative_path)?; + + // Resolve the module name to see if Python would resolve the name to the same path. + // If it doesn't, then that means that multiple modules have the same name in different + // root paths, but that the module corresponding to `path` is in a lower priority search path, + // in which case we ignore it. + let module = resolve_module(db, module_name)?; + + if file == module.file() { + Some(module) + } else { + // This path is for a module with the same name but with a different precedence. For example: + // ``` + // src/foo.py + // src/foo/__init__.py + // ``` + // The module name of `src/foo.py` is `foo`, but the module loaded by Python is `src/foo/__init__.py`. + // That means we need to ignore `src/foo.py` even though it resolves to the same module name. + None + } +} + +/// Configures the [`ModuleSearchPath`]s that are used to resolve modules. +#[derive(Eq, PartialEq, Debug)] +pub struct ModuleResolutionSettings { + /// List of user-provided paths that should take first priority in the module resolution. + /// Examples in other type checkers are mypy's MYPYPATH environment variable, + /// or pyright's stubPath configuration setting. 
+ pub extra_paths: Vec, + + /// The root of the workspace, used for finding first-party modules. + pub workspace_root: FileSystemPathBuf, + + /// The path to the user's `site-packages` directory, where third-party packages from ``PyPI`` are installed. + pub site_packages: Option, + + /// Optional path to standard-library typeshed stubs. + /// Currently this has to be a directory that exists on disk. + /// + /// (TODO: fall back to vendored stubs if no custom directory is provided.) + pub custom_typeshed: Option, +} + +impl ModuleResolutionSettings { + /// Implementation of PEP 561's module resolution order + /// (with some small, deliberate, differences) + fn into_ordered_search_paths(self) -> OrderedSearchPaths { + let ModuleResolutionSettings { + extra_paths, + workspace_root, + site_packages, + custom_typeshed, + } = self; + + let mut paths: Vec<_> = extra_paths + .into_iter() + .map(|path| ModuleSearchPath::new(path, ModuleSearchPathKind::Extra)) + .collect(); + + paths.push(ModuleSearchPath::new( + workspace_root, + ModuleSearchPathKind::FirstParty, + )); + + // TODO fallback to vendored typeshed stubs if no custom typeshed directory is provided by the user + if let Some(custom_typeshed) = custom_typeshed { + paths.push(ModuleSearchPath::new( + custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY), + ModuleSearchPathKind::StandardLibrary, + )); + } + + // TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step + if let Some(site_packages) = site_packages { + paths.push(ModuleSearchPath::new( + site_packages, + ModuleSearchPathKind::SitePackagesThirdParty, + )); + } + + OrderedSearchPaths(paths) + } +} + +/// A resolved module resolution order, implementing PEP 561 +/// (with some small, deliberate differences) +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub(crate) struct OrderedSearchPaths(Vec); + +impl Deref for OrderedSearchPaths { + type Target = [ModuleSearchPath]; + + fn deref(&self) -> &Self::Target { + &self.0 
+ } +} + +// The singleton methods generated by salsa are all `pub` instead of `pub(crate)` which triggers +// `unreachable_pub`. Work around this by creating a module and allowing `unreachable_pub` for it. +// Salsa also generates uses of `_db` variables for `interned` which triggers `clippy::used_underscore_binding`. Suppress that too. +// TODO(micha): Contribute a fix for this upstream where the singleton methods have the same visibility as the struct. +#[allow(unreachable_pub, clippy::used_underscore_binding)] +pub(crate) mod internal { + use crate::module::resolver::OrderedSearchPaths; + use crate::module::ModuleName; + + #[salsa::input(singleton)] + pub(crate) struct ModuleResolverSearchPaths { + #[return_ref] + pub(super) search_paths: OrderedSearchPaths, + } + + /// A thin wrapper around `ModuleName` to make it a Salsa ingredient. + /// + /// This is needed because Salsa requires that all query arguments are salsa ingredients. + #[salsa::interned] + pub(crate) struct ModuleNameIngredient { + #[return_ref] + pub(super) name: ModuleName, + } +} + +fn module_search_paths(db: &dyn Db) -> &[ModuleSearchPath] { + ModuleResolverSearchPaths::get(db).search_paths(db) +} + +/// Given a module name, attempt to resolve it by walking the configured module search paths in order, +/// returning the search path, file and kind of the first match. +fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(ModuleSearchPath, VfsFile, ModuleKind)> { + let search_paths = module_search_paths(db); + + for search_path in search_paths { + let mut components = name.components(); + let module_name = components.next_back()?; + + let VfsPath::FileSystem(fs_search_path) = search_path.path() else { + todo!("Vendored search paths are not yet supported"); + }; + + match resolve_package(db.file_system(), fs_search_path, components) { + Ok(resolved_package) => { + let mut package_path = resolved_package.path; + + package_path.push(module_name); + + // Must be a `__init__.pyi` or `__init__.py` or it isn't a package.
+ let kind = if db.file_system().is_directory(&package_path) { + package_path.push("__init__"); + ModuleKind::Package + } else { + ModuleKind::Module + }; + + // TODO Implement full https://peps.python.org/pep-0561/#type-checker-module-resolution-order resolution + let stub = package_path.with_extension("pyi"); + + if let Some(stub) = system_path_to_file(db.upcast(), &stub) { + return Some((search_path.clone(), stub, kind)); + } + + let module = package_path.with_extension("py"); + + if let Some(module) = system_path_to_file(db.upcast(), &module) { + return Some((search_path.clone(), module, kind)); + } + + // For regular packages, don't search the next search path. All files of that + // package must be in the same location + if resolved_package.kind.is_regular_package() { + return None; + } + } + Err(parent_kind) => { + if parent_kind.is_regular_package() { + // For regular packages, don't search the next search path. + return None; + } + } + } + } + + None +} + +fn resolve_package<'a, I>( + fs: &dyn FileSystem, + module_search_path: &FileSystemPath, + components: I, +) -> Result +where + I: Iterator, +{ + let mut package_path = module_search_path.to_path_buf(); + + // `true` if inside a folder that is a namespace package (has no `__init__.py`). + // Namespace packages are special because they can be spread across multiple search paths. + // https://peps.python.org/pep-0420/ + let mut in_namespace_package = false; + + // `true` if resolving a sub-package. For example, `true` when resolving `bar` of `foo.bar`. + let mut in_sub_package = false; + + // For `foo.bar.baz`, test that `foo` and `bar` both contain a `__init__.py`.
+ for folder in components { + package_path.push(folder); + + let has_init_py = fs.is_file(&package_path.join("__init__.py")) + || fs.is_file(&package_path.join("__init__.pyi")); + + if has_init_py { + in_namespace_package = false; + } else if fs.is_directory(&package_path) { + // A directory without an `__init__.py` is a namespace package, continue with the next folder. + in_namespace_package = true; + } else if in_namespace_package { + // Package not found but it is part of a namespace package. + return Err(PackageKind::Namespace); + } else if in_sub_package { + // A regular sub package wasn't found. + return Err(PackageKind::Regular); + } else { + // We couldn't find `foo` for `foo.bar.baz`, search the next search path. + return Err(PackageKind::Root); + } + + in_sub_package = true; + } + + let kind = if in_namespace_package { + PackageKind::Namespace + } else if in_sub_package { + PackageKind::Regular + } else { + PackageKind::Root + }; + + Ok(ResolvedPackage { + kind, + path: package_path, + }) +} + +#[derive(Debug)] +struct ResolvedPackage { + path: FileSystemPathBuf, + kind: PackageKind, +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +enum PackageKind { + /// A root package or module. E.g. `foo` in `foo.bar.baz` or just `foo`. + Root, + + /// A regular sub-package where the parent contains an `__init__.py`. + /// + /// For example, `bar` in `foo.bar` when the `foo` directory contains an `__init__.py`. + Regular, + + /// A sub-package in a namespace package. A namespace package is a package without an `__init__.py`. + /// + /// For example, `bar` in `foo.bar` if the `foo` directory contains no `__init__.py`. 
+ Namespace, +} + +impl PackageKind { + const fn is_regular_package(self) -> bool { + matches!(self, PackageKind::Regular) + } +} + +#[cfg(test)] +mod tests { + + use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf}; + use ruff_db::vfs::{system_path_to_file, VfsFile, VfsPath}; + + use crate::db::tests::TestDb; + use crate::module::{ModuleKind, ModuleName}; + + use super::{ + path_to_module, resolve_module, set_module_resolution_settings, ModuleResolutionSettings, + TYPESHED_STDLIB_DIRECTORY, + }; + + struct TestCase { + db: TestDb, + + src: FileSystemPathBuf, + custom_typeshed: FileSystemPathBuf, + site_packages: FileSystemPathBuf, + } + + fn create_resolver() -> std::io::Result { + let mut db = TestDb::new(); + + let src = FileSystemPath::new("src").to_path_buf(); + let site_packages = FileSystemPath::new("site_packages").to_path_buf(); + let custom_typeshed = FileSystemPath::new("typeshed").to_path_buf(); + + let fs = db.memory_file_system(); + + fs.create_directory_all(&src)?; + fs.create_directory_all(&site_packages)?; + fs.create_directory_all(&custom_typeshed)?; + + let settings = ModuleResolutionSettings { + extra_paths: vec![], + workspace_root: src.clone(), + site_packages: Some(site_packages.clone()), + custom_typeshed: Some(custom_typeshed.clone()), + }; + + set_module_resolution_settings(&mut db, settings); + + Ok(TestCase { + db, + src, + custom_typeshed, + site_packages, + }) + } + + #[test] + fn first_party_module() -> anyhow::Result<()> { + let TestCase { db, src, .. 
} = create_resolver()?; + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + let foo_path = src.join("foo.py"); + db.memory_file_system() + .write_file(&foo_path, "print('Hello, world!')")?; + + let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); + + assert_eq!( + Some(&foo_module), + resolve_module(&db, foo_module_name.clone()).as_ref() + ); + + assert_eq!("foo", foo_module.name()); + assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(ModuleKind::Module, foo_module.kind()); + assert_eq!(&foo_path, foo_module.file().path(&db)); + + assert_eq!( + Some(foo_module), + path_to_module(&db, &VfsPath::FileSystem(foo_path)) + ); + + Ok(()) + } + + #[test] + fn stdlib() -> anyhow::Result<()> { + let TestCase { + db, + custom_typeshed, + .. + } = create_resolver()?; + + let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY); + let functools_path = stdlib_dir.join("functools.py"); + db.memory_file_system() + .write_file(&functools_path, "def update_wrapper(): ...")?; + + let functools_module_name = ModuleName::new_static("functools").unwrap(); + let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap(); + + assert_eq!( + Some(&functools_module), + resolve_module(&db, functools_module_name).as_ref() + ); + + assert_eq!(&stdlib_dir, functools_module.search_path().path()); + assert_eq!(ModuleKind::Module, functools_module.kind()); + assert_eq!(&functools_path.clone(), functools_module.file().path(&db)); + + assert_eq!( + Some(functools_module), + path_to_module(&db, &VfsPath::FileSystem(functools_path)) + ); + + Ok(()) + } + + #[test] + fn first_party_precedence_over_stdlib() -> anyhow::Result<()> { + let TestCase { + db, + src, + custom_typeshed, + .. 
+ } = create_resolver()?; + + let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY); + let stdlib_functools_path = stdlib_dir.join("functools.py"); + let first_party_functools_path = src.join("functools.py"); + + db.memory_file_system().write_files([ + (&stdlib_functools_path, "def update_wrapper(): ..."), + (&first_party_functools_path, "def update_wrapper(): ..."), + ])?; + + let functools_module_name = ModuleName::new_static("functools").unwrap(); + let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap(); + + assert_eq!( + Some(&functools_module), + resolve_module(&db, functools_module_name).as_ref() + ); + assert_eq!(&src, functools_module.search_path().path()); + assert_eq!(ModuleKind::Module, functools_module.kind()); + assert_eq!( + &first_party_functools_path.clone(), + functools_module.file().path(&db) + ); + + assert_eq!( + Some(functools_module), + path_to_module(&db, &VfsPath::FileSystem(first_party_functools_path)) + ); + + Ok(()) + } + + // TODO: Port typeshed test case. Porting isn't possible at the moment because the vendored zip + // is part of the red knot crate + // #[test] + // fn typeshed_zip_created_at_build_time() -> anyhow::Result<()> { + // // The file path here is hardcoded in this crate's `build.rs` script. + // // Luckily this crate will fail to build if this file isn't available at build time. 
+ // const TYPESHED_ZIP_BYTES: &[u8] = + // include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip")); + // assert!(!TYPESHED_ZIP_BYTES.is_empty()); + // let mut typeshed_zip_archive = ZipArchive::new(Cursor::new(TYPESHED_ZIP_BYTES))?; + // + // let path_to_functools = Path::new("stdlib").join("functools.pyi"); + // let mut functools_module_stub = typeshed_zip_archive + // .by_name(path_to_functools.to_str().unwrap()) + // .unwrap(); + // assert!(functools_module_stub.is_file()); + // + // let mut functools_module_stub_source = String::new(); + // functools_module_stub.read_to_string(&mut functools_module_stub_source)?; + // + // assert!(functools_module_stub_source.contains("def update_wrapper(")); + // Ok(()) + // } + + #[test] + fn resolve_package() -> anyhow::Result<()> { + let TestCase { src, db, .. } = create_resolver()?; + + let foo_dir = src.join("foo"); + let foo_path = foo_dir.join("__init__.py"); + + db.memory_file_system() + .write_file(&foo_path, "print('Hello, world!')")?; + + let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap(); + + assert_eq!("foo", foo_module.name()); + assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(&foo_path, foo_module.file().path(&db)); + + assert_eq!( + Some(&foo_module), + path_to_module(&db, &VfsPath::FileSystem(foo_path)).as_ref() + ); + + // Resolving by directory doesn't resolve to the init file. + assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_dir))); + + Ok(()) + } + + #[test] + fn package_priority_over_module() -> anyhow::Result<()> { + let TestCase { db, src, .. 
} = create_resolver()?; + + let foo_dir = src.join("foo"); + let foo_init = foo_dir.join("__init__.py"); + + db.memory_file_system() + .write_file(&foo_init, "print('Hello, world!')")?; + + let foo_py = src.join("foo.py"); + db.memory_file_system() + .write_file(&foo_py, "print('Hello, world!')")?; + + let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap(); + + assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(&foo_init, foo_module.file().path(&db)); + assert_eq!(ModuleKind::Package, foo_module.kind()); + + assert_eq!( + Some(foo_module), + path_to_module(&db, &VfsPath::FileSystem(foo_init)) + ); + assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py))); + + Ok(()) + } + + #[test] + fn typing_stub_over_module() -> anyhow::Result<()> { + let TestCase { db, src, .. } = create_resolver()?; + + let foo_stub = src.join("foo.pyi"); + let foo_py = src.join("foo.py"); + db.memory_file_system() + .write_files([(&foo_stub, "x: int"), (&foo_py, "print('Hello, world!')")])?; + + let foo = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap(); + + assert_eq!(&src, foo.search_path().path()); + assert_eq!(&foo_stub, foo.file().path(&db)); + + assert_eq!( + Some(foo), + path_to_module(&db, &VfsPath::FileSystem(foo_stub)) + ); + assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py))); + + Ok(()) + } + + #[test] + fn sub_packages() -> anyhow::Result<()> { + let TestCase { db, src, .. 
} = create_resolver()?; + + let foo = src.join("foo"); + let bar = foo.join("bar"); + let baz = bar.join("baz.py"); + + db.memory_file_system().write_files([ + (&foo.join("__init__.py"), ""), + (&bar.join("__init__.py"), ""), + (&baz, "print('Hello, world!')"), + ])?; + + let baz_module = + resolve_module(&db, ModuleName::new_static("foo.bar.baz").unwrap()).unwrap(); + + assert_eq!(&src, baz_module.search_path().path()); + assert_eq!(&baz, baz_module.file().path(&db)); + + assert_eq!( + Some(baz_module), + path_to_module(&db, &VfsPath::FileSystem(baz)) + ); + + Ok(()) + } + + #[test] + fn namespace_package() -> anyhow::Result<()> { + let TestCase { + db, + src, + site_packages, + .. + } = create_resolver()?; + + // From [PEP420](https://peps.python.org/pep-0420/#nested-namespace-packages). + // But uses `src` for `project1` and `site_packages` for `project2`. + // ``` + // src + // parent + // child + // one.py + // site_packages + // parent + // child + // two.py + // ``` + + let parent1 = src.join("parent"); + let child1 = parent1.join("child"); + let one = child1.join("one.py"); + + let parent2 = site_packages.join("parent"); + let child2 = parent2.join("child"); + let two = child2.join("two.py"); + + db.memory_file_system().write_files([ + (&one, "print('Hello, world!')"), + (&two, "print('Hello, world!')"), + ])?; + + let one_module = + resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap(); + + assert_eq!( + Some(one_module), + path_to_module(&db, &VfsPath::FileSystem(one)) + ); + + let two_module = + resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap()).unwrap(); + assert_eq!( + Some(two_module), + path_to_module(&db, &VfsPath::FileSystem(two)) + ); + + Ok(()) + } + + #[test] + fn regular_package_in_namespace_package() -> anyhow::Result<()> { + let TestCase { + db, + src, + site_packages, + ..
+ } = create_resolver()?; + + // Adapted test case from the [PEP420 examples](https://peps.python.org/pep-0420/#nested-namespace-packages). + // The `src/parent/child` package is a regular package. Therefore, `site_packages/parent/child/two.py` should not be resolved. + // ``` + // src + // parent + // child + // one.py + // site_packages + // parent + // child + // two.py + // ``` + + let parent1 = src.join("parent"); + let child1 = parent1.join("child"); + let one = child1.join("one.py"); + + let parent2 = site_packages.join("parent"); + let child2 = parent2.join("child"); + let two = child2.join("two.py"); + + db.memory_file_system().write_files([ + (&child1.join("__init__.py"), "print('Hello, world!')"), + (&one, "print('Hello, world!')"), + (&two, "print('Hello, world!')"), + ])?; + + let one_module = + resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap(); + + assert_eq!( + Some(one_module), + path_to_module(&db, &VfsPath::FileSystem(one)) + ); + + assert_eq!( + None, + resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap()) + ); + Ok(()) + } + + #[test] + fn module_search_path_priority() -> anyhow::Result<()> { + let TestCase { + db, + src, + site_packages, + ..
+ } = create_resolver()?; + + let foo_src = src.join("foo.py"); + let foo_site_packages = site_packages.join("foo.py"); + + db.memory_file_system() + .write_files([(&foo_src, ""), (&foo_site_packages, "")])?; + + let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap(); + + assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(&foo_src, foo_module.file().path(&db)); + + assert_eq!( + Some(foo_module), + path_to_module(&db, &VfsPath::FileSystem(foo_src)) + ); + assert_eq!( + None, + path_to_module(&db, &VfsPath::FileSystem(foo_site_packages)) + ); + + Ok(()) + } + + #[test] + #[cfg(target_family = "unix")] + fn symlink() -> anyhow::Result<()> { + let TestCase { + mut db, + src, + site_packages, + custom_typeshed, + } = create_resolver()?; + + db.with_os_file_system(); + + let temp_dir = tempfile::tempdir()?; + let root = FileSystemPath::from_std_path(temp_dir.path()).unwrap(); + + let src = root.join(src); + let site_packages = root.join(site_packages); + let custom_typeshed = root.join(custom_typeshed); + + let foo = src.join("foo.py"); + let bar = src.join("bar.py"); + + std::fs::create_dir_all(src.as_std_path())?; + std::fs::create_dir_all(site_packages.as_std_path())?; + std::fs::create_dir_all(custom_typeshed.as_std_path())?; + + std::fs::write(foo.as_std_path(), "")?; + std::os::unix::fs::symlink(foo.as_std_path(), bar.as_std_path())?; + + let settings = ModuleResolutionSettings { + extra_paths: vec![], + workspace_root: src.clone(), + site_packages: Some(site_packages), + custom_typeshed: Some(custom_typeshed), + }; + + set_module_resolution_settings(&mut db, settings); + + let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap(); + let bar_module = resolve_module(&db, ModuleName::new_static("bar").unwrap()).unwrap(); + + assert_ne!(foo_module, bar_module); + + assert_eq!(&src, foo_module.search_path().path()); + assert_eq!(&foo, foo_module.file().path(&db)); + + // `foo` and `bar` shouldn't 
resolve to the same file + + assert_eq!(&src, bar_module.search_path().path()); + assert_eq!(&bar, bar_module.file().path(&db)); + assert_eq!(&foo, foo_module.file().path(&db)); + + assert_ne!(&foo_module, &bar_module); + + assert_eq!( + Some(foo_module), + path_to_module(&db, &VfsPath::FileSystem(foo)) + ); + assert_eq!( + Some(bar_module), + path_to_module(&db, &VfsPath::FileSystem(bar)) + ); + + Ok(()) + } + + #[test] + fn deleting_an_unrealted_file_doesnt_change_module_resolution() -> anyhow::Result<()> { + let TestCase { mut db, src, .. } = create_resolver()?; + + let foo_path = src.join("foo.py"); + let bar_path = src.join("bar.py"); + + db.memory_file_system() + .write_files([(&foo_path, "x = 1"), (&bar_path, "y = 2")])?; + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); + + let bar = system_path_to_file(&db, &bar_path).expect("bar.py to exist"); + + db.clear_salsa_events(); + + // Delete `bar.py` + db.memory_file_system().remove_file(&bar_path)?; + bar.touch(&mut db); + + // Re-query the foo module. The foo module should still be cached because `bar.py` isn't relevant + // for resolving `foo`. + + let foo_module2 = resolve_module(&db, foo_module_name); + + assert!(!db + .take_sale_events() + .iter() + .any(|event| { matches!(event.kind, salsa::EventKind::WillExecute { .. }) })); + + assert_eq!(Some(foo_module), foo_module2); + + Ok(()) + } + + #[test] + fn adding_a_file_on_which_the_module_resolution_depends_on_invalidates_the_query( + ) -> anyhow::Result<()> { + let TestCase { mut db, src, .. 
} = create_resolver()?; + let foo_path = src.join("foo.py"); + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + assert_eq!(resolve_module(&db, foo_module_name.clone()), None); + + // Now write the foo file + db.memory_file_system().write_file(&foo_path, "x = 1")?; + VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_path.clone())); + let foo_file = system_path_to_file(&db, &foo_path).expect("foo.py to exist"); + + let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve"); + assert_eq!(foo_file, foo_module.file()); + + Ok(()) + } + + #[test] + fn removing_a_file_that_the_module_resolution_depends_on_invalidates_the_query( + ) -> anyhow::Result<()> { + let TestCase { mut db, src, .. } = create_resolver()?; + let foo_path = src.join("foo.py"); + let foo_init_path = src.join("foo/__init__.py"); + + db.memory_file_system() + .write_files([(&foo_path, "x = 1"), (&foo_init_path, "x = 2")])?; + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + let foo_module = resolve_module(&db, foo_module_name.clone()).expect("foo module to exist"); + + assert_eq!(&foo_init_path, foo_module.file().path(&db)); + + // Delete `foo/__init__.py` and the `foo` folder. `foo` should now resolve to `foo.py` + db.memory_file_system().remove_file(&foo_init_path)?; + db.memory_file_system() + .remove_directory(foo_init_path.parent().unwrap())?; + VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_init_path.clone())); + + let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve"); + assert_eq!(&foo_path, foo_module.file().path(&db)); + + Ok(()) + } +}