diff --git a/Cargo.lock b/Cargo.lock
index 9c8c36681e..ec1b10d268 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3936,11 +3936,16 @@ name = "ty_project"
version = "0.0.0"
dependencies = [
"anyhow",
+ "camino",
+ "colored 3.0.0",
"crossbeam",
+ "globset",
"insta",
"notify",
"pep440_rs",
"rayon",
+ "regex",
+ "regex-automata 0.4.9",
"ruff_cache",
"ruff_db",
"ruff_macros",
diff --git a/Cargo.toml b/Cargo.toml
index d86984336d..4f55b171b9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -126,6 +126,7 @@ quote = { version = "1.0.23" }
rand = { version = "0.9.0" }
rayon = { version = "1.10.0" }
regex = { version = "1.10.2" }
+regex-automata = { version = "0.4.9" }
rustc-hash = { version = "2.0.0" }
rustc-stable-hash = { version = "0.1.2" }
# When updating salsa, make sure to also update the revision in `fuzz/Cargo.toml`
@@ -165,7 +166,7 @@ tracing-subscriber = { version = "0.3.18", default-features = false, features =
"env-filter",
"fmt",
"ansi",
- "smallvec",
+ "smallvec"
] }
tryfn = { version = "0.2.1" }
typed-arena = { version = "2.0.2" }
@@ -175,7 +176,11 @@ unicode-width = { version = "0.2.0" }
unicode_names2 = { version = "1.2.2" }
unicode-normalization = { version = "0.1.23" }
url = { version = "2.5.0" }
-uuid = { version = "1.6.1", features = ["v4", "fast-rng", "macro-diagnostics"] }
+uuid = { version = "1.6.1", features = [
+ "v4",
+ "fast-rng",
+ "macro-diagnostics",
+] }
walkdir = { version = "2.3.2" }
wasm-bindgen = { version = "0.2.92" }
wasm-bindgen-test = { version = "0.3.42" }
@@ -210,8 +215,8 @@ must_use_candidate = "allow"
similar_names = "allow"
single_match_else = "allow"
too_many_lines = "allow"
-needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block.
-unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often.
+needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block.
+unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often.
# Without the hashes we run into a `rustfmt` bug in some snapshot tests, see #13250
needless_raw_string_hashes = "allow"
# Disallowed restriction lints
diff --git a/crates/ruff_db/src/diagnostic/mod.rs b/crates/ruff_db/src/diagnostic/mod.rs
index 8c373ca1e4..07ad6371e5 100644
--- a/crates/ruff_db/src/diagnostic/mod.rs
+++ b/crates/ruff_db/src/diagnostic/mod.rs
@@ -665,6 +665,9 @@ pub enum DiagnosticId {
/// No rule with the given name exists.
UnknownRule,
+
+ /// A glob pattern doesn't follow the expected syntax.
+ InvalidGlob,
}
impl DiagnosticId {
@@ -699,6 +702,7 @@ impl DiagnosticId {
DiagnosticId::Lint(name) => name.as_str(),
DiagnosticId::RevealedType => "revealed-type",
DiagnosticId::UnknownRule => "unknown-rule",
+ DiagnosticId::InvalidGlob => "invalid-glob",
}
}
diff --git a/crates/ruff_db/src/system/path.rs b/crates/ruff_db/src/system/path.rs
index c20b2b19f5..07ea7d0b71 100644
--- a/crates/ruff_db/src/system/path.rs
+++ b/crates/ruff_db/src/system/path.rs
@@ -45,6 +45,30 @@ impl SystemPath {
SystemPath::from_std_path(dunce::simplified(self.as_std_path())).unwrap()
}
+ /// Returns `true` if the `SystemPath` is absolute, i.e., if it is independent of
+ /// the current directory.
+ ///
+ /// * On Unix, a path is absolute if it starts with the root, so
+ /// `is_absolute` and [`has_root`] are equivalent.
+ ///
+ /// * On Windows, a path is absolute if it has a prefix and starts with the
+ /// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use ruff_db::system::SystemPath;
+ ///
+ /// assert!(!SystemPath::new("foo.txt").is_absolute());
+ /// ```
+ ///
+ /// [`has_root`]: Utf8Path::has_root
+ #[inline]
+ #[must_use]
+ pub fn is_absolute(&self) -> bool {
+ self.0.is_absolute()
+ }
+
/// Extracts the file extension, if possible.
///
/// The extension is:
@@ -538,6 +562,10 @@ impl SystemPathBuf {
self.0.into_std_path_buf()
}
+ pub fn into_string(self) -> String {
+ self.0.into_string()
+ }
+
#[inline]
pub fn as_path(&self) -> &SystemPath {
SystemPath::new(&self.0)
diff --git a/crates/ty/docs/cli.md b/crates/ty/docs/cli.md
index a68b04eb35..56a6ae32c6 100644
--- a/crates/ty/docs/cli.md
+++ b/crates/ty/docs/cli.md
@@ -51,6 +51,8 @@ over all configuration files.
While ty configuration can be included in a pyproject.toml file, it is not allowed in this context.
May also be set with the TY_CONFIG_FILE environment variable.
--error ruleTreat the given rule as having severity 'error'. Can be specified multiple times.
--error-on-warningUse exit code 1 if there are any warning-level diagnostics
+--exclude excludeGlob patterns for files to exclude from type checking.
+Uses gitignore-style syntax to exclude files and directories from type checking. Supports patterns like tests/, *.tmp, **/__pycache__/**.
--exit-zeroAlways use exit code 0, even when there are error-level diagnostics
Additional path to use as a module-resolution source (can be passed multiple times)
--help, -hPrint help (see a summary with '-h')
diff --git a/crates/ty/docs/configuration.md b/crates/ty/docs/configuration.md
index a5333dc931..25cdb70a75 100644
--- a/crates/ty/docs/configuration.md
+++ b/crates/ty/docs/configuration.md
@@ -153,6 +153,67 @@ typeshed = "/path/to/custom/typeshed"
## `src`
+#### `exclude`
+
+A list of file and directory patterns to exclude from type checking.
+
+Patterns follow a syntax similar to `.gitignore`:
+- `./src/` matches only a directory
+- `./src` matches both files and directories
+- `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`)
+- `*` matches any (possibly empty) sequence of characters (except `/`).
+- `**` matches zero or more path components.
+ This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error.
+ A sequence of more than two consecutive `*` characters is also invalid.
+- `?` matches any single character except `/`
+- `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode,
+ so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid.
+- `!pattern` negates a pattern (undoes the exclusion of files that would otherwise be excluded)
+
+By default, the following directories are excluded:
+
+- `.bzr`
+- `.direnv`
+- `.eggs`
+- `.git`
+- `.git-rewrite`
+- `.hg`
+- `.mypy_cache`
+- `.nox`
+- `.pants.d`
+- `.pytype`
+- `.ruff_cache`
+- `.svn`
+- `.tox`
+- `.venv`
+- `__pypackages__`
+- `_build`
+- `buck-out`
+- `dist`
+- `node_modules`
+- `venv`
+
+You can override any default exclude by using a negated pattern. For example,
+to re-include `dist` use `exclude = ["!dist"]`
+
+**Default value**: `null`
+
+**Type**: `list[str]`
+
+**Example usage** (`pyproject.toml`):
+
+```toml
+[tool.ty.src]
+exclude = [
+ "generated",
+ "*.proto",
+ "tests/fixtures/**",
+ "!tests/fixtures/important.py" # Include this one file
+]
+```
+
+---
+
#### `respect-ignore-files`
Whether to automatically exclude files that are ignored by `.ignore`,
diff --git a/crates/ty/src/args.rs b/crates/ty/src/args.rs
index 8d02b941d8..99f0428f0c 100644
--- a/crates/ty/src/args.rs
+++ b/crates/ty/src/args.rs
@@ -5,7 +5,9 @@ use clap::{ArgAction, ArgMatches, Error, Parser};
use ruff_db::system::SystemPathBuf;
use ty_project::combine::Combine;
use ty_project::metadata::options::{EnvironmentOptions, Options, SrcOptions, TerminalOptions};
-use ty_project::metadata::value::{RangedValue, RelativePathBuf, ValueSource};
+use ty_project::metadata::value::{
+ RangedValue, RelativeExcludePattern, RelativePathBuf, ValueSource,
+};
use ty_python_semantic::lint;
#[derive(Debug, Parser)]
@@ -148,6 +150,13 @@ pub(crate) struct CheckCommand {
respect_ignore_files: Option,
#[clap(long, overrides_with("respect_ignore_files"), hide = true)]
no_respect_ignore_files: bool,
+
+ /// Glob patterns for files to exclude from type checking.
+ ///
+ /// Uses gitignore-style syntax to exclude files and directories from type checking.
+ /// Supports patterns like `tests/`, `*.tmp`, `**/__pycache__/**`.
+ #[arg(long, help_heading = "File selection")]
+ exclude: Option>,
}
impl CheckCommand {
@@ -195,6 +204,12 @@ impl CheckCommand {
}),
src: Some(SrcOptions {
respect_ignore_files,
+ exclude: self.exclude.map(|excludes| {
+ excludes
+ .iter()
+ .map(|exclude| RelativeExcludePattern::cli(exclude))
+ .collect()
+ }),
..SrcOptions::default()
}),
rules,
diff --git a/crates/ty/tests/cli/file_selection.rs b/crates/ty/tests/cli/file_selection.rs
new file mode 100644
index 0000000000..5e646a07cd
--- /dev/null
+++ b/crates/ty/tests/cli/file_selection.rs
@@ -0,0 +1,726 @@
+use insta_cmd::assert_cmd_snapshot;
+
+use crate::CliTest;
+
+/// Test exclude CLI argument functionality
+#[test]
+fn exclude_argument() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "tests/test_main.py",
+ r#"
+ print(another_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "temp_file.py",
+ r#"
+ print(temp_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ ])?;
+
+ // Test that exclude argument is recognized and works
+ assert_cmd_snapshot!(case.command().arg("--exclude").arg("tests/"), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ error[unresolved-reference]: Name `temp_undefined_var` used when not defined
+ --> temp_file.py:2:7
+ |
+ 2 | print(temp_undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 2 diagnostics
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ // Test multiple exclude patterns
+ assert_cmd_snapshot!(case.command().arg("--exclude").arg("tests/").arg("--exclude").arg("temp_*.py"), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ Ok(())
+}
+
+/// Test configuration file include functionality
+#[test]
+fn configuration_include() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "tests/test_main.py",
+ r#"
+ print(another_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "other.py",
+ r#"
+ print(other_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ ])?;
+
+ // Test include via configuration - should only check included files
+ case.write_file(
+ "ty.toml",
+ r#"
+ [src]
+ include = ["src"]
+ "#,
+ )?;
+
+ assert_cmd_snapshot!(case.command(), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ // Test multiple include patterns via configuration
+ case.write_file(
+ "ty.toml",
+ r#"
+ [src]
+ include = ["src", "other.py"]
+ "#,
+ )?;
+
+ assert_cmd_snapshot!(case.command(), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `other_undefined_var` used when not defined
+ --> other.py:2:7
+ |
+ 2 | print(other_undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 2 diagnostics
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ Ok(())
+}
+
+/// Test configuration file exclude functionality
+#[test]
+fn configuration_exclude() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "tests/test_main.py",
+ r#"
+ print(another_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "temp_file.py",
+ r#"
+ print(temp_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ ])?;
+
+ // Test exclude via configuration
+ case.write_file(
+ "ty.toml",
+ r#"
+ [src]
+ exclude = ["tests/"]
+ "#,
+ )?;
+
+ assert_cmd_snapshot!(case.command(), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ error[unresolved-reference]: Name `temp_undefined_var` used when not defined
+ --> temp_file.py:2:7
+ |
+ 2 | print(temp_undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 2 diagnostics
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ // Test multiple exclude patterns via configuration
+ case.write_file(
+ "ty.toml",
+ r#"
+ [src]
+ exclude = ["tests/", "temp_*.py"]
+ "#,
+ )?;
+
+ assert_cmd_snapshot!(case.command(), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ Ok(())
+}
+
+/// Test that exclude takes precedence over include in configuration
+#[test]
+fn exclude_precedence_over_include() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "src/test_helper.py",
+ r#"
+ print(helper_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "other.py",
+ r#"
+ print(other_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ ])?;
+
+ // Include all src files but exclude test files - exclude should win
+ case.write_file(
+ "ty.toml",
+ r#"
+ [src]
+ include = ["src"]
+ exclude = ["test_*.py"]
+ "#,
+ )?;
+
+ assert_cmd_snapshot!(case.command(), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ Ok(())
+}
+
+/// Test that CLI exclude overrides configuration include
+#[test]
+fn exclude_argument_precedence_include_argument() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "tests/test_main.py",
+ r#"
+ print(another_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "other.py",
+ r#"
+ print(other_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ ])?;
+
+ // Configuration includes all files, but CLI excludes tests
+ case.write_file(
+ "ty.toml",
+ r#"
+ [src]
+ include = ["src/", "tests/"]
+ "#,
+ )?;
+
+ assert_cmd_snapshot!(case.command().arg("--exclude").arg("tests/"), @r###"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ "###);
+
+ Ok(())
+}
+
+/// Test that default excludes can be removed using negated patterns
+#[test]
+fn remove_default_exclude() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "dist/generated.py",
+ r#"
+ print(another_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ ])?;
+
+ // By default, 'dist' directory should be excluded (see default excludes)
+ assert_cmd_snapshot!(case.command(), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ // Now override the default exclude by using a negated pattern to re-include 'dist'
+ case.write_file(
+ "ty.toml",
+ r#"
+ [src]
+ exclude = ["!dist"]
+ "#,
+ )?;
+
+ assert_cmd_snapshot!(case.command(), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `another_undefined_var` used when not defined
+ --> dist/generated.py:2:7
+ |
+ 2 | print(another_undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 2 diagnostics
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ Ok(())
+}
+
+/// Test that configuration excludes can be removed via CLI negation
+#[test]
+fn cli_removes_config_exclude() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "build/output.py",
+ r#"
+ print(build_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ ])?;
+
+ // Configuration excludes the build directory
+ case.write_file(
+ "ty.toml",
+ r#"
+ [src]
+ exclude = ["build/"]
+ "#,
+ )?;
+
+ // Verify that build/ is excluded by configuration
+ assert_cmd_snapshot!(case.command(), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ // Now remove the configuration exclude via CLI negation
+ assert_cmd_snapshot!(case.command().arg("--exclude").arg("!build/"), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `build_undefined_var` used when not defined
+ --> build/output.py:2:7
+ |
+ 2 | print(build_undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 2 diagnostics
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ Ok(())
+}
+
+/// Test behavior when explicitly checking a path that matches an exclude pattern
+#[test]
+fn explicit_path_overrides_exclude() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "tests/generated.py",
+ r#"
+ print(dist_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "dist/other.py",
+ r#"
+ print(other_undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "ty.toml",
+ r#"
+ [src]
+ exclude = ["tests/generated.py"]
+ "#,
+ ),
+ ])?;
+
+ // dist is excluded by default and `tests/generated` is excluded in the project, so only src/main.py should be checked
+ assert_cmd_snapshot!(case.command(), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `undefined_var` used when not defined
+ --> src/main.py:2:7
+ |
+ 2 | print(undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ // Explicitly checking a file in an excluded directory should still check that file
+ assert_cmd_snapshot!(case.command().arg("tests/generated.py"), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `dist_undefined_var` used when not defined
+ --> tests/generated.py:2:7
+ |
+ 2 | print(dist_undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ // Explicitly checking the entire excluded directory should check all files in it
+ assert_cmd_snapshot!(case.command().arg("dist/"), @r"
+ success: false
+ exit_code: 1
+ ----- stdout -----
+ error[unresolved-reference]: Name `other_undefined_var` used when not defined
+ --> dist/other.py:2:7
+ |
+ 2 | print(other_undefined_var) # error: unresolved-reference
+ | ^^^^^^^^^^^^^^^^^^^
+ |
+ info: rule `unresolved-reference` is enabled by default
+
+ Found 1 diagnostic
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ");
+
+ Ok(())
+}
+
+#[test]
+fn invalid_include_pattern() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "ty.toml",
+ r#"
+ [src]
+ include = [
+ "src/**test/"
+ ]
+ "#,
+ ),
+ ])?;
+
+ // By default, dist/ is excluded, so only src/main.py should be checked
+ assert_cmd_snapshot!(case.command(), @r#"
+ success: false
+ exit_code: 2
+ ----- stdout -----
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ty failed
+ Cause: error[invalid-glob]: Invalid include pattern
+ --> ty.toml:4:5
+ |
+ 2 | [src]
+ 3 | include = [
+ 4 | "src/**test/"
+ | ^^^^^^^^^^^^^ Too many stars at position 5 in glob: `src/**test/`
+ 5 | ]
+ |
+ "#);
+
+ Ok(())
+}
+
+#[test]
+fn invalid_include_pattern_concise_output() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "ty.toml",
+ r#"
+ [src]
+ include = [
+ "src/**test/"
+ ]
+ "#,
+ ),
+ ])?;
+
+ // By default, dist/ is excluded, so only src/main.py should be checked
+ assert_cmd_snapshot!(case.command().arg("--output-format").arg("concise"), @r"
+ success: false
+ exit_code: 2
+ ----- stdout -----
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ty failed
+ Cause: error[invalid-glob] ty.toml:4:5: Invalid include pattern: Too many stars at position 5 in glob: `src/**test/`
+ ");
+
+ Ok(())
+}
+
+#[test]
+fn invalid_exclude_pattern() -> anyhow::Result<()> {
+ let case = CliTest::with_files([
+ (
+ "src/main.py",
+ r#"
+ print(undefined_var) # error: unresolved-reference
+ "#,
+ ),
+ (
+ "ty.toml",
+ r#"
+ [src]
+ exclude = [
+ "../src"
+ ]
+ "#,
+ ),
+ ])?;
+
+ // By default, dist/ is excluded, so only src/main.py should be checked
+ assert_cmd_snapshot!(case.command(), @r#"
+ success: false
+ exit_code: 2
+ ----- stdout -----
+
+ ----- stderr -----
+ WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors.
+ ty failed
+ Cause: error[invalid-glob]: Invalid exclude pattern
+ --> ty.toml:4:5
+ |
+ 2 | [src]
+ 3 | exclude = [
+ 4 | "../src"
+ | ^^^^^^^^ The parent directory operator (`..`) at position 1 is not allowed in glob: `../src`
+ 5 | ]
+ |
+ "#);
+
+ Ok(())
+}
diff --git a/crates/ty/tests/cli/main.rs b/crates/ty/tests/cli/main.rs
index 023e14cefc..d314b4731a 100644
--- a/crates/ty/tests/cli/main.rs
+++ b/crates/ty/tests/cli/main.rs
@@ -1,5 +1,6 @@
mod config_option;
mod exit_code;
+mod file_selection;
mod python_environment;
mod rule_selection;
diff --git a/crates/ty/tests/cli/rule_selection.rs b/crates/ty/tests/cli/rule_selection.rs
index 9269790e3a..d60c229263 100644
--- a/crates/ty/tests/cli/rule_selection.rs
+++ b/crates/ty/tests/cli/rule_selection.rs
@@ -254,12 +254,12 @@ fn configuration_unknown_rules() -> anyhow::Result<()> {
success: true
exit_code: 0
----- stdout -----
- warning[unknown-rule]
+ warning[unknown-rule]: Unknown lint rule `division-by-zer`
--> pyproject.toml:3:1
|
2 | [tool.ty.rules]
3 | division-by-zer = "warn" # incorrect rule name
- | ^^^^^^^^^^^^^^^ Unknown lint rule `division-by-zer`
+ | ^^^^^^^^^^^^^^^
|
Found 1 diagnostic
diff --git a/crates/ty_project/Cargo.toml b/crates/ty_project/Cargo.toml
index e8df92d566..707c9915ae 100644
--- a/crates/ty_project/Cargo.toml
+++ b/crates/ty_project/Cargo.toml
@@ -24,10 +24,15 @@ ty_python_semantic = { workspace = true, features = ["serde"] }
ty_vendored = { workspace = true }
anyhow = { workspace = true }
+camino = { workspace = true }
+colored = { workspace = true }
crossbeam = { workspace = true }
+globset = { workspace = true }
notify = { workspace = true }
pep440_rs = { workspace = true, features = ["version-ranges"] }
rayon = { workspace = true }
+regex = { workspace = true }
+regex-automata = { workspace = true }
rustc-hash = { workspace = true }
salsa = { workspace = true }
schemars = { workspace = true, optional = true }
diff --git a/crates/ty_project/src/db.rs b/crates/ty_project/src/db.rs
index 9af4e7e50e..ddcf8b95ab 100644
--- a/crates/ty_project/src/db.rs
+++ b/crates/ty_project/src/db.rs
@@ -70,7 +70,10 @@ impl ProjectDatabase {
let program_settings = project_metadata.to_program_settings(db.system());
Program::from_settings(&db, program_settings)?;
- db.project = Some(Project::from_metadata(&db, project_metadata));
+ db.project = Some(
+ Project::from_metadata(&db, project_metadata)
+ .map_err(|error| anyhow::anyhow!("{}", error.pretty(&db)))?,
+ );
Ok(db)
}
@@ -269,7 +272,7 @@ pub(crate) mod tests {
project: None,
};
- let project = Project::from_metadata(&db, project);
+ let project = Project::from_metadata(&db, project).unwrap();
db.project = Some(project);
db
}
diff --git a/crates/ty_project/src/db/changes.rs b/crates/ty_project/src/db/changes.rs
index de9dcd1b42..7a9c989821 100644
--- a/crates/ty_project/src/db/changes.rs
+++ b/crates/ty_project/src/db/changes.rs
@@ -9,6 +9,7 @@ use ruff_db::Db as _;
use ruff_db::files::{File, Files};
use ruff_db::system::SystemPath;
use rustc_hash::FxHashSet;
+use salsa::Setter;
use ty_python_semantic::Program;
/// Represents the result of applying changes to the project database.
@@ -113,14 +114,15 @@ impl ProjectDatabase {
// should be included in the project. We can skip this check for
// paths that aren't part of the project or shouldn't be included
// when checking the project.
- if project.is_path_included(self, &path) {
- if self.system().is_file(&path) {
+
+ if self.system().is_file(&path) {
+ if project.is_file_included(self, &path) {
// Add the parent directory because `walkdir` always visits explicitly passed files
// even if they match an exclude filter.
added_paths.insert(path.parent().unwrap().to_path_buf());
- } else {
- added_paths.insert(path);
}
+ } else if project.is_directory_included(self, &path) {
+ added_paths.insert(path);
}
}
@@ -153,7 +155,9 @@ impl ProjectDatabase {
result.custom_stdlib_changed = true;
}
- if project.is_path_included(self, &path) || path == project_root {
+ let directory_included = project.is_directory_included(self, &path);
+
+ if directory_included || path == project_root {
// TODO: Shouldn't it be enough to simply traverse the project files and remove all
// that start with the given path?
tracing::debug!(
@@ -165,6 +169,10 @@ impl ProjectDatabase {
// indexed files and remove the once that start with the same path, unless
// the deleted path is the project configuration.
result.project_changed = true;
+ } else if !directory_included {
+ tracing::debug!(
+ "Skipping reload because directory '{path}' isn't included in the project"
+ );
}
}
}
@@ -233,8 +241,22 @@ impl ProjectDatabase {
tracing::debug!("Reloading project after structural change");
project.reload(self, metadata);
} else {
- tracing::debug!("Replace project after structural change");
- project = Project::from_metadata(self, metadata);
+ match Project::from_metadata(self, metadata) {
+ Ok(new_project) => {
+ tracing::debug!("Replace project after structural change");
+ project = new_project;
+ }
+ Err(error) => {
+ tracing::error!(
+ "Keeping old project configuration because loading the new settings failed with: {error}"
+ );
+
+ project
+ .set_settings_diagnostics(self)
+ .to(vec![error.into_diagnostic()]);
+ }
+ }
+
self.project = Some(project);
}
}
diff --git a/crates/ty_project/src/glob.rs b/crates/ty_project/src/glob.rs
new file mode 100644
index 0000000000..2c93fc4716
--- /dev/null
+++ b/crates/ty_project/src/glob.rs
@@ -0,0 +1,89 @@
+use ruff_db::system::SystemPath;
+
+pub(crate) use exclude::{ExcludeFilter, ExcludeFilterBuilder};
+pub(crate) use include::{IncludeFilter, IncludeFilterBuilder};
+pub(crate) use portable::{AbsolutePortableGlobPattern, PortableGlobError, PortableGlobPattern};
+
+mod exclude;
+mod include;
+mod portable;
+
+/// Path filtering based on an an exclude and include glob pattern set.
+///
+/// Exclude patterns take precedence over includes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct IncludeExcludeFilter {
+ include: IncludeFilter,
+ exclude: ExcludeFilter,
+}
+
+impl IncludeExcludeFilter {
+ pub(crate) fn new(include: IncludeFilter, exclude: ExcludeFilter) -> Self {
+ Self { include, exclude }
+ }
+
+ /// Returns whether this directory is included in this filter.
+ ///
+ /// Note, this function never returns [`IncludeResult::Included`] for a path that is not included or excluded.
+ /// However, it may return [`IncludeResult::Included`] for directories that are not excluded, but where
+ /// it requires traversal to decide if any of its subdirectories or files are included. This, for example,
+ /// is the case when using wildcard include-patterns like `**/test`. Prefix wildcards require to traverse `src`
+ /// because it can't be known ahead of time whether it contains a `test` directory or file.
+ pub(crate) fn is_directory_maybe_included(
+ &self,
+ path: &SystemPath,
+ mode: GlobFilterCheckMode,
+ ) -> IncludeResult {
+ if self.exclude.match_directory(path, mode) {
+ IncludeResult::Excluded
+ } else if self.include.match_directory(path) {
+ IncludeResult::Included
+ } else {
+ IncludeResult::NotIncluded
+ }
+ }
+
+ pub(crate) fn is_file_included(
+ &self,
+ path: &SystemPath,
+ mode: GlobFilterCheckMode,
+ ) -> IncludeResult {
+ if self.exclude.match_file(path, mode) {
+ IncludeResult::Excluded
+ } else if self.include.match_file(path) {
+ IncludeResult::Included
+ } else {
+ IncludeResult::NotIncluded
+ }
+ }
+}
+
+#[derive(Copy, Clone, Eq, PartialEq)]
+pub(crate) enum GlobFilterCheckMode {
+ /// The paths are checked top-to-bottom and inclusion is determined
+ /// for each path during the traversal.
+ TopDown,
+
+ /// An adhoc test if a single file or directory is included.
+ ///
+ /// This is more expensive than a [`Self::TopDown`] check
+ /// because it may require testing every ancestor path in addition to the
+ /// path itself to ensure no ancestor path matches an exclude rule.
+ Adhoc,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub(crate) enum IncludeResult {
+ /// The path matches or at least is a prefix of an include pattern.
+ ///
+ /// For directories: This isn't a guarantee that any file in this directory gets included
+ /// but we need to traverse it to make this decision.
+ Included,
+
+ /// The path matches an exclude pattern.
+ Excluded,
+
+ /// The path matches neither an include nor an exclude pattern and, therefore,
+ /// isn't included.
+ NotIncluded,
+}
diff --git a/crates/ty_project/src/glob/exclude.rs b/crates/ty_project/src/glob/exclude.rs
new file mode 100644
index 0000000000..1e6813a00c
--- /dev/null
+++ b/crates/ty_project/src/glob/exclude.rs
@@ -0,0 +1,285 @@
+//! Exclude filter supporting gitignore-like globs.
+//!
+//! * `src` excludes a file or directory named `src` anywhere in the path.
+//! * `/src/` excludes a directory named `src` at the root of the path.
+//! * `/src` excludes a directory or file named `src` at the root of the path.
+//! * `/src/**` excludes all files and directories inside a directory named `src` but not `src` itself.
+//! * `!src` allows a file or directory named `src` anywhere in the path
+
+use std::sync::Arc;
+
+use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
+use regex_automata::util::pool::Pool;
+use ruff_db::system::SystemPath;
+
+use crate::GlobFilterCheckMode;
+use crate::glob::portable::AbsolutePortableGlobPattern;
+
+/// A filter for gitignore-like globs that excludes files and directories.
+///
+/// # Equality
+///
+/// Two filters are equal if they're constructed from the same patterns (including order).
+/// Two filters that exclude the exact same files but were constructed from different patterns aren't considered
+/// equal.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct ExcludeFilter {
+ ignore: Gitignore,
+}
+
+impl ExcludeFilter {
+ /// Returns `true` if the path to a directory is definitely excluded and `false` otherwise.
+ pub(crate) fn match_directory(&self, path: &SystemPath, mode: GlobFilterCheckMode) -> bool {
+ self.matches(path, mode, true)
+ }
+
+ /// Returns `true` if the path to a file is definitely excluded and `false` otherwise.
+ pub(crate) fn match_file(&self, path: &SystemPath, mode: GlobFilterCheckMode) -> bool {
+ self.matches(path, mode, false)
+ }
+
+ fn matches(&self, path: &SystemPath, mode: GlobFilterCheckMode, directory: bool) -> bool {
+ match mode {
+ GlobFilterCheckMode::TopDown => {
+ match self.ignore.matched(path, directory) {
+ // No hit or an allow hit means the file or directory is not excluded.
+ Match::None | Match::Allow => false,
+ Match::Ignore => true,
+ }
+ }
+ GlobFilterCheckMode::Adhoc => {
+ for ancestor in path.ancestors() {
+ match self.ignore.matched(ancestor, directory) {
+ // If the path is allowlisted or there's no hit, try the parent to ensure we don't return false
+ // for a folder where there's an exclude for a parent.
+ Match::None | Match::Allow => {}
+ Match::Ignore => return true,
+ }
+ }
+
+ false
+ }
+ }
+ }
+}
+
+pub(crate) struct ExcludeFilterBuilder {
+ ignore: GitignoreBuilder,
+}
+
+impl ExcludeFilterBuilder {
+ pub(crate) fn new() -> Self {
+ Self {
+ ignore: GitignoreBuilder::new(),
+ }
+ }
+
+ pub(crate) fn add(
+ &mut self,
+ pattern: &AbsolutePortableGlobPattern,
+ ) -> Result<&mut Self, globset::Error> {
+ self.ignore.add(pattern)?;
+
+ Ok(self)
+ }
+
+ pub(crate) fn build(self) -> Result {
+ Ok(ExcludeFilter {
+ ignore: self.ignore.build()?,
+ })
+ }
+}
+
+/// Matcher for gitignore like globs.
+///
+/// This code is our own vendored copy of the ignore's crate `Gitignore` type.
+/// The main difference to `ignore`'s version is that it makes use
+/// of the fact that all our globs are absolute. This simplifies the implementation a fair bit.
+/// Making globs absolute is also because the globs can come from both the CLI and configuration files,
+/// where the paths are anchored relative to the current working directory or the project root respectively.
+///
+/// Vendoring our own copy has the added benefit that we don't need to deal with ignore's `Error` type.
+/// Instead, we can exclusively use [`globset::Error`].
+///
+/// This implementation also removes supported for comments, because the patterns aren't read
+/// from a `.gitignore` file. This removes the need to escape `#` for file names starting with `#`,
+///
+/// You can find the original source on [GitHub](https://github.com/BurntSushi/ripgrep/blob/cbc598f245f3c157a872b69102653e2e349b6d92/crates/ignore/src/gitignore.rs#L81).
+///
+/// # Equality
+///
+/// Two ignore matches are only equal if they're constructed from the same patterns (including order).
+/// Two matchers that were constructed from different patterns but result in
+/// including the same files don't compare equal.
+#[derive(Clone)]
+struct Gitignore {
+ set: GlobSet,
+ globs: Vec,
+ matches: Option>>>,
+}
+
+impl Gitignore {
+ /// Returns whether the given path (file or directory) matched a pattern in
+ /// this gitignore matcher.
+ ///
+ /// `is_dir` should be true if the path refers to a directory and false
+ /// otherwise.
+ ///
+ /// The path must be absolute or it will only match prefix-wildcard patterns.
+ fn matched(&self, path: &SystemPath, is_dir: bool) -> Match {
+ if self.globs.is_empty() {
+ return Match::None;
+ }
+
+ let mut matches = self.matches.as_ref().unwrap().get();
+ let candidate = Candidate::new(path);
+ self.set.matches_candidate_into(&candidate, &mut matches);
+ for &i in matches.iter().rev() {
+ let glob = &self.globs[i];
+ if !glob.is_only_dir || is_dir {
+ return if glob.is_ignore() {
+ Match::Ignore
+ } else {
+ Match::Allow
+ };
+ }
+ }
+ Match::None
+ }
+}
+
+impl std::fmt::Debug for Gitignore {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("Gitignore")
+ .field("globs", &self.globs)
+ .finish_non_exhaustive()
+ }
+}
+
+impl PartialEq for Gitignore {
+ fn eq(&self, other: &Self) -> bool {
+ self.globs == other.globs
+ }
+}
+
+impl Eq for Gitignore {}
+
+#[derive(Copy, Clone, Debug)]
+enum Match {
+ /// The path matches no pattern.
+ None,
+
+ /// The path matches an ignore pattern (a positive pattern)
+ /// It should be ignored.
+ Ignore,
+
+ /// The path matches an allow pattern (a negative pattern).
+ /// It should not be ignored.
+ Allow,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct IgnoreGlob {
+ /// The pattern that was originally parsed.
+ original: String,
+
+ /// This is a pattern allowing a path (it starts with a `!`, possibly undoing a previous ignore)
+ is_allow: bool,
+
+ /// Whether this pattern only matches directories.
+ is_only_dir: bool,
+}
+
+impl IgnoreGlob {
+ const fn is_ignore(&self) -> bool {
+ !self.is_allow
+ }
+}
+
+/// Builds a matcher for git-ignore like globs.
+///
+/// All globs need to use absolute paths, unless they're unanchored (contain no `/`).
+#[derive(Clone, Debug)]
+struct GitignoreBuilder {
+ builder: GlobSetBuilder,
+ globs: Vec,
+}
+
+impl GitignoreBuilder {
+ /// Create a new builder for a gitignore file.
+ fn new() -> GitignoreBuilder {
+ GitignoreBuilder {
+ builder: GlobSetBuilder::new(),
+ globs: vec![],
+ }
+ }
+
+ /// Builds a new matcher from the globs added so far.
+ ///
+ /// Once a matcher is built, no new globs can be added to it.
+ fn build(&self) -> Result {
+ let set = self.builder.build()?;
+
+ Ok(Gitignore {
+ set,
+ globs: self.globs.clone(),
+ matches: Some(Arc::new(Pool::new(Vec::new))),
+ })
+ }
+
+ /// Adds a gitignore like glob pattern to this builder.
+ ///
+ /// If the pattern could not be parsed as a glob, then an error is returned.
+ fn add(&mut self, mut pattern: &str) -> Result<&mut GitignoreBuilder, globset::Error> {
+ let mut glob = IgnoreGlob {
+ original: pattern.to_string(),
+ is_allow: false,
+ is_only_dir: false,
+ };
+
+ // File names starting with `!` are escaped with a backslash. Strip the backslash.
+ // This is not a negated pattern!
+ if pattern.starts_with("\\!") {
+ pattern = &pattern[1..];
+ } else if let Some(after) = pattern.strip_prefix("!") {
+ glob.is_allow = true;
+ pattern = after;
+ }
+
+ // If it ends with a slash, then this should only match directories,
+ // but the slash should otherwise not be used while globbing.
+ if let Some(before) = pattern.strip_suffix('/') {
+ glob.is_only_dir = true;
+ pattern = before;
+ }
+
+ let mut actual = pattern.to_string();
+
+ // If there is a literal slash, then this is a glob that must match the
+ // entire path name. Otherwise, we should let it match anywhere, so use
+ // a **/ prefix.
+ if !pattern.chars().any(|c| c == '/') {
+ // ... but only if we don't already have a **/ prefix.
+ if !pattern.starts_with("**/") {
+ actual = format!("**/{actual}");
+ }
+ }
+ // If the glob ends with `/**`, then we should only match everything
+ // inside a directory, but not the directory itself. Standard globs
+ // will match the directory. So we add `/*` to force the issue.
+ if actual.ends_with("/**") {
+ actual = format!("{actual}/*");
+ }
+
+ let parsed = GlobBuilder::new(&actual)
+ .literal_separator(true)
+ // No need to support Windows-style paths, so the backslash can be used an escape.
+ .backslash_escape(true)
+ .build()?;
+
+ self.builder.add(parsed);
+ self.globs.push(glob);
+
+ Ok(self)
+ }
+}
diff --git a/crates/ty_project/src/glob/include.rs b/crates/ty_project/src/glob/include.rs
new file mode 100644
index 0000000000..b6365cf627
--- /dev/null
+++ b/crates/ty_project/src/glob/include.rs
@@ -0,0 +1,399 @@
+use globset::{Glob, GlobBuilder, GlobSet, GlobSetBuilder};
+use regex_automata::dfa;
+use regex_automata::dfa::Automaton;
+use ruff_db::system::SystemPath;
+use std::path::{MAIN_SEPARATOR, MAIN_SEPARATOR_STR};
+use tracing::warn;
+
+use crate::glob::portable::AbsolutePortableGlobPattern;
+
+/// Chosen at a whim -Konsti
+const DFA_SIZE_LIMIT: usize = 1_000_000;
+
+/// Path filter based on a set of include globs.
+///
+/// The patterns are similar to gitignore, but reversed:
+///
+/// * `/src`: matches a file or directory with its content named `src`
+/// * `/src/`: matches a directory with its content named `src`
+/// * `/src/**` or `/src/*`: matches the content of `src`, but not a file named `src`
+///
+/// Negated patterns are not supported.
+///
+/// Internally, the globs are converted to a regex and then to a DFA, which unlike the globs and the
+/// regex allows to check for prefix matches.
+///
+/// ## Equality
+/// Equality is based on the patterns from which a filter was constructed.
+///
+/// Because of that, two filters that include the exact same files but were
+/// constructed from different patterns (or even just order) compare unequal.
+#[derive(Clone)]
+pub(crate) struct IncludeFilter {
+ glob_set: GlobSet,
+ original_patterns: Box<[String]>,
+ dfa: Option>>,
+}
+
+impl IncludeFilter {
+ /// Whether the file matches any of the globs.
+ pub(crate) fn match_file(&self, path: impl AsRef) -> bool {
+ let path = path.as_ref();
+
+ self.glob_set.is_match(path)
+ }
+
+ /// Check whether a directory or any of its children can be matched by any of the globs.
+ ///
+ /// This never returns `false` if any child matches, but it may return `true` even if we
+ /// don't end up including any child.
+ pub(crate) fn match_directory(&self, path: impl AsRef) -> bool {
+ self.match_directory_impl(path.as_ref())
+ }
+
+ fn match_directory_impl(&self, path: &SystemPath) -> bool {
+ let Some(dfa) = &self.dfa else {
+ return true;
+ };
+
+ // Allow the root path
+ if path == SystemPath::new("") {
+ return true;
+ }
+
+ let config_anchored =
+ regex_automata::util::start::Config::new().anchored(regex_automata::Anchored::Yes);
+ let mut state = dfa.start_state(&config_anchored).unwrap();
+
+ let byte_path = path
+ .as_str()
+ .strip_suffix('/')
+ .unwrap_or(path.as_str())
+ .as_bytes();
+ for b in byte_path {
+ state = dfa.next_state(state, *b);
+ }
+ // Say we're looking at a directory `foo/bar`. We want to continue if either `foo/bar` is
+ // a match, e.g., from `foo/*`, or a path below it can match, e.g., from `foo/bar/*`.
+ let eoi_state = dfa.next_eoi_state(state);
+ // We must not call `next_eoi_state` on the slash state, we want to only check if more
+ // characters (path components) are allowed, not if we're matching the `$` anchor at the
+ // end.
+ let slash_state = dfa.next_state(state, u8::try_from(MAIN_SEPARATOR).unwrap());
+
+ debug_assert!(
+ !dfa.is_quit_state(eoi_state) && !dfa.is_quit_state(slash_state),
+ "matcher is in quit state"
+ );
+
+ dfa.is_match_state(eoi_state) || !dfa.is_dead_state(slash_state)
+ }
+}
+
+impl std::fmt::Debug for IncludeFilter {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("IncludeFilder")
+ .field("original_patterns", &self.original_patterns)
+ .finish_non_exhaustive()
+ }
+}
+
+impl PartialEq for IncludeFilter {
+ fn eq(&self, other: &Self) -> bool {
+ self.original_patterns == other.original_patterns
+ }
+}
+
+impl Eq for IncludeFilter {}
+
+#[derive(Debug)]
+pub(crate) struct IncludeFilterBuilder {
+ set: GlobSetBuilder,
+ original_pattern: Vec,
+ regexes: Vec,
+}
+
+impl IncludeFilterBuilder {
+ pub(crate) fn new() -> Self {
+ Self {
+ set: GlobSetBuilder::new(),
+ original_pattern: Vec::new(),
+ regexes: Vec::new(),
+ }
+ }
+
+ /// Adds an include pattern to the filter.
+ pub(crate) fn add(
+ &mut self,
+ input: &AbsolutePortableGlobPattern,
+ ) -> Result<&mut Self, globset::Error> {
+ let mut glob = &**input;
+
+ let mut only_directory = false;
+
+ // A pattern ending with a `/` should only match directories. E.g. `src/` only matches directories
+ // whereas `src` matches both files and directories.
+ // We need to remove the `/` to ensure that a path missing the trailing `/` matches.
+ if let Some(after) = input.strip_suffix('/') {
+ // Escaped `/` or `\` aren't allowed. `portable_glob::parse` will error
+ only_directory = true;
+ glob = after;
+ }
+
+ // If regex ends with `/**`, only push that one glob and regex
+ // Otherwise, push two regex, one for `/**` and one for without
+ let glob = GlobBuilder::new(glob)
+ .literal_separator(true)
+ // No need to support Windows-style paths, so the backslash can be used a escape.
+ .backslash_escape(true)
+ .build()?;
+ self.original_pattern.push(input.to_string());
+
+ // `lib` is the same as `lib/**`
+ // Add a glob that matches `lib` exactly, change the glob to `lib/**`.
+ if input.ends_with("**") {
+ self.push_prefix_regex(&glob);
+ self.set.add(glob);
+ } else {
+ let prefix_glob = GlobBuilder::new(&format!("{glob}/**"))
+ .literal_separator(true)
+ // No need to support Windows-style paths, so the backslash can be used a escape.
+ .backslash_escape(true)
+ .build()?;
+
+ self.push_prefix_regex(&prefix_glob);
+ self.set.add(prefix_glob);
+
+ // The reason we add the exact glob, e.g. `src` when the original pattern was `src/` is
+ // so that `match_file` returns true when matching against a file. However, we don't
+ // need to do this if this is a pattern that should only match a directory (specifically, its contents).
+ if !only_directory {
+ self.set.add(glob);
+ }
+ }
+
+ Ok(self)
+ }
+
+ fn push_prefix_regex(&mut self, glob: &Glob) {
+ let main_separator = regex::escape(MAIN_SEPARATOR_STR);
+
+ let regex = glob
+ .regex()
+ // We are using a custom DFA builder
+ .strip_prefix("(?-u)")
+ .expect("a glob is a non-unicode byte regex")
+ // Match windows paths if applicable
+ .replace('/', &main_separator);
+
+ self.regexes.push(regex);
+ }
+
+ /// The filter matches if any of the globs matches.
+ ///
+ /// See for the error returned.
+ pub(crate) fn build(self) -> Result {
+ let glob_set = self.set.build()?;
+
+ let dfa_builder = dfa::dense::Builder::new()
+ .syntax(
+ // The glob regex is a byte matcher
+ regex_automata::util::syntax::Config::new()
+ .unicode(false)
+ .utf8(false),
+ )
+ .configure(
+ dfa::dense::Config::new()
+ .start_kind(dfa::StartKind::Anchored)
+ // DFA can grow exponentially, in which case we bail out
+ .dfa_size_limit(Some(DFA_SIZE_LIMIT))
+ .determinize_size_limit(Some(DFA_SIZE_LIMIT)),
+ )
+ .build_many(&self.regexes);
+ let dfa = if let Ok(dfa) = dfa_builder {
+ Some(dfa)
+ } else {
+ // TODO(konsti): `regex_automata::dfa::dense::BuildError` should allow asking whether
+ // is a size error
+ warn!(
+ "Glob expressions regex is larger than {DFA_SIZE_LIMIT} bytes, \
+ falling back to full directory traversal!"
+ );
+ None
+ };
+
+ Ok(IncludeFilter {
+ glob_set,
+ dfa,
+ original_patterns: self.original_pattern.into(),
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::path::{MAIN_SEPARATOR, MAIN_SEPARATOR_STR};
+
+ use crate::glob::PortableGlobPattern;
+ use crate::glob::include::{IncludeFilter, IncludeFilterBuilder};
+ use ruff_db::system::{MemoryFileSystem, walk_directory::WalkState};
+
+ fn create_filter(patterns: impl IntoIterator- ) -> IncludeFilter {
+ let mut builder = IncludeFilterBuilder::new();
+ for pattern in patterns {
+ builder
+ .add(
+ &PortableGlobPattern::parse(pattern, false)
+ .unwrap()
+ .into_absolute(""),
+ )
+ .unwrap();
+ }
+
+ builder.build().unwrap()
+ }
+
+ fn setup_files(files: impl IntoIterator
- ) -> MemoryFileSystem {
+ let fs = MemoryFileSystem::new();
+
+ fs.write_files_all(files.into_iter().map(|name| (name, "")))
+ .unwrap();
+ fs
+ }
+
+ #[track_caller]
+ fn assert_match_directory(filter: &IncludeFilter, path: &str) {
+ assert!(filter.match_directory(path.replace('/', MAIN_SEPARATOR_STR)));
+ }
+
+ #[track_caller]
+ fn assert_not_match_directory(filter: &IncludeFilter, path: &str) {
+ assert!(!filter.match_directory(path.replace('/', MAIN_SEPARATOR_STR)));
+ }
+
+ #[test]
+ fn match_directory() {
+ // `lib` is the same as `src/**`. It includes a file or directory (including its contents)
+ // `src/*`: The same as `src/**`
+ let filter = create_filter(["lib", "src/*", "tests/**", "a/test-*/b", "files/*.py"]);
+
+ assert_match_directory(&filter, "lib");
+ assert_match_directory(&filter, "lib/more/test");
+
+ assert_match_directory(&filter, "src");
+ assert_match_directory(&filter, "src/more/test");
+
+ assert_match_directory(&filter, "tests");
+ assert_match_directory(&filter, "tests/more/test");
+
+ assert_match_directory(&filter, "a");
+ assert_match_directory(&filter, "a/test-b");
+
+ assert_not_match_directory(&filter, "a/test-b/x");
+ assert_not_match_directory(&filter, "a/test");
+
+ assert_match_directory(&filter, "files/a.py");
+ assert_match_directory(&filter, "files/a.py/bcd");
+
+ assert_not_match_directory(&filter, "not_included");
+ assert_not_match_directory(&filter, "files/a.pi");
+ }
+
+ #[test]
+ fn match_file() {
+ // `lib` is the same as `src/**`. It includes a file or directory (including its contents)
+ // `src/*`: The same as `src/**`
+ let filter = create_filter([
+ "lib",
+ "src/*",
+ "directory/",
+ "tests/**",
+ "a/test-*/b",
+ "files/*.py",
+ ]);
+
+ assert!(filter.match_file("lib"));
+ assert!(filter.match_file("lib/more/test"));
+
+ // Unlike `directory`, `directory/` only includes a directory with the given name and its contents
+ assert!(!filter.match_file("directory"));
+ assert!(filter.match_file("directory/more/test"));
+
+ // Unlike `src`, `src/*` only includes a directory with the given name.
+ assert!(!filter.match_file("src"));
+ assert!(filter.match_file("src/more/test"));
+
+ // Unlike `tests`, `tests/**` only includes files under `tests`, but not a file named tests
+ assert!(!filter.match_file("tests"));
+ assert!(filter.match_file("tests/more/test"));
+
+ // Unlike `match_directory`, prefixes should not be included.
+ assert!(!filter.match_file("a"));
+ assert!(!filter.match_file("a/test-b"));
+
+ assert!(!filter.match_file("a/test-b/x"));
+ assert!(!filter.match_file("a/test"));
+
+ assert!(filter.match_file("files/a.py"));
+ assert!(filter.match_file("files/a.py/bcd"));
+
+ assert!(!filter.match_file("not_included"));
+ assert!(!filter.match_file("files/a.pi"));
+ }
+
+ /// Check that we skip directories that can never match.
+ #[test]
+ fn prefilter() {
+ let filter = create_filter(["/a/b/test-*/d", "/a/b/c/e", "/b/c"]);
+ let fs = setup_files([
+ // Should visit
+ "/a/b/test-a/d",
+ "/a/b/c/e",
+ "/b/c",
+ // Can skip
+ "/d/e",
+ "/a/b/x/f",
+ ]);
+
+ let visited = std::sync::Mutex::new(Vec::new());
+
+ // Test the prefix filtering
+ fs.walk_directory("/").run(|| {
+ Box::new(|entry| {
+ let entry = entry.unwrap();
+
+ if entry.file_type().is_directory() {
+ if !filter.match_directory(entry.path()) {
+ return WalkState::Skip;
+ }
+ }
+
+ visited
+ .lock()
+ .unwrap()
+ .push(entry.path().as_str().replace(MAIN_SEPARATOR, "/"));
+
+ WalkState::Continue
+ })
+ });
+
+ let mut visited = visited.into_inner().unwrap();
+ visited.sort();
+
+ // Assert that it didn't traverse into `/d` or `/a/b/x`
+ assert_eq!(
+ visited,
+ [
+ "/",
+ "/a",
+ "/a/b",
+ "/a/b/c",
+ "/a/b/c/e",
+ "/a/b/test-a",
+ "/a/b/test-a/d",
+ "/b",
+ "/b/c"
+ ]
+ );
+ }
+}
diff --git a/crates/ty_project/src/glob/portable.rs b/crates/ty_project/src/glob/portable.rs
new file mode 100644
index 0000000000..8de8d91188
--- /dev/null
+++ b/crates/ty_project/src/glob/portable.rs
@@ -0,0 +1,407 @@
+//! Cross-language glob syntax from
+//! [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/).
+//!
+//! The glob syntax matches the `uv` variant of uv's `uv-globfilter` crate.
+//! We intentionally use the same syntax to give users a consistent experience
+//! across our tools.
+//!
+//! [Source](https://github.com/astral-sh/uv/blob/main/crates/uv-globfilter/src/portable_glob.rs)
+
+use ruff_db::system::SystemPath;
+use std::ops::Deref;
+use std::{fmt::Write, path::MAIN_SEPARATOR};
+use thiserror::Error;
+
+/// Pattern that only uses cross-language glob syntax based on [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/):
+///
+/// - Alphanumeric characters, underscores (`_`), hyphens (`-`) and dots (`.`) are matched verbatim.
+/// - The special glob characters are:
+/// - `*`: Matches any number of characters except path separators
+/// - `?`: Matches a single character except the path separator
+/// - `**`: Matches any number of characters including path separators
+/// - `[]`, containing only the verbatim matched characters: Matches a single of the characters contained. Within
+/// `[...]`, the hyphen indicates a locale-agnostic range (e.g. `a-z`, order based on Unicode code points). Hyphens at
+/// the start or end are matched literally.
+/// - `\`: It escapes the following character to be matched verbatim (extension to PEP 639).
+/// - The path separator is the forward slash character (`/`). Patterns are relative to the given directory, a leading slash
+/// character for absolute paths is not supported.
+/// - Parent directory indicators (`..`) are not allowed.
+///
+/// These rules mean that matching the backslash (`\`) is forbidden, which avoid collisions with the windows path separator.
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+pub(crate) struct PortableGlobPattern<'a> {
+ pattern: &'a str,
+ is_exclude: bool,
+}
+
+impl<'a> PortableGlobPattern<'a> {
+ /// Parses a portable glob pattern. Returns an error if the pattern isn't valid.
+ pub(crate) fn parse(glob: &'a str, is_exclude: bool) -> Result {
+ let mut chars = glob.chars().enumerate().peekable();
+
+ if is_exclude {
+ chars.next_if(|(_, c)| *c == '!');
+ }
+
+ // A `..` is on a parent directory indicator at the start of the string or after a directory
+ // separator.
+ let mut start_or_slash = true;
+ // The number of consecutive stars before the current character.
+ while let Some((offset, c)) = chars.next() {
+ let pos = offset + 1;
+
+ // `***` or `**literals` can be correctly represented with less stars. They are banned by
+ // `glob`, they are allowed by `globset` and PEP 639 is ambiguous, so we're filtering them
+ // out.
+ if c == '*' {
+ let mut star_run = 1;
+ while let Some((_, c)) = chars.peek() {
+ if *c == '*' {
+ star_run += 1;
+ chars.next();
+ } else {
+ break;
+ }
+ }
+ if star_run >= 3 {
+ return Err(PortableGlobError::TooManyStars {
+ glob: glob.to_string(),
+ // We don't update pos for the stars.
+ pos,
+ });
+ } else if star_run == 2 {
+ if chars.peek().is_some_and(|(_, c)| *c != '/') {
+ return Err(PortableGlobError::TooManyStars {
+ glob: glob.to_string(),
+ // We don't update pos for the stars.
+ pos,
+ });
+ }
+ }
+ start_or_slash = false;
+ } else if c.is_alphanumeric() || matches!(c, '_' | '-' | '?') {
+ start_or_slash = false;
+ } else if c == '.' {
+ if start_or_slash && matches!(chars.peek(), Some((_, '.'))) {
+ return Err(PortableGlobError::ParentDirectory {
+ pos,
+ glob: glob.to_string(),
+ });
+ }
+ start_or_slash = false;
+ } else if c == '/' {
+ start_or_slash = true;
+ } else if c == '[' {
+ for (pos, c) in chars.by_ref() {
+ if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') {
+ // Allowed.
+ } else if c == ']' {
+ break;
+ } else {
+ return Err(PortableGlobError::InvalidCharacterRange {
+ glob: glob.to_string(),
+ pos,
+ invalid: InvalidChar(c),
+ });
+ }
+ }
+ start_or_slash = false;
+ } else if c == '\\' {
+ match chars.next() {
+ Some((pos, '/' | '\\')) => {
+ // For cross-platform compatibility, we don't allow forward slashes or
+ // backslashes to be escaped.
+ return Err(PortableGlobError::InvalidEscapee {
+ glob: glob.to_string(),
+ pos,
+ });
+ }
+ Some(_) => {
+ // Escaped character
+ }
+ None => {
+ return Err(PortableGlobError::TrailingEscape {
+ glob: glob.to_string(),
+ pos,
+ });
+ }
+ }
+ } else {
+ return Err(PortableGlobError::InvalidCharacter {
+ glob: glob.to_string(),
+ pos,
+ invalid: InvalidChar(c),
+ });
+ }
+ }
+ Ok(PortableGlobPattern {
+ pattern: glob,
+ is_exclude,
+ })
+ }
+
+ /// Anchors pattern at `cwd`.
+ ///
+ /// `is_exclude` indicates whether this is a pattern in an exclude filter.
+ ///
+ /// This method similar to [`SystemPath::absolute`] but for a glob pattern.
+ /// The main difference is that this method always uses `/` as path separator.
+ pub(crate) fn into_absolute(self, cwd: impl AsRef) -> AbsolutePortableGlobPattern {
+ let mut pattern = self.pattern;
+ let mut negated = false;
+
+ if self.is_exclude {
+ // If the pattern starts with `!`, we need to remove it and then anchor the rest.
+ if let Some(after) = self.pattern.strip_prefix('!') {
+ pattern = after;
+ negated = true;
+ }
+
+ // Patterns that don't contain any `/`, e.g. `.venv` are unanchored patterns
+ // that match anywhere.
+ if !self.chars().any(|c| c == '/') {
+ return AbsolutePortableGlobPattern(self.to_string());
+ }
+ }
+
+ if pattern.starts_with('/') {
+ return AbsolutePortableGlobPattern(pattern.to_string());
+ }
+
+ let mut rest = pattern;
+ let mut prefix = cwd.as_ref().to_path_buf().into_utf8_path_buf();
+
+ loop {
+ if let Some(after) = rest.strip_prefix("./") {
+ rest = after;
+ } else if let Some(after) = rest.strip_prefix("../") {
+ prefix.pop();
+ rest = after;
+ } else {
+ break;
+ }
+ }
+
+ let mut output = String::with_capacity(prefix.as_str().len() + rest.len());
+
+ for component in prefix.components() {
+ match component {
+ camino::Utf8Component::Prefix(utf8_prefix_component) => {
+ output.push_str(&utf8_prefix_component.as_str().replace(MAIN_SEPARATOR, "/"));
+ }
+
+ camino::Utf8Component::RootDir => {
+ output.push('/');
+ continue;
+ }
+ camino::Utf8Component::CurDir => {}
+ camino::Utf8Component::ParentDir => output.push_str("../"),
+ camino::Utf8Component::Normal(component) => {
+ output.push_str(component);
+ output.push('/');
+ }
+ }
+ }
+
+ output.push_str(rest);
+ if negated {
+ // If the pattern is negated, we need to keep the leading `!`.
+ AbsolutePortableGlobPattern(format!("!{output}"))
+ } else {
+ AbsolutePortableGlobPattern(output)
+ }
+ }
+}
+
+impl Deref for PortableGlobPattern<'_> {
+ type Target = str;
+
+ fn deref(&self) -> &Self::Target {
+ self.pattern
+ }
+}
+
+/// A portable glob pattern that uses absolute paths.
+///
+/// E.g., `./src/**` becomes `/root/src/**` when anchored to `/root`.
+#[derive(Debug, Eq, PartialEq, Hash)]
+pub(crate) struct AbsolutePortableGlobPattern(String);
+
+impl Deref for AbsolutePortableGlobPattern {
+ type Target = str;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+#[derive(Debug, Error)]
+pub(crate) enum PortableGlobError {
+ /// Shows the failing glob in the error message.
+ #[error(transparent)]
+ GlobError(#[from] globset::Error),
+
+ #[error(
+ "The parent directory operator (`..`) at position {pos} is not allowed in glob: `{glob}`"
+ )]
+ ParentDirectory { glob: String, pos: usize },
+
+ #[error(
+ "Invalid character `{invalid}` at position {pos} in glob: `{glob}`. hint: Characters can be escaped with a backslash"
+ )]
+ InvalidCharacter {
+ glob: String,
+ pos: usize,
+ invalid: InvalidChar,
+ },
+
+ #[error(
+ "Path separators can't be escaped, invalid character at position {pos} in glob: `{glob}`"
+ )]
+ InvalidEscapee { glob: String, pos: usize },
+
+ #[error("Invalid character `{invalid}` in range at position {pos} in glob: `{glob}`")]
+ InvalidCharacterRange {
+ glob: String,
+ pos: usize,
+ invalid: InvalidChar,
+ },
+
+ #[error("Too many stars at position {pos} in glob: `{glob}`")]
+ TooManyStars { glob: String, pos: usize },
+
+ #[error("Trailing backslash at position {pos} in glob: `{glob}`")]
+ TrailingEscape { glob: String, pos: usize },
+}
+
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct InvalidChar(pub char);
+
+impl std::fmt::Display for InvalidChar {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self.0 {
+ '\'' => f.write_char('\''),
+ c => c.escape_debug().fmt(f),
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+
+ use crate::glob::PortableGlobPattern;
+ use insta::assert_snapshot;
+ use ruff_db::system::SystemPath;
+
+ #[test]
+ fn test_error() {
+ #[track_caller]
+ fn parse_err(glob: &str) -> String {
+ let error = PortableGlobPattern::parse(glob, true).unwrap_err();
+ error.to_string()
+ }
+
+ assert_snapshot!(
+ parse_err(".."),
+ @"The parent directory operator (`..`) at position 1 is not allowed in glob: `..`"
+ );
+ assert_snapshot!(
+ parse_err("licenses/.."),
+ @"The parent directory operator (`..`) at position 10 is not allowed in glob: `licenses/..`"
+ );
+ assert_snapshot!(
+ parse_err("licenses/LICEN!E.txt"),
+ @"Invalid character `!` at position 15 in glob: `licenses/LICEN!E.txt`. hint: Characters can be escaped with a backslash"
+ );
+ assert_snapshot!(
+ parse_err("licenses/LICEN[!C]E.txt"),
+ @"Invalid character `!` in range at position 15 in glob: `licenses/LICEN[!C]E.txt`"
+ );
+ assert_snapshot!(
+ parse_err("licenses/LICEN[C?]E.txt"),
+ @"Invalid character `?` in range at position 16 in glob: `licenses/LICEN[C?]E.txt`"
+ );
+ assert_snapshot!(
+ parse_err("******"),
+ @"Too many stars at position 1 in glob: `******`"
+ );
+ assert_snapshot!(
+ parse_err("licenses/**license"),
+ @"Too many stars at position 10 in glob: `licenses/**license`"
+ );
+ assert_snapshot!(
+ parse_err("licenses/***/licenses.csv"),
+ @"Too many stars at position 10 in glob: `licenses/***/licenses.csv`"
+ );
+ assert_snapshot!(
+ parse_err(r"**/@test"),
+ @"Invalid character `@` at position 4 in glob: `**/@test`. hint: Characters can be escaped with a backslash"
+ );
+ // Escapes are not allowed in strict PEP 639 mode
+ assert_snapshot!(
+ parse_err(r"public domain/Gulliver\\’s Travels.txt"),
+ @r"Invalid character ` ` at position 7 in glob: `public domain/Gulliver\\’s Travels.txt`. hint: Characters can be escaped with a backslash"
+ );
+ assert_snapshot!(
+ parse_err(r"**/@test"),
+ @"Invalid character `@` at position 4 in glob: `**/@test`. hint: Characters can be escaped with a backslash"
+ );
+ // Escaping slashes is not allowed.
+ assert_snapshot!(
+ parse_err(r"licenses\\MIT.txt"),
+ @r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\\MIT.txt`"
+ );
+ assert_snapshot!(
+ parse_err(r"licenses\/MIT.txt"),
+ @r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\/MIT.txt`"
+ );
+ }
+
+ #[test]
+ fn test_valid() {
+ let cases = [
+ r"licenses/*.txt",
+ r"licenses/**/*.txt",
+ r"LICEN[CS]E.txt",
+ r"LICEN?E.txt",
+ r"[a-z].txt",
+ r"[a-z._-].txt",
+ r"*/**",
+ r"LICENSE..txt",
+ r"LICENSE_file-1.txt",
+ // (google translate)
+ r"licenses/라이센스*.txt",
+ r"licenses/ライセンス*.txt",
+ r"licenses/执照*.txt",
+ r"src/**",
+ ];
+ let cases_uv = [
+ r"public-domain/Gulliver\’s\ Travels.txt",
+ // https://github.com/astral-sh/uv/issues/13280
+ r"**/\@test",
+ ];
+ for case in cases.iter().chain(cases_uv.iter()) {
+ PortableGlobPattern::parse(case, true).unwrap();
+ }
+ }
+
+ #[track_caller]
+ fn assert_absolute_path(pattern: &str, relative_to: impl AsRef, expected: &str) {
+ let pattern = PortableGlobPattern::parse(pattern, true).unwrap();
+ let absolute = pattern.into_absolute(relative_to);
+ assert_eq!(&*absolute, expected);
+ }
+
+ #[test]
+ fn absolute_pattern() {
+ assert_absolute_path("/src", "/root", "/src");
+ assert_absolute_path("./src", "/root", "/root/src");
+ }
+
+ #[test]
+ #[cfg(windows)]
+ fn absolute_pattern_windows() {
+ assert_absolute_path("./src", r"C:\root", "C:/root/src");
+ assert_absolute_path("./src", r"\\server\test", "//server/test/src");
+ }
+}
diff --git a/crates/ty_project/src/lib.rs b/crates/ty_project/src/lib.rs
index b57608d5f5..bc9daff46e 100644
--- a/crates/ty_project/src/lib.rs
+++ b/crates/ty_project/src/lib.rs
@@ -1,6 +1,5 @@
-#![allow(clippy::ref_option)]
-
-use crate::metadata::options::OptionDiagnostic;
+use crate::glob::{GlobFilterCheckMode, IncludeResult};
+use crate::metadata::options::{OptionDiagnostic, ToSettingsError};
use crate::walk::{ProjectFilesFilter, ProjectFilesWalker};
pub use db::{Db, ProjectDatabase};
use files::{Index, Indexed, IndexedFiles};
@@ -30,6 +29,7 @@ pub mod combine;
mod db;
mod files;
+mod glob;
pub mod metadata;
mod walk;
pub mod watch;
@@ -70,12 +70,20 @@ pub struct Project {
file_set: IndexedFiles,
/// The metadata describing the project, including the unresolved options.
- #[returns(ref)]
- pub metadata: ProjectMetadata,
+ ///
+ /// We box the metadata here because it's a fairly large type and
+ /// reducing the size of `Project` helps reduce the size of the
+ /// salsa allocated table for `Project`.
+ #[returns(deref)]
+ pub metadata: Box,
/// The resolved project settings.
- #[returns(ref)]
- pub settings: Settings,
+ ///
+ /// We box the metadata here because it's a fairly large type and
+ /// reducing the size of `Project` helps reduce the size of the
+ /// salsa allocated table for `Project`.
+ #[returns(deref)]
+ pub settings: Box,
/// The paths that should be included when checking this project.
///
@@ -126,14 +134,16 @@ impl Reporter for DummyReporter {
#[salsa::tracked]
impl Project {
- pub fn from_metadata(db: &dyn Db, metadata: ProjectMetadata) -> Self {
- let (settings, settings_diagnostics) = metadata.options().to_settings(db);
+ pub fn from_metadata(db: &dyn Db, metadata: ProjectMetadata) -> Result {
+ let (settings, diagnostics) = metadata.options().to_settings(db, metadata.root())?;
- Project::builder(metadata, settings, settings_diagnostics)
+ let project = Project::builder(Box::new(metadata), Box::new(settings), diagnostics)
.durability(Durability::MEDIUM)
.open_fileset_durability(Durability::LOW)
.file_set_durability(Durability::LOW)
- .new(db)
+ .new(db);
+
+ Ok(project)
}
pub fn root(self, db: &dyn Db) -> &SystemPath {
@@ -160,8 +170,16 @@ impl Project {
/// the project's include and exclude settings as well as the paths that were passed to `ty check `.
/// This means, that this method is an over-approximation of `Self::files` and may return `true` for paths
/// that won't be included when checking the project because they're ignored in a `.gitignore` file.
- pub fn is_path_included(self, db: &dyn Db, path: &SystemPath) -> bool {
- ProjectFilesFilter::from_project(db, self).is_included(path)
+ pub fn is_file_included(self, db: &dyn Db, path: &SystemPath) -> bool {
+ ProjectFilesFilter::from_project(db, self)
+ .is_file_included(path, GlobFilterCheckMode::Adhoc)
+ == IncludeResult::Included
+ }
+
+ pub fn is_directory_included(self, db: &dyn Db, path: &SystemPath) -> bool {
+ ProjectFilesFilter::from_project(db, self)
+ .is_directory_included(path, GlobFilterCheckMode::Adhoc)
+ == IncludeResult::Included
}
pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) {
@@ -169,17 +187,23 @@ impl Project {
assert_eq!(self.root(db), metadata.root());
if &metadata != self.metadata(db) {
- let (settings, settings_diagnostics) = metadata.options().to_settings(db);
+ match metadata.options().to_settings(db, metadata.root()) {
+ Ok((settings, settings_diagnostics)) => {
+ if self.settings(db) != &settings {
+ self.set_settings(db).to(Box::new(settings));
+ }
- if self.settings(db) != &settings {
- self.set_settings(db).to(settings);
+ if self.settings_diagnostics(db) != settings_diagnostics {
+ self.set_settings_diagnostics(db).to(settings_diagnostics);
+ }
+ }
+ Err(error) => {
+ self.set_settings_diagnostics(db)
+ .to(vec![error.into_diagnostic()]);
+ }
}
- if self.settings_diagnostics(db) != settings_diagnostics {
- self.set_settings_diagnostics(db).to(settings_diagnostics);
- }
-
- self.set_metadata(db).to(metadata);
+ self.set_metadata(db).to(Box::new(metadata));
}
self.reload_files(db);
@@ -248,6 +272,10 @@ impl Project {
}
pub(crate) fn check_file(self, db: &dyn Db, file: File) -> Vec {
+ if !self.is_file_open(db, file) {
+ return Vec::new();
+ }
+
let mut file_diagnostics: Vec<_> = self
.settings_diagnostics(db)
.iter()
diff --git a/crates/ty_project/src/metadata/options.rs b/crates/ty_project/src/metadata/options.rs
index 50e07aad1d..6c7696278c 100644
--- a/crates/ty_project/src/metadata/options.rs
+++ b/crates/ty_project/src/metadata/options.rs
@@ -1,13 +1,26 @@
use crate::Db;
-use crate::metadata::value::{RangedValue, RelativePathBuf, ValueSource, ValueSourceGuard};
-use ruff_db::diagnostic::{Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, Severity, Span};
+use crate::glob::{
+ ExcludeFilterBuilder, IncludeExcludeFilter, IncludeFilterBuilder, PortableGlobPattern,
+};
+use crate::metadata::settings::SrcSettings;
+use crate::metadata::value::{
+ RangedValue, RelativeExcludePattern, RelativeIncludePattern, RelativePathBuf, ValueSource,
+ ValueSourceGuard,
+};
+
+use ruff_db::diagnostic::{
+ Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, DisplayDiagnosticConfig, Severity,
+ Span, SubDiagnostic,
+};
use ruff_db::files::system_path_to_file;
use ruff_db::system::{System, SystemPath, SystemPathBuf};
use ruff_macros::{Combine, OptionsMetadata};
use ruff_python_ast::PythonVersion;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
-use std::fmt::Debug;
+use std::borrow::Cow;
+use std::fmt::{self, Debug, Display};
+use std::sync::Arc;
use thiserror::Error;
use ty_python_semantic::lint::{GetLintError, Level, LintSource, RuleSelection};
use ty_python_semantic::{
@@ -210,24 +223,44 @@ impl Options {
}
}
- #[must_use]
- pub(crate) fn to_settings(&self, db: &dyn Db) -> (Settings, Vec) {
+ pub(crate) fn to_settings(
+ &self,
+ db: &dyn Db,
+ project_root: &SystemPath,
+ ) -> Result<(Settings, Vec), ToSettingsError> {
let (rules, diagnostics) = self.to_rule_selection(db);
- let mut settings = Settings::new(rules, self.src.as_ref());
+ let terminal_options = self.terminal.clone().unwrap_or_default();
+ let terminal = TerminalSettings {
+ output_format: terminal_options
+ .output_format
+ .as_deref()
+ .copied()
+ .unwrap_or_default(),
+ error_on_warning: terminal_options.error_on_warning.unwrap_or_default(),
+ };
- if let Some(terminal) = self.terminal.as_ref() {
- settings.set_terminal(TerminalSettings {
- output_format: terminal
- .output_format
- .as_deref()
- .copied()
- .unwrap_or_default(),
- error_on_warning: terminal.error_on_warning.unwrap_or_default(),
- });
- }
+ let src_options = if let Some(src) = self.src.as_ref() {
+ Cow::Borrowed(src)
+ } else {
+ Cow::Owned(SrcOptions::default())
+ };
- (settings, diagnostics)
+ let src = src_options
+ .to_settings(db, project_root)
+ .map_err(|err| ToSettingsError {
+ diagnostic: err,
+ output_format: terminal.output_format,
+ color: colored::control::SHOULD_COLORIZE.should_colorize(),
+ })?;
+
+ let settings = Settings {
+ rules: Arc::new(rules),
+ terminal,
+ src,
+ };
+
+ Ok((settings, diagnostics))
}
#[must_use]
@@ -290,14 +323,10 @@ impl Options {
),
};
- let span = file.map(Span::from).map(|span| {
- if let Some(range) = rule_name.range() {
- span.with_range(range)
- } else {
- span
- }
+ let annotation = file.map(Span::from).map(|span| {
+ Annotation::primary(span.with_optional_range(rule_name.range()))
});
- diagnostics.push(diagnostic.with_span(span));
+ diagnostics.push(diagnostic.with_annotation(annotation));
}
}
}
@@ -442,6 +471,246 @@ pub struct SrcOptions {
)]
#[serde(skip_serializing_if = "Option::is_none")]
pub respect_ignore_files: Option,
+
+ /// A list of files and directories to check. The `include` option
+ /// follows a similar syntax to `.gitignore` but reversed:
+ /// Including a file or directory will make it so that it (and its contents)
+ /// are type checked.
+ ///
+ /// - `./src/` matches only a directory
+ /// - `./src` matches both files and directories
+ /// - `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`)
+ /// - `*` matches any (possibly empty) sequence of characters (except `/`).
+ /// - `**` matches zero or more path components.
+ /// This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error.
+ /// A sequence of more than two consecutive `*` characters is also invalid.
+ /// - `?` matches any single character except `/`
+ /// - `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode,
+ /// so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid.
+ ///
+ /// Unlike `exclude`, all paths are anchored relative to the project root (`src` only
+ /// matches `/src` and not `/test/src`).
+ ///
+ /// `exclude` take precedence over `include`.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub include: Option>,
+
+ /// A list of file and directory patterns to exclude from type checking.
+ ///
+ /// Patterns follow a syntax similar to `.gitignore`:
+ /// - `./src/` matches only a directory
+ /// - `./src` matches both files and directories
+ /// - `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`)
+ /// - `*` matches any (possibly empty) sequence of characters (except `/`).
+ /// - `**` matches zero or more path components.
+ /// This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error.
+ /// A sequence of more than two consecutive `*` characters is also invalid.
+ /// - `?` matches any single character except `/`
+ /// - `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode,
+ /// so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid.
+ /// - `!pattern` negates a pattern (undoes the exclusion of files that would otherwise be excluded)
+ ///
+ /// By default, the following directories are excluded:
+ ///
+ /// - `.bzr`
+ /// - `.direnv`
+ /// - `.eggs`
+ /// - `.git`
+ /// - `.git-rewrite`
+ /// - `.hg`
+ /// - `.mypy_cache`
+ /// - `.nox`
+ /// - `.pants.d`
+ /// - `.pytype`
+ /// - `.ruff_cache`
+ /// - `.svn`
+ /// - `.tox`
+ /// - `.venv`
+ /// - `__pypackages__`
+ /// - `_build`
+ /// - `buck-out`
+ /// - `dist`
+ /// - `node_modules`
+ /// - `venv`
+ ///
+ /// You can override any default exclude by using a negated pattern. For example,
+ /// to re-include `dist` use `exclude = ["!dist"]`
+ #[option(
+ default = r#"null"#,
+ value_type = r#"list[str]"#,
+ example = r#"
+ exclude = [
+ "generated",
+ "*.proto",
+ "tests/fixtures/**",
+ "!tests/fixtures/important.py" # Include this one file
+ ]
+ "#
+ )]
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub exclude: Option>,
+}
+
+impl SrcOptions {
+ fn to_settings(
+ &self,
+ db: &dyn Db,
+ project_root: &SystemPath,
+ ) -> Result> {
+ let mut includes = IncludeFilterBuilder::new();
+ let system = db.system();
+
+ if let Some(include) = self.include.as_ref() {
+ for pattern in include {
+ // Check the relative pattern for better error messages.
+ pattern.absolute(project_root, system)
+ .and_then(|include| Ok(includes.add(&include)?))
+ .map_err(|err| {
+ let diagnostic = OptionDiagnostic::new(
+ DiagnosticId::InvalidGlob,
+ format!("Invalid include pattern: {err}"),
+ Severity::Error,
+ );
+
+ match pattern.source() {
+ ValueSource::File(file_path) => {
+ if let Ok(file) = system_path_to_file(db.upcast(), &**file_path) {
+ diagnostic
+ .with_message("Invalid include pattern")
+ .with_annotation(Some(
+ Annotation::primary(
+ Span::from(file)
+ .with_optional_range(pattern.range()),
+ )
+ .message(err.to_string()),
+ ))
+ } else {
+ diagnostic.sub(Some(SubDiagnostic::new(
+ Severity::Info,
+ "The pattern is defined in the `src.include` option in your configuration file",
+ )))
+ }
+ }
+ ValueSource::Cli => diagnostic.sub(Some(SubDiagnostic::new(
+ Severity::Info,
+ "The pattern was specified on the CLI using `--include`",
+ ))),
+ }
+ })?;
+ }
+ } else {
+ includes
+ .add(
+ &PortableGlobPattern::parse("**", false)
+ .unwrap()
+ .into_absolute(""),
+ )
+ .unwrap();
+ }
+
+ let include = includes.build().map_err(|_| {
+ // https://github.com/BurntSushi/ripgrep/discussions/2927
+ let diagnostic = OptionDiagnostic::new(
+ DiagnosticId::InvalidGlob,
+ "The `src.include` patterns resulted in a regex that is too large".to_string(),
+ Severity::Error,
+ );
+ diagnostic.sub(Some(SubDiagnostic::new(
+ Severity::Info,
+ "Please open an issue on the ty repository and share the pattern that caused the error.",
+ )))
+ })?;
+
+ let mut excludes = ExcludeFilterBuilder::new();
+
+ // Add the default excludes first, so that a user can override them with a negated exclude pattern.
+ for pattern in [
+ ".bzr",
+ ".direnv",
+ ".eggs",
+ ".git",
+ ".git-rewrite",
+ ".hg",
+ ".mypy_cache",
+ ".nox",
+ ".pants.d",
+ ".pytype",
+ ".ruff_cache",
+ ".svn",
+ ".tox",
+ ".venv",
+ "__pypackages__",
+ "_build",
+ "buck-out",
+ "dist",
+ "node_modules",
+ "venv",
+ ] {
+ PortableGlobPattern::parse(pattern, true)
+ .and_then(|exclude| Ok(excludes.add(&exclude.into_absolute(""))?))
+ .unwrap_or_else(|err| {
+ panic!(
+ "Expected default exclude to be valid glob but adding it failed with: {err}"
+ )
+ });
+ }
+
+ for exclude in self.exclude.as_deref().unwrap_or_default() {
+ // Check the relative path for better error messages.
+ exclude.absolute(project_root, system)
+ .and_then(|pattern| Ok(excludes.add(&pattern)?))
+ .map_err(|err| {
+ let diagnostic = OptionDiagnostic::new(
+ DiagnosticId::InvalidGlob,
+ format!("Invalid exclude pattern: {err}"),
+ Severity::Error,
+ );
+
+ match exclude.source() {
+ ValueSource::File(file_path) => {
+ if let Ok(file) = system_path_to_file(db.upcast(), &**file_path) {
+ diagnostic
+ .with_message("Invalid exclude pattern")
+ .with_annotation(Some(
+ Annotation::primary(
+ Span::from(file)
+ .with_optional_range(exclude.range()),
+ )
+ .message(err.to_string()),
+ ))
+ } else {
+ diagnostic.sub(Some(SubDiagnostic::new(
+ Severity::Info,
+ "The pattern is defined in the `src.exclude` option in your configuration file",
+ )))
+ }
+ }
+ ValueSource::Cli => diagnostic.sub(Some(SubDiagnostic::new(
+ Severity::Info,
+ "The pattern was specified on the CLI using `--exclude`",
+ ))),
+ }
+ })?;
+ }
+
+ let exclude = excludes.build().map_err(|_| {
+ // https://github.com/BurntSushi/ripgrep/discussions/2927
+ let diagnostic = OptionDiagnostic::new(
+ DiagnosticId::InvalidGlob,
+ "The `src.exclude` patterns resulted in a regex that is too large".to_string(),
+ Severity::Error,
+ );
+ diagnostic.sub(Some(SubDiagnostic::new(
+ Severity::Info,
+ "Please open an issue on the ty repository and share the pattern that caused the error.",
+ )))
+ })?;
+
+ Ok(SrcSettings {
+ respect_ignore_files: self.respect_ignore_files.unwrap_or(true),
+ files: IncludeExcludeFilter::new(include, exclude),
+ })
+ }
}
#[derive(Debug, Default, Clone, Eq, PartialEq, Combine, Serialize, Deserialize)]
@@ -494,6 +763,54 @@ pub struct TerminalOptions {
pub error_on_warning: Option,
}
+/// Error returned when the settings can't be resolved because of a hard error.
+#[derive(Debug)]
+pub struct ToSettingsError {
+ diagnostic: Box,
+ output_format: DiagnosticFormat,
+ color: bool,
+}
+
+impl ToSettingsError {
+ pub fn pretty<'a>(&'a self, db: &'a dyn Db) -> impl fmt::Display + use<'a> {
+ struct DisplayPretty<'a> {
+ db: &'a dyn Db,
+ error: &'a ToSettingsError,
+ }
+
+ impl fmt::Display for DisplayPretty<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let display_config = DisplayDiagnosticConfig::default()
+ .format(self.error.output_format)
+ .color(self.error.color);
+
+ write!(
+ f,
+ "{}",
+ self.error
+ .diagnostic
+ .to_diagnostic()
+ .display(&self.db.upcast(), &display_config)
+ )
+ }
+ }
+
+ DisplayPretty { db, error: self }
+ }
+
+ pub fn into_diagnostic(self) -> OptionDiagnostic {
+ *self.diagnostic
+ }
+}
+
+impl Display for ToSettingsError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.write_str(&self.diagnostic.message)
+ }
+}
+
+impl std::error::Error for ToSettingsError {}
+
#[cfg(feature = "schemars")]
mod schema {
use crate::DEFAULT_LINT_REGISTRY;
@@ -568,7 +885,8 @@ pub struct OptionDiagnostic {
id: DiagnosticId,
message: String,
severity: Severity,
- span: Option,
+ annotation: Option,
+ sub: Option,
}
impl OptionDiagnostic {
@@ -577,23 +895,35 @@ impl OptionDiagnostic {
id,
message,
severity,
- span: None,
+ annotation: None,
+ sub: None,
}
}
#[must_use]
- fn with_span(self, span: Option) -> Self {
- OptionDiagnostic { span, ..self }
+ fn with_message(self, message: impl Display) -> Self {
+ OptionDiagnostic {
+ message: message.to_string(),
+ ..self
+ }
+ }
+
+ #[must_use]
+ fn with_annotation(self, annotation: Option) -> Self {
+ OptionDiagnostic { annotation, ..self }
+ }
+
+ #[must_use]
+ fn sub(self, sub: Option) -> Self {
+ OptionDiagnostic { sub, ..self }
}
pub(crate) fn to_diagnostic(&self) -> Diagnostic {
- if let Some(ref span) = self.span {
- let mut diag = Diagnostic::new(self.id, self.severity, "");
- diag.annotate(Annotation::primary(span.clone()).message(&self.message));
- diag
- } else {
- Diagnostic::new(self.id, self.severity, &self.message)
+ let mut diag = Diagnostic::new(self.id, self.severity, &self.message);
+ if let Some(annotation) = self.annotation.clone() {
+ diag.annotate(annotation);
}
+ diag
}
}
diff --git a/crates/ty_project/src/metadata/settings.rs b/crates/ty_project/src/metadata/settings.rs
index 0a48fc559c..156f72632d 100644
--- a/crates/ty_project/src/metadata/settings.rs
+++ b/crates/ty_project/src/metadata/settings.rs
@@ -1,9 +1,10 @@
use std::sync::Arc;
-use crate::metadata::options::SrcOptions;
use ruff_db::diagnostic::DiagnosticFormat;
use ty_python_semantic::lint::RuleSelection;
+use crate::glob::IncludeExcludeFilter;
+
/// The resolved [`super::Options`] for the project.
///
/// Unlike [`super::Options`], the struct has default values filled in and
@@ -19,32 +20,18 @@ use ty_python_semantic::lint::RuleSelection;
/// Settings that are part of [`ty_python_semantic::ProgramSettings`] are not included here.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Settings {
- rules: Arc,
-
- terminal: TerminalSettings,
-
- respect_ignore_files: bool,
+ pub(super) rules: Arc,
+ pub(super) terminal: TerminalSettings,
+ pub(super) src: SrcSettings,
}
impl Settings {
- pub fn new(rules: RuleSelection, src_options: Option<&SrcOptions>) -> Self {
- let respect_ignore_files = src_options
- .and_then(|src| src.respect_ignore_files)
- .unwrap_or(true);
-
- Self {
- rules: Arc::new(rules),
- terminal: TerminalSettings::default(),
- respect_ignore_files,
- }
- }
-
pub fn rules(&self) -> &RuleSelection {
&self.rules
}
- pub fn respect_ignore_files(&self) -> bool {
- self.respect_ignore_files
+ pub fn src(&self) -> &SrcSettings {
+ &self.src
}
pub fn to_rules(&self) -> Arc {
@@ -54,10 +41,6 @@ impl Settings {
pub fn terminal(&self) -> &TerminalSettings {
&self.terminal
}
-
- pub fn set_terminal(&mut self, terminal: TerminalSettings) {
- self.terminal = terminal;
- }
}
#[derive(Debug, Clone, PartialEq, Eq, Default)]
@@ -65,3 +48,9 @@ pub struct TerminalSettings {
pub output_format: DiagnosticFormat,
pub error_on_warning: bool,
}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SrcSettings {
+ pub respect_ignore_files: bool,
+ pub files: IncludeExcludeFilter,
+}
diff --git a/crates/ty_project/src/metadata/value.rs b/crates/ty_project/src/metadata/value.rs
index f2f8db1e22..8e19e6b402 100644
--- a/crates/ty_project/src/metadata/value.rs
+++ b/crates/ty_project/src/metadata/value.rs
@@ -1,5 +1,6 @@
use crate::Db;
use crate::combine::Combine;
+use crate::glob::{AbsolutePortableGlobPattern, PortableGlobError, PortableGlobPattern};
use ruff_db::system::{System, SystemPath, SystemPathBuf};
use ruff_macros::Combine;
use ruff_text_size::{TextRange, TextSize};
@@ -356,3 +357,102 @@ impl RelativePathBuf {
SystemPath::absolute(&self.0, relative_to)
}
}
+
+#[derive(
+ Debug,
+ Clone,
+ serde::Serialize,
+ serde::Deserialize,
+ PartialEq,
+ Eq,
+ PartialOrd,
+ Ord,
+ Hash,
+ Combine,
+)]
+#[serde(transparent)]
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+pub struct RelativeIncludePattern(RangedValue);
+
+impl RelativeIncludePattern {
+ pub fn new(pattern: &str, source: ValueSource) -> Self {
+ Self(RangedValue::new(pattern.to_string(), source))
+ }
+
+ pub fn cli(pattern: &str) -> Self {
+ Self::new(pattern, ValueSource::Cli)
+ }
+
+ pub(crate) fn source(&self) -> &ValueSource {
+ self.0.source()
+ }
+
+ pub(crate) fn range(&self) -> Option {
+ self.0.range()
+ }
+
+ /// Resolves the absolute pattern for `self` based on its origin.
+ pub(crate) fn absolute(
+ &self,
+ project_root: &SystemPath,
+ system: &dyn System,
+ ) -> Result {
+ let relative_to = match &self.0.source {
+ ValueSource::File(_) => project_root,
+ ValueSource::Cli => system.current_directory(),
+ };
+
+ let pattern = PortableGlobPattern::parse(&self.0, false)?;
+ Ok(pattern.into_absolute(relative_to))
+ }
+}
+
+#[derive(
+ Debug,
+ Clone,
+ serde::Serialize,
+ serde::Deserialize,
+ PartialEq,
+ Eq,
+ PartialOrd,
+ Ord,
+ Hash,
+ Combine,
+)]
+#[serde(transparent)]
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+pub struct RelativeExcludePattern(RangedValue);
+
+impl RelativeExcludePattern {
+ pub fn new(pattern: &str, source: ValueSource) -> Self {
+ Self(RangedValue::new(pattern.to_string(), source))
+ }
+
+ pub fn cli(pattern: &str) -> Self {
+ Self::new(pattern, ValueSource::Cli)
+ }
+
+ pub(crate) fn source(&self) -> &ValueSource {
+ self.0.source()
+ }
+
+ pub(crate) fn range(&self) -> Option {
+ self.0.range()
+ }
+
+ /// Resolves the absolute pattern for `self` based on its origin.
+ pub(crate) fn absolute(
+ &self,
+ project_root: &SystemPath,
+ system: &dyn System,
+ ) -> Result {
+ let relative_to = match &self.0.source {
+ ValueSource::File(_) => project_root,
+ ValueSource::Cli => system.current_directory(),
+ };
+
+ let pattern = PortableGlobPattern::parse(&self.0, true)?;
+
+ Ok(pattern.into_absolute(relative_to))
+ }
+}
diff --git a/crates/ty_project/src/walk.rs b/crates/ty_project/src/walk.rs
index e6d7f7fdc3..761d1fd3e5 100644
--- a/crates/ty_project/src/walk.rs
+++ b/crates/ty_project/src/walk.rs
@@ -1,7 +1,8 @@
-use crate::{Db, IOErrorDiagnostic, IOErrorKind, Project};
+use crate::glob::IncludeExcludeFilter;
+use crate::{Db, GlobFilterCheckMode, IOErrorDiagnostic, IOErrorKind, IncludeResult, Project};
use ruff_db::files::{File, system_path_to_file};
use ruff_db::system::walk_directory::{ErrorKind, WalkDirectoryBuilder, WalkState};
-use ruff_db::system::{FileType, SystemPath, SystemPathBuf};
+use ruff_db::system::{SystemPath, SystemPathBuf};
use ruff_python_ast::PySourceType;
use rustc_hash::{FxBuildHasher, FxHashSet};
use std::path::PathBuf;
@@ -13,22 +14,47 @@ use thiserror::Error;
///
/// This struct mainly exists because `dyn Db` isn't `Send` or `Sync`, making it impossible
/// to access fields from within the walker.
-#[derive(Default, Debug)]
+#[derive(Debug)]
pub(crate) struct ProjectFilesFilter<'a> {
/// The same as [`Project::included_paths_or_root`].
included_paths: &'a [SystemPathBuf],
- /// The filter skips checking if the path is in `included_paths` if set to `true`.
- ///
- /// Skipping this check is useful when the walker only walks over `included_paths`.
- skip_included_paths: bool,
+ /// The resolved `src.include` and `src.exclude` filter.
+ src_filter: &'a IncludeExcludeFilter,
}
impl<'a> ProjectFilesFilter<'a> {
pub(crate) fn from_project(db: &'a dyn Db, project: Project) -> Self {
Self {
included_paths: project.included_paths_or_root(db),
- skip_included_paths: false,
+ src_filter: &project.settings(db).src().files,
+ }
+ }
+
+ fn match_included_paths(
+ &self,
+ path: &SystemPath,
+ mode: GlobFilterCheckMode,
+ ) -> Option {
+ match mode {
+ GlobFilterCheckMode::TopDown => Some(CheckPathMatch::Partial),
+ GlobFilterCheckMode::Adhoc => {
+ self.included_paths
+ .iter()
+ .filter_map(|included_path| {
+ if let Ok(relative_path) = path.strip_prefix(included_path) {
+ // Exact matches are always included
+ if relative_path.as_str().is_empty() {
+ Some(CheckPathMatch::Full)
+ } else {
+ Some(CheckPathMatch::Partial)
+ }
+ } else {
+ None
+ }
+ })
+ .max()
+ }
}
}
@@ -45,45 +71,40 @@ impl<'a> ProjectFilesFilter<'a> {
/// This method may return `true` for files that don't end up being included when walking the
/// project tree because it doesn't consider `.gitignore` and other ignore files when deciding
/// if a file's included.
- pub(crate) fn is_included(&self, path: &SystemPath) -> bool {
- #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
- enum CheckPathMatch {
- /// The path is a partial match of the checked path (it's a sub path)
- Partial,
-
- /// The path matches a check path exactly.
- Full,
- }
-
- let m = if self.skip_included_paths {
- Some(CheckPathMatch::Partial)
- } else {
- self.included_paths
- .iter()
- .filter_map(|included_path| {
- if let Ok(relative_path) = path.strip_prefix(included_path) {
- // Exact matches are always included
- if relative_path.as_str().is_empty() {
- Some(CheckPathMatch::Full)
- } else {
- Some(CheckPathMatch::Partial)
- }
- } else {
- None
- }
- })
- .max()
- };
-
- match m {
- None => false,
- Some(CheckPathMatch::Partial) => {
- // TODO: For partial matches, only include the file if it is included by the project's include/exclude settings.
- true
- }
- Some(CheckPathMatch::Full) => true,
+ pub(crate) fn is_file_included(
+ &self,
+ path: &SystemPath,
+ mode: GlobFilterCheckMode,
+ ) -> IncludeResult {
+ match self.match_included_paths(path, mode) {
+ None => IncludeResult::NotIncluded,
+ Some(CheckPathMatch::Partial) => self.src_filter.is_file_included(path, mode),
+ Some(CheckPathMatch::Full) => IncludeResult::Included,
}
}
+
+ pub(crate) fn is_directory_included(
+ &self,
+ path: &SystemPath,
+ mode: GlobFilterCheckMode,
+ ) -> IncludeResult {
+ match self.match_included_paths(path, mode) {
+ None => IncludeResult::NotIncluded,
+ Some(CheckPathMatch::Partial) => {
+ self.src_filter.is_directory_maybe_included(path, mode)
+ }
+ Some(CheckPathMatch::Full) => IncludeResult::Included,
+ }
+ }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+enum CheckPathMatch {
+ /// The path is a partial match of the checked path (it's a sub path)
+ Partial,
+
+ /// The path matches a check path exactly.
+ Full,
}
pub(crate) struct ProjectFilesWalker<'a> {
@@ -96,9 +117,7 @@ impl<'a> ProjectFilesWalker<'a> {
pub(crate) fn new(db: &'a dyn Db) -> Self {
let project = db.project();
- let mut filter = ProjectFilesFilter::from_project(db, project);
- // It's unnecessary to filter on included paths because it only iterates over those to start with.
- filter.skip_included_paths = true;
+ let filter = ProjectFilesFilter::from_project(db, project);
Self::from_paths(db, project.included_paths_or_root(db), filter)
.expect("included_paths_or_root to never return an empty iterator")
@@ -132,7 +151,7 @@ impl<'a> ProjectFilesWalker<'a> {
let mut walker = db
.system()
.walk_directory(paths.next()?.as_ref())
- .standard_filters(db.project().settings(db).respect_ignore_files())
+ .standard_filters(db.project().settings(db).src().respect_ignore_files)
.ignore_hidden(false);
for path in paths {
@@ -152,25 +171,51 @@ impl<'a> ProjectFilesWalker<'a> {
Box::new(|entry| {
match entry {
Ok(entry) => {
- if !self.filter.is_included(entry.path()) {
- tracing::debug!("Ignoring not-included path: {}", entry.path());
- return WalkState::Skip;
+ // Skip excluded directories unless they were explicitly passed to the walker
+ // (which is the case passed to `ty check `).
+ if entry.file_type().is_directory() && entry.depth() > 0 {
+ return match self.filter.is_directory_included(entry.path(), GlobFilterCheckMode::TopDown) {
+ IncludeResult::Included => WalkState::Continue,
+ IncludeResult::Excluded => {
+ tracing::debug!("Skipping directory '{path}' because it is excluded by a default or `src.exclude` pattern", path=entry.path());
+ WalkState::Skip
+ },
+ IncludeResult::NotIncluded => {
+ tracing::debug!("Skipping directory `{path}` because it doesn't match any `src.include` pattern or path specified on the CLI", path=entry.path());
+ WalkState::Skip
+ },
+ };
}
- // Skip over any non python files to avoid creating too many entries in `Files`.
- match entry.file_type() {
- FileType::File => {
- if entry
- .path()
- .extension()
- .and_then(PySourceType::try_from_extension)
- .is_some()
- {
- let mut paths = paths.lock().unwrap();
- paths.push(entry.into_path());
+ if entry.file_type().is_file() {
+ // Ignore any non python files to avoid creating too many entries in `Files`.
+ if entry
+ .path()
+ .extension()
+ .and_then(PySourceType::try_from_extension)
+ .is_none()
+ {
+ return WalkState::Continue;
+ }
+
+ // For all files, except the ones that were explicitly passed to the walker (CLI),
+ // check if they're included in the project.
+ if entry.depth() > 0 {
+ match self.filter.is_file_included(entry.path(), GlobFilterCheckMode::TopDown) {
+ IncludeResult::Included => {},
+ IncludeResult::Excluded => {
+ tracing::debug!("Ignoring file `{path}` because it is excluded by a default or `src.exclude` pattern.", path=entry.path());
+ return WalkState::Continue;
+ },
+ IncludeResult::NotIncluded => {
+ tracing::debug!("Ignoring file `{path}` because it doesn't match any `src.include` pattern or path specified on the CLI.", path=entry.path());
+ return WalkState::Continue;
+ },
}
}
- FileType::Directory | FileType::Symlink => {}
+
+ let mut paths = paths.lock().unwrap();
+ paths.push(entry.into_path());
}
}
Err(error) => match error.kind() {
diff --git a/ty.schema.json b/ty.schema.json
index fc6306d7ec..d3180fb4db 100644
--- a/ty.schema.json
+++ b/ty.schema.json
@@ -851,6 +851,26 @@
"SrcOptions": {
"type": "object",
"properties": {
+ "exclude": {
+ "description": "A list of file and directory patterns to exclude from type checking.\n\nPatterns follow a syntax similar to `.gitignore`: - `./src/` matches only a directory - `./src` matches both files and directories - `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`) - `*` matches any (possibly empty) sequence of characters (except `/`). - `**` matches zero or more path components. This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error. A sequence of more than two consecutive `*` characters is also invalid. - `?` matches any single character except `/` - `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid. - `!pattern` negates a pattern (undoes the exclusion of files that would otherwise be excluded)\n\nBy default, the following directories are excluded:\n\n- `.bzr` - `.direnv` - `.eggs` - `.git` - `.git-rewrite` - `.hg` - `.mypy_cache` - `.nox` - `.pants.d` - `.pytype` - `.ruff_cache` - `.svn` - `.tox` - `.venv` - `__pypackages__` - `_build` - `buck-out` - `dist` - `node_modules` - `venv`\n\nYou can override any default exclude by using a negated pattern. For example, to re-include `dist` use `exclude = [\"!dist\"]`",
+ "type": [
+ "array",
+ "null"
+ ],
+ "items": {
+ "type": "string"
+ }
+ },
+ "include": {
+ "description": "A list of files and directories to check. The `include` option follows a similar syntax to `.gitignore` but reversed: Including a file or directory will make it so that it (and its contents) are type checked.\n\n- `./src/` matches only a directory - `./src` matches both files and directories - `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`) - `*` matches any (possibly empty) sequence of characters (except `/`). - `**` matches zero or more path components. This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error. A sequence of more than two consecutive `*` characters is also invalid. - `?` matches any single character except `/` - `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid.\n\nUnlike `exclude`, all paths are anchored relative to the project root (`src` only matches `/src` and not `/test/src`).\n\n`exclude` take precedence over `include`.",
+ "type": [
+ "array",
+ "null"
+ ],
+ "items": {
+ "type": "string"
+ }
+ },
"respect-ignore-files": {
"description": "Whether to automatically exclude files that are ignored by `.ignore`, `.gitignore`, `.git/info/exclude`, and global `gitignore` files. Enabled by default.",
"type": [