From 1f27d53fd576e92a7d2cd4abc1761d6ad836d85b Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 12 Jun 2025 19:07:31 +0200 Subject: [PATCH] [ty] File inclusion and exclusion (#18498) --- Cargo.lock | 5 + Cargo.toml | 13 +- crates/ruff_db/src/diagnostic/mod.rs | 4 + crates/ruff_db/src/system/path.rs | 28 + crates/ty/docs/cli.md | 2 + crates/ty/docs/configuration.md | 61 ++ crates/ty/src/args.rs | 17 +- crates/ty/tests/cli/file_selection.rs | 726 +++++++++++++++++++++ crates/ty/tests/cli/main.rs | 1 + crates/ty/tests/cli/rule_selection.rs | 4 +- crates/ty_project/Cargo.toml | 5 + crates/ty_project/src/db.rs | 7 +- crates/ty_project/src/db/changes.rs | 36 +- crates/ty_project/src/glob.rs | 89 +++ crates/ty_project/src/glob/exclude.rs | 285 ++++++++ crates/ty_project/src/glob/include.rs | 399 +++++++++++ crates/ty_project/src/glob/portable.rs | 407 ++++++++++++ crates/ty_project/src/lib.rs | 70 +- crates/ty_project/src/metadata/options.rs | 398 ++++++++++- crates/ty_project/src/metadata/settings.rs | 37 +- crates/ty_project/src/metadata/value.rs | 100 +++ crates/ty_project/src/walk.rs | 173 +++-- ty.schema.json | 20 + 23 files changed, 2728 insertions(+), 159 deletions(-) create mode 100644 crates/ty/tests/cli/file_selection.rs create mode 100644 crates/ty_project/src/glob.rs create mode 100644 crates/ty_project/src/glob/exclude.rs create mode 100644 crates/ty_project/src/glob/include.rs create mode 100644 crates/ty_project/src/glob/portable.rs diff --git a/Cargo.lock b/Cargo.lock index 9c8c36681e..ec1b10d268 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3936,11 +3936,16 @@ name = "ty_project" version = "0.0.0" dependencies = [ "anyhow", + "camino", + "colored 3.0.0", "crossbeam", + "globset", "insta", "notify", "pep440_rs", "rayon", + "regex", + "regex-automata 0.4.9", "ruff_cache", "ruff_db", "ruff_macros", diff --git a/Cargo.toml b/Cargo.toml index d86984336d..4f55b171b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -126,6 +126,7 @@ quote = { version = "1.0.23" } rand = { version = "0.9.0" } rayon = { version = "1.10.0" } regex = { version = "1.10.2" } +regex-automata = { version = "0.4.9" } rustc-hash = { version = "2.0.0" } rustc-stable-hash = { version = "0.1.2" } # When updating salsa, make sure to also update the revision in `fuzz/Cargo.toml` @@ -165,7 +166,7 @@ tracing-subscriber = { version = "0.3.18", default-features = false, features = "env-filter", "fmt", "ansi", - "smallvec", + "smallvec" ] } tryfn = { version = "0.2.1" } typed-arena = { version = "2.0.2" } @@ -175,7 +176,11 @@ unicode-width = { version = "0.2.0" } unicode_names2 = { version = "1.2.2" } unicode-normalization = { version = "0.1.23" } url = { version = "2.5.0" } -uuid = { version = "1.6.1", features = ["v4", "fast-rng", "macro-diagnostics"] } +uuid = { version = "1.6.1", features = [ + "v4", + "fast-rng", + "macro-diagnostics", +] } walkdir = { version = "2.3.2" } wasm-bindgen = { version = "0.2.92" } wasm-bindgen-test = { version = "0.3.42" } @@ -210,8 +215,8 @@ must_use_candidate = "allow" similar_names = "allow" single_match_else = "allow" too_many_lines = "allow" -needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block. -unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often. +needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block. +unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often. # Without the hashes we run into a `rustfmt` bug in some snapshot tests, see #13250 needless_raw_string_hashes = "allow" # Disallowed restriction lints diff --git a/crates/ruff_db/src/diagnostic/mod.rs b/crates/ruff_db/src/diagnostic/mod.rs index 8c373ca1e4..07ad6371e5 100644 --- a/crates/ruff_db/src/diagnostic/mod.rs +++ b/crates/ruff_db/src/diagnostic/mod.rs @@ -665,6 +665,9 @@ pub enum DiagnosticId { /// No rule with the given name exists. UnknownRule, + + /// A glob pattern doesn't follow the expected syntax. + InvalidGlob, } impl DiagnosticId { @@ -699,6 +702,7 @@ impl DiagnosticId { DiagnosticId::Lint(name) => name.as_str(), DiagnosticId::RevealedType => "revealed-type", DiagnosticId::UnknownRule => "unknown-rule", + DiagnosticId::InvalidGlob => "invalid-glob", } } diff --git a/crates/ruff_db/src/system/path.rs b/crates/ruff_db/src/system/path.rs index c20b2b19f5..07ea7d0b71 100644 --- a/crates/ruff_db/src/system/path.rs +++ b/crates/ruff_db/src/system/path.rs @@ -45,6 +45,30 @@ impl SystemPath { SystemPath::from_std_path(dunce::simplified(self.as_std_path())).unwrap() } + /// Returns `true` if the `SystemPath` is absolute, i.e., if it is independent of + /// the current directory. + /// + /// * On Unix, a path is absolute if it starts with the root, so + /// `is_absolute` and [`has_root`] are equivalent. + /// + /// * On Windows, a path is absolute if it has a prefix and starts with the + /// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not. + /// + /// # Examples + /// + /// ``` + /// use ruff_db::system::SystemPath; + /// + /// assert!(!SystemPath::new("foo.txt").is_absolute()); + /// ``` + /// + /// [`has_root`]: Utf8Path::has_root + #[inline] + #[must_use] + pub fn is_absolute(&self) -> bool { + self.0.is_absolute() + } + /// Extracts the file extension, if possible. /// /// The extension is: @@ -538,6 +562,10 @@ impl SystemPathBuf { self.0.into_std_path_buf() } + pub fn into_string(self) -> String { + self.0.into_string() + } + #[inline] pub fn as_path(&self) -> &SystemPath { SystemPath::new(&self.0) diff --git a/crates/ty/docs/cli.md b/crates/ty/docs/cli.md index a68b04eb35..56a6ae32c6 100644 --- a/crates/ty/docs/cli.md +++ b/crates/ty/docs/cli.md @@ -51,6 +51,8 @@ over all configuration files.

While ty configuration can be included in a pyproject.toml file, it is not allowed in this context.

May also be set with the TY_CONFIG_FILE environment variable.

--error rule

Treat the given rule as having severity 'error'. Can be specified multiple times.

--error-on-warning

Use exit code 1 if there are any warning-level diagnostics

+
--exclude exclude

Glob patterns for files to exclude from type checking.

+

Uses gitignore-style syntax to exclude files and directories from type checking. Supports patterns like tests/, *.tmp, **/__pycache__/**.

--exit-zero

Always use exit code 0, even when there are error-level diagnostics

--extra-search-path path

Additional path to use as a module-resolution source (can be passed multiple times)

--help, -h

Print help (see a summary with '-h')

diff --git a/crates/ty/docs/configuration.md b/crates/ty/docs/configuration.md index a5333dc931..25cdb70a75 100644 --- a/crates/ty/docs/configuration.md +++ b/crates/ty/docs/configuration.md @@ -153,6 +153,67 @@ typeshed = "/path/to/custom/typeshed" ## `src` +#### `exclude` + +A list of file and directory patterns to exclude from type checking. + +Patterns follow a syntax similar to `.gitignore`: +- `./src/` matches only a directory +- `./src` matches both files and directories +- `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`) +- `*` matches any (possibly empty) sequence of characters (except `/`). +- `**` matches zero or more path components. + This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error. + A sequence of more than two consecutive `*` characters is also invalid. +- `?` matches any single character except `/` +- `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode, + so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid. +- `!pattern` negates a pattern (undoes the exclusion of files that would otherwise be excluded) + +By default, the following directories are excluded: + +- `.bzr` +- `.direnv` +- `.eggs` +- `.git` +- `.git-rewrite` +- `.hg` +- `.mypy_cache` +- `.nox` +- `.pants.d` +- `.pytype` +- `.ruff_cache` +- `.svn` +- `.tox` +- `.venv` +- `__pypackages__` +- `_build` +- `buck-out` +- `dist` +- `node_modules` +- `venv` + +You can override any default exclude by using a negated pattern. For example, +to re-include `dist` use `exclude = ["!dist"]` + +**Default value**: `null` + +**Type**: `list[str]` + +**Example usage** (`pyproject.toml`): + +```toml +[tool.ty.src] +exclude = [ + "generated", + "*.proto", + "tests/fixtures/**", + "!tests/fixtures/important.py" # Include this one file +] +``` + +--- + #### `respect-ignore-files` Whether to automatically exclude files that are ignored by `.ignore`, diff --git a/crates/ty/src/args.rs b/crates/ty/src/args.rs index 8d02b941d8..99f0428f0c 100644 --- a/crates/ty/src/args.rs +++ b/crates/ty/src/args.rs @@ -5,7 +5,9 @@ use clap::{ArgAction, ArgMatches, Error, Parser}; use ruff_db::system::SystemPathBuf; use ty_project::combine::Combine; use ty_project::metadata::options::{EnvironmentOptions, Options, SrcOptions, TerminalOptions}; -use ty_project::metadata::value::{RangedValue, RelativePathBuf, ValueSource}; +use ty_project::metadata::value::{ + RangedValue, RelativeExcludePattern, RelativePathBuf, ValueSource, +}; use ty_python_semantic::lint; #[derive(Debug, Parser)] @@ -148,6 +150,13 @@ pub(crate) struct CheckCommand { respect_ignore_files: Option, #[clap(long, overrides_with("respect_ignore_files"), hide = true)] no_respect_ignore_files: bool, + + /// Glob patterns for files to exclude from type checking. + /// + /// Uses gitignore-style syntax to exclude files and directories from type checking. + /// Supports patterns like `tests/`, `*.tmp`, `**/__pycache__/**`. + #[arg(long, help_heading = "File selection")] + exclude: Option>, } impl CheckCommand { @@ -195,6 +204,12 @@ impl CheckCommand { }), src: Some(SrcOptions { respect_ignore_files, + exclude: self.exclude.map(|excludes| { + excludes + .iter() + .map(|exclude| RelativeExcludePattern::cli(exclude)) + .collect() + }), ..SrcOptions::default() }), rules, diff --git a/crates/ty/tests/cli/file_selection.rs b/crates/ty/tests/cli/file_selection.rs new file mode 100644 index 0000000000..5e646a07cd --- /dev/null +++ b/crates/ty/tests/cli/file_selection.rs @@ -0,0 +1,726 @@ +use insta_cmd::assert_cmd_snapshot; + +use crate::CliTest; + +/// Test exclude CLI argument functionality +#[test] +fn exclude_argument() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "tests/test_main.py", + r#" + print(another_undefined_var) # error: unresolved-reference + "#, + ), + ( + "temp_file.py", + r#" + print(temp_undefined_var) # error: unresolved-reference + "#, + ), + ])?; + + // Test that exclude argument is recognized and works + assert_cmd_snapshot!(case.command().arg("--exclude").arg("tests/"), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + error[unresolved-reference]: Name `temp_undefined_var` used when not defined + --> temp_file.py:2:7 + | + 2 | print(temp_undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 2 diagnostics + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + // Test multiple exclude patterns + assert_cmd_snapshot!(case.command().arg("--exclude").arg("tests/").arg("--exclude").arg("temp_*.py"), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + Ok(()) +} + +/// Test configuration file include functionality +#[test] +fn configuration_include() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "tests/test_main.py", + r#" + print(another_undefined_var) # error: unresolved-reference + "#, + ), + ( + "other.py", + r#" + print(other_undefined_var) # error: unresolved-reference + "#, + ), + ])?; + + // Test include via configuration - should only check included files + case.write_file( + "ty.toml", + r#" + [src] + include = ["src"] + "#, + )?; + + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + // Test multiple include patterns via configuration + case.write_file( + "ty.toml", + r#" + [src] + include = ["src", "other.py"] + "#, + )?; + + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `other_undefined_var` used when not defined + --> other.py:2:7 + | + 2 | print(other_undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 2 diagnostics + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + Ok(()) +} + +/// Test configuration file exclude functionality +#[test] +fn configuration_exclude() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "tests/test_main.py", + r#" + print(another_undefined_var) # error: unresolved-reference + "#, + ), + ( + "temp_file.py", + r#" + print(temp_undefined_var) # error: unresolved-reference + "#, + ), + ])?; + + // Test exclude via configuration + case.write_file( + "ty.toml", + r#" + [src] + exclude = ["tests/"] + "#, + )?; + + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + error[unresolved-reference]: Name `temp_undefined_var` used when not defined + --> temp_file.py:2:7 + | + 2 | print(temp_undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 2 diagnostics + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + // Test multiple exclude patterns via configuration + case.write_file( + "ty.toml", + r#" + [src] + exclude = ["tests/", "temp_*.py"] + "#, + )?; + + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + Ok(()) +} + +/// Test that exclude takes precedence over include in configuration +#[test] +fn exclude_precedence_over_include() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "src/test_helper.py", + r#" + print(helper_undefined_var) # error: unresolved-reference + "#, + ), + ( + "other.py", + r#" + print(other_undefined_var) # error: unresolved-reference + "#, + ), + ])?; + + // Include all src files but exclude test files - exclude should win + case.write_file( + "ty.toml", + r#" + [src] + include = ["src"] + exclude = ["test_*.py"] + "#, + )?; + + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + Ok(()) +} + +/// Test that CLI exclude overrides configuration include +#[test] +fn exclude_argument_precedence_include_argument() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "tests/test_main.py", + r#" + print(another_undefined_var) # error: unresolved-reference + "#, + ), + ( + "other.py", + r#" + print(other_undefined_var) # error: unresolved-reference + "#, + ), + ])?; + + // Configuration includes all files, but CLI excludes tests + case.write_file( + "ty.toml", + r#" + [src] + include = ["src/", "tests/"] + "#, + )?; + + assert_cmd_snapshot!(case.command().arg("--exclude").arg("tests/"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "###); + + Ok(()) +} + +/// Test that default excludes can be removed using negated patterns +#[test] +fn remove_default_exclude() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "dist/generated.py", + r#" + print(another_undefined_var) # error: unresolved-reference + "#, + ), + ])?; + + // By default, 'dist' directory should be excluded (see default excludes) + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + // Now override the default exclude by using a negated pattern to re-include 'dist' + case.write_file( + "ty.toml", + r#" + [src] + exclude = ["!dist"] + "#, + )?; + + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `another_undefined_var` used when not defined + --> dist/generated.py:2:7 + | + 2 | print(another_undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 2 diagnostics + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + Ok(()) +} + +/// Test that configuration excludes can be removed via CLI negation +#[test] +fn cli_removes_config_exclude() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "build/output.py", + r#" + print(build_undefined_var) # error: unresolved-reference + "#, + ), + ])?; + + // Configuration excludes the build directory + case.write_file( + "ty.toml", + r#" + [src] + exclude = ["build/"] + "#, + )?; + + // Verify that build/ is excluded by configuration + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + // Now remove the configuration exclude via CLI negation + assert_cmd_snapshot!(case.command().arg("--exclude").arg("!build/"), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `build_undefined_var` used when not defined + --> build/output.py:2:7 + | + 2 | print(build_undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 2 diagnostics + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + Ok(()) +} + +/// Test behavior when explicitly checking a path that matches an exclude pattern +#[test] +fn explicit_path_overrides_exclude() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "tests/generated.py", + r#" + print(dist_undefined_var) # error: unresolved-reference + "#, + ), + ( + "dist/other.py", + r#" + print(other_undefined_var) # error: unresolved-reference + "#, + ), + ( + "ty.toml", + r#" + [src] + exclude = ["tests/generated.py"] + "#, + ), + ])?; + + // dist is excluded by default and `tests/generated` is excluded in the project, so only src/main.py should be checked + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main.py:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + // Explicitly checking a file in an excluded directory should still check that file + assert_cmd_snapshot!(case.command().arg("tests/generated.py"), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `dist_undefined_var` used when not defined + --> tests/generated.py:2:7 + | + 2 | print(dist_undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + // Explicitly checking the entire excluded directory should check all files in it + assert_cmd_snapshot!(case.command().arg("dist/"), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `other_undefined_var` used when not defined + --> dist/other.py:2:7 + | + 2 | print(other_undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + "); + + Ok(()) +} + +#[test] +fn invalid_include_pattern() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "ty.toml", + r#" + [src] + include = [ + "src/**test/" + ] + "#, + ), + ])?; + + // By default, dist/ is excluded, so only src/main.py should be checked + assert_cmd_snapshot!(case.command(), @r#" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + ty failed + Cause: error[invalid-glob]: Invalid include pattern + --> ty.toml:4:5 + | + 2 | [src] + 3 | include = [ + 4 | "src/**test/" + | ^^^^^^^^^^^^^ Too many stars at position 5 in glob: `src/**test/` + 5 | ] + | + "#); + + Ok(()) +} + +#[test] +fn invalid_include_pattern_concise_output() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "ty.toml", + r#" + [src] + include = [ + "src/**test/" + ] + "#, + ), + ])?; + + // By default, dist/ is excluded, so only src/main.py should be checked + assert_cmd_snapshot!(case.command().arg("--output-format").arg("concise"), @r" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + ty failed + Cause: error[invalid-glob] ty.toml:4:5: Invalid include pattern: Too many stars at position 5 in glob: `src/**test/` + "); + + Ok(()) +} + +#[test] +fn invalid_exclude_pattern() -> anyhow::Result<()> { + let case = CliTest::with_files([ + ( + "src/main.py", + r#" + print(undefined_var) # error: unresolved-reference + "#, + ), + ( + "ty.toml", + r#" + [src] + exclude = [ + "../src" + ] + "#, + ), + ])?; + + // By default, dist/ is excluded, so only src/main.py should be checked + assert_cmd_snapshot!(case.command(), @r#" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + WARN ty is pre-release software and not ready for production use. Expect to encounter bugs, missing features, and fatal errors. + ty failed + Cause: error[invalid-glob]: Invalid exclude pattern + --> ty.toml:4:5 + | + 2 | [src] + 3 | exclude = [ + 4 | "../src" + | ^^^^^^^^ The parent directory operator (`..`) at position 1 is not allowed in glob: `../src` + 5 | ] + | + "#); + + Ok(()) +} diff --git a/crates/ty/tests/cli/main.rs b/crates/ty/tests/cli/main.rs index 023e14cefc..d314b4731a 100644 --- a/crates/ty/tests/cli/main.rs +++ b/crates/ty/tests/cli/main.rs @@ -1,5 +1,6 @@ mod config_option; mod exit_code; +mod file_selection; mod python_environment; mod rule_selection; diff --git a/crates/ty/tests/cli/rule_selection.rs b/crates/ty/tests/cli/rule_selection.rs index 9269790e3a..d60c229263 100644 --- a/crates/ty/tests/cli/rule_selection.rs +++ b/crates/ty/tests/cli/rule_selection.rs @@ -254,12 +254,12 @@ fn configuration_unknown_rules() -> anyhow::Result<()> { success: true exit_code: 0 ----- stdout ----- - warning[unknown-rule] + warning[unknown-rule]: Unknown lint rule `division-by-zer` --> pyproject.toml:3:1 | 2 | [tool.ty.rules] 3 | division-by-zer = "warn" # incorrect rule name - | ^^^^^^^^^^^^^^^ Unknown lint rule `division-by-zer` + | ^^^^^^^^^^^^^^^ | Found 1 diagnostic diff --git a/crates/ty_project/Cargo.toml b/crates/ty_project/Cargo.toml index e8df92d566..707c9915ae 100644 --- a/crates/ty_project/Cargo.toml +++ b/crates/ty_project/Cargo.toml @@ -24,10 +24,15 @@ ty_python_semantic = { workspace = true, features = ["serde"] } ty_vendored = { workspace = true } anyhow = { workspace = true } +camino = { workspace = true } +colored = { workspace = true } crossbeam = { workspace = true } +globset = { workspace = true } notify = { workspace = true } pep440_rs = { workspace = true, features = ["version-ranges"] } rayon = { workspace = true } +regex = { workspace = true } +regex-automata = { workspace = true } rustc-hash = { workspace = true } salsa = { workspace = true } schemars = { workspace = true, optional = true } diff --git a/crates/ty_project/src/db.rs b/crates/ty_project/src/db.rs index 9af4e7e50e..ddcf8b95ab 100644 --- a/crates/ty_project/src/db.rs +++ b/crates/ty_project/src/db.rs @@ -70,7 +70,10 @@ impl ProjectDatabase { let program_settings = project_metadata.to_program_settings(db.system()); Program::from_settings(&db, program_settings)?; - db.project = Some(Project::from_metadata(&db, project_metadata)); + db.project = Some( + Project::from_metadata(&db, project_metadata) + .map_err(|error| anyhow::anyhow!("{}", error.pretty(&db)))?, + ); Ok(db) } @@ -269,7 +272,7 @@ pub(crate) mod tests { project: None, }; - let project = Project::from_metadata(&db, project); + let project = Project::from_metadata(&db, project).unwrap(); db.project = Some(project); db } diff --git a/crates/ty_project/src/db/changes.rs b/crates/ty_project/src/db/changes.rs index de9dcd1b42..7a9c989821 100644 --- a/crates/ty_project/src/db/changes.rs +++ b/crates/ty_project/src/db/changes.rs @@ -9,6 +9,7 @@ use ruff_db::Db as _; use ruff_db::files::{File, Files}; use ruff_db::system::SystemPath; use rustc_hash::FxHashSet; +use salsa::Setter; use ty_python_semantic::Program; /// Represents the result of applying changes to the project database. @@ -113,14 +114,15 @@ impl ProjectDatabase { // should be included in the project. We can skip this check for // paths that aren't part of the project or shouldn't be included // when checking the project. - if project.is_path_included(self, &path) { - if self.system().is_file(&path) { + + if self.system().is_file(&path) { + if project.is_file_included(self, &path) { // Add the parent directory because `walkdir` always visits explicitly passed files // even if they match an exclude filter. added_paths.insert(path.parent().unwrap().to_path_buf()); - } else { - added_paths.insert(path); } + } else if project.is_directory_included(self, &path) { + added_paths.insert(path); } } @@ -153,7 +155,9 @@ impl ProjectDatabase { result.custom_stdlib_changed = true; } - if project.is_path_included(self, &path) || path == project_root { + let directory_included = project.is_directory_included(self, &path); + + if directory_included || path == project_root { // TODO: Shouldn't it be enough to simply traverse the project files and remove all // that start with the given path? tracing::debug!( @@ -165,6 +169,10 @@ impl ProjectDatabase { // indexed files and remove the once that start with the same path, unless // the deleted path is the project configuration. result.project_changed = true; + } else if !directory_included { + tracing::debug!( + "Skipping reload because directory '{path}' isn't included in the project" + ); } } } @@ -233,8 +241,22 @@ impl ProjectDatabase { tracing::debug!("Reloading project after structural change"); project.reload(self, metadata); } else { - tracing::debug!("Replace project after structural change"); - project = Project::from_metadata(self, metadata); + match Project::from_metadata(self, metadata) { + Ok(new_project) => { + tracing::debug!("Replace project after structural change"); + project = new_project; + } + Err(error) => { + tracing::error!( + "Keeping old project configuration because loading the new settings failed with: {error}" + ); + + project + .set_settings_diagnostics(self) + .to(vec![error.into_diagnostic()]); + } + } + self.project = Some(project); } } diff --git a/crates/ty_project/src/glob.rs b/crates/ty_project/src/glob.rs new file mode 100644 index 0000000000..2c93fc4716 --- /dev/null +++ b/crates/ty_project/src/glob.rs @@ -0,0 +1,89 @@ +use ruff_db::system::SystemPath; + +pub(crate) use exclude::{ExcludeFilter, ExcludeFilterBuilder}; +pub(crate) use include::{IncludeFilter, IncludeFilterBuilder}; +pub(crate) use portable::{AbsolutePortableGlobPattern, PortableGlobError, PortableGlobPattern}; + +mod exclude; +mod include; +mod portable; + +/// Path filtering based on an an exclude and include glob pattern set. +/// +/// Exclude patterns take precedence over includes. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct IncludeExcludeFilter { + include: IncludeFilter, + exclude: ExcludeFilter, +} + +impl IncludeExcludeFilter { + pub(crate) fn new(include: IncludeFilter, exclude: ExcludeFilter) -> Self { + Self { include, exclude } + } + + /// Returns whether this directory is included in this filter. + /// + /// Note, this function never returns [`IncludeResult::Included`] for a path that is not included or excluded. + /// However, it may return [`IncludeResult::Included`] for directories that are not excluded, but where + /// it requires traversal to decide if any of its subdirectories or files are included. This, for example, + /// is the case when using wildcard include-patterns like `**/test`. Prefix wildcards require to traverse `src` + /// because it can't be known ahead of time whether it contains a `test` directory or file. + pub(crate) fn is_directory_maybe_included( + &self, + path: &SystemPath, + mode: GlobFilterCheckMode, + ) -> IncludeResult { + if self.exclude.match_directory(path, mode) { + IncludeResult::Excluded + } else if self.include.match_directory(path) { + IncludeResult::Included + } else { + IncludeResult::NotIncluded + } + } + + pub(crate) fn is_file_included( + &self, + path: &SystemPath, + mode: GlobFilterCheckMode, + ) -> IncludeResult { + if self.exclude.match_file(path, mode) { + IncludeResult::Excluded + } else if self.include.match_file(path) { + IncludeResult::Included + } else { + IncludeResult::NotIncluded + } + } +} + +#[derive(Copy, Clone, Eq, PartialEq)] +pub(crate) enum GlobFilterCheckMode { + /// The paths are checked top-to-bottom and inclusion is determined + /// for each path during the traversal. + TopDown, + + /// An adhoc test if a single file or directory is included. + /// + /// This is more expensive than a [`Self::TopDown`] check + /// because it may require testing every ancestor path in addition to the + /// path itself to ensure no ancestor path matches an exclude rule. + Adhoc, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(crate) enum IncludeResult { + /// The path matches or at least is a prefix of an include pattern. + /// + /// For directories: This isn't a guarantee that any file in this directory gets included + /// but we need to traverse it to make this decision. + Included, + + /// The path matches an exclude pattern. + Excluded, + + /// The path matches neither an include nor an exclude pattern and, therefore, + /// isn't included. + NotIncluded, +} diff --git a/crates/ty_project/src/glob/exclude.rs b/crates/ty_project/src/glob/exclude.rs new file mode 100644 index 0000000000..1e6813a00c --- /dev/null +++ b/crates/ty_project/src/glob/exclude.rs @@ -0,0 +1,285 @@ +//! Exclude filter supporting gitignore-like globs. +//! +//! * `src` excludes a file or directory named `src` anywhere in the path. +//! * `/src/` excludes a directory named `src` at the root of the path. +//! * `/src` excludes a directory or file named `src` at the root of the path. +//! * `/src/**` excludes all files and directories inside a directory named `src` but not `src` itself. +//! * `!src` allows a file or directory named `src` anywhere in the path + +use std::sync::Arc; + +use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder}; +use regex_automata::util::pool::Pool; +use ruff_db::system::SystemPath; + +use crate::GlobFilterCheckMode; +use crate::glob::portable::AbsolutePortableGlobPattern; + +/// A filter for gitignore-like globs that excludes files and directories. +/// +/// # Equality +/// +/// Two filters are equal if they're constructed from the same patterns (including order). +/// Two filters that exclude the exact same files but were constructed from different patterns aren't considered +/// equal. +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct ExcludeFilter { + ignore: Gitignore, +} + +impl ExcludeFilter { + /// Returns `true` if the path to a directory is definitely excluded and `false` otherwise. + pub(crate) fn match_directory(&self, path: &SystemPath, mode: GlobFilterCheckMode) -> bool { + self.matches(path, mode, true) + } + + /// Returns `true` if the path to a file is definitely excluded and `false` otherwise. + pub(crate) fn match_file(&self, path: &SystemPath, mode: GlobFilterCheckMode) -> bool { + self.matches(path, mode, false) + } + + fn matches(&self, path: &SystemPath, mode: GlobFilterCheckMode, directory: bool) -> bool { + match mode { + GlobFilterCheckMode::TopDown => { + match self.ignore.matched(path, directory) { + // No hit or an allow hit means the file or directory is not excluded. + Match::None | Match::Allow => false, + Match::Ignore => true, + } + } + GlobFilterCheckMode::Adhoc => { + for ancestor in path.ancestors() { + match self.ignore.matched(ancestor, directory) { + // If the path is allowlisted or there's no hit, try the parent to ensure we don't return false + // for a folder where there's an exclude for a parent. + Match::None | Match::Allow => {} + Match::Ignore => return true, + } + } + + false + } + } + } +} + +pub(crate) struct ExcludeFilterBuilder { + ignore: GitignoreBuilder, +} + +impl ExcludeFilterBuilder { + pub(crate) fn new() -> Self { + Self { + ignore: GitignoreBuilder::new(), + } + } + + pub(crate) fn add( + &mut self, + pattern: &AbsolutePortableGlobPattern, + ) -> Result<&mut Self, globset::Error> { + self.ignore.add(pattern)?; + + Ok(self) + } + + pub(crate) fn build(self) -> Result { + Ok(ExcludeFilter { + ignore: self.ignore.build()?, + }) + } +} + +/// Matcher for gitignore like globs. +/// +/// This code is our own vendored copy of the ignore's crate `Gitignore` type. +/// The main difference to `ignore`'s version is that it makes use +/// of the fact that all our globs are absolute. This simplifies the implementation a fair bit. +/// Making globs absolute is also because the globs can come from both the CLI and configuration files, +/// where the paths are anchored relative to the current working directory or the project root respectively. +/// +/// Vendoring our own copy has the added benefit that we don't need to deal with ignore's `Error` type. +/// Instead, we can exclusively use [`globset::Error`]. +/// +/// This implementation also removes supported for comments, because the patterns aren't read +/// from a `.gitignore` file. This removes the need to escape `#` for file names starting with `#`, +/// +/// You can find the original source on [GitHub](https://github.com/BurntSushi/ripgrep/blob/cbc598f245f3c157a872b69102653e2e349b6d92/crates/ignore/src/gitignore.rs#L81). +/// +/// # Equality +/// +/// Two ignore matches are only equal if they're constructed from the same patterns (including order). +/// Two matchers that were constructed from different patterns but result in +/// including the same files don't compare equal. +#[derive(Clone)] +struct Gitignore { + set: GlobSet, + globs: Vec, + matches: Option>>>, +} + +impl Gitignore { + /// Returns whether the given path (file or directory) matched a pattern in + /// this gitignore matcher. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// The path must be absolute or it will only match prefix-wildcard patterns. + fn matched(&self, path: &SystemPath, is_dir: bool) -> Match { + if self.globs.is_empty() { + return Match::None; + } + + let mut matches = self.matches.as_ref().unwrap().get(); + let candidate = Candidate::new(path); + self.set.matches_candidate_into(&candidate, &mut matches); + for &i in matches.iter().rev() { + let glob = &self.globs[i]; + if !glob.is_only_dir || is_dir { + return if glob.is_ignore() { + Match::Ignore + } else { + Match::Allow + }; + } + } + Match::None + } +} + +impl std::fmt::Debug for Gitignore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Gitignore") + .field("globs", &self.globs) + .finish_non_exhaustive() + } +} + +impl PartialEq for Gitignore { + fn eq(&self, other: &Self) -> bool { + self.globs == other.globs + } +} + +impl Eq for Gitignore {} + +#[derive(Copy, Clone, Debug)] +enum Match { + /// The path matches no pattern. + None, + + /// The path matches an ignore pattern (a positive pattern) + /// It should be ignored. + Ignore, + + /// The path matches an allow pattern (a negative pattern). + /// It should not be ignored. + Allow, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct IgnoreGlob { + /// The pattern that was originally parsed. + original: String, + + /// This is a pattern allowing a path (it starts with a `!`, possibly undoing a previous ignore) + is_allow: bool, + + /// Whether this pattern only matches directories. + is_only_dir: bool, +} + +impl IgnoreGlob { + const fn is_ignore(&self) -> bool { + !self.is_allow + } +} + +/// Builds a matcher for git-ignore like globs. +/// +/// All globs need to use absolute paths, unless they're unanchored (contain no `/`). +#[derive(Clone, Debug)] +struct GitignoreBuilder { + builder: GlobSetBuilder, + globs: Vec, +} + +impl GitignoreBuilder { + /// Create a new builder for a gitignore file. + fn new() -> GitignoreBuilder { + GitignoreBuilder { + builder: GlobSetBuilder::new(), + globs: vec![], + } + } + + /// Builds a new matcher from the globs added so far. + /// + /// Once a matcher is built, no new globs can be added to it. + fn build(&self) -> Result { + let set = self.builder.build()?; + + Ok(Gitignore { + set, + globs: self.globs.clone(), + matches: Some(Arc::new(Pool::new(Vec::new))), + }) + } + + /// Adds a gitignore like glob pattern to this builder. + /// + /// If the pattern could not be parsed as a glob, then an error is returned. + fn add(&mut self, mut pattern: &str) -> Result<&mut GitignoreBuilder, globset::Error> { + let mut glob = IgnoreGlob { + original: pattern.to_string(), + is_allow: false, + is_only_dir: false, + }; + + // File names starting with `!` are escaped with a backslash. Strip the backslash. + // This is not a negated pattern! + if pattern.starts_with("\\!") { + pattern = &pattern[1..]; + } else if let Some(after) = pattern.strip_prefix("!") { + glob.is_allow = true; + pattern = after; + } + + // If it ends with a slash, then this should only match directories, + // but the slash should otherwise not be used while globbing. + if let Some(before) = pattern.strip_suffix('/') { + glob.is_only_dir = true; + pattern = before; + } + + let mut actual = pattern.to_string(); + + // If there is a literal slash, then this is a glob that must match the + // entire path name. Otherwise, we should let it match anywhere, so use + // a **/ prefix. + if !pattern.chars().any(|c| c == '/') { + // ... but only if we don't already have a **/ prefix. + if !pattern.starts_with("**/") { + actual = format!("**/{actual}"); + } + } + // If the glob ends with `/**`, then we should only match everything + // inside a directory, but not the directory itself. Standard globs + // will match the directory. So we add `/*` to force the issue. + if actual.ends_with("/**") { + actual = format!("{actual}/*"); + } + + let parsed = GlobBuilder::new(&actual) + .literal_separator(true) + // No need to support Windows-style paths, so the backslash can be used an escape. + .backslash_escape(true) + .build()?; + + self.builder.add(parsed); + self.globs.push(glob); + + Ok(self) + } +} diff --git a/crates/ty_project/src/glob/include.rs b/crates/ty_project/src/glob/include.rs new file mode 100644 index 0000000000..b6365cf627 --- /dev/null +++ b/crates/ty_project/src/glob/include.rs @@ -0,0 +1,399 @@ +use globset::{Glob, GlobBuilder, GlobSet, GlobSetBuilder}; +use regex_automata::dfa; +use regex_automata::dfa::Automaton; +use ruff_db::system::SystemPath; +use std::path::{MAIN_SEPARATOR, MAIN_SEPARATOR_STR}; +use tracing::warn; + +use crate::glob::portable::AbsolutePortableGlobPattern; + +/// Chosen at a whim -Konsti +const DFA_SIZE_LIMIT: usize = 1_000_000; + +/// Path filter based on a set of include globs. +/// +/// The patterns are similar to gitignore, but reversed: +/// +/// * `/src`: matches a file or directory with its content named `src` +/// * `/src/`: matches a directory with its content named `src` +/// * `/src/**` or `/src/*`: matches the content of `src`, but not a file named `src` +/// +/// Negated patterns are not supported. +/// +/// Internally, the globs are converted to a regex and then to a DFA, which unlike the globs and the +/// regex allows to check for prefix matches. +/// +/// ## Equality +/// Equality is based on the patterns from which a filter was constructed. +/// +/// Because of that, two filters that include the exact same files but were +/// constructed from different patterns (or even just order) compare unequal. +#[derive(Clone)] +pub(crate) struct IncludeFilter { + glob_set: GlobSet, + original_patterns: Box<[String]>, + dfa: Option>>, +} + +impl IncludeFilter { + /// Whether the file matches any of the globs. + pub(crate) fn match_file(&self, path: impl AsRef) -> bool { + let path = path.as_ref(); + + self.glob_set.is_match(path) + } + + /// Check whether a directory or any of its children can be matched by any of the globs. + /// + /// This never returns `false` if any child matches, but it may return `true` even if we + /// don't end up including any child. + pub(crate) fn match_directory(&self, path: impl AsRef) -> bool { + self.match_directory_impl(path.as_ref()) + } + + fn match_directory_impl(&self, path: &SystemPath) -> bool { + let Some(dfa) = &self.dfa else { + return true; + }; + + // Allow the root path + if path == SystemPath::new("") { + return true; + } + + let config_anchored = + regex_automata::util::start::Config::new().anchored(regex_automata::Anchored::Yes); + let mut state = dfa.start_state(&config_anchored).unwrap(); + + let byte_path = path + .as_str() + .strip_suffix('/') + .unwrap_or(path.as_str()) + .as_bytes(); + for b in byte_path { + state = dfa.next_state(state, *b); + } + // Say we're looking at a directory `foo/bar`. We want to continue if either `foo/bar` is + // a match, e.g., from `foo/*`, or a path below it can match, e.g., from `foo/bar/*`. + let eoi_state = dfa.next_eoi_state(state); + // We must not call `next_eoi_state` on the slash state, we want to only check if more + // characters (path components) are allowed, not if we're matching the `$` anchor at the + // end. + let slash_state = dfa.next_state(state, u8::try_from(MAIN_SEPARATOR).unwrap()); + + debug_assert!( + !dfa.is_quit_state(eoi_state) && !dfa.is_quit_state(slash_state), + "matcher is in quit state" + ); + + dfa.is_match_state(eoi_state) || !dfa.is_dead_state(slash_state) + } +} + +impl std::fmt::Debug for IncludeFilter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("IncludeFilder") + .field("original_patterns", &self.original_patterns) + .finish_non_exhaustive() + } +} + +impl PartialEq for IncludeFilter { + fn eq(&self, other: &Self) -> bool { + self.original_patterns == other.original_patterns + } +} + +impl Eq for IncludeFilter {} + +#[derive(Debug)] +pub(crate) struct IncludeFilterBuilder { + set: GlobSetBuilder, + original_pattern: Vec, + regexes: Vec, +} + +impl IncludeFilterBuilder { + pub(crate) fn new() -> Self { + Self { + set: GlobSetBuilder::new(), + original_pattern: Vec::new(), + regexes: Vec::new(), + } + } + + /// Adds an include pattern to the filter. + pub(crate) fn add( + &mut self, + input: &AbsolutePortableGlobPattern, + ) -> Result<&mut Self, globset::Error> { + let mut glob = &**input; + + let mut only_directory = false; + + // A pattern ending with a `/` should only match directories. E.g. `src/` only matches directories + // whereas `src` matches both files and directories. + // We need to remove the `/` to ensure that a path missing the trailing `/` matches. + if let Some(after) = input.strip_suffix('/') { + // Escaped `/` or `\` aren't allowed. `portable_glob::parse` will error + only_directory = true; + glob = after; + } + + // If regex ends with `/**`, only push that one glob and regex + // Otherwise, push two regex, one for `/**` and one for without + let glob = GlobBuilder::new(glob) + .literal_separator(true) + // No need to support Windows-style paths, so the backslash can be used a escape. + .backslash_escape(true) + .build()?; + self.original_pattern.push(input.to_string()); + + // `lib` is the same as `lib/**` + // Add a glob that matches `lib` exactly, change the glob to `lib/**`. + if input.ends_with("**") { + self.push_prefix_regex(&glob); + self.set.add(glob); + } else { + let prefix_glob = GlobBuilder::new(&format!("{glob}/**")) + .literal_separator(true) + // No need to support Windows-style paths, so the backslash can be used a escape. + .backslash_escape(true) + .build()?; + + self.push_prefix_regex(&prefix_glob); + self.set.add(prefix_glob); + + // The reason we add the exact glob, e.g. `src` when the original pattern was `src/` is + // so that `match_file` returns true when matching against a file. However, we don't + // need to do this if this is a pattern that should only match a directory (specifically, its contents). + if !only_directory { + self.set.add(glob); + } + } + + Ok(self) + } + + fn push_prefix_regex(&mut self, glob: &Glob) { + let main_separator = regex::escape(MAIN_SEPARATOR_STR); + + let regex = glob + .regex() + // We are using a custom DFA builder + .strip_prefix("(?-u)") + .expect("a glob is a non-unicode byte regex") + // Match windows paths if applicable + .replace('/', &main_separator); + + self.regexes.push(regex); + } + + /// The filter matches if any of the globs matches. + /// + /// See for the error returned. + pub(crate) fn build(self) -> Result { + let glob_set = self.set.build()?; + + let dfa_builder = dfa::dense::Builder::new() + .syntax( + // The glob regex is a byte matcher + regex_automata::util::syntax::Config::new() + .unicode(false) + .utf8(false), + ) + .configure( + dfa::dense::Config::new() + .start_kind(dfa::StartKind::Anchored) + // DFA can grow exponentially, in which case we bail out + .dfa_size_limit(Some(DFA_SIZE_LIMIT)) + .determinize_size_limit(Some(DFA_SIZE_LIMIT)), + ) + .build_many(&self.regexes); + let dfa = if let Ok(dfa) = dfa_builder { + Some(dfa) + } else { + // TODO(konsti): `regex_automata::dfa::dense::BuildError` should allow asking whether + // is a size error + warn!( + "Glob expressions regex is larger than {DFA_SIZE_LIMIT} bytes, \ + falling back to full directory traversal!" + ); + None + }; + + Ok(IncludeFilter { + glob_set, + dfa, + original_patterns: self.original_pattern.into(), + }) + } +} + +#[cfg(test)] +mod tests { + use std::path::{MAIN_SEPARATOR, MAIN_SEPARATOR_STR}; + + use crate::glob::PortableGlobPattern; + use crate::glob::include::{IncludeFilter, IncludeFilterBuilder}; + use ruff_db::system::{MemoryFileSystem, walk_directory::WalkState}; + + fn create_filter(patterns: impl IntoIterator) -> IncludeFilter { + let mut builder = IncludeFilterBuilder::new(); + for pattern in patterns { + builder + .add( + &PortableGlobPattern::parse(pattern, false) + .unwrap() + .into_absolute(""), + ) + .unwrap(); + } + + builder.build().unwrap() + } + + fn setup_files(files: impl IntoIterator) -> MemoryFileSystem { + let fs = MemoryFileSystem::new(); + + fs.write_files_all(files.into_iter().map(|name| (name, ""))) + .unwrap(); + fs + } + + #[track_caller] + fn assert_match_directory(filter: &IncludeFilter, path: &str) { + assert!(filter.match_directory(path.replace('/', MAIN_SEPARATOR_STR))); + } + + #[track_caller] + fn assert_not_match_directory(filter: &IncludeFilter, path: &str) { + assert!(!filter.match_directory(path.replace('/', MAIN_SEPARATOR_STR))); + } + + #[test] + fn match_directory() { + // `lib` is the same as `src/**`. It includes a file or directory (including its contents) + // `src/*`: The same as `src/**` + let filter = create_filter(["lib", "src/*", "tests/**", "a/test-*/b", "files/*.py"]); + + assert_match_directory(&filter, "lib"); + assert_match_directory(&filter, "lib/more/test"); + + assert_match_directory(&filter, "src"); + assert_match_directory(&filter, "src/more/test"); + + assert_match_directory(&filter, "tests"); + assert_match_directory(&filter, "tests/more/test"); + + assert_match_directory(&filter, "a"); + assert_match_directory(&filter, "a/test-b"); + + assert_not_match_directory(&filter, "a/test-b/x"); + assert_not_match_directory(&filter, "a/test"); + + assert_match_directory(&filter, "files/a.py"); + assert_match_directory(&filter, "files/a.py/bcd"); + + assert_not_match_directory(&filter, "not_included"); + assert_not_match_directory(&filter, "files/a.pi"); + } + + #[test] + fn match_file() { + // `lib` is the same as `src/**`. It includes a file or directory (including its contents) + // `src/*`: The same as `src/**` + let filter = create_filter([ + "lib", + "src/*", + "directory/", + "tests/**", + "a/test-*/b", + "files/*.py", + ]); + + assert!(filter.match_file("lib")); + assert!(filter.match_file("lib/more/test")); + + // Unlike `directory`, `directory/` only includes a directory with the given name and its contents + assert!(!filter.match_file("directory")); + assert!(filter.match_file("directory/more/test")); + + // Unlike `src`, `src/*` only includes a directory with the given name. + assert!(!filter.match_file("src")); + assert!(filter.match_file("src/more/test")); + + // Unlike `tests`, `tests/**` only includes files under `tests`, but not a file named tests + assert!(!filter.match_file("tests")); + assert!(filter.match_file("tests/more/test")); + + // Unlike `match_directory`, prefixes should not be included. + assert!(!filter.match_file("a")); + assert!(!filter.match_file("a/test-b")); + + assert!(!filter.match_file("a/test-b/x")); + assert!(!filter.match_file("a/test")); + + assert!(filter.match_file("files/a.py")); + assert!(filter.match_file("files/a.py/bcd")); + + assert!(!filter.match_file("not_included")); + assert!(!filter.match_file("files/a.pi")); + } + + /// Check that we skip directories that can never match. + #[test] + fn prefilter() { + let filter = create_filter(["/a/b/test-*/d", "/a/b/c/e", "/b/c"]); + let fs = setup_files([ + // Should visit + "/a/b/test-a/d", + "/a/b/c/e", + "/b/c", + // Can skip + "/d/e", + "/a/b/x/f", + ]); + + let visited = std::sync::Mutex::new(Vec::new()); + + // Test the prefix filtering + fs.walk_directory("/").run(|| { + Box::new(|entry| { + let entry = entry.unwrap(); + + if entry.file_type().is_directory() { + if !filter.match_directory(entry.path()) { + return WalkState::Skip; + } + } + + visited + .lock() + .unwrap() + .push(entry.path().as_str().replace(MAIN_SEPARATOR, "/")); + + WalkState::Continue + }) + }); + + let mut visited = visited.into_inner().unwrap(); + visited.sort(); + + // Assert that it didn't traverse into `/d` or `/a/b/x` + assert_eq!( + visited, + [ + "/", + "/a", + "/a/b", + "/a/b/c", + "/a/b/c/e", + "/a/b/test-a", + "/a/b/test-a/d", + "/b", + "/b/c" + ] + ); + } +} diff --git a/crates/ty_project/src/glob/portable.rs b/crates/ty_project/src/glob/portable.rs new file mode 100644 index 0000000000..8de8d91188 --- /dev/null +++ b/crates/ty_project/src/glob/portable.rs @@ -0,0 +1,407 @@ +//! Cross-language glob syntax from +//! [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/). +//! +//! The glob syntax matches the `uv` variant of uv's `uv-globfilter` crate. +//! We intentionally use the same syntax to give users a consistent experience +//! across our tools. +//! +//! [Source](https://github.com/astral-sh/uv/blob/main/crates/uv-globfilter/src/portable_glob.rs) + +use ruff_db::system::SystemPath; +use std::ops::Deref; +use std::{fmt::Write, path::MAIN_SEPARATOR}; +use thiserror::Error; + +/// Pattern that only uses cross-language glob syntax based on [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/): +/// +/// - Alphanumeric characters, underscores (`_`), hyphens (`-`) and dots (`.`) are matched verbatim. +/// - The special glob characters are: +/// - `*`: Matches any number of characters except path separators +/// - `?`: Matches a single character except the path separator +/// - `**`: Matches any number of characters including path separators +/// - `[]`, containing only the verbatim matched characters: Matches a single of the characters contained. Within +/// `[...]`, the hyphen indicates a locale-agnostic range (e.g. `a-z`, order based on Unicode code points). Hyphens at +/// the start or end are matched literally. +/// - `\`: It escapes the following character to be matched verbatim (extension to PEP 639). +/// - The path separator is the forward slash character (`/`). Patterns are relative to the given directory, a leading slash +/// character for absolute paths is not supported. +/// - Parent directory indicators (`..`) are not allowed. +/// +/// These rules mean that matching the backslash (`\`) is forbidden, which avoid collisions with the windows path separator. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub(crate) struct PortableGlobPattern<'a> { + pattern: &'a str, + is_exclude: bool, +} + +impl<'a> PortableGlobPattern<'a> { + /// Parses a portable glob pattern. Returns an error if the pattern isn't valid. + pub(crate) fn parse(glob: &'a str, is_exclude: bool) -> Result { + let mut chars = glob.chars().enumerate().peekable(); + + if is_exclude { + chars.next_if(|(_, c)| *c == '!'); + } + + // A `..` is on a parent directory indicator at the start of the string or after a directory + // separator. + let mut start_or_slash = true; + // The number of consecutive stars before the current character. + while let Some((offset, c)) = chars.next() { + let pos = offset + 1; + + // `***` or `**literals` can be correctly represented with less stars. They are banned by + // `glob`, they are allowed by `globset` and PEP 639 is ambiguous, so we're filtering them + // out. + if c == '*' { + let mut star_run = 1; + while let Some((_, c)) = chars.peek() { + if *c == '*' { + star_run += 1; + chars.next(); + } else { + break; + } + } + if star_run >= 3 { + return Err(PortableGlobError::TooManyStars { + glob: glob.to_string(), + // We don't update pos for the stars. + pos, + }); + } else if star_run == 2 { + if chars.peek().is_some_and(|(_, c)| *c != '/') { + return Err(PortableGlobError::TooManyStars { + glob: glob.to_string(), + // We don't update pos for the stars. + pos, + }); + } + } + start_or_slash = false; + } else if c.is_alphanumeric() || matches!(c, '_' | '-' | '?') { + start_or_slash = false; + } else if c == '.' { + if start_or_slash && matches!(chars.peek(), Some((_, '.'))) { + return Err(PortableGlobError::ParentDirectory { + pos, + glob: glob.to_string(), + }); + } + start_or_slash = false; + } else if c == '/' { + start_or_slash = true; + } else if c == '[' { + for (pos, c) in chars.by_ref() { + if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') { + // Allowed. + } else if c == ']' { + break; + } else { + return Err(PortableGlobError::InvalidCharacterRange { + glob: glob.to_string(), + pos, + invalid: InvalidChar(c), + }); + } + } + start_or_slash = false; + } else if c == '\\' { + match chars.next() { + Some((pos, '/' | '\\')) => { + // For cross-platform compatibility, we don't allow forward slashes or + // backslashes to be escaped. + return Err(PortableGlobError::InvalidEscapee { + glob: glob.to_string(), + pos, + }); + } + Some(_) => { + // Escaped character + } + None => { + return Err(PortableGlobError::TrailingEscape { + glob: glob.to_string(), + pos, + }); + } + } + } else { + return Err(PortableGlobError::InvalidCharacter { + glob: glob.to_string(), + pos, + invalid: InvalidChar(c), + }); + } + } + Ok(PortableGlobPattern { + pattern: glob, + is_exclude, + }) + } + + /// Anchors pattern at `cwd`. + /// + /// `is_exclude` indicates whether this is a pattern in an exclude filter. + /// + /// This method similar to [`SystemPath::absolute`] but for a glob pattern. + /// The main difference is that this method always uses `/` as path separator. + pub(crate) fn into_absolute(self, cwd: impl AsRef) -> AbsolutePortableGlobPattern { + let mut pattern = self.pattern; + let mut negated = false; + + if self.is_exclude { + // If the pattern starts with `!`, we need to remove it and then anchor the rest. + if let Some(after) = self.pattern.strip_prefix('!') { + pattern = after; + negated = true; + } + + // Patterns that don't contain any `/`, e.g. `.venv` are unanchored patterns + // that match anywhere. + if !self.chars().any(|c| c == '/') { + return AbsolutePortableGlobPattern(self.to_string()); + } + } + + if pattern.starts_with('/') { + return AbsolutePortableGlobPattern(pattern.to_string()); + } + + let mut rest = pattern; + let mut prefix = cwd.as_ref().to_path_buf().into_utf8_path_buf(); + + loop { + if let Some(after) = rest.strip_prefix("./") { + rest = after; + } else if let Some(after) = rest.strip_prefix("../") { + prefix.pop(); + rest = after; + } else { + break; + } + } + + let mut output = String::with_capacity(prefix.as_str().len() + rest.len()); + + for component in prefix.components() { + match component { + camino::Utf8Component::Prefix(utf8_prefix_component) => { + output.push_str(&utf8_prefix_component.as_str().replace(MAIN_SEPARATOR, "/")); + } + + camino::Utf8Component::RootDir => { + output.push('/'); + continue; + } + camino::Utf8Component::CurDir => {} + camino::Utf8Component::ParentDir => output.push_str("../"), + camino::Utf8Component::Normal(component) => { + output.push_str(component); + output.push('/'); + } + } + } + + output.push_str(rest); + if negated { + // If the pattern is negated, we need to keep the leading `!`. + AbsolutePortableGlobPattern(format!("!{output}")) + } else { + AbsolutePortableGlobPattern(output) + } + } +} + +impl Deref for PortableGlobPattern<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.pattern + } +} + +/// A portable glob pattern that uses absolute paths. +/// +/// E.g., `./src/**` becomes `/root/src/**` when anchored to `/root`. +#[derive(Debug, Eq, PartialEq, Hash)] +pub(crate) struct AbsolutePortableGlobPattern(String); + +impl Deref for AbsolutePortableGlobPattern { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Debug, Error)] +pub(crate) enum PortableGlobError { + /// Shows the failing glob in the error message. + #[error(transparent)] + GlobError(#[from] globset::Error), + + #[error( + "The parent directory operator (`..`) at position {pos} is not allowed in glob: `{glob}`" + )] + ParentDirectory { glob: String, pos: usize }, + + #[error( + "Invalid character `{invalid}` at position {pos} in glob: `{glob}`. hint: Characters can be escaped with a backslash" + )] + InvalidCharacter { + glob: String, + pos: usize, + invalid: InvalidChar, + }, + + #[error( + "Path separators can't be escaped, invalid character at position {pos} in glob: `{glob}`" + )] + InvalidEscapee { glob: String, pos: usize }, + + #[error("Invalid character `{invalid}` in range at position {pos} in glob: `{glob}`")] + InvalidCharacterRange { + glob: String, + pos: usize, + invalid: InvalidChar, + }, + + #[error("Too many stars at position {pos} in glob: `{glob}`")] + TooManyStars { glob: String, pos: usize }, + + #[error("Trailing backslash at position {pos} in glob: `{glob}`")] + TrailingEscape { glob: String, pos: usize }, +} + +#[derive(Copy, Clone, Debug)] +pub(crate) struct InvalidChar(pub char); + +impl std::fmt::Display for InvalidChar { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.0 { + '\'' => f.write_char('\''), + c => c.escape_debug().fmt(f), + } + } +} + +#[cfg(test)] +mod tests { + + use crate::glob::PortableGlobPattern; + use insta::assert_snapshot; + use ruff_db::system::SystemPath; + + #[test] + fn test_error() { + #[track_caller] + fn parse_err(glob: &str) -> String { + let error = PortableGlobPattern::parse(glob, true).unwrap_err(); + error.to_string() + } + + assert_snapshot!( + parse_err(".."), + @"The parent directory operator (`..`) at position 1 is not allowed in glob: `..`" + ); + assert_snapshot!( + parse_err("licenses/.."), + @"The parent directory operator (`..`) at position 10 is not allowed in glob: `licenses/..`" + ); + assert_snapshot!( + parse_err("licenses/LICEN!E.txt"), + @"Invalid character `!` at position 15 in glob: `licenses/LICEN!E.txt`. hint: Characters can be escaped with a backslash" + ); + assert_snapshot!( + parse_err("licenses/LICEN[!C]E.txt"), + @"Invalid character `!` in range at position 15 in glob: `licenses/LICEN[!C]E.txt`" + ); + assert_snapshot!( + parse_err("licenses/LICEN[C?]E.txt"), + @"Invalid character `?` in range at position 16 in glob: `licenses/LICEN[C?]E.txt`" + ); + assert_snapshot!( + parse_err("******"), + @"Too many stars at position 1 in glob: `******`" + ); + assert_snapshot!( + parse_err("licenses/**license"), + @"Too many stars at position 10 in glob: `licenses/**license`" + ); + assert_snapshot!( + parse_err("licenses/***/licenses.csv"), + @"Too many stars at position 10 in glob: `licenses/***/licenses.csv`" + ); + assert_snapshot!( + parse_err(r"**/@test"), + @"Invalid character `@` at position 4 in glob: `**/@test`. hint: Characters can be escaped with a backslash" + ); + // Escapes are not allowed in strict PEP 639 mode + assert_snapshot!( + parse_err(r"public domain/Gulliver\\’s Travels.txt"), + @r"Invalid character ` ` at position 7 in glob: `public domain/Gulliver\\’s Travels.txt`. hint: Characters can be escaped with a backslash" + ); + assert_snapshot!( + parse_err(r"**/@test"), + @"Invalid character `@` at position 4 in glob: `**/@test`. hint: Characters can be escaped with a backslash" + ); + // Escaping slashes is not allowed. + assert_snapshot!( + parse_err(r"licenses\\MIT.txt"), + @r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\\MIT.txt`" + ); + assert_snapshot!( + parse_err(r"licenses\/MIT.txt"), + @r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\/MIT.txt`" + ); + } + + #[test] + fn test_valid() { + let cases = [ + r"licenses/*.txt", + r"licenses/**/*.txt", + r"LICEN[CS]E.txt", + r"LICEN?E.txt", + r"[a-z].txt", + r"[a-z._-].txt", + r"*/**", + r"LICENSE..txt", + r"LICENSE_file-1.txt", + // (google translate) + r"licenses/라이센스*.txt", + r"licenses/ライセンス*.txt", + r"licenses/执照*.txt", + r"src/**", + ]; + let cases_uv = [ + r"public-domain/Gulliver\’s\ Travels.txt", + // https://github.com/astral-sh/uv/issues/13280 + r"**/\@test", + ]; + for case in cases.iter().chain(cases_uv.iter()) { + PortableGlobPattern::parse(case, true).unwrap(); + } + } + + #[track_caller] + fn assert_absolute_path(pattern: &str, relative_to: impl AsRef, expected: &str) { + let pattern = PortableGlobPattern::parse(pattern, true).unwrap(); + let absolute = pattern.into_absolute(relative_to); + assert_eq!(&*absolute, expected); + } + + #[test] + fn absolute_pattern() { + assert_absolute_path("/src", "/root", "/src"); + assert_absolute_path("./src", "/root", "/root/src"); + } + + #[test] + #[cfg(windows)] + fn absolute_pattern_windows() { + assert_absolute_path("./src", r"C:\root", "C:/root/src"); + assert_absolute_path("./src", r"\\server\test", "//server/test/src"); + } +} diff --git a/crates/ty_project/src/lib.rs b/crates/ty_project/src/lib.rs index b57608d5f5..bc9daff46e 100644 --- a/crates/ty_project/src/lib.rs +++ b/crates/ty_project/src/lib.rs @@ -1,6 +1,5 @@ -#![allow(clippy::ref_option)] - -use crate::metadata::options::OptionDiagnostic; +use crate::glob::{GlobFilterCheckMode, IncludeResult}; +use crate::metadata::options::{OptionDiagnostic, ToSettingsError}; use crate::walk::{ProjectFilesFilter, ProjectFilesWalker}; pub use db::{Db, ProjectDatabase}; use files::{Index, Indexed, IndexedFiles}; @@ -30,6 +29,7 @@ pub mod combine; mod db; mod files; +mod glob; pub mod metadata; mod walk; pub mod watch; @@ -70,12 +70,20 @@ pub struct Project { file_set: IndexedFiles, /// The metadata describing the project, including the unresolved options. - #[returns(ref)] - pub metadata: ProjectMetadata, + /// + /// We box the metadata here because it's a fairly large type and + /// reducing the size of `Project` helps reduce the size of the + /// salsa allocated table for `Project`. + #[returns(deref)] + pub metadata: Box, /// The resolved project settings. - #[returns(ref)] - pub settings: Settings, + /// + /// We box the metadata here because it's a fairly large type and + /// reducing the size of `Project` helps reduce the size of the + /// salsa allocated table for `Project`. + #[returns(deref)] + pub settings: Box, /// The paths that should be included when checking this project. /// @@ -126,14 +134,16 @@ impl Reporter for DummyReporter { #[salsa::tracked] impl Project { - pub fn from_metadata(db: &dyn Db, metadata: ProjectMetadata) -> Self { - let (settings, settings_diagnostics) = metadata.options().to_settings(db); + pub fn from_metadata(db: &dyn Db, metadata: ProjectMetadata) -> Result { + let (settings, diagnostics) = metadata.options().to_settings(db, metadata.root())?; - Project::builder(metadata, settings, settings_diagnostics) + let project = Project::builder(Box::new(metadata), Box::new(settings), diagnostics) .durability(Durability::MEDIUM) .open_fileset_durability(Durability::LOW) .file_set_durability(Durability::LOW) - .new(db) + .new(db); + + Ok(project) } pub fn root(self, db: &dyn Db) -> &SystemPath { @@ -160,8 +170,16 @@ impl Project { /// the project's include and exclude settings as well as the paths that were passed to `ty check `. /// This means, that this method is an over-approximation of `Self::files` and may return `true` for paths /// that won't be included when checking the project because they're ignored in a `.gitignore` file. - pub fn is_path_included(self, db: &dyn Db, path: &SystemPath) -> bool { - ProjectFilesFilter::from_project(db, self).is_included(path) + pub fn is_file_included(self, db: &dyn Db, path: &SystemPath) -> bool { + ProjectFilesFilter::from_project(db, self) + .is_file_included(path, GlobFilterCheckMode::Adhoc) + == IncludeResult::Included + } + + pub fn is_directory_included(self, db: &dyn Db, path: &SystemPath) -> bool { + ProjectFilesFilter::from_project(db, self) + .is_directory_included(path, GlobFilterCheckMode::Adhoc) + == IncludeResult::Included } pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) { @@ -169,17 +187,23 @@ impl Project { assert_eq!(self.root(db), metadata.root()); if &metadata != self.metadata(db) { - let (settings, settings_diagnostics) = metadata.options().to_settings(db); + match metadata.options().to_settings(db, metadata.root()) { + Ok((settings, settings_diagnostics)) => { + if self.settings(db) != &settings { + self.set_settings(db).to(Box::new(settings)); + } - if self.settings(db) != &settings { - self.set_settings(db).to(settings); + if self.settings_diagnostics(db) != settings_diagnostics { + self.set_settings_diagnostics(db).to(settings_diagnostics); + } + } + Err(error) => { + self.set_settings_diagnostics(db) + .to(vec![error.into_diagnostic()]); + } } - if self.settings_diagnostics(db) != settings_diagnostics { - self.set_settings_diagnostics(db).to(settings_diagnostics); - } - - self.set_metadata(db).to(metadata); + self.set_metadata(db).to(Box::new(metadata)); } self.reload_files(db); @@ -248,6 +272,10 @@ impl Project { } pub(crate) fn check_file(self, db: &dyn Db, file: File) -> Vec { + if !self.is_file_open(db, file) { + return Vec::new(); + } + let mut file_diagnostics: Vec<_> = self .settings_diagnostics(db) .iter() diff --git a/crates/ty_project/src/metadata/options.rs b/crates/ty_project/src/metadata/options.rs index 50e07aad1d..6c7696278c 100644 --- a/crates/ty_project/src/metadata/options.rs +++ b/crates/ty_project/src/metadata/options.rs @@ -1,13 +1,26 @@ use crate::Db; -use crate::metadata::value::{RangedValue, RelativePathBuf, ValueSource, ValueSourceGuard}; -use ruff_db::diagnostic::{Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, Severity, Span}; +use crate::glob::{ + ExcludeFilterBuilder, IncludeExcludeFilter, IncludeFilterBuilder, PortableGlobPattern, +}; +use crate::metadata::settings::SrcSettings; +use crate::metadata::value::{ + RangedValue, RelativeExcludePattern, RelativeIncludePattern, RelativePathBuf, ValueSource, + ValueSourceGuard, +}; + +use ruff_db::diagnostic::{ + Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, DisplayDiagnosticConfig, Severity, + Span, SubDiagnostic, +}; use ruff_db::files::system_path_to_file; use ruff_db::system::{System, SystemPath, SystemPathBuf}; use ruff_macros::{Combine, OptionsMetadata}; use ruff_python_ast::PythonVersion; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; -use std::fmt::Debug; +use std::borrow::Cow; +use std::fmt::{self, Debug, Display}; +use std::sync::Arc; use thiserror::Error; use ty_python_semantic::lint::{GetLintError, Level, LintSource, RuleSelection}; use ty_python_semantic::{ @@ -210,24 +223,44 @@ impl Options { } } - #[must_use] - pub(crate) fn to_settings(&self, db: &dyn Db) -> (Settings, Vec) { + pub(crate) fn to_settings( + &self, + db: &dyn Db, + project_root: &SystemPath, + ) -> Result<(Settings, Vec), ToSettingsError> { let (rules, diagnostics) = self.to_rule_selection(db); - let mut settings = Settings::new(rules, self.src.as_ref()); + let terminal_options = self.terminal.clone().unwrap_or_default(); + let terminal = TerminalSettings { + output_format: terminal_options + .output_format + .as_deref() + .copied() + .unwrap_or_default(), + error_on_warning: terminal_options.error_on_warning.unwrap_or_default(), + }; - if let Some(terminal) = self.terminal.as_ref() { - settings.set_terminal(TerminalSettings { - output_format: terminal - .output_format - .as_deref() - .copied() - .unwrap_or_default(), - error_on_warning: terminal.error_on_warning.unwrap_or_default(), - }); - } + let src_options = if let Some(src) = self.src.as_ref() { + Cow::Borrowed(src) + } else { + Cow::Owned(SrcOptions::default()) + }; - (settings, diagnostics) + let src = src_options + .to_settings(db, project_root) + .map_err(|err| ToSettingsError { + diagnostic: err, + output_format: terminal.output_format, + color: colored::control::SHOULD_COLORIZE.should_colorize(), + })?; + + let settings = Settings { + rules: Arc::new(rules), + terminal, + src, + }; + + Ok((settings, diagnostics)) } #[must_use] @@ -290,14 +323,10 @@ impl Options { ), }; - let span = file.map(Span::from).map(|span| { - if let Some(range) = rule_name.range() { - span.with_range(range) - } else { - span - } + let annotation = file.map(Span::from).map(|span| { + Annotation::primary(span.with_optional_range(rule_name.range())) }); - diagnostics.push(diagnostic.with_span(span)); + diagnostics.push(diagnostic.with_annotation(annotation)); } } } @@ -442,6 +471,246 @@ pub struct SrcOptions { )] #[serde(skip_serializing_if = "Option::is_none")] pub respect_ignore_files: Option, + + /// A list of files and directories to check. The `include` option + /// follows a similar syntax to `.gitignore` but reversed: + /// Including a file or directory will make it so that it (and its contents) + /// are type checked. + /// + /// - `./src/` matches only a directory + /// - `./src` matches both files and directories + /// - `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`) + /// - `*` matches any (possibly empty) sequence of characters (except `/`). + /// - `**` matches zero or more path components. + /// This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error. + /// A sequence of more than two consecutive `*` characters is also invalid. + /// - `?` matches any single character except `/` + /// - `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode, + /// so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid. + /// + /// Unlike `exclude`, all paths are anchored relative to the project root (`src` only + /// matches `/src` and not `/test/src`). + /// + /// `exclude` take precedence over `include`. + #[serde(skip_serializing_if = "Option::is_none")] + pub include: Option>, + + /// A list of file and directory patterns to exclude from type checking. + /// + /// Patterns follow a syntax similar to `.gitignore`: + /// - `./src/` matches only a directory + /// - `./src` matches both files and directories + /// - `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`) + /// - `*` matches any (possibly empty) sequence of characters (except `/`). + /// - `**` matches zero or more path components. + /// This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error. + /// A sequence of more than two consecutive `*` characters is also invalid. + /// - `?` matches any single character except `/` + /// - `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode, + /// so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid. + /// - `!pattern` negates a pattern (undoes the exclusion of files that would otherwise be excluded) + /// + /// By default, the following directories are excluded: + /// + /// - `.bzr` + /// - `.direnv` + /// - `.eggs` + /// - `.git` + /// - `.git-rewrite` + /// - `.hg` + /// - `.mypy_cache` + /// - `.nox` + /// - `.pants.d` + /// - `.pytype` + /// - `.ruff_cache` + /// - `.svn` + /// - `.tox` + /// - `.venv` + /// - `__pypackages__` + /// - `_build` + /// - `buck-out` + /// - `dist` + /// - `node_modules` + /// - `venv` + /// + /// You can override any default exclude by using a negated pattern. For example, + /// to re-include `dist` use `exclude = ["!dist"]` + #[option( + default = r#"null"#, + value_type = r#"list[str]"#, + example = r#" + exclude = [ + "generated", + "*.proto", + "tests/fixtures/**", + "!tests/fixtures/important.py" # Include this one file + ] + "# + )] + #[serde(skip_serializing_if = "Option::is_none")] + pub exclude: Option>, +} + +impl SrcOptions { + fn to_settings( + &self, + db: &dyn Db, + project_root: &SystemPath, + ) -> Result> { + let mut includes = IncludeFilterBuilder::new(); + let system = db.system(); + + if let Some(include) = self.include.as_ref() { + for pattern in include { + // Check the relative pattern for better error messages. + pattern.absolute(project_root, system) + .and_then(|include| Ok(includes.add(&include)?)) + .map_err(|err| { + let diagnostic = OptionDiagnostic::new( + DiagnosticId::InvalidGlob, + format!("Invalid include pattern: {err}"), + Severity::Error, + ); + + match pattern.source() { + ValueSource::File(file_path) => { + if let Ok(file) = system_path_to_file(db.upcast(), &**file_path) { + diagnostic + .with_message("Invalid include pattern") + .with_annotation(Some( + Annotation::primary( + Span::from(file) + .with_optional_range(pattern.range()), + ) + .message(err.to_string()), + )) + } else { + diagnostic.sub(Some(SubDiagnostic::new( + Severity::Info, + "The pattern is defined in the `src.include` option in your configuration file", + ))) + } + } + ValueSource::Cli => diagnostic.sub(Some(SubDiagnostic::new( + Severity::Info, + "The pattern was specified on the CLI using `--include`", + ))), + } + })?; + } + } else { + includes + .add( + &PortableGlobPattern::parse("**", false) + .unwrap() + .into_absolute(""), + ) + .unwrap(); + } + + let include = includes.build().map_err(|_| { + // https://github.com/BurntSushi/ripgrep/discussions/2927 + let diagnostic = OptionDiagnostic::new( + DiagnosticId::InvalidGlob, + "The `src.include` patterns resulted in a regex that is too large".to_string(), + Severity::Error, + ); + diagnostic.sub(Some(SubDiagnostic::new( + Severity::Info, + "Please open an issue on the ty repository and share the pattern that caused the error.", + ))) + })?; + + let mut excludes = ExcludeFilterBuilder::new(); + + // Add the default excludes first, so that a user can override them with a negated exclude pattern. + for pattern in [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "dist", + "node_modules", + "venv", + ] { + PortableGlobPattern::parse(pattern, true) + .and_then(|exclude| Ok(excludes.add(&exclude.into_absolute(""))?)) + .unwrap_or_else(|err| { + panic!( + "Expected default exclude to be valid glob but adding it failed with: {err}" + ) + }); + } + + for exclude in self.exclude.as_deref().unwrap_or_default() { + // Check the relative path for better error messages. + exclude.absolute(project_root, system) + .and_then(|pattern| Ok(excludes.add(&pattern)?)) + .map_err(|err| { + let diagnostic = OptionDiagnostic::new( + DiagnosticId::InvalidGlob, + format!("Invalid exclude pattern: {err}"), + Severity::Error, + ); + + match exclude.source() { + ValueSource::File(file_path) => { + if let Ok(file) = system_path_to_file(db.upcast(), &**file_path) { + diagnostic + .with_message("Invalid exclude pattern") + .with_annotation(Some( + Annotation::primary( + Span::from(file) + .with_optional_range(exclude.range()), + ) + .message(err.to_string()), + )) + } else { + diagnostic.sub(Some(SubDiagnostic::new( + Severity::Info, + "The pattern is defined in the `src.exclude` option in your configuration file", + ))) + } + } + ValueSource::Cli => diagnostic.sub(Some(SubDiagnostic::new( + Severity::Info, + "The pattern was specified on the CLI using `--exclude`", + ))), + } + })?; + } + + let exclude = excludes.build().map_err(|_| { + // https://github.com/BurntSushi/ripgrep/discussions/2927 + let diagnostic = OptionDiagnostic::new( + DiagnosticId::InvalidGlob, + "The `src.exclude` patterns resulted in a regex that is too large".to_string(), + Severity::Error, + ); + diagnostic.sub(Some(SubDiagnostic::new( + Severity::Info, + "Please open an issue on the ty repository and share the pattern that caused the error.", + ))) + })?; + + Ok(SrcSettings { + respect_ignore_files: self.respect_ignore_files.unwrap_or(true), + files: IncludeExcludeFilter::new(include, exclude), + }) + } } #[derive(Debug, Default, Clone, Eq, PartialEq, Combine, Serialize, Deserialize)] @@ -494,6 +763,54 @@ pub struct TerminalOptions { pub error_on_warning: Option, } +/// Error returned when the settings can't be resolved because of a hard error. +#[derive(Debug)] +pub struct ToSettingsError { + diagnostic: Box, + output_format: DiagnosticFormat, + color: bool, +} + +impl ToSettingsError { + pub fn pretty<'a>(&'a self, db: &'a dyn Db) -> impl fmt::Display + use<'a> { + struct DisplayPretty<'a> { + db: &'a dyn Db, + error: &'a ToSettingsError, + } + + impl fmt::Display for DisplayPretty<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let display_config = DisplayDiagnosticConfig::default() + .format(self.error.output_format) + .color(self.error.color); + + write!( + f, + "{}", + self.error + .diagnostic + .to_diagnostic() + .display(&self.db.upcast(), &display_config) + ) + } + } + + DisplayPretty { db, error: self } + } + + pub fn into_diagnostic(self) -> OptionDiagnostic { + *self.diagnostic + } +} + +impl Display for ToSettingsError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.diagnostic.message) + } +} + +impl std::error::Error for ToSettingsError {} + #[cfg(feature = "schemars")] mod schema { use crate::DEFAULT_LINT_REGISTRY; @@ -568,7 +885,8 @@ pub struct OptionDiagnostic { id: DiagnosticId, message: String, severity: Severity, - span: Option, + annotation: Option, + sub: Option, } impl OptionDiagnostic { @@ -577,23 +895,35 @@ impl OptionDiagnostic { id, message, severity, - span: None, + annotation: None, + sub: None, } } #[must_use] - fn with_span(self, span: Option) -> Self { - OptionDiagnostic { span, ..self } + fn with_message(self, message: impl Display) -> Self { + OptionDiagnostic { + message: message.to_string(), + ..self + } + } + + #[must_use] + fn with_annotation(self, annotation: Option) -> Self { + OptionDiagnostic { annotation, ..self } + } + + #[must_use] + fn sub(self, sub: Option) -> Self { + OptionDiagnostic { sub, ..self } } pub(crate) fn to_diagnostic(&self) -> Diagnostic { - if let Some(ref span) = self.span { - let mut diag = Diagnostic::new(self.id, self.severity, ""); - diag.annotate(Annotation::primary(span.clone()).message(&self.message)); - diag - } else { - Diagnostic::new(self.id, self.severity, &self.message) + let mut diag = Diagnostic::new(self.id, self.severity, &self.message); + if let Some(annotation) = self.annotation.clone() { + diag.annotate(annotation); } + diag } } diff --git a/crates/ty_project/src/metadata/settings.rs b/crates/ty_project/src/metadata/settings.rs index 0a48fc559c..156f72632d 100644 --- a/crates/ty_project/src/metadata/settings.rs +++ b/crates/ty_project/src/metadata/settings.rs @@ -1,9 +1,10 @@ use std::sync::Arc; -use crate::metadata::options::SrcOptions; use ruff_db::diagnostic::DiagnosticFormat; use ty_python_semantic::lint::RuleSelection; +use crate::glob::IncludeExcludeFilter; + /// The resolved [`super::Options`] for the project. /// /// Unlike [`super::Options`], the struct has default values filled in and @@ -19,32 +20,18 @@ use ty_python_semantic::lint::RuleSelection; /// Settings that are part of [`ty_python_semantic::ProgramSettings`] are not included here. #[derive(Clone, Debug, Eq, PartialEq)] pub struct Settings { - rules: Arc, - - terminal: TerminalSettings, - - respect_ignore_files: bool, + pub(super) rules: Arc, + pub(super) terminal: TerminalSettings, + pub(super) src: SrcSettings, } impl Settings { - pub fn new(rules: RuleSelection, src_options: Option<&SrcOptions>) -> Self { - let respect_ignore_files = src_options - .and_then(|src| src.respect_ignore_files) - .unwrap_or(true); - - Self { - rules: Arc::new(rules), - terminal: TerminalSettings::default(), - respect_ignore_files, - } - } - pub fn rules(&self) -> &RuleSelection { &self.rules } - pub fn respect_ignore_files(&self) -> bool { - self.respect_ignore_files + pub fn src(&self) -> &SrcSettings { + &self.src } pub fn to_rules(&self) -> Arc { @@ -54,10 +41,6 @@ impl Settings { pub fn terminal(&self) -> &TerminalSettings { &self.terminal } - - pub fn set_terminal(&mut self, terminal: TerminalSettings) { - self.terminal = terminal; - } } #[derive(Debug, Clone, PartialEq, Eq, Default)] @@ -65,3 +48,9 @@ pub struct TerminalSettings { pub output_format: DiagnosticFormat, pub error_on_warning: bool, } + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SrcSettings { + pub respect_ignore_files: bool, + pub files: IncludeExcludeFilter, +} diff --git a/crates/ty_project/src/metadata/value.rs b/crates/ty_project/src/metadata/value.rs index f2f8db1e22..8e19e6b402 100644 --- a/crates/ty_project/src/metadata/value.rs +++ b/crates/ty_project/src/metadata/value.rs @@ -1,5 +1,6 @@ use crate::Db; use crate::combine::Combine; +use crate::glob::{AbsolutePortableGlobPattern, PortableGlobError, PortableGlobPattern}; use ruff_db::system::{System, SystemPath, SystemPathBuf}; use ruff_macros::Combine; use ruff_text_size::{TextRange, TextSize}; @@ -356,3 +357,102 @@ impl RelativePathBuf { SystemPath::absolute(&self.0, relative_to) } } + +#[derive( + Debug, + Clone, + serde::Serialize, + serde::Deserialize, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Combine, +)] +#[serde(transparent)] +#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] +pub struct RelativeIncludePattern(RangedValue); + +impl RelativeIncludePattern { + pub fn new(pattern: &str, source: ValueSource) -> Self { + Self(RangedValue::new(pattern.to_string(), source)) + } + + pub fn cli(pattern: &str) -> Self { + Self::new(pattern, ValueSource::Cli) + } + + pub(crate) fn source(&self) -> &ValueSource { + self.0.source() + } + + pub(crate) fn range(&self) -> Option { + self.0.range() + } + + /// Resolves the absolute pattern for `self` based on its origin. + pub(crate) fn absolute( + &self, + project_root: &SystemPath, + system: &dyn System, + ) -> Result { + let relative_to = match &self.0.source { + ValueSource::File(_) => project_root, + ValueSource::Cli => system.current_directory(), + }; + + let pattern = PortableGlobPattern::parse(&self.0, false)?; + Ok(pattern.into_absolute(relative_to)) + } +} + +#[derive( + Debug, + Clone, + serde::Serialize, + serde::Deserialize, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Combine, +)] +#[serde(transparent)] +#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] +pub struct RelativeExcludePattern(RangedValue); + +impl RelativeExcludePattern { + pub fn new(pattern: &str, source: ValueSource) -> Self { + Self(RangedValue::new(pattern.to_string(), source)) + } + + pub fn cli(pattern: &str) -> Self { + Self::new(pattern, ValueSource::Cli) + } + + pub(crate) fn source(&self) -> &ValueSource { + self.0.source() + } + + pub(crate) fn range(&self) -> Option { + self.0.range() + } + + /// Resolves the absolute pattern for `self` based on its origin. + pub(crate) fn absolute( + &self, + project_root: &SystemPath, + system: &dyn System, + ) -> Result { + let relative_to = match &self.0.source { + ValueSource::File(_) => project_root, + ValueSource::Cli => system.current_directory(), + }; + + let pattern = PortableGlobPattern::parse(&self.0, true)?; + + Ok(pattern.into_absolute(relative_to)) + } +} diff --git a/crates/ty_project/src/walk.rs b/crates/ty_project/src/walk.rs index e6d7f7fdc3..761d1fd3e5 100644 --- a/crates/ty_project/src/walk.rs +++ b/crates/ty_project/src/walk.rs @@ -1,7 +1,8 @@ -use crate::{Db, IOErrorDiagnostic, IOErrorKind, Project}; +use crate::glob::IncludeExcludeFilter; +use crate::{Db, GlobFilterCheckMode, IOErrorDiagnostic, IOErrorKind, IncludeResult, Project}; use ruff_db::files::{File, system_path_to_file}; use ruff_db::system::walk_directory::{ErrorKind, WalkDirectoryBuilder, WalkState}; -use ruff_db::system::{FileType, SystemPath, SystemPathBuf}; +use ruff_db::system::{SystemPath, SystemPathBuf}; use ruff_python_ast::PySourceType; use rustc_hash::{FxBuildHasher, FxHashSet}; use std::path::PathBuf; @@ -13,22 +14,47 @@ use thiserror::Error; /// /// This struct mainly exists because `dyn Db` isn't `Send` or `Sync`, making it impossible /// to access fields from within the walker. -#[derive(Default, Debug)] +#[derive(Debug)] pub(crate) struct ProjectFilesFilter<'a> { /// The same as [`Project::included_paths_or_root`]. included_paths: &'a [SystemPathBuf], - /// The filter skips checking if the path is in `included_paths` if set to `true`. - /// - /// Skipping this check is useful when the walker only walks over `included_paths`. - skip_included_paths: bool, + /// The resolved `src.include` and `src.exclude` filter. + src_filter: &'a IncludeExcludeFilter, } impl<'a> ProjectFilesFilter<'a> { pub(crate) fn from_project(db: &'a dyn Db, project: Project) -> Self { Self { included_paths: project.included_paths_or_root(db), - skip_included_paths: false, + src_filter: &project.settings(db).src().files, + } + } + + fn match_included_paths( + &self, + path: &SystemPath, + mode: GlobFilterCheckMode, + ) -> Option { + match mode { + GlobFilterCheckMode::TopDown => Some(CheckPathMatch::Partial), + GlobFilterCheckMode::Adhoc => { + self.included_paths + .iter() + .filter_map(|included_path| { + if let Ok(relative_path) = path.strip_prefix(included_path) { + // Exact matches are always included + if relative_path.as_str().is_empty() { + Some(CheckPathMatch::Full) + } else { + Some(CheckPathMatch::Partial) + } + } else { + None + } + }) + .max() + } } } @@ -45,45 +71,40 @@ impl<'a> ProjectFilesFilter<'a> { /// This method may return `true` for files that don't end up being included when walking the /// project tree because it doesn't consider `.gitignore` and other ignore files when deciding /// if a file's included. - pub(crate) fn is_included(&self, path: &SystemPath) -> bool { - #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] - enum CheckPathMatch { - /// The path is a partial match of the checked path (it's a sub path) - Partial, - - /// The path matches a check path exactly. - Full, - } - - let m = if self.skip_included_paths { - Some(CheckPathMatch::Partial) - } else { - self.included_paths - .iter() - .filter_map(|included_path| { - if let Ok(relative_path) = path.strip_prefix(included_path) { - // Exact matches are always included - if relative_path.as_str().is_empty() { - Some(CheckPathMatch::Full) - } else { - Some(CheckPathMatch::Partial) - } - } else { - None - } - }) - .max() - }; - - match m { - None => false, - Some(CheckPathMatch::Partial) => { - // TODO: For partial matches, only include the file if it is included by the project's include/exclude settings. - true - } - Some(CheckPathMatch::Full) => true, + pub(crate) fn is_file_included( + &self, + path: &SystemPath, + mode: GlobFilterCheckMode, + ) -> IncludeResult { + match self.match_included_paths(path, mode) { + None => IncludeResult::NotIncluded, + Some(CheckPathMatch::Partial) => self.src_filter.is_file_included(path, mode), + Some(CheckPathMatch::Full) => IncludeResult::Included, } } + + pub(crate) fn is_directory_included( + &self, + path: &SystemPath, + mode: GlobFilterCheckMode, + ) -> IncludeResult { + match self.match_included_paths(path, mode) { + None => IncludeResult::NotIncluded, + Some(CheckPathMatch::Partial) => { + self.src_filter.is_directory_maybe_included(path, mode) + } + Some(CheckPathMatch::Full) => IncludeResult::Included, + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum CheckPathMatch { + /// The path is a partial match of the checked path (it's a sub path) + Partial, + + /// The path matches a check path exactly. + Full, } pub(crate) struct ProjectFilesWalker<'a> { @@ -96,9 +117,7 @@ impl<'a> ProjectFilesWalker<'a> { pub(crate) fn new(db: &'a dyn Db) -> Self { let project = db.project(); - let mut filter = ProjectFilesFilter::from_project(db, project); - // It's unnecessary to filter on included paths because it only iterates over those to start with. - filter.skip_included_paths = true; + let filter = ProjectFilesFilter::from_project(db, project); Self::from_paths(db, project.included_paths_or_root(db), filter) .expect("included_paths_or_root to never return an empty iterator") @@ -132,7 +151,7 @@ impl<'a> ProjectFilesWalker<'a> { let mut walker = db .system() .walk_directory(paths.next()?.as_ref()) - .standard_filters(db.project().settings(db).respect_ignore_files()) + .standard_filters(db.project().settings(db).src().respect_ignore_files) .ignore_hidden(false); for path in paths { @@ -152,25 +171,51 @@ impl<'a> ProjectFilesWalker<'a> { Box::new(|entry| { match entry { Ok(entry) => { - if !self.filter.is_included(entry.path()) { - tracing::debug!("Ignoring not-included path: {}", entry.path()); - return WalkState::Skip; + // Skip excluded directories unless they were explicitly passed to the walker + // (which is the case passed to `ty check `). + if entry.file_type().is_directory() && entry.depth() > 0 { + return match self.filter.is_directory_included(entry.path(), GlobFilterCheckMode::TopDown) { + IncludeResult::Included => WalkState::Continue, + IncludeResult::Excluded => { + tracing::debug!("Skipping directory '{path}' because it is excluded by a default or `src.exclude` pattern", path=entry.path()); + WalkState::Skip + }, + IncludeResult::NotIncluded => { + tracing::debug!("Skipping directory `{path}` because it doesn't match any `src.include` pattern or path specified on the CLI", path=entry.path()); + WalkState::Skip + }, + }; } - // Skip over any non python files to avoid creating too many entries in `Files`. - match entry.file_type() { - FileType::File => { - if entry - .path() - .extension() - .and_then(PySourceType::try_from_extension) - .is_some() - { - let mut paths = paths.lock().unwrap(); - paths.push(entry.into_path()); + if entry.file_type().is_file() { + // Ignore any non python files to avoid creating too many entries in `Files`. + if entry + .path() + .extension() + .and_then(PySourceType::try_from_extension) + .is_none() + { + return WalkState::Continue; + } + + // For all files, except the ones that were explicitly passed to the walker (CLI), + // check if they're included in the project. + if entry.depth() > 0 { + match self.filter.is_file_included(entry.path(), GlobFilterCheckMode::TopDown) { + IncludeResult::Included => {}, + IncludeResult::Excluded => { + tracing::debug!("Ignoring file `{path}` because it is excluded by a default or `src.exclude` pattern.", path=entry.path()); + return WalkState::Continue; + }, + IncludeResult::NotIncluded => { + tracing::debug!("Ignoring file `{path}` because it doesn't match any `src.include` pattern or path specified on the CLI.", path=entry.path()); + return WalkState::Continue; + }, } } - FileType::Directory | FileType::Symlink => {} + + let mut paths = paths.lock().unwrap(); + paths.push(entry.into_path()); } } Err(error) => match error.kind() { diff --git a/ty.schema.json b/ty.schema.json index fc6306d7ec..d3180fb4db 100644 --- a/ty.schema.json +++ b/ty.schema.json @@ -851,6 +851,26 @@ "SrcOptions": { "type": "object", "properties": { + "exclude": { + "description": "A list of file and directory patterns to exclude from type checking.\n\nPatterns follow a syntax similar to `.gitignore`: - `./src/` matches only a directory - `./src` matches both files and directories - `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`) - `*` matches any (possibly empty) sequence of characters (except `/`). - `**` matches zero or more path components. This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error. A sequence of more than two consecutive `*` characters is also invalid. - `?` matches any single character except `/` - `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid. - `!pattern` negates a pattern (undoes the exclusion of files that would otherwise be excluded)\n\nBy default, the following directories are excluded:\n\n- `.bzr` - `.direnv` - `.eggs` - `.git` - `.git-rewrite` - `.hg` - `.mypy_cache` - `.nox` - `.pants.d` - `.pytype` - `.ruff_cache` - `.svn` - `.tox` - `.venv` - `__pypackages__` - `_build` - `buck-out` - `dist` - `node_modules` - `venv`\n\nYou can override any default exclude by using a negated pattern. For example, to re-include `dist` use `exclude = [\"!dist\"]`", + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + } + }, + "include": { + "description": "A list of files and directories to check. The `include` option follows a similar syntax to `.gitignore` but reversed: Including a file or directory will make it so that it (and its contents) are type checked.\n\n- `./src/` matches only a directory - `./src` matches both files and directories - `src` matches files or directories named `src` anywhere in the tree (e.g. `./src` or `./tests/src`) - `*` matches any (possibly empty) sequence of characters (except `/`). - `**` matches zero or more path components. This sequence **must** form a single path component, so both `**a` and `b**` are invalid and will result in an error. A sequence of more than two consecutive `*` characters is also invalid. - `?` matches any single character except `/` - `[abc]` matches any character inside the brackets. Character sequences can also specify ranges of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any character between `0` and `9` inclusive. An unclosed bracket is invalid.\n\nUnlike `exclude`, all paths are anchored relative to the project root (`src` only matches `/src` and not `/test/src`).\n\n`exclude` take precedence over `include`.", + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + } + }, "respect-ignore-files": { "description": "Whether to automatically exclude files that are ignored by `.ignore`, `.gitignore`, `.git/info/exclude`, and global `gitignore` files. Enabled by default.", "type": [