From 3b7a5e4de81bb1eb594962894224cdacf99c5465 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 7 Jan 2026 11:38:02 +0100 Subject: [PATCH] [ty] Allow including files with no extension (#22243) --- crates/ty/tests/cli/file_selection.rs | 59 ++++++++++++ crates/ty_project/src/glob.rs | 19 +++- crates/ty_project/src/glob/include.rs | 105 +++++++++++++++------ crates/ty_project/src/lib.rs | 16 ++-- crates/ty_project/src/metadata/settings.rs | 2 +- crates/ty_project/src/walk.rs | 49 +++++----- 6 files changed, 188 insertions(+), 62 deletions(-) diff --git a/crates/ty/tests/cli/file_selection.rs b/crates/ty/tests/cli/file_selection.rs index 36835f2634..05be57a118 100644 --- a/crates/ty/tests/cli/file_selection.rs +++ b/crates/ty/tests/cli/file_selection.rs @@ -160,6 +160,65 @@ fn configuration_include() -> anyhow::Result<()> { Ok(()) } +/// Files without extensions can be included by adding a literal glob to `include` that matches +/// the path exactly. A literal glob is a glob without any meta characters. +#[test] +fn configuration_include_no_extension() -> anyhow::Result<()> { + let case = CliTest::with_files([( + "src/main", + r#" + print(undefined_var) # error: unresolved-reference + "#, + )])?; + + // By default, `src/main` is excluded because the file has no supported extension. + case.write_file( + "ty.toml", + r#" + [src] + include = ["src"] + "#, + )?; + + assert_cmd_snapshot!(case.command(), @r" + success: true + exit_code: 0 + ----- stdout ----- + All checks passed! 
+ + ----- stderr ----- + WARN No python files found under the given path(s) + "); + + // The file can be included by adding an exactly matching pattern + case.write_file( + "ty.toml", + r#" + [src] + include = ["src", "src/main"] + "#, + )?; + + assert_cmd_snapshot!(case.command(), @r" + success: false + exit_code: 1 + ----- stdout ----- + error[unresolved-reference]: Name `undefined_var` used when not defined + --> src/main:2:7 + | + 2 | print(undefined_var) # error: unresolved-reference + | ^^^^^^^^^^^^^ + | + info: rule `unresolved-reference` is enabled by default + + Found 1 diagnostic + + ----- stderr ----- + "); + + Ok(()) +} + /// Test configuration file exclude functionality #[test] fn configuration_exclude() -> anyhow::Result<()> { diff --git a/crates/ty_project/src/glob.rs b/crates/ty_project/src/glob.rs index 81842f4948..7f421682c1 100644 --- a/crates/ty_project/src/glob.rs +++ b/crates/ty_project/src/glob.rs @@ -1,5 +1,6 @@ use ruff_db::system::SystemPath; +use crate::glob::include::MatchFile; pub(crate) use exclude::{ExcludeFilter, ExcludeFilterBuilder}; pub(crate) use include::{IncludeFilter, IncludeFilterBuilder}; pub(crate) use portable::{ @@ -39,7 +40,9 @@ impl IncludeExcludeFilter { if self.exclude.match_directory(path, mode) { IncludeResult::Excluded } else if self.include.match_directory(path) { - IncludeResult::Included + IncludeResult::Included { + literal_match: None, + } } else { IncludeResult::NotIncluded } @@ -52,10 +55,16 @@ impl IncludeExcludeFilter { ) -> IncludeResult { if self.exclude.match_file(path, mode) { IncludeResult::Excluded - } else if self.include.match_file(path) { - IncludeResult::Included } else { - IncludeResult::NotIncluded + match self.include.match_file(path) { + MatchFile::Literal => IncludeResult::Included { + literal_match: Some(true), + }, + MatchFile::Pattern => IncludeResult::Included { + literal_match: Some(false), + }, + MatchFile::No => IncludeResult::NotIncluded, + } } } } @@ -86,7 +95,7 @@ pub(crate) enum 
IncludeResult { /// /// For directories: This isn't a guarantee that any file in this directory gets included /// but we need to traverse it to make this decision. - Included, + Included { literal_match: Option }, /// The path matches an exclude pattern. Excluded, diff --git a/crates/ty_project/src/glob/include.rs b/crates/ty_project/src/glob/include.rs index e133d13a95..c828f9f128 100644 --- a/crates/ty_project/src/glob/include.rs +++ b/crates/ty_project/src/glob/include.rs @@ -33,7 +33,8 @@ const DFA_SIZE_LIMIT: usize = 1_000_000; pub(crate) struct IncludeFilter { #[get_size(ignore)] glob_set: GlobSet, - original_patterns: Box<[String]>, + original_patterns: Box<[Box]>, + literal_pattern_indices: Box<[usize]>, #[get_size(size_fn = dfa_memory_usage)] dfa: Option>>, } @@ -45,10 +46,29 @@ fn dfa_memory_usage(dfa: &Option>>) -> usize { impl IncludeFilter { /// Whether the file matches any of the globs. - pub(crate) fn match_file(&self, path: impl AsRef) -> bool { + pub(crate) fn match_file(&self, path: impl AsRef) -> MatchFile { let path = path.as_ref(); - self.glob_set.is_match(path) + if self.literal_pattern_indices.is_empty() { + return if self.glob_set.is_match(path) { + MatchFile::Pattern + } else { + MatchFile::No + }; + } + + let matches = self.glob_set.matches(path); + + if matches.is_empty() { + MatchFile::No + } else { + for match_index in matches { + if self.literal_pattern_indices.contains(&match_index) { + return MatchFile::Literal; + } + } + MatchFile::Pattern + } } /// Check whether a directory or any of its children can be matched by any of the globs. @@ -120,18 +140,36 @@ impl PartialEq for IncludeFilter { impl Eq for IncludeFilter {} +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(crate) enum MatchFile { + No, + /// The file path exactly matches a literal glob. This is only the case for globs + /// that don't contain any meta characters. + Literal, + + /// The file path matches the glob pattern. 
+ Pattern, +} + +impl MatchFile {} + #[derive(Debug)] pub(crate) struct IncludeFilterBuilder { set: GlobSetBuilder, - original_pattern: Vec, + set_len: usize, + original_patterns: Vec>, regexes: Vec, + /// Indices of literal patterns (contain no meta characters). + literal_pattern_indices: Vec, } impl IncludeFilterBuilder { pub(crate) fn new() -> Self { Self { + literal_pattern_indices: Vec::new(), set: GlobSetBuilder::new(), - original_pattern: Vec::new(), + set_len: 0, + original_patterns: Vec::new(), regexes: Vec::new(), } } @@ -161,13 +199,16 @@ impl IncludeFilterBuilder { // No need to support Windows-style paths, so the backslash can be used a escape. .backslash_escape(true) .build()?; - self.original_pattern.push(input.relative().to_string()); + + let is_literal_pattern = globset::escape(glob_pattern) == glob_pattern; + + self.original_patterns.push(input.relative().into()); // `lib` is the same as `lib/**` // Add a glob that matches `lib` exactly, change the glob to `lib/**`. if glob_pattern.ends_with("**") { self.push_prefix_regex(&glob); - self.set.add(glob); + self.add_glob(glob); } else { let prefix_glob = GlobBuilder::new(&format!("{glob_pattern}/**")) .literal_separator(true) @@ -176,19 +217,28 @@ impl IncludeFilterBuilder { .build()?; self.push_prefix_regex(&prefix_glob); - self.set.add(prefix_glob); + self.add_glob(prefix_glob); // The reason we add the exact glob, e.g. `src` when the original pattern was `src/` is // so that `match_file` returns true when matching against a file. However, we don't // need to do this if this is a pattern that should only match a directory (specifically, its contents). 
if !only_directory { - self.set.add(glob); + if is_literal_pattern { + self.literal_pattern_indices.push(self.set_len); + } + + self.add_glob(glob); } } Ok(self) } + fn add_glob(&mut self, glob: Glob) { + self.set.add(glob); + self.set_len += 1; + } + fn push_prefix_regex(&mut self, glob: &Glob) { let main_separator = regex::escape(MAIN_SEPARATOR_STR); @@ -239,7 +289,8 @@ impl IncludeFilterBuilder { Ok(IncludeFilter { glob_set, dfa, - original_patterns: self.original_pattern.into(), + literal_pattern_indices: self.literal_pattern_indices.into(), + original_patterns: self.original_patterns.into(), }) } } @@ -248,7 +299,7 @@ impl IncludeFilterBuilder { mod tests { use std::path::{MAIN_SEPARATOR, MAIN_SEPARATOR_STR}; - use crate::glob::include::{IncludeFilter, IncludeFilterBuilder}; + use crate::glob::include::{IncludeFilter, IncludeFilterBuilder, MatchFile}; use crate::glob::{PortableGlobKind, PortableGlobPattern}; use ruff_db::system::{MemoryFileSystem, walk_directory::WalkState}; @@ -326,33 +377,33 @@ mod tests { "files/*.py", ]); - assert!(filter.match_file("lib")); - assert!(filter.match_file("lib/more/test")); + assert_eq!(filter.match_file("lib"), MatchFile::Literal); + assert_eq!(filter.match_file("lib/more/test"), MatchFile::Pattern); // Unlike `directory`, `directory/` only includes a directory with the given name and its contents - assert!(!filter.match_file("directory")); - assert!(filter.match_file("directory/more/test")); + assert_eq!(filter.match_file("directory"), MatchFile::No); + assert_eq!(filter.match_file("directory/more/test"), MatchFile::Pattern); // Unlike `src`, `src/*` only includes a directory with the given name. 
- assert!(!filter.match_file("src")); - assert!(filter.match_file("src/more/test")); + assert_eq!(filter.match_file("src"), MatchFile::No); + assert_eq!(filter.match_file("src/more/test"), MatchFile::Pattern); // Unlike `tests`, `tests/**` only includes files under `tests`, but not a file named tests - assert!(!filter.match_file("tests")); - assert!(filter.match_file("tests/more/test")); + assert_eq!(filter.match_file("tests"), MatchFile::No); + assert_eq!(filter.match_file("tests/more/test"), MatchFile::Pattern); // Unlike `match_directory`, prefixes should not be included. - assert!(!filter.match_file("a")); - assert!(!filter.match_file("a/test-b")); + assert_eq!(filter.match_file("a"), MatchFile::No); + assert_eq!(filter.match_file("a/test-b"), MatchFile::No); - assert!(!filter.match_file("a/test-b/x")); - assert!(!filter.match_file("a/test")); + assert_eq!(filter.match_file("a/test-b/x"), MatchFile::No); + assert_eq!(filter.match_file("a/test"), MatchFile::No); - assert!(filter.match_file("files/a.py")); - assert!(filter.match_file("files/a.py/bcd")); + assert_eq!(filter.match_file("files/a.py"), MatchFile::Pattern); + assert_eq!(filter.match_file("files/a.py/bcd"), MatchFile::Pattern); - assert!(!filter.match_file("not_included")); - assert!(!filter.match_file("files/a.pi")); + assert_eq!(filter.match_file("not_included"), MatchFile::No); + assert_eq!(filter.match_file("files/a.pi"), MatchFile::No); } /// Check that we skip directories that can never match. diff --git a/crates/ty_project/src/lib.rs b/crates/ty_project/src/lib.rs index b468d49ea2..d1cb8d9571 100644 --- a/crates/ty_project/src/lib.rs +++ b/crates/ty_project/src/lib.rs @@ -216,15 +216,19 @@ impl Project { /// This means, that this method is an over-approximation of `Self::files` and may return `true` for paths /// that won't be included when checking the project because they're ignored in a `.gitignore` file. 
pub fn is_file_included(self, db: &dyn Db, path: &SystemPath) -> bool { - ProjectFilesFilter::from_project(db, self) - .is_file_included(path, GlobFilterCheckMode::Adhoc) - == IncludeResult::Included + matches!( + ProjectFilesFilter::from_project(db, self) + .is_file_included(path, GlobFilterCheckMode::Adhoc), + IncludeResult::Included { .. } + ) } pub fn is_directory_included(self, db: &dyn Db, path: &SystemPath) -> bool { - ProjectFilesFilter::from_project(db, self) - .is_directory_included(path, GlobFilterCheckMode::Adhoc) - == IncludeResult::Included + matches!( + ProjectFilesFilter::from_project(db, self) + .is_directory_included(path, GlobFilterCheckMode::Adhoc), + IncludeResult::Included { .. } + ) } pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) { diff --git a/crates/ty_project/src/metadata/settings.rs b/crates/ty_project/src/metadata/settings.rs index 6be53577de..67e6ba2099 100644 --- a/crates/ty_project/src/metadata/settings.rs +++ b/crates/ty_project/src/metadata/settings.rs @@ -97,7 +97,7 @@ impl Override { matches!( self.files .is_file_included(path, GlobFilterCheckMode::Adhoc), - IncludeResult::Included + IncludeResult::Included { .. 
} ) } } diff --git a/crates/ty_project/src/walk.rs b/crates/ty_project/src/walk.rs index e1d61e99ec..ee91914a97 100644 --- a/crates/ty_project/src/walk.rs +++ b/crates/ty_project/src/walk.rs @@ -79,7 +79,9 @@ impl<'a> ProjectFilesFilter<'a> { match self.match_included_paths(path, mode) { None => IncludeResult::NotIncluded, Some(CheckPathMatch::Partial) => self.src_filter.is_file_included(path, mode), - Some(CheckPathMatch::Full) => IncludeResult::Included, + Some(CheckPathMatch::Full) => IncludeResult::Included { + literal_match: Some(true), + }, } } @@ -93,7 +95,9 @@ impl<'a> ProjectFilesFilter<'a> { Some(CheckPathMatch::Partial) => { self.src_filter.is_directory_maybe_included(path, mode) } - Some(CheckPathMatch::Full) => IncludeResult::Included, + Some(CheckPathMatch::Full) => IncludeResult::Included { + literal_match: Some(true), + }, } } } @@ -189,60 +193,59 @@ impl<'a> ProjectFilesWalker<'a> { let directory_included = filter .is_directory_included(entry.path(), GlobFilterCheckMode::TopDown); return match directory_included { - IncludeResult::Included => WalkState::Continue, + IncludeResult::Included { .. } => WalkState::Continue, IncludeResult::Excluded => { tracing::debug!( "Skipping directory '{path}' because it is excluded by a default or `src.exclude` pattern", path=entry.path() ); WalkState::Skip - }, + } IncludeResult::NotIncluded => { tracing::debug!( "Skipping directory `{path}` because it doesn't match any `src.include` pattern or path specified on the CLI", path=entry.path() ); WalkState::Skip - }, + } }; } } else { - // Ignore any non python files to avoid creating too many entries in `Files`. - // Unless the file is explicitly passed, we then always assume it's a python file. 
- let source_type = entry.path().extension().and_then(PySourceType::try_from_extension).or_else(|| { - if entry.depth() == 0 { - Some(PySourceType::Python) - } else { - db.system().source_type(entry.path()) - } - }); - - if source_type.is_none() - { - return WalkState::Continue; - } - // For all files, except the ones that were explicitly passed to the walker (CLI), // check if they're included in the project. if entry.depth() > 0 || self.force_exclude { match filter .is_file_included(entry.path(), GlobFilterCheckMode::TopDown) { - IncludeResult::Included => {}, + IncludeResult::Included { literal_match } => { + // Ignore any non-Python files to avoid creating too many entries in `Files`. + // Exception: a file passed explicitly on the CLI or matched by a literal `include` pattern is always assumed to be a file ty can analyze. + let source_type = if literal_match == Some(true) || entry.depth() == 0 { + Some(PySourceType::Python) + } else { + entry.path().extension().and_then(PySourceType::try_from_extension).or_else(|| db.system().source_type(entry.path())) + }; + + + if source_type.is_none() + { + return WalkState::Continue; + } + } IncludeResult::Excluded => { tracing::debug!( "Ignoring file `{path}` because it is excluded by a default or `src.exclude` pattern.", path=entry.path() ); return WalkState::Continue; - }, + } IncludeResult::NotIncluded => { tracing::debug!( "Ignoring file `{path}` because it doesn't match any `src.include` pattern or path specified on the CLI.", path=entry.path() ); return WalkState::Continue; - }, + } } }