[ty] Allow including files with no extension (#22243)

This commit is contained in:
Micha Reiser
2026-01-07 11:38:02 +01:00
committed by GitHub
parent 93039d055d
commit 3b7a5e4de8
6 changed files with 188 additions and 62 deletions

View File

@@ -160,6 +160,65 @@ fn configuration_include() -> anyhow::Result<()> {
Ok(())
}
/// Files without extensions can be included by adding a literal glob to `include` that matches
/// the path exactly. A literal glob is a glob without any meta characters.
///
/// The test runs the CLI twice against the same project: first with only the directory in
/// `include` (the file is not checked), then with the file's exact path added (the file is checked).
#[test]
fn configuration_include_no_extension() -> anyhow::Result<()> {
// Project with a single extension-less file that contains an unresolved reference.
let case = CliTest::with_files([(
"src/main",
r#"
print(undefined_var) # error: unresolved-reference
"#,
)])?;
// By default, `src/main` is excluded because the file has no supported extension.
// Including only the `src` directory is therefore not enough to get the file checked.
case.write_file(
"ty.toml",
r#"
[src]
include = ["src"]
"#,
)?;
// No diagnostics are reported; ty only warns that it found no Python files.
assert_cmd_snapshot!(case.command(), @r"
success: true
exit_code: 0
----- stdout -----
All checks passed!
----- stderr -----
WARN No python files found under the given path(s)
");
// The file can be included by adding an exactly matching pattern
case.write_file(
"ty.toml",
r#"
[src]
include = ["src", "src/main"]
"#,
)?;
// Now `src/main` is checked and its unresolved reference is reported (exit code 1).
assert_cmd_snapshot!(case.command(), @r"
success: false
exit_code: 1
----- stdout -----
error[unresolved-reference]: Name `undefined_var` used when not defined
--> src/main:2:7
|
2 | print(undefined_var) # error: unresolved-reference
| ^^^^^^^^^^^^^
|
info: rule `unresolved-reference` is enabled by default
Found 1 diagnostic
----- stderr -----
");
Ok(())
}
/// Test configuration file exclude functionality
#[test]
fn configuration_exclude() -> anyhow::Result<()> {

View File

@@ -1,5 +1,6 @@
use ruff_db::system::SystemPath;
use crate::glob::include::MatchFile;
pub(crate) use exclude::{ExcludeFilter, ExcludeFilterBuilder};
pub(crate) use include::{IncludeFilter, IncludeFilterBuilder};
pub(crate) use portable::{
@@ -39,7 +40,9 @@ impl IncludeExcludeFilter {
if self.exclude.match_directory(path, mode) {
IncludeResult::Excluded
} else if self.include.match_directory(path) {
IncludeResult::Included
IncludeResult::Included {
literal_match: None,
}
} else {
IncludeResult::NotIncluded
}
@@ -52,10 +55,16 @@ impl IncludeExcludeFilter {
) -> IncludeResult {
if self.exclude.match_file(path, mode) {
IncludeResult::Excluded
} else if self.include.match_file(path) {
IncludeResult::Included
} else {
IncludeResult::NotIncluded
match self.include.match_file(path) {
MatchFile::Literal => IncludeResult::Included {
literal_match: Some(true),
},
MatchFile::Pattern => IncludeResult::Included {
literal_match: Some(false),
},
MatchFile::No => IncludeResult::NotIncluded,
}
}
}
}
@@ -86,7 +95,7 @@ pub(crate) enum IncludeResult {
///
/// For directories: This isn't a guarantee that any file in this directory gets included
/// but we need to traverse it to make this decision.
Included,
Included { literal_match: Option<bool> },
/// The path matches an exclude pattern.
Excluded,

View File

@@ -33,7 +33,8 @@ const DFA_SIZE_LIMIT: usize = 1_000_000;
pub(crate) struct IncludeFilter {
#[get_size(ignore)]
glob_set: GlobSet,
original_patterns: Box<[String]>,
original_patterns: Box<[Box<str>]>,
literal_pattern_indices: Box<[usize]>,
#[get_size(size_fn = dfa_memory_usage)]
dfa: Option<dfa::dense::DFA<Vec<u32>>>,
}
@@ -45,10 +46,29 @@ fn dfa_memory_usage(dfa: &Option<dfa::dense::DFA<Vec<u32>>>) -> usize {
impl IncludeFilter {
/// Whether the file matches any of the globs.
///
/// Returns `MatchFile::No` if no glob matches, `MatchFile::Literal` if at least one of the
/// matching globs is listed in `literal_pattern_indices`, and `MatchFile::Pattern` otherwise.
pub(crate) fn match_file(&self, path: impl AsRef<SystemPath>) -> bool {
pub(crate) fn match_file(&self, path: impl AsRef<SystemPath>) -> MatchFile {
let path = path.as_ref();
self.glob_set.is_match(path)
// Without any literal patterns there is nothing to distinguish: a plain yes/no check
// is enough and a match can only ever be a `Pattern` match.
if self.literal_pattern_indices.is_empty() {
return if self.glob_set.is_match(path) {
MatchFile::Pattern
} else {
MatchFile::No
};
}
// Collect the indices of the matching globs so that they can be compared against the
// indices recorded for literal patterns.
let matches = self.glob_set.matches(path);
if matches.is_empty() {
MatchFile::No
} else {
// A single literal match is enough to classify the whole result as `Literal`,
// even if other (non-literal) globs match as well.
for match_index in matches {
if self.literal_pattern_indices.contains(&match_index) {
return MatchFile::Literal;
}
}
MatchFile::Pattern
}
}
/// Check whether a directory or any of its children can be matched by any of the globs.
@@ -120,18 +140,36 @@ impl PartialEq for IncludeFilter {
impl Eq for IncludeFilter {}
/// The outcome of matching a file path against the globs of an include filter.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum MatchFile {
    /// The file path doesn't match any glob.
    No,

    /// The file path exactly matches a literal glob, that is, a glob that doesn't use
    /// any wildcards or other meta characters.
    Literal,

    /// The file path matches a glob, but none of the matching globs is a literal glob.
    Pattern,
}
/// Builder that collects include globs and builds an `IncludeFilter` from them.
#[derive(Debug)]
pub(crate) struct IncludeFilterBuilder {
/// The glob set under construction.
set: GlobSetBuilder,
original_pattern: Vec<String>,
/// Number of globs added to `set` so far (maintained by `add_glob`). Read just before
/// adding a glob, it is the index that glob gets in `set`.
set_len: usize,
/// The relative form of each input pattern, in the order the patterns were added.
original_patterns: Vec<Box<str>>,
/// Regex sources pushed by `push_prefix_regex`.
regexes: Vec<String>,
/// Indices of literal patterns (contain no meta characters).
literal_pattern_indices: Vec<usize>,
}
impl IncludeFilterBuilder {
pub(crate) fn new() -> Self {
Self {
literal_pattern_indices: Vec::new(),
set: GlobSetBuilder::new(),
original_pattern: Vec::new(),
set_len: 0,
original_patterns: Vec::new(),
regexes: Vec::new(),
}
}
@@ -161,13 +199,16 @@ impl IncludeFilterBuilder {
// No need to support Windows-style paths, so the backslash can be used as an escape.
.backslash_escape(true)
.build()?;
self.original_pattern.push(input.relative().to_string());
let is_literal_pattern = globset::escape(glob_pattern) == glob_pattern;
self.original_patterns.push(input.relative().into());
// `lib` is the same as `lib/**`
// Add a glob that matches `lib` exactly, change the glob to `lib/**`.
if glob_pattern.ends_with("**") {
self.push_prefix_regex(&glob);
self.set.add(glob);
self.add_glob(glob);
} else {
let prefix_glob = GlobBuilder::new(&format!("{glob_pattern}/**"))
.literal_separator(true)
@@ -176,19 +217,28 @@ impl IncludeFilterBuilder {
.build()?;
self.push_prefix_regex(&prefix_glob);
self.set.add(prefix_glob);
self.add_glob(prefix_glob);
// The reason we add the exact glob, e.g. `src` when the original pattern was `src/` is
// so that `match_file` returns true when matching against a file. However, we don't
// need to do this if this is a pattern that should only match a directory (specifically, its contents).
if !only_directory {
self.set.add(glob);
if is_literal_pattern {
self.literal_pattern_indices.push(self.set_len);
}
self.add_glob(glob);
}
}
Ok(self)
}
/// Registers `glob` in the set under construction and bumps the glob counter.
///
/// `set_len` must always equal the number of globs added to `set`, because callers read it
/// right before calling this method to learn the index the glob is going to have.
fn add_glob(&mut self, glob: Glob) {
    // The counter update and the insertion are independent of each other.
    self.set_len += 1;
    self.set.add(glob);
}
fn push_prefix_regex(&mut self, glob: &Glob) {
let main_separator = regex::escape(MAIN_SEPARATOR_STR);
@@ -239,7 +289,8 @@ impl IncludeFilterBuilder {
Ok(IncludeFilter {
glob_set,
dfa,
original_patterns: self.original_pattern.into(),
literal_pattern_indices: self.literal_pattern_indices.into(),
original_patterns: self.original_patterns.into(),
})
}
}
@@ -248,7 +299,7 @@ impl IncludeFilterBuilder {
mod tests {
use std::path::{MAIN_SEPARATOR, MAIN_SEPARATOR_STR};
use crate::glob::include::{IncludeFilter, IncludeFilterBuilder};
use crate::glob::include::{IncludeFilter, IncludeFilterBuilder, MatchFile};
use crate::glob::{PortableGlobKind, PortableGlobPattern};
use ruff_db::system::{MemoryFileSystem, walk_directory::WalkState};
@@ -326,33 +377,33 @@ mod tests {
"files/*.py",
]);
assert!(filter.match_file("lib"));
assert!(filter.match_file("lib/more/test"));
assert_eq!(filter.match_file("lib"), MatchFile::Literal);
assert_eq!(filter.match_file("lib/more/test"), MatchFile::Pattern);
// Unlike `directory`, `directory/` only includes a directory with the given name and its contents
assert!(!filter.match_file("directory"));
assert!(filter.match_file("directory/more/test"));
assert_eq!(filter.match_file("directory"), MatchFile::No);
assert_eq!(filter.match_file("directory/more/test"), MatchFile::Pattern);
// Unlike `src`, `src/*` only includes a directory with the given name.
assert!(!filter.match_file("src"));
assert!(filter.match_file("src/more/test"));
assert_eq!(filter.match_file("src"), MatchFile::No);
assert_eq!(filter.match_file("src/more/test"), MatchFile::Pattern);
// Unlike `tests`, `tests/**` only includes files under `tests`, but not a file named tests
assert!(!filter.match_file("tests"));
assert!(filter.match_file("tests/more/test"));
assert_eq!(filter.match_file("tests"), MatchFile::No);
assert_eq!(filter.match_file("tests/more/test"), MatchFile::Pattern);
// Unlike `match_directory`, prefixes should not be included.
assert!(!filter.match_file("a"));
assert!(!filter.match_file("a/test-b"));
assert_eq!(filter.match_file("a"), MatchFile::No);
assert_eq!(filter.match_file("a/test-b"), MatchFile::No);
assert!(!filter.match_file("a/test-b/x"));
assert!(!filter.match_file("a/test"));
assert_eq!(filter.match_file("a/test-b/x"), MatchFile::No);
assert_eq!(filter.match_file("a/test"), MatchFile::No);
assert!(filter.match_file("files/a.py"));
assert!(filter.match_file("files/a.py/bcd"));
assert_eq!(filter.match_file("files/a.py"), MatchFile::Pattern);
assert_eq!(filter.match_file("files/a.py/bcd"), MatchFile::Pattern);
assert!(!filter.match_file("not_included"));
assert!(!filter.match_file("files/a.pi"));
assert_eq!(filter.match_file("not_included"), MatchFile::No);
assert_eq!(filter.match_file("files/a.pi"), MatchFile::No);
}
/// Check that we skip directories that can never match.

View File

@@ -216,15 +216,19 @@ impl Project {
/// This means that this method is an over-approximation of `Self::files` and may return `true` for paths
/// that won't be included when checking the project because they're ignored in a `.gitignore` file.
pub fn is_file_included(self, db: &dyn Db, path: &SystemPath) -> bool {
ProjectFilesFilter::from_project(db, self)
.is_file_included(path, GlobFilterCheckMode::Adhoc)
== IncludeResult::Included
// Any `Included` result counts here, regardless of whether the match was a literal one.
matches!(
ProjectFilesFilter::from_project(db, self)
.is_file_included(path, GlobFilterCheckMode::Adhoc),
IncludeResult::Included { .. }
)
}
/// Returns `true` if the project's file filter reports `path` as an included directory.
///
/// The check is performed with `GlobFilterCheckMode::Adhoc`; the payload of
/// `IncludeResult::Included` is ignored.
pub fn is_directory_included(self, db: &dyn Db, path: &SystemPath) -> bool {
ProjectFilesFilter::from_project(db, self)
.is_directory_included(path, GlobFilterCheckMode::Adhoc)
== IncludeResult::Included
matches!(
ProjectFilesFilter::from_project(db, self)
.is_directory_included(path, GlobFilterCheckMode::Adhoc),
IncludeResult::Included { .. }
)
}
pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) {

View File

@@ -97,7 +97,7 @@ impl Override {
matches!(
self.files
.is_file_included(path, GlobFilterCheckMode::Adhoc),
IncludeResult::Included
IncludeResult::Included { .. }
)
}
}

View File

@@ -79,7 +79,9 @@ impl<'a> ProjectFilesFilter<'a> {
match self.match_included_paths(path, mode) {
None => IncludeResult::NotIncluded,
Some(CheckPathMatch::Partial) => self.src_filter.is_file_included(path, mode),
Some(CheckPathMatch::Full) => IncludeResult::Included,
Some(CheckPathMatch::Full) => IncludeResult::Included {
literal_match: Some(true),
},
}
}
@@ -93,7 +95,9 @@ impl<'a> ProjectFilesFilter<'a> {
Some(CheckPathMatch::Partial) => {
self.src_filter.is_directory_maybe_included(path, mode)
}
Some(CheckPathMatch::Full) => IncludeResult::Included,
Some(CheckPathMatch::Full) => IncludeResult::Included {
literal_match: Some(true),
},
}
}
}
@@ -189,60 +193,59 @@ impl<'a> ProjectFilesWalker<'a> {
let directory_included = filter
.is_directory_included(entry.path(), GlobFilterCheckMode::TopDown);
return match directory_included {
IncludeResult::Included => WalkState::Continue,
IncludeResult::Included { .. } => WalkState::Continue,
IncludeResult::Excluded => {
tracing::debug!(
"Skipping directory '{path}' because it is excluded by a default or `src.exclude` pattern",
path=entry.path()
);
WalkState::Skip
},
}
IncludeResult::NotIncluded => {
tracing::debug!(
"Skipping directory `{path}` because it doesn't match any `src.include` pattern or path specified on the CLI",
path=entry.path()
);
WalkState::Skip
},
}
};
}
} else {
// Ignore any non python files to avoid creating too many entries in `Files`.
// Unless the file is explicitly passed, we then always assume it's a python file.
let source_type = entry.path().extension().and_then(PySourceType::try_from_extension).or_else(|| {
if entry.depth() == 0 {
Some(PySourceType::Python)
} else {
db.system().source_type(entry.path())
}
});
if source_type.is_none()
{
return WalkState::Continue;
}
// For all files, except the ones that were explicitly passed to the walker (CLI),
// check if they're included in the project.
if entry.depth() > 0 || self.force_exclude {
match filter
.is_file_included(entry.path(), GlobFilterCheckMode::TopDown)
{
IncludeResult::Included => {},
IncludeResult::Included { literal_match } => {
// Ignore any non-Python files to avoid creating too many entries in `Files`.
// The exceptions are files passed explicitly on the CLI or matched literally by an `include` pattern: those are always assumed to be files ty can analyze.
let source_type = if literal_match == Some(true) || entry.depth() == 0 {
Some(PySourceType::Python)
} else {
entry.path().extension().and_then(PySourceType::try_from_extension).or_else(|| db.system().source_type(entry.path()))
};
if source_type.is_none()
{
return WalkState::Continue;
}
}
IncludeResult::Excluded => {
tracing::debug!(
"Ignoring file `{path}` because it is excluded by a default or `src.exclude` pattern.",
path=entry.path()
);
return WalkState::Continue;
},
}
IncludeResult::NotIncluded => {
tracing::debug!(
"Ignoring file `{path}` because it doesn't match any `src.include` pattern or path specified on the CLI.",
path=entry.path()
);
return WalkState::Continue;
},
}
}
}