mirror of https://github.com/astral-sh/ruff
[ty] Improve resolution of absolute imports in tests (#21817)
By teaching desperate resolution to try every possible ancestor that doesn't have an `__init__.py(i)` when resolving absolute imports. * Fixes https://github.com/astral-sh/ty/issues/1782
This commit is contained in:
parent
3ac58b47bd
commit
d5546508cf
|
|
@ -2390,14 +2390,14 @@ fn default_root_flat_layout() -> anyhow::Result<()> {
|
|||
fn default_root_tests_folder() -> anyhow::Result<()> {
|
||||
let case = CliTest::with_files([
|
||||
("src/foo.py", "foo = 10"),
|
||||
("tests/bar.py", "bar = 20"),
|
||||
("tests/bar.py", "baz = 20"),
|
||||
(
|
||||
"tests/test_bar.py",
|
||||
r#"
|
||||
from foo import foo
|
||||
from bar import bar
|
||||
from bar import baz
|
||||
|
||||
print(f"{foo} {bar}")
|
||||
print(f"{foo} {baz}")
|
||||
"#,
|
||||
),
|
||||
])?;
|
||||
|
|
|
|||
|
|
@ -285,22 +285,6 @@ impl Options {
|
|||
roots.push(python);
|
||||
}
|
||||
|
||||
// Considering pytest test discovery conventions,
|
||||
// we also include the `tests` directory if it exists and is not a package.
|
||||
let tests_dir = project_root.join("tests");
|
||||
if system.is_directory(&tests_dir)
|
||||
&& !system.is_file(&tests_dir.join("__init__.py"))
|
||||
&& !system.is_file(&tests_dir.join("__init__.pyi"))
|
||||
&& !roots.contains(&tests_dir)
|
||||
{
|
||||
// If the `tests` directory exists and is not a package, include it as a source root.
|
||||
tracing::debug!(
|
||||
"Including `./tests` in `environment.root` because a `./tests` directory exists"
|
||||
);
|
||||
|
||||
roots.push(tests_dir);
|
||||
}
|
||||
|
||||
// The project root should always be included, and should always come
|
||||
// after any subdirectories such as `./src`, `./tests` and/or `./python`.
|
||||
roots.push(project_root.to_path_buf());
|
||||
|
|
|
|||
|
|
@ -8,12 +8,87 @@ two projects in a monorepo have conflicting definitions (but we want to analyze
|
|||
|
||||
In practice these tests cover what we call "desperate module resolution" which, when an import
|
||||
fails, results in us walking up the ancestor directories of the importing file and trying those as
|
||||
"desperate search-paths".
|
||||
"desperate search-paths" until one works.
|
||||
|
||||
Currently desperate search-paths are restricted to subdirectories of the first-party search-path
|
||||
(the directory you're running `ty` in). Currently we only consider one desperate search-path: the
|
||||
closest ancestor directory containing a `pyproject.toml`. In the future we may want to try every
|
||||
ancestor `pyproject.toml` or every ancestor directory.
|
||||
(typically, the directory you're running `ty` in).
|
||||
|
||||
There are two styles of desperate search-path we consider: "absolute" and "relative". Absolute
|
||||
desperate search-paths are used for resolving absolute imports (`import a.b.c`) while relative
|
||||
desperate search-paths are used for resolving relative imports (`from .c import x`).
|
||||
|
||||
Only the closest directory that contains either a `pyproject.toml` or `ty.toml` is a valid relative
|
||||
desperate search-path.
|
||||
|
||||
All ancestor directories that *do not* contain an `__init__.py(i)` are valid absolute desperate
|
||||
search-paths.
|
||||
|
||||
(Distracting detail: to ensure relative desperate search-paths are always valid absolute desperate
|
||||
search-paths, a directory that contains an `__init__.py(i)` *and* either a `pyproject.toml` or
|
||||
`ty.toml` is also a valid absolute search-path, but this shouldn't matter in practice, as you do not
|
||||
typically have those two kinds of file in the same directory.)
|
||||
|
||||
## Relative Desperate Search-Paths
|
||||
|
||||
We do not directly resolve relative imports. Instead we have a two-phase process:
|
||||
|
||||
1. Convert the relative module name `.c` to an absolute one `a.b.c`
|
||||
1. Resolve the absolute import `a.b.c`
|
||||
|
||||
(This allows us to transparently handle packaging semantics that mandate separate directories should
|
||||
be "logically combined" into a single directory, like namespace packages and stub packages.)
|
||||
|
||||
Relative desperate search-paths only appear in step 1, where we compute the module name of the
|
||||
importing file as the first step in resolving `.` to an absolute module name.
|
||||
|
||||
In practice, relative desperate search-paths are rarely needed because it usually doesn't matter if
|
||||
we think `.` is `a.b` or `b` when resolving `.c`: the fact that we computed `a.b` using our
|
||||
search-paths means `a.b.c` is what will resolve with those search-paths!
|
||||
|
||||
There are three caveats to this:
|
||||
|
||||
- If the module name we compute is *too short* then too many relative levels will fail to resolve
|
||||
(`..c` resolves in `a.b` but not `b`).
|
||||
- If the module name is *too long* then we may encounter directories that aren't valid module names,
|
||||
and reject the import (`my-proj.a.b.c` is not a valid module name).
|
||||
- Sloppiness will break relative imports in any kind of packaging situation where different
|
||||
directories are supposed to be "logically combined".
|
||||
|
||||
The fact that we restrict desperate resolution to the first-party search-path ("the project you're
|
||||
working on") allows us to largely dismiss the last concern for the purposes of this discussion. The
|
||||
remaining two concerns encourage us to find "the longest possible module name without stumbling into
|
||||
random nonsense directories". When we need relative desperate search-paths we are usually running
|
||||
into the "too long" problem and "snap to the parent `pyproject.toml` (or `ty.toml`)" tends to
|
||||
resolve it well!
|
||||
|
||||
As a more aesthetic concern, this approach also ensures that all the files under a given
|
||||
`pyproject.toml` will, when faced with desperation, agree on eachother's relative module names. This
|
||||
may or may not be important, but it's definitely *reassuring* and *satisfying*!
|
||||
|
||||
## Absolute Desperate Search-Paths
|
||||
|
||||
Absolute desperate search-paths are much more load-bearing, because if we're handed the absolute
|
||||
import `a.b.c` then there is only one possible search-path that will properly resolve this the way
|
||||
the user wants, and if that search-path isn't configured we will fail.
|
||||
|
||||
Basic heuristics like checking for `<working-dir>/src/` and resolving editables in the local `.venv`
|
||||
work well in most cases, but desperate resolution is needed in a couple key scenarios:
|
||||
|
||||
- Test or script directories have a tendency to assume extra search-paths that aren't structurally
|
||||
obvious ([notably pytest](https://docs.pytest.org/en/stable/explanation/pythonpath.html))
|
||||
- If you open the root of a monorepo in an IDE, you will often have many separate projects but no
|
||||
configuration explaining this. Absolute imports within each project should resolve things in
|
||||
that project.
|
||||
|
||||
The latter case is often handled reasonably well by the the `pyproject.toml` rule that relative
|
||||
desperate search-paths have. However the more complex testing/scripting scenarios tend to fall over
|
||||
here -- in the limit pytest will add literally every ancestor to the search-path, and so we simply
|
||||
need to try every single one and hope *one* works for every absolute import (and it might be a
|
||||
different one for different imports).
|
||||
|
||||
We exclude directories that contain an `__init__.py(i)` because there shouldn't be any reasonable
|
||||
scenario where we need to "truncate" a regular package like that (and pytest's Exciting behaviour
|
||||
here is explicitly disabled by `__init__.py`).
|
||||
|
||||
## Invalid Names
|
||||
|
||||
|
|
@ -134,13 +209,11 @@ from .mod1 import x
|
|||
|
||||
# error: [unresolved-import]
|
||||
from . import mod2
|
||||
|
||||
# error: [unresolved-import]
|
||||
import mod3
|
||||
|
||||
reveal_type(x) # revealed: Unknown
|
||||
reveal_type(mod2.y) # revealed: Unknown
|
||||
reveal_type(mod3.z) # revealed: Unknown
|
||||
reveal_type(mod3.z) # revealed: int
|
||||
```
|
||||
|
||||
`my-proj/tests/mod1.py`:
|
||||
|
|
@ -338,21 +411,6 @@ create, and we are now very sensitive to precise search-path ordering.**
|
|||
|
||||
Here the use of editables means that `a/` has higher priority than `a/src/a/`.
|
||||
|
||||
Somehow this results in `a/tests/test1.py` being able to resolve `.setup` but not `.`.
|
||||
|
||||
My best guess is that in this state we can resolve regular modules in `a/tests/` but not namespace
|
||||
packages because we have some extra validation for namespace packages conflicted by regular
|
||||
packages, but that validation isn't applied when we successfully resolve a submodule of the
|
||||
namespace package.
|
||||
|
||||
In this case, as we find that `a/tests/test1.py` matches on the first-party path as `a.tests.test1`
|
||||
and is syntactically valid. We then resolve `a.tests.test1` and because the namespace package
|
||||
(`/a/`) comes first we succeed. We then syntactically compute `.` to be `a.tests`.
|
||||
|
||||
When we go to lookup `a.tests.setup`, whatever grace that allowed `a.tests.test1` to resolve still
|
||||
works so it resolves too. However when we try to resolve `a.tests` on its own some additional
|
||||
validation rejects the namespace package conflicting with the regular package.
|
||||
|
||||
```toml
|
||||
[environment]
|
||||
# Setup a venv with editables for a/src/ and b/src/
|
||||
|
|
@ -385,17 +443,13 @@ b/src/
|
|||
`a/tests/test1.py`:
|
||||
|
||||
```py
|
||||
# TODO: there should be no errors in this file.
|
||||
|
||||
from .setup import x
|
||||
|
||||
# error: [unresolved-import]
|
||||
from . import setup
|
||||
from a import y
|
||||
import a
|
||||
|
||||
reveal_type(x) # revealed: int
|
||||
reveal_type(setup.x) # revealed: Unknown
|
||||
reveal_type(setup.x) # revealed: int
|
||||
reveal_type(y) # revealed: int
|
||||
reveal_type(a.y) # revealed: int
|
||||
```
|
||||
|
|
@ -422,17 +476,13 @@ y: int = 10
|
|||
`b/tests/test1.py`:
|
||||
|
||||
```py
|
||||
# TODO: there should be no errors in this file
|
||||
|
||||
from .setup import x
|
||||
|
||||
# error: [unresolved-import]
|
||||
from . import setup
|
||||
from b import y
|
||||
import b
|
||||
|
||||
reveal_type(x) # revealed: str
|
||||
reveal_type(setup.x) # revealed: Unknown
|
||||
reveal_type(setup.x) # revealed: str
|
||||
reveal_type(y) # revealed: str
|
||||
reveal_type(b.y) # revealed: str
|
||||
```
|
||||
|
|
|
|||
|
|
@ -336,7 +336,14 @@ pub(crate) fn file_to_module(db: &dyn Db, file: File) -> Option<Module<'_>> {
|
|||
path,
|
||||
search_paths(db, ModuleResolveMode::StubsAllowed),
|
||||
)
|
||||
.or_else(|| file_to_module_impl(db, file, path, desperate_search_paths(db, file).iter()))
|
||||
.or_else(|| {
|
||||
file_to_module_impl(
|
||||
db,
|
||||
file,
|
||||
path,
|
||||
relative_desperate_search_paths(db, file).iter(),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn file_to_module_impl<'db, 'a>(
|
||||
|
|
@ -388,11 +395,81 @@ pub(crate) fn search_paths(db: &dyn Db, resolve_mode: ModuleResolveMode) -> Sear
|
|||
Program::get(db).search_paths(db).iter(db, resolve_mode)
|
||||
}
|
||||
|
||||
/// Get the search-paths that should be used for desperate resolution of imports in this file
|
||||
/// Get the search-paths for desperate resolution of absolute imports in this file.
|
||||
///
|
||||
/// Currently this is "the closest ancestor dir that contains a pyproject.toml", which is
|
||||
/// a completely arbitrary decision. We could potentially change this to return an iterator
|
||||
/// of every ancestor with a pyproject.toml or every ancestor.
|
||||
/// Currently this is "all ancestor directories that don't contain an `__init__.py(i)`"
|
||||
/// (from closest-to-importing-file to farthest).
|
||||
///
|
||||
/// (For paranoia purposes, all relative desperate search-paths are also absolute
|
||||
/// valid desperate search-paths, but don't worry about that.)
|
||||
///
|
||||
/// We exclude `__init__.py(i)` dirs to avoid truncating packages.
|
||||
#[salsa::tracked(heap_size=ruff_memory_usage::heap_size)]
|
||||
fn absolute_desperate_search_paths(db: &dyn Db, importing_file: File) -> Option<Vec<SearchPath>> {
|
||||
let system = db.system();
|
||||
let importing_path = importing_file.path(db).as_system_path()?;
|
||||
|
||||
// Only allow this if the importing_file is under the first-party search path
|
||||
let (base_path, rel_path) =
|
||||
search_paths(db, ModuleResolveMode::StubsAllowed).find_map(|search_path| {
|
||||
if !search_path.is_first_party() {
|
||||
return None;
|
||||
}
|
||||
Some((
|
||||
search_path.as_system_path()?,
|
||||
search_path.relativize_system_path_only(importing_path)?,
|
||||
))
|
||||
})?;
|
||||
|
||||
// Read the revision on the corresponding file root to
|
||||
// register an explicit dependency on this directory. When
|
||||
// the revision gets bumped, the cache that Salsa creates
|
||||
// for this routine will be invalidated.
|
||||
//
|
||||
// (This is conditional because ruff uses this code too and doesn't set roots)
|
||||
if let Some(root) = db.files().root(db, base_path) {
|
||||
let _ = root.revision(db);
|
||||
}
|
||||
|
||||
// Only allow searching up to the first-party path's root
|
||||
let mut search_paths = Vec::new();
|
||||
for rel_dir in rel_path.ancestors() {
|
||||
let candidate_path = base_path.join(rel_dir);
|
||||
if !system.is_directory(&candidate_path) {
|
||||
continue;
|
||||
}
|
||||
// Any dir that isn't a proper package is plausibly some test/script dir that could be
|
||||
// added as a search-path at runtime. Notably this reflects pytest's default mode where
|
||||
// it adds every dir with a .py to the search-paths (making all test files root modules),
|
||||
// unless they see an `__init__.py`, in which case they assume you don't want that.
|
||||
let isnt_regular_package = !system.is_file(&candidate_path.join("__init__.py"))
|
||||
&& !system.is_file(&candidate_path.join("__init__.pyi"));
|
||||
// Any dir with a pyproject.toml or ty.toml is a valid relative desperate search-path and
|
||||
// we want all of those to also be valid absolute desperate search-paths. It doesn't
|
||||
// make any sense for a folder to have `pyproject.toml` and `__init__.py` but let's
|
||||
// not let something cursed and spooky happen, ok? d
|
||||
if isnt_regular_package
|
||||
|| system.is_file(&candidate_path.join("pyproject.toml"))
|
||||
|| system.is_file(&candidate_path.join("ty.toml"))
|
||||
{
|
||||
let search_path = SearchPath::first_party(system, candidate_path).ok()?;
|
||||
search_paths.push(search_path);
|
||||
}
|
||||
}
|
||||
|
||||
if search_paths.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(search_paths)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the search-paths for desperate resolution of relative imports in this file.
|
||||
///
|
||||
/// Currently this is "the closest ancestor dir that contains a pyproject.toml (or ty.toml)",
|
||||
/// which is a completely arbitrary decision. However it's farily important that relative
|
||||
/// desperate search-paths pick a single "best" answer because every one is *valid* but one
|
||||
/// that's too long or too short may cause problems.
|
||||
///
|
||||
/// For now this works well in common cases where we have some larger workspace that contains
|
||||
/// one or more python projects in sub-directories, and those python projects assume that
|
||||
|
|
@ -402,7 +479,7 @@ pub(crate) fn search_paths(db: &dyn Db, resolve_mode: ModuleResolveMode) -> Sear
|
|||
/// chaotic things. In particular, all files under a given pyproject.toml will currently
|
||||
/// agree on this being their desperate search-path, which is really nice.
|
||||
#[salsa::tracked(heap_size=ruff_memory_usage::heap_size)]
|
||||
fn desperate_search_paths(db: &dyn Db, importing_file: File) -> Option<SearchPath> {
|
||||
fn relative_desperate_search_paths(db: &dyn Db, importing_file: File) -> Option<SearchPath> {
|
||||
let system = db.system();
|
||||
let importing_path = importing_file.path(db).as_system_path()?;
|
||||
|
||||
|
|
@ -431,13 +508,15 @@ fn desperate_search_paths(db: &dyn Db, importing_file: File) -> Option<SearchPat
|
|||
// Only allow searching up to the first-party path's root
|
||||
for rel_dir in rel_path.ancestors() {
|
||||
let candidate_path = base_path.join(rel_dir);
|
||||
if system.path_exists(&candidate_path.join("pyproject.toml"))
|
||||
|| system.path_exists(&candidate_path.join("ty.toml"))
|
||||
// Any dir with a pyproject.toml or ty.toml might be a project root
|
||||
if system.is_file(&candidate_path.join("pyproject.toml"))
|
||||
|| system.is_file(&candidate_path.join("ty.toml"))
|
||||
{
|
||||
let search_path = SearchPath::first_party(system, candidate_path).ok()?;
|
||||
return Some(search_path);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
#[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)]
|
||||
|
|
@ -960,8 +1039,8 @@ fn desperately_resolve_name(
|
|||
name: &ModuleName,
|
||||
mode: ModuleResolveMode,
|
||||
) -> Option<ResolvedName> {
|
||||
let search_paths = desperate_search_paths(db, importing_file);
|
||||
resolve_name_impl(db, name, mode, search_paths.iter())
|
||||
let search_paths = absolute_desperate_search_paths(db, importing_file);
|
||||
resolve_name_impl(db, name, mode, search_paths.iter().flatten())
|
||||
}
|
||||
|
||||
fn resolve_name_impl<'a>(
|
||||
|
|
|
|||
Loading…
Reference in New Issue