From e91e2f49dbe3a280d1cd14644d6a9dd9a648a9f6 Mon Sep 17 00:00:00 2001 From: David Peter Date: Wed, 23 Apr 2025 19:31:14 +0200 Subject: [PATCH] [red-knot] Trust module-level undeclared symbols in stubs (#17577) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Many symbols in typeshed are defined without being declared. For example: ```pyi # builtins: IOError = OSError # types LambdaType = FunctionType NotImplementedType = _NotImplementedType # typing Text = str # random uniform = _inst.uniform # optparse make_option = Option # all over the place: _T = TypeVar("_T") ``` Here, we introduce a change that skips widening the public type of these symbols (by unioning with `Unknown`). fixes #17032 ## Ecosystem analysis This is difficult to analyze in detail, but I went over most changes and it looks very favorable to me overall. The diff on the overall numbers is: ``` errors: 1287 -> 859 (reduction by 428) warnings: 45 -> 59 (increase by 14) ``` ### Removed false positives `invalid-base` examples: ```diff - error[lint:invalid-base] /tmp/mypy_primer/projects/pip/src/pip/_vendor/rich/console.py:548:27: Invalid class base with type `Unknown | Literal[_local]` (all bases must be a class, `Any`, `Unknown` or `Todo`) - error[lint:invalid-base] /tmp/mypy_primer/projects/tornado/tornado/iostream.py:84:25: Invalid class base with type `Unknown | Literal[OSError]` (all bases must be a class, `Any`, `Unknown` or `Todo`) - error[lint:invalid-base] /tmp/mypy_primer/projects/mitmproxy/test/conftest.py:35:40: Invalid class base with type `Unknown | Literal[_UnixDefaultEventLoopPolicy]` (all bases must be a class, `Any`, `Unknown` or `Todo`) ``` `invalid-exception-caught` examples: ```diff - error[lint:invalid-exception-caught] /tmp/mypy_primer/projects/cloud-init/cloudinit/cmd/status.py:334:16: Cannot catch object of type `Literal[ProcessExecutionError]` in an exception handler (must be a `BaseException` subclass or a tuple of `BaseException` subclasses) - error[lint:invalid-exception-caught] /tmp/mypy_primer/projects/jinja/src/jinja2/loaders.py:537:16: Cannot catch object of type `Literal[TemplateNotFound]` in an exception handler (must be a `BaseException` subclass or a tuple of `BaseException` subclasses) ``` `unresolved-reference` examples https://github.com/canonical/cloud-init/blob/7a0265d36e01e649f72005548f17dca9ac0150ad/cloudinit/handlers/jinja_template.py#L120-L123 (we now understand the `isinstance` narrowing) ```diff - error[lint:unresolved-attribute] /tmp/mypy_primer/projects/cloud-init/cloudinit/handlers/jinja_template.py:123:16: Type `Exception` has no attribute `errno` ``` `unknown-argument` examples https://github.com/hauntsaninja/boostedblob/blob/master/boostedblob/request.py#L53 ```diff - error[lint:unknown-argument] /tmp/mypy_primer/projects/boostedblob/boostedblob/request.py:53:17: Argument `connect` does not match any known parameter of bound method `__init__` ``` `unknown-argument` There are a lot of `__init__`-related changes because we now understand [`@attr.s`](https://github.com/python-attrs/attrs/blob/3d42a6978ac60b487135db39218cfb742b100899/src/attr/__init__.pyi#L387) as a `@dataclass_transform` annotated symbol. For example: ```diff - error[lint:unknown-argument] /tmp/mypy_primer/projects/attrs/tests/test_hooks.py:72:18: Argument `x` does not match any known parameter of bound method `__init__` ``` ### New false positives This can happen if a symbol that previously was inferred as `X | Unknown` was assigned-to, but we don't yet understand the assignability to `X`: https://github.com/strawberry-graphql/strawberry/blob/main/strawberry/exceptions/handler.py#L90 ```diff + error[lint:invalid-assignment] /tmp/mypy_primer/projects/strawberry/strawberry/exceptions/handler.py:90:9: Object of type `def strawberry_threading_exception_handler(args: tuple[type[BaseException], BaseException | None, TracebackType | None, Thread | None]) -> None` is not assignable to attribute `excepthook` of type `(_ExceptHookArgs, /) -> Any` ``` ### New true positives https://github.com/DataDog/dd-trace-py/blob/6bbb5519fe4b3964f9ca73b21cf35df8387618b2/tests/tracer/test_span.py#L714 ```diff + error[lint:invalid-argument-type] /tmp/mypy_primer/projects/dd-trace-py/tests/tracer/test_span.py:714:33: Argument to this function is incorrect: Expected `str`, found `Literal[b"\xf0\x9f\xa4\x94"]` ``` ### Changed diagnostics A lot of changed diagnostics because we now show `@Todo(Support for `typing.TypeVar` instances in type expressions)` instead of `Unknown` for all kinds of symbols that used a `_T = TypeVar("_T")` as a type. One prominent example is the `list.__getitem__` method: `builtins.pyi`: ```pyi _T = TypeVar("_T") # previously `TypeVar | Unknown`, now just `TypeVar` # … class list(MutableSequence[_T]): # … @overload def __getitem__(self, i: SupportsIndex, /) -> _T: ... # … ``` which causes this change in diagnostics: ```py xs = [1, 2] reveal_type(xs[0]) # previously `Unknown`, now `@Todo(Support for `typing.TypeVar` instances in type expressions)` ``` ## Test Plan Updated Markdown tests --- .../resources/mdtest/scopes/eager.md | 2 +- .../resources/mdtest/subscript/lists.md | 2 +- .../mdtest/type_properties/is_singleton.md | 5 +---- .../src/semantic_index/symbol.rs | 8 ++++++++ crates/red_knot_python_semantic/src/symbol.rs | 14 ++++++++++++-- .../src/types/signatures.rs | 5 +---- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/crates/red_knot_python_semantic/resources/mdtest/scopes/eager.md b/crates/red_knot_python_semantic/resources/mdtest/scopes/eager.md index 533330ddb7..bfd11b6677 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/scopes/eager.md +++ b/crates/red_knot_python_semantic/resources/mdtest/scopes/eager.md @@ -404,7 +404,7 @@ x = int class C: var: ClassVar[x] -reveal_type(C.var) # revealed: Unknown | str +reveal_type(C.var) # revealed: str x = str ``` diff --git a/crates/red_knot_python_semantic/resources/mdtest/subscript/lists.md b/crates/red_knot_python_semantic/resources/mdtest/subscript/lists.md index d074d1b826..192dc4a88e 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/subscript/lists.md +++ b/crates/red_knot_python_semantic/resources/mdtest/subscript/lists.md @@ -12,7 +12,7 @@ x = [1, 2, 3] reveal_type(x) # revealed: list # TODO reveal int -reveal_type(x[0]) # revealed: Unknown +reveal_type(x[0]) # revealed: @Todo(Support for `typing.TypeVar` instances in type expressions) # TODO reveal list reveal_type(x[0:1]) # revealed: @Todo(specialized non-generic class) diff --git a/crates/red_knot_python_semantic/resources/mdtest/type_properties/is_singleton.md b/crates/red_knot_python_semantic/resources/mdtest/type_properties/is_singleton.md index a60efdfd9e..4673affed6 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/type_properties/is_singleton.md +++ b/crates/red_knot_python_semantic/resources/mdtest/type_properties/is_singleton.md @@ -128,10 +128,7 @@ python-version = "3.10" import types from knot_extensions import static_assert, is_singleton -# TODO: types.NotImplementedType is a TypeAlias of builtins._NotImplementedType -# Once TypeAlias support is added, it should satisfy `is_singleton` -reveal_type(types.NotImplementedType) # revealed: Unknown | Literal[_NotImplementedType] -static_assert(not is_singleton(types.NotImplementedType)) +static_assert(is_singleton(types.NotImplementedType)) ``` ### Callables diff --git a/crates/red_knot_python_semantic/src/semantic_index/symbol.rs b/crates/red_knot_python_semantic/src/semantic_index/symbol.rs index ec9ef3886e..10f6e9c5d1 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/symbol.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/symbol.rs @@ -115,6 +115,10 @@ impl<'db> ScopeId<'db> { self.node(db).scope_kind().is_function_like() } + pub(crate) fn is_module_scope(self, db: &'db dyn Db) -> bool { + self.node(db).scope_kind().is_module() + } + pub(crate) fn is_type_parameter(self, db: &'db dyn Db) -> bool { self.node(db).scope_kind().is_type_parameter() } @@ -263,6 +267,10 @@ impl ScopeKind { matches!(self, ScopeKind::Class) } + pub(crate) fn is_module(self) -> bool { + matches!(self, ScopeKind::Module) + } + pub(crate) fn is_type_parameter(self) -> bool { matches!(self, ScopeKind::Annotation | ScopeKind::TypeAlias) } diff --git a/crates/red_knot_python_semantic/src/symbol.rs b/crates/red_knot_python_semantic/src/symbol.rs index dae79c3b63..4e713a7fde 100644 --- a/crates/red_knot_python_semantic/src/symbol.rs +++ b/crates/red_knot_python_semantic/src/symbol.rs @@ -593,8 +593,18 @@ fn symbol_by_id<'db>( "__slots__" | "TYPE_CHECKING" ); - widen_type_for_undeclared_public_symbol(db, inferred, is_considered_non_modifiable) - .into() + if scope.is_module_scope(db) && scope.file(db).is_stub(db.upcast()) { + // We generally trust module-level undeclared symbols in stubs and do not union + // with `Unknown`. If we don't do this, simple aliases like `IOError = OSError` in + // stubs would result in `IOError` being a union of `OSError` and `Unknown`, which + // leads to all sorts of downstream problems. Similarly, type variables are often + // defined as `_T = TypeVar("_T")`, without being declared. + + inferred.into() + } else { + widen_type_for_undeclared_public_symbol(db, inferred, is_considered_non_modifiable) + .into() + } } // Symbol has conflicting declared types Err((declared, _)) => { diff --git a/crates/red_knot_python_semantic/src/types/signatures.rs b/crates/red_knot_python_semantic/src/types/signatures.rs index 52b1ab827f..c74554fe91 100644 --- a/crates/red_knot_python_semantic/src/types/signatures.rs +++ b/crates/red_knot_python_semantic/src/types/signatures.rs @@ -1631,10 +1631,7 @@ mod tests { assert_eq!(a_name, "a"); assert_eq!(b_name, "b"); // Parameter resolution deferred; we should see B - assert_eq!( - a_annotated_ty.unwrap().display(&db).to_string(), - "Unknown | B" - ); + assert_eq!(a_annotated_ty.unwrap().display(&db).to_string(), "B"); assert_eq!(b_annotated_ty.unwrap().display(&db).to_string(), "T"); }