diff --git a/crates/red_knot_python_semantic/resources/mdtest/attributes.md b/crates/red_knot_python_semantic/resources/mdtest/attributes.md index 0a5602e2e5..9b13af91eb 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/attributes.md +++ b/crates/red_knot_python_semantic/resources/mdtest/attributes.md @@ -25,25 +25,21 @@ class C: c_instance = C(1) -# TODO: Mypy/pyright infer `int | str` here. We want this to be `Unknown | Literal[1, "a"]` -reveal_type(c_instance.inferred_from_value) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_value) # revealed: Unknown | Literal[1, "a"] # TODO: Same here. This should be `Unknown | Literal[1, "a"]` -reveal_type(c_instance.inferred_from_other_attribute) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_other_attribute) # revealed: Unknown # TODO: should be `int | None` -reveal_type(c_instance.inferred_from_param) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_param) # revealed: Unknown | int | None -# TODO: should be `bytes` -reveal_type(c_instance.declared_only) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_only) # revealed: bytes -# TODO: should be `bool` -reveal_type(c_instance.declared_and_bound) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_and_bound) # revealed: bool -# TODO: should be `str` # We probably don't want to emit a diagnostic for this being possibly undeclared/unbound. # mypy and pyright do not show an error here. -reveal_type(c_instance.possibly_undeclared_unbound) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.possibly_undeclared_unbound) # revealed: str # This assignment is fine, as we infer `Unknown | Literal[1, "a"]` for `inferred_from_value`. c_instance.inferred_from_value = "value set on instance" @@ -71,7 +67,7 @@ c_instance.declared_and_bound = False # in general (we don't know what else happened to `c_instance` between the assignment and the use # here), but mypy and pyright support this. In conclusion, this could be `bool` but should probably # be `Literal[False]`. -reveal_type(c_instance.declared_and_bound) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_and_bound) # revealed: bool ``` #### Variable declared in class body and possibly bound in `__init__` @@ -124,7 +120,44 @@ reveal_type(C.only_declared) # revealed: str C.only_declared = "overwritten on class" ``` -#### Variable only defined in unrelated method +#### Mixed declarations/bindings in class body and `__init__` + +```py +class C: + only_declared_in_body: str | None + declared_in_body_and_init: str | None + + declared_in_body_defined_in_init: str | None + + bound_in_body_declared_in_init = "a" + + bound_in_body_and_init = None + + def __init__(self, flag) -> None: + self.only_declared_in_init: str | None + self.declared_in_body_and_init: str | None = None + + self.declared_in_body_defined_in_init = "a" + + self.bound_in_body_declared_in_init: str | None + + if flag: + self.bound_in_body_and_init = "a" + +c_instance = C(True) + +reveal_type(c_instance.only_declared_in_body) # revealed: str | None +reveal_type(c_instance.only_declared_in_init) # revealed: str | None +reveal_type(c_instance.declared_in_body_and_init) # revealed: str | None + +reveal_type(c_instance.declared_in_body_defined_in_init) # revealed: str | None + +reveal_type(c_instance.bound_in_body_declared_in_init) # revealed: str | None + +reveal_type(c_instance.bound_in_body_and_init) # revealed: Unknown | None | Literal["a"] +``` + +#### Variable defined in non-`__init__` method We also recognize pure instance variables if they are defined in a method that is not `__init__`. @@ -143,20 +176,17 @@ class C: c_instance = C(1) -# TODO: Should be `Unknown | Literal[1, "a"]` -reveal_type(c_instance.inferred_from_value) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_value) # revealed: Unknown | Literal[1, "a"] # TODO: Should be `Unknown | Literal[1, "a"]` -reveal_type(c_instance.inferred_from_other_attribute) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_other_attribute) # revealed: Unknown # TODO: Should be `int | None` -reveal_type(c_instance.inferred_from_param) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.inferred_from_param) # revealed: Unknown | int | None -# TODO: Should be `bytes` -reveal_type(c_instance.declared_only) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_only) # revealed: bytes -# TODO: Should be `bool` -reveal_type(c_instance.declared_and_bound) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.declared_and_bound) # revealed: bool # TODO: We already show an error here, but the message might be improved? # error: [unresolved-attribute] @@ -166,6 +196,138 @@ reveal_type(C.inferred_from_value) # revealed: Unknown C.inferred_from_value = "overwritten on class" ``` +#### Variable defined in multiple methods + +If we see multiple un-annotated assignments to a single attribute (`self.x` below), we build the +union of all inferred types (and `Unknown`). If we see multiple conflicting declarations of the same +attribute, that should be an error. + +```py +def get_int() -> int: + return 0 + +def get_str() -> str: + return "a" + +class C: + def __init__(self) -> None: + self.x = get_int() + self.y: int = 1 + + def other_method(self): + self.x = get_str() + + # TODO: this redeclaration should be an error + self.y: str = "a" + +c_instance = C() + +reveal_type(c_instance.x) # revealed: Unknown | int | str + +# TODO: We should probably infer `int | str` here. +reveal_type(c_instance.y) # revealed: int +``` + +#### Attributes defined in tuple unpackings + +```py +def returns_tuple() -> tuple[int, str]: + return (1, "a") + +class C: + a1, b1 = (1, "a") + c1, d1 = returns_tuple() + + def __init__(self) -> None: + self.a2, self.b2 = (1, "a") + self.c2, self.d2 = returns_tuple() + +c_instance = C() + +reveal_type(c_instance.a1) # revealed: Unknown | Literal[1] +reveal_type(c_instance.b1) # revealed: Unknown | Literal["a"] +reveal_type(c_instance.c1) # revealed: Unknown | int +reveal_type(c_instance.d1) # revealed: Unknown | str + +# TODO: This should be supported (no error; type should be: `Unknown | Literal[1]`) +# error: [unresolved-attribute] +reveal_type(c_instance.a2) # revealed: Unknown + +# TODO: This should be supported (no error; type should be: `Unknown | Literal["a"]`) +# error: [unresolved-attribute] +reveal_type(c_instance.b2) # revealed: Unknown + +# TODO: Similar for these two (should be `Unknown | int` and `Unknown | str`, respectively) +# error: [unresolved-attribute] +reveal_type(c_instance.c2) # revealed: Unknown +# error: [unresolved-attribute] +reveal_type(c_instance.d2) # revealed: Unknown +``` + +#### Attributes defined in for-loop (unpacking) + +```py +class IntIterator: + def __next__(self) -> int: + return 1 + +class IntIterable: + def __iter__(self) -> IntIterator: + return IntIterator() + +class TupleIterator: + def __next__(self) -> tuple[int, str]: + return (1, "a") + +class TupleIterable: + def __iter__(self) -> TupleIterator: + return TupleIterator() + +class C: + def __init__(self): + for self.x in IntIterable(): + pass + + for _, self.y in TupleIterable(): + pass + +# TODO: Pyright fully supports these, mypy detects the presence of the attributes, +# but infers type `Any` for both of them. We should infer `int` and `str` here: + +# error: [unresolved-attribute] +reveal_type(C().x) # revealed: Unknown + +# error: [unresolved-attribute] +reveal_type(C().y) # revealed: Unknown +``` + +#### Conditionally declared / bound attributes + +We currently do not raise a diagnostic or change behavior if an attribute is only conditionally +defined. This is consistent with what mypy and pyright do. + +```py +def flag() -> bool: + return True + +class C: + def f(self) -> None: + if flag(): + self.a1: str | None = "a" + self.b1 = 1 + if flag(): + def f(self) -> None: + self.a2: str | None = "a" + self.b2 = 1 + +c_instance = C() + +reveal_type(c_instance.a1) # revealed: str | None +reveal_type(c_instance.a2) # revealed: str | None +reveal_type(c_instance.b1) # revealed: Unknown | Literal[1] +reveal_type(c_instance.b2) # revealed: Unknown | Literal[1] +``` + #### Methods that does not use `self` as a first parameter ```py @@ -175,8 +337,7 @@ class C: def __init__(this) -> None: this.declared_and_bound: str | None = "a" -# TODO: should be `str | None` -reveal_type(C().declared_and_bound) # revealed: @Todo(implicit instance attribute) +reveal_type(C().declared_and_bound) # revealed: str | None ``` #### Aliased `self` parameter @@ -187,9 +348,28 @@ class C: this = self this.declared_and_bound: str | None = "a" -# TODO: This would ideally be `str | None`, but mypy/pyright don't support this either, +# This would ideally be `str | None`, but mypy/pyright don't support this either, # so `Unknown` + a diagnostic is also fine. -reveal_type(C().declared_and_bound) # revealed: @Todo(implicit instance attribute) +# error: [unresolved-attribute] +reveal_type(C().declared_and_bound) # revealed: Unknown +``` + +#### Attributes defined in statically-known-to-be-false branches + +```py +class C: + def __init__(self) -> None: + # We use a "significantly complex" condition here (instead of just `False`) + # for a proper comparison with mypy and pyright, which distinguish between + # conditions that can be resolved from a simple pattern matching and those + # that need proper type inference. + if (2 + 3) < 4: + self.x: str = "a" + +# TODO: Ideally, this would result in a `unresolved-attribute` error. But mypy and pyright +# do not support this either (for conditions that can only be resolved to `False` in type +# inference), so it does not seem to be particularly important. +reveal_type(C().x) # revealed: str ``` ### Pure class variables (`ClassVar`) @@ -266,7 +446,7 @@ reveal_type(C.pure_class_variable) # revealed: Unknown c_instance = C() # TODO: should be `Literal["overwritten on class"]` -reveal_type(c_instance.pure_class_variable) # revealed: @Todo(implicit instance attribute) +reveal_type(c_instance.pure_class_variable) # revealed: Unknown | Literal["value set in class method"] # TODO: should raise an error. c_instance.pure_class_variable = "value set on instance" @@ -360,8 +540,7 @@ reveal_type(Derived.declared_in_body) # revealed: int | None reveal_type(Derived().declared_in_body) # revealed: int | None -# TODO: Should be `str | None` -reveal_type(Derived().defined_in_init) # revealed: @Todo(implicit instance attribute) +reveal_type(Derived().defined_in_init) # revealed: str | None ``` ## Union of attributes @@ -646,6 +825,90 @@ reveal_type(b"foo".join) # revealed: @Todo(bound method) reveal_type(b"foo".endswith) # revealed: @Todo(bound method) ``` +## Instance attribute edge cases + +### Assignment to attribute that does not correspond to the instance + +```py +class Other: + x: int = 1 + +class C: + def __init__(self, other: Other) -> None: + other.x = 1 + +def f(c: C): + # error: [unresolved-attribute] + reveal_type(c.x) # revealed: Unknown +``` + +### Nested classes + +```py +class Outer: + def __init__(self): + self.x: int = 1 + + class Middle: + # has no 'x' attribute + + class Inner: + def __init__(self): + self.x: str = "a" + +reveal_type(Outer().x) # revealed: int + +# error: [unresolved-attribute] +Outer.Middle().x + +reveal_type(Outer.Middle.Inner().x) # revealed: str +``` + +### Shadowing of `self` + +```py +class Other: + x: int = 1 + +class C: + def __init__(self) -> None: + # Redeclaration of self. `self` does not refer to the instance anymore. + self: Other = Other() + self.x: int = 1 + +# TODO: this should be an error +C().x +``` + +### Assignment to `self` after nested function + +```py +class Other: + x: str = "a" + +class C: + def __init__(self) -> None: + def nested_function(self: Other): + self.x = "b" + self.x: int = 1 + +reveal_type(C().x) # revealed: int +``` + +### Assignment to `self` from nested function + +```py +class C: + def __init__(self) -> None: + def set_attribute(value: str): + self.x: str = value + set_attribute("a") + +# TODO: ideally, this would be `str`. Mypy supports this, pyright does not. +# error: [unresolved-attribute] +reveal_type(C().x) # revealed: Unknown +``` + ## References Some of the tests in the *Class and instance variables* section draw inspiration from diff --git a/crates/red_knot_python_semantic/src/semantic_index.rs b/crates/red_knot_python_semantic/src/semantic_index.rs index b975966c2d..263c4cfddc 100644 --- a/crates/red_knot_python_semantic/src/semantic_index.rs +++ b/crates/red_knot_python_semantic/src/semantic_index.rs @@ -11,6 +11,7 @@ use ruff_index::{IndexSlice, IndexVec}; use crate::module_name::ModuleName; use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey; use crate::semantic_index::ast_ids::AstIds; +use crate::semantic_index::attribute_assignment::AttributeAssignments; use crate::semantic_index::builder::SemanticIndexBuilder; use crate::semantic_index::definition::{Definition, DefinitionNodeKey}; use crate::semantic_index::expression::Expression; @@ -21,6 +22,7 @@ use crate::semantic_index::use_def::UseDefMap; use crate::Db; pub mod ast_ids; +pub mod attribute_assignment; mod builder; pub(crate) mod constraint; pub mod definition; @@ -93,6 +95,25 @@ pub(crate) fn use_def_map<'db>(db: &'db dyn Db, scope: ScopeId<'db>) -> Arc( + db: &'db dyn Db, + class_body_scope: ScopeId<'db>, +) -> Option>> { + let file = class_body_scope.file(db); + let index = semantic_index(db, file); + + index + .attribute_assignments + .get(&class_body_scope.file_scope_id(db)) + .cloned() +} + /// Returns the module global scope of `file`. #[salsa::tracked] pub(crate) fn global_scope(db: &dyn Db, file: File) -> ScopeId<'_> { @@ -139,6 +160,10 @@ pub(crate) struct SemanticIndex<'db> { /// Flags about the global scope (code usage impacting inference) has_future_annotations: bool, + + /// Maps from class body scopes to attribute assignments that were found + /// in methods of that class. + attribute_assignments: FxHashMap>>, } impl<'db> SemanticIndex<'db> { diff --git a/crates/red_knot_python_semantic/src/semantic_index/attribute_assignment.rs b/crates/red_knot_python_semantic/src/semantic_index/attribute_assignment.rs new file mode 100644 index 0000000000..29f9a36d12 --- /dev/null +++ b/crates/red_knot_python_semantic/src/semantic_index/attribute_assignment.rs @@ -0,0 +1,19 @@ +use crate::semantic_index::expression::Expression; + +use ruff_python_ast::name::Name; + +use rustc_hash::FxHashMap; + +/// Describes an (annotated) attribute assignment that we discovered in a method +/// body, typically of the form `self.x: int`, `self.x: int = …` or `self.x = …`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum AttributeAssignment<'db> { + /// An attribute assignment with an explicit type annotation, either + /// `self.x: ` or `self.x: = …`. + Annotated { annotation: Expression<'db> }, + + /// An attribute assignment without a type annotation, e.g. `self.x = `. + Unannotated { value: Expression<'db> }, +} + +pub(crate) type AttributeAssignments<'db> = FxHashMap>>; diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index 183beebbf9..fc393e9a8b 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -14,14 +14,15 @@ use crate::ast_node_ref::AstNodeRef; use crate::module_name::ModuleName; use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey; use crate::semantic_index::ast_ids::AstIdsBuilder; +use crate::semantic_index::attribute_assignment::{AttributeAssignment, AttributeAssignments}; use crate::semantic_index::constraint::PatternConstraintKind; use crate::semantic_index::definition::{ AssignmentDefinitionNodeRef, ComprehensionDefinitionNodeRef, Definition, DefinitionNodeKey, DefinitionNodeRef, ForStmtDefinitionNodeRef, ImportFromDefinitionNodeRef, }; -use crate::semantic_index::expression::Expression; +use crate::semantic_index::expression::{Expression, ExpressionKind}; use crate::semantic_index::symbol::{ - FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId, + FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopeKind, ScopedSymbolId, SymbolTableBuilder, }; use crate::semantic_index::use_def::{ @@ -53,17 +54,24 @@ impl LoopState { } } +struct ScopeInfo { + file_scope_id: FileScopeId, + loop_state: LoopState, +} + pub(super) struct SemanticIndexBuilder<'db> { // Builder state db: &'db dyn Db, file: File, module: &'db ParsedModule, - scope_stack: Vec<(FileScopeId, LoopState)>, + scope_stack: Vec, /// The assignments we're currently visiting, with /// the most recent visit at the end of the Vec current_assignments: Vec>, /// The match case we're currently visiting. current_match_case: Option>, + /// The name of the first function parameter of the innermost function that we're currently visiting. + current_first_parameter_name: Option<&'db str>, /// Flow states at each `break` in the current loop. loop_break_states: Vec, @@ -84,6 +92,7 @@ pub(super) struct SemanticIndexBuilder<'db> { definitions_by_node: FxHashMap>, expressions_by_node: FxHashMap>, imported_modules: FxHashSet, + attribute_assignments: FxHashMap>, } impl<'db> SemanticIndexBuilder<'db> { @@ -95,6 +104,7 @@ impl<'db> SemanticIndexBuilder<'db> { scope_stack: Vec::new(), current_assignments: vec![], current_match_case: None, + current_first_parameter_name: None, loop_break_states: vec![], try_node_context_stack_manager: TryNodeContextStackManager::default(), @@ -112,6 +122,8 @@ impl<'db> SemanticIndexBuilder<'db> { expressions_by_node: FxHashMap::default(), imported_modules: FxHashSet::default(), + + attribute_assignments: FxHashMap::default(), }; builder.push_scope_with_parent(NodeWithScopeRef::Module, None); @@ -123,7 +135,7 @@ impl<'db> SemanticIndexBuilder<'db> { *self .scope_stack .last() - .map(|(scope, _)| scope) + .map(|ScopeInfo { file_scope_id, .. }| file_scope_id) .expect("Always to have a root scope") } @@ -131,14 +143,32 @@ impl<'db> SemanticIndexBuilder<'db> { self.scope_stack .last() .expect("Always to have a root scope") - .1 + .loop_state + } + + /// Returns the scope ID of the surrounding class body scope if the current scope + /// is a method inside a class body. Returns `None` otherwise, e.g. if the current + /// scope is a function body outside of a class, or if the current scope is not a + /// function body. + fn is_method_of_class(&self) -> Option { + let mut scopes_rev = self.scope_stack.iter().rev(); + let current = scopes_rev.next()?; + let parent = scopes_rev.next()?; + + match ( + self.scopes[current.file_scope_id].kind(), + self.scopes[parent.file_scope_id].kind(), + ) { + (ScopeKind::Function, ScopeKind::Class) => Some(parent.file_scope_id), + _ => None, + } } fn set_inside_loop(&mut self, state: LoopState) { self.scope_stack .last_mut() .expect("Always to have a root scope") - .1 = state; + .loop_state = state; } fn push_scope(&mut self, node: NodeWithScopeRef) { @@ -171,16 +201,20 @@ impl<'db> SemanticIndexBuilder<'db> { debug_assert_eq!(ast_id_scope, file_scope_id); - self.scope_stack.push((file_scope_id, LoopState::NotInLoop)); + self.scope_stack.push(ScopeInfo { + file_scope_id, + loop_state: LoopState::NotInLoop, + }); } fn pop_scope(&mut self) -> FileScopeId { - let (id, _) = self.scope_stack.pop().expect("Root scope to be present"); + let ScopeInfo { file_scope_id, .. } = + self.scope_stack.pop().expect("Root scope to be present"); let children_end = self.scopes.next_index(); - let scope = &mut self.scopes[id]; + let scope = &mut self.scopes[file_scope_id]; scope.descendents = scope.descendents.start..children_end; self.try_node_context_stack_manager.exit_scope(); - id + file_scope_id } fn current_symbol_table(&mut self) -> &mut SymbolTableBuilder { @@ -404,6 +438,32 @@ impl<'db> SemanticIndexBuilder<'db> { self.current_assignments.last_mut() } + /// Records the fact that we saw an attribute assignment of the form + /// `object.attr: ( = …)` or `object.attr = `. + fn register_attribute_assignment( + &mut self, + object: &ast::Expr, + attr: &'db ast::Identifier, + attribute_assignment: AttributeAssignment<'db>, + ) { + if let Some(class_body_scope) = self.is_method_of_class() { + // We only care about attribute assignments to the first parameter of a method, + // i.e. typically `self` or `cls`. + let accessed_object_refers_to_first_parameter = + object.as_name_expr().map(|name| name.id.as_str()) + == self.current_first_parameter_name; + + if accessed_object_refers_to_first_parameter { + self.attribute_assignments + .entry(class_body_scope) + .or_default() + .entry(attr.id().clone()) + .or_default() + .push(attribute_assignment); + } + } + } + fn add_pattern_constraint( &mut self, subject: Expression<'db>, @@ -457,6 +517,20 @@ impl<'db> SemanticIndexBuilder<'db> { /// Record an expression that needs to be a Salsa ingredient, because we need to infer its type /// standalone (type narrowing tests, RHS of an assignment.) fn add_standalone_expression(&mut self, expression_node: &ast::Expr) -> Expression<'db> { + self.add_standalone_expression_impl(expression_node, ExpressionKind::Normal) + } + + /// Same as [`SemanticIndexBuilder::add_standalone_expression`], but marks the expression as a + /// *type* expression, which makes sure that it will later be inferred as such. + fn add_standalone_type_expression(&mut self, expression_node: &ast::Expr) -> Expression<'db> { + self.add_standalone_expression_impl(expression_node, ExpressionKind::TypeExpression) + } + + fn add_standalone_expression_impl( + &mut self, + expression_node: &ast::Expr, + expression_kind: ExpressionKind, + ) -> Expression<'db> { let expression = Expression::new( self.db, self.file, @@ -465,6 +539,7 @@ impl<'db> SemanticIndexBuilder<'db> { unsafe { AstNodeRef::new(self.module.clone(), expression_node) }, + expression_kind, countme::Count::default(), ); self.expressions_by_node @@ -668,6 +743,11 @@ impl<'db> SemanticIndexBuilder<'db> { use_def_maps, imported_modules: Arc::new(self.imported_modules), has_future_annotations: self.has_future_annotations, + attribute_assignments: self + .attribute_assignments + .into_iter() + .map(|(k, v)| (k, Arc::new(v))) + .collect(), } } } @@ -706,7 +786,17 @@ where builder.declare_parameters(parameters); + let mut first_parameter_name = parameters + .iter_non_variadic_params() + .next() + .map(|first_param| first_param.parameter.name.id().as_str()); + std::mem::swap( + &mut builder.current_first_parameter_name, + &mut first_parameter_name, + ); builder.visit_body(body); + builder.current_first_parameter_name = first_parameter_name; + builder.pop_scope() }, ); @@ -840,6 +930,19 @@ where unpack: None, first: false, }), + ast::Expr::Attribute(ast::ExprAttribute { + value: object, + attr, + .. + }) => { + self.register_attribute_assignment( + object, + attr, + AttributeAssignment::Unannotated { value }, + ); + + None + } _ => None, }; @@ -858,6 +961,7 @@ where ast::Stmt::AnnAssign(node) => { debug_assert_eq!(&self.current_assignments, &[]); self.visit_expr(&node.annotation); + let annotation = self.add_standalone_type_expression(&node.annotation); if let Some(value) = &node.value { self.visit_expr(value); } @@ -869,6 +973,20 @@ where ) { self.push_assignment(node.into()); self.visit_expr(&node.target); + + if let ast::Expr::Attribute(ast::ExprAttribute { + value: object, + attr, + .. + }) = &*node.target + { + self.register_attribute_assignment( + object, + attr, + AttributeAssignment::Annotated { annotation }, + ); + } + self.pop_assignment(); } else { self.visit_expr(&node.target); diff --git a/crates/red_knot_python_semantic/src/semantic_index/expression.rs b/crates/red_knot_python_semantic/src/semantic_index/expression.rs index 327d40a4b1..6809dbef24 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/expression.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/expression.rs @@ -5,6 +5,16 @@ use ruff_db::files::File; use ruff_python_ast as ast; use salsa; +/// Whether or not this expression should be inferred as a normal expression or +/// a type expression. For example, in `self.x: = `, the +/// `` is inferred as a type expression, while `` is inferred +/// as a normal expression. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) enum ExpressionKind { + Normal, + TypeExpression, +} + /// An independently type-inferable expression. /// /// Includes constraint expressions (e.g. if tests) and the RHS of an unpacking assignment. @@ -35,6 +45,10 @@ pub(crate) struct Expression<'db> { #[return_ref] pub(crate) node_ref: AstNodeRef, + /// Should this expression be inferred as a normal expression or a type expression? + #[id] + pub(crate) kind: ExpressionKind, + #[no_eq] count: countme::Count>, } diff --git a/crates/red_knot_python_semantic/src/semantic_index/symbol.rs b/crates/red_knot_python_semantic/src/semantic_index/symbol.rs index c5b1f7c3fd..3094bca7cd 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/symbol.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/symbol.rs @@ -119,6 +119,7 @@ impl<'db> ScopeId<'db> { self.node(db).scope_kind(), ScopeKind::Annotation | ScopeKind::Function + | ScopeKind::Lambda | ScopeKind::TypeAlias | ScopeKind::Comprehension ) @@ -203,6 +204,7 @@ pub enum ScopeKind { Annotation, Class, Function, + Lambda, Comprehension, TypeAlias, } @@ -443,7 +445,8 @@ impl NodeWithScopeKind { match self { Self::Module => ScopeKind::Module, Self::Class(_) => ScopeKind::Class, - Self::Function(_) | Self::Lambda(_) => ScopeKind::Function, + Self::Function(_) => ScopeKind::Function, + Self::Lambda(_) => ScopeKind::Lambda, Self::FunctionTypeParameters(_) | Self::ClassTypeParameters(_) | Self::TypeAliasTypeParameters(_) => ScopeKind::Annotation, diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index e6a09b1cdb..296ea3b64c 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -23,11 +23,13 @@ pub use self::subclass_of::SubclassOfType; use crate::module_name::ModuleName; use crate::module_resolver::{file_to_module, resolve_module, KnownModule}; use crate::semantic_index::ast_ids::HasScopedExpressionId; +use crate::semantic_index::attribute_assignment::AttributeAssignment; use crate::semantic_index::definition::Definition; +use crate::semantic_index::expression::Expression; use crate::semantic_index::symbol::{self as symbol, ScopeId, ScopedSymbolId}; use crate::semantic_index::{ - global_scope, imported_modules, semantic_index, symbol_table, use_def_map, - BindingWithConstraints, BindingWithConstraintsIterator, DeclarationWithConstraint, + attribute_assignments, global_scope, imported_modules, semantic_index, symbol_table, + use_def_map, BindingWithConstraints, BindingWithConstraintsIterator, DeclarationWithConstraint, DeclarationsIterator, }; use crate::stdlib::{builtins_symbol, known_module_symbol, typing_extensions_symbol}; @@ -4134,9 +4136,76 @@ impl<'db> Class<'db> { } } - // TODO: The symbol is not present in any class body, but it could be implicitly - // defined in `__init__` or other methods anywhere in the MRO. - todo_type!("implicit instance attribute").into() + SymbolAndQualifiers(Symbol::Unbound, TypeQualifiers::empty()) + } + + /// Tries to find declarations/bindings of an instance attribute named `name` that are only + /// "implicitly" defined in a method of the class that corresponds to `class_body_scope`. + fn implicit_instance_attribute( + db: &'db dyn Db, + class_body_scope: ScopeId<'db>, + name: &str, + inferred_type_from_class_body: Option>, + ) -> Symbol<'db> { + // We use a separate salsa query here to prevent unrelated changes in the AST of an external + // file from triggering re-evaluations of downstream queries. + // See the `dependency_implicit_instance_attribute` test for more information. + #[salsa::tracked] + fn infer_expression_type<'db>(db: &'db dyn Db, expression: Expression<'db>) -> Type<'db> { + let inference = infer_expression_types(db, expression); + let expr_scope = expression.scope(db); + inference.expression_type(expression.node_ref(db).scoped_expression_id(db, expr_scope)) + } + + // If we do not see any declarations of an attribute, neither in the class body nor in + // any method, we build a union of `Unknown` with the inferred types of all bindings of + // that attribute. We include `Unknown` in that union to account for the fact that the + // attribute might be externally modified. + let mut union_of_inferred_types = UnionBuilder::new(db).add(Type::unknown()); + + if let Some(ty) = inferred_type_from_class_body { + union_of_inferred_types = union_of_inferred_types.add(ty); + } + + let attribute_assignments = attribute_assignments(db, class_body_scope); + + let Some(attribute_assignments) = attribute_assignments + .as_deref() + .and_then(|assignments| assignments.get(name)) + else { + if inferred_type_from_class_body.is_some() { + return union_of_inferred_types.build().into(); + } + return Symbol::Unbound; + }; + + for attribute_assignment in attribute_assignments { + match attribute_assignment { + AttributeAssignment::Annotated { annotation } => { + // We found an annotated assignment of one of the following forms (using 'self' in these + // examples, but we support arbitrary names for the first parameters of methods): + // + // self.name: + // self.name: = … + + let annotation_ty = infer_expression_type(db, *annotation); + + // TODO: check if there are conflicting declarations + return annotation_ty.into(); + } + AttributeAssignment::Unannotated { value } => { + // We found an un-annotated attribute assignment of the form: + // + // self.name = + + let inferred_ty = infer_expression_type(db, *value); + + union_of_inferred_types = union_of_inferred_types.add(inferred_ty); + } + } + } + + union_of_inferred_types.build().into() } /// A helper function for `instance_member` that looks up the `name` attribute only on @@ -4158,6 +4227,8 @@ impl<'db> Class<'db> { match symbol_from_declarations(db, declarations) { Ok(SymbolAndQualifiers(Symbol::Type(declared_ty, _), qualifiers)) => { + // The attribute is declared in the class body. + if let Some(function) = declared_ty.into_function_literal() { // TODO: Eventually, we are going to process all decorators correctly. This is // just a temporary heuristic to provide a broad categorization into properties @@ -4171,22 +4242,26 @@ impl<'db> Class<'db> { SymbolAndQualifiers(Symbol::Type(declared_ty, Boundness::Bound), qualifiers) } } - Ok(symbol @ SymbolAndQualifiers(Symbol::Unbound, qualifiers)) => { + Ok(SymbolAndQualifiers(Symbol::Unbound, _)) => { + // The attribute is not *declared* in the class body. It could still be declared + // in a method, and it could also be *bound* in the class body (and/or in a method). + let bindings = use_def.public_bindings(symbol_id); let inferred = symbol_from_bindings(db, bindings); + let inferred_ty = inferred.ignore_possibly_unbound(); - SymbolAndQualifiers( - widen_type_for_undeclared_public_symbol(db, inferred, symbol.is_final()), - qualifiers, - ) + Self::implicit_instance_attribute(db, body_scope, name, inferred_ty).into() } Err((declared_ty, _conflicting_declarations)) => { - // Ignore conflicting declarations + // There are conflicting declarations for this attribute in the class body. SymbolAndQualifiers(declared_ty.inner_type().into(), declared_ty.qualifiers()) } } } else { - Symbol::Unbound.into() + // This attribute is neither declared nor bound in the class body. + // It could still be implicitly defined in a method. + + Self::implicit_instance_attribute(db, body_scope, name, None).into() } } diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index 40c9b02d40..4ff32148ca 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -44,7 +44,7 @@ use crate::semantic_index::definition::{ AssignmentDefinitionKind, Definition, DefinitionKind, DefinitionNodeKey, ExceptHandlerDefinitionKind, ForStmtDefinitionKind, TargetKind, }; -use crate::semantic_index::expression::Expression; +use crate::semantic_index::expression::{Expression, ExpressionKind}; use crate::semantic_index::semantic_index; use crate::semantic_index::symbol::{NodeWithScopeKind, NodeWithScopeRef, ScopeId}; use crate::semantic_index::SemanticIndex; @@ -832,7 +832,14 @@ impl<'db> TypeInferenceBuilder<'db> { } fn infer_region_expression(&mut self, expression: Expression<'db>) { - self.infer_expression_impl(expression.node_ref(self.db())); + match expression.kind(self.db()) { + ExpressionKind::Normal => { + self.infer_expression_impl(expression.node_ref(self.db())); + } + ExpressionKind::TypeExpression => { + self.infer_type_expression(expression.node_ref(self.db())); + } + } } /// Raise a diagnostic if the given type cannot be divided by zero. @@ -6019,7 +6026,7 @@ mod tests { use crate::types::check_types; use ruff_db::files::{system_path_to_file, File}; use ruff_db::system::DbWithTestSystem; - use ruff_db::testing::assert_function_query_was_not_run; + use ruff_db::testing::{assert_function_query_was_not_run, assert_function_query_was_run}; use super::*; @@ -6346,4 +6353,84 @@ mod tests { ); Ok(()) } + + #[test] + fn dependency_implicit_instance_attribute() -> anyhow::Result<()> { + fn x_rhs_expression(db: &TestDb) -> Expression<'_> { + let file_main = system_path_to_file(db, "/src/main.py").unwrap(); + let ast = parsed_module(db, file_main); + // Get the second statement in `main.py` (x = …) and extract the expression + // node on the right-hand side: + let x_rhs_node = &ast.syntax().body[1].as_assign_stmt().unwrap().value; + + let index = semantic_index(db, file_main); + index.expression(x_rhs_node.as_ref()) + } + + let mut db = setup_db(); + + db.write_dedented( + "/src/mod.py", + r#" + class C: + def f(self): + self.attr: int | None = None + "#, + )?; + db.write_dedented( + "/src/main.py", + r#" + from mod import C + x = C().attr + "#, + )?; + + let file_main = system_path_to_file(&db, "/src/main.py").unwrap(); + let attr_ty = global_symbol(&db, file_main, "x").expect_type(); + assert_eq!(attr_ty.display(&db).to_string(), "Unknown | int | None"); + + // Change the type of `attr` to `str | None`; this should trigger the type of `x` to be re-inferred + db.write_dedented( + "/src/mod.py", + r#" + class C: + def f(self): + self.attr: str | None = None + "#, + )?; + + let events = { + db.clear_salsa_events(); + let attr_ty = global_symbol(&db, file_main, "x").expect_type(); + assert_eq!(attr_ty.display(&db).to_string(), "Unknown | str | None"); + db.take_salsa_events() + }; + assert_function_query_was_run(&db, infer_expression_types, x_rhs_expression(&db), &events); + + // Add a comment; this should not trigger the type of `x` to be re-inferred + db.write_dedented( + "/src/mod.py", + r#" + class C: + def f(self): + # a comment! + self.attr: str | None = None + "#, + )?; + + let events = { + db.clear_salsa_events(); + let attr_ty = global_symbol(&db, file_main, "x").expect_type(); + assert_eq!(attr_ty.display(&db).to_string(), "Unknown | str | None"); + db.take_salsa_events() + }; + assert_function_query_was_not_run( + &db, + infer_expression_types, + x_rhs_expression(&db), + &events, + ); + + Ok(()) + } }