diff --git a/crates/red_knot_python_semantic/resources/mdtest/binary/instances.md b/crates/red_knot_python_semantic/resources/mdtest/binary/instances.md new file mode 100644 index 0000000000..fdde6eed2b --- /dev/null +++ b/crates/red_knot_python_semantic/resources/mdtest/binary/instances.md @@ -0,0 +1,463 @@ +# Binary operations on instances + +Binary operations in Python are implemented by means of magic double-underscore methods. + +For references, see: + +- <https://snarky.ca/unravelling-binary-arithmetic-operations-in-python/> +- <https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types> + +## Operations + +We support inference for all of Python's binary operators: +`+`, `-`, `*`, `@`, `/`, `//`, `%`, `**`, `<<`, `>>`, `&`, `^`, and `|`. + +```py +class A: + def __add__(self, other) -> A: + return self + + def __sub__(self, other) -> A: + return self + + def __mul__(self, other) -> A: + return self + + def __matmul__(self, other) -> A: + return self + + def __truediv__(self, other) -> A: + return self + + def __floordiv__(self, other) -> A: + return self + + def __mod__(self, other) -> A: + return self + + def __pow__(self, other) -> A: + return self + + def __lshift__(self, other) -> A: + return self + + def __rshift__(self, other) -> A: + return self + + def __and__(self, other) -> A: + return self + + def __xor__(self, other) -> A: + return self + + def __or__(self, other) -> A: + return self + + +class B: ... 
+ + +reveal_type(A() + B()) # revealed: A +reveal_type(A() - B()) # revealed: A +reveal_type(A() * B()) # revealed: A +reveal_type(A() @ B()) # revealed: A +reveal_type(A() / B()) # revealed: A +reveal_type(A() // B()) # revealed: A +reveal_type(A() % B()) # revealed: A +reveal_type(A() ** B()) # revealed: A +reveal_type(A() << B()) # revealed: A +reveal_type(A() >> B()) # revealed: A +reveal_type(A() & B()) # revealed: A +reveal_type(A() ^ B()) # revealed: A +reveal_type(A() | B()) # revealed: A +``` + +## Reflected + +We also support inference for reflected operations: + +```py +class A: + def __radd__(self, other) -> A: + return self + + def __rsub__(self, other) -> A: + return self + + def __rmul__(self, other) -> A: + return self + + def __rmatmul__(self, other) -> A: + return self + + def __rtruediv__(self, other) -> A: + return self + + def __rfloordiv__(self, other) -> A: + return self + + def __rmod__(self, other) -> A: + return self + + def __rpow__(self, other) -> A: + return self + + def __rlshift__(self, other) -> A: + return self + + def __rrshift__(self, other) -> A: + return self + + def __rand__(self, other) -> A: + return self + + def __rxor__(self, other) -> A: + return self + + def __ror__(self, other) -> A: + return self + + +class B: ... 
+ + +reveal_type(B() + A()) # revealed: A +reveal_type(B() - A()) # revealed: A +reveal_type(B() * A()) # revealed: A +reveal_type(B() @ A()) # revealed: A +reveal_type(B() / A()) # revealed: A +reveal_type(B() // A()) # revealed: A +reveal_type(B() % A()) # revealed: A +reveal_type(B() ** A()) # revealed: A +reveal_type(B() << A()) # revealed: A +reveal_type(B() >> A()) # revealed: A +reveal_type(B() & A()) # revealed: A +reveal_type(B() ^ A()) # revealed: A +reveal_type(B() | A()) # revealed: A +``` + +## Returning a different type + +The magic methods aren't required to return the type of `self`: + +```py +class A: + def __add__(self, other) -> int: + return 1 + + def __rsub__(self, other) -> int: + return 1 + + +class B: ... + + +reveal_type(A() + B()) # revealed: int +reveal_type(B() - A()) # revealed: int +``` + +## Non-reflected precedence in general + +In general, if the left-hand side defines `__add__` and the right-hand side +defines `__radd__` and the right-hand side is not a subtype of the left-hand +side, `lhs.__add__` will take precedence: + +```py +class A: + def __add__(self, other: B) -> int: + return 42 + + +class B: + def __radd__(self, other: A) -> str: + return "foo" + + +reveal_type(A() + B()) # revealed: int + + +# Edge case: C is a subtype of C, *but* if the two sides are of *equal* types, +# the lhs *still* takes precedence +class C: + def __add__(self, other: C) -> int: + return 42 + + def __radd__(self, other: C) -> str: + return "foo" + + +reveal_type(C() + C()) # revealed: int +``` + +## Reflected precedence for subtypes (in some cases) + +If the right-hand operand is a subtype of the left-hand operand and has a +different implementation of the reflected method, the reflected method on the +right-hand operand takes precedence. + +```py +class A: + def __add__(self, other) -> str: + return "foo" + + def __radd__(self, other) -> str: + return "foo" + + +class MyString(str): ... 
+ + +class B(A): + def __radd__(self, other) -> MyString: + return MyString() + + +reveal_type(A() + B()) # revealed: MyString + + +# N.B. Still a subtype of `A`, even though `A` does not appear directly in the class's `__bases__` +class C(B): ... + + +# TODO: we currently only understand direct subclasses as subtypes of the superclass. +# We need to iterate through the full MRO rather than just the class's bases; +# if we do, we'll understand `C` as a subtype of `A`, and correctly understand this as being +# `MyString` rather than `str` +reveal_type(A() + C()) # revealed: str +``` + +## Reflected precedence 2 + +If the right-hand operand is a subtype of the left-hand operand, but does not +override the reflected method, the left-hand operand's non-reflected method +still takes precedence: + +```py +class A: + def __add__(self, other) -> str: + return "foo" + + def __radd__(self, other) -> int: + return 42 + + +class B(A): ... + + +reveal_type(A() + B()) # revealed: str +``` + +## Only reflected supported + +For example, at runtime, `(1).__add__(1.2)` is `NotImplemented`, but +`(1.2).__radd__(1) == 2.2`, meaning that `1 + 1.2` succeeds at runtime +(producing `2.2`). The runtime tries the second one only if the first one +returns `NotImplemented` to signal failure. + +Typeshed and other stubs annotate dunder-method calls that would return +`NotImplemented` as being "illegal" calls. `int.__add__` is annotated as only +"accepting" `int`s, even though it strictly-speaking "accepts" any other object +without raising an exception -- it will simply return `NotImplemented`, +allowing the runtime to try the `__radd__` method of the right-hand operand +as well. + +```py +class A: + def __sub__(self, other: A) -> A: + return A() + + +class B: + def __rsub__(self, other: A) -> B: + return B() + + +# TODO: this should be `B` (the return annotation of `B.__rsub__`), +# because `A.__sub__` is annotated as only accepting `A`, +# but `B.__rsub__` will accept `A`. 
+reveal_type(A() - B()) # revealed: A +``` + +## Callable instances as dunders + +Believe it or not, this is supported at runtime: + +```py +class A: + def __call__(self, other) -> int: + return 42 + + +class B: + __add__ = A() + + +reveal_type(B() + B()) # revealed: int +``` + +## Integration test: numbers from typeshed + +```py +reveal_type(3j + 3.14) # revealed: complex +reveal_type(4.2 + 42) # revealed: float +reveal_type(3j + 3) # revealed: complex + +# TODO should be complex, need to check arg type and fall back to `rhs.__radd__` +reveal_type(3.14 + 3j) # revealed: float + +# TODO should be float, need to check arg type and fall back to `rhs.__radd__` +reveal_type(42 + 4.2) # revealed: int + +# TODO should be complex, need to check arg type and fall back to `rhs.__radd__` +reveal_type(3 + 3j) # revealed: int + + +def returns_int() -> int: + return 42 + + +def returns_bool() -> bool: + return True + + +x = returns_bool() +y = returns_int() + +reveal_type(x + y) # revealed: int +reveal_type(4.2 + x) # revealed: float + +# TODO should be float, need to check arg type and fall back to `rhs.__radd__` +reveal_type(y + 4.12) # revealed: int +``` + +## With literal types + +When we have a literal type for one operand, we're able to fall back to the +instance handling for its instance super-type. 
+ +```py +class A: + def __add__(self, other) -> A: + return self + + def __radd__(self, other) -> A: + return self + + +reveal_type(A() + 1) # revealed: A +# TODO should be `A` since `int.__add__` doesn't support `A` instances +reveal_type(1 + A()) # revealed: int + +reveal_type(A() + "foo") # revealed: A +# TODO should be `A` since `str.__add__` doesn't support `A` instances +# TODO overloads +reveal_type("foo" + A()) # revealed: @Todo + +reveal_type(A() + b"foo") # revealed: A +# TODO should be `A` since `bytes.__add__` doesn't support `A` instances +reveal_type(b"foo" + A()) # revealed: bytes + +reveal_type(A() + ()) # revealed: A +# TODO this should be `A`, since `tuple.__add__` doesn't support `A` instances +reveal_type(() + A()) # revealed: @Todo + +literal_string_instance = "foo" * 1_000_000_000 +# the test is not testing what it's meant to be testing if this isn't a `LiteralString`: +reveal_type(literal_string_instance) # revealed: LiteralString + +reveal_type(A() + literal_string_instance) # revealed: A +# TODO should be `A` since `str.__add__` doesn't support `A` instances +# TODO overloads +reveal_type(literal_string_instance + A()) # revealed: @Todo +``` + +## Operations involving instances of classes inheriting from `Any` + +`Any` and `Unknown` represent a set of possible runtime objects, wherein the +bounds of the set are unknown. Whether the left-hand operand's dunder or the +right-hand operand's reflected dunder is called depends on whether the right-hand operand +is an instance of a class that is a subclass of the left-hand operand's class +and overrides the reflected dunder. In the following example, because of the +unknowable nature of `Any`/`Unknown`, we must consider both possibilities: +`Any`/`Unknown` might resolve to an unknown third class that inherits from `X` +and overrides `__radd__`; but it also might not. Thus, the correct answer here +for the `reveal_type` is `int | Unknown`. 
+ +```py +from does_not_exist import Foo # error: [unresolved-import] + +reveal_type(Foo) # revealed: Unknown + + +class X: + def __add__(self, other: object) -> int: + return 42 + + +class Y(Foo): ... + + +# TODO: Should be `int | Unknown`; see above discussion. +reveal_type(X() + Y()) # revealed: int +``` + +## Unsupported + +### Dunder as instance attribute + +The magic method must exist on the class, not just on the instance: + +```py +def add_impl(self, other) -> int: + return 1 + + +class A: + def __init__(self): + self.__add__ = add_impl + + +# error: [unsupported-operator] "Operator `+` is unsupported between objects of type `A` and `A`" +# revealed: Unknown +reveal_type(A() + A()) +``` + +### Missing dunder + +```py +class A: ... + + +# error: [unsupported-operator] +# revealed: Unknown +reveal_type(A() + A()) +``` + +### Wrong position + +A left-hand dunder method doesn't apply for the right-hand operand, or vice versa: + +```py +class A: + def __add__(self, other) -> int: ... + + +class B: + def __radd__(self, other) -> int: ... + + +class C: ... 
+ + +# error: [unsupported-operator] +# revealed: Unknown +reveal_type(C() + A()) + +# error: [unsupported-operator] +# revealed: Unknown +reveal_type(B() + C()) +``` + +### Wrong type + +TODO: check signature and error if `other` is the wrong type diff --git a/crates/red_knot_python_semantic/resources/mdtest/binary/integers.md b/crates/red_knot_python_semantic/resources/mdtest/binary/integers.md index 746e8d7f4a..4fded53097 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/binary/integers.md +++ b/crates/red_knot_python_semantic/resources/mdtest/binary/integers.md @@ -34,19 +34,19 @@ reveal_type(b) # revealed: int c = 3 % 0 # error: "Cannot reduce object of type `Literal[3]` modulo zero" reveal_type(c) # revealed: int -d = int() / 0 # error: "Cannot divide object of type `int` by zero" -# TODO should be int -reveal_type(d) # revealed: @Todo +# error: "Cannot divide object of type `int` by zero" +# revealed: float +reveal_type(int() / 0) -e = 1.0 / 0 # error: "Cannot divide object of type `float` by zero" -# TODO should be float -reveal_type(e) # revealed: @Todo +# error: "Cannot divide object of type `float` by zero" +# revealed: float +reveal_type(1.0 / 0) class MyInt(int): ... 
# No error for a subclass of int -# TODO should be float -reveal_type(MyInt(3) / 0) # revealed: @Todo +# revealed: float +reveal_type(MyInt(3) / 0) ``` diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index 8f5412ef43..71df2ac1e7 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -440,6 +440,9 @@ impl<'db> Type<'db> { .any(|&elem_ty| ty.is_subtype_of(db, elem_ty)), (_, Type::Instance(class)) if class.is_known(db, KnownClass::Object) => true, (Type::Instance(class), _) if class.is_known(db, KnownClass::Object) => false, + (Type::Instance(self_class), Type::Instance(target_class)) => { + self_class.is_subclass_of(db, target_class) + } // TODO _ => false, } @@ -1582,6 +1585,18 @@ impl<'db> ClassType<'db> { }) } + pub fn is_subclass_of(self, db: &'db dyn Db, other: ClassType) -> bool { + // TODO: we need to iterate over the *MRO* here, not the bases + (other == self) + || self.bases(db).any(|base| match base { + Type::Class(base_class) => base_class == other, + // `is_subclass_of` is checking the subtype relation, in which gradual types do not + // participate, so we should not return `True` if we find `Any/Unknown` in the + // bases. + _ => false, + }) + } + /// Returns the class member of this class named `name`. /// /// The member resolves to a member of the class itself or any of its bases. 
@@ -1823,6 +1838,7 @@ mod tests { #[test_case(Ty::LiteralString, Ty::BuiltinInstance("str"))] #[test_case(Ty::BytesLiteral("foo"), Ty::BuiltinInstance("bytes"))] #[test_case(Ty::IntLiteral(1), Ty::Union(vec![Ty::BuiltinInstance("int"), Ty::BuiltinInstance("str")]))] + #[test_case(Ty::BuiltinInstance("TypeError"), Ty::BuiltinInstance("Exception"))] fn is_subtype_of(from: Ty, to: Ty) { let db = setup_db(); assert!(from.into_type(&db).is_subtype_of(&db, to.into_type(&db))); diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index 27655b944e..5e785e62c8 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -2497,71 +2497,98 @@ impl<'db> TypeInferenceBuilder<'db> { self.check_division_by_zero(binary, left_ty); } + self.infer_binary_expression_type(left_ty, right_ty, *op) + .unwrap_or_else(|| { + self.add_diagnostic( + binary.into(), + "unsupported-operator", + format_args!( + "Operator `{op}` is unsupported between objects of type `{}` and `{}`", + left_ty.display(self.db), + right_ty.display(self.db) + ), + ); + Type::Unknown + }) + } + + fn infer_binary_expression_type( + &mut self, + left_ty: Type<'db>, + right_ty: Type<'db>, + op: ast::Operator, + ) -> Option<Type<'db>> { match (left_ty, right_ty, op) { // When interacting with Todo, Any and Unknown should propagate (as if we fix this // `Todo` in the future, the result would then become Any or Unknown, respectively.) 
- (Type::Any, _, _) | (_, Type::Any, _) => Type::Any, - (Type::Unknown, _, _) | (_, Type::Unknown, _) => Type::Unknown, + (Type::Any, _, _) | (_, Type::Any, _) => Some(Type::Any), + (Type::Unknown, _, _) | (_, Type::Unknown, _) => Some(Type::Unknown), - (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Add) => n - .checked_add(m) - .map(Type::IntLiteral) - .unwrap_or_else(|| KnownClass::Int.to_instance(self.db)), + (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Add) => Some( + n.checked_add(m) + .map(Type::IntLiteral) + .unwrap_or_else(|| KnownClass::Int.to_instance(self.db)), + ), - (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Sub) => n - .checked_sub(m) - .map(Type::IntLiteral) - .unwrap_or_else(|| KnownClass::Int.to_instance(self.db)), + (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Sub) => Some( + n.checked_sub(m) + .map(Type::IntLiteral) + .unwrap_or_else(|| KnownClass::Int.to_instance(self.db)), + ), - (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Mult) => n - .checked_mul(m) - .map(Type::IntLiteral) - .unwrap_or_else(|| KnownClass::Int.to_instance(self.db)), + (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Mult) => Some( + n.checked_mul(m) + .map(Type::IntLiteral) + .unwrap_or_else(|| KnownClass::Int.to_instance(self.db)), + ), (Type::IntLiteral(_), Type::IntLiteral(_), ast::Operator::Div) => { - KnownClass::Float.to_instance(self.db) + Some(KnownClass::Float.to_instance(self.db)) } - (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::FloorDiv) => n - .checked_div(m) - .map(Type::IntLiteral) - .unwrap_or_else(|| KnownClass::Int.to_instance(self.db)), + (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::FloorDiv) => Some( + n.checked_div(m) + .map(Type::IntLiteral) + .unwrap_or_else(|| KnownClass::Int.to_instance(self.db)), + ), - (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Mod) => n - .checked_rem(m) - .map(Type::IntLiteral) - .unwrap_or_else(|| 
KnownClass::Int.to_instance(self.db)), + (Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Mod) => Some( + n.checked_rem(m) + .map(Type::IntLiteral) + .unwrap_or_else(|| KnownClass::Int.to_instance(self.db)), + ), (Type::BytesLiteral(lhs), Type::BytesLiteral(rhs), ast::Operator::Add) => { - Type::BytesLiteral(BytesLiteralType::new( + Some(Type::BytesLiteral(BytesLiteralType::new( self.db, [lhs.value(self.db).as_ref(), rhs.value(self.db).as_ref()] .concat() .into_boxed_slice(), - )) + ))) } (Type::StringLiteral(lhs), Type::StringLiteral(rhs), ast::Operator::Add) => { let lhs_value = lhs.value(self.db).to_string(); let rhs_value = rhs.value(self.db).as_ref(); - if lhs_value.len() + rhs_value.len() <= Self::MAX_STRING_LITERAL_SIZE { + let ty = if lhs_value.len() + rhs_value.len() <= Self::MAX_STRING_LITERAL_SIZE { Type::StringLiteral(StringLiteralType::new(self.db, { (lhs_value + rhs_value).into_boxed_str() })) } else { Type::LiteralString - } + }; + Some(ty) } ( Type::StringLiteral(_) | Type::LiteralString, Type::StringLiteral(_) | Type::LiteralString, ast::Operator::Add, - ) => Type::LiteralString, + ) => Some(Type::LiteralString), (Type::StringLiteral(s), Type::IntLiteral(n), ast::Operator::Mult) | (Type::IntLiteral(n), Type::StringLiteral(s), ast::Operator::Mult) => { - if n < 1 { + let ty = if n < 1 { Type::StringLiteral(StringLiteralType::new(self.db, "")) } else if let Ok(n) = usize::try_from(n) { if n.checked_mul(s.value(self.db).len()) @@ -2577,19 +2604,92 @@ impl<'db> TypeInferenceBuilder<'db> { } } else { Type::LiteralString - } + }; + Some(ty) } (Type::LiteralString, Type::IntLiteral(n), ast::Operator::Mult) | (Type::IntLiteral(n), Type::LiteralString, ast::Operator::Mult) => { - if n < 1 { + let ty = if n < 1 { Type::StringLiteral(StringLiteralType::new(self.db, "")) } else { Type::LiteralString - } + }; + Some(ty) } - _ => Type::Todo, // TODO + (Type::Instance(_), Type::IntLiteral(_), op) => { + self.infer_binary_expression_type(left_ty, 
KnownClass::Int.to_instance(self.db), op) + } + + (Type::IntLiteral(_), Type::Instance(_), op) => self.infer_binary_expression_type( + KnownClass::Int.to_instance(self.db), + right_ty, + op, + ), + + (Type::Instance(_), Type::Tuple(_), op) => self.infer_binary_expression_type( + left_ty, + KnownClass::Tuple.to_instance(self.db), + op, + ), + + (Type::Tuple(_), Type::Instance(_), op) => self.infer_binary_expression_type( + KnownClass::Tuple.to_instance(self.db), + right_ty, + op, + ), + + (Type::Instance(_), Type::StringLiteral(_) | Type::LiteralString, op) => { + self.infer_binary_expression_type(left_ty, KnownClass::Str.to_instance(self.db), op) + } + + (Type::StringLiteral(_) | Type::LiteralString, Type::Instance(_), op) => self + .infer_binary_expression_type(KnownClass::Str.to_instance(self.db), right_ty, op), + + (Type::Instance(_), Type::BytesLiteral(_), op) => self.infer_binary_expression_type( + left_ty, + KnownClass::Bytes.to_instance(self.db), + op, + ), + + (Type::BytesLiteral(_), Type::Instance(_), op) => self.infer_binary_expression_type( + KnownClass::Bytes.to_instance(self.db), + right_ty, + op, + ), + + (Type::Instance(left_class), Type::Instance(right_class), op) => { + if left_class != right_class && right_class.is_subclass_of(self.db, left_class) { + let reflected_dunder = op.reflected_dunder(); + let rhs_reflected = right_class.class_member(self.db, reflected_dunder); + if !rhs_reflected.is_unbound() + && rhs_reflected != left_class.class_member(self.db, reflected_dunder) + { + return rhs_reflected + .call(self.db, &[right_ty, left_ty]) + .return_ty(self.db) + .or_else(|| { + left_class + .class_member(self.db, op.dunder()) + .call(self.db, &[left_ty, right_ty]) + .return_ty(self.db) + }); + } + } + left_class + .class_member(self.db, op.dunder()) + .call(self.db, &[left_ty, right_ty]) + .return_ty(self.db) + .or_else(|| { + right_class + .class_member(self.db, op.reflected_dunder()) + .call(self.db, &[right_ty, left_ty]) + .return_ty(self.db) + 
}) + } + + _ => Some(Type::Todo), // TODO } } diff --git a/crates/ruff_python_ast/src/nodes.rs b/crates/ruff_python_ast/src/nodes.rs index c8d508d273..f44052f9fc 100644 --- a/crates/ruff_python_ast/src/nodes.rs +++ b/crates/ruff_python_ast/src/nodes.rs @@ -2971,6 +2971,42 @@ impl Operator { Operator::FloorDiv => "//", } } + + pub const fn dunder(self) -> &'static str { + match self { + Operator::Add => "__add__", + Operator::Sub => "__sub__", + Operator::Mult => "__mul__", + Operator::MatMult => "__matmul__", + Operator::Div => "__truediv__", + Operator::Mod => "__mod__", + Operator::Pow => "__pow__", + Operator::LShift => "__lshift__", + Operator::RShift => "__rshift__", + Operator::BitOr => "__or__", + Operator::BitXor => "__xor__", + Operator::BitAnd => "__and__", + Operator::FloorDiv => "__floordiv__", + } + } + + pub const fn reflected_dunder(self) -> &'static str { + match self { + Operator::Add => "__radd__", + Operator::Sub => "__rsub__", + Operator::Mult => "__rmul__", + Operator::MatMult => "__rmatmul__", + Operator::Div => "__rtruediv__", + Operator::Mod => "__rmod__", + Operator::Pow => "__rpow__", + Operator::LShift => "__rlshift__", + Operator::RShift => "__rrshift__", + Operator::BitOr => "__ror__", + Operator::BitXor => "__rxor__", + Operator::BitAnd => "__rand__", + Operator::FloorDiv => "__rfloordiv__", + } + } } impl fmt::Display for Operator {