[red-knot] binary arithmetic on instances (#13800)

Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
This commit is contained in:
Carl Meyer 2024-10-19 08:22:54 -07:00 committed by GitHub
parent 36cb1199cc
commit f4b5e70fae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 656 additions and 41 deletions

View File

@ -0,0 +1,463 @@
# Binary operations on instances
Binary operations in Python are implemented by means of magic double-underscore methods.
For reference, see:
- <https://snarky.ca/unravelling-binary-arithmetic-operations-in-python/>
- <https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types>
## Operations
We support inference for all of Python's binary operators:
`+`, `-`, `*`, `@`, `/`, `//`, `%`, `**`, `<<`, `>>`, `&`, `^`, and `|`.
```py
class A:
def __add__(self, other) -> A:
return self
def __sub__(self, other) -> A:
return self
def __mul__(self, other) -> A:
return self
def __matmul__(self, other) -> A:
return self
def __truediv__(self, other) -> A:
return self
def __floordiv__(self, other) -> A:
return self
def __mod__(self, other) -> A:
return self
def __pow__(self, other) -> A:
return self
def __lshift__(self, other) -> A:
return self
def __rshift__(self, other) -> A:
return self
def __and__(self, other) -> A:
return self
def __xor__(self, other) -> A:
return self
def __or__(self, other) -> A:
return self
class B: ...
reveal_type(A() + B()) # revealed: A
reveal_type(A() - B()) # revealed: A
reveal_type(A() * B()) # revealed: A
reveal_type(A() @ B()) # revealed: A
reveal_type(A() / B()) # revealed: A
reveal_type(A() // B()) # revealed: A
reveal_type(A() % B()) # revealed: A
reveal_type(A() ** B()) # revealed: A
reveal_type(A() << B()) # revealed: A
reveal_type(A() >> B()) # revealed: A
reveal_type(A() & B()) # revealed: A
reveal_type(A() ^ B()) # revealed: A
reveal_type(A() | B()) # revealed: A
```
## Reflected
We also support inference for reflected operations:
```py
class A:
def __radd__(self, other) -> A:
return self
def __rsub__(self, other) -> A:
return self
def __rmul__(self, other) -> A:
return self
def __rmatmul__(self, other) -> A:
return self
def __rtruediv__(self, other) -> A:
return self
def __rfloordiv__(self, other) -> A:
return self
def __rmod__(self, other) -> A:
return self
def __rpow__(self, other) -> A:
return self
def __rlshift__(self, other) -> A:
return self
def __rrshift__(self, other) -> A:
return self
def __rand__(self, other) -> A:
return self
def __rxor__(self, other) -> A:
return self
def __ror__(self, other) -> A:
return self
class B: ...
reveal_type(B() + A()) # revealed: A
reveal_type(B() - A()) # revealed: A
reveal_type(B() * A()) # revealed: A
reveal_type(B() @ A()) # revealed: A
reveal_type(B() / A()) # revealed: A
reveal_type(B() // A()) # revealed: A
reveal_type(B() % A()) # revealed: A
reveal_type(B() ** A()) # revealed: A
reveal_type(B() << A()) # revealed: A
reveal_type(B() >> A()) # revealed: A
reveal_type(B() & A()) # revealed: A
reveal_type(B() ^ A()) # revealed: A
reveal_type(B() | A()) # revealed: A
```
## Returning a different type
The magic methods aren't required to return the type of `self`:
```py
class A:
def __add__(self, other) -> int:
return 1
def __rsub__(self, other) -> int:
return 1
class B: ...
reveal_type(A() + B()) # revealed: int
reveal_type(B() - A()) # revealed: int
```
## Non-reflected precedence in general
In general, if the left-hand side defines `__add__` and the right-hand side
defines `__radd__` and the right-hand side is not a subtype of the left-hand
side, `lhs.__add__` will take precedence:
```py
class A:
def __add__(self, other: B) -> int:
return 42
class B:
def __radd__(self, other: A) -> str:
return "foo"
reveal_type(A() + B()) # revealed: int
# Edge case: C is a subtype of C, *but* if the two sides are of *equal* types,
# the lhs *still* takes precedence
class C:
def __add__(self, other: C) -> int:
return 42
def __radd__(self, other: C) -> str:
return "foo"
reveal_type(C() + C()) # revealed: int
```
## Reflected precedence for subtypes (in some cases)
If the right-hand operand is a subtype of the left-hand operand and has a
different implementation of the reflected method, the reflected method on the
right-hand operand takes precedence.
```py
class A:
def __add__(self, other) -> str:
return "foo"
def __radd__(self, other) -> str:
return "foo"
class MyString(str): ...
class B(A):
def __radd__(self, other) -> MyString:
return MyString()
reveal_type(A() + B()) # revealed: MyString
# N.B. Still a subtype of `A`, even though `A` does not appear directly in the class's `__bases__`
class C(B): ...
# TODO: we currently only understand direct subclasses as subtypes of the superclass.
# We need to iterate through the full MRO rather than just the class's bases;
# if we do, we'll understand `C` as a subtype of `A`, and correctly understand this as being
# `MyString` rather than `str`
reveal_type(A() + C()) # revealed: str
```
## Reflected precedence 2
If the right-hand operand is a subtype of the left-hand operand, but does not
override the reflected method, the left-hand operand's non-reflected method
still takes precedence:
```py
class A:
def __add__(self, other) -> str:
return "foo"
def __radd__(self, other) -> int:
return 42
class B(A): ...
reveal_type(A() + B()) # revealed: str
```
## Only reflected supported
Sometimes only the right-hand operand's reflected method supports an operation.
For example, at runtime, `(1).__add__(1.2)` is `NotImplemented`, but
`(1.2).__radd__(1) == 2.2`, meaning that `1 + 1.2` succeeds at runtime
(producing `2.2`). The runtime tries the second one only if the first one
returns `NotImplemented` to signal failure.
Typeshed and other stubs annotate dunder-method calls that would return
`NotImplemented` as being "illegal" calls. `int.__add__` is annotated as only
"accepting" `int`s, even though it strictly-speaking "accepts" any other object
without raising an exception -- it will simply return `NotImplemented`,
allowing the runtime to try the `__radd__` method of the right-hand operand
as well.
```py
class A:
def __sub__(self, other: A) -> A:
return A()
class B:
def __rsub__(self, other: A) -> B:
return B()
# TODO: this should be `B` (the return annotation of `B.__rsub__`),
# because `A.__sub__` is annotated as only accepting `A`,
# but `B.__rsub__` will accept `A`.
reveal_type(A() - B()) # revealed: A
```
## Callable instances as dunders
Believe it or not, this is supported at runtime:
```py
class A:
def __call__(self, other) -> int:
return 42
class B:
__add__ = A()
reveal_type(B() + B()) # revealed: int
```
## Integration test: numbers from typeshed
```py
reveal_type(3j + 3.14) # revealed: complex
reveal_type(4.2 + 42) # revealed: float
reveal_type(3j + 3) # revealed: complex
# TODO should be complex, need to check arg type and fall back to `rhs.__radd__`
reveal_type(3.14 + 3j) # revealed: float
# TODO should be float, need to check arg type and fall back to `rhs.__radd__`
reveal_type(42 + 4.2) # revealed: int
# TODO should be complex, need to check arg type and fall back to `rhs.__radd__`
reveal_type(3 + 3j) # revealed: int
def returns_int() -> int:
return 42
def returns_bool() -> bool:
return True
x = returns_bool()
y = returns_int()
reveal_type(x + y) # revealed: int
reveal_type(4.2 + x) # revealed: float
# TODO should be float, need to check arg type and fall back to `rhs.__radd__`
reveal_type(y + 4.12) # revealed: int
```
## With literal types
When we have a literal type for one operand, we're able to fall back to the
instance handling for its instance super-type.
```py
class A:
def __add__(self, other) -> A:
return self
def __radd__(self, other) -> A:
return self
reveal_type(A() + 1) # revealed: A
# TODO should be `A` since `int.__add__` doesn't support `A` instances
reveal_type(1 + A()) # revealed: int
reveal_type(A() + "foo") # revealed: A
# TODO should be `A` since `str.__add__` doesn't support `A` instances
# TODO overloads
reveal_type("foo" + A()) # revealed: @Todo
reveal_type(A() + b"foo") # revealed: A
# TODO should be `A` since `bytes.__add__` doesn't support `A` instances
reveal_type(b"foo" + A()) # revealed: bytes
reveal_type(A() + ()) # revealed: A
# TODO this should be `A`, since `tuple.__add__` doesn't support `A` instances
reveal_type(() + A()) # revealed: @Todo
literal_string_instance = "foo" * 1_000_000_000
# the test is not testing what it's meant to be testing if this isn't a `LiteralString`:
reveal_type(literal_string_instance) # revealed: LiteralString
reveal_type(A() + literal_string_instance) # revealed: A
# TODO should be `A` since `str.__add__` doesn't support `A` instances
# TODO overloads
reveal_type(literal_string_instance + A()) # revealed: @Todo
```
## Operations involving instances of classes inheriting from `Any`
`Any` and `Unknown` represent a set of possible runtime objects, wherein the
bounds of the set are unknown. Whether the left-hand operand's dunder or the
right-hand operand's reflected dunder is called depends on whether the right-hand operand
is an instance of a class that is a subclass of the left-hand operand's class
and overrides the reflected dunder. In the following example, because of the
unknowable nature of `Any`/`Unknown`, we must consider both possibilities:
`Any`/`Unknown` might resolve to an unknown third class that inherits from `X`
and overrides `__radd__`; but it also might not. Thus, the correct answer here
for the `reveal_type` is `int | Unknown`.
```py
from does_not_exist import Foo # error: [unresolved-import]
reveal_type(Foo) # revealed: Unknown
class X:
def __add__(self, other: object) -> int:
return 42
class Y(Foo): ...
# TODO: Should be `int | Unknown`; see above discussion.
reveal_type(X() + Y()) # revealed: int
```
## Unsupported
### Dunder as instance attribute
The magic method must exist on the class, not just on the instance:
```py
def add_impl(self, other) -> int:
return 1
class A:
def __init__(self):
self.__add__ = add_impl
# error: [unsupported-operator] "Operator `+` is unsupported between objects of type `A` and `A`"
# revealed: Unknown
reveal_type(A() + A())
```
### Missing dunder
```py
class A: ...
# error: [unsupported-operator]
# revealed: Unknown
reveal_type(A() + A())
```
### Wrong position
A left-hand dunder method doesn't apply for the right-hand operand, or vice versa:
```py
class A:
def __add__(self, other) -> int: ...
class B:
def __radd__(self, other) -> int: ...
class C: ...
# error: [unsupported-operator]
# revealed: Unknown
reveal_type(C() + A())
# error: [unsupported-operator]
# revealed: Unknown
reveal_type(B() + C())
```
### Wrong type
TODO: check signature and error if `other` is the wrong type

View File

@ -34,19 +34,19 @@ reveal_type(b) # revealed: int
c = 3 % 0 # error: "Cannot reduce object of type `Literal[3]` modulo zero"
reveal_type(c) # revealed: int
d = int() / 0 # error: "Cannot divide object of type `int` by zero"
# TODO should be int
reveal_type(d) # revealed: @Todo
# error: "Cannot divide object of type `int` by zero"
# revealed: float
reveal_type(int() / 0)
e = 1.0 / 0 # error: "Cannot divide object of type `float` by zero"
# TODO should be float
reveal_type(e) # revealed: @Todo
# error: "Cannot divide object of type `float` by zero"
# revealed: float
reveal_type(1.0 / 0)
class MyInt(int): ...
# No error for a subclass of int
# TODO should be float
reveal_type(MyInt(3) / 0) # revealed: @Todo
# revealed: float
reveal_type(MyInt(3) / 0)
```

View File

@ -440,6 +440,9 @@ impl<'db> Type<'db> {
.any(|&elem_ty| ty.is_subtype_of(db, elem_ty)),
(_, Type::Instance(class)) if class.is_known(db, KnownClass::Object) => true,
(Type::Instance(class), _) if class.is_known(db, KnownClass::Object) => false,
(Type::Instance(self_class), Type::Instance(target_class)) => {
self_class.is_subclass_of(db, target_class)
}
// TODO
_ => false,
}
@ -1582,6 +1585,18 @@ impl<'db> ClassType<'db> {
})
}
/// Returns `true` if this class is `other` itself, or if `other` appears directly among this
/// class's bases.
///
/// This models the subtype relation between classes. Gradual types do not participate in that
/// relation, so a base that is not a concrete class (such as `Any`/`Unknown`) never makes the
/// check succeed.
pub fn is_subclass_of(self, db: &'db dyn Db, other: ClassType) -> bool {
    // Every class counts as a subclass of itself.
    if other == self {
        return true;
    }
    // TODO: we need to iterate over the *MRO* here, not the bases
    self.bases(db)
        .any(|base| matches!(base, Type::Class(base_class) if base_class == other))
}
/// Returns the class member of this class named `name`.
///
/// The member resolves to a member of the class itself or any of its bases.
@ -1823,6 +1838,7 @@ mod tests {
#[test_case(Ty::LiteralString, Ty::BuiltinInstance("str"))]
#[test_case(Ty::BytesLiteral("foo"), Ty::BuiltinInstance("bytes"))]
#[test_case(Ty::IntLiteral(1), Ty::Union(vec![Ty::BuiltinInstance("int"), Ty::BuiltinInstance("str")]))]
#[test_case(Ty::BuiltinInstance("TypeError"), Ty::BuiltinInstance("Exception"))]
fn is_subtype_of(from: Ty, to: Ty) {
let db = setup_db();
assert!(from.into_type(&db).is_subtype_of(&db, to.into_type(&db)));

View File

@ -2497,71 +2497,98 @@ impl<'db> TypeInferenceBuilder<'db> {
self.check_division_by_zero(binary, left_ty);
}
self.infer_binary_expression_type(left_ty, right_ty, *op)
.unwrap_or_else(|| {
self.add_diagnostic(
binary.into(),
"unsupported-operator",
format_args!(
"Operator `{op}` is unsupported between objects of type `{}` and `{}`",
left_ty.display(self.db),
right_ty.display(self.db)
),
);
Type::Unknown
})
}
fn infer_binary_expression_type(
&mut self,
left_ty: Type<'db>,
right_ty: Type<'db>,
op: ast::Operator,
) -> Option<Type<'db>> {
match (left_ty, right_ty, op) {
// When interacting with Todo, Any and Unknown should propagate (as if we fix this
// `Todo` in the future, the result would then become Any or Unknown, respectively.)
(Type::Any, _, _) | (_, Type::Any, _) => Type::Any,
(Type::Unknown, _, _) | (_, Type::Unknown, _) => Type::Unknown,
(Type::Any, _, _) | (_, Type::Any, _) => Some(Type::Any),
(Type::Unknown, _, _) | (_, Type::Unknown, _) => Some(Type::Unknown),
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Add) => n
.checked_add(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Add) => Some(
n.checked_add(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
),
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Sub) => n
.checked_sub(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Sub) => Some(
n.checked_sub(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
),
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Mult) => n
.checked_mul(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Mult) => Some(
n.checked_mul(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
),
(Type::IntLiteral(_), Type::IntLiteral(_), ast::Operator::Div) => {
KnownClass::Float.to_instance(self.db)
Some(KnownClass::Float.to_instance(self.db))
}
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::FloorDiv) => n
.checked_div(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::FloorDiv) => Some(
n.checked_div(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
),
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Mod) => n
.checked_rem(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
(Type::IntLiteral(n), Type::IntLiteral(m), ast::Operator::Mod) => Some(
n.checked_rem(m)
.map(Type::IntLiteral)
.unwrap_or_else(|| KnownClass::Int.to_instance(self.db)),
),
(Type::BytesLiteral(lhs), Type::BytesLiteral(rhs), ast::Operator::Add) => {
Type::BytesLiteral(BytesLiteralType::new(
Some(Type::BytesLiteral(BytesLiteralType::new(
self.db,
[lhs.value(self.db).as_ref(), rhs.value(self.db).as_ref()]
.concat()
.into_boxed_slice(),
))
)))
}
(Type::StringLiteral(lhs), Type::StringLiteral(rhs), ast::Operator::Add) => {
let lhs_value = lhs.value(self.db).to_string();
let rhs_value = rhs.value(self.db).as_ref();
if lhs_value.len() + rhs_value.len() <= Self::MAX_STRING_LITERAL_SIZE {
let ty = if lhs_value.len() + rhs_value.len() <= Self::MAX_STRING_LITERAL_SIZE {
Type::StringLiteral(StringLiteralType::new(self.db, {
(lhs_value + rhs_value).into_boxed_str()
}))
} else {
Type::LiteralString
}
};
Some(ty)
}
(
Type::StringLiteral(_) | Type::LiteralString,
Type::StringLiteral(_) | Type::LiteralString,
ast::Operator::Add,
) => Type::LiteralString,
) => Some(Type::LiteralString),
(Type::StringLiteral(s), Type::IntLiteral(n), ast::Operator::Mult)
| (Type::IntLiteral(n), Type::StringLiteral(s), ast::Operator::Mult) => {
if n < 1 {
let ty = if n < 1 {
Type::StringLiteral(StringLiteralType::new(self.db, ""))
} else if let Ok(n) = usize::try_from(n) {
if n.checked_mul(s.value(self.db).len())
@ -2577,19 +2604,92 @@ impl<'db> TypeInferenceBuilder<'db> {
}
} else {
Type::LiteralString
}
};
Some(ty)
}
(Type::LiteralString, Type::IntLiteral(n), ast::Operator::Mult)
| (Type::IntLiteral(n), Type::LiteralString, ast::Operator::Mult) => {
if n < 1 {
let ty = if n < 1 {
Type::StringLiteral(StringLiteralType::new(self.db, ""))
} else {
Type::LiteralString
}
};
Some(ty)
}
_ => Type::Todo, // TODO
(Type::Instance(_), Type::IntLiteral(_), op) => {
self.infer_binary_expression_type(left_ty, KnownClass::Int.to_instance(self.db), op)
}
(Type::IntLiteral(_), Type::Instance(_), op) => self.infer_binary_expression_type(
KnownClass::Int.to_instance(self.db),
right_ty,
op,
),
(Type::Instance(_), Type::Tuple(_), op) => self.infer_binary_expression_type(
left_ty,
KnownClass::Tuple.to_instance(self.db),
op,
),
(Type::Tuple(_), Type::Instance(_), op) => self.infer_binary_expression_type(
KnownClass::Tuple.to_instance(self.db),
right_ty,
op,
),
(Type::Instance(_), Type::StringLiteral(_) | Type::LiteralString, op) => {
self.infer_binary_expression_type(left_ty, KnownClass::Str.to_instance(self.db), op)
}
(Type::StringLiteral(_) | Type::LiteralString, Type::Instance(_), op) => self
.infer_binary_expression_type(KnownClass::Str.to_instance(self.db), right_ty, op),
(Type::Instance(_), Type::BytesLiteral(_), op) => self.infer_binary_expression_type(
left_ty,
KnownClass::Bytes.to_instance(self.db),
op,
),
(Type::BytesLiteral(_), Type::Instance(_), op) => self.infer_binary_expression_type(
KnownClass::Bytes.to_instance(self.db),
right_ty,
op,
),
// Both operands are instances: resolve the operator through the operands' dunder methods.
(Type::Instance(left_class), Type::Instance(right_class), op) => {
// If the right operand's class is a distinct subclass of the left operand's class, its
// reflected dunder may take precedence over the left operand's regular dunder.
if left_class != right_class && right_class.is_subclass_of(self.db, left_class) {
let reflected_dunder = op.reflected_dunder();
let rhs_reflected = right_class.class_member(self.db, reflected_dunder);
// The reflected method only wins when it is bound on the right-hand class and differs
// from the member the left-hand class resolves for the same reflected name.
if !rhs_reflected.is_unbound()
&& rhs_reflected != left_class.class_member(self.db, reflected_dunder)
{
// Call the reflected dunder first (note the swapped argument order); if that gives
// no return type, fall back to the left operand's regular dunder.
return rhs_reflected
.call(self.db, &[right_ty, left_ty])
.return_ty(self.db)
.or_else(|| {
left_class
.class_member(self.db, op.dunder())
.call(self.db, &[left_ty, right_ty])
.return_ty(self.db)
});
}
}
// Default order: the left operand's regular dunder first, then the right operand's
// reflected dunder (again with swapped arguments) if the first gives no return type.
left_class
.class_member(self.db, op.dunder())
.call(self.db, &[left_ty, right_ty])
.return_ty(self.db)
.or_else(|| {
right_class
.class_member(self.db, op.reflected_dunder())
.call(self.db, &[right_ty, left_ty])
.return_ty(self.db)
})
}
_ => Some(Type::Todo), // TODO
}
}

View File

@ -2971,6 +2971,42 @@ impl Operator {
Operator::FloorDiv => "//",
}
}
pub const fn dunder(self) -> &'static str {
match self {
Operator::Add => "__add__",
Operator::Sub => "__sub__",
Operator::Mult => "__mul__",
Operator::MatMult => "__matmul__",
Operator::Div => "__truediv__",
Operator::Mod => "__mod__",
Operator::Pow => "__pow__",
Operator::LShift => "__lshift__",
Operator::RShift => "__rshift__",
Operator::BitOr => "__or__",
Operator::BitXor => "__xor__",
Operator::BitAnd => "__and__",
Operator::FloorDiv => "__floordiv__",
}
}
pub const fn reflected_dunder(self) -> &'static str {
match self {
Operator::Add => "__radd__",
Operator::Sub => "__rsub__",
Operator::Mult => "__rmul__",
Operator::MatMult => "__rmatmul__",
Operator::Div => "__rtruediv__",
Operator::Mod => "__rmod__",
Operator::Pow => "__rpow__",
Operator::LShift => "__rlshift__",
Operator::RShift => "__rrshift__",
Operator::BitOr => "__ror__",
Operator::BitXor => "__rxor__",
Operator::BitAnd => "__rand__",
Operator::FloorDiv => "__rfloordiv__",
}
}
}
impl fmt::Display for Operator {