From e0149cd9f3712dbe719335216015f09ceab0e70c Mon Sep 17 00:00:00 2001 From: Douglas Creager Date: Wed, 23 Jul 2025 17:11:44 -0400 Subject: [PATCH] [ty] Return a tuple spec from the iterator protocol (#19496) This PR updates our iterator protocol machinery to return a tuple spec describing the elements that are returned, instead of a type. That allows us to track heterogeneous iterators more precisely, and consolidates the logic in unpacking and splatting, which are the two places where we can take advantage of that more precise information. (Other iterator consumers, like `for` loops, have to collapse the iterated elements down to a single type regardless, and we provide a new helper method on `TupleSpec` to perform that summarization.) --- .../resources/mdtest/call/function.md | 75 +++++++++++++++ .../resources/mdtest/loops/for.md | 9 +- crates/ty_python_semantic/src/types.rs | 91 +++++++++++-------- .../src/types/call/arguments.rs | 10 +- .../ty_python_semantic/src/types/call/bind.rs | 35 ++----- crates/ty_python_semantic/src/types/class.rs | 8 +- .../ty_python_semantic/src/types/generics.rs | 2 +- crates/ty_python_semantic/src/types/infer.rs | 46 ++++++---- crates/ty_python_semantic/src/types/tuple.rs | 4 + .../ty_python_semantic/src/types/unpacker.rs | 49 ++++------ 10 files changed, 204 insertions(+), 125 deletions(-) diff --git a/crates/ty_python_semantic/resources/mdtest/call/function.md b/crates/ty_python_semantic/resources/mdtest/call/function.md index 30d636bede..828c9d65d6 100644 --- a/crates/ty_python_semantic/resources/mdtest/call/function.md +++ b/crates/ty_python_semantic/resources/mdtest/call/function.md @@ -259,6 +259,81 @@ def _(args: tuple[int, *tuple[str, ...], int]) -> None: takes_at_least_two_positional_only(*args) # error: [invalid-argument-type] ``` +### String argument + +```py +from typing import Literal + +def takes_zero() -> None: ... +def takes_one(x: str) -> None: ... +def takes_two(x: str, y: str) -> None: ... +def takes_two_positional_only(x: str, y: str, /) -> None: ... +def takes_two_different(x: int, y: str) -> None: ... +def takes_two_different_positional_only(x: int, y: str, /) -> None: ... +def takes_at_least_zero(*args) -> None: ... +def takes_at_least_one(x: str, *args) -> None: ... +def takes_at_least_two(x: str, y: str, *args) -> None: ... +def takes_at_least_two_positional_only(x: str, y: str, /, *args) -> None: ... + +# Test all of the above with a number of different splatted argument types + +def _(args: Literal["a"]) -> None: + takes_zero(*args) # error: [too-many-positional-arguments] + takes_one(*args) + takes_two(*args) # error: [missing-argument] + takes_two_positional_only(*args) # error: [missing-argument] + # error: [invalid-argument-type] + # error: [missing-argument] + takes_two_different(*args) + # error: [invalid-argument-type] + # error: [missing-argument] + takes_two_different_positional_only(*args) + takes_at_least_zero(*args) + takes_at_least_one(*args) + takes_at_least_two(*args) # error: [missing-argument] + takes_at_least_two_positional_only(*args) # error: [missing-argument] + +def _(args: Literal["ab"]) -> None: + takes_zero(*args) # error: [too-many-positional-arguments] + takes_one(*args) # error: [too-many-positional-arguments] + takes_two(*args) + takes_two_positional_only(*args) + takes_two_different(*args) # error: [invalid-argument-type] + takes_two_different_positional_only(*args) # error: [invalid-argument-type] + takes_at_least_zero(*args) + takes_at_least_one(*args) + takes_at_least_two(*args) + takes_at_least_two_positional_only(*args) + +def _(args: Literal["abc"]) -> None: + takes_zero(*args) # error: [too-many-positional-arguments] + takes_one(*args) # error: [too-many-positional-arguments] + takes_two(*args) # error: [too-many-positional-arguments] + takes_two_positional_only(*args) # error: [too-many-positional-arguments] + # error: [invalid-argument-type] + # error: [too-many-positional-arguments] + takes_two_different(*args) + # error: [invalid-argument-type] + # error: [too-many-positional-arguments] + takes_two_different_positional_only(*args) + takes_at_least_zero(*args) + takes_at_least_one(*args) + takes_at_least_two(*args) + takes_at_least_two_positional_only(*args) + +def _(args: str) -> None: + takes_zero(*args) + takes_one(*args) + takes_two(*args) + takes_two_positional_only(*args) + takes_two_different(*args) # error: [invalid-argument-type] + takes_two_different_positional_only(*args) # error: [invalid-argument-type] + takes_at_least_zero(*args) + takes_at_least_one(*args) + takes_at_least_two(*args) + takes_at_least_two_positional_only(*args) +``` + ### Argument expansion regression This is a regression that was highlighted by the ecosystem check, which shows that we might need to diff --git a/crates/ty_python_semantic/resources/mdtest/loops/for.md b/crates/ty_python_semantic/resources/mdtest/loops/for.md index 85f2390aa8..139a812be1 100644 --- a/crates/ty_python_semantic/resources/mdtest/loops/for.md +++ b/crates/ty_python_semantic/resources/mdtest/loops/for.md @@ -738,6 +738,13 @@ def _(flag: bool, flag2: bool): reveal_type(y) # revealed: bytes | str | int ``` +## Empty tuple is iterable + +```py +for x in (): + reveal_type(x) # revealed: Never +``` + ## Never is iterable ```py @@ -745,5 +752,5 @@ from typing_extensions import Never def f(never: Never): for x in never: - reveal_type(x) # revealed: Never + reveal_type(x) # revealed: Unknown ``` diff --git a/crates/ty_python_semantic/src/types.rs b/crates/ty_python_semantic/src/types.rs index 8ea970fdf4..8a559667f1 100644 --- a/crates/ty_python_semantic/src/types.rs +++ b/crates/ty_python_semantic/src/types.rs @@ -2,6 +2,7 @@ use infer::nearest_enclosing_class; use itertools::{Either, Itertools}; use ruff_db::parsed::parsed_module; +use std::borrow::Cow; use std::slice::Iter; use bitflags::bitflags; @@ -56,7 +57,7 @@ use crate::types::infer::infer_unpack_types; use crate::types::mro::{Mro, MroError, MroIterator}; pub(crate) use crate::types::narrow::infer_narrowing_constraint; use crate::types::signatures::{Parameter, ParameterForm, Parameters, walk_signature}; -use crate::types::tuple::TupleType; +use crate::types::tuple::{TupleSpec, TupleType}; pub use crate::util::diagnostics::add_inferred_python_version_hint_to_diagnostic; use crate::{Db, FxOrderSet, Module, Program}; pub(crate) use class::{ClassLiteral, ClassType, GenericAlias, KnownClass}; @@ -813,13 +814,6 @@ impl<'db> Type<'db> { .expect("Expected a Type::EnumLiteral variant") } - pub(crate) const fn into_tuple(self) -> Option> { - match self { - Type::Tuple(tuple_type) => Some(tuple_type), - _ => None, - } - } - /// Turn a class literal (`Type::ClassLiteral` or `Type::GenericAlias`) into a `ClassType`. /// Since a `ClassType` must be specialized, apply the default specialization to any /// unspecialized generic class literal. @@ -4615,35 +4609,50 @@ impl<'db> Type<'db> { } } - /// Returns the element type when iterating over `self`. + /// Returns a tuple spec describing the elements that are produced when iterating over `self`. /// /// This method should only be used outside of type checking because it omits any errors. /// For type checking, use [`try_iterate`](Self::try_iterate) instead. - fn iterate(self, db: &'db dyn Db) -> Type<'db> { + fn iterate(self, db: &'db dyn Db) -> Cow<'db, TupleSpec<'db>> { self.try_iterate(db) - .unwrap_or_else(|err| err.fallback_element_type(db)) + .unwrap_or_else(|err| Cow::Owned(TupleSpec::homogeneous(err.fallback_element_type(db)))) } /// Given the type of an object that is iterated over in some way, - /// return the type of objects that are yielded by that iteration. + /// return a tuple spec describing the type of objects that are yielded by that iteration. /// - /// E.g., for the following loop, given the type of `x`, infer the type of `y`: + /// E.g., for the following call, given the type of `x`, infer the types of the values that are + /// splatted into `y`'s positional arguments: /// ```python - /// for y in x: - /// pass + /// y(*x) /// ``` - fn try_iterate(self, db: &'db dyn Db) -> Result, IterationError<'db>> { - if let Type::Tuple(tuple_type) = self { - return Ok(UnionType::from_elements( - db, - tuple_type.tuple(db).all_elements(), - )); - } - - if let Type::GenericAlias(alias) = self { - if alias.origin(db).is_tuple(db) { - return Ok(todo_type!("*tuple[] annotations")); + fn try_iterate(self, db: &'db dyn Db) -> Result>, IterationError<'db>> { + match self { + Type::Tuple(tuple_type) => return Ok(Cow::Borrowed(tuple_type.tuple(db))), + Type::GenericAlias(alias) if alias.origin(db).is_tuple(db) => { + return Ok(Cow::Owned(TupleSpec::homogeneous(todo_type!( + "*tuple[] annotations" + )))); } + Type::StringLiteral(string_literal_ty) => { + // We could go further and deconstruct to an array of `StringLiteral` + // with each individual character, instead of just an array of + // `LiteralString`, but there would be a cost and it's not clear that + // it's worth it. + return Ok(Cow::Owned(TupleSpec::from_elements(std::iter::repeat_n( + Type::LiteralString, + string_literal_ty.python_len(db), + )))); + } + Type::Never => { + // The dunder logic below would have us return `tuple[Never, ...]`, which eagerly + // simplifies to `tuple[()]`. That will will cause us to emit false positives if we + // index into the tuple. Using `tuple[Unknown, ...]` avoids these false positives. + // TODO: Consider removing this special case, and instead hide the indexing + // diagnostic in unreachable code. + return Ok(Cow::Owned(TupleSpec::homogeneous(Type::unknown()))); + } + _ => {} } let try_call_dunder_getitem = || { @@ -4669,12 +4678,14 @@ impl<'db> Type<'db> { Ok(iterator) => { // `__iter__` is definitely bound and calling it succeeds. // See what calling `__next__` on the object returned by `__iter__` gives us... - try_call_dunder_next_on_iterator(iterator).map_err(|dunder_next_error| { - IterationError::IterReturnsInvalidIterator { - iterator, - dunder_next_error, - } - }) + try_call_dunder_next_on_iterator(iterator) + .map(|ty| Cow::Owned(TupleSpec::homogeneous(ty))) + .map_err( + |dunder_next_error| IterationError::IterReturnsInvalidIterator { + iterator, + dunder_next_error, + }, + ) } // `__iter__` is possibly unbound... @@ -4692,10 +4703,10 @@ impl<'db> Type<'db> { // and the type returned by the `__getitem__` method. // // No diagnostic is emitted; iteration will always succeed! - UnionType::from_elements( + Cow::Owned(TupleSpec::homogeneous(UnionType::from_elements( db, [dunder_next_return, dunder_getitem_return_type], - ) + ))) }) .map_err(|dunder_getitem_error| { IterationError::PossiblyUnboundIterAndGetitemError { @@ -4718,13 +4729,13 @@ impl<'db> Type<'db> { } // There's no `__iter__` method. Try `__getitem__` instead... - Err(CallDunderError::MethodNotAvailable) => { - try_call_dunder_getitem().map_err(|dunder_getitem_error| { - IterationError::UnboundIterAndGetitemError { + Err(CallDunderError::MethodNotAvailable) => try_call_dunder_getitem() + .map(|ty| Cow::Owned(TupleSpec::homogeneous(ty))) + .map_err( + |dunder_getitem_error| IterationError::UnboundIterAndGetitemError { dunder_getitem_error, - } - }) - } + }, + ), } } diff --git a/crates/ty_python_semantic/src/types/call/arguments.rs b/crates/ty_python_semantic/src/types/call/arguments.rs index 497c412d5c..5259872902 100644 --- a/crates/ty_python_semantic/src/types/call/arguments.rs +++ b/crates/ty_python_semantic/src/types/call/arguments.rs @@ -51,12 +51,10 @@ impl<'a, 'db> CallArguments<'a, 'db> { ast::ArgOrKeyword::Arg(arg) => match arg { ast::Expr::Starred(ast::ExprStarred { value, .. }) => { let ty = infer_argument_type(arg, value); - let length = match ty { - Type::Tuple(tuple) => tuple.tuple(db).len(), - // TODO: have `Type::try_iterator` return a tuple spec, and use its - // length as this argument's arity - _ => TupleLength::unknown(), - }; + let length = ty + .try_iterate(db) + .map(|tuple| tuple.len()) + .unwrap_or(TupleLength::unknown()); (Argument::Variadic(length), Some(ty)) } _ => (Argument::Positional, None), diff --git a/crates/ty_python_semantic/src/types/call/bind.rs b/crates/ty_python_semantic/src/types/call/bind.rs index 0b4d0308c8..19091fa735 100644 --- a/crates/ty_python_semantic/src/types/call/bind.rs +++ b/crates/ty_python_semantic/src/types/call/bind.rs @@ -977,24 +977,14 @@ impl<'db> Bindings<'db> { // `tuple(range(42))` => `tuple[int, ...]` // BUT `tuple((1, 2))` => `tuple[Literal[1], Literal[2]]` rather than `tuple[Literal[1, 2], ...]` if let [Some(argument)] = overload.parameter_types() { - let overridden_return = - argument.into_tuple().map(Type::Tuple).unwrap_or_else(|| { - // Some awkward special handling is required here because of the fact - // that calling `try_iterate()` on `Never` returns `Never`, - // but `tuple[Never, ...]` eagerly simplifies to `tuple[()]`, - // which will cause us to emit false positives if we index into the tuple. - // Using `tuple[Unknown, ...]` avoids these false positives. - let specialization = if argument.is_never() { - Type::unknown() - } else { - argument.try_iterate(db).expect( - "try_iterate() should not fail on a type \ - assignable to `Iterable`", - ) - }; - TupleType::homogeneous(db, specialization) - }); - overload.set_return_type(overridden_return); + let tuple_spec = argument.try_iterate(db).expect( + "try_iterate() should not fail on a type \ + assignable to `Iterable`", + ); + overload.set_return_type(Type::tuple(TupleType::new( + db, + tuple_spec.as_ref(), + ))); } } @@ -2097,14 +2087,7 @@ impl<'a, 'db> ArgumentTypeChecker<'a, 'db> { // elements. For tuples, we don't have to do anything! For other types, we treat it as // an iterator, and create a homogeneous tuple of its output type, since we don't know // how many elements the iterator will produce. - // TODO: update `Type::try_iterate` to return this tuple type for us. - let argument_types = match argument_type { - Type::Tuple(tuple) => Cow::Borrowed(tuple.tuple(self.db)), - _ => { - let element_type = argument_type.iterate(self.db); - Cow::Owned(Tuple::homogeneous(element_type)) - } - }; + let argument_types = argument_type.iterate(self.db); // TODO: When we perform argument expansion during overload resolution, we might need // to retry both `match_parameters` _and_ `check_types` for each expansion. Currently diff --git a/crates/ty_python_semantic/src/types/class.rs b/crates/ty_python_semantic/src/types/class.rs index a2ed282f3b..8ad0c5dd29 100644 --- a/crates/ty_python_semantic/src/types/class.rs +++ b/crates/ty_python_semantic/src/types/class.rs @@ -618,6 +618,8 @@ impl<'db> ClassType<'db> { match specialization { Some(spec) => { + // TODO: Once we support PEP 646 annotations for `*args` parameters, we can + // use the tuple itself as the argument type. let tuple = spec.tuple(db); let tuple_len = tuple.len(); @@ -2137,7 +2139,8 @@ impl<'db> ClassLiteral<'db> { index.expression(for_stmt.iterable(&module)), ); // TODO: Potential diagnostics resulting from the iterable are currently not reported. - let inferred_ty = iterable_ty.iterate(db); + let inferred_ty = + iterable_ty.iterate(db).homogeneous_element_type(db); union_of_inferred_types = union_of_inferred_types.add(inferred_ty); } @@ -2195,7 +2198,8 @@ impl<'db> ClassLiteral<'db> { index.expression(comprehension.iterable(&module)), ); // TODO: Potential diagnostics resulting from the iterable are currently not reported. - let inferred_ty = iterable_ty.iterate(db); + let inferred_ty = + iterable_ty.iterate(db).homogeneous_element_type(db); union_of_inferred_types = union_of_inferred_types.add(inferred_ty); } diff --git a/crates/ty_python_semantic/src/types/generics.rs b/crates/ty_python_semantic/src/types/generics.rs index e6714330d9..4eafce6052 100644 --- a/crates/ty_python_semantic/src/types/generics.rs +++ b/crates/ty_python_semantic/src/types/generics.rs @@ -194,7 +194,7 @@ impl<'db> GenericContext<'db> { db: &'db dyn Db, tuple: TupleType<'db>, ) -> Specialization<'db> { - let element_type = UnionType::from_elements(db, tuple.tuple(db).all_elements()); + let element_type = tuple.tuple(db).homogeneous_element_type(db); Specialization::new(db, self, Box::from([element_type]), Some(tuple)) } diff --git a/crates/ty_python_semantic/src/types/infer.rs b/crates/ty_python_semantic/src/types/infer.rs index 000e6204dd..9152bf57a6 100644 --- a/crates/ty_python_semantic/src/types/infer.rs +++ b/crates/ty_python_semantic/src/types/infer.rs @@ -4533,6 +4533,7 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> { builder .infer_standalone_expression(iter_expr) .iterate(builder.db()) + .homogeneous_element_type(builder.db()) }); self.infer_body(body); @@ -4562,10 +4563,13 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> { } TargetKind::Single => { let iterable_type = self.infer_standalone_expression(iterable); - iterable_type.try_iterate(self.db()).unwrap_or_else(|err| { - err.report_diagnostic(&self.context, iterable_type, iterable.into()); - err.fallback_element_type(self.db()) - }) + iterable_type + .try_iterate(self.db()) + .map(|tuple| tuple.homogeneous_element_type(self.db())) + .unwrap_or_else(|err| { + err.report_diagnostic(&self.context, iterable_type, iterable.into()); + err.fallback_element_type(self.db()) + }) } } }; @@ -5671,6 +5675,7 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> { builder.infer_standalone_expression(iter_expr) } .iterate(builder.db()) + .homogeneous_element_type(builder.db()) }); for expr in ifs { self.infer_standalone_expression(expr); @@ -5719,10 +5724,13 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> { } TargetKind::Single => { let iterable_type = infer_iterable_type(); - iterable_type.try_iterate(self.db()).unwrap_or_else(|err| { - err.report_diagnostic(&self.context, iterable_type, iterable.into()); - err.fallback_element_type(self.db()) - }) + iterable_type + .try_iterate(self.db()) + .map(|tuple| tuple.homogeneous_element_type(self.db())) + .unwrap_or_else(|err| { + err.report_diagnostic(&self.context, iterable_type, iterable.into()); + err.fallback_element_type(self.db()) + }) } } }; @@ -6062,10 +6070,13 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> { } = starred; let iterable_type = self.infer_expression(value); - iterable_type.try_iterate(self.db()).unwrap_or_else(|err| { - err.report_diagnostic(&self.context, iterable_type, value.as_ref().into()); - err.fallback_element_type(self.db()) - }); + iterable_type + .try_iterate(self.db()) + .map(|tuple| tuple.homogeneous_element_type(self.db())) + .unwrap_or_else(|err| { + err.report_diagnostic(&self.context, iterable_type, value.as_ref().into()); + err.fallback_element_type(self.db()) + }); // TODO todo_type!("starred expression") @@ -6089,10 +6100,13 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> { } = yield_from; let iterable_type = self.infer_expression(value); - iterable_type.try_iterate(self.db()).unwrap_or_else(|err| { - err.report_diagnostic(&self.context, iterable_type, value.as_ref().into()); - err.fallback_element_type(self.db()) - }); + iterable_type + .try_iterate(self.db()) + .map(|tuple| tuple.homogeneous_element_type(self.db())) + .unwrap_or_else(|err| { + err.report_diagnostic(&self.context, iterable_type, value.as_ref().into()); + err.fallback_element_type(self.db()) + }); // TODO get type from `ReturnType` of generator todo_type!("Generic `typing.Generator` type") diff --git a/crates/ty_python_semantic/src/types/tuple.rs b/crates/ty_python_semantic/src/types/tuple.rs index 0075eddff1..268bd42d1e 100644 --- a/crates/ty_python_semantic/src/types/tuple.rs +++ b/crates/ty_python_semantic/src/types/tuple.rs @@ -1014,6 +1014,10 @@ impl Tuple { } impl<'db> Tuple> { + pub(crate) fn homogeneous_element_type(&self, db: &'db dyn Db) -> Type<'db> { + UnionType::from_elements(db, self.all_elements()) + } + /// Concatenates another tuple to the end of this tuple, returning a new tuple. pub(crate) fn concat(&self, db: &'db dyn Db, other: &Self) -> Self { match self { diff --git a/crates/ty_python_semantic/src/types/unpacker.rs b/crates/ty_python_semantic/src/types/unpacker.rs index e3c74dd25a..a85f2189b2 100644 --- a/crates/ty_python_semantic/src/types/unpacker.rs +++ b/crates/ty_python_semantic/src/types/unpacker.rs @@ -8,7 +8,7 @@ use ruff_python_ast::{self as ast, AnyNodeRef}; use crate::Db; use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey; use crate::semantic_index::place::ScopeId; -use crate::types::tuple::{ResizeTupleError, Tuple, TupleLength, TupleUnpacker}; +use crate::types::tuple::{ResizeTupleError, Tuple, TupleLength, TupleSpec, TupleUnpacker}; use crate::types::{Type, TypeCheckDiagnostics, infer_expression_types}; use crate::unpack::{UnpackKind, UnpackValue}; @@ -64,14 +64,17 @@ impl<'db, 'ast> Unpacker<'db, 'ast> { value_type } } - UnpackKind::Iterable => value_type.try_iterate(self.db()).unwrap_or_else(|err| { - err.report_diagnostic( - &self.context, - value_type, - value.as_any_node_ref(self.db(), self.module()), - ); - err.fallback_element_type(self.db()) - }), + UnpackKind::Iterable => value_type + .try_iterate(self.db()) + .map(|tuple| tuple.homogeneous_element_type(self.db())) + .unwrap_or_else(|err| { + err.report_diagnostic( + &self.context, + value_type, + value.as_any_node_ref(self.db(), self.module()), + ); + err.fallback_element_type(self.db()) + }), UnpackKind::ContextManager => value_type.try_enter(self.db()).unwrap_or_else(|err| { err.report_diagnostic( &self.context, @@ -118,30 +121,10 @@ impl<'db, 'ast> Unpacker<'db, 'ast> { }; for ty in unpack_types.iter().copied() { - let tuple = match ty { - Type::Tuple(tuple_ty) => Cow::Borrowed(tuple_ty.tuple(self.db())), - Type::StringLiteral(string_literal_ty) => { - // We could go further and deconstruct to an array of `StringLiteral` - // with each individual character, instead of just an array of - // `LiteralString`, but there would be a cost and it's not clear that - // it's worth it. - Cow::Owned(Tuple::from_elements(std::iter::repeat_n( - Type::LiteralString, - string_literal_ty.python_len(self.db()), - ))) - } - Type::LiteralString => Cow::Owned(Tuple::homogeneous(Type::LiteralString)), - _ => { - // TODO: Update our iterator protocol machinery to return a tuple - // describing the returned values in more detail, when we can. - Cow::Owned(Tuple::homogeneous( - ty.try_iterate(self.db()).unwrap_or_else(|err| { - err.report_diagnostic(&self.context, ty, value_expr); - err.fallback_element_type(self.db()) - }), - )) - } - }; + let tuple = ty.try_iterate(self.db()).unwrap_or_else(|err| { + err.report_diagnostic(&self.context, ty, value_expr); + Cow::Owned(TupleSpec::homogeneous(err.fallback_element_type(self.db()))) + }); if let Err(err) = unpacker.unpack_tuple(tuple.as_ref()) { unpacker