[ty] add nested bindings to the semantic index

This commit is contained in:
Jack O'Connor 2025-08-07 18:04:48 -07:00
parent 827456f977
commit a6569ed960
13 changed files with 395 additions and 164 deletions

View File

@ -684,7 +684,8 @@ impl SemanticSyntaxContext for Checker<'_> {
| SemanticSyntaxErrorKind::LoadBeforeNonlocalDeclaration { .. } | SemanticSyntaxErrorKind::LoadBeforeNonlocalDeclaration { .. }
| SemanticSyntaxErrorKind::NonlocalAndGlobal(_) | SemanticSyntaxErrorKind::NonlocalAndGlobal(_)
| SemanticSyntaxErrorKind::AnnotatedGlobal(_) | SemanticSyntaxErrorKind::AnnotatedGlobal(_)
| SemanticSyntaxErrorKind::AnnotatedNonlocal(_) => { | SemanticSyntaxErrorKind::AnnotatedNonlocal(_)
| SemanticSyntaxErrorKind::NoBindingForNonlocal(_) => {
self.semantic_errors.borrow_mut().push(error); self.semantic_errors.borrow_mut().push(error);
} }
} }

View File

@ -989,6 +989,9 @@ impl Display for SemanticSyntaxError {
SemanticSyntaxErrorKind::AnnotatedNonlocal(name) => { SemanticSyntaxErrorKind::AnnotatedNonlocal(name) => {
write!(f, "annotated name `{name}` can't be nonlocal") write!(f, "annotated name `{name}` can't be nonlocal")
} }
SemanticSyntaxErrorKind::NoBindingForNonlocal(name) => {
write!(f, "no binding for nonlocal `{name}` found")
}
} }
} }
} }
@ -1346,6 +1349,20 @@ pub enum SemanticSyntaxErrorKind {
/// Represents a type annotation on a variable that's been declared nonlocal /// Represents a type annotation on a variable that's been declared nonlocal
AnnotatedNonlocal(String), AnnotatedNonlocal(String),
/// Represents a `nonlocal` statement that doesn't match any enclosing definition.
///
/// ## Examples
///
/// ```python
/// def f():
/// nonlocal x # error
///
/// y = 1
/// def f():
/// nonlocal y # error (the global `y` isn't considered)
/// ```
NoBindingForNonlocal(String),
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, get_size2::GetSize)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, get_size2::GetSize)]

View File

@ -312,8 +312,7 @@ def outer() -> None:
set_x() set_x()
def inner() -> None: def inner() -> None:
# TODO: this should ideally be `None | Literal[1]`. Mypy and pyright support this. reveal_type(x) # revealed: None | Literal[1]
reveal_type(x) # revealed: None
inner() inner()
``` ```

View File

@ -201,7 +201,7 @@ x = 42
def f(): def f():
global x global x
reveal_type(x) # revealed: Unknown | Literal[42] reveal_type(x) # revealed: Unknown | Literal[42, "56"]
x = "56" x = "56"
reveal_type(x) # revealed: Literal["56"] reveal_type(x) # revealed: Literal["56"]
``` ```

View File

@ -78,7 +78,7 @@ reveal_type(module.__spec__) # revealed: Unknown | ModuleSpec | None
def nested_scope(): def nested_scope():
global __loader__ global __loader__
reveal_type(__loader__) # revealed: LoaderProtocol | None reveal_type(__loader__) # revealed: Unknown | LoaderProtocol | None
__loader__ = 56 # error: [invalid-assignment] "Object of type `Literal[56]` is not assignable to `LoaderProtocol | None`" __loader__ = 56 # error: [invalid-assignment] "Object of type `Literal[56]` is not assignable to `LoaderProtocol | None`"
``` ```

View File

@ -104,16 +104,20 @@ def a():
def d(): def d():
nonlocal x nonlocal x
reveal_type(x) # revealed: Literal[3, 2] # It's counterintuitive that 4 gets included here, since we haven't reached the
# binding in this scope, but this function might get called more than once.
reveal_type(x) # revealed: Literal[2, 3, 4]
x = 4 x = 4
reveal_type(x) # revealed: Literal[4] reveal_type(x) # revealed: Literal[4]
def e(): def e():
reveal_type(x) # revealed: Literal[4, 3, 2] reveal_type(x) # revealed: Literal[2, 3, 4]
``` ```
However, currently the union of types that we build is incomplete. We walk parent scopes, but not In addition to parent scopes, we also consider sibling scopes, child scopes,
sibling scopes, child scopes, second-cousin-once-removed scopes, etc: second-cousin-once-removed scopes, etc. However, we only fall back to bindings from other scopes
when a place is unbound or possibly unbound in the current scope. In other words, nested bindings
are only included in the "public type" of a variable:
```py ```py
def a(): def a():
@ -121,12 +125,14 @@ def a():
def b(): def b():
nonlocal x nonlocal x
x = 2 x = 2
reveal_type(x) # revealed: Literal[2]
def c(): def c():
def d(): def d():
nonlocal x nonlocal x
x = 3 x = 3
# TODO: This should include 2 and 3. reveal_type(x) # revealed: Literal[1, 2, 3]
# `x` is local here, so we don't look at nested scopes.
reveal_type(x) # revealed: Literal[1] reveal_type(x) # revealed: Literal[1]
``` ```
@ -365,10 +371,10 @@ def f():
x = 1 x = 1
def g(): def g():
nonlocal x nonlocal x
reveal_type(x) # revealed: Literal[1] reveal_type(x) # revealed: int
x += 1 x += 1
reveal_type(x) # revealed: Literal[2] reveal_type(x) # revealed: int
# TODO: should be `Unknown | Literal[1]` # TODO: should be `int`
reveal_type(x) # revealed: Literal[1] reveal_type(x) # revealed: Literal[1]
``` ```

View File

@ -651,6 +651,32 @@ fn place_by_id<'db>(
) -> PlaceAndQualifiers<'db> { ) -> PlaceAndQualifiers<'db> {
let use_def = use_def_map(db, scope); let use_def = use_def_map(db, scope);
// If there are any nested bindings (via `global` or `nonlocal` variables) for this symbol,
// infer them and union the results. Nested bindings aren't allowed to have declarations or
// qualifiers, and we can just union their inferred types.
let mut nested_bindings_union = UnionBuilder::new(db);
if let Some(symbol_id) = place_id.as_symbol() {
let current_place_table = place_table(db, scope);
let symbol = current_place_table.symbol(symbol_id);
for nested_file_scope_id in place_table(db, scope).nested_scopes_with_bindings(symbol_id) {
let nested_scope_id = nested_file_scope_id.to_scope_id(db, scope.file(db));
let nested_place_table = place_table(db, nested_scope_id);
let nested_symbol_id = nested_place_table
.symbol_id(symbol.name())
.expect("nested_scopes_with_bindings says this reference exists");
let nested_place = place_by_id(
db,
nested_scope_id,
ScopedPlaceId::Symbol(nested_symbol_id),
RequiresExplicitReExport::No,
ConsideredDefinitions::AllReachable,
);
if let Place::Type(nested_type, _) = nested_place.place {
nested_bindings_union.add_in_place(nested_type);
}
}
}
// If the place is declared, the public type is based on declarations; otherwise, it's based // If the place is declared, the public type is based on declarations; otherwise, it's based
// on inference from bindings. // on inference from bindings.
@ -725,11 +751,17 @@ fn place_by_id<'db>(
// TODO: We probably don't want to report `Bound` here. This requires a bit of // TODO: We probably don't want to report `Bound` here. This requires a bit of
// design work though as we might want a different behavior for stubs and for // design work though as we might want a different behavior for stubs and for
// normal modules. // normal modules.
Place::Type(declared_ty, Boundness::Bound) Place::Type(
nested_bindings_union.add(declared_ty).build(),
Boundness::Bound,
)
} }
// Place is possibly undeclared and (possibly) bound // Place is possibly undeclared and (possibly) bound
Place::Type(inferred_ty, boundness) => Place::Type( Place::Type(inferred_ty, boundness) => Place::Type(
UnionType::from_elements(db, [inferred_ty, declared_ty]), nested_bindings_union
.add(inferred_ty)
.add(declared_ty)
.build(),
if boundness_analysis == BoundnessAnalysis::AssumeBound { if boundness_analysis == BoundnessAnalysis::AssumeBound {
Boundness::Bound Boundness::Bound
} else { } else {
@ -740,7 +772,8 @@ fn place_by_id<'db>(
PlaceAndQualifiers { place, qualifiers } PlaceAndQualifiers { place, qualifiers }
} }
// Place is undeclared, return the union of `Unknown` with the inferred type // Place is undeclared, return the inferred type, and union it with `Unknown` if the place
// is public.
Ok(PlaceAndQualifiers { Ok(PlaceAndQualifiers {
place: Place::Unbound, place: Place::Unbound,
qualifiers: _, qualifiers: _,
@ -749,6 +782,20 @@ fn place_by_id<'db>(
let boundness_analysis = bindings.boundness_analysis; let boundness_analysis = bindings.boundness_analysis;
let mut inferred = place_from_bindings_impl(db, bindings, requires_explicit_reexport); let mut inferred = place_from_bindings_impl(db, bindings, requires_explicit_reexport);
// If there are nested bindings, union whatever we inferred from those into what we've
// inferred here.
if let Some(nested_bindings_type) = nested_bindings_union.try_build() {
match &mut inferred {
Place::Type(inferred_type, _) => {
*inferred_type =
UnionType::from_elements(db, [*inferred_type, nested_bindings_type]);
}
Place::Unbound => {
inferred = Place::Type(nested_bindings_type, Boundness::PossiblyUnbound);
}
}
}
if boundness_analysis == BoundnessAnalysis::AssumeBound { if boundness_analysis == BoundnessAnalysis::AssumeBound {
if let Place::Type(ty, Boundness::PossiblyUnbound) = inferred { if let Place::Type(ty, Boundness::PossiblyUnbound) = inferred {
inferred = Place::Type(ty, Boundness::Bound); inferred = Place::Type(ty, Boundness::Bound);

View File

@ -14,7 +14,7 @@ use ruff_python_ast::{self as ast, NodeIndex, PySourceType, PythonVersion};
use ruff_python_parser::semantic_errors::{ use ruff_python_parser::semantic_errors::{
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind, SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind,
}; };
use ruff_text_size::TextRange; use ruff_text_size::{Ranged, TextRange};
use crate::ast_node_ref::AstNodeRef; use crate::ast_node_ref::AstNodeRef;
use crate::module_name::ModuleName; use crate::module_name::ModuleName;
@ -66,10 +66,40 @@ impl Loop {
} }
} }
struct ScopeInfo { struct ScopeInfo<'ast> {
file_scope_id: FileScopeId, file_scope_id: FileScopeId,
/// Current loop state; None if we are not currently visiting a loop /// Current loop state; None if we are not currently visiting a loop
current_loop: Option<Loop>, current_loop: Option<Loop>,
/// Symbols from scopes nested inside of this one that haven't yet been resolved to a
/// definition. They might end up resolving in this scope, or in an enclosing scope.
///
/// When we pop scopes, we merge any unresolved free variables into the parent scope's
/// collection. The reason we need to collect free variables for each scope separately, instead
/// of just having one map for the whole builder, is because of sibling scope arrangements like
/// this:
/// ```py
/// def f():
/// def g():
/// # When we pop `g`, this `x` goes in `f`'s set of free variables.
/// nonlocal x
/// def h():
/// # When we pop `h`, this binding of `x` won't resolve the free variable from `g`,
/// # because it's not in `h`'s set of free variables.
/// x = 1
/// # When we pop `f`, this binding of `x` will resolve the free variable from `g`.
/// x = 1
/// ```
free_variables: FxHashMap<ast::name::Name, Vec<FreeVariable<'ast>>>,
}
struct FreeVariable<'ast> {
scope_id: FileScopeId,
// If this variable is `nonlocal`, then this is `Some` reference to its identifier in the
// `nonlocal` statement. In that case, it's an error if we don't resolve it before we reach the
// global scope (or if we resolve it in a scope where it's `global`).
nonlocal_identifier: Option<&'ast ast::Identifier>,
} }
pub(super) struct SemanticIndexBuilder<'db, 'ast> { pub(super) struct SemanticIndexBuilder<'db, 'ast> {
@ -78,7 +108,7 @@ pub(super) struct SemanticIndexBuilder<'db, 'ast> {
file: File, file: File,
source_type: PySourceType, source_type: PySourceType,
module: &'ast ParsedModuleRef, module: &'ast ParsedModuleRef,
scope_stack: Vec<ScopeInfo>, scope_stack: Vec<ScopeInfo<'ast>>,
/// The assignments we're currently visiting, with /// The assignments we're currently visiting, with
/// the most recent visit at the end of the Vec /// the most recent visit at the end of the Vec
current_assignments: Vec<CurrentAssignment<'ast, 'db>>, current_assignments: Vec<CurrentAssignment<'ast, 'db>>,
@ -167,13 +197,13 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
builder builder
} }
fn current_scope_info(&self) -> &ScopeInfo { fn current_scope_info(&self) -> &ScopeInfo<'ast> {
self.scope_stack self.scope_stack
.last() .last()
.expect("SemanticIndexBuilder should have created a root scope") .expect("SemanticIndexBuilder should have created a root scope")
} }
fn current_scope_info_mut(&mut self) -> &mut ScopeInfo { fn current_scope_info_mut(&mut self) -> &mut ScopeInfo<'ast> {
self.scope_stack self.scope_stack
.last_mut() .last_mut()
.expect("SemanticIndexBuilder should have created a root scope") .expect("SemanticIndexBuilder should have created a root scope")
@ -275,6 +305,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
self.scope_stack.push(ScopeInfo { self.scope_stack.push(ScopeInfo {
file_scope_id, file_scope_id,
current_loop: None, current_loop: None,
free_variables: FxHashMap::default(),
}); });
} }
@ -446,6 +477,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
let ScopeInfo { let ScopeInfo {
file_scope_id: popped_scope_id, file_scope_id: popped_scope_id,
free_variables: mut popped_free_variables,
.. ..
} = self } = self
.scope_stack .scope_stack
@ -458,13 +490,165 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
let popped_scope = &mut self.scopes[popped_scope_id]; let popped_scope = &mut self.scopes[popped_scope_id];
popped_scope.extend_descendants(children_end); popped_scope.extend_descendants(children_end);
let is_eager = popped_scope.is_eager();
let kind = popped_scope.kind();
if popped_scope.is_eager() { if is_eager {
self.record_eager_snapshots(popped_scope_id); self.record_eager_snapshots(popped_scope_id);
} else { } else {
self.record_lazy_snapshots(popped_scope_id); self.record_lazy_snapshots(popped_scope_id);
} }
// If we've popped a scope that free variables from nested (previously popped) scopes can
// refer to (i.e. not a class body), try to resolve outstanding free variables.
if kind.is_function_like() || popped_scope_id.is_global() {
// Look up each free variable name in the popped scope, and see if we've resolved it.
// Collect these in a separate list, to avoid borrowck woes.
struct Resolution {
name: ast::name::Name,
symbol_id: ScopedSymbolId,
// Either the symbol is declared `global`, or this is the global scope.
is_global: bool,
}
let mut resolutions = Vec::new();
for name in popped_free_variables.keys() {
if let Some(symbol_id) = self.place_tables[popped_scope_id].symbol_id(name.as_str())
{
// If a name is local or `global` here (i.e. bound or declared, and not marked
// `nonlocal`), then free variables of that name resolve here. Note that
// popping scopes in the normal stack order means that free variables resolve
// (correctly) to the closest scope with a matching definition.
let symbol = self.place_tables[popped_scope_id].symbol(symbol_id);
if symbol.is_local() || symbol.is_global() {
resolutions.push(Resolution {
name: name.clone(),
symbol_id,
is_global: symbol.is_global() || popped_scope_id.is_global(),
});
}
}
}
// Remove each resolved name along with all its references from
// `popped_free_variables`. For each reference, if it's bound in its nested scope, add
// an entry to `nested_scopes_with_bindings` in the popped scope's symbol table. This
// is also where we flag any `nonlocal` statements that resolve to globals, which is a
// semantic syntax error.
for resolution in resolutions {
let resolved_variables = popped_free_variables.remove(&resolution.name).unwrap();
for FreeVariable {
scope_id: nested_scope_id,
nonlocal_identifier,
} in resolved_variables
{
let nested_symbol_is_nonlocal = nonlocal_identifier.is_some();
if nested_symbol_is_nonlocal && resolution.is_global {
// If the symbol is declared `nonlocal` in the nested scope (rather than
// just used without a local binding or declaration), then it's a syntax
// error for it to resolve to the global scope or to a `global` statement.
self.report_semantic_error(SemanticSyntaxError {
kind: SemanticSyntaxErrorKind::NoBindingForNonlocal(
resolution.name.clone().into(),
),
range: nonlocal_identifier.unwrap().range(),
python_version: self.python_version,
});
} else {
let nested_place_table = &self.place_tables[nested_scope_id];
let nested_symbol_id =
nested_place_table.symbol_id(&resolution.name).unwrap();
let nested_symbol = nested_place_table.symbol(nested_symbol_id);
if nested_symbol.is_bound() {
self.place_tables[popped_scope_id].add_nested_scope_with_binding(
resolution.symbol_id,
nested_scope_id,
);
}
}
}
}
}
if popped_scope_id.is_global() {
// If we've popped the global/module scope, any remaining free variables are
// unresolved. The common case for these is built-ins like `print`, and rarer cases are
// things like direct insertions into `globals()`. However, if any `nonlocal` free
// variables are still unresolved, that's another syntax error.
debug_assert!(self.scope_stack.is_empty());
for (name, variables) in &popped_free_variables {
for variable in variables {
if let Some(nonlocal_identifier) = variable.nonlocal_identifier {
self.report_semantic_error(SemanticSyntaxError {
kind: SemanticSyntaxErrorKind::NoBindingForNonlocal(
name.clone().into(),
),
range: nonlocal_identifier.range(),
python_version: self.python_version,
});
}
}
}
} else {
// Otherwise, add any still-unresolved free variables from nested scopes to the parent
// scope's collection, and walk the popped scope's symbol table to collect any new free
// variables. During that walk, also record references to global variables.
let parent_free_variables = &mut self
.scope_stack
.last_mut() // current_scope_info_mut() would be a borrock error here
.expect("this is not the global/module scope")
.free_variables;
for (name, variables) in popped_free_variables {
parent_free_variables
.entry(name)
.or_default()
.extend(variables);
}
let popped_place_table = &self.place_tables[popped_scope_id];
let mut bound_global_symbols = Vec::new();
for symbol in popped_place_table.symbols() {
// Collect new implicit (not `nonlocal`) free variables.
//
// NOTE: Because these variables aren't bound (won't wind up in
// `nested_scopes_with_bindings`) and aren't `nonlocal` (can't trigger `nonlocal`
// syntax errors), collecting them currently has no effect. We could consider
// removing this bit and renaming `free_variables` to say `unresolved_nonlocals`?
if symbol.is_used()
&& !symbol.is_bound()
&& !symbol.is_declared()
&& !symbol.is_global()
// `nonlocal` variables are handled in `visit_stmt`, which lets us stash an AST
// reference.
&& !symbol.is_nonlocal()
{
parent_free_variables
.entry(symbol.name().clone())
.or_default()
.push(FreeVariable {
scope_id: popped_scope_id,
nonlocal_identifier: None,
});
}
// Record bindings of global variables. Put these in a temporary Vec as another
// borrowck workaround.
if symbol.is_global() && symbol.is_bound() {
bound_global_symbols.push(symbol.name().clone());
}
}
// Update the global scope with those references to globals, now that
// `popped_place_table` and `parent_free_variables` are no longer borrowed.
for symbol_name in bound_global_symbols {
// Add this symbol to the global scope, if it isn't there already.
let global_symbol_id = self.add_symbol_to_scope(symbol_name, FileScopeId::global());
// Update the global place table with this reference. Doing this here rather than
// when we first encounter the `global` statement lets us see whether the symbol is
// bound.
self.place_tables[FileScopeId::global()]
.add_nested_scope_with_binding(global_symbol_id, popped_scope_id);
}
}
popped_scope_id popped_scope_id
} }
@ -512,22 +696,36 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
/// Add a symbol to the place table and the use-def map. /// Add a symbol to the place table and the use-def map.
/// Return the [`ScopedPlaceId`] that uniquely identifies the symbol in both. /// Return the [`ScopedPlaceId`] that uniquely identifies the symbol in both.
fn add_symbol(&mut self, name: Name) -> ScopedSymbolId { fn add_symbol_to_scope(&mut self, name: Name, scope_id: FileScopeId) -> ScopedSymbolId {
let (symbol_id, added) = self.current_place_table_mut().add_symbol(Symbol::new(name)); let (symbol_id, added) = self.place_tables[scope_id].add_symbol(Symbol::new(name));
if added { if added {
self.current_use_def_map_mut().add_place(symbol_id.into()); self.use_def_maps[scope_id].add_place(symbol_id.into());
} }
symbol_id symbol_id
} }
fn add_symbol(&mut self, name: Name) -> ScopedSymbolId {
self.add_symbol_to_scope(name, self.current_scope())
}
/// Add a place to the place table and the use-def map.
/// Return the [`ScopedPlaceId`] that uniquely identifies the place in both.
fn add_place_to_scope(
&mut self,
place_expr: PlaceExpr,
scope_id: FileScopeId,
) -> ScopedPlaceId {
let (place_id, added) = self.place_tables[scope_id].add_place(place_expr);
if added {
self.use_def_maps[scope_id].add_place(place_id);
}
place_id
}
/// Add a place to the place table and the use-def map. /// Add a place to the place table and the use-def map.
/// Return the [`ScopedPlaceId`] that uniquely identifies the place in both. /// Return the [`ScopedPlaceId`] that uniquely identifies the place in both.
fn add_place(&mut self, place_expr: PlaceExpr) -> ScopedPlaceId { fn add_place(&mut self, place_expr: PlaceExpr) -> ScopedPlaceId {
let (place_id, added) = self.current_place_table_mut().add_place(place_expr); self.add_place_to_scope(place_expr, self.current_scope())
if added {
self.current_use_def_map_mut().add_place(place_id);
}
place_id
} }
#[track_caller] #[track_caller]
@ -2104,10 +2302,20 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
range: name.range, range: name.range,
python_version: self.python_version, python_version: self.python_version,
}); });
// Never mark a symbol both global and nonlocal, even in this error case.
continue;
}
// Assuming none of the rules above are violated, repeated `global`
// declarations are allowed and ignored.
if symbol.is_global() {
continue;
} }
self.current_place_table_mut() self.current_place_table_mut()
.symbol_mut(symbol_id) .symbol_mut(symbol_id)
.mark_global(); .mark_global();
// We'll add this symbol to the global scope in `pop_scope`, at the same time
// we're collecting free variables. That lets us record whether it's bound in
// this scope, which we don't know yet.
} }
walk_stmt(self, stmt); walk_stmt(self, stmt);
} }
@ -2137,19 +2345,38 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
range: name.range, range: name.range,
python_version: self.python_version, python_version: self.python_version,
}); });
// Never mark a symbol both global and nonlocal, even in this error case.
continue;
}
// Check whether this is the module scope, where `nonlocal` isn't allowed.
let scope_id = self.current_scope();
if scope_id.is_global() {
// The SemanticSyntaxChecker will report an error for this.
continue;
}
// Assuming none of the rules above are violated, repeated `nonlocal`
// declarations are allowed and ignored.
if symbol.is_nonlocal() {
continue;
} }
// The variable is required to exist in an enclosing scope, but that definition
// might come later. For example, this is example legal, but we can't check
// that here, because we haven't gotten to `x = 1`:
// ```py
// def f():
// def g():
// nonlocal x
// x = 1
// ```
self.current_place_table_mut() self.current_place_table_mut()
.symbol_mut(symbol_id) .symbol_mut(symbol_id)
.mark_nonlocal(); .mark_nonlocal();
// Add this symbol to the parent scope's set of free variables. (It would also
// work to add it to this scope's set, which will get folded into the parent's
// in `pop_scope`. But since it can't possibly resolve here, we might as well
// spare an allocation.) We checked above that we aren't in the module scope,
// so there's definitely a parent scope.
let parent_scope_index = self.scope_stack.len() - 2;
let parent_scope_info = &mut self.scope_stack[parent_scope_index];
parent_scope_info
.free_variables
.entry(name.id.clone())
.or_default()
.push(FreeVariable {
scope_id,
nonlocal_identifier: Some(name),
});
} }
walk_stmt(self, stmt); walk_stmt(self, stmt);
} }
@ -2177,6 +2404,9 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
// foo() // foo()
// ``` // ```
symbol.mark_bound(); symbol.mark_bound();
// TODO: `mark_used` might be redundant here, since `walk_stmt` visits
// the deleted expression, and `visit_expr` considers `del` to be a
// use.
symbol.mark_used(); symbol.mark_used();
} }

View File

@ -39,11 +39,6 @@ impl Member {
self.flags.contains(MemberFlags::IS_BOUND) self.flags.contains(MemberFlags::IS_BOUND)
} }
/// Is the place declared in its containing scope?
pub(crate) fn is_declared(&self) -> bool {
self.flags.contains(MemberFlags::IS_DECLARED)
}
pub(super) fn mark_bound(&mut self) { pub(super) fn mark_bound(&mut self) {
self.insert_flags(MemberFlags::IS_BOUND); self.insert_flags(MemberFlags::IS_BOUND);
} }

View File

@ -80,13 +80,6 @@ impl<'a> PlaceExprRef<'a> {
matches!(self, PlaceExprRef::Symbol(_)) matches!(self, PlaceExprRef::Symbol(_))
} }
pub(crate) fn is_declared(self) -> bool {
match self {
Self::Symbol(symbol) => symbol.is_declared(),
Self::Member(member) => member.is_declared(),
}
}
pub(crate) const fn is_bound(self) -> bool { pub(crate) const fn is_bound(self) -> bool {
match self { match self {
PlaceExprRef::Symbol(symbol) => symbol.is_bound(), PlaceExprRef::Symbol(symbol) => symbol.is_bound(),
@ -233,6 +226,14 @@ impl PlaceTable {
) -> Option<ScopedMemberId> { ) -> Option<ScopedMemberId> {
self.members.place_id_by_instance_attribute_name(name) self.members.place_id_by_instance_attribute_name(name)
} }
pub(crate) fn nested_scopes_with_bindings(&self, symbol_id: ScopedSymbolId) -> &[FileScopeId] {
if let Some(scopes) = self.symbols.nested_scopes_with_bindings.get(&symbol_id) {
scopes
} else {
&[]
}
}
} }
#[derive(Default)] #[derive(Default)]
@ -375,6 +376,15 @@ impl PlaceTableBuilder {
} }
} }
pub(super) fn add_nested_scope_with_binding(
&mut self,
this_scope_symbol_id: ScopedSymbolId,
nested_scope: FileScopeId,
) {
self.symbols
.add_nested_scope_with_binding(this_scope_symbol_id, nested_scope);
}
pub(crate) fn finish(self) -> PlaceTable { pub(crate) fn finish(self) -> PlaceTable {
PlaceTable { PlaceTable {
symbols: self.symbols.build(), symbols: self.symbols.build(),

View File

@ -1,8 +1,9 @@
use crate::semantic_index::scope::FileScopeId;
use bitflags::bitflags; use bitflags::bitflags;
use hashbrown::hash_table::Entry; use hashbrown::hash_table::Entry;
use ruff_index::{IndexVec, newtype_index}; use ruff_index::{IndexVec, newtype_index};
use ruff_python_ast::name::Name; use ruff_python_ast::name::Name;
use rustc_hash::FxHasher; use rustc_hash::{FxHashMap, FxHasher};
use std::hash::{Hash as _, Hasher as _}; use std::hash::{Hash as _, Hasher as _};
use std::ops::{Deref, DerefMut}; use std::ops::{Deref, DerefMut};
@ -156,6 +157,12 @@ pub(super) struct SymbolTable {
/// ///
/// Uses a hash table to avoid storing the name twice. /// Uses a hash table to avoid storing the name twice.
map: hashbrown::HashTable<ScopedSymbolId>, map: hashbrown::HashTable<ScopedSymbolId>,
// Variables defined in this scope, and not marked `global` or `nonlocal` here, which are also
// bound in nested scopes (by being marked `global` or `nonlocal` there). These (keys) are
// similar to what CPython calls "cell" variables, except that this scope may also be the
// global scope.
pub(super) nested_scopes_with_bindings: FxHashMap<ScopedSymbolId, Vec<FileScopeId>>,
} }
impl SymbolTable { impl SymbolTable {
@ -245,12 +252,30 @@ impl SymbolTableBuilder {
} }
} }
pub(super) fn add_nested_scope_with_binding(
&mut self,
this_scope_symbol_id: ScopedSymbolId,
nested_scope: FileScopeId,
) {
let bindings = self
.table
.nested_scopes_with_bindings
.entry(this_scope_symbol_id)
.or_default();
debug_assert!(
!bindings.contains(&nested_scope),
"the same scoped symbol shouldn't get added more than once",
);
bindings.push(nested_scope);
}
pub(super) fn build(self) -> SymbolTable { pub(super) fn build(self) -> SymbolTable {
let mut table = self.table; let mut table = self.table;
table.symbols.shrink_to_fit(); table.symbols.shrink_to_fit();
table table
.map .map
.shrink_to_fit(|id| SymbolTable::hash_name(&table.symbols[*id].name)); .shrink_to_fit(|id| SymbolTable::hash_name(&table.symbols[*id].name));
table.nested_scopes_with_bindings.shrink_to_fit();
table table
} }
} }

View File

@ -202,7 +202,7 @@ enum ReduceResult<'db> {
// TODO increase this once we extend `UnionElement` throughout all union/intersection // TODO increase this once we extend `UnionElement` throughout all union/intersection
// representations, so that we can make large unions of literals fast in all operations. // representations, so that we can make large unions of literals fast in all operations.
const MAX_UNION_LITERALS: usize = 200; const MAX_UNION_LITERALS: usize = 100;
pub(crate) struct UnionBuilder<'db> { pub(crate) struct UnionBuilder<'db> {
elements: Vec<UnionElement<'db>>, elements: Vec<UnionElement<'db>>,

View File

@ -37,7 +37,6 @@
//! be considered a bug.) //! be considered a bug.)
use itertools::{Either, Itertools}; use itertools::{Either, Itertools};
use ruff_db::diagnostic::{Annotation, DiagnosticId, Severity};
use ruff_db::files::File; use ruff_db::files::File;
use ruff_db::parsed::{ParsedModuleRef, parsed_module}; use ruff_db::parsed::{ParsedModuleRef, parsed_module};
use ruff_python_ast::visitor::{Visitor, walk_expr}; use ruff_python_ast::visitor::{Visitor, walk_expr};
@ -85,7 +84,7 @@ use crate::semantic_index::place::{PlaceExpr, PlaceExprRef};
use crate::semantic_index::scope::{ use crate::semantic_index::scope::{
FileScopeId, NodeWithScopeKind, NodeWithScopeRef, ScopeId, ScopeKind, FileScopeId, NodeWithScopeKind, NodeWithScopeRef, ScopeId, ScopeKind,
}; };
use crate::semantic_index::symbol::ScopedSymbolId; use crate::semantic_index::symbol::{ScopedSymbolId, Symbol};
use crate::semantic_index::{ use crate::semantic_index::{
ApplicableConstraints, EnclosingSnapshotResult, SemanticIndex, place_table, semantic_index, ApplicableConstraints, EnclosingSnapshotResult, SemanticIndex, place_table, semantic_index,
}; };
@ -2548,9 +2547,9 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
ast::Stmt::Raise(raise) => self.infer_raise_statement(raise), ast::Stmt::Raise(raise) => self.infer_raise_statement(raise),
ast::Stmt::Return(ret) => self.infer_return_statement(ret), ast::Stmt::Return(ret) => self.infer_return_statement(ret),
ast::Stmt::Delete(delete) => self.infer_delete_statement(delete), ast::Stmt::Delete(delete) => self.infer_delete_statement(delete),
ast::Stmt::Nonlocal(nonlocal) => self.infer_nonlocal_statement(nonlocal),
ast::Stmt::Global(global) => self.infer_global_statement(global), ast::Stmt::Global(global) => self.infer_global_statement(global),
ast::Stmt::Break(_) ast::Stmt::Nonlocal(_)
| ast::Stmt::Break(_)
| ast::Stmt::Continue(_) | ast::Stmt::Continue(_)
| ast::Stmt::Pass(_) | ast::Stmt::Pass(_)
| ast::Stmt::IpyEscapeCommand(_) => { | ast::Stmt::IpyEscapeCommand(_) => {
@ -5355,75 +5354,6 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
} }
} }
fn infer_nonlocal_statement(&mut self, nonlocal: &ast::StmtNonlocal) {
let ast::StmtNonlocal {
node_index: _,
range,
names,
} = nonlocal;
let db = self.db();
let scope = self.scope();
let file_scope_id = scope.file_scope_id(db);
let current_file = self.file();
'names: for name in names {
// Walk up parent scopes looking for a possible enclosing scope that may have a
// definition of this name visible to us. Note that we skip the scope containing the
// use that we are resolving, since we already looked for the place there up above.
for (enclosing_scope_file_id, _) in self.index.ancestor_scopes(file_scope_id).skip(1) {
// Class scopes are not visible to nested scopes, and `nonlocal` cannot refer to
// globals, so check only function-like scopes.
let enclosing_scope_id = enclosing_scope_file_id.to_scope_id(db, current_file);
if !enclosing_scope_id.is_function_like(db) {
continue;
}
let enclosing_place_table = self.index.place_table(enclosing_scope_file_id);
let Some(enclosing_symbol_id) = enclosing_place_table.symbol_id(name) else {
// This scope doesn't define this name. Keep going.
continue;
};
let enclosing_symbol = enclosing_place_table.symbol(enclosing_symbol_id);
// We've found a definition for this name in an enclosing function-like scope.
// Either this definition is the valid place this name refers to, or else we'll
// emit a syntax error. Either way, we won't walk any more enclosing scopes. Note
// that there are differences here compared to `infer_place_load`: A regular load
// (e.g. `print(x)`) is allowed to refer to a global variable (e.g. `x = 1` in the
// global scope), and similarly it's allowed to refer to a local variable in an
// enclosing function that's declared `global` (e.g. `global x`). However, the
// `nonlocal` keyword can't refer to global variables (that's a `SyntaxError`), and
// it also can't refer to local variables in enclosing functions that are declared
// `global` (also a `SyntaxError`).
if enclosing_symbol.is_global() {
// A "chain" of `nonlocal` statements is "broken" by a `global` statement. Stop
// looping and report that this `nonlocal` statement is invalid.
break;
}
if !enclosing_symbol.is_bound()
&& !enclosing_symbol.is_declared()
&& !enclosing_symbol.is_nonlocal()
{
debug_assert!(enclosing_symbol.is_used());
// The name is only referenced here, not defined. Keep going.
continue;
}
// We found a definition. We've checked that the name isn't `global` in this scope,
// but it's ok if it's `nonlocal`. If a "chain" of `nonlocal` statements fails to
// lead to a valid binding, the outermost one will be an error; we don't need to
// walk the whole chain for each one.
continue 'names;
}
// There's no matching binding in an enclosing scope. This `nonlocal` statement is
// invalid.
if let Some(builder) = self
.context
.report_diagnostic(DiagnosticId::InvalidSyntax, Severity::Error)
{
builder
.into_diagnostic(format_args!("no binding for nonlocal `{name}` found"))
.annotate(Annotation::primary(self.context.span(*range)));
}
}
}
fn module_type_from_name(&self, module_name: &ModuleName) -> Option<Type<'db>> { fn module_type_from_name(&self, module_name: &ModuleName) -> Option<Type<'db>> {
resolve_module(self.db(), module_name) resolve_module(self.db(), module_name)
.map(|module| Type::module_literal(self.db(), self.file(), module)) .map(|module| Type::module_literal(self.db(), self.file(), module))
@ -6724,8 +6654,6 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
// definition of this name visible to us (would be `LOAD_DEREF` at runtime.) // definition of this name visible to us (would be `LOAD_DEREF` at runtime.)
// Note that we skip the scope containing the use that we are resolving, since we // Note that we skip the scope containing the use that we are resolving, since we
// already looked for the place there up above. // already looked for the place there up above.
let mut nonlocal_union_builder = UnionBuilder::new(db);
let mut found_some_definition = false;
for (enclosing_scope_file_id, _) in self.index.ancestor_scopes(file_scope_id).skip(1) { for (enclosing_scope_file_id, _) in self.index.ancestor_scopes(file_scope_id).skip(1) {
// Class scopes are not visible to nested scopes, and we need to handle global // Class scopes are not visible to nested scopes, and we need to handle global
// scope differently (because an unbound name there falls back to builtins), so // scope differently (because an unbound name there falls back to builtins), so
@ -6821,22 +6749,17 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
// see a `global` declaration, stop walking scopes and proceed to the global // see a `global` declaration, stop walking scopes and proceed to the global
// handling below. (If we're walking from a prior/inner scope where this variable // handling below. (If we're walking from a prior/inner scope where this variable
// is `nonlocal`, then this is a semantic syntax error, but we don't enforce that // is `nonlocal`, then this is a semantic syntax error, but we don't enforce that
// here. See `infer_nonlocal_statement`.) // here. See `SemanticSyntaxBuilder::pop_scope`.)
if enclosing_place if enclosing_place.as_symbol().is_some_and(Symbol::is_global) {
.as_symbol()
.is_some_and(super::super::semantic_index::symbol::Symbol::is_global)
{
break; break;
} }
// If the name is declared or bound in this scope, figure out its type. This might // If we've reached the scope where the name is local (bound or declared, and not
// resolve the name and end the walk. But if the name is declared `nonlocal` in // marked `global` or `nonlocal`), end the walk and infer its "public" type. This
// this scope, we'll keep walking enclosing scopes and union this type with the // considers bindings from nested scopes, not only those we just walked but also
// other types we find. (It's a semantic syntax error to declare a type for a // sibling/cousin scopes.
// `nonlocal` variable, but we don't enforce that here. See the if enclosing_place.as_symbol().is_some_and(Symbol::is_local) {
// `ast::Stmt::AnnAssign` handling in `SemanticIndexBuilder::visit_stmt`.) return place(
if enclosing_place.is_bound() || enclosing_place.is_declared() {
let local_place_and_qualifiers = place(
db, db,
enclosing_scope_id, enclosing_scope_id,
place_expr, place_expr,
@ -6849,28 +6772,6 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
&constraint_keys, &constraint_keys,
) )
}); });
// We could have Place::Unbound here, despite the checks above, for example if
// this scope contains a `del` statement but no binding or declaration.
if let Place::Type(type_, boundness) = local_place_and_qualifiers.place {
nonlocal_union_builder.add_in_place(type_);
// `ConsideredDefinitions::AllReachable` never returns PossiblyUnbound
debug_assert_eq!(boundness, Boundness::Bound);
found_some_definition = true;
}
if !enclosing_place
.as_symbol()
.is_some_and(super::super::semantic_index::symbol::Symbol::is_nonlocal)
{
// We've reached a function-like scope that marks this name bound or
// declared but doesn't mark it `nonlocal`. The name is therefore resolved,
// and we won't consider any scopes outside of this one.
return if found_some_definition {
Place::Type(nonlocal_union_builder.build(), Boundness::Bound).into()
} else {
Place::Unbound.into()
};
}
} }
} }