stop tracking implicit (read-only) free variables

This commit is contained in:
Jack O'Connor 2025-08-08 12:54:02 -07:00
parent 56e550176c
commit b82bbcd51c
1 changed files with 76 additions and 104 deletions

View File

@ -14,7 +14,7 @@ use ruff_python_ast::{self as ast, NodeIndex, PySourceType, PythonVersion};
use ruff_python_parser::semantic_errors::{ use ruff_python_parser::semantic_errors::{
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind, SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind,
}; };
use ruff_text_size::{Ranged, TextRange}; use ruff_text_size::TextRange;
use crate::ast_node_ref::AstNodeRef; use crate::ast_node_ref::AstNodeRef;
use crate::module_name::ModuleName; use crate::module_name::ModuleName;
@ -72,34 +72,36 @@ struct ScopeInfo {
/// Current loop state; None if we are not currently visiting a loop /// Current loop state; None if we are not currently visiting a loop
current_loop: Option<Loop>, current_loop: Option<Loop>,
/// Symbols from scopes nested inside of this one that haven't yet been resolved to a /// `nonlocal` variables from scopes nested inside of this one that haven't yet been resolved
/// definition. They might end up resolving in this scope, or in an enclosing scope. /// to a definition. They might end up resolving in this scope, or in an enclosing scope.
/// ///
/// When we pop scopes, we merge any unresolved free variables into the parent scope's /// When we pop scopes, we merge any unresolved nonlocals into the parent scope's collection.
/// collection. The reason we need to collect free variables for each scope separately, instead /// The reason we need to track them for each scope separately, instead of using one map for
/// of just having one map for the whole builder, is because of sibling scope arrangements like /// the whole builder, is because of sibling scope arrangements like this:
/// this:
/// ```py /// ```py
/// def f(): /// def f():
/// def g(): /// def g():
/// # When we pop `g`, this `x` goes in `f`'s set of free variables. /// # When we pop `g`, this `x` goes in `f`'s set of unresolved nonlocals.
/// nonlocal x /// nonlocal x
/// def h(): /// def h():
/// # When we pop `h`, this binding of `x` won't resolve the free variable from `g`, /// # When we pop `h`, this binding of `x` will *not* resolve the nonlocal from `g`,
/// # because it's not in `h`'s set of free variables. /// # because it's not in `h`'s set of unresolved nonlocals.
/// x = 1 /// x = 1
/// # When we pop `f`, this binding of `x` will resolve the free variable from `g`. /// # When we pop `f`, this binding of `x` will resolve the nonlocal from `g`.
/// x = 1 /// x = 1
/// ``` /// ```
free_variables: FxHashMap<ast::name::Name, Vec<FreeVariable>>, ///
/// Currently we only track explicit nonlocals, because ordinary "free" variables referring to
/// enclosing scopes can't be bound and can't trigger semantic syntax errors. Callers who need
/// to resolve free variables (e.g. `infer_place_load`) need to walk parent scopes until they
/// find one where `Symbol::is_local` or `Symbol::is_global` is true. If we wanted to
/// pre-record more of that information, we could expand this.
unresolved_nonlocals: FxHashMap<ast::name::Name, Vec<UnresolvedNonlocal>>,
} }
struct FreeVariable { struct UnresolvedNonlocal {
scope_id: FileScopeId, scope_id: FileScopeId,
// If this variable is `nonlocal`, then this is `Some` reference to its identifier in the range: TextRange,
// `nonlocal` statement. In that case, it's an error if we don't resolve it before we reach the
// global scope (or if we resolve it in a scope where it's `global`).
nonlocal_identifier: Option<TextRange>,
} }
pub(super) struct SemanticIndexBuilder<'db, 'ast> { pub(super) struct SemanticIndexBuilder<'db, 'ast> {
@ -305,7 +307,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
self.scope_stack.push(ScopeInfo { self.scope_stack.push(ScopeInfo {
file_scope_id, file_scope_id,
current_loop: None, current_loop: None,
free_variables: FxHashMap::default(), unresolved_nonlocals: FxHashMap::default(),
}); });
} }
@ -477,7 +479,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
let ScopeInfo { let ScopeInfo {
file_scope_id: popped_scope_id, file_scope_id: popped_scope_id,
free_variables: mut popped_free_variables, unresolved_nonlocals: mut popped_unresolved_nonlocals,
.. ..
} = self } = self
.scope_stack .scope_stack
@ -499,42 +501,39 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
self.record_lazy_snapshots(popped_scope_id); self.record_lazy_snapshots(popped_scope_id);
} }
// If we've popped a scope that free variables from nested (previously popped) scopes can // If we've popped a scope that nonlocals from nested (previously popped) scopes can refer
// refer to (i.e. not a class body), try to resolve outstanding free variables. // to (i.e. not a class body), try to resolve them.
if kind.is_function_like() || popped_scope_id.is_global() { if kind.is_function_like() || popped_scope_id.is_global() {
popped_free_variables.retain(|name, references| { popped_unresolved_nonlocals.retain(|name, nonlocals_with_this_name| {
let popped_place_table = &self.place_tables[popped_scope_id]; let popped_place_table = &self.place_tables[popped_scope_id];
if let Some(symbol_id) = popped_place_table.symbol_id(name.as_str()) { if let Some(symbol_id) = popped_place_table.symbol_id(name.as_str()) {
let symbol = popped_place_table.symbol(symbol_id); let symbol = popped_place_table.symbol(symbol_id);
let symbol_is_resolved = symbol.is_local() || symbol.is_global(); let symbol_is_resolved = symbol.is_local() || symbol.is_global();
let resolution_is_global = symbol.is_global() || popped_scope_id.is_global(); let resolution_is_global = symbol.is_global() || popped_scope_id.is_global();
if symbol_is_resolved { if symbol_is_resolved {
for FreeVariable { for &mut UnresolvedNonlocal {
scope_id: nested_scope_id, scope_id: nested_scope_id,
nonlocal_identifier, range,
} in references } in nonlocals_with_this_name
{ {
if let Some(nonlocal_identifier_range) = nonlocal_identifier { if resolution_is_global {
if resolution_is_global { // It's a syntax error for a nonlocal variable to resolve to the
// If the symbol is declared `nonlocal` in the nested scope (rather than // global scope or to a `global` statement in an enclosing scope.
// just used without a local binding or declaration), then it's a syntax self.report_semantic_error(SemanticSyntaxError {
// error for it to resolve to the global scope or to a `global` statement. kind: SemanticSyntaxErrorKind::NoBindingForNonlocal(
self.report_semantic_error(SemanticSyntaxError { name.clone().into(),
kind: SemanticSyntaxErrorKind::NoBindingForNonlocal( ),
name.clone().into(), range,
), python_version: self.python_version,
range: *nonlocal_identifier_range, });
python_version: self.python_version, continue;
});
continue;
}
} }
let nested_place_table = &self.place_tables[*nested_scope_id]; let nested_place_table = &self.place_tables[nested_scope_id];
let nested_symbol_id = nested_place_table.symbol_id(name).unwrap(); let nested_symbol_id = nested_place_table.symbol_id(name).unwrap();
let nested_symbol = nested_place_table.symbol(nested_symbol_id); let nested_symbol = nested_place_table.symbol(nested_symbol_id);
if nested_symbol.is_bound() { if nested_symbol.is_bound() {
self.place_tables[popped_scope_id] self.place_tables[popped_scope_id]
.add_nested_scope_with_binding(symbol_id, *nested_scope_id); .add_nested_scope_with_binding(symbol_id, nested_scope_id);
} }
} }
// This name was resolved. Remove it and its references. // This name was resolved. Remove it and its references.
@ -548,74 +547,47 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
} }
if popped_scope_id.is_global() { if popped_scope_id.is_global() {
// If we've popped the global/module scope, any remaining free variables are // If we've popped the global/module scope, still-unresolved `nonlocal` variables are
// unresolved. The common case for these is built-ins like `print`, and rarer cases are // another syntax error.
// things like direct insertions into `globals()`. However, if any `nonlocal` free
// variables are still unresolved, that's another syntax error.
debug_assert!(self.scope_stack.is_empty()); debug_assert!(self.scope_stack.is_empty());
for (name, variables) in &popped_free_variables { for (name, nonlocals) in &popped_unresolved_nonlocals {
for variable in variables { for nonlocal in nonlocals {
if let Some(nonlocal_identifier) = variable.nonlocal_identifier { self.report_semantic_error(SemanticSyntaxError {
self.report_semantic_error(SemanticSyntaxError { kind: SemanticSyntaxErrorKind::NoBindingForNonlocal(name.clone().into()),
kind: SemanticSyntaxErrorKind::NoBindingForNonlocal( range: nonlocal.range,
name.clone().into(), python_version: self.python_version,
), });
range: nonlocal_identifier.range(),
python_version: self.python_version,
});
}
} }
} }
} else { } else {
// Otherwise, add any still-unresolved free variables from nested scopes to the parent // Otherwise, add any still-unresolved nonlocals from nested scopes to the parent
// scope's collection, and walk the popped scope's symbol table to collect any new free // scope's collection.
// variables. During that walk, also record references to global variables. let parent_unresolved_nonlocals = &mut self
let parent_free_variables = &mut self
.scope_stack .scope_stack
.last_mut() // current_scope_info_mut() would be a borrock error here .last_mut() // current_scope_info_mut() would be a borrock error here
.expect("this is not the global/module scope") .expect("this is not the global/module scope")
.free_variables; .unresolved_nonlocals;
for (name, variables) in popped_free_variables { for (name, variables) in popped_unresolved_nonlocals {
parent_free_variables parent_unresolved_nonlocals
.entry(name) .entry(name)
.or_default() .or_default()
.extend(variables); .extend(variables);
} }
let popped_place_table = &self.place_tables[popped_scope_id];
let mut bound_global_symbols = Vec::new();
for symbol in popped_place_table.symbols() {
// Collect new implicit (not `nonlocal`) free variables.
//
// NOTE: Because these variables aren't bound (won't wind up in
// `nested_scopes_with_bindings`) and aren't `nonlocal` (can't trigger `nonlocal`
// syntax errors), collecting them currently has no effect. We could consider
// removing this bit and renaming `free_variables` to say `unresolved_nonlocals`?
if symbol.is_used()
&& !symbol.is_bound()
&& !symbol.is_declared()
&& !symbol.is_global()
// `nonlocal` variables are handled in `visit_stmt`, which lets us stash an AST
// reference.
&& !symbol.is_nonlocal()
{
parent_free_variables
.entry(symbol.name().clone())
.or_default()
.push(FreeVariable {
scope_id: popped_scope_id,
nonlocal_identifier: None,
});
}
// Record bindings of global variables. Put these in a temporary Vec as another // Also, update the global/module symbol table with any bound `global` variables in
// borrowck workaround. // this scope. We do this here, rather than when we visit the `global` statement,
// because at that point we don't know whether the variable is bound.
let mut bound_global_symbols = Vec::new();
for symbol in self.place_tables[popped_scope_id].symbols() {
// Record bindings of global variables in a temporary Vec as a borrowck workaround.
// We could get clever here, but `global` variables are relatively rare, and
// allocating a small Vec in those cases isn't expensive.
if symbol.is_global() && symbol.is_bound() { if symbol.is_global() && symbol.is_bound() {
bound_global_symbols.push(symbol.name().clone()); bound_global_symbols.push(symbol.name().clone());
} }
} }
// Update the global scope with those bindings, now that `self.place_tables` is no
// Update the global scope with those references to globals, now that // longer borrowed.
// `popped_place_table` and `parent_free_variables` are no longer borrowed.
for symbol_name in bound_global_symbols { for symbol_name in bound_global_symbols {
// Add this symbol to the global scope, if it isn't there already. // Add this symbol to the global scope, if it isn't there already.
let global_symbol_id = self.add_symbol_to_scope(symbol_name, FileScopeId::global()); let global_symbol_id = self.add_symbol_to_scope(symbol_name, FileScopeId::global());
@ -2291,9 +2263,9 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
self.current_place_table_mut() self.current_place_table_mut()
.symbol_mut(symbol_id) .symbol_mut(symbol_id)
.mark_global(); .mark_global();
// We'll add this symbol to the global scope in `pop_scope`, at the same time // We'll add this symbol to the global scope in `pop_scope`, after we resolve
// we're collecting free variables. That lets us record whether it's bound in // nonlocals. That lets us record whether it's bound in this scope, which we
// this scope, which we don't know yet. // don't know yet.
} }
walk_stmt(self, stmt); walk_stmt(self, stmt);
} }
@ -2340,20 +2312,20 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
self.current_place_table_mut() self.current_place_table_mut()
.symbol_mut(symbol_id) .symbol_mut(symbol_id)
.mark_nonlocal(); .mark_nonlocal();
// Add this symbol to the parent scope's set of free variables. (It would also // Add this symbol to the parent scope's set of unresolved nonlocals. (It would
// work to add it to this scope's set, which will get folded into the parent's // also work to add it to this scope's set, which will get folded into the
// in `pop_scope`. But since it can't possibly resolve here, we might as well // parent's in `pop_scope`. But since it can't possibly resolve here, we might
// spare an allocation.) We checked above that we aren't in the module scope, // as well try to spare an allocation.) We checked above that we aren't in the
// so there's definitely a parent scope. // module scope, so there's definitely a parent scope.
let parent_scope_index = self.scope_stack.len() - 2; let parent_scope_index = self.scope_stack.len() - 2;
let parent_scope_info = &mut self.scope_stack[parent_scope_index]; let parent_scope_info = &mut self.scope_stack[parent_scope_index];
parent_scope_info parent_scope_info
.free_variables .unresolved_nonlocals
.entry(name.id.clone()) .entry(name.id.clone())
.or_default() .or_default()
.push(FreeVariable { .push(UnresolvedNonlocal {
scope_id, scope_id,
nonlocal_identifier: Some(name.range), range: name.range,
}); });
} }
walk_stmt(self, stmt); walk_stmt(self, stmt);