stop tracking implicit (read-only) free variables

This commit is contained in:
Jack O'Connor 2025-08-08 12:54:02 -07:00
parent 56e550176c
commit b82bbcd51c
1 changed files with 76 additions and 104 deletions

View File

@ -14,7 +14,7 @@ use ruff_python_ast::{self as ast, NodeIndex, PySourceType, PythonVersion};
use ruff_python_parser::semantic_errors::{
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind,
};
use ruff_text_size::{Ranged, TextRange};
use ruff_text_size::TextRange;
use crate::ast_node_ref::AstNodeRef;
use crate::module_name::ModuleName;
@ -72,34 +72,36 @@ struct ScopeInfo {
/// Current loop state; None if we are not currently visiting a loop
current_loop: Option<Loop>,
/// Symbols from scopes nested inside of this one that haven't yet been resolved to a
/// definition. They might end up resolving in this scope, or in an enclosing scope.
/// `nonlocal` variables from scopes nested inside of this one that haven't yet been resolved
/// to a definition. They might end up resolving in this scope, or in an enclosing scope.
///
/// When we pop scopes, we merge any unresolved free variables into the parent scope's
/// collection. The reason we need to collect free variables for each scope separately, instead
/// of just having one map for the whole builder, is because of sibling scope arrangements like
/// this:
/// When we pop scopes, we merge any unresolved nonlocals into the parent scope's collection.
/// The reason we need to track them for each scope separately, instead of using one map for
/// the whole builder, is because of sibling scope arrangements like this:
/// ```py
/// def f():
/// def g():
/// # When we pop `g`, this `x` goes in `f`'s set of free variables.
/// # When we pop `g`, this `x` goes in `f`'s set of unresolved nonlocals.
/// nonlocal x
/// def h():
/// # When we pop `h`, this binding of `x` won't resolve the free variable from `g`,
/// # because it's not in `h`'s set of free variables.
/// # When we pop `h`, this binding of `x` will *not* resolve the nonlocal from `g`,
/// # because it's not in `h`'s set of unresolved nonlocals.
/// x = 1
/// # When we pop `f`, this binding of `x` will resolve the free variable from `g`.
/// # When we pop `f`, this binding of `x` will resolve the nonlocal from `g`.
/// x = 1
/// ```
free_variables: FxHashMap<ast::name::Name, Vec<FreeVariable>>,
///
/// Currently we only track explicit nonlocals, because ordinary "free" variables referring to
/// enclosing scopes can't be bound and can't trigger semantic syntax errors. Callers who need
/// to resolve free variables (e.g. `infer_place_load`) need to walk parent scopes until they
/// find one where `Symbol::is_local` or `Symbol::is_global` is true. If we wanted to
/// pre-record more of that information, we could expand this.
unresolved_nonlocals: FxHashMap<ast::name::Name, Vec<UnresolvedNonlocal>>,
}
struct FreeVariable {
struct UnresolvedNonlocal {
scope_id: FileScopeId,
// If this variable is `nonlocal`, then this is `Some` reference to its identifier in the
// `nonlocal` statement. In that case, it's an error if we don't resolve it before we reach the
// global scope (or if we resolve it in a scope where it's `global`).
nonlocal_identifier: Option<TextRange>,
range: TextRange,
}
pub(super) struct SemanticIndexBuilder<'db, 'ast> {
@ -305,7 +307,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
self.scope_stack.push(ScopeInfo {
file_scope_id,
current_loop: None,
free_variables: FxHashMap::default(),
unresolved_nonlocals: FxHashMap::default(),
});
}
@ -477,7 +479,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
let ScopeInfo {
file_scope_id: popped_scope_id,
free_variables: mut popped_free_variables,
unresolved_nonlocals: mut popped_unresolved_nonlocals,
..
} = self
.scope_stack
@ -499,42 +501,39 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
self.record_lazy_snapshots(popped_scope_id);
}
// If we've popped a scope that free variables from nested (previously popped) scopes can
// refer to (i.e. not a class body), try to resolve outstanding free variables.
// If we've popped a scope that nonlocals from nested (previously popped) scopes can refer
// to (i.e. not a class body), try to resolve them.
if kind.is_function_like() || popped_scope_id.is_global() {
popped_free_variables.retain(|name, references| {
popped_unresolved_nonlocals.retain(|name, nonlocals_with_this_name| {
let popped_place_table = &self.place_tables[popped_scope_id];
if let Some(symbol_id) = popped_place_table.symbol_id(name.as_str()) {
let symbol = popped_place_table.symbol(symbol_id);
let symbol_is_resolved = symbol.is_local() || symbol.is_global();
let resolution_is_global = symbol.is_global() || popped_scope_id.is_global();
if symbol_is_resolved {
for FreeVariable {
for &mut UnresolvedNonlocal {
scope_id: nested_scope_id,
nonlocal_identifier,
} in references
range,
} in nonlocals_with_this_name
{
if let Some(nonlocal_identifier_range) = nonlocal_identifier {
if resolution_is_global {
// If the symbol is declared `nonlocal` in the nested scope (rather than
// just used without a local binding or declaration), then it's a syntax
// error for it to resolve to the global scope or to a `global` statement.
self.report_semantic_error(SemanticSyntaxError {
kind: SemanticSyntaxErrorKind::NoBindingForNonlocal(
name.clone().into(),
),
range: *nonlocal_identifier_range,
python_version: self.python_version,
});
continue;
}
if resolution_is_global {
// It's a syntax error for a nonlocal variable to resolve to the
// global scope or to a `global` statement in an enclosing scope.
self.report_semantic_error(SemanticSyntaxError {
kind: SemanticSyntaxErrorKind::NoBindingForNonlocal(
name.clone().into(),
),
range,
python_version: self.python_version,
});
continue;
}
let nested_place_table = &self.place_tables[*nested_scope_id];
let nested_place_table = &self.place_tables[nested_scope_id];
let nested_symbol_id = nested_place_table.symbol_id(name).unwrap();
let nested_symbol = nested_place_table.symbol(nested_symbol_id);
if nested_symbol.is_bound() {
self.place_tables[popped_scope_id]
.add_nested_scope_with_binding(symbol_id, *nested_scope_id);
.add_nested_scope_with_binding(symbol_id, nested_scope_id);
}
}
// This name was resolved. Remove it and its references.
@ -548,74 +547,47 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
}
if popped_scope_id.is_global() {
// If we've popped the global/module scope, any remaining free variables are
// unresolved. The common case for these is built-ins like `print`, and rarer cases are
// things like direct insertions into `globals()`. However, if any `nonlocal` free
// variables are still unresolved, that's another syntax error.
// If we've popped the global/module scope, still-unresolved `nonlocal` variables are
// another syntax error.
debug_assert!(self.scope_stack.is_empty());
for (name, variables) in &popped_free_variables {
for variable in variables {
if let Some(nonlocal_identifier) = variable.nonlocal_identifier {
self.report_semantic_error(SemanticSyntaxError {
kind: SemanticSyntaxErrorKind::NoBindingForNonlocal(
name.clone().into(),
),
range: nonlocal_identifier.range(),
python_version: self.python_version,
});
}
for (name, nonlocals) in &popped_unresolved_nonlocals {
for nonlocal in nonlocals {
self.report_semantic_error(SemanticSyntaxError {
kind: SemanticSyntaxErrorKind::NoBindingForNonlocal(name.clone().into()),
range: nonlocal.range,
python_version: self.python_version,
});
}
}
} else {
// Otherwise, add any still-unresolved free variables from nested scopes to the parent
// scope's collection, and walk the popped scope's symbol table to collect any new free
// variables. During that walk, also record references to global variables.
let parent_free_variables = &mut self
// Otherwise, add any still-unresolved nonlocals from nested scopes to the parent
// scope's collection.
let parent_unresolved_nonlocals = &mut self
.scope_stack
.last_mut() // current_scope_info_mut() would be a borrock error here
.expect("this is not the global/module scope")
.free_variables;
for (name, variables) in popped_free_variables {
parent_free_variables
.unresolved_nonlocals;
for (name, variables) in popped_unresolved_nonlocals {
parent_unresolved_nonlocals
.entry(name)
.or_default()
.extend(variables);
}
let popped_place_table = &self.place_tables[popped_scope_id];
let mut bound_global_symbols = Vec::new();
for symbol in popped_place_table.symbols() {
// Collect new implicit (not `nonlocal`) free variables.
//
// NOTE: Because these variables aren't bound (won't wind up in
// `nested_scopes_with_bindings`) and aren't `nonlocal` (can't trigger `nonlocal`
// syntax errors), collecting them currently has no effect. We could consider
// removing this bit and renaming `free_variables` to say `unresolved_nonlocals`?
if symbol.is_used()
&& !symbol.is_bound()
&& !symbol.is_declared()
&& !symbol.is_global()
// `nonlocal` variables are handled in `visit_stmt`, which lets us stash an AST
// reference.
&& !symbol.is_nonlocal()
{
parent_free_variables
.entry(symbol.name().clone())
.or_default()
.push(FreeVariable {
scope_id: popped_scope_id,
nonlocal_identifier: None,
});
}
// Record bindings of global variables. Put these in a temporary Vec as another
// borrowck workaround.
// Also, update the global/module symbol table with any bound `global` variables in
// this scope. We do this here, rather than when we visit the `global` statement,
// because at that point we don't know whether the variable is bound.
let mut bound_global_symbols = Vec::new();
for symbol in self.place_tables[popped_scope_id].symbols() {
// Record bindings of global variables in a temporary Vec as a borrowck workaround.
// We could get clever here, but `global` variables are relatively rare, and
// allocating a small Vec in those cases isn't expensive.
if symbol.is_global() && symbol.is_bound() {
bound_global_symbols.push(symbol.name().clone());
}
}
// Update the global scope with those references to globals, now that
// `popped_place_table` and `parent_free_variables` are no longer borrowed.
// Update the global scope with those bindings, now that `self.place_tables` is no
// longer borrowed.
for symbol_name in bound_global_symbols {
// Add this symbol to the global scope, if it isn't there already.
let global_symbol_id = self.add_symbol_to_scope(symbol_name, FileScopeId::global());
@ -2291,9 +2263,9 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
self.current_place_table_mut()
.symbol_mut(symbol_id)
.mark_global();
// We'll add this symbol to the global scope in `pop_scope`, at the same time
// we're collecting free variables. That lets us record whether it's bound in
// this scope, which we don't know yet.
// We'll add this symbol to the global scope in `pop_scope`, after we resolve
// nonlocals. That lets us record whether it's bound in this scope, which we
// don't know yet.
}
walk_stmt(self, stmt);
}
@ -2340,20 +2312,20 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
self.current_place_table_mut()
.symbol_mut(symbol_id)
.mark_nonlocal();
// Add this symbol to the parent scope's set of free variables. (It would also
// work to add it to this scope's set, which will get folded into the parent's
// in `pop_scope`. But since it can't possibly resolve here, we might as well
// spare an allocation.) We checked above that we aren't in the module scope,
// so there's definitely a parent scope.
// Add this symbol to the parent scope's set of unresolved nonlocals. (It would
// also work to add it to this scope's set, which will get folded into the
// parent's in `pop_scope`. But since it can't possibly resolve here, we might
// as well try to spare an allocation.) We checked above that we aren't in the
// module scope, so there's definitely a parent scope.
let parent_scope_index = self.scope_stack.len() - 2;
let parent_scope_info = &mut self.scope_stack[parent_scope_index];
parent_scope_info
.free_variables
.unresolved_nonlocals
.entry(name.id.clone())
.or_default()
.push(FreeVariable {
.push(UnresolvedNonlocal {
scope_id,
nonlocal_identifier: Some(name.range),
range: name.range,
});
}
walk_stmt(self, stmt);