From aa41ffcfde55882554820e82387af4f04fc1be76 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 14 Jun 2023 09:27:24 -0400 Subject: [PATCH] Add `BindingKind` variants to represent deleted bindings (#5071) ## Summary Our current mechanism for handling deletions (e.g., `del x`) is to remove the symbol from the scope's `bindings` table. This "does the right thing", in that if we then reference a deleted symbol, we're able to determine that it's unbound -- but it causes a variety of problems, mostly in that it makes certain bindings and references unreachable after-the-fact. Consider: ```python x = 1 print(x) del x ``` If we analyze this code _after_ running the semantic model over the AST, we'll have no way of knowing that `x` was ever introduced in the scope, much less that it was bound to a value, read, and then deleted -- because we effectively erased `x` from the model entirely when we hit the deletion. In practice, this will make it impossible for us to support local symbol renames. It also means that certain rules that we want to move out of the model-building phase and into the "check dead scopes" phase wouldn't work today, since we'll have lost important information about the source code. This PR introduces two new `BindingKind` variants to model deletions: - `BindingKind::Deletion`, which represents `x = 1; del x`. - `BindingKind::UnboundException`, which represents: ```python try: 1 / 0 except Exception as e: pass ``` In the latter case, `e` gets unbound after the exception handler (assuming it's triggered), so we want to handle it similarly to a deletion. The main challenge here is auditing all of our existing `Binding` and `Scope` usages to understand whether they need to accommodate deletions or otherwise behave differently. If you look one commit back on this branch, you'll see that the code is littered with `NOTE(charlie)` comments that describe the reasoning behind changing (or not) each of those call sites. I've also augmented our test suite in preparation for this change over a few prior PRs. ### Alternatives As an alternative, I considered introducing a flag to `BindingFlags`, like `BindingFlags::UNBOUND`, and setting that at the appropriate time. This turned out to be a much more difficult change, because we tend to match on `BindingKind` all over the place (e.g., we have a bunch of code blocks that only run when a `BindingKind` is `BindingKind::Importation`). As a result, introducing these new `BindingKind` variants requires only a few changes at the client sites. Adding a flag would've required a much wider-reaching change. --- crates/ruff/src/checkers/ast/mod.rs | 116 +++++++++++------- crates/ruff/src/importer/mod.rs | 6 +- .../rules/string_in_exception.rs | 6 +- crates/ruff/src/rules/pyflakes/mod.rs | 9 ++ ...tests__augmented_assignment_after_del.snap | 4 +- ...f__rules__pyflakes__tests__double_del.snap | 12 ++ .../src/rules/pyupgrade/rules/open_alias.rs | 5 +- crates/ruff_python_ast/src/helpers.rs | 2 +- crates/ruff_python_semantic/src/binding.rs | 38 +++++- crates/ruff_python_semantic/src/model.rs | 49 +++++--- crates/ruff_python_semantic/src/scope.rs | 14 --- 11 files changed, 169 insertions(+), 92 deletions(-) create mode 100644 crates/ruff/src/rules/pyflakes/snapshots/ruff__rules__pyflakes__tests__double_del.snap diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index 2231a1207d..11e6f4de3a 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -307,18 +307,8 @@ where stmt.range(), ExecutionContext::Runtime, ); - } - - // Ensure that every nonlocal has an existing binding from a parent scope. - if self.enabled(Rule::NonlocalWithoutBinding) { - if self - .semantic_model - .scopes - .ancestors(self.semantic_model.scope_id) - .skip(1) - .take_while(|scope| !scope.kind.is_module()) - .all(|scope| !scope.declares(name.as_str())) - { + } else { + if self.enabled(Rule::NonlocalWithoutBinding) { self.diagnostics.push(Diagnostic::new( pylint::rules::NonlocalWithoutBinding { name: name.to_string(), @@ -3932,7 +3922,7 @@ where } // Add the bound exception name to the scope. - self.add_binding( + let binding_id = self.add_binding( name, range, BindingKind::Assignment, @@ -3942,27 +3932,30 @@ where walk_excepthandler(self, excepthandler); // Remove it from the scope immediately after. - if let Some(binding_id) = { - let scope = self.semantic_model.scope_mut(); - scope.delete(name) - } { - if !self.semantic_model.is_used(binding_id) { - if self.enabled(Rule::UnusedVariable) { - let mut diagnostic = Diagnostic::new( - pyflakes::rules::UnusedVariable { name: name.into() }, - range, - ); - if self.patch(Rule::UnusedVariable) { - diagnostic.try_set_fix(|| { - let edit = pyflakes::fixes::remove_exception_handler_assignment( - excepthandler, - self.locator, - )?; - Ok(Fix::automatic(edit)) - }); - } - self.diagnostics.push(diagnostic); + self.add_binding( + name, + range, + BindingKind::UnboundException, + BindingFlags::empty(), + ); + + // If the exception name wasn't used in the scope, emit a diagnostic. + if !self.semantic_model.is_used(binding_id) { + if self.enabled(Rule::UnusedVariable) { + let mut diagnostic = Diagnostic::new( + pyflakes::rules::UnusedVariable { name: name.into() }, + range, + ); + if self.patch(Rule::UnusedVariable) { + diagnostic.try_set_fix(|| { + pyflakes::fixes::remove_exception_handler_assignment( + excepthandler, + self.locator, + ) + .map(Fix::automatic) + }); } + self.diagnostics.push(diagnostic); } } } @@ -4224,7 +4217,14 @@ impl<'a> Checker<'a> { .ancestors(self.semantic_model.scope_id) .enumerate() .find_map(|(stack_index, scope)| { - scope.get(name).map(|binding_id| (stack_index, binding_id)) + scope.get(name).and_then(|binding_id| { + let binding = &self.semantic_model.bindings[binding_id]; + if binding.is_unbound() { + None + } else { + Some((stack_index, binding_id)) + } + }) }) { let shadowed = &self.semantic_model.bindings[shadowed_id]; @@ -4303,7 +4303,7 @@ impl<'a> Checker<'a> { .map(|binding_id| &self.semantic_model.bindings[binding_id]) { match &shadowed.kind { - BindingKind::Builtin => { + BindingKind::Builtin | BindingKind::Deletion | BindingKind::UnboundException => { // Avoid overriding builtins. } kind @ (BindingKind::Global | BindingKind::Nonlocal) => { @@ -4351,7 +4351,7 @@ impl<'a> Checker<'a> { } fn handle_node_load(&mut self, expr: &Expr) { - let Expr::Name(ast::ExprName { id, .. } )= expr else { + let Expr::Name(ast::ExprName { id, .. }) = expr else { return; }; match self.semantic_model.resolve_read(id, expr.range()) { @@ -4559,12 +4559,29 @@ impl<'a> Checker<'a> { let Expr::Name(ast::ExprName { id, .. } )= expr else { return; }; - if helpers::on_conditional_branch(&mut self.semantic_model.parents()) { - return; - } - let scope = self.semantic_model.scope_mut(); - if scope.delete(id.as_str()).is_none() { + // Treat the deletion of a name as a reference to that name. + if let Some(binding_id) = self.semantic_model.scope().get(id) { + self.semantic_model.add_local_reference( + binding_id, + expr.range(), + ExecutionContext::Runtime, + ); + + // If the name is unbound, then it's an error. + if self.enabled(Rule::UndefinedName) { + let binding = &self.semantic_model.bindings[binding_id]; + if binding.is_unbound() { + self.diagnostics.push(Diagnostic::new( + pyflakes::rules::UndefinedName { + name: id.to_string(), + }, + expr.range(), + )); + } + } + } else { + // If the name isn't bound at all, then it's an error. if self.enabled(Rule::UndefinedName) { self.diagnostics.push(Diagnostic::new( pyflakes::rules::UndefinedName { @@ -4574,6 +4591,19 @@ impl<'a> Checker<'a> { )); } } + + if helpers::on_conditional_branch(&mut self.semantic_model.parents()) { + return; + } + + // Create a binding to model the deletion. + let binding_id = self.semantic_model.push_binding( + expr.range(), + BindingKind::Deletion, + BindingFlags::empty(), + ); + let scope = self.semantic_model.scope_mut(); + scope.add(id, binding_id); } fn check_deferred_future_type_definitions(&mut self) { @@ -4759,8 +4789,8 @@ impl<'a> Checker<'a> { // Mark anything referenced in `__all__` as used. let exports: Vec<(&str, TextRange)> = { - let global_scope = self.semantic_model.global_scope(); - global_scope + self.semantic_model + .global_scope() .bindings_for_name("__all__") .map(|binding_id| &self.semantic_model.bindings[binding_id]) .filter_map(|binding| match &binding.kind { diff --git a/crates/ruff/src/importer/mod.rs b/crates/ruff/src/importer/mod.rs index f57fcfafe5..53b7fbb597 100644 --- a/crates/ruff/src/importer/mod.rs +++ b/crates/ruff/src/importer/mod.rs @@ -239,7 +239,7 @@ impl<'a> Importer<'a> { // Case 1: `from functools import lru_cache` is in scope, and we're trying to reference // `functools.cache`; thus, we add `cache` to the import, and return `"cache"` as the // bound name. - if semantic_model.is_unbound(symbol.member) { + if semantic_model.is_available(symbol.member) { let Ok(import_edit) = self.add_member(stmt, symbol.member) else { return Err(ResolutionError::InvalidEdit); }; @@ -252,7 +252,7 @@ impl<'a> Importer<'a> { ImportStyle::Import => { // Case 2a: No `functools` import is in scope; thus, we add `import functools`, // and return `"functools.cache"` as the bound name. - if semantic_model.is_unbound(symbol.module) { + if semantic_model.is_available(symbol.module) { let import_edit = self.add_import(&AnyImport::Import(Import::module(symbol.module)), at); Ok(( @@ -270,7 +270,7 @@ impl<'a> Importer<'a> { ImportStyle::ImportFrom => { // Case 2b: No `functools` import is in scope; thus, we add // `from functools import cache`, and return `"cache"` as the bound name. - if semantic_model.is_unbound(symbol.member) { + if semantic_model.is_available(symbol.member) { let import_edit = self.add_import( &AnyImport::ImportFrom(ImportFrom::member( symbol.module, diff --git a/crates/ruff/src/rules/flake8_errmsg/rules/string_in_exception.rs b/crates/ruff/src/rules/flake8_errmsg/rules/string_in_exception.rs index 0641058bf9..b1aca25362 100644 --- a/crates/ruff/src/rules/flake8_errmsg/rules/string_in_exception.rs +++ b/crates/ruff/src/rules/flake8_errmsg/rules/string_in_exception.rs @@ -190,7 +190,7 @@ pub(crate) fn string_in_exception(checker: &mut Checker, stmt: &Stmt, exc: &Expr if let Some(indentation) = whitespace::indentation(checker.locator, stmt) { - if checker.semantic_model().is_unbound("msg") { + if checker.semantic_model().is_available("msg") { diagnostic.set_fix(generate_fix( stmt, first, @@ -213,7 +213,7 @@ pub(crate) fn string_in_exception(checker: &mut Checker, stmt: &Stmt, exc: &Expr if let Some(indentation) = whitespace::indentation(checker.locator, stmt) { - if checker.semantic_model().is_unbound("msg") { + if checker.semantic_model().is_available("msg") { diagnostic.set_fix(generate_fix( stmt, first, @@ -240,7 +240,7 @@ pub(crate) fn string_in_exception(checker: &mut Checker, stmt: &Stmt, exc: &Expr if let Some(indentation) = whitespace::indentation(checker.locator, stmt) { - if checker.semantic_model().is_unbound("msg") { + if checker.semantic_model().is_available("msg") { diagnostic.set_fix(generate_fix( stmt, first, diff --git a/crates/ruff/src/rules/pyflakes/mod.rs b/crates/ruff/src/rules/pyflakes/mod.rs index 80e6fd868d..1f8d587325 100644 --- a/crates/ruff/src/rules/pyflakes/mod.rs +++ b/crates/ruff/src/rules/pyflakes/mod.rs @@ -356,6 +356,15 @@ mod tests { "#, "print_after_shadowing_except" )] + #[test_case( + r#" + def f(): + x = 1 + del x + del x + "#, + "double_del" + )] fn contents(contents: &str, snapshot: &str) { let diagnostics = test_snippet(contents, &Settings::for_rules(&Linter::Pyflakes)); assert_messages!(snapshot, diagnostics); diff --git a/crates/ruff/src/rules/pyflakes/snapshots/ruff__rules__pyflakes__tests__augmented_assignment_after_del.snap b/crates/ruff/src/rules/pyflakes/snapshots/ruff__rules__pyflakes__tests__augmented_assignment_after_del.snap index 8e311ebc89..57a88f04c4 100644 --- a/crates/ruff/src/rules/pyflakes/snapshots/ruff__rules__pyflakes__tests__augmented_assignment_after_del.snap +++ b/crates/ruff/src/rules/pyflakes/snapshots/ruff__rules__pyflakes__tests__augmented_assignment_after_del.snap @@ -1,12 +1,12 @@ --- source: crates/ruff/src/rules/pyflakes/mod.rs --- -:10:5: F823 Local variable `x` referenced before assignment +:10:5: F821 Undefined name `x` | 8 | # entirely after the `del` statement. However, it should be an F821 9 | # error, because the name is defined in the scope, but unbound. 10 | x += 1 - | ^ F823 + | ^ F821 | :10:5: F841 Local variable `x` is assigned to but never used diff --git a/crates/ruff/src/rules/pyflakes/snapshots/ruff__rules__pyflakes__tests__double_del.snap b/crates/ruff/src/rules/pyflakes/snapshots/ruff__rules__pyflakes__tests__double_del.snap new file mode 100644 index 0000000000..ef01e14323 --- /dev/null +++ b/crates/ruff/src/rules/pyflakes/snapshots/ruff__rules__pyflakes__tests__double_del.snap @@ -0,0 +1,12 @@ +--- +source: crates/ruff/src/rules/pyflakes/mod.rs +--- +:5:9: F821 Undefined name `x` + | +3 | x = 1 +4 | del x +5 | del x + | ^ F821 + | + + diff --git a/crates/ruff/src/rules/pyupgrade/rules/open_alias.rs b/crates/ruff/src/rules/pyupgrade/rules/open_alias.rs index c8e3809123..a49207bbb7 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/open_alias.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/open_alias.rs @@ -31,9 +31,8 @@ pub(crate) fn open_alias(checker: &mut Checker, expr: &Expr, func: &Expr) { { let mut diagnostic = Diagnostic::new(OpenAlias, expr.range()); if checker.patch(diagnostic.kind.rule()) { - if checker.semantic_model().is_unbound("open") { - #[allow(deprecated)] - diagnostic.set_fix(Fix::unspecified(Edit::range_replacement( + if checker.semantic_model().is_available("open") { + diagnostic.set_fix(Fix::suggested(Edit::range_replacement( "open".to_string(), func.range(), ))); diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index 8c3e08774a..0d86104608 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -1393,7 +1393,7 @@ impl<'a> SimpleCallArgs<'a> { } } -/// Check if a node is parent of a conditional branch. +/// Check if a node is part of a conditional branch. pub fn on_conditional_branch<'a>(parents: &mut impl Iterator) -> bool { parents.any(|parent| { if matches!(parent, Stmt::If(_) | Stmt::While(_) | Stmt::Match(_)) { diff --git a/crates/ruff_python_semantic/src/binding.rs b/crates/ruff_python_semantic/src/binding.rs index c3294809d6..42d828a7d2 100644 --- a/crates/ruff_python_semantic/src/binding.rs +++ b/crates/ruff_python_semantic/src/binding.rs @@ -41,11 +41,20 @@ impl<'a> Binding<'a> { } /// Return `true` if this [`Binding`] represents an explicit re-export - /// (e.g., `import FastAPI as FastAPI`). + /// (e.g., `FastAPI` in `from fastapi import FastAPI as FastAPI`). pub const fn is_explicit_export(&self) -> bool { self.flags.contains(BindingFlags::EXPLICIT_EXPORT) } + /// Return `true` if this [`Binding`] represents an unbound variable + /// (e.g., `x` in `x = 1; del x`). + pub const fn is_unbound(&self) -> bool { + matches!( + self.kind, + BindingKind::Annotation | BindingKind::Deletion | BindingKind::UnboundException + ) + } + /// Return `true` if this binding redefines the given binding. pub fn redefines(&self, existing: &'a Binding) -> bool { match &self.kind { @@ -83,10 +92,10 @@ impl<'a> Binding<'a> { _ => {} } } - BindingKind::Annotation => { - return false; - } - BindingKind::FutureImportation => { + BindingKind::Deletion + | BindingKind::Annotation + | BindingKind::FutureImportation + | BindingKind::Builtin => { return false; } _ => {} @@ -95,7 +104,6 @@ impl<'a> Binding<'a> { existing.kind, BindingKind::ClassDefinition | BindingKind::FunctionDefinition - | BindingKind::Builtin | BindingKind::Importation(..) | BindingKind::FromImportation(..) | BindingKind::SubmoduleImportation(..) @@ -367,6 +375,24 @@ pub enum BindingKind<'a> { /// import foo.bar /// ``` SubmoduleImportation(SubmoduleImportation<'a>), + + /// A binding for a deletion, like `x` in: + /// ```python + /// del x + /// ``` + Deletion, + + /// A binding to unbind the local variable, like `x` in: + /// ```python + /// try: + /// ... + /// except Exception as x: + /// ... + /// ``` + /// + /// After the `except` block, `x` is unbound, despite the lack + /// of an explicit `del` statement. + UnboundException, } bitflags! { diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs index 4142833083..c26b4dfb62 100644 --- a/crates/ruff_python_semantic/src/model.rs +++ b/crates/ruff_python_semantic/src/model.rs @@ -191,8 +191,9 @@ impl<'a> SemanticModel<'a> { .map_or(false, |binding| binding.kind.is_builtin()) } - /// Return `true` if `member` is unbound. - pub fn is_unbound(&self, member: &str) -> bool { + /// Return `true` if `member` is an "available" symbol, i.e., a symbol that has not been bound + /// in the current scope, or in any containing scope. + pub fn is_available(&self, member: &str) -> bool { self.find_binding(member) .map_or(true, |binding| binding.kind.is_builtin()) } @@ -203,7 +204,7 @@ impl<'a> SemanticModel<'a> { // should prefer it over local resolutions. if self.in_forward_reference() { if let Some(binding_id) = self.scopes.global().get(symbol) { - if !self.bindings[binding_id].kind.is_annotation() { + if !self.bindings[binding_id].is_unbound() { // Mark the binding as used. let context = self.execution_context(); let reference_id = self.references.push(ScopeId::global(), range, context); @@ -254,17 +255,29 @@ impl<'a> SemanticModel<'a> { self.bindings[binding_id].references.push(reference_id); } - // But if it's a type annotation, don't treat it as resolved. For example, given: - // - // ```python - // name: str - // print(name) - // ``` - // - // The `name` in `print(name)` should be treated as unresolved, but the `name` in - // `name: str` should be treated as used. - if self.bindings[binding_id].kind.is_annotation() { - continue; + match self.bindings[binding_id].kind { + // If it's a type annotation, don't treat it as resolved. For example, given: + // + // ```python + // name: str + // print(name) + // ``` + // + // The `name` in `print(name)` should be treated as unresolved, but the `name` in + // `name: str` should be treated as used. + BindingKind::Annotation => continue, + // If it's a deletion, don't treat it as resolved, since the name is now + // unbound. For example, given: + // + // ```python + // x = 1 + // del x + // print(x) + // ``` + // + // The `x` in `print(x)` should be treated as unresolved. + BindingKind::Deletion | BindingKind::UnboundException => break, + _ => {} } return ResolvedRead::Resolved(binding_id); @@ -618,9 +631,11 @@ impl<'a> SemanticModel<'a> { pub fn set_globals(&mut self, globals: Globals<'a>) { // If any global bindings don't already exist in the global scope, add them. for (name, range) in globals.iter() { - if self.global_scope().get(name).map_or(true, |binding_id| { - self.bindings[binding_id].kind.is_annotation() - }) { + if self + .global_scope() + .get(name) + .map_or(true, |binding_id| self.bindings[binding_id].is_unbound()) + { let id = self.bindings.push(Binding { kind: BindingKind::Assignment, range: *range, diff --git a/crates/ruff_python_semantic/src/scope.rs b/crates/ruff_python_semantic/src/scope.rs index f1e9107844..c53a6b43d2 100644 --- a/crates/ruff_python_semantic/src/scope.rs +++ b/crates/ruff_python_semantic/src/scope.rs @@ -38,9 +38,6 @@ pub struct Scope<'a> { /// In this case, the binding created by `x = 2` shadows the binding created by `x = 1`. shadowed_bindings: HashMap>, - /// A list of all names that have been deleted in this scope. - deleted_symbols: Vec<&'a str>, - /// Index into the globals arena, if the scope contains any globally-declared symbols. globals_id: Option, @@ -56,7 +53,6 @@ impl<'a> Scope<'a> { star_imports: Vec::default(), bindings: FxHashMap::default(), shadowed_bindings: IntMap::default(), - deleted_symbols: Vec::default(), globals_id: None, flags: ScopeFlags::empty(), } @@ -69,7 +65,6 @@ impl<'a> Scope<'a> { star_imports: Vec::default(), bindings: FxHashMap::default(), shadowed_bindings: IntMap::default(), - deleted_symbols: Vec::default(), globals_id: None, flags: ScopeFlags::empty(), } @@ -92,7 +87,6 @@ impl<'a> Scope<'a> { /// Removes the binding with the given name. pub fn delete(&mut self, name: &'a str) -> Option { - self.deleted_symbols.push(name); self.bindings.remove(name) } @@ -101,14 +95,6 @@ impl<'a> Scope<'a> { self.bindings.contains_key(name) } - /// Returns `true` if the scope declares a symbol with the given name. - /// - /// Unlike [`Scope::has`], the name may no longer be bound to a value (e.g., it could be - /// deleted). - pub fn declares(&self, name: &str) -> bool { - self.has(name) || self.deleted_symbols.contains(&name) - } - /// Returns the ids of all bindings defined in this scope. pub fn binding_ids(&self) -> impl Iterator + '_ { self.bindings.values().copied()