From fd1dfc3bfa291d7a0689fa1fa4a75b03083623c3 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 16 Jun 2023 10:35:10 -0400 Subject: [PATCH] Add support for global and nonlocal symbol renames (#5134) ## Summary In #5074, we introduced an abstraction to support local symbol renames ("local" here refers to "within a module"). However, that abstraction didn't support `global` and `nonlocal` symbols. This PR extends it to those cases. Broadly, there are considerations. First, if we're renaming a symbol in a scope in which it is declared `global` or `nonlocal`. For example, given: ```python x = 1 def foo(): global x ``` Then when renaming `x` in `foo`, we need to detect that it's `global` and instead perform the rename starting from the module scope. Second, when renaming a symbol, we need to determine the scopes in which it is declared `global` or `nonlocal`. This is effectively the inverse of the above: when renaming `x` in the module scope, we need to detect that we should _also_ rename `x` in `foo`. To support these cases, the renaming algorithm was adjusted as follows: - When we start a rename in a scope, determine whether the symbol is declared `global` or `nonlocal` by looking for a `global` or `nonlocal` binding. If it is, start the rename in the defining scope. (This requires storing the defining scope on the `nonlocal` binding, which is new.) - We then perform the rename in the defining scope. - We then check whether the symbol was declared as `global` or `nonlocal` in any scopes, and perform the rename in those scopes too. (Thankfully, this doesn't need to be done recursively.) Closes #5092. ## Test Plan Added some additional snapshot tests. --- .../test/fixtures/flake8_pyi/PYI025.pyi | 20 +++ crates/ruff/src/checkers/ast/mod.rs | 49 +++---- crates/ruff/src/renamer.rs | 131 ++++++++++++++---- ..._flake8_pyi__tests__PYI025_PYI025.pyi.snap | 128 +++++++++++++---- crates/ruff/src/rules/pandas_vet/helpers.rs | 2 +- crates/ruff_python_semantic/src/binding.rs | 3 +- crates/ruff_python_semantic/src/model.rs | 53 ++++++- 7 files changed, 305 insertions(+), 81 deletions(-) diff --git a/crates/ruff/resources/test/fixtures/flake8_pyi/PYI025.pyi b/crates/ruff/resources/test/fixtures/flake8_pyi/PYI025.pyi index bda9c0d083..26a2a69ce9 100644 --- a/crates/ruff/resources/test/fixtures/flake8_pyi/PYI025.pyi +++ b/crates/ruff/resources/test/fixtures/flake8_pyi/PYI025.pyi @@ -11,6 +11,7 @@ def f(): from collections.abc import Container, Sized, Set, ValuesView # PYI025 def f(): + """Test: local symbol renaming.""" if True: from collections.abc import Set else: @@ -28,3 +29,22 @@ def f(): def Set(): pass print(Set) + +from collections.abc import Set + +def f(): + """Test: global symbol renaming.""" + global Set + + Set = 1 + print(Set) + +def f(): + """Test: nonlocal symbol renaming.""" + from collections.abc import Set + + def g(): + nonlocal Set + + Set = 1 + print(Set) diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index d9b1b9d3d4..4fe244633d 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -254,6 +254,12 @@ where let ranges: Vec = identifier::names(stmt, self.locator).collect(); if !self.semantic.scope_id.is_global() { for (name, range) in names.iter().zip(ranges.iter()) { + if let Some(binding_id) = self.semantic.global_scope().get(name) { + // Mark the binding in the global scope as "rebound" in the current scope. + self.semantic + .add_rebinding_scope(binding_id, self.semantic.scope_id); + } + // Add a binding to the current scope. let binding_id = self.semantic.push_binding( *range, @@ -264,6 +270,7 @@ where scope.add(name, binding_id); } } + if self.enabled(Rule::AmbiguousVariableName) { self.diagnostics .extend(names.iter().zip(ranges.iter()).filter_map(|(name, range)| { @@ -275,33 +282,27 @@ where let ranges: Vec = identifier::names(stmt, self.locator).collect(); if !self.semantic.scope_id.is_global() { for (name, range) in names.iter().zip(ranges.iter()) { - // Add a binding to the current scope. - let binding_id = self.semantic.push_binding( - *range, - BindingKind::Nonlocal, - BindingFlags::empty(), - ); - let scope = self.semantic.scope_mut(); - scope.add(name, binding_id); - } - - // Mark the binding in the defining scopes as used too. (Skip the global scope - // and the current scope, and, per standard resolution rules, any class scopes.) - for (name, range) in names.iter().zip(ranges.iter()) { - let binding_id = self - .semantic - .scopes - .ancestors(self.semantic.scope_id) - .skip(1) - .filter(|scope| !(scope.kind.is_module() || scope.kind.is_class())) - .find_map(|scope| scope.get(name.as_str())); - - if let Some(binding_id) = binding_id { + if let Some((scope_id, binding_id)) = self.semantic.nonlocal(name) { + // Mark the binding as "used". self.semantic.add_local_reference( binding_id, - stmt.range(), + *range, ExecutionContext::Runtime, ); + + // Mark the binding in the enclosing scope as "rebound" in the current + // scope. + self.semantic + .add_rebinding_scope(binding_id, self.semantic.scope_id); + + // Add a binding to the current scope. + let binding_id = self.semantic.push_binding( + *range, + BindingKind::Nonlocal(scope_id), + BindingFlags::empty(), + ); + let scope = self.semantic.scope_mut(); + scope.add(name, binding_id); } else { if self.enabled(Rule::NonlocalWithoutBinding) { self.diagnostics.push(Diagnostic::new( @@ -4283,7 +4284,7 @@ impl<'a> Checker<'a> { BindingKind::Builtin | BindingKind::Deletion | BindingKind::UnboundException => { // Avoid overriding builtins. } - kind @ (BindingKind::Global | BindingKind::Nonlocal) => { + kind @ (BindingKind::Global | BindingKind::Nonlocal(_)) => { // If the original binding was a global or nonlocal, then the new binding is // too. let references = shadowed.references.clone(); diff --git a/crates/ruff/src/renamer.rs b/crates/ruff/src/renamer.rs index 05ab127f2e..6c75d9176d 100644 --- a/crates/ruff/src/renamer.rs +++ b/crates/ruff/src/renamer.rs @@ -1,21 +1,41 @@ //! Code modification struct to support symbol renaming within a scope. use anyhow::{anyhow, Result}; +use itertools::Itertools; use ruff_diagnostics::Edit; -use ruff_python_semantic::{Binding, BindingKind, Scope, SemanticModel}; +use ruff_python_semantic::{Binding, BindingKind, Scope, ScopeId, SemanticModel}; pub(crate) struct Renamer; impl Renamer { - /// Rename a symbol (from `name` to `target`) within a [`Scope`]. + /// Rename a symbol (from `name` to `target`). /// /// ## How it works /// /// The renaming algorithm is as follows: /// - /// 1. Start with the first [`Binding`] in the scope, for the given name. For example, in the - /// following snippet, we'd start by examining the `x = 1` binding: + /// 1. Determine the scope in which the rename should occur. This is typically the scope passed + /// in by the caller. However, if a symbol is `nonlocal` or `global`, then the rename needs + /// to occur in the scope in which the symbol is declared. For example, attempting to rename + /// `x` in `foo` below should trigger a rename in the module scope: + /// + /// ```python + /// x = 1 + /// + /// def foo(): + /// global x + /// x = 2 + /// ``` + /// + /// 1. Determine whether the symbol is rebound in another scope. This is effectively the inverse + /// of the previous step: when attempting to rename `x` in the module scope, we need to + /// detect that `x` is rebound in the `foo` scope. Determine every scope in which the symbol + /// is rebound, and add it to the set of scopes in which the rename should occur. + /// + /// 1. Start with the first scope in the stack. Take the first [`Binding`] in the scope, for the + /// given name. For example, in the following snippet, we'd start by examining the `x = 1` + /// binding: /// /// ```python /// if True: @@ -28,7 +48,7 @@ impl Renamer { /// print(x) /// ``` /// - /// 2. Rename the [`Binding`]. In most cases, this is a simple replacement. For example, + /// 1. Rename the [`Binding`]. In most cases, this is a simple replacement. For example, /// renaming `x` to `y` above would require replacing `x = 1` with `y = 1`. After the /// first replacement in the snippet above, we'd have: /// @@ -47,7 +67,7 @@ impl Renamer { /// example, to rename `pandas` to `pd`, we may need to rewrite `import pandas` to /// `import pandas as pd`, rather than `import pd`. /// - /// 3. Rename every reference to the [`Binding`]. For example, renaming the references to the + /// 1. Rename every reference to the [`Binding`]. For example, renaming the references to the /// `x = 1` binding above would give us: /// /// ```python @@ -61,9 +81,9 @@ impl Renamer { /// print(x) /// ``` /// - /// 4. Rename every delayed annotation. (See [`SemanticModel::delayed_annotations`].) + /// 1. Rename every delayed annotation. (See [`SemanticModel::delayed_annotations`].) /// - /// 5. Repeat the above process for every [`Binding`] in the scope with the given name. + /// 1. Repeat the above process for every [`Binding`] in the scope with the given name. /// After renaming the `x = 2` binding, we'd have: /// /// ```python @@ -77,17 +97,7 @@ impl Renamer { /// print(y) /// ``` /// - /// ## Limitations - /// - /// `global` and `nonlocal` declarations are not yet supported. - /// - /// `global` and `nonlocal` declarations add some additional complexity. If we're renaming a - /// name that's declared as `global` or `nonlocal` in a child scope, we need to rename the name - /// in that scope too, repeating the above process. - /// - /// If we're renaming a name that's declared as `global` or `nonlocal` in the current scope, - /// then we need to identify the scope in which the name is declared, and perform the rename - /// in that scope instead (which will in turn trigger the above process on the current scope). + /// 1. Repeat the above process for every scope in the stack. pub(crate) fn rename( name: &str, target: &str, @@ -96,6 +106,82 @@ impl Renamer { ) -> Result<(Edit, Vec)> { let mut edits = vec![]; + // Determine whether the symbol is `nonlocal` or `global`. (A symbol can't be both; Python + // raises a `SyntaxError`.) If the symbol is `nonlocal` or `global`, we need to rename it in + // the scope in which it's declared, rather than the current scope. For example, given: + // + // ```python + // x = 1 + // + // def foo(): + // global x + // ``` + // + // When renaming `x` in `foo`, we detect that `x` is a global, and back out to the module + // scope. + let scope_id = scope.get_all(name).find_map(|binding_id| { + let binding = semantic.binding(binding_id); + match binding.kind { + BindingKind::Global => Some(ScopeId::global()), + BindingKind::Nonlocal(symbol_id) => Some(symbol_id), + _ => None, + } + }); + + let scope = scope_id.map_or(scope, |scope_id| &semantic.scopes[scope_id]); + edits.extend(Renamer::rename_in_scope(name, target, scope, semantic)); + + // Find any scopes in which the symbol is referenced as `nonlocal` or `global`. For example, + // given: + // + // ```python + // x = 1 + // + // def foo(): + // global x + // + // def bar(): + // global x + // ``` + // + // When renaming `x` in `foo`, we detect that `x` is a global, and back out to the module + // scope. But we need to rename `x` in `bar` too. + // + // Note that it's impossible for a symbol to be referenced as both `nonlocal` and `global` + // in the same program. If a symbol is referenced as `global`, then it must be defined in + // the module scope. If a symbol is referenced as `nonlocal`, then it _can't_ be defined in + // the module scope (because `nonlocal` can only be used in a nested scope). + for scope_id in scope + .get_all(name) + .filter_map(|binding_id| semantic.rebinding_scopes(binding_id)) + .flatten() + .dedup() + .copied() + { + let scope = &semantic.scopes[scope_id]; + edits.extend(Renamer::rename_in_scope(name, target, scope, semantic)); + } + + // Deduplicate any edits. + edits.sort(); + edits.dedup(); + + let edit = edits + .pop() + .ok_or(anyhow!("Unable to rename any references to `{name}`"))?; + + Ok((edit, edits)) + } + + /// Rename a symbol in a single [`Scope`]. + fn rename_in_scope( + name: &str, + target: &str, + scope: &Scope, + semantic: &SemanticModel, + ) -> Vec { + let mut edits = vec![]; + // Iterate over every binding to the name in the scope. for binding_id in scope.get_all(name) { let binding = semantic.binding(binding_id); @@ -125,10 +211,7 @@ impl Renamer { edits.sort(); edits.dedup(); - let edit = edits - .pop() - .ok_or(anyhow!("Unable to rename any references to `{name}`"))?; - Ok((edit, edits)) + edits } /// Rename a [`Binding`] reference. @@ -164,7 +247,7 @@ impl Renamer { | BindingKind::Assignment | BindingKind::LoopVar | BindingKind::Global - | BindingKind::Nonlocal + | BindingKind::Nonlocal(_) | BindingKind::ClassDefinition | BindingKind::FunctionDefinition | BindingKind::Deletion diff --git a/crates/ruff/src/rules/flake8_pyi/snapshots/ruff__rules__flake8_pyi__tests__PYI025_PYI025.pyi.snap b/crates/ruff/src/rules/flake8_pyi/snapshots/ruff__rules__flake8_pyi__tests__PYI025_PYI025.pyi.snap index b3659c1544..6da7686105 100644 --- a/crates/ruff/src/rules/flake8_pyi/snapshots/ruff__rules__flake8_pyi__tests__PYI025_PYI025.pyi.snap +++ b/crates/ruff/src/rules/flake8_pyi/snapshots/ruff__rules__flake8_pyi__tests__PYI025_PYI025.pyi.snap @@ -39,43 +39,111 @@ PYI025.pyi:11:51: PYI025 [*] Use `from collections.abc import Set as AbstractSet 11 |+ from collections.abc import Container, Sized, Set as AbstractSet, ValuesView # PYI025 12 12 | 13 13 | def f(): -14 14 | if True: +14 14 | """Test: local symbol renaming.""" -PYI025.pyi:15:37: PYI025 [*] Use `from collections.abc import Set as AbstractSet` to avoid confusion with the `set` builtin +PYI025.pyi:16:37: PYI025 [*] Use `from collections.abc import Set as AbstractSet` to avoid confusion with the `set` builtin | -13 | def f(): -14 | if True: -15 | from collections.abc import Set +14 | """Test: local symbol renaming.""" +15 | if True: +16 | from collections.abc import Set | ^^^ PYI025 -16 | else: -17 | Set = 1 +17 | else: +18 | Set = 1 | = help: Alias `Set` to `AbstractSet` ℹ Suggested fix -12 12 | 13 13 | def f(): -14 14 | if True: -15 |- from collections.abc import Set - 15 |+ from collections.abc import Set as AbstractSet -16 16 | else: -17 |- Set = 1 - 17 |+ AbstractSet = 1 -18 18 | -19 |- x: Set = set() - 19 |+ x: AbstractSet = set() -20 20 | -21 |- x: Set - 21 |+ x: AbstractSet -22 22 | -23 |- del Set - 23 |+ del AbstractSet -24 24 | -25 25 | def f(): -26 |- print(Set) - 26 |+ print(AbstractSet) -27 27 | -28 28 | def Set(): -29 29 | pass +14 14 | """Test: local symbol renaming.""" +15 15 | if True: +16 |- from collections.abc import Set + 16 |+ from collections.abc import Set as AbstractSet +17 17 | else: +18 |- Set = 1 + 18 |+ AbstractSet = 1 +19 19 | +20 20 | x: Set = set() +21 21 | +22 22 | x: Set +23 23 | +24 |- del Set + 24 |+ del AbstractSet +25 25 | +26 26 | def f(): +27 |- print(Set) + 27 |+ print(AbstractSet) +28 28 | +29 29 | def Set(): +30 30 | pass + +PYI025.pyi:33:29: PYI025 [*] Use `from collections.abc import Set as AbstractSet` to avoid confusion with the `set` builtin + | +31 | print(Set) +32 | +33 | from collections.abc import Set + | ^^^ PYI025 +34 | +35 | def f(): + | + = help: Alias `Set` to `AbstractSet` + +ℹ Suggested fix +17 17 | else: +18 18 | Set = 1 +19 19 | +20 |- x: Set = set() + 20 |+ x: AbstractSet = set() +21 21 | +22 |- x: Set + 22 |+ x: AbstractSet +23 23 | +24 24 | del Set +25 25 | +-------------------------------------------------------------------------------- +30 30 | pass +31 31 | print(Set) +32 32 | +33 |-from collections.abc import Set + 33 |+from collections.abc import Set as AbstractSet +34 34 | +35 35 | def f(): +36 36 | """Test: global symbol renaming.""" +37 |- global Set + 37 |+ global AbstractSet +38 38 | +39 |- Set = 1 +40 |- print(Set) + 39 |+ AbstractSet = 1 + 40 |+ print(AbstractSet) +41 41 | +42 42 | def f(): +43 43 | """Test: nonlocal symbol renaming.""" + +PYI025.pyi:44:33: PYI025 [*] Use `from collections.abc import Set as AbstractSet` to avoid confusion with the `set` builtin + | +42 | def f(): +43 | """Test: nonlocal symbol renaming.""" +44 | from collections.abc import Set + | ^^^ PYI025 +45 | +46 | def g(): + | + = help: Alias `Set` to `AbstractSet` + +ℹ Suggested fix +41 41 | +42 42 | def f(): +43 43 | """Test: nonlocal symbol renaming.""" +44 |- from collections.abc import Set + 44 |+ from collections.abc import Set as AbstractSet +45 45 | +46 46 | def g(): +47 |- nonlocal Set + 47 |+ nonlocal AbstractSet +48 48 | +49 |- Set = 1 +50 |- print(Set) + 49 |+ AbstractSet = 1 + 50 |+ print(AbstractSet) diff --git a/crates/ruff/src/rules/pandas_vet/helpers.rs b/crates/ruff/src/rules/pandas_vet/helpers.rs index ec4400a978..1b92dd9867 100644 --- a/crates/ruff/src/rules/pandas_vet/helpers.rs +++ b/crates/ruff/src/rules/pandas_vet/helpers.rs @@ -38,7 +38,7 @@ pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resoluti | BindingKind::UnpackedAssignment | BindingKind::LoopVar | BindingKind::Global - | BindingKind::Nonlocal => Resolution::RelevantLocal, + | BindingKind::Nonlocal(_) => Resolution::RelevantLocal, BindingKind::Importation(Importation { qualified_name: module, }) if module == "pandas" => Resolution::PandasModule, diff --git a/crates/ruff_python_semantic/src/binding.rs b/crates/ruff_python_semantic/src/binding.rs index 819782a3af..7284de17f6 100644 --- a/crates/ruff_python_semantic/src/binding.rs +++ b/crates/ruff_python_semantic/src/binding.rs @@ -10,6 +10,7 @@ use crate::context::ExecutionContext; use crate::model::SemanticModel; use crate::node::NodeId; use crate::reference::ReferenceId; +use crate::ScopeId; #[derive(Debug, Clone)] pub struct Binding<'a> { @@ -336,7 +337,7 @@ pub enum BindingKind<'a> { /// def foo(): /// nonlocal x /// ``` - Nonlocal, + Nonlocal(ScopeId), /// A binding for a builtin, like `print` or `bool`. Builtin, diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs index 944e4f4f57..bae2be65ca 100644 --- a/crates/ruff_python_semantic/src/model.rs +++ b/crates/ruff_python_semantic/src/model.rs @@ -73,7 +73,7 @@ pub struct SemanticModel<'a> { /// Map from binding index to indexes of bindings that annotate it (in the same scope). /// - /// For example: + /// For example, given: /// ```python /// x = 1 /// x: int @@ -94,6 +94,21 @@ pub struct SemanticModel<'a> { /// first binding in a scope; any annotations that follow are treated as "delayed" annotations. delayed_annotations: HashMap, BuildNoHashHasher>, + /// Map from binding ID to the IDs of all scopes in which it is declared a `global` or + /// `nonlocal`. + /// + /// For example, given: + /// ```python + /// x = 1 + /// + /// def f(): + /// global x + /// ``` + /// + /// In this case, the binding created by `x = 1` is rebound within the scope created by `f` + /// by way of the `global x` statement. + rebinding_scopes: HashMap, BuildNoHashHasher>, + /// Body iteration; used to peek at siblings. pub body: &'a [Stmt], pub body_index: usize, @@ -123,6 +138,7 @@ impl<'a> SemanticModel<'a> { globals: GlobalsArena::default(), shadowed_bindings: IntMap::default(), delayed_annotations: IntMap::default(), + rebinding_scopes: IntMap::default(), body: &[], body_index: 0, flags: SemanticModelFlags::new(path), @@ -699,6 +715,26 @@ impl<'a> SemanticModel<'a> { self.globals[global_id].get(name).copied() } + /// Given a `name` that has been declared `nonlocal`, return the [`ScopeId`] and [`BindingId`] + /// to which it refers. + /// + /// Unlike `global` declarations, for which the scope is unambiguous, Python requires that + /// `nonlocal` declarations refer to the closest enclosing scope that contains a binding for + /// the given name. + pub fn nonlocal(&self, name: &str) -> Option<(ScopeId, BindingId)> { + self.scopes + .ancestor_ids(self.scope_id) + .skip(1) + .find_map(|scope_id| { + let scope = &self.scopes[scope_id]; + if scope.kind.is_module() || scope.kind.is_class() { + None + } else { + scope.get(name).map(|binding_id| (scope_id, binding_id)) + } + }) + } + /// Return `true` if the given [`ScopeId`] matches that of the current scope. pub fn is_current_scope(&self, scope_id: ScopeId) -> bool { self.scope_id == scope_id @@ -766,6 +802,21 @@ impl<'a> SemanticModel<'a> { self.delayed_annotations.get(&binding_id).map(Vec::as_slice) } + /// Mark the given [`BindingId`] as rebound in the given [`ScopeId`] (i.e., declared as + /// `global` or `nonlocal`). + pub fn add_rebinding_scope(&mut self, binding_id: BindingId, scope_id: ScopeId) { + self.rebinding_scopes + .entry(binding_id) + .or_insert_with(Vec::new) + .push(scope_id); + } + + /// Return the list of [`ScopeId`]s in which the given [`BindingId`] is rebound (i.e., declared + /// as `global` or `nonlocal`). + pub fn rebinding_scopes(&self, binding_id: BindingId) -> Option<&[ScopeId]> { + self.rebinding_scopes.get(&binding_id).map(Vec::as_slice) + } + /// Return the [`ExecutionContext`] of the current scope. pub const fn execution_context(&self) -> ExecutionContext { if self.in_type_checking_block()