From 5a9d71a5f100df438420b23f3fcbff1e74d596dd Mon Sep 17 00:00:00 2001 From: Douglas Creager Date: Fri, 24 Jan 2025 16:07:31 -0500 Subject: [PATCH] Speed symbol state merging back up (#15731) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a follow-up to #15702 that hopefully claws back the 1% performance regression. Assuming it works, the trick is to iterate over the constraints vectors via mut reference (aka a single pointer), so that we're not copying `BitSet`s into and out of the zip tuples as we iterate. We use `std::mem::take` as a poor-man's move constructor only at the very end, when we're ready to emplace it into the result. (C++ idioms intended! :smile:) With local testing via hyperfine, I'm seeing this be 1-3% faster than `main` most of the time — though a small number of runs (1 in 10, maybe?) are a wash or have `main` faster. Codspeed reports a 2% gain. --- .../semantic_index/use_def/symbol_state.rs | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/crates/red_knot_python_semantic/src/semantic_index/use_def/symbol_state.rs b/crates/red_knot_python_semantic/src/semantic_index/use_def/symbol_state.rs index 6a898e3ce1..39ccd193cf 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/use_def/symbol_state.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/use_def/symbol_state.rs @@ -287,18 +287,27 @@ impl SymbolBindings { } } - fn merge(&mut self, b: Self, visibility_constraints: &mut VisibilityConstraints) { - let a = std::mem::take(self); + fn merge(&mut self, mut b: Self, visibility_constraints: &mut VisibilityConstraints) { + let mut a = std::mem::take(self); self.live_bindings = a.live_bindings.clone(); self.live_bindings.union(&b.live_bindings); // Invariant: These zips are well-formed since we maintain an invariant that all of our // fields are sets/vecs with the same length. + // + // Performance: We iterate over the `constraints` smallvecs via mut reference, because the + // individual elements are `BitSet`s (currently 24 bytes in size), and we don't want to + // move them by value multiple times during iteration. By iterating by reference, we only + // have to copy single pointers around. In the loop below, the `std::mem::take` calls + // specify precisely where we want to move them into the merged `constraints` smallvec. + // + // We don't need a similar optimization for `visibility_constraints`, since those elements + // are 32-bit IndexVec IDs, and so are already cheap to move/copy. let a = (a.live_bindings.iter()) - .zip(a.constraints) + .zip(a.constraints.iter_mut()) .zip(a.visibility_constraints); let b = (b.live_bindings.iter()) - .zip(b.constraints) + .zip(b.constraints.iter_mut()) .zip(b.visibility_constraints); // Invariant: merge_join_by consumes the two iterators in sorted order, which ensures that @@ -315,9 +324,9 @@ impl SymbolBindings { // If the same definition is visible through both paths, any constraint // that applies on only one path is irrelevant to the resulting type from // unioning the two paths, so we intersect the constraints. - let mut constraints = a_constraints; - constraints.intersect(&b_constraints); - self.constraints.push(constraints); + let constraints = a_constraints; + constraints.intersect(b_constraints); + self.constraints.push(std::mem::take(constraints)); // For visibility constraints, we merge them using a ternary OR operation: let vis_constraint = visibility_constraints @@ -327,7 +336,7 @@ impl SymbolBindings { EitherOrBoth::Left(((_, constraints), vis_constraint)) | EitherOrBoth::Right(((_, constraints), vis_constraint)) => { - self.constraints.push(constraints); + self.constraints.push(std::mem::take(constraints)); self.visibility_constraints.push(vis_constraint); } }