diff --git a/Cargo.lock b/Cargo.lock index 3ee5faec40..e6777f13ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -261,6 +261,18 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -1121,6 +1133,12 @@ dependencies = [ "libc", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "generic-array" version = "0.14.7" @@ -2548,6 +2566,12 @@ version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -3768,6 +3792,12 @@ dependencies = [ "syn", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.20.0" @@ -4239,6 +4269,7 @@ version = "0.0.0" dependencies = [ "anyhow", "bitflags 2.9.1", + "bitvec", "camino", "colored 3.0.0", "compact_str", @@ -5092,6 +5123,15 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +[[package]] +name = "wyz" +version = "0.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "yansi" version = "1.0.1" diff --git a/Cargo.toml b/Cargo.toml index d602e79811..da9383c64f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,9 @@ assert_fs = { version = "1.1.0" } argfile = { version = "0.2.0" } bincode = { version = "2.0.0" } bitflags = { version = "2.5.0" } +bitvec = { version = "1.0.1", default-features = false, features = [ + "alloc", +] } bstr = { version = "1.9.1" } cachedir = { version = "0.3.1" } camino = { version = "1.1.7" } diff --git a/crates/ty_python_semantic/Cargo.toml b/crates/ty_python_semantic/Cargo.toml index 3550ce0e9b..370b0a4154 100644 --- a/crates/ty_python_semantic/Cargo.toml +++ b/crates/ty_python_semantic/Cargo.toml @@ -26,6 +26,7 @@ ty_static = { workspace = true } anyhow = { workspace = true } bitflags = { workspace = true } +bitvec = { workspace = true } camino = { workspace = true } colored = { workspace = true } compact_str = { workspace = true } diff --git a/crates/ty_python_semantic/src/lib.rs b/crates/ty_python_semantic/src/lib.rs index 11a842cb26..57cf070dab 100644 --- a/crates/ty_python_semantic/src/lib.rs +++ b/crates/ty_python_semantic/src/lib.rs @@ -34,6 +34,7 @@ mod node_key; pub(crate) mod place; mod program; mod python_platform; +mod rank; pub mod semantic_index; mod semantic_model; pub(crate) mod site_packages; diff --git a/crates/ty_python_semantic/src/rank.rs b/crates/ty_python_semantic/src/rank.rs new file mode 100644 index 0000000000..2d06d6f638 --- /dev/null +++ b/crates/ty_python_semantic/src/rank.rs @@ -0,0 +1,83 @@ +//! A boxed bit slice that supports a constant-time `rank` operation. + +use bitvec::prelude::{BitBox, Msb0}; +use get_size2::GetSize; + +/// A boxed bit slice that supports a constant-time `rank` operation. 
+/// +/// This can be used to "shrink" a large vector, where you only need to keep certain elements, and +/// you want to continue to use the index in the large vector to identify each element. +/// +/// First you create a new smaller vector, keeping only the elements of the large vector that you +/// care about. Now you need a way to translate an index into the large vector (which no longer +/// exists) into the corresponding index into the smaller vector. To do that, you create a bit +/// slice, containing a bit for every element of the original large vector. Each bit in the bit +/// slice indicates whether that element of the large vector was kept in the smaller vector. And +/// the `rank` of the bit gives us the index of the element in the smaller vector. +/// +/// However, the naive implementation of `rank` is O(n) in the size of the bit slice. To address +/// that, we use a standard trick: we divide the bit slice into 64-bit chunks, and when +/// constructing the bit slice, precalculate the rank of the first bit in each chunk. Then, to +/// calculate the rank of an arbitrary bit, we first grab the precalculated rank of the chunk that +/// bit belongs to, and add the rank of the bit within its (fixed-sized) chunk. +/// +/// This costs 1.5 bits per large vector element on 64-bit platforms (one bit in the slice, plus +/// a 32-bit rank per 64-bit chunk), and 2 bits on 32-bit platforms, where chunks are 32 bits wide. 
+#[derive(Clone, Debug, Eq, PartialEq, GetSize)] +pub(crate) struct RankBitBox { + #[get_size(size_fn = bit_box_size)] + bits: BitBox<Chunk, Msb0>, + chunk_ranks: Box<[u32]>, +} + +fn bit_box_size(bits: &BitBox<Chunk, Msb0>) -> usize { + bits.as_raw_slice().get_heap_size() +} + +// bitvec does not support `u64` as a Store type on 32-bit platforms +#[cfg(target_pointer_width = "64")] +type Chunk = u64; +#[cfg(not(target_pointer_width = "64"))] +type Chunk = u32; + +const CHUNK_SIZE: usize = Chunk::BITS as usize; + +impl RankBitBox { + pub(crate) fn from_bits(iter: impl Iterator<Item = bool>) -> Self { + let bits: BitBox<Chunk, Msb0> = iter.collect(); + let chunk_ranks = bits + .as_raw_slice() + .iter() + .scan(0u32, |rank, chunk| { + let result = *rank; + *rank += chunk.count_ones(); + Some(result) + }) + .collect(); + Self { bits, chunk_ranks } + } + + #[inline] + pub(crate) fn get_bit(&self, index: usize) -> Option<bool> { + self.bits.get(index).map(|bit| *bit) + } + + /// Returns the number of bits _before_ (and not including) the given index that are set. + #[inline] + pub(crate) fn rank(&self, index: usize) -> u32 { + let chunk_index = index / CHUNK_SIZE; + let index_within_chunk = index % CHUNK_SIZE; + let chunk_rank = self.chunk_ranks[chunk_index]; + if index_within_chunk == 0 { + return chunk_rank; + } + + // To calculate the rank within the bit's chunk, we zero out the requested bit and every + // bit to the right, then count the number of 1s remaining (i.e., to the left of the + // requested bit). This relies on the `Msb0` bit order of the underlying storage. 
+ let chunk = self.bits.as_raw_slice()[chunk_index]; + let chunk_mask = Chunk::MAX << (CHUNK_SIZE - index_within_chunk); + let rank_within_chunk = (chunk & chunk_mask).count_ones(); + chunk_rank + rank_within_chunk + } +} diff --git a/crates/ty_python_semantic/src/semantic_index/builder.rs b/crates/ty_python_semantic/src/semantic_index/builder.rs index b612d0d91a..7d37371ff6 100644 --- a/crates/ty_python_semantic/src/semantic_index/builder.rs +++ b/crates/ty_python_semantic/src/semantic_index/builder.rs @@ -1021,6 +1021,14 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> { assert_eq!(&self.current_assignments, &[]); + for scope in &self.scopes { + if let Some(parent) = scope.parent() { + self.use_def_maps[parent] + .reachability_constraints + .mark_used(scope.reachability()); + } + } + let mut place_tables: IndexVec<_, _> = self .place_tables .into_iter() diff --git a/crates/ty_python_semantic/src/semantic_index/reachability_constraints.rs b/crates/ty_python_semantic/src/semantic_index/reachability_constraints.rs index 7196c730bf..3f5da746c5 100644 --- a/crates/ty_python_semantic/src/semantic_index/reachability_constraints.rs +++ b/crates/ty_python_semantic/src/semantic_index/reachability_constraints.rs @@ -201,6 +201,7 @@ use rustc_hash::FxHashMap; use crate::Db; use crate::dunder_all::dunder_all_names; use crate::place::{RequiresExplicitReExport, imported_symbol}; +use crate::rank::RankBitBox; use crate::semantic_index::expression::Expression; use crate::semantic_index::place_table; use crate::semantic_index::predicate::{ @@ -283,6 +284,10 @@ impl ScopedReachabilityConstraintId { fn is_terminal(self) -> bool { self.0 >= SMALLEST_TERMINAL.0 } + + fn as_u32(self) -> u32 { + self.0 + } } impl Idx for ScopedReachabilityConstraintId { @@ -309,12 +314,18 @@ const SMALLEST_TERMINAL: ScopedReachabilityConstraintId = ALWAYS_FALSE; /// A collection of reachability constraints for a given scope. 
#[derive(Debug, PartialEq, Eq, salsa::Update, get_size2::GetSize)] pub(crate) struct ReachabilityConstraints { - interiors: IndexVec<ScopedReachabilityConstraintId, InteriorNode>, + /// The interior TDD nodes that were marked as used when being built. + used_interiors: Box<[InteriorNode]>, + /// A bit vector indicating which interior TDD nodes were marked as used. This is indexed by + /// the node's [`ScopedReachabilityConstraintId`]. The rank of the corresponding bit gives the + /// index of that node in the `used_interiors` vector. + used_indices: RankBitBox, } #[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct ReachabilityConstraintsBuilder { interiors: IndexVec<ScopedReachabilityConstraintId, InteriorNode>, + interior_used: IndexVec<ScopedReachabilityConstraintId, bool>, interior_cache: FxHashMap<InteriorNode, ScopedReachabilityConstraintId>, not_cache: FxHashMap<ScopedReachabilityConstraintId, ScopedReachabilityConstraintId>, and_cache: FxHashMap< @@ -334,11 +345,28 @@ pub(crate) struct ReachabilityConstraintsBuilder { } impl ReachabilityConstraintsBuilder { - pub(crate) fn build(mut self) -> ReachabilityConstraints { - self.interiors.shrink_to_fit(); - + pub(crate) fn build(self) -> ReachabilityConstraints { + let used_indices = RankBitBox::from_bits(self.interior_used.iter().copied()); + let used_interiors = (self.interiors.into_iter()) + .zip(self.interior_used) + .filter_map(|(interior, used)| used.then_some(interior)) + .collect(); ReachabilityConstraints { - interiors: self.interiors, + used_interiors, + used_indices, + } + } + + /// Marks that a particular TDD node is used. This lets us throw away interior nodes that were + /// only calculated for intermediate values, and which don't need to be included in the final + /// built result. 
+ pub(crate) fn mark_used(&mut self, node: ScopedReachabilityConstraintId) { + if !node.is_terminal() && !self.interior_used[node] { + self.interior_used[node] = true; + let node = self.interiors[node]; + self.mark_used(node.if_true); + self.mark_used(node.if_ambiguous); + self.mark_used(node.if_false); } } @@ -370,10 +398,10 @@ impl ReachabilityConstraintsBuilder { return node.if_true; } - *self - .interior_cache - .entry(node) - .or_insert_with(|| self.interiors.push(node)) + *self.interior_cache.entry(node).or_insert_with(|| { + self.interior_used.push(false); + self.interiors.push(node) + }) } /// Adds a new reachability constraint that checks a single [`Predicate`]. @@ -581,7 +609,21 @@ impl ReachabilityConstraints { ALWAYS_TRUE => return Truthiness::AlwaysTrue, AMBIGUOUS => return Truthiness::Ambiguous, ALWAYS_FALSE => return Truthiness::AlwaysFalse, - _ => self.interiors[id], + _ => { + // `id` gives us the index of this node in the IndexVec that we used when + // constructing this TDD. When finalizing the builder, we threw away any + // interior nodes that weren't marked as used. The `used_indices` bit vector + // lets us verify that this node was marked as used, and the rank of that bit + // in the bit vector tells us where this node lives in the "condensed" + // `used_interiors` vector. 
+ let raw_index = id.as_u32() as usize; + debug_assert!( + self.used_indices.get_bit(raw_index).unwrap_or(false), + "all used reachability constraints should have been marked as used", + ); + let index = self.used_indices.rank(raw_index) as usize; + self.used_interiors[index] + } }; let predicate = &predicates[node.atom]; match Self::analyze_single(db, predicate) { diff --git a/crates/ty_python_semantic/src/semantic_index/use_def.rs b/crates/ty_python_semantic/src/semantic_index/use_def.rs index 33dc7d8989..61b09f5070 100644 --- a/crates/ty_python_semantic/src/semantic_index/use_def.rs +++ b/crates/ty_python_semantic/src/semantic_index/use_def.rs @@ -1118,7 +1118,41 @@ impl<'db> UseDefMapBuilder<'db> { .add_or_constraint(self.reachability, snapshot.reachability); } + fn mark_reachability_constraints(&mut self) { + // We only walk the fields that are copied through to the UseDefMap when we finish building + // it. + for bindings in &mut self.bindings_by_use { + bindings.finish(&mut self.reachability_constraints); + } + for constraint in self.node_reachability.values() { + self.reachability_constraints.mark_used(*constraint); + } + for place_state in &mut self.place_states { + place_state.finish(&mut self.reachability_constraints); + } + for reachable_definition in &mut self.reachable_definitions { + reachable_definition + .bindings + .finish(&mut self.reachability_constraints); + reachable_definition + .declarations + .finish(&mut self.reachability_constraints); + } + for declarations in self.declarations_by_binding.values_mut() { + declarations.finish(&mut self.reachability_constraints); + } + for bindings in self.bindings_by_definition.values_mut() { + bindings.finish(&mut self.reachability_constraints); + } + for eager_snapshot in &mut self.eager_snapshots { + eager_snapshot.finish(&mut self.reachability_constraints); + } + self.reachability_constraints.mark_used(self.reachability); + } + pub(super) fn finish(mut self) -> UseDefMap<'db> { + 
self.mark_reachability_constraints(); + self.all_definitions.shrink_to_fit(); self.place_states.shrink_to_fit(); self.reachable_definitions.shrink_to_fit(); diff --git a/crates/ty_python_semantic/src/semantic_index/use_def/place_state.rs b/crates/ty_python_semantic/src/semantic_index/use_def/place_state.rs index 1f4b2c04e3..c219bec1dd 100644 --- a/crates/ty_python_semantic/src/semantic_index/use_def/place_state.rs +++ b/crates/ty_python_semantic/src/semantic_index/use_def/place_state.rs @@ -172,6 +172,13 @@ impl Declarations { } } } + + pub(super) fn finish(&mut self, reachability_constraints: &mut ReachabilityConstraintsBuilder) { + self.live_declarations.shrink_to_fit(); + for declaration in &self.live_declarations { + reachability_constraints.mark_used(declaration.reachability_constraint); + } + } } /// A snapshot of a place state that can be used to resolve a reference in a nested eager scope. @@ -185,6 +192,17 @@ pub(super) enum EagerSnapshot { Bindings(Bindings), } +impl EagerSnapshot { + pub(super) fn finish(&mut self, reachability_constraints: &mut ReachabilityConstraintsBuilder) { + match self { + EagerSnapshot::Constraint(_) => {} + EagerSnapshot::Bindings(bindings) => { + bindings.finish(reachability_constraints); + } + } + } +} + /// Live bindings for a single place at some point in control flow. Each live binding comes /// with a set of narrowing constraints and a reachability constraint. #[derive(Clone, Debug, Default, PartialEq, Eq, salsa::Update, get_size2::GetSize)] @@ -203,6 +221,13 @@ impl Bindings { self.unbound_narrowing_constraint .unwrap_or(self.live_bindings[0].narrowing_constraint) } + + pub(super) fn finish(&mut self, reachability_constraints: &mut ReachabilityConstraintsBuilder) { + self.live_bindings.shrink_to_fit(); + for binding in &self.live_bindings { + reachability_constraints.mark_used(binding.reachability_constraint); + } + } } /// One of the live bindings for a single place at some point in control flow. 
@@ -422,6 +447,11 @@ impl PlaceState { pub(super) fn declarations(&self) -> &Declarations { &self.declarations } + + pub(super) fn finish(&mut self, reachability_constraints: &mut ReachabilityConstraintsBuilder) { + self.declarations.finish(reachability_constraints); + self.bindings.finish(reachability_constraints); + } } #[cfg(test)]