use bitflags::bitflags; use hashbrown::hash_table::Entry; use ruff_index::{IndexVec, newtype_index}; use ruff_python_ast::{self as ast, name::Name}; use ruff_text_size::{TextLen as _, TextRange, TextSize}; use rustc_hash::FxHasher; use smallvec::SmallVec; use std::hash::{Hash, Hasher as _}; use std::ops::{Deref, DerefMut}; /// A member access, e.g. `x.y` or `x[1]` or `x["foo"]`. #[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)] pub(crate) struct Member { expression: MemberExpr, flags: MemberFlags, } impl Member { pub(crate) fn new(expression: MemberExpr) -> Self { Self { expression, flags: MemberFlags::empty(), } } /// Returns the left most part of the member expression, e.g. `x` in `x.y.z`. /// /// This is the symbol on which the member access is performed. pub(crate) fn symbol_name(&self) -> &str { self.expression.symbol_name() } pub(crate) fn expression(&self) -> &MemberExpr { &self.expression } /// Is the place given a value in its containing scope? pub(crate) const fn is_bound(&self) -> bool { self.flags.contains(MemberFlags::IS_BOUND) } /// Is the place declared in its containing scope? pub(crate) fn is_declared(&self) -> bool { self.flags.contains(MemberFlags::IS_DECLARED) } pub(super) fn mark_bound(&mut self) { self.insert_flags(MemberFlags::IS_BOUND); } pub(super) fn mark_declared(&mut self) { self.insert_flags(MemberFlags::IS_DECLARED); } pub(super) fn mark_instance_attribute(&mut self) { self.flags.insert(MemberFlags::IS_INSTANCE_ATTRIBUTE); } /// Is the place an instance attribute? pub(crate) fn is_instance_attribute(&self) -> bool { let is_instance_attribute = self.flags.contains(MemberFlags::IS_INSTANCE_ATTRIBUTE); if is_instance_attribute { debug_assert!(self.is_instance_attribute_candidate()); } is_instance_attribute } fn insert_flags(&mut self, flags: MemberFlags) { self.flags.insert(flags); } /// If the place expression has the form `.` /// (meaning it *may* be an instance attribute), /// return `Some()`. Else, return `None`. /// /// This method is internal to the semantic-index submodule. /// It *only* checks that the AST structure of the `Place` is /// correct. It does not check whether the `Place` actually occurred in /// a method context, or whether the `` actually refers to the first /// parameter of the method (i.e. `self`). To answer those questions, /// use [`Self::as_instance_attribute`]. pub(super) fn as_instance_attribute_candidate(&self) -> Option<&str> { let mut segments = self.expression().segments(); let first_segment = segments.next()?; if first_segment.kind == SegmentKind::Attribute && segments.next().is_none() { Some(first_segment.text) } else { None } } /// Return `true` if the place expression has the form `.`, /// indicating that it *may* be an instance attribute if we are in a method context. /// /// This method is internal to the semantic-index submodule. /// It *only* checks that the AST structure of the `Place` is /// correct. It does not check whether the `Place` actually occurred in /// a method context, or whether the `` actually refers to the first /// parameter of the method (i.e. `self`). To answer those questions, /// use [`Self::is_instance_attribute`]. pub(super) fn is_instance_attribute_candidate(&self) -> bool { self.as_instance_attribute_candidate().is_some() } /// Does the place expression have the form `self.{name}` (`self` is the first parameter of the method)? pub(super) fn is_instance_attribute_named(&self, name: &str) -> bool { self.as_instance_attribute() == Some(name) } /// Return `Some()` if the place expression is an instance attribute. pub(crate) fn as_instance_attribute(&self) -> Option<&str> { if self.is_instance_attribute() { debug_assert!(self.as_instance_attribute_candidate().is_some()); self.as_instance_attribute_candidate() } else { None } } } impl std::fmt::Display for Member { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(&self.expression, f) } } bitflags! { /// Flags that can be queried to obtain information about a member in a given scope. /// /// See the doc-comment at the top of [`super::use_def`] for explanations of what it /// means for a member to be *bound* as opposed to *declared*. #[derive(Copy, Clone, Debug, Eq, PartialEq)] struct MemberFlags: u8 { const IS_BOUND = 1 << 0; const IS_DECLARED = 1 << 1; const IS_INSTANCE_ATTRIBUTE = 1 << 2; } } impl get_size2::GetSize for MemberFlags {} /// An expression accessing a member on a symbol named `symbol_name`, e.g. `x.y.z`. /// /// The parts after the symbol name are called segments, and they can be either: /// * An attribute access, e.g. `.y` in `x.y` /// * An integer-based subscript, e.g. `[1]` in `x[1]` /// * A string-based subscript, e.g. `["foo"]` in `x["foo"]` /// /// Uses a compact representation where the entire expression is stored as a single path. /// For example, `foo.bar[0]["baz"]` is stored as: /// - path: `foobar0baz` /// - segments: stores where each segment starts and its kind (attribute, int subscript, string subscript) /// /// The symbol name can be extracted from the path by taking the text up to the first segment's start offset. #[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)] pub(crate) struct MemberExpr { /// The entire path as a single Name path: Name, /// Metadata for each segment (in forward order) segments: Segments, } impl MemberExpr { pub(super) fn try_from_expr(expression: ast::ExprRef<'_>) -> Option { fn visit(expr: ast::ExprRef) -> Option<(Name, SmallVec<[SegmentInfo; 8]>)> { use std::fmt::Write as _; match expr { ast::ExprRef::Name(name) => { Some((name.id.clone(), smallvec::SmallVec::new_const())) } ast::ExprRef::Attribute(attribute) => { let (mut path, mut segments) = visit(ast::ExprRef::from(&attribute.value))?; let start_offset = path.text_len(); let _ = write!(path, "{}", attribute.attr.id); segments.push(SegmentInfo::new(SegmentKind::Attribute, start_offset)); Some((path, segments)) } ast::ExprRef::Subscript(subscript) => { let (mut path, mut segments) = visit((&subscript.value).into())?; let start_offset = path.text_len(); match &*subscript.slice { ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value: ast::Number::Int(index), .. }) => { let _ = write!(path, "{index}"); segments .push(SegmentInfo::new(SegmentKind::IntSubscript, start_offset)); } ast::Expr::StringLiteral(string) => { let _ = write!(path, "{}", string.value); segments .push(SegmentInfo::new(SegmentKind::StringSubscript, start_offset)); } _ => { return None; } } Some((path, segments)) } _ => None, } } let (path, segments) = visit(expression)?; if segments.is_empty() { None } else { Some(Self { path, segments: Segments::from_vec(segments), }) } } fn segment_infos(&self) -> impl Iterator + '_ { self.segments.iter() } fn segments(&self) -> impl Iterator> + '_ { SegmentsIterator::new(self.path.as_str(), self.segment_infos()) } fn shrink_to_fit(&mut self) { self.path.shrink_to_fit(); } /// Returns the left most part of the member expression, e.g. `x` in `x.y.z`. /// /// This is the symbol on which the member access is performed. pub(crate) fn symbol_name(&self) -> &str { self.as_ref().symbol_name() } pub(super) fn num_segments(&self) -> usize { self.segments.len() } pub(crate) fn as_ref(&self) -> MemberExprRef<'_> { MemberExprRef { path: self.path.as_str(), segments: SegmentsRef::from(&self.segments), } } } impl std::fmt::Display for MemberExpr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(self.symbol_name())?; for segment in self.segments() { match segment.kind { SegmentKind::Attribute => write!(f, ".{}", segment.text)?, SegmentKind::IntSubscript => write!(f, "[{}]", segment.text)?, SegmentKind::StringSubscript => write!(f, "[\"{}\"]", segment.text)?, } } Ok(()) } } impl PartialEq> for MemberExpr { fn eq(&self, other: &MemberExprRef) -> bool { self.as_ref() == *other } } impl PartialEq> for &MemberExpr { fn eq(&self, other: &MemberExprRef) -> bool { self.as_ref() == *other } } impl PartialEq for MemberExprRef<'_> { fn eq(&self, other: &MemberExpr) -> bool { other == self } } impl PartialEq<&MemberExpr> for MemberExprRef<'_> { fn eq(&self, other: &&MemberExpr) -> bool { *other == self } } /// Reference to a member expression. #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct MemberExprRef<'a> { path: &'a str, segments: SegmentsRef<'a>, } impl<'a> MemberExprRef<'a> { pub(super) fn symbol_name(&self) -> &'a str { let end = self .segments .iter() .next() .map(SegmentInfo::offset) .unwrap_or(self.path.text_len()); let range = TextRange::new(TextSize::default(), end); &self.path[range] } #[cfg(test)] fn segments(&self) -> impl Iterator> + '_ { SegmentsIterator::new(self.path, self.segments.iter()) } pub(super) fn parent(&self) -> Option> { let parent_segments = self.segments.parent()?; // The removed segment is always the last one. Find its start offset. let last_segment = self.segments.iter().last()?; let path_end = last_segment.offset(); Some(MemberExprRef { path: &self.path[TextRange::new(TextSize::default(), path_end)], segments: parent_segments, }) } } impl<'a> From<&'a MemberExpr> for MemberExprRef<'a> { fn from(value: &'a MemberExpr) -> Self { value.as_ref() } } impl Hash for MemberExprRef<'_> { fn hash(&self, state: &mut H) { // Path on its own isn't 100% unique, but it should avoid // most collisions and avoids iterating all segments. self.path.hash(state); } } /// Uniquely identifies a member in a scope. #[newtype_index] #[derive(get_size2::GetSize, salsa::Update)] pub struct ScopedMemberId; /// The members of a scope. Allows lookup by member path and [`ScopedMemberId`]. #[derive(Default, get_size2::GetSize)] pub(super) struct MemberTable { members: IndexVec, /// Map from member path to its ID. /// /// Uses a hash table to avoid storing the path twice. map: hashbrown::HashTable, } impl MemberTable { /// Returns the member with the given ID. /// /// ## Panics /// If the ID is not valid for this table. #[track_caller] pub(crate) fn member(&self, id: ScopedMemberId) -> &Member { &self.members[id] } /// Returns a mutable reference to the member with the given ID. /// /// ## Panics /// If the ID is not valid for this table. #[track_caller] pub(super) fn member_mut(&mut self, id: ScopedMemberId) -> &mut Member { &mut self.members[id] } /// Returns an iterator over all members in the table. pub(crate) fn iter(&self) -> std::slice::Iter<'_, Member> { self.members.iter() } fn hash_member_expression_ref(member: &MemberExprRef) -> u64 { hash_single(member) } /// Returns the ID of the member with the given expression, if it exists. pub(crate) fn member_id<'a>( &self, member: impl Into>, ) -> Option { let member = member.into(); let hash = Self::hash_member_expression_ref(&member); self.map .find(hash, |id| self.members[*id].expression == member) .copied() } pub(crate) fn place_id_by_instance_attribute_name(&self, name: &str) -> Option { for (id, member) in self.members.iter_enumerated() { if member.is_instance_attribute_named(name) { return Some(id); } } None } } impl PartialEq for MemberTable { fn eq(&self, other: &Self) -> bool { // It's sufficient to compare the members as the map is only a reverse lookup. self.members == other.members } } impl Eq for MemberTable {} impl std::fmt::Debug for MemberTable { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_tuple("MemberTable").field(&self.members).finish() } } #[derive(Debug, Default)] pub(super) struct MemberTableBuilder { table: MemberTable, } impl MemberTableBuilder { /// Adds a member to the table or updates the flags of an existing member if it already exists. /// /// Members are identified by their expression, which is hashed to find the entry in the table. pub(super) fn add(&mut self, mut member: Member) -> (ScopedMemberId, bool) { let member_ref = member.expression.as_ref(); let hash = MemberTable::hash_member_expression_ref(&member_ref); let entry = self.table.map.entry( hash, |id| self.table.members[*id].expression.as_ref() == member.expression.as_ref(), |id| { let ref_expr = self.table.members[*id].expression.as_ref(); MemberTable::hash_member_expression_ref(&ref_expr) }, ); match entry { Entry::Occupied(entry) => { let id = *entry.get(); if !member.flags.is_empty() { self.members[id].flags.insert(member.flags); } (id, false) } Entry::Vacant(entry) => { member.expression.shrink_to_fit(); let id = self.table.members.push(member); entry.insert(id); (id, true) } } } pub(super) fn build(self) -> MemberTable { let mut table = self.table; table.members.shrink_to_fit(); table.map.shrink_to_fit(|id| { let ref_expr = table.members[*id].expression.as_ref(); MemberTable::hash_member_expression_ref(&ref_expr) }); table } } impl Deref for MemberTableBuilder { type Target = MemberTable; fn deref(&self) -> &Self::Target { &self.table } } impl DerefMut for MemberTableBuilder { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.table } } /// Representation of segments that can be either inline or heap-allocated. /// /// Design choices: /// - Uses `Box<[SegmentInfo]>` instead of `ThinVec` because even with a `ThinVec`, the size of `Segments` is still 128 bytes. /// - Uses u64 for inline storage. That's the largest size without increasing the overall size of `Segments` and allows to encode up to 7 segments. #[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)] enum Segments { /// Inline storage for up to 7 segments with 6-bit relative offsets (max 63 bytes per segment) Small(SmallSegments), /// Heap storage for expressions that don't fit inline Heap(Box<[SegmentInfo]>), } static_assertions::assert_eq_size!(SmallSegments, u64); #[cfg(target_pointer_width = "64")] static_assertions::assert_eq_size!(Segments, [u64; 2]); impl Segments { fn from_vec(segments: SmallVec<[SegmentInfo; 8]>) -> Self { debug_assert!( !segments.is_empty(), "Segments cannot be empty. A member without segments is a symbol" ); if let Some(small) = SmallSegments::try_from_slice(&segments) { Self::Small(small) } else { Self::Heap(segments.into_vec().into_boxed_slice()) } } fn len(&self) -> usize { match self { Self::Small(small) => small.len(), Self::Heap(segments) => segments.len(), } } fn iter(&self) -> impl Iterator + '_ { match self { Self::Small(small) => itertools::Either::Left(small.iter()), Self::Heap(heap) => itertools::Either::Right(heap.iter().copied()), } } } /// Segment metadata - packed into a single u32 /// Layout: [kind: 2 bits][offset: 30 bits] /// - Bits 0-1: `SegmentKind` (0=Attribute, 1=IntSubscript, 2=StringSubscript) /// - Bits 2-31: Absolute offset from start of path (up to 1,073,741,823 bytes) #[derive(Clone, Copy, PartialEq, Eq, Hash, get_size2::GetSize)] struct SegmentInfo(u32); const KIND_MASK: u32 = 0b11; const OFFSET_SHIFT: u32 = 2; const MAX_OFFSET: u32 = (1 << 30) - 1; // 2^30 - 1 impl SegmentInfo { const fn new(kind: SegmentKind, offset: TextSize) -> Self { assert!(offset.to_u32() < MAX_OFFSET); let value = (offset.to_u32() << OFFSET_SHIFT) | (kind as u32); Self(value) } const fn kind(self) -> SegmentKind { match self.0 & KIND_MASK { 0 => SegmentKind::Attribute, 1 => SegmentKind::IntSubscript, 2 => SegmentKind::StringSubscript, _ => panic!("Invalid SegmentKind bits"), } } const fn offset(self) -> TextSize { TextSize::new(self.0 >> OFFSET_SHIFT) } } impl std::fmt::Debug for SegmentInfo { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SegmentInfo") .field("kind", &self.kind()) .field("offset", &self.offset()) .finish() } } struct Segment<'a> { kind: SegmentKind, text: &'a str, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, get_size2::GetSize)] #[repr(u8)] enum SegmentKind { Attribute = 0, IntSubscript = 1, StringSubscript = 2, } /// Iterator over segments that converts `SegmentInfo` to `Segment` with text slices. struct SegmentsIterator<'a, I> { path: &'a str, segment_infos: I, current: Option, next: Option, } impl<'a, I> SegmentsIterator<'a, I> where I: Iterator, { fn new(path: &'a str, mut segment_infos: I) -> Self { let current = segment_infos.next(); let next = segment_infos.next(); Self { path, segment_infos, current, next, } } } impl<'a, I> Iterator for SegmentsIterator<'a, I> where I: Iterator, { type Item = Segment<'a>; fn next(&mut self) -> Option { let info = self.current.take()?; let end = self .next .map(SegmentInfo::offset) .unwrap_or(self.path.text_len()); self.current = self.next; self.next = self.segment_infos.next(); Some(Segment { kind: info.kind(), text: &self.path[TextRange::new(info.offset(), end)], }) } } const INLINE_COUNT_BITS: u32 = 3; const INLINE_COUNT_MASK: u64 = (1 << INLINE_COUNT_BITS) - 1; const INLINE_SEGMENT_BITS: u32 = 8; const INLINE_SEGMENT_MASK: u64 = (1 << INLINE_SEGMENT_BITS) - 1; const INLINE_KIND_BITS: u32 = 2; const INLINE_KIND_MASK: u64 = (1 << INLINE_KIND_BITS) - 1; const INLINE_PREV_LEN_BITS: u32 = 6; const INLINE_PREV_LEN_MASK: u64 = (1 << INLINE_PREV_LEN_BITS) - 1; const INLINE_MAX_SEGMENTS: usize = 7; const INLINE_MAX_RELATIVE_OFFSET: u32 = (1 << INLINE_PREV_LEN_BITS) - 1; // 63 /// Compact representation that can store up to 7 segments inline in a u64. /// /// Layout: /// - Bits 0-2: Number of segments minus 1 (0-6, representing 1-7 segments) /// - Bits 3-10: Segment 0 (2 bits kind + 6 bits relative offset, max 63 bytes) /// - Bits 11-18: Segment 1 (2 bits kind + 6 bits relative offset, max 63 bytes) /// - Bits 19-26: Segment 2 (2 bits kind + 6 bits relative offset, max 63 bytes) /// - Bits 27-34: Segment 3 (2 bits kind + 6 bits relative offset, max 63 bytes) /// - Bits 35-42: Segment 4 (2 bits kind + 6 bits relative offset, max 63 bytes) /// - Bits 43-50: Segment 5 (2 bits kind + 6 bits relative offset, max 63 bytes) /// - Bits 51-58: Segment 6 (2 bits kind + 6 bits relative offset, max 63 bytes) /// - Bits 59-63: Unused (5 bits) /// /// Constraints: /// - Maximum 7 segments (realistic limit for member access chains) /// - Maximum 63-byte relative offset per segment (sufficient for most identifiers) /// - Never empty (`segments.len()` >= 1) /// #[derive(Clone, Copy, PartialEq, Eq, get_size2::GetSize)] #[repr(transparent)] struct SmallSegments(u64); impl SmallSegments { fn try_from_slice(segments: &[SegmentInfo]) -> Option { if segments.is_empty() || segments.len() > INLINE_MAX_SEGMENTS { return None; } // Pack into inline representation // Store count minus 1 (since segments are never empty, range 0-6 represents 1-7 segments) let mut packed = (segments.len() - 1) as u64; let mut prev_offset = TextSize::new(0); for (i, segment) in segments.iter().enumerate() { // Compute relative offset on-the-fly let relative_offset = segment.offset() - prev_offset; if relative_offset > TextSize::from(INLINE_MAX_RELATIVE_OFFSET) { return None; } let kind = segment.kind() as u64; let relative_offset_val = u64::from(relative_offset.to_u32()); let segment_data = (relative_offset_val << INLINE_KIND_BITS) | kind; let shift = INLINE_COUNT_BITS + (u32::try_from(i).expect("i is bounded by INLINE_MAX_SEGMENTS") * INLINE_SEGMENT_BITS); packed |= segment_data << shift; prev_offset = segment.offset(); } Some(Self(packed)) } #[expect( clippy::cast_possible_truncation, reason = "INLINE_COUNT_MASK ensures value is at most 7" )] const fn len(self) -> usize { // Add 1 because we store count minus 1 ((self.0 & INLINE_COUNT_MASK) + 1) as usize } fn iter(self) -> SmallSegmentsInfoIterator { SmallSegmentsInfoIterator { segments: self, index: 0, next_offset: TextSize::new(0), } } /// Returns the parent member expression, e.g. `x.b` from `x.b.c`, or `None` if the parent is /// the `symbol` itself (e, g. parent of `x.a` is just `x`). const fn parent(self) -> Option { let len = self.len(); if len <= 1 { return None; } // Simply copy the packed value but update the count let mut new_packed = self.0; // Clear the count bits and set the new count (len - 2, since we store count - 1) new_packed &= !INLINE_COUNT_MASK; new_packed |= (len - 2) as u64; Some(Self(new_packed)) } } impl std::fmt::Debug for SmallSegments { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_list().entries(self.iter()).finish() } } struct SmallSegmentsInfoIterator { segments: SmallSegments, index: usize, next_offset: TextSize, } impl Iterator for SmallSegmentsInfoIterator { type Item = SegmentInfo; fn next(&mut self) -> Option { let count = self.segments.len(); if self.index >= count { return None; } // Extract the relative offset and kind for the current segment let shift = INLINE_COUNT_BITS + (u32::try_from(self.index).expect("index is bounded by INLINE_MAX_SEGMENTS") * INLINE_SEGMENT_BITS); let segment_data = (self.segments.0 >> shift) & INLINE_SEGMENT_MASK; let kind = (segment_data & INLINE_KIND_MASK) as u8; let relative_offset = ((segment_data >> INLINE_KIND_BITS) & INLINE_PREV_LEN_MASK) as u32; // Update the running absolute offset self.next_offset += TextSize::new(relative_offset); let kind = match kind { 0 => SegmentKind::Attribute, 1 => SegmentKind::IntSubscript, 2 => SegmentKind::StringSubscript, _ => panic!("Invalid SegmentKind bits"), }; self.index += 1; Some(SegmentInfo::new(kind, self.next_offset)) } } /// Reference view of segments, can be either small (inline) or heap-allocated. #[derive(Clone, Copy, Debug)] enum SegmentsRef<'a> { Small(SmallSegments), Heap(&'a [SegmentInfo]), } impl<'a> SegmentsRef<'a> { fn len(&self) -> usize { match self { Self::Small(small) => small.len(), Self::Heap(segments) => segments.len(), } } fn iter(&self) -> impl Iterator + '_ { match self { Self::Small(small) => itertools::Either::Left(small.iter()), Self::Heap(heap) => itertools::Either::Right(heap.iter().copied()), } } /// Returns a parent view with one fewer segment, or None if <= 1 segment fn parent(&self) -> Option> { match self { Self::Small(small) => small.parent().map(SegmentsRef::Small), Self::Heap(segments) => { let len = segments.len(); if len <= 1 { None } else { Some(SegmentsRef::Heap(&segments[..len - 1])) } } } } } impl<'a> From<&'a Segments> for SegmentsRef<'a> { fn from(segments: &'a Segments) -> Self { match segments { Segments::Small(small) => SegmentsRef::Small(*small), Segments::Heap(heap) => SegmentsRef::Heap(heap), } } } impl PartialEq for SegmentsRef<'_> { fn eq(&self, other: &Self) -> bool { let len = self.len(); if len != other.len() { return false; } self.iter().eq(other.iter()) } } impl Eq for SegmentsRef<'_> {} /// Helper function to hash a single value and return the hash. fn hash_single(value: &T) -> u64 { let mut hasher = FxHasher::default(); value.hash(&mut hasher); hasher.finish() } #[cfg(test)] mod tests { use super::*; #[test] fn test_member_expr_ref_hash_and_eq_small_heap() { // For expression: foo.bar[0]["baz"] // The path would be: "foobar0baz" (no dots or brackets in the path) let path = "foobar0baz"; let segments = vec![ SegmentInfo::new(SegmentKind::Attribute, TextSize::new(3)), // .bar at offset 3 SegmentInfo::new(SegmentKind::IntSubscript, TextSize::new(6)), // [0] at offset 6 SegmentInfo::new(SegmentKind::StringSubscript, TextSize::new(7)), // ["baz"] at offset 7 ]; // Create Small version. let small_segments = SmallSegments::try_from_slice(&segments).unwrap(); let member_ref_small = MemberExprRef { path, segments: SegmentsRef::Small(small_segments), }; // Create Heap version with the same data. let heap_segments: Box<[SegmentInfo]> = segments.into_boxed_slice(); let member_ref_heap = MemberExprRef { path, segments: SegmentsRef::Heap(&heap_segments), }; // Test hash equality (MemberExprRef only hashes the path). assert_eq!( hash_single(&member_ref_small), hash_single(&member_ref_heap) ); // Test equality in both directions. assert_eq!(member_ref_small, member_ref_heap); assert_eq!(member_ref_heap, member_ref_small); } #[test] fn test_member_expr_ref_different_segments() { // For expressions: foo.bar[0] vs foo.bar["0"] // Both have the same path "foobar0" but different segment types let path = "foobar0"; // First expression: foo.bar[0] let segments1 = vec![ SegmentInfo::new(SegmentKind::Attribute, TextSize::new(3)), // .bar at offset 3 SegmentInfo::new(SegmentKind::IntSubscript, TextSize::new(6)), // [0] at offset 6 ]; // Second expression: foo.bar["0"] let segments2 = vec![ SegmentInfo::new(SegmentKind::Attribute, TextSize::new(3)), // .bar at offset 3 SegmentInfo::new(SegmentKind::StringSubscript, TextSize::new(6)), // ["0"] at offset 6 ]; // Create MemberExprRef instances let small1 = SmallSegments::try_from_slice(&segments1).unwrap(); let member_ref1 = MemberExprRef { path, segments: SegmentsRef::Small(small1), }; let small2 = SmallSegments::try_from_slice(&segments2).unwrap(); let member_ref2 = MemberExprRef { path, segments: SegmentsRef::Small(small2), }; // Test inequality assert_ne!(member_ref1, member_ref2); assert_ne!(member_ref2, member_ref1); // Test hash equality (MemberExprRef only hashes the path, not segments) assert_eq!(hash_single(&member_ref1), hash_single(&member_ref2)); } #[test] fn test_member_expr_ref_parent() { use ruff_python_parser::parse_expression; // Parse a real Python expression let parsed = parse_expression(r#"foo.bar[0]["baz"]"#).unwrap(); let expr = parsed.expr(); // Convert to MemberExpr let member_expr = MemberExpr::try_from_expr(ast::ExprRef::from(expr)).unwrap(); let member_ref = member_expr.as_ref(); // Verify the initial state: foo.bar[0]["baz"] assert_eq!(member_ref.symbol_name(), "foo"); let segments: Vec<_> = member_ref.segments().map(|s| (s.kind, s.text)).collect(); assert_eq!( segments, vec![ (SegmentKind::Attribute, "bar"), (SegmentKind::IntSubscript, "0"), (SegmentKind::StringSubscript, "baz") ] ); // Test parent() removes the last segment ["baz"] -> foo.bar[0] let parent1 = member_ref.parent().unwrap(); assert_eq!(parent1.symbol_name(), "foo"); let parent1_segments: Vec<_> = parent1.segments().map(|s| (s.kind, s.text)).collect(); assert_eq!( parent1_segments, vec![ (SegmentKind::Attribute, "bar"), (SegmentKind::IntSubscript, "0") ] ); // Test parent of parent removes [0] -> foo.bar let parent2 = parent1.parent().unwrap(); assert_eq!(parent2.symbol_name(), "foo"); let parent2_segments: Vec<_> = parent2.segments().map(|s| (s.kind, s.text)).collect(); assert_eq!(parent2_segments, vec![(SegmentKind::Attribute, "bar")]); // Test parent of single segment is a symbol and not a member. let parent3 = parent2.parent(); assert!(parent3.is_none()); } #[test] fn test_member_expr_small_vs_heap_allocation() { use ruff_python_parser::parse_expression; // Test Small allocation: 7 segments (maximum for inline storage) // Create expression with exactly 7 segments: x.a.b.c.d.e.f.g let small_expr = parse_expression("x.a.b.c.d.e.f.g").unwrap(); let small_member = MemberExpr::try_from_expr(ast::ExprRef::from(small_expr.expr())).unwrap(); // Should use Small allocation assert!(matches!(small_member.segments, Segments::Small(_))); assert_eq!(small_member.num_segments(), 7); // Test Heap allocation: 8 segments (exceeds inline capacity) // Create expression with 8 segments: x.a.b.c.d.e.f.g.h let heap_expr = parse_expression("x.a.b.c.d.e.f.g.h").unwrap(); let heap_member = MemberExpr::try_from_expr(ast::ExprRef::from(heap_expr.expr())).unwrap(); // Should use Heap allocation assert!(matches!(heap_member.segments, Segments::Heap(_))); assert_eq!(heap_member.num_segments(), 8); // Test Small allocation with relative offset limit // Create expression where relative offsets are small enough: a.b[0]["c"] let small_offset_expr = parse_expression(r#"a.b[0]["c"]"#).unwrap(); let small_offset_member = MemberExpr::try_from_expr(ast::ExprRef::from(small_offset_expr.expr())).unwrap(); // Should use Small allocation (3 segments, small offsets) assert!(matches!(small_offset_member.segments, Segments::Small(_))); assert_eq!(small_offset_member.num_segments(), 3); // Test Small allocation with maximum 63-byte relative offset limit // Create expression where one segment has exactly 63 bytes (the limit) let segment_63_bytes = "a".repeat(63); let max_offset_expr_code = format!("x.{segment_63_bytes}.y"); let max_offset_expr = parse_expression(&max_offset_expr_code).unwrap(); let max_offset_member = MemberExpr::try_from_expr(ast::ExprRef::from(max_offset_expr.expr())).unwrap(); // Should still use Small allocation (exactly at the limit) assert!(matches!(max_offset_member.segments, Segments::Small(_))); assert_eq!(max_offset_member.num_segments(), 2); // Test that heap allocation works for segment content that would exceed relative offset limits // This would require very long identifiers (>63 bytes between segments), which is uncommon // but we can test by creating an expression with long attribute names let long_name = "a".repeat(64); // 64 bytes (exceeds 63-byte limit) let long_expr_code = format!("x.{long_name}.y"); let long_expr = parse_expression(&long_expr_code).unwrap(); let long_member = MemberExpr::try_from_expr(ast::ExprRef::from(long_expr.expr())).unwrap(); // Should use Heap allocation due to large relative offset assert!(matches!(long_member.segments, Segments::Heap(_))); assert_eq!(long_member.num_segments(), 2); } }