// Mirror of https://github.com/astral-sh/ruff (Rust, 1038 lines, 35 KiB)
use bitflags::bitflags;
|
|
use hashbrown::hash_table::Entry;
|
|
use ruff_index::{IndexVec, newtype_index};
|
|
use ruff_python_ast::{self as ast, name::Name};
|
|
use ruff_text_size::{TextLen as _, TextRange, TextSize};
|
|
use rustc_hash::FxHasher;
|
|
use smallvec::SmallVec;
|
|
use std::hash::{Hash, Hasher as _};
|
|
use std::ops::{Deref, DerefMut};
|
|
|
|
/// A member access, e.g. `x.y` or `x[1]` or `x["foo"]`.
///
/// Pairs the accessed expression with the flags recorded for it
/// (bound, declared, instance attribute).
#[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)]
pub(crate) struct Member {
    /// The member expression itself, e.g. `x.y.z`.
    expression: MemberExpr,
    /// Flags recorded for this member; starts out empty in [`Member::new`].
    flags: MemberFlags,
}
|
|
|
|
impl Member {
|
|
pub(crate) fn new(expression: MemberExpr) -> Self {
|
|
Self {
|
|
expression,
|
|
flags: MemberFlags::empty(),
|
|
}
|
|
}
|
|
|
|
/// Returns the left most part of the member expression, e.g. `x` in `x.y.z`.
|
|
///
|
|
/// This is the symbol on which the member access is performed.
|
|
pub(crate) fn symbol_name(&self) -> &str {
|
|
self.expression.symbol_name()
|
|
}
|
|
|
|
pub(crate) fn expression(&self) -> &MemberExpr {
|
|
&self.expression
|
|
}
|
|
|
|
/// Is the place given a value in its containing scope?
|
|
pub(crate) const fn is_bound(&self) -> bool {
|
|
self.flags.contains(MemberFlags::IS_BOUND)
|
|
}
|
|
|
|
/// Is the place declared in its containing scope?
|
|
pub(crate) fn is_declared(&self) -> bool {
|
|
self.flags.contains(MemberFlags::IS_DECLARED)
|
|
}
|
|
|
|
pub(super) fn mark_bound(&mut self) {
|
|
self.insert_flags(MemberFlags::IS_BOUND);
|
|
}
|
|
|
|
pub(super) fn mark_declared(&mut self) {
|
|
self.insert_flags(MemberFlags::IS_DECLARED);
|
|
}
|
|
|
|
pub(super) fn mark_instance_attribute(&mut self) {
|
|
self.flags.insert(MemberFlags::IS_INSTANCE_ATTRIBUTE);
|
|
}
|
|
|
|
/// Is the place an instance attribute?
|
|
pub(crate) fn is_instance_attribute(&self) -> bool {
|
|
let is_instance_attribute = self.flags.contains(MemberFlags::IS_INSTANCE_ATTRIBUTE);
|
|
if is_instance_attribute {
|
|
debug_assert!(self.is_instance_attribute_candidate());
|
|
}
|
|
is_instance_attribute
|
|
}
|
|
|
|
fn insert_flags(&mut self, flags: MemberFlags) {
|
|
self.flags.insert(flags);
|
|
}
|
|
|
|
/// If the place expression has the form `<NAME>.<MEMBER>`
|
|
/// (meaning it *may* be an instance attribute),
|
|
/// return `Some(<MEMBER>)`. Else, return `None`.
|
|
///
|
|
/// This method is internal to the semantic-index submodule.
|
|
/// It *only* checks that the AST structure of the `Place` is
|
|
/// correct. It does not check whether the `Place` actually occurred in
|
|
/// a method context, or whether the `<NAME>` actually refers to the first
|
|
/// parameter of the method (i.e. `self`). To answer those questions,
|
|
/// use [`Self::as_instance_attribute`].
|
|
pub(super) fn as_instance_attribute_candidate(&self) -> Option<&str> {
|
|
let mut segments = self.expression().segments();
|
|
let first_segment = segments.next()?;
|
|
|
|
if first_segment.kind == SegmentKind::Attribute && segments.next().is_none() {
|
|
Some(first_segment.text)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// Return `true` if the place expression has the form `<NAME>.<MEMBER>`,
|
|
/// indicating that it *may* be an instance attribute if we are in a method context.
|
|
///
|
|
/// This method is internal to the semantic-index submodule.
|
|
/// It *only* checks that the AST structure of the `Place` is
|
|
/// correct. It does not check whether the `Place` actually occurred in
|
|
/// a method context, or whether the `<NAME>` actually refers to the first
|
|
/// parameter of the method (i.e. `self`). To answer those questions,
|
|
/// use [`Self::is_instance_attribute`].
|
|
pub(super) fn is_instance_attribute_candidate(&self) -> bool {
|
|
self.as_instance_attribute_candidate().is_some()
|
|
}
|
|
|
|
/// Does the place expression have the form `self.{name}` (`self` is the first parameter of the method)?
|
|
pub(super) fn is_instance_attribute_named(&self, name: &str) -> bool {
|
|
self.as_instance_attribute() == Some(name)
|
|
}
|
|
|
|
/// Return `Some(<ATTRIBUTE>)` if the place expression is an instance attribute.
|
|
pub(crate) fn as_instance_attribute(&self) -> Option<&str> {
|
|
if self.is_instance_attribute() {
|
|
debug_assert!(self.as_instance_attribute_candidate().is_some());
|
|
self.as_instance_attribute_candidate()
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for Member {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
std::fmt::Display::fmt(&self.expression, f)
|
|
}
|
|
}
|
|
|
|
bitflags! {
    /// Flags that can be queried to obtain information about a member in a given scope.
    ///
    /// See the doc-comment at the top of [`super::use_def`] for explanations of what it
    /// means for a member to be *bound* as opposed to *declared*.
    #[derive(Copy, Clone, Debug, Eq, PartialEq)]
    struct MemberFlags: u8 {
        /// Set by `Member::mark_bound`.
        const IS_BOUND = 1 << 0;
        /// Set by `Member::mark_declared`.
        const IS_DECLARED = 1 << 1;
        /// Set by `Member::mark_instance_attribute`.
        const IS_INSTANCE_ATTRIBUTE = 1 << 2;
    }
}

// Empty impl: relies on the trait's provided defaults.
impl get_size2::GetSize for MemberFlags {}
|
|
|
|
/// An expression accessing a member on a symbol named `symbol_name`, e.g. `x.y.z`.
///
/// The parts after the symbol name are called segments, and they can be either:
/// * An attribute access, e.g. `.y` in `x.y`
/// * An integer-based subscript, e.g. `[1]` in `x[1]`
/// * A string-based subscript, e.g. `["foo"]` in `x["foo"]`
///
/// Uses a compact representation where the entire expression is stored as a single path.
/// For example, `foo.bar[0]["baz"]` is stored as:
/// - path: `foobar0baz`
/// - segments: stores where each segment starts and its kind (attribute, int subscript, string subscript)
///
/// The symbol name can be extracted from the path by taking the text up to the first segment's start offset.
#[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)]
pub(crate) struct MemberExpr {
    /// The entire path as a single `Name`: the symbol name followed by the text of
    /// every segment, without any `.`, `[`, `]` or quote characters.
    path: Name,
    /// Metadata for each segment (in forward order)
    segments: Segments,
}
|
|
|
|
impl MemberExpr {
|
|
pub(super) fn try_from_expr(expression: ast::ExprRef<'_>) -> Option<Self> {
|
|
fn visit(expr: ast::ExprRef) -> Option<(Name, SmallVec<[SegmentInfo; 8]>)> {
|
|
use std::fmt::Write as _;
|
|
|
|
match expr {
|
|
ast::ExprRef::Name(name) => {
|
|
Some((name.id.clone(), smallvec::SmallVec::new_const()))
|
|
}
|
|
ast::ExprRef::Attribute(attribute) => {
|
|
let (mut path, mut segments) = visit(ast::ExprRef::from(&attribute.value))?;
|
|
|
|
let start_offset = path.text_len();
|
|
let _ = write!(path, "{}", attribute.attr.id);
|
|
segments.push(SegmentInfo::new(SegmentKind::Attribute, start_offset));
|
|
|
|
Some((path, segments))
|
|
}
|
|
ast::ExprRef::Subscript(subscript) => {
|
|
let (mut path, mut segments) = visit((&subscript.value).into())?;
|
|
let start_offset = path.text_len();
|
|
|
|
match &*subscript.slice {
|
|
ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
|
|
value: ast::Number::Int(index),
|
|
..
|
|
}) => {
|
|
let _ = write!(path, "{index}");
|
|
segments
|
|
.push(SegmentInfo::new(SegmentKind::IntSubscript, start_offset));
|
|
}
|
|
ast::Expr::StringLiteral(string) => {
|
|
let _ = write!(path, "{}", string.value);
|
|
segments
|
|
.push(SegmentInfo::new(SegmentKind::StringSubscript, start_offset));
|
|
}
|
|
_ => {
|
|
return None;
|
|
}
|
|
}
|
|
|
|
Some((path, segments))
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
let (path, segments) = visit(expression)?;
|
|
|
|
if segments.is_empty() {
|
|
None
|
|
} else {
|
|
Some(Self {
|
|
path,
|
|
segments: Segments::from_vec(segments),
|
|
})
|
|
}
|
|
}
|
|
|
|
fn segment_infos(&self) -> impl Iterator<Item = SegmentInfo> + '_ {
|
|
self.segments.iter()
|
|
}
|
|
|
|
fn segments(&self) -> impl Iterator<Item = Segment<'_>> + '_ {
|
|
SegmentsIterator::new(self.path.as_str(), self.segment_infos())
|
|
}
|
|
|
|
fn shrink_to_fit(&mut self) {
|
|
self.path.shrink_to_fit();
|
|
}
|
|
|
|
/// Returns the left most part of the member expression, e.g. `x` in `x.y.z`.
|
|
///
|
|
/// This is the symbol on which the member access is performed.
|
|
pub(crate) fn symbol_name(&self) -> &str {
|
|
self.as_ref().symbol_name()
|
|
}
|
|
|
|
pub(super) fn num_segments(&self) -> usize {
|
|
self.segments.len()
|
|
}
|
|
|
|
pub(crate) fn as_ref(&self) -> MemberExprRef<'_> {
|
|
MemberExprRef {
|
|
path: self.path.as_str(),
|
|
segments: SegmentsRef::from(&self.segments),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for MemberExpr {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.write_str(self.symbol_name())?;
|
|
|
|
for segment in self.segments() {
|
|
match segment.kind {
|
|
SegmentKind::Attribute => write!(f, ".{}", segment.text)?,
|
|
SegmentKind::IntSubscript => write!(f, "[{}]", segment.text)?,
|
|
SegmentKind::StringSubscript => write!(f, "[\"{}\"]", segment.text)?,
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl PartialEq<MemberExprRef<'_>> for MemberExpr {
|
|
fn eq(&self, other: &MemberExprRef) -> bool {
|
|
self.as_ref() == *other
|
|
}
|
|
}
|
|
|
|
impl PartialEq<MemberExprRef<'_>> for &MemberExpr {
|
|
fn eq(&self, other: &MemberExprRef) -> bool {
|
|
self.as_ref() == *other
|
|
}
|
|
}
|
|
|
|
impl PartialEq<MemberExpr> for MemberExprRef<'_> {
|
|
fn eq(&self, other: &MemberExpr) -> bool {
|
|
other == self
|
|
}
|
|
}
|
|
|
|
impl PartialEq<&MemberExpr> for MemberExprRef<'_> {
|
|
fn eq(&self, other: &&MemberExpr) -> bool {
|
|
*other == self
|
|
}
|
|
}
|
|
|
|
/// Reference to a member expression.
///
/// Borrowed counterpart of [`MemberExpr`]: the path is a string slice and the
/// segments are a [`SegmentsRef`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct MemberExprRef<'a> {
    /// The concatenated path (symbol name followed by the segment texts).
    path: &'a str,
    /// The segment metadata describing how `path` is split up.
    segments: SegmentsRef<'a>,
}
|
|
|
|
impl<'a> MemberExprRef<'a> {
|
|
pub(super) fn symbol_name(&self) -> &'a str {
|
|
let end = self
|
|
.segments
|
|
.iter()
|
|
.next()
|
|
.map(SegmentInfo::offset)
|
|
.unwrap_or(self.path.text_len());
|
|
|
|
let range = TextRange::new(TextSize::default(), end);
|
|
|
|
&self.path[range]
|
|
}
|
|
|
|
#[cfg(test)]
|
|
fn segments(&self) -> impl Iterator<Item = Segment<'_>> + '_ {
|
|
SegmentsIterator::new(self.path, self.segments.iter())
|
|
}
|
|
|
|
pub(super) fn parent(&self) -> Option<MemberExprRef<'a>> {
|
|
let parent_segments = self.segments.parent()?;
|
|
|
|
// The removed segment is always the last one. Find its start offset.
|
|
let last_segment = self.segments.iter().last()?;
|
|
let path_end = last_segment.offset();
|
|
|
|
Some(MemberExprRef {
|
|
path: &self.path[TextRange::new(TextSize::default(), path_end)],
|
|
segments: parent_segments,
|
|
})
|
|
}
|
|
}
|
|
|
|
impl<'a> From<&'a MemberExpr> for MemberExprRef<'a> {
|
|
fn from(value: &'a MemberExpr) -> Self {
|
|
value.as_ref()
|
|
}
|
|
}
|
|
|
|
impl Hash for MemberExprRef<'_> {
|
|
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
|
// Path on its own isn't 100% unique, but it should avoid
|
|
// most collisions and avoids iterating all segments.
|
|
self.path.hash(state);
|
|
}
|
|
}
|
|
|
|
/// Uniquely identifies a member in a scope.
///
/// Used as the index type of the member list stored in `MemberTable`.
#[newtype_index]
#[derive(get_size2::GetSize, salsa::Update)]
pub struct ScopedMemberId;
|
|
|
|
/// The members of a scope. Allows lookup by member path and [`ScopedMemberId`].
#[derive(Default, get_size2::GetSize)]
pub(super) struct MemberTable {
    /// All members, indexed by their [`ScopedMemberId`].
    members: IndexVec<ScopedMemberId, Member>,

    /// Map from member path to its ID.
    ///
    /// Uses a hash table to avoid storing the path twice: the table only holds
    /// IDs, and lookups compare against the expressions stored in `members`.
    map: hashbrown::HashTable<ScopedMemberId>,
}
|
|
|
|
impl MemberTable {
|
|
/// Returns the member with the given ID.
|
|
///
|
|
/// ## Panics
|
|
/// If the ID is not valid for this table.
|
|
#[track_caller]
|
|
pub(crate) fn member(&self, id: ScopedMemberId) -> &Member {
|
|
&self.members[id]
|
|
}
|
|
|
|
/// Returns a mutable reference to the member with the given ID.
|
|
///
|
|
/// ## Panics
|
|
/// If the ID is not valid for this table.
|
|
#[track_caller]
|
|
pub(super) fn member_mut(&mut self, id: ScopedMemberId) -> &mut Member {
|
|
&mut self.members[id]
|
|
}
|
|
|
|
/// Returns an iterator over all members in the table.
|
|
pub(crate) fn iter(&self) -> std::slice::Iter<'_, Member> {
|
|
self.members.iter()
|
|
}
|
|
|
|
fn hash_member_expression_ref(member: &MemberExprRef) -> u64 {
|
|
hash_single(member)
|
|
}
|
|
|
|
/// Returns the ID of the member with the given expression, if it exists.
|
|
pub(crate) fn member_id<'a>(
|
|
&self,
|
|
member: impl Into<MemberExprRef<'a>>,
|
|
) -> Option<ScopedMemberId> {
|
|
let member = member.into();
|
|
let hash = Self::hash_member_expression_ref(&member);
|
|
self.map
|
|
.find(hash, |id| self.members[*id].expression == member)
|
|
.copied()
|
|
}
|
|
|
|
pub(crate) fn place_id_by_instance_attribute_name(&self, name: &str) -> Option<ScopedMemberId> {
|
|
for (id, member) in self.members.iter_enumerated() {
|
|
if member.is_instance_attribute_named(name) {
|
|
return Some(id);
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
}
|
|
|
|
impl PartialEq for MemberTable {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
// It's sufficient to compare the members as the map is only a reverse lookup.
|
|
self.members == other.members
|
|
}
|
|
}
|
|
|
|
impl Eq for MemberTable {}
|
|
|
|
impl std::fmt::Debug for MemberTable {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_tuple("MemberTable").field(&self.members).finish()
|
|
}
|
|
}
|
|
|
|
/// Builder for a [`MemberTable`].
///
/// Wraps the table under construction; `add` inserts or updates members and
/// `build` returns the finished table.
#[derive(Debug, Default)]
pub(super) struct MemberTableBuilder {
    /// The table being built.
    table: MemberTable,
}
|
|
|
|
impl MemberTableBuilder {
|
|
/// Adds a member to the table or updates the flags of an existing member if it already exists.
|
|
///
|
|
/// Members are identified by their expression, which is hashed to find the entry in the table.
|
|
pub(super) fn add(&mut self, mut member: Member) -> (ScopedMemberId, bool) {
|
|
let member_ref = member.expression.as_ref();
|
|
let hash = MemberTable::hash_member_expression_ref(&member_ref);
|
|
let entry = self.table.map.entry(
|
|
hash,
|
|
|id| self.table.members[*id].expression.as_ref() == member.expression.as_ref(),
|
|
|id| {
|
|
let ref_expr = self.table.members[*id].expression.as_ref();
|
|
MemberTable::hash_member_expression_ref(&ref_expr)
|
|
},
|
|
);
|
|
|
|
match entry {
|
|
Entry::Occupied(entry) => {
|
|
let id = *entry.get();
|
|
|
|
if !member.flags.is_empty() {
|
|
self.members[id].flags.insert(member.flags);
|
|
}
|
|
|
|
(id, false)
|
|
}
|
|
Entry::Vacant(entry) => {
|
|
member.expression.shrink_to_fit();
|
|
|
|
let id = self.table.members.push(member);
|
|
entry.insert(id);
|
|
(id, true)
|
|
}
|
|
}
|
|
}
|
|
|
|
pub(super) fn build(self) -> MemberTable {
|
|
let mut table = self.table;
|
|
table.members.shrink_to_fit();
|
|
table.map.shrink_to_fit(|id| {
|
|
let ref_expr = table.members[*id].expression.as_ref();
|
|
MemberTable::hash_member_expression_ref(&ref_expr)
|
|
});
|
|
table
|
|
}
|
|
}
|
|
|
|
/// Gives read access to the table under construction, so the table's lookup
/// methods can be called directly on the builder.
impl Deref for MemberTableBuilder {
    type Target = MemberTable;

    fn deref(&self) -> &Self::Target {
        &self.table
    }
}
|
|
|
|
/// Gives mutable access to the table under construction.
impl DerefMut for MemberTableBuilder {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.table
    }
}
|
|
|
|
/// Representation of segments that can be either inline or heap-allocated.
///
/// Design choices:
/// - Uses `Box<[SegmentInfo]>` instead of `ThinVec` because even with a `ThinVec`, the size of `Segments` is still 128 bits (16 bytes).
/// - Uses u64 for inline storage. That's the largest size without increasing the overall size of `Segments` and allows to encode up to 7 segments.
#[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)]
enum Segments {
    /// Inline storage for up to 7 segments with 6-bit relative offsets (max 63 bytes per segment)
    Small(SmallSegments),
    /// Heap storage for expressions that don't fit inline
    Heap(Box<[SegmentInfo]>),
}
|
|
|
|
// Compile-time size guards: the inline representation is exactly one `u64`,
// and on 64-bit targets the whole `Segments` enum is two `u64`s (16 bytes).
static_assertions::assert_eq_size!(SmallSegments, u64);
#[cfg(target_pointer_width = "64")]
static_assertions::assert_eq_size!(Segments, [u64; 2]);
|
|
|
|
impl Segments {
|
|
fn from_vec(segments: SmallVec<[SegmentInfo; 8]>) -> Self {
|
|
debug_assert!(
|
|
!segments.is_empty(),
|
|
"Segments cannot be empty. A member without segments is a symbol"
|
|
);
|
|
if let Some(small) = SmallSegments::try_from_slice(&segments) {
|
|
Self::Small(small)
|
|
} else {
|
|
Self::Heap(segments.into_vec().into_boxed_slice())
|
|
}
|
|
}
|
|
|
|
fn len(&self) -> usize {
|
|
match self {
|
|
Self::Small(small) => small.len(),
|
|
Self::Heap(segments) => segments.len(),
|
|
}
|
|
}
|
|
|
|
fn iter(&self) -> impl Iterator<Item = SegmentInfo> + '_ {
|
|
match self {
|
|
Self::Small(small) => itertools::Either::Left(small.iter()),
|
|
Self::Heap(heap) => itertools::Either::Right(heap.iter().copied()),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Segment metadata - packed into a single u32
///
/// Layout: `[kind: 2 bits][offset: 30 bits]`
/// - Bits 0-1: `SegmentKind` (0=Attribute, 1=IntSubscript, 2=StringSubscript)
/// - Bits 2-31: Absolute offset from start of path (a 30-bit field, so values up to
///   1,073,741,823 are representable)
#[derive(Clone, Copy, PartialEq, Eq, Hash, get_size2::GetSize)]
struct SegmentInfo(u32);
|
|
|
|
/// Mask selecting the two low bits of a `SegmentInfo`, which hold the `SegmentKind`.
const KIND_MASK: u32 = 0b11;
/// Number of bits the offset is shifted left by to make room for the kind bits.
const OFFSET_SHIFT: u32 = 2;
/// Largest value that fits into the 30-bit offset field.
const MAX_OFFSET: u32 = (1 << 30) - 1; // 2^30 - 1
|
|
|
|
impl SegmentInfo {
|
|
const fn new(kind: SegmentKind, offset: TextSize) -> Self {
|
|
assert!(offset.to_u32() < MAX_OFFSET);
|
|
|
|
let value = (offset.to_u32() << OFFSET_SHIFT) | (kind as u32);
|
|
Self(value)
|
|
}
|
|
|
|
const fn kind(self) -> SegmentKind {
|
|
match self.0 & KIND_MASK {
|
|
0 => SegmentKind::Attribute,
|
|
1 => SegmentKind::IntSubscript,
|
|
2 => SegmentKind::StringSubscript,
|
|
_ => panic!("Invalid SegmentKind bits"),
|
|
}
|
|
}
|
|
|
|
const fn offset(self) -> TextSize {
|
|
TextSize::new(self.0 >> OFFSET_SHIFT)
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Debug for SegmentInfo {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_struct("SegmentInfo")
|
|
.field("kind", &self.kind())
|
|
.field("offset", &self.offset())
|
|
.finish()
|
|
}
|
|
}
|
|
|
|
/// A single segment of a member expression: its kind plus its text, borrowed from the path.
struct Segment<'a> {
    /// Whether this is an attribute access, an integer subscript or a string subscript.
    kind: SegmentKind,
    /// The segment's text without any surrounding syntax, e.g. `bar` for `.bar`.
    text: &'a str,
}
|
|
|
|
/// The kind of a member-expression segment.
///
/// The discriminants are the values stored in the packed segment encodings.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, get_size2::GetSize)]
#[repr(u8)]
enum SegmentKind {
    /// An attribute access, e.g. `.y` in `x.y`.
    Attribute = 0,
    /// An integer-based subscript, e.g. `[1]` in `x[1]`.
    IntSubscript = 1,
    /// A string-based subscript, e.g. `["foo"]` in `x["foo"]`.
    StringSubscript = 2,
}
|
|
|
|
/// Iterator over segments that converts `SegmentInfo` to `Segment` with text slices.
///
/// Keeps one segment of lookahead because a segment's text ends where the
/// following segment starts (or at the end of the path for the last one).
struct SegmentsIterator<'a, I> {
    /// The full path the segment offsets refer to.
    path: &'a str,
    /// The remaining, not yet buffered segment infos.
    segment_infos: I,
    /// The segment that the next call to `next` yields.
    current: Option<SegmentInfo>,
    /// The segment after `current`; its offset is the end of `current`'s text.
    next: Option<SegmentInfo>,
}
|
|
|
|
impl<'a, I> SegmentsIterator<'a, I>
|
|
where
|
|
I: Iterator<Item = SegmentInfo>,
|
|
{
|
|
fn new(path: &'a str, mut segment_infos: I) -> Self {
|
|
let current = segment_infos.next();
|
|
let next = segment_infos.next();
|
|
|
|
Self {
|
|
path,
|
|
segment_infos,
|
|
current,
|
|
next,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a, I> Iterator for SegmentsIterator<'a, I>
|
|
where
|
|
I: Iterator<Item = SegmentInfo>,
|
|
{
|
|
type Item = Segment<'a>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
let info = self.current.take()?;
|
|
let end = self
|
|
.next
|
|
.map(SegmentInfo::offset)
|
|
.unwrap_or(self.path.text_len());
|
|
|
|
self.current = self.next;
|
|
self.next = self.segment_infos.next();
|
|
|
|
Some(Segment {
|
|
kind: info.kind(),
|
|
text: &self.path[TextRange::new(info.offset(), end)],
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Number of low bits of the inline encoding that hold the segment count (minus one).
const INLINE_COUNT_BITS: u32 = 3;
/// Mask selecting the count bits.
const INLINE_COUNT_MASK: u64 = (1 << INLINE_COUNT_BITS) - 1;
/// Number of bits used per inline segment (kind bits plus relative-offset bits).
const INLINE_SEGMENT_BITS: u32 = 8;
/// Mask selecting one inline segment after shifting it down to bit 0.
const INLINE_SEGMENT_MASK: u64 = (1 << INLINE_SEGMENT_BITS) - 1;
/// Number of bits of an inline segment that hold the segment kind.
const INLINE_KIND_BITS: u32 = 2;
/// Mask selecting the kind bits of an inline segment.
const INLINE_KIND_MASK: u64 = (1 << INLINE_KIND_BITS) - 1;
/// Number of bits of an inline segment that hold the offset relative to the previous segment.
const INLINE_PREV_LEN_BITS: u32 = 6;
/// Mask selecting the relative-offset bits after shifting out the kind bits.
const INLINE_PREV_LEN_MASK: u64 = (1 << INLINE_PREV_LEN_BITS) - 1;
/// Maximum number of segments that can be stored inline.
const INLINE_MAX_SEGMENTS: usize = 7;
/// Largest relative offset that fits into the relative-offset bits.
const INLINE_MAX_RELATIVE_OFFSET: u32 = (1 << INLINE_PREV_LEN_BITS) - 1; // 63
|
|
|
|
/// Compact representation that can store up to 7 segments inline in a u64.
///
/// Layout:
/// - Bits 0-2: Number of segments minus 1 (0-6, representing 1-7 segments)
/// - Bits 3-10: Segment 0 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 11-18: Segment 1 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 19-26: Segment 2 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 27-34: Segment 3 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 35-42: Segment 4 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 43-50: Segment 5 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 51-58: Segment 6 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 59-63: Unused (5 bits)
///
/// Within each 8-bit segment slot, the kind occupies the two low bits and the
/// relative offset the six bits above it. Offsets are relative to the start of
/// the previous segment (the first one is relative to the start of the path).
///
/// Constraints:
/// - Maximum 7 segments (realistic limit for member access chains)
/// - Maximum 63-byte relative offset per segment (sufficient for most identifiers)
/// - Never empty (`segments.len()` >= 1)
#[derive(Clone, Copy, PartialEq, Eq, get_size2::GetSize)]
#[repr(transparent)]
struct SmallSegments(u64);
|
|
|
|
impl SmallSegments {
|
|
fn try_from_slice(segments: &[SegmentInfo]) -> Option<Self> {
|
|
if segments.is_empty() || segments.len() > INLINE_MAX_SEGMENTS {
|
|
return None;
|
|
}
|
|
|
|
// Pack into inline representation
|
|
// Store count minus 1 (since segments are never empty, range 0-6 represents 1-7 segments)
|
|
let mut packed = (segments.len() - 1) as u64;
|
|
let mut prev_offset = TextSize::new(0);
|
|
|
|
for (i, segment) in segments.iter().enumerate() {
|
|
// Compute relative offset on-the-fly
|
|
let relative_offset = segment.offset() - prev_offset;
|
|
if relative_offset > TextSize::from(INLINE_MAX_RELATIVE_OFFSET) {
|
|
return None;
|
|
}
|
|
|
|
let kind = segment.kind() as u64;
|
|
let relative_offset_val = u64::from(relative_offset.to_u32());
|
|
let segment_data = (relative_offset_val << INLINE_KIND_BITS) | kind;
|
|
let shift = INLINE_COUNT_BITS
|
|
+ (u32::try_from(i).expect("i is bounded by INLINE_MAX_SEGMENTS")
|
|
* INLINE_SEGMENT_BITS);
|
|
packed |= segment_data << shift;
|
|
|
|
prev_offset = segment.offset();
|
|
}
|
|
|
|
Some(Self(packed))
|
|
}
|
|
|
|
#[expect(
|
|
clippy::cast_possible_truncation,
|
|
reason = "INLINE_COUNT_MASK ensures value is at most 7"
|
|
)]
|
|
const fn len(self) -> usize {
|
|
// Add 1 because we store count minus 1
|
|
((self.0 & INLINE_COUNT_MASK) + 1) as usize
|
|
}
|
|
|
|
fn iter(self) -> SmallSegmentsInfoIterator {
|
|
SmallSegmentsInfoIterator {
|
|
segments: self,
|
|
index: 0,
|
|
next_offset: TextSize::new(0),
|
|
}
|
|
}
|
|
|
|
/// Returns the parent member expression, e.g. `x.b` from `x.b.c`, or `None` if the parent is
|
|
/// the `symbol` itself (e, g. parent of `x.a` is just `x`).
|
|
const fn parent(self) -> Option<Self> {
|
|
let len = self.len();
|
|
if len <= 1 {
|
|
return None;
|
|
}
|
|
|
|
// Simply copy the packed value but update the count
|
|
let mut new_packed = self.0;
|
|
|
|
// Clear the count bits and set the new count (len - 2, since we store count - 1)
|
|
new_packed &= !INLINE_COUNT_MASK;
|
|
new_packed |= (len - 2) as u64;
|
|
|
|
Some(Self(new_packed))
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Debug for SmallSegments {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_list().entries(self.iter()).finish()
|
|
}
|
|
}
|
|
|
|
/// Iterator that decodes the segments packed into a [`SmallSegments`].
struct SmallSegmentsInfoIterator {
    /// The packed segments being decoded.
    segments: SmallSegments,
    /// Index of the next segment slot to decode.
    index: usize,
    /// Running absolute offset: the sum of the relative offsets decoded so far.
    next_offset: TextSize,
}
|
|
|
|
impl Iterator for SmallSegmentsInfoIterator {
|
|
type Item = SegmentInfo;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
let count = self.segments.len();
|
|
if self.index >= count {
|
|
return None;
|
|
}
|
|
|
|
// Extract the relative offset and kind for the current segment
|
|
let shift = INLINE_COUNT_BITS
|
|
+ (u32::try_from(self.index).expect("index is bounded by INLINE_MAX_SEGMENTS")
|
|
* INLINE_SEGMENT_BITS);
|
|
let segment_data = (self.segments.0 >> shift) & INLINE_SEGMENT_MASK;
|
|
let kind = (segment_data & INLINE_KIND_MASK) as u8;
|
|
let relative_offset = ((segment_data >> INLINE_KIND_BITS) & INLINE_PREV_LEN_MASK) as u32;
|
|
|
|
// Update the running absolute offset
|
|
self.next_offset += TextSize::new(relative_offset);
|
|
|
|
let kind = match kind {
|
|
0 => SegmentKind::Attribute,
|
|
1 => SegmentKind::IntSubscript,
|
|
2 => SegmentKind::StringSubscript,
|
|
_ => panic!("Invalid SegmentKind bits"),
|
|
};
|
|
|
|
self.index += 1;
|
|
Some(SegmentInfo::new(kind, self.next_offset))
|
|
}
|
|
}
|
|
|
|
/// Reference view of segments, can be either small (inline) or heap-allocated.
#[derive(Clone, Copy, Debug)]
enum SegmentsRef<'a> {
    /// Inline segments; copied by value since they fit in a `u64`.
    Small(SmallSegments),
    /// Borrowed slice of segment infos.
    Heap(&'a [SegmentInfo]),
}
|
|
|
|
impl<'a> SegmentsRef<'a> {
|
|
fn len(&self) -> usize {
|
|
match self {
|
|
Self::Small(small) => small.len(),
|
|
Self::Heap(segments) => segments.len(),
|
|
}
|
|
}
|
|
|
|
fn iter(&self) -> impl Iterator<Item = SegmentInfo> + '_ {
|
|
match self {
|
|
Self::Small(small) => itertools::Either::Left(small.iter()),
|
|
Self::Heap(heap) => itertools::Either::Right(heap.iter().copied()),
|
|
}
|
|
}
|
|
|
|
/// Returns a parent view with one fewer segment, or None if <= 1 segment
|
|
fn parent(&self) -> Option<SegmentsRef<'a>> {
|
|
match self {
|
|
Self::Small(small) => small.parent().map(SegmentsRef::Small),
|
|
Self::Heap(segments) => {
|
|
let len = segments.len();
|
|
if len <= 1 {
|
|
None
|
|
} else {
|
|
Some(SegmentsRef::Heap(&segments[..len - 1]))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> From<&'a Segments> for SegmentsRef<'a> {
|
|
fn from(segments: &'a Segments) -> Self {
|
|
match segments {
|
|
Segments::Small(small) => SegmentsRef::Small(*small),
|
|
Segments::Heap(heap) => SegmentsRef::Heap(heap),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl PartialEq for SegmentsRef<'_> {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
let len = self.len();
|
|
if len != other.len() {
|
|
return false;
|
|
}
|
|
self.iter().eq(other.iter())
|
|
}
|
|
}
|
|
|
|
impl Eq for SegmentsRef<'_> {}
|
|
|
|
/// Helper function to hash a single value and return the hash.
|
|
fn hash_single<T: Hash>(value: &T) -> u64 {
|
|
let mut hasher = FxHasher::default();
|
|
value.hash(&mut hasher);
|
|
hasher.finish()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// An inline view and a heap view over the same segments must hash and compare equal.
    #[test]
    fn test_member_expr_ref_hash_and_eq_small_heap() {
        // For expression: foo.bar[0]["baz"]
        // The path would be: "foobar0baz" (no dots or brackets in the path)
        let path = "foobar0baz";

        let segments = vec![
            SegmentInfo::new(SegmentKind::Attribute, TextSize::new(3)), // .bar at offset 3
            SegmentInfo::new(SegmentKind::IntSubscript, TextSize::new(6)), // [0] at offset 6
            SegmentInfo::new(SegmentKind::StringSubscript, TextSize::new(7)), // ["baz"] at offset 7
        ];

        // Create Small version.
        let small_segments = SmallSegments::try_from_slice(&segments).unwrap();
        let member_ref_small = MemberExprRef {
            path,
            segments: SegmentsRef::Small(small_segments),
        };

        // Create Heap version with the same data.
        let heap_segments: Box<[SegmentInfo]> = segments.into_boxed_slice();
        let member_ref_heap = MemberExprRef {
            path,
            segments: SegmentsRef::Heap(&heap_segments),
        };

        // Test hash equality (MemberExprRef only hashes the path).
        assert_eq!(
            hash_single(&member_ref_small),
            hash_single(&member_ref_heap)
        );

        // Test equality in both directions.
        assert_eq!(member_ref_small, member_ref_heap);
        assert_eq!(member_ref_heap, member_ref_small);
    }

    /// Expressions sharing a path but differing in segment kind must hash equal
    /// (the hash covers the path only) yet compare unequal.
    #[test]
    fn test_member_expr_ref_different_segments() {
        // For expressions: foo.bar[0] vs foo.bar["0"]
        // Both have the same path "foobar0" but different segment types
        let path = "foobar0";

        // First expression: foo.bar[0]
        let segments1 = vec![
            SegmentInfo::new(SegmentKind::Attribute, TextSize::new(3)), // .bar at offset 3
            SegmentInfo::new(SegmentKind::IntSubscript, TextSize::new(6)), // [0] at offset 6
        ];

        // Second expression: foo.bar["0"]
        let segments2 = vec![
            SegmentInfo::new(SegmentKind::Attribute, TextSize::new(3)), // .bar at offset 3
            SegmentInfo::new(SegmentKind::StringSubscript, TextSize::new(6)), // ["0"] at offset 6
        ];

        // Create MemberExprRef instances
        let small1 = SmallSegments::try_from_slice(&segments1).unwrap();
        let member_ref1 = MemberExprRef {
            path,
            segments: SegmentsRef::Small(small1),
        };

        let small2 = SmallSegments::try_from_slice(&segments2).unwrap();
        let member_ref2 = MemberExprRef {
            path,
            segments: SegmentsRef::Small(small2),
        };

        // Test inequality
        assert_ne!(member_ref1, member_ref2);
        assert_ne!(member_ref2, member_ref1);

        // Test hash equality (MemberExprRef only hashes the path, not segments)
        assert_eq!(hash_single(&member_ref1), hash_single(&member_ref2));
    }

    /// `parent()` strips one trailing segment at a time and returns `None`
    /// once only a single segment is left.
    #[test]
    fn test_member_expr_ref_parent() {
        use ruff_python_parser::parse_expression;

        // Parse a real Python expression
        let parsed = parse_expression(r#"foo.bar[0]["baz"]"#).unwrap();
        let expr = parsed.expr();

        // Convert to MemberExpr
        let member_expr = MemberExpr::try_from_expr(ast::ExprRef::from(expr)).unwrap();
        let member_ref = member_expr.as_ref();

        // Verify the initial state: foo.bar[0]["baz"]
        assert_eq!(member_ref.symbol_name(), "foo");
        let segments: Vec<_> = member_ref.segments().map(|s| (s.kind, s.text)).collect();
        assert_eq!(
            segments,
            vec![
                (SegmentKind::Attribute, "bar"),
                (SegmentKind::IntSubscript, "0"),
                (SegmentKind::StringSubscript, "baz")
            ]
        );

        // Test parent() removes the last segment ["baz"] -> foo.bar[0]
        let parent1 = member_ref.parent().unwrap();
        assert_eq!(parent1.symbol_name(), "foo");
        let parent1_segments: Vec<_> = parent1.segments().map(|s| (s.kind, s.text)).collect();
        assert_eq!(
            parent1_segments,
            vec![
                (SegmentKind::Attribute, "bar"),
                (SegmentKind::IntSubscript, "0")
            ]
        );

        // Test parent of parent removes [0] -> foo.bar
        let parent2 = parent1.parent().unwrap();
        assert_eq!(parent2.symbol_name(), "foo");
        let parent2_segments: Vec<_> = parent2.segments().map(|s| (s.kind, s.text)).collect();
        assert_eq!(parent2_segments, vec![(SegmentKind::Attribute, "bar")]);

        // Test parent of single segment is a symbol and not a member.
        let parent3 = parent2.parent();
        assert!(parent3.is_none());
    }

    /// Checks the boundaries between inline and heap storage: the segment
    /// count limit (7 vs. 8) and the relative offset limit (63 vs. 64 bytes).
    #[test]
    fn test_member_expr_small_vs_heap_allocation() {
        use ruff_python_parser::parse_expression;

        // Test Small allocation: 7 segments (maximum for inline storage)
        // Create expression with exactly 7 segments: x.a.b.c.d.e.f.g
        let small_expr = parse_expression("x.a.b.c.d.e.f.g").unwrap();
        let small_member =
            MemberExpr::try_from_expr(ast::ExprRef::from(small_expr.expr())).unwrap();

        // Should use Small allocation
        assert!(matches!(small_member.segments, Segments::Small(_)));
        assert_eq!(small_member.num_segments(), 7);

        // Test Heap allocation: 8 segments (exceeds inline capacity)
        // Create expression with 8 segments: x.a.b.c.d.e.f.g.h
        let heap_expr = parse_expression("x.a.b.c.d.e.f.g.h").unwrap();
        let heap_member = MemberExpr::try_from_expr(ast::ExprRef::from(heap_expr.expr())).unwrap();

        // Should use Heap allocation
        assert!(matches!(heap_member.segments, Segments::Heap(_)));
        assert_eq!(heap_member.num_segments(), 8);

        // Test Small allocation with relative offset limit
        // Create expression where relative offsets are small enough: a.b[0]["c"]
        let small_offset_expr = parse_expression(r#"a.b[0]["c"]"#).unwrap();
        let small_offset_member =
            MemberExpr::try_from_expr(ast::ExprRef::from(small_offset_expr.expr())).unwrap();

        // Should use Small allocation (3 segments, small offsets)
        assert!(matches!(small_offset_member.segments, Segments::Small(_)));
        assert_eq!(small_offset_member.num_segments(), 3);

        // Test Small allocation with maximum 63-byte relative offset limit
        // Create expression where one segment has exactly 63 bytes (the limit)
        let segment_63_bytes = "a".repeat(63);
        let max_offset_expr_code = format!("x.{segment_63_bytes}.y");
        let max_offset_expr = parse_expression(&max_offset_expr_code).unwrap();
        let max_offset_member =
            MemberExpr::try_from_expr(ast::ExprRef::from(max_offset_expr.expr())).unwrap();
        // Should still use Small allocation (exactly at the limit)
        assert!(matches!(max_offset_member.segments, Segments::Small(_)));
        assert_eq!(max_offset_member.num_segments(), 2);

        // Test that heap allocation works for segment content that would exceed relative offset limits
        // This would require very long identifiers (>63 bytes between segments), which is uncommon
        // but we can test by creating an expression with long attribute names
        let long_name = "a".repeat(64); // 64 bytes (exceeds 63-byte limit)
        let long_expr_code = format!("x.{long_name}.y");
        let long_expr = parse_expression(&long_expr_code).unwrap();
        let long_member = MemberExpr::try_from_expr(ast::ExprRef::from(long_expr.expr())).unwrap();
        // Should use Heap allocation due to large relative offset
        assert!(matches!(long_member.segments, Segments::Heap(_)));
        assert_eq!(long_member.num_segments(), 2);
    }
}
|