Remove dependency on `ruff_rowan` (#2875)

This PR removes the dependency on `ruff_rowan` (i.e., Rome's fork of rust-analyzer's `rowan`) and, in turn, trims out a lot of code in `ruff_formatter` that isn't necessary (or isn't _yet_ necessary) to power the autoformatter.

We may end up pulling some of this back in -- TBD. For example, the autoformatter has its own comment representation right now, but we may eventually want to use the `comments.rs` data structures defined in `rome_formatter`.
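As a rough illustration of what the slimmed-down crate now builds on: the offset and range types that `ruff_rowan` used to re-export come straight from `ruff_text_size` (the fork of rust-analyzer's `text-size`). A minimal sketch using only that API:

use ruff_text_size::{TextRange, TextSize};

fn main() {
    // A range covering bytes 2..7 of some source document.
    let range = TextRange::new(TextSize::from(2), TextSize::from(7));
    assert_eq!(range.len(), TextSize::from(5));
}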
Charlie Marsh, 2023-02-14 22:54:08 -05:00 (committed by GitHub)
commit f661c90bd7 (parent 5a84df293f)
61 changed files with 39 additions and 17489 deletions

Cargo.lock (generated)

@@ -536,12 +536,6 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "countme"
version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636"
[[package]]
name = "cpufeatures"
version = "0.2.5"
@@ -626,7 +620,7 @@ dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset 0.7.1",
"memoffset",
"scopeguard",
]
@@ -810,16 +804,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "env_logger"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
dependencies = [
"log",
"regex",
]
[[package]]
name = "env_proxy"
version = "0.4.1"
@@ -1226,12 +1210,6 @@ dependencies = [
"want",
]
[[package]]
name = "iai"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678"
[[package]]
name = "iana-time-zone"
version = "0.1.53"
@@ -1674,15 +1652,6 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "memoffset"
version = "0.7.1"
@@ -2302,28 +2271,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "quickcheck"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"env_logger",
"log",
"rand",
]
[[package]]
name = "quickcheck_macros"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b22a693222d716a9587786f37ac3f6b4faedb5b80c23914e7303ff5a1d8016e9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.23"
@@ -2620,12 +2567,9 @@ dependencies = [
name = "ruff_formatter"
version = "0.0.0"
dependencies = [
"cfg-if",
"countme",
"drop_bomb",
"indexmap",
"insta",
"ruff_rowan",
"ruff_text_size",
"rustc-hash",
"schemars",
"serde",
@@ -2653,35 +2597,6 @@ dependencies = [
"rustc-hash",
]
[[package]]
name = "ruff_rowan"
version = "0.0.0"
dependencies = [
"countme",
"hashbrown",
"iai",
"memoffset 0.6.5",
"quickcheck",
"quickcheck_macros",
"ruff_text_edit",
"ruff_text_size",
"rustc-hash",
"schemars",
"serde",
"serde_json",
"tracing",
]
[[package]]
name = "ruff_text_edit"
version = "0.0.0"
dependencies = [
"ruff_text_size",
"schemars",
"serde",
"similar",
]
[[package]]
name = "ruff_text_size"
version = "0.0.0"
@@ -3024,10 +2939,6 @@ name = "similar"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf"
dependencies = [
"bstr 0.2.17",
"unicode-segmentation",
]
[[package]]
name = "siphasher"
@@ -3646,12 +3557,6 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-segmentation"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
[[package]]
name = "unicode-width"
version = "0.1.10"

LICENSE

@@ -1063,33 +1063,6 @@ are:
- flake8-django, licensed under the GPL license.
- rust-analyzer/rowan, licensed under the MIT license:
"""
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
"""
- rust-analyzer/text-size, licensed under the MIT license:
"""
Permission is hereby granted, free of charge, to any
@@ -1117,33 +1090,6 @@ are:
DEALINGS IN THE SOFTWARE.
"""
- rust-analyzer/text-edit, licensed under the MIT license:
"""
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
"""
- rome/tools, licensed under the MIT license:
"""
MIT License


@@ -5,11 +5,8 @@ publish = false
edition = "2021"
[dependencies]
cfg-if = { version = "1.0.0" }
countme = { version = "3.0.1" }
drop_bomb = { version = "0.1.5" }
indexmap = { version = "1.9.1" }
ruff_rowan = { path = "../ruff_rowan" }
ruff_text_size = { path = "../ruff_text_size" }
rustc-hash = { workspace = true }
schemars = { version = "0.8.10", optional = true }
serde = { version = "1.0.136", features = ["derive"], optional = true }
@@ -20,4 +17,4 @@ unicode-width = { version = "0.1.9" }
insta = { version = "1.19.0" }
[features]
serde = ["dep:serde", "schemars", "ruff_rowan/serde"]
serde = ["dep:serde", "schemars"]


@@ -1,10 +1,10 @@
use crate::format_element::tag::{Condition, Tag};
use crate::prelude::tag::{DedentMode, GroupMode, LabelId};
use crate::prelude::*;
use crate::{format_element, write, Argument, Arguments, GroupId, TextRange, TextSize};
use crate::{format_element, write, Argument, Arguments, GroupId, TextSize};
use crate::{Buffer, VecBuffer};
use ruff_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTokenText, TextLen};
use std::borrow::Cow;
use ruff_text_size::TextRange;
use std::cell::Cell;
use std::marker::PhantomData;
use std::num::NonZeroU8;
@@ -332,93 +332,6 @@ impl std::fmt::Debug for StaticTextSlice {
}
}
/// Creates a string that matches the input source text if `text` is [`Cow::Borrowed`], or
/// contains replaced content if `text` is [`Cow::Owned`].
pub fn syntax_token_cow_slice<'a, L: Language>(
text: Cow<'a, str>,
token: &'a SyntaxToken<L>,
start: TextSize,
) -> SyntaxTokenCowSlice<'a, L> {
debug_assert_no_newlines(&text);
SyntaxTokenCowSlice { text, token, start }
}
pub struct SyntaxTokenCowSlice<'a, L: Language> {
text: Cow<'a, str>,
token: &'a SyntaxToken<L>,
start: TextSize,
}
impl<L: Language, Context> Format<Context> for SyntaxTokenCowSlice<'_, L> {
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
match &self.text {
Cow::Borrowed(text) => {
let range = TextRange::at(self.start, text.text_len());
debug_assert_eq!(
*text,
&self.token.text()[range - self.token.text_range().start()],
"The borrowed string doesn't match the specified token substring. Does the borrowed string belong to this token and range?"
);
let relative_range = range - self.token.text_range().start();
let slice = self.token.token_text().slice(relative_range);
f.write_element(FormatElement::SyntaxTokenTextSlice {
slice,
source_position: self.start,
})
}
Cow::Owned(text) => f.write_element(FormatElement::DynamicText {
text: text.to_string().into_boxed_str(),
source_position: self.start,
}),
}
}
}
impl<L: Language> std::fmt::Debug for SyntaxTokenCowSlice<'_, L> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::write!(f, "SyntaxTokenCowSlice({})", self.text)
}
}
/// Copies a source text 1:1 into the output text.
pub fn syntax_token_text_slice<L: Language>(
token: &SyntaxToken<L>,
range: TextRange,
) -> SyntaxTokenTextSlice {
let relative_range = range - token.text_range().start();
let slice = token.token_text().slice(relative_range);
debug_assert_no_newlines(&slice);
SyntaxTokenTextSlice {
text: slice,
source_position: range.start(),
}
}
pub struct SyntaxTokenTextSlice {
text: SyntaxTokenText,
source_position: TextSize,
}
impl<Context> Format<Context> for SyntaxTokenTextSlice {
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
f.write_element(FormatElement::SyntaxTokenTextSlice {
slice: self.text.clone(),
source_position: self.source_position,
})
}
}
impl std::fmt::Debug for SyntaxTokenTextSlice {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::write!(f, "SyntaxTokenTextSlice({})", self.text)
}
}
fn debug_assert_no_newlines(text: &str) {
debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but text must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text);
}
@@ -1853,7 +1766,7 @@ impl<Context, T> std::fmt::Debug for FormatWith<Context, T> {
/// ```
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{SimpleFormatContext, format, write};
/// use ruff_rowan::TextSize;
/// use ruff_text_size::TextSize;
///
/// struct MyFormat {
/// items: Vec<&'static str>,
@@ -1948,7 +1861,7 @@ where
/// ```panics
/// use ruff_formatter::prelude::*;
/// use ruff_formatter::{SimpleFormatContext, format, write, Buffer};
/// use ruff_rowan::TextSize;
/// use ruff_text_size::TextSize;
///
/// let mut count = 0;
///
@@ -2063,99 +1976,6 @@ where
}
}
/// Builder that joins nodes while ensuring that nodes separated by empty lines continue
/// to be separated by empty lines in the formatted output.
#[must_use = "must eventually call `finish()` on Format builders"]
pub struct JoinNodesBuilder<'fmt, 'buf, Separator, Context> {
result: FormatResult<()>,
/// The separator to insert between nodes. Either a soft or hard line break
separator: Separator,
fmt: &'fmt mut Formatter<'buf, Context>,
has_elements: bool,
}
impl<'fmt, 'buf, Separator, Context> JoinNodesBuilder<'fmt, 'buf, Separator, Context>
where
Separator: Format<Context>,
{
pub(super) fn new(separator: Separator, fmt: &'fmt mut Formatter<'buf, Context>) -> Self {
Self {
result: Ok(()),
separator,
fmt,
has_elements: false,
}
}
/// Adds a new node with the specified formatted content to the output, respecting any new lines
/// that appear before the node in the input source.
pub fn entry<L: Language>(&mut self, node: &SyntaxNode<L>, content: &dyn Format<Context>) {
self.result = self.result.and_then(|_| {
if self.has_elements {
if get_lines_before(node) > 1 {
write!(self.fmt, [empty_line()])?;
} else {
self.separator.fmt(self.fmt)?;
}
}
self.has_elements = true;
write!(self.fmt, [content])
});
}
/// Writes an entry without adding a separating line break or empty line.
pub fn entry_no_separator(&mut self, content: &dyn Format<Context>) {
self.result = self.result.and_then(|_| {
self.has_elements = true;
write!(self.fmt, [content])
})
}
/// Adds an iterator of entries to the output. Each entry is a `(node, content)` tuple.
pub fn entries<L, F, I>(&mut self, entries: I) -> &mut Self
where
L: Language,
F: Format<Context>,
I: IntoIterator<Item = (SyntaxNode<L>, F)>,
{
for (node, content) in entries {
self.entry(&node, &content)
}
self
}
pub fn finish(&mut self) -> FormatResult<()> {
self.result
}
}
/// Get the number of line breaks between two consecutive SyntaxNodes in the tree
pub fn get_lines_before<L: Language>(next_node: &SyntaxNode<L>) -> usize {
// Count the newlines in the leading trivia of the next node
if let Some(token) = next_node.first_token() {
get_lines_before_token(&token)
} else {
0
}
}
pub fn get_lines_before_token<L: Language>(token: &SyntaxToken<L>) -> usize {
token
.leading_trivia()
.pieces()
.take_while(|piece| {
// Stop at the first comment or skipped piece, the comment printer
// will handle newlines between the comment and the node
!(piece.is_comments() || piece.is_skipped())
})
.filter(|piece| piece.is_newline())
.count()
}
/// Builder to fill as many elements as possible on a single line.
#[must_use = "must eventually call `finish()` on Format builders"]
pub struct FillBuilder<'fmt, 'buf, Context> {
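The removed `JoinNodesBuilder` and `get_lines_before` exist to preserve blank lines between nodes: an entry preceded by more than one newline keeps an empty line, otherwise the plain separator is used. A standalone sketch of that rule without the `rowan` trivia machinery (the function and offsets here are illustrative):

/// Counts the line breaks between two adjacent nodes, mirroring what
/// `get_lines_before` computed from a node's leading trivia.
fn lines_between(source: &str, end_of_previous: usize, start_of_next: usize) -> usize {
    source[end_of_previous..start_of_next]
        .bytes()
        .filter(|byte| *byte == b'\n')
        .count()
}

fn main() {
    let source = "a = 1\n\nb = 2\n";
    // Two newlines mean the nodes were separated by a blank line, so the
    // joiner would emit `empty_line()` instead of the plain separator.
    assert_eq!(lines_between(source, 5, 7), 2);
}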

File diff suppressed because it is too large


@@ -1,632 +0,0 @@
use super::{
map::CommentsMap, CommentPlacement, CommentStyle, CommentTextPosition, DecoratedComment,
SourceComment, TransformSourceMap,
};
use crate::source_map::{DeletedRangeEntry, DeletedRanges};
use crate::{TextRange, TextSize};
use ruff_rowan::syntax::SyntaxElementKey;
use ruff_rowan::{
Direction, Language, SyntaxElement, SyntaxKind, SyntaxNode, SyntaxToken, WalkEvent,
};
use rustc_hash::FxHashSet;
/// Extracts all comments from a syntax tree.
pub(super) struct CommentsBuilderVisitor<'a, Style: CommentStyle> {
builder: CommentsBuilder<Style::Language>,
style: &'a Style,
parentheses: SourceParentheses<'a>,
// State
pending_comments: Vec<DecoratedComment<Style::Language>>,
preceding_node: Option<SyntaxNode<Style::Language>>,
following_node_index: Option<usize>,
parents: Vec<SyntaxNode<Style::Language>>,
last_token: Option<SyntaxToken<Style::Language>>,
}
impl<'a, Style> CommentsBuilderVisitor<'a, Style>
where
Style: CommentStyle,
{
pub(super) fn new(style: &'a Style, source_map: Option<&'a TransformSourceMap>) -> Self {
Self {
style,
builder: Default::default(),
parentheses: SourceParentheses::from_source_map(source_map),
pending_comments: Default::default(),
preceding_node: Default::default(),
following_node_index: Default::default(),
parents: Default::default(),
last_token: Default::default(),
}
}
pub(super) fn visit(
mut self,
root: &SyntaxNode<Style::Language>,
) -> (
CommentsMap<SyntaxElementKey, SourceComment<Style::Language>>,
FxHashSet<SyntaxElementKey>,
) {
for event in root.preorder_with_tokens(Direction::Next) {
match event {
WalkEvent::Enter(SyntaxElement::Node(node)) => {
self.visit_node(WalkEvent::Enter(node))
}
WalkEvent::Leave(SyntaxElement::Node(node)) => {
self.visit_node(WalkEvent::Leave(node))
}
WalkEvent::Enter(SyntaxElement::Token(token)) => self.visit_token(token),
WalkEvent::Leave(SyntaxElement::Token(_)) => {
// Handled as part of enter
}
}
}
assert!(
self.parents.is_empty(),
"Expected all enclosing nodes to have been processed but contains {:#?}",
self.parents
);
// Process any comments attached to the last token.
// Important for range formatting where it isn't guaranteed that the
// last token is an EOF token.
if let Some(last_token) = self.last_token.take() {
self.parents.push(root.clone());
let (comments_start, lines_before, position, trailing_end) =
self.visit_trailing_comments(last_token, None);
Self::update_comments(
&mut self.pending_comments[comments_start..],
position,
lines_before,
trailing_end,
);
}
self.flush_comments(None);
self.builder.finish()
}
fn visit_node(&mut self, event: WalkEvent<SyntaxNode<Style::Language>>) {
match event {
WalkEvent::Enter(node) => {
// Lists cannot have comments attached. They either belong to the entire parent or to
// the first child. So we ignore lists altogether
if node.kind().is_list() {
return;
}
let is_root = matches!(self.following_node_index, Some(0));
// Associate comments with the outermost node
// Set following here because it is the "following node" of the next token's leading trivia.
if self.following_node_index.is_none() || is_root {
// Flush in case the node doesn't have any tokens.
self.flush_comments(Some(&node));
self.following_node_index = Some(self.parents.len());
}
self.parents.push(node);
}
WalkEvent::Leave(node) => {
if node.kind().is_list() {
return;
}
self.parents.pop().unwrap();
// We're past this node; flush any pending comments for its children
self.following_node_index = None;
self.flush_comments(None);
// We're past this node, so it must precede the sibling that comes next.
self.preceding_node = Some(node);
}
}
}
fn visit_token(&mut self, token: SyntaxToken<Style::Language>) {
// Process the trailing trivia of the last token
let (comments_start, mut lines_before, mut position, mut trailing_end) =
if let Some(last_token) = self.last_token.take() {
self.visit_trailing_comments(last_token, Some(&token))
} else {
(
self.pending_comments.len(),
0,
CommentTextPosition::SameLine,
None,
)
};
// Process the leading trivia of the current token. The trailing trivia is handled as part of the next token.
for leading in token.leading_trivia().pieces() {
if leading.is_newline() {
lines_before += 1;
// All comments following from here are own line comments
position = CommentTextPosition::OwnLine;
if trailing_end.is_none() {
trailing_end = Some(self.pending_comments.len());
}
} else if leading.is_skipped() {
self.builder.mark_has_skipped(&token);
lines_before = 0;
break;
} else if let Some(comment) = leading.as_comments() {
let kind = Style::get_comment_kind(&comment);
self.queue_comment(DecoratedComment {
enclosing: self.enclosing_node().clone(),
preceding: self.preceding_node.clone(),
following: None,
following_token: Some(token.clone()),
lines_before,
lines_after: 0,
text_position: position,
kind,
comment,
});
lines_before = 0;
}
}
self.last_token = Some(token);
Self::update_comments(
&mut self.pending_comments[comments_start..],
position,
lines_before,
trailing_end,
);
// Set following node to `None` because it now becomes the enclosing node.
if let Some(following_node) = self.following_node() {
self.flush_comments(Some(&following_node.clone()));
self.following_node_index = None;
// The following node is only set after entering a node
// That means the following node is only set for the first token of a node.
// Unset preceding node if this is the first token because the preceding node belongs to the parent.
self.preceding_node = None;
}
}
fn enclosing_node(&self) -> &SyntaxNode<Style::Language> {
let element = match self.following_node_index {
None => self.parents.last(),
Some(index) if index == 0 => Some(&self.parents[0]),
Some(index) => Some(&self.parents[index - 1]),
};
element.expect("Expected enclosing nodes to at least contain the root node.")
}
fn following_node(&self) -> Option<&SyntaxNode<Style::Language>> {
self.following_node_index.map(|index| {
self.parents
.get(index)
.expect("Expected following node index to point to a valid parent node")
})
}
fn queue_comment(&mut self, comment: DecoratedComment<Style::Language>) {
self.pending_comments.push(comment);
}
fn update_comments(
comments: &mut [DecoratedComment<Style::Language>],
position: CommentTextPosition,
lines_before: u32,
trailing_end: Option<usize>,
) {
let trailing_end = trailing_end.unwrap_or(comments.len());
let mut comments = comments.iter_mut().enumerate().peekable();
// Update the lines after of all comments as well as the positioning of end of line comments.
while let Some((index, comment)) = comments.next() {
// Update the position of all trailing comments to be end of line as we've seen a line break since.
if index < trailing_end && position.is_own_line() {
comment.text_position = CommentTextPosition::EndOfLine;
}
comment.lines_after = comments
.peek()
.map_or(lines_before, |(_, next)| next.lines_before);
}
}
fn flush_comments(&mut self, following: Option<&SyntaxNode<Style::Language>>) {
for mut comment in self.pending_comments.drain(..) {
comment.following = following.cloned();
let placement = self.style.place_comment(comment);
self.builder.add_comment(placement);
}
}
fn visit_trailing_comments(
&mut self,
token: SyntaxToken<Style::Language>,
following_token: Option<&SyntaxToken<Style::Language>>,
) -> (usize, u32, CommentTextPosition, Option<usize>) {
let mut comments_start = 0;
// The index of the last trailing comment in `pending_comments`.
let mut trailing_end: Option<usize> = None;
// Number of lines before the next comment, token, or skipped token trivia
let mut lines_before = 0;
// Trailing comments are all `SameLine` comments EXCEPT if any is followed by a line break,
// a leading comment (which always has line breaks), or there's a line break before the token.
let mut position = CommentTextPosition::SameLine;
// Process the trailing trivia of the last token
for piece in token.trailing_trivia().pieces() {
if piece.is_newline() {
lines_before += 1;
// All comments following from here are own line comments
position = CommentTextPosition::OwnLine;
if trailing_end.is_none() {
trailing_end = Some(self.pending_comments.len());
}
} else if let Some(comment) = piece.as_comments() {
self.queue_comment(DecoratedComment {
enclosing: self.enclosing_node().clone(),
preceding: self.preceding_node.clone(),
following: None,
following_token: following_token.cloned(),
lines_before,
lines_after: 0, // Will be initialized after
text_position: position,
kind: Style::get_comment_kind(&comment),
comment,
});
lines_before = 0;
}
if let Some(parens_source_range) = self
.parentheses
.r_paren_source_range(piece.text_range().end())
{
self.flush_before_r_paren_comments(
parens_source_range,
&token,
position,
lines_before,
comments_start,
trailing_end,
);
lines_before = 0;
position = CommentTextPosition::SameLine;
comments_start = 0;
trailing_end = None;
}
}
(comments_start, lines_before, position, trailing_end)
}
/// Processes comments appearing right before the `)` of a parenthesized expression.
#[cold]
fn flush_before_r_paren_comments(
&mut self,
parens_source_range: TextRange,
last_token: &SyntaxToken<Style::Language>,
position: CommentTextPosition,
lines_before: u32,
start: usize,
trailing_end: Option<usize>,
) {
let enclosing = self.enclosing_node().clone();
let comments = &mut self.pending_comments[start..];
let trailing_end = trailing_end.unwrap_or(comments.len());
let mut comments = comments.iter_mut().enumerate().peekable();
let parenthesized_node = self
.parentheses
.outer_most_parenthesized_node(last_token, parens_source_range);
let preceding = parenthesized_node;
// Using the `enclosing` as default but it's mainly to satisfy Rust. The only case where it is used
// is if someone formats a Parenthesized expression as the root. Something we explicitly disallow
// in ruff_js_formatter
let enclosing = preceding.parent().unwrap_or(enclosing);
// Update the lines after of all comments as well as the positioning of end of line comments.
while let Some((index, comment)) = comments.next() {
// Update the position of all trailing comments to be end of line as we've seen a line break since.
if index < trailing_end && position.is_own_line() {
comment.text_position = CommentTextPosition::EndOfLine;
}
comment.preceding = Some(preceding.clone());
comment.enclosing = enclosing.clone();
comment.lines_after = comments
.peek()
.map_or(lines_before, |(_, next)| next.lines_before);
}
self.flush_comments(None);
}
}
struct CommentsBuilder<L: Language> {
comments: CommentsMap<SyntaxElementKey, SourceComment<L>>,
skipped: FxHashSet<SyntaxElementKey>,
}
impl<L: Language> CommentsBuilder<L> {
fn add_comment(&mut self, placement: CommentPlacement<L>) {
match placement {
CommentPlacement::Leading { node, comment } => {
self.push_leading_comment(&node, comment);
}
CommentPlacement::Trailing { node, comment } => {
self.push_trailing_comment(&node, comment);
}
CommentPlacement::Dangling { node, comment } => {
self.push_dangling_comment(&node, comment)
}
CommentPlacement::Default(mut comment) => {
match comment.text_position {
CommentTextPosition::EndOfLine => {
match (comment.take_preceding_node(), comment.take_following_node()) {
(Some(preceding), Some(_)) => {
// Attach comments with both preceding and following node to the preceding
// because there's a line break separating it from the following node.
// ```javascript
// a; // comment
// b
// ```
self.push_trailing_comment(&preceding, comment);
}
(Some(preceding), None) => {
self.push_trailing_comment(&preceding, comment);
}
(None, Some(following)) => {
self.push_leading_comment(&following, comment);
}
(None, None) => {
self.push_dangling_comment(
&comment.enclosing_node().clone(),
comment,
);
}
}
}
CommentTextPosition::OwnLine => {
match (comment.take_preceding_node(), comment.take_following_node()) {
// Following always wins for a leading comment
// ```javascript
// a;
// // comment
// b
// ```
// attach the comment to the `b` expression statement
(_, Some(following)) => {
self.push_leading_comment(&following, comment);
}
(Some(preceding), None) => {
self.push_trailing_comment(&preceding, comment);
}
(None, None) => {
self.push_dangling_comment(
&comment.enclosing_node().clone(),
comment,
);
}
}
}
CommentTextPosition::SameLine => {
match (comment.take_preceding_node(), comment.take_following_node()) {
(Some(preceding), Some(following)) => {
// Only make it a trailing comment if it directly follows the preceding node but not if it is separated
// by one or more tokens
// ```javascript
// a /* comment */ b; // Comment is a trailing comment
// a, /* comment */ b; // Comment should be a leading comment
// ```
if preceding.text_range().end()
== comment.piece().as_piece().token().text_range().end()
{
self.push_trailing_comment(&preceding, comment);
} else {
self.push_leading_comment(&following, comment);
}
}
(Some(preceding), None) => {
self.push_trailing_comment(&preceding, comment);
}
(None, Some(following)) => {
self.push_leading_comment(&following, comment);
}
(None, None) => {
self.push_dangling_comment(
&comment.enclosing_node().clone(),
comment,
);
}
}
}
}
}
}
}
fn mark_has_skipped(&mut self, token: &SyntaxToken<L>) {
self.skipped.insert(token.key());
}
fn push_leading_comment(&mut self, node: &SyntaxNode<L>, comment: impl Into<SourceComment<L>>) {
self.comments.push_leading(node.key(), comment.into());
}
fn push_dangling_comment(
&mut self,
node: &SyntaxNode<L>,
comment: impl Into<SourceComment<L>>,
) {
self.comments.push_dangling(node.key(), comment.into());
}
fn push_trailing_comment(
&mut self,
node: &SyntaxNode<L>,
comment: impl Into<SourceComment<L>>,
) {
self.comments.push_trailing(node.key(), comment.into());
}
fn finish(
self,
) -> (
CommentsMap<SyntaxElementKey, SourceComment<L>>,
FxHashSet<SyntaxElementKey>,
) {
(self.comments, self.skipped)
}
}
impl<L: Language> Default for CommentsBuilder<L> {
fn default() -> Self {
Self {
comments: CommentsMap::new(),
skipped: FxHashSet::default(),
}
}
}
enum SourceParentheses<'a> {
Empty,
SourceMap {
map: &'a TransformSourceMap,
next: Option<DeletedRangeEntry<'a>>,
tail: DeletedRanges<'a>,
},
}
impl<'a> SourceParentheses<'a> {
fn from_source_map(source_map: Option<&'a TransformSourceMap>) -> Self {
match source_map {
None => Self::Empty,
Some(source_map) => {
let mut deleted = source_map.deleted_ranges();
SourceParentheses::SourceMap {
map: source_map,
next: deleted.next(),
tail: deleted,
}
}
}
}
/// Returns the range of `node` including its parentheses if any. Otherwise returns the range as is
fn parenthesized_range<L: Language>(&self, node: &SyntaxNode<L>) -> TextRange {
match self {
SourceParentheses::Empty => node.text_trimmed_range(),
SourceParentheses::SourceMap { map, .. } => map.trimmed_source_range(node),
}
}
/// Tests if the next offset is at a position where the original source document used to have a `)`.
///
/// Must be called with offsets in increasing order.
///
/// Returns the source range of the `)` if there's any `)` in the deleted range at this offset. Returns `None` otherwise
fn r_paren_source_range(&mut self, offset: TextSize) -> Option<TextRange> {
match self {
SourceParentheses::Empty => None,
SourceParentheses::SourceMap { next, tail, .. } => {
while let Some(range) = next {
#[allow(clippy::comparison_chain)]
if range.transformed == offset {
// A deleted range can contain multiple tokens. See if there's any `)` in the deleted
// range and compute its source range.
return range.text.find(')').map(|r_paren_position| {
let start = range.source + TextSize::from(r_paren_position as u32);
TextRange::at(start, TextSize::from(1))
});
} else if range.transformed > offset {
return None;
} else {
*next = tail.next();
}
}
None
}
}
}
/// Searches for the outermost node that is still inside the parentheses specified by the `parentheses_source_range`.
fn outer_most_parenthesized_node<L: Language>(
&self,
token: &SyntaxToken<L>,
parentheses_source_range: TextRange,
) -> SyntaxNode<L> {
match self {
SourceParentheses::Empty => token.parent().unwrap(),
SourceParentheses::SourceMap { map, .. } => {
debug_assert_eq!(&map.text()[parentheses_source_range], ")");
// How this works: We search for the outermost node that, in the source document, ends right after the `)`.
// The issue is that multiple nodes may end right after the `)`
//
// ```javascript
// !(
// a
// /* comment */
// )
// ```
// In the transformed document, the `ReferenceIdentifier`, `IdentifierExpression`, `UnaryExpression`, and `ExpressionStatement`
// all end at the end position of `)`.
// However, not all of the nodes start at the same position. That's why this code also tracks the start.
// We first find the closest node that directly ends at the position of the right paren. We then continue
// upwards to find the outermost node that starts at the same position as that node. (In this case,
// `ReferenceIdentifier` -> `IdentifierExpression`.)
let mut start_offset = None;
let r_paren_source_end = parentheses_source_range.end();
let ancestors = token.ancestors().take_while(|node| {
let source_range = self.parenthesized_range(node);
if let Some(start) = start_offset {
TextRange::new(start, r_paren_source_end).contains_range(source_range)
}
// Greater than to guarantee that we always return at least one node AND
// handle the case where a node is wrapped in multiple parentheses.
// Take the first node that fully encloses the parentheses
else if source_range.end() >= r_paren_source_end {
start_offset = Some(source_range.start());
true
} else {
source_range.end() < r_paren_source_end
}
});
// SAFETY:
// * The builder starts with a node which guarantees that every token has a parent node.
// * The above `take_while` guarantees to return `true` for the parent of the token.
// Thus, there's always at least one node
ancestors.last().unwrap()
}
}
}
}
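The big match in `CommentsBuilder::add_comment` reduces to a small decision table over the comment's position and its neighboring nodes. A condensed sketch (names simplified; the token-adjacency refinement for same-line comments between two nodes is omitted):

#[derive(Clone, Copy)]
enum Position {
    EndOfLine,
    OwnLine,
    SameLine,
}

// Condensed sketch of the default placement rules above.
fn place(position: Position, preceding: Option<&str>, following: Option<&str>) -> String {
    match (position, preceding, following) {
        // `a; // comment`: a line break separates the comment from the
        // following node, so it trails the preceding one.
        (Position::EndOfLine, Some(previous), _) => format!("trailing({previous})"),
        // An own-line comment leads whatever node comes next.
        (Position::OwnLine, _, Some(next)) => format!("leading({next})"),
        (_, Some(previous), _) => format!("trailing({previous})"),
        (_, None, Some(next)) => format!("leading({next})"),
        // No preceding or following node: dangle inside the enclosing node.
        (_, None, None) => "dangling".to_string(),
    }
}

fn main() {
    assert_eq!(place(Position::OwnLine, Some("a"), Some("b")), "leading(b)");
    assert_eq!(place(Position::EndOfLine, Some("a"), Some("b")), "trailing(a)");
    assert_eq!(place(Position::SameLine, None, None), "dangling");
}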


@@ -1,836 +0,0 @@
use countme::Count;
use rustc_hash::FxHashMap;
use std::fmt::{Debug, Formatter};
use std::iter::FusedIterator;
use std::num::NonZeroU32;
use std::ops::Range;
/// An optimized multi-map implementation for storing leading, dangling, and trailing parts for a key.
///
/// A naive implementation using three multimaps, one each for the leading, dangling, and trailing parts,
/// requires between `keys` and `keys * 3` vec allocations.
///
/// This map implementation optimises for the use case where:
/// * Parts belonging to the same key are inserted together. For example, all parts for the key `a` are inserted
/// before inserting any parts for the key `b`.
/// * The parts per key are inserted in the following order: leading, dangling, and then trailing parts.
///
/// Parts inserted in the above-mentioned order are stored in a `Vec` shared by all keys to reduce the number
/// of allocations and increase cache locality. The implementation falls back to
/// storing the leading, dangling, and trailing parts of a key in dedicated `Vec`s if the parts
/// aren't inserted in the above described order. However, this comes with a slight performance penalty due to:
/// * Requiring up to three [Vec] allocations, one each for the leading, dangling, and trailing parts.
/// * Copying the already inserted parts for that key (by cloning) into the newly allocated [Vec]s.
/// * Resolving the slices for every part requires an extra level of indirection.
///
/// ## Limitations
///
/// The map supports storing up to `u32::MAX - 1` parts. Inserting the `u32::MAX`th part panics.
///
/// ## Comments
///
/// Storing the leading, dangling, and trailing comments is an exemplary use case for this map implementation because
/// it is generally desired to keep the comments in the same order as in the source document. This translates to
/// inserting the comments per node, and for every node in leading, dangling, trailing order (the same order this map optimises for).
///
/// Running Rome formatter on real world use cases showed that more than 99.99% of comments get inserted in
/// the described order.
///
/// The size limitation isn't a concern for comments because Rome supports source documents with a size up to 4GB (`u32::MAX`)
/// and every comment has at least a size of 2 bytes:
/// * 1 byte for the start sequence, e.g. `#`
/// * 1 byte for the end sequence, e.g. `\n`
///
/// Meaning, the upper bound for comment parts in a document is `u32::MAX / 2`.
pub(super) struct CommentsMap<K, V> {
/// Lookup table to retrieve the entry for a key.
index: FxHashMap<K, Entry>,
/// Flat array storing all the parts that have been inserted in order.
parts: Vec<V>,
/// Vector containing the leading, dangling, and trailing vectors for out of order entries.
///
/// The length of `out_of_order` is a multiple of 3 where:
/// * `index % 3 == 0`: Leading parts
/// * `index % 3 == 1`: Dangling parts
/// * `index % 3 == 2`: Trailing parts
out_of_order: Vec<Vec<V>>,
}
impl<K: std::hash::Hash + Eq, V> CommentsMap<K, V> {
pub fn new() -> Self {
Self {
index: FxHashMap::default(),
parts: Vec::new(),
out_of_order: Vec::new(),
}
}
/// Pushes a leading part for `key`.
pub fn push_leading(&mut self, key: K, part: V)
where
V: Clone,
{
match self.index.get_mut(&key) {
None => {
let start = self.parts.len();
self.parts.push(part);
self.index.insert(
key,
Entry::InOrder(InOrderEntry::leading(start..self.parts.len())),
);
}
// Has only leading comments and no elements have been pushed since
Some(Entry::InOrder(entry))
if entry.trailing_start.is_none() && self.parts.len() == entry.range().end =>
{
self.parts.push(part);
entry.increment_leading_range();
}
Some(Entry::OutOfOrder(entry)) => {
let leading = &mut self.out_of_order[entry.leading_index()];
leading.push(part);
}
Some(entry) => {
let out_of_order =
Self::entry_to_out_of_order(entry, &self.parts, &mut self.out_of_order);
self.out_of_order[out_of_order.leading_index()].push(part);
}
}
}
/// Pushes a dangling part for `key`
pub fn push_dangling(&mut self, key: K, part: V)
where
V: Clone,
{
match self.index.get_mut(&key) {
None => {
let start = self.parts.len();
self.parts.push(part);
self.index.insert(
key,
Entry::InOrder(InOrderEntry::dangling(start..self.parts.len())),
);
}
// Has leading and dangling comments and its comments are at the end of parts
Some(Entry::InOrder(entry))
if entry.trailing_end.is_none() && self.parts.len() == entry.range().end =>
{
self.parts.push(part);
entry.increment_dangling_range();
}
Some(Entry::OutOfOrder(entry)) => {
let dangling = &mut self.out_of_order[entry.dangling_index()];
dangling.push(part);
}
Some(entry) => {
let out_of_order =
Self::entry_to_out_of_order(entry, &self.parts, &mut self.out_of_order);
self.out_of_order[out_of_order.dangling_index()].push(part);
}
}
}
/// Pushes a trailing part for `key`.
pub fn push_trailing(&mut self, key: K, part: V)
where
V: Clone,
{
match self.index.get_mut(&key) {
None => {
let start = self.parts.len();
self.parts.push(part);
self.index.insert(
key,
Entry::InOrder(InOrderEntry::trailing(start..self.parts.len())),
);
}
// Its comments are at the end
Some(Entry::InOrder(entry)) if entry.range().end == self.parts.len() => {
self.parts.push(part);
entry.increment_trailing_range();
}
Some(Entry::OutOfOrder(entry)) => {
let trailing = &mut self.out_of_order[entry.trailing_index()];
trailing.push(part);
}
Some(entry) => {
let out_of_order =
Self::entry_to_out_of_order(entry, &self.parts, &mut self.out_of_order);
self.out_of_order[out_of_order.trailing_index()].push(part);
}
}
}
#[cold]
fn entry_to_out_of_order<'a>(
entry: &'a mut Entry,
parts: &[V],
out_of_order: &mut Vec<Vec<V>>,
) -> &'a mut OutOfOrderEntry
where
V: Clone,
{
match entry {
Entry::InOrder(in_order) => {
let index = out_of_order.len();
out_of_order.push(parts[in_order.leading_range()].to_vec());
out_of_order.push(parts[in_order.dangling_range()].to_vec());
out_of_order.push(parts[in_order.trailing_range()].to_vec());
*entry = Entry::OutOfOrder(OutOfOrderEntry {
leading_index: index,
_count: Count::new(),
});
match entry {
Entry::InOrder(_) => unreachable!(),
Entry::OutOfOrder(out_of_order) => out_of_order,
}
}
Entry::OutOfOrder(entry) => entry,
}
}
/// Retrieves all leading parts of `key`
pub fn leading(&self, key: &K) -> &[V] {
match self.index.get(key) {
None => &[],
Some(Entry::InOrder(in_order)) => &self.parts[in_order.leading_range()],
Some(Entry::OutOfOrder(entry)) => &self.out_of_order[entry.leading_index()],
}
}
/// Retrieves all dangling parts of `key`.
pub fn dangling(&self, key: &K) -> &[V] {
match self.index.get(key) {
None => &[],
Some(Entry::InOrder(in_order)) => &self.parts[in_order.dangling_range()],
Some(Entry::OutOfOrder(entry)) => &self.out_of_order[entry.dangling_index()],
}
}
/// Retrieves all trailing parts of `key`.
pub fn trailing(&self, key: &K) -> &[V] {
match self.index.get(key) {
None => &[],
Some(Entry::InOrder(in_order)) => &self.parts[in_order.trailing_range()],
Some(Entry::OutOfOrder(entry)) => &self.out_of_order[entry.trailing_index()],
}
}
/// Returns `true` if `key` has any leading, dangling, or trailing part.
pub fn has(&self, key: &K) -> bool {
self.index.get(key).is_some()
}
/// Returns an iterator over all leading, dangling, and trailing parts of `key`.
pub fn parts(&self, key: &K) -> PartsIterator<V> {
match self.index.get(key) {
None => PartsIterator::Slice([].iter()),
Some(entry) => PartsIterator::from_entry(entry, self),
}
}
/// Returns an iterator over the parts of all keys.
#[allow(unused)]
pub fn all_parts(&self) -> impl Iterator<Item = &V> {
self.index
.values()
.flat_map(|entry| PartsIterator::from_entry(entry, self))
}
}
impl<K: std::hash::Hash + Eq, V> Default for CommentsMap<K, V> {
fn default() -> Self {
Self::new()
}
}
impl<K, V> std::fmt::Debug for CommentsMap<K, V>
where
K: std::fmt::Debug,
V: std::fmt::Debug,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut builder = f.debug_map();
for (key, entry) in &self.index {
builder.entry(&key, &DebugEntry { entry, map: self });
}
builder.finish()
}
}
/// Iterator to iterate over all leading, dangling, and trailing parts of a key.
pub(super) enum PartsIterator<'a, V> {
/// The slice into the [CommentsMap::parts] [Vec] if this is an in-order entry or the trailing parts
/// of an out-of-order entry.
Slice(std::slice::Iter<'a, V>),
/// Iterator over the leading parts of an out-of-order entry. Returns the dangling parts, and then the
/// trailing parts once the leading iterator is fully consumed.
Leading {
leading: std::slice::Iter<'a, V>,
dangling: &'a [V],
trailing: &'a [V],
},
/// Iterator over the dangling parts of an out-of-order entry. Returns the trailing parts
/// once the leading iterator is fully consumed.
Dangling {
dangling: std::slice::Iter<'a, V>,
trailing: &'a [V],
},
}
impl<'a, V> PartsIterator<'a, V> {
fn from_entry<K>(entry: &Entry, map: &'a CommentsMap<K, V>) -> Self {
match entry {
Entry::OutOfOrder(entry) => PartsIterator::Leading {
leading: map.out_of_order[entry.leading_index()].iter(),
dangling: &map.out_of_order[entry.dangling_index()],
trailing: &map.out_of_order[entry.trailing_index()],
},
Entry::InOrder(entry) => PartsIterator::Slice(map.parts[entry.range()].iter()),
}
}
}
impl<'a, V> Iterator for PartsIterator<'a, V> {
type Item = &'a V;
fn next(&mut self) -> Option<Self::Item> {
match self {
PartsIterator::Slice(inner) => inner.next(),
PartsIterator::Leading {
leading,
dangling,
trailing,
} => match leading.next() {
Some(next) => Some(next),
None if !dangling.is_empty() => {
let mut dangling_iterator = dangling.iter();
let next = dangling_iterator.next().unwrap();
*self = PartsIterator::Dangling {
dangling: dangling_iterator,
trailing,
};
Some(next)
}
None => {
let mut trailing_iterator = trailing.iter();
let next = trailing_iterator.next();
*self = PartsIterator::Slice(trailing_iterator);
next
}
},
PartsIterator::Dangling { dangling, trailing } => match dangling.next() {
Some(next) => Some(next),
None => {
let mut trailing_iterator = trailing.iter();
let next = trailing_iterator.next();
*self = PartsIterator::Slice(trailing_iterator);
next
}
},
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
match self {
PartsIterator::Slice(slice) => slice.size_hint(),
PartsIterator::Leading {
leading,
dangling,
trailing,
} => {
let len = leading.len() + dangling.len() + trailing.len();
(len, Some(len))
}
PartsIterator::Dangling { dangling, trailing } => {
let len = dangling.len() + trailing.len();
(len, Some(len))
}
}
}
fn last(self) -> Option<Self::Item>
where
Self: Sized,
{
match self {
PartsIterator::Slice(slice) => slice.last(),
PartsIterator::Leading {
leading,
dangling,
trailing,
} => trailing
.last()
.or_else(|| dangling.last())
.or_else(|| leading.last()),
PartsIterator::Dangling { dangling, trailing } => {
trailing.last().or_else(|| dangling.last())
}
}
}
}
impl<V> ExactSizeIterator for PartsIterator<'_, V> {}
impl<V> FusedIterator for PartsIterator<'_, V> {}
#[derive(Debug)]
enum Entry {
InOrder(InOrderEntry),
OutOfOrder(OutOfOrderEntry),
}
struct DebugEntry<'a, K, V> {
entry: &'a Entry,
map: &'a CommentsMap<K, V>,
}
impl<K, V> Debug for DebugEntry<'_, K, V>
where
K: Debug,
V: Debug,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let leading = match self.entry {
Entry::OutOfOrder(entry) => self.map.out_of_order[entry.leading_index()].as_slice(),
Entry::InOrder(entry) => &self.map.parts[entry.leading_range()],
};
let dangling = match self.entry {
Entry::OutOfOrder(entry) => self.map.out_of_order[entry.dangling_index()].as_slice(),
Entry::InOrder(entry) => &self.map.parts[entry.dangling_range()],
};
let trailing = match self.entry {
Entry::OutOfOrder(entry) => self.map.out_of_order[entry.trailing_index()].as_slice(),
Entry::InOrder(entry) => &self.map.parts[entry.trailing_range()],
};
let mut list = f.debug_list();
list.entries(leading.iter().map(DebugValue::Leading));
list.entries(dangling.iter().map(DebugValue::Dangling));
list.entries(trailing.iter().map(DebugValue::Trailing));
list.finish()
}
}
enum DebugValue<'a, V> {
Leading(&'a V),
Dangling(&'a V),
Trailing(&'a V),
}
impl<V> Debug for DebugValue<'_, V>
where
V: Debug,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
DebugValue::Leading(leading) => f.debug_tuple("Leading").field(leading).finish(),
DebugValue::Dangling(dangling) => f.debug_tuple("Dangling").field(dangling).finish(),
DebugValue::Trailing(trailing) => f.debug_tuple("Trailing").field(trailing).finish(),
}
}
}
#[derive(Debug)]
struct InOrderEntry {
/// Index into the [CommentsMap::parts] vector where the leading parts of this entry start
leading_start: PartIndex,
/// Index into the [CommentsMap::parts] vector where the dangling parts (and, thus, the leading parts end) start.
dangling_start: PartIndex,
/// Index into the [CommentsMap::parts] vector where the trailing parts (and, thus, the dangling parts end) of this entry start
trailing_start: Option<PartIndex>,
/// Index into the [CommentsMap::parts] vector where the trailing parts of this entry end
trailing_end: Option<PartIndex>,
_count: Count<InOrderEntry>,
}
impl InOrderEntry {
fn leading(range: Range<usize>) -> Self {
InOrderEntry {
leading_start: PartIndex::from_len(range.start),
dangling_start: PartIndex::from_len(range.end),
trailing_start: None,
trailing_end: None,
_count: Count::new(),
}
}
fn dangling(range: Range<usize>) -> Self {
let start = PartIndex::from_len(range.start);
InOrderEntry {
leading_start: start,
dangling_start: start,
trailing_start: Some(PartIndex::from_len(range.end)),
trailing_end: None,
_count: Count::new(),
}
}
fn trailing(range: Range<usize>) -> Self {
let start = PartIndex::from_len(range.start);
InOrderEntry {
leading_start: start,
dangling_start: start,
trailing_start: Some(start),
trailing_end: Some(PartIndex::from_len(range.end)),
_count: Count::new(),
}
}
fn increment_leading_range(&mut self) {
assert!(
self.trailing_start.is_none(),
"Can't extend the leading range for an in order entry with dangling comments."
);
self.dangling_start.increment();
}
fn increment_dangling_range(&mut self) {
assert!(
self.trailing_end.is_none(),
"Can't extend the dangling range for an in order entry with trailing comments."
);
match &mut self.trailing_start {
Some(start) => start.increment(),
None => self.trailing_start = Some(self.dangling_start.incremented()),
}
}
fn increment_trailing_range(&mut self) {
match (self.trailing_start, &mut self.trailing_end) {
// Already has some trailing comments
(Some(_), Some(end)) => end.increment(),
// Has dangling comments only
(Some(start), None) => self.trailing_end = Some(start.incremented()),
// Has leading comments only
(None, None) => {
self.trailing_start = Some(self.dangling_start);
self.trailing_end = Some(self.dangling_start.incremented())
}
(None, Some(_)) => {
unreachable!()
}
}
}
fn leading_range(&self) -> Range<usize> {
self.leading_start.value()..self.dangling_start.value()
}
fn dangling_range(&self) -> Range<usize> {
match self.trailing_start {
None => self.dangling_start.value()..self.dangling_start.value(),
Some(trailing_start) => self.dangling_start.value()..trailing_start.value(),
}
}
fn trailing_range(&self) -> Range<usize> {
match (self.trailing_start, self.trailing_end) {
(Some(trailing_start), Some(trailing_end)) => {
trailing_start.value()..trailing_end.value()
}
// Only dangling comments
(Some(trailing_start), None) => trailing_start.value()..trailing_start.value(),
(None, Some(_)) => {
panic!("Trailing end shouldn't be set if trailing start is none");
}
(None, None) => self.dangling_start.value()..self.dangling_start.value(),
}
}
fn range(&self) -> Range<usize> {
self.leading_start.value()
..self
.trailing_end
.or(self.trailing_start)
.unwrap_or(self.dangling_start)
.value()
}
}
#[derive(Debug)]
struct OutOfOrderEntry {
/// Index into the [CommentsMap::out_of_order] vector at which offset the leading vec is stored.
leading_index: usize,
_count: Count<OutOfOrderEntry>,
}
impl OutOfOrderEntry {
const fn leading_index(&self) -> usize {
self.leading_index
}
const fn dangling_index(&self) -> usize {
self.leading_index + 1
}
const fn trailing_index(&self) -> usize {
self.leading_index + 2
}
}
/// Index into the [CommentsMap::parts] vector.
///
/// Stores the index as a [NonZeroU32], starting at 1 instead of 0 so that
/// `size_of::<PartIndex>() == size_of::<Option<PartIndex>>()`.
///
/// This means that only `u32::MAX - 1` parts can be stored. This should be sufficient for storing comments
/// because every comment is at least two bytes long, consisting of a start and end character sequence (`#` + newline, or `/*` and `*/`).
/// Thus, a document of length `u32::MAX` can have at most `u32::MAX / 2` comment parts.
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
struct PartIndex(NonZeroU32);
impl PartIndex {
fn from_len(value: usize) -> Self {
Self(NonZeroU32::try_from(value as u32 + 1).unwrap())
}
fn value(&self) -> usize {
(u32::from(self.0) - 1) as usize
}
fn increment(&mut self) {
*self = self.incremented();
}
fn incremented(&self) -> PartIndex {
PartIndex(NonZeroU32::new(self.0.get() + 1).unwrap())
}
}
#[cfg(test)]
mod tests {
use crate::comments::map::CommentsMap;
static EMPTY: [i32; 0] = [];
#[test]
fn leading_dangling_trailing() {
let mut map = CommentsMap::new();
map.push_leading("a", 1);
map.push_dangling("a", 2);
map.push_dangling("a", 3);
map.push_trailing("a", 4);
assert_eq!(map.parts, vec![1, 2, 3, 4]);
assert_eq!(map.leading(&"a"), &[1]);
assert_eq!(map.dangling(&"a"), &[2, 3]);
assert_eq!(map.trailing(&"a"), &[4]);
assert!(map.has(&"a"));
assert_eq!(
map.parts(&"a").copied().collect::<Vec<_>>(),
vec![1, 2, 3, 4]
);
}
#[test]
fn dangling_trailing() {
let mut map = CommentsMap::new();
map.push_dangling("a", 1);
map.push_dangling("a", 2);
map.push_trailing("a", 3);
assert_eq!(map.parts, vec![1, 2, 3]);
assert_eq!(map.leading(&"a"), &EMPTY);
assert_eq!(map.dangling(&"a"), &[1, 2]);
assert_eq!(map.trailing(&"a"), &[3]);
assert!(map.has(&"a"));
assert_eq!(map.parts(&"a").copied().collect::<Vec<_>>(), vec![1, 2, 3]);
}
#[test]
fn trailing() {
let mut map = CommentsMap::new();
map.push_trailing("a", 1);
map.push_trailing("a", 2);
assert_eq!(map.parts, vec![1, 2]);
assert_eq!(map.leading(&"a"), &EMPTY);
assert_eq!(map.dangling(&"a"), &EMPTY);
assert_eq!(map.trailing(&"a"), &[1, 2]);
assert!(map.has(&"a"));
assert_eq!(map.parts(&"a").copied().collect::<Vec<_>>(), vec![1, 2]);
}
#[test]
fn empty() {
let map = CommentsMap::<&str, i32>::default();
assert_eq!(map.parts, Vec::<i32>::new());
assert_eq!(map.leading(&"a"), &EMPTY);
assert_eq!(map.dangling(&"a"), &EMPTY);
assert_eq!(map.trailing(&"a"), &EMPTY);
assert!(!map.has(&"a"));
assert_eq!(
map.parts(&"a").copied().collect::<Vec<_>>(),
Vec::<i32>::new()
);
}
#[test]
fn multiple_keys() {
let mut map = CommentsMap::new();
map.push_leading("a", 1);
map.push_dangling("b", 2);
map.push_trailing("c", 3);
map.push_leading("d", 4);
map.push_dangling("d", 5);
map.push_trailing("d", 6);
assert_eq!(map.parts, &[1, 2, 3, 4, 5, 6]);
assert_eq!(map.leading(&"a"), &[1]);
assert_eq!(map.dangling(&"a"), &EMPTY);
assert_eq!(map.trailing(&"a"), &EMPTY);
assert_eq!(map.parts(&"a").copied().collect::<Vec<_>>(), vec![1]);
assert_eq!(map.leading(&"b"), &EMPTY);
assert_eq!(map.dangling(&"b"), &[2]);
assert_eq!(map.trailing(&"b"), &EMPTY);
assert_eq!(map.parts(&"b").copied().collect::<Vec<_>>(), vec![2]);
assert_eq!(map.leading(&"c"), &EMPTY);
assert_eq!(map.dangling(&"c"), &EMPTY);
assert_eq!(map.trailing(&"c"), &[3]);
assert_eq!(map.parts(&"c").copied().collect::<Vec<_>>(), vec![3]);
assert_eq!(map.leading(&"d"), &[4]);
assert_eq!(map.dangling(&"d"), &[5]);
assert_eq!(map.trailing(&"d"), &[6]);
assert_eq!(map.parts(&"d").copied().collect::<Vec<_>>(), vec![4, 5, 6]);
}
#[test]
fn dangling_leading() {
let mut map = CommentsMap::new();
map.push_dangling("a", 1);
map.push_leading("a", 2);
map.push_dangling("a", 3);
map.push_trailing("a", 4);
assert_eq!(map.leading(&"a"), [2]);
assert_eq!(map.dangling(&"a"), [1, 3]);
assert_eq!(map.trailing(&"a"), [4]);
assert_eq!(
map.parts(&"a").copied().collect::<Vec<_>>(),
vec![2, 1, 3, 4]
);
assert!(map.has(&"a"));
}
#[test]
fn trailing_leading() {
let mut map = CommentsMap::new();
map.push_trailing("a", 1);
map.push_leading("a", 2);
map.push_dangling("a", 3);
map.push_trailing("a", 4);
assert_eq!(map.leading(&"a"), [2]);
assert_eq!(map.dangling(&"a"), [3]);
assert_eq!(map.trailing(&"a"), [1, 4]);
assert_eq!(
map.parts(&"a").copied().collect::<Vec<_>>(),
vec![2, 3, 1, 4]
);
assert!(map.has(&"a"));
}
#[test]
fn trailing_dangling() {
let mut map = CommentsMap::new();
map.push_trailing("a", 1);
map.push_dangling("a", 2);
map.push_trailing("a", 3);
assert_eq!(map.leading(&"a"), &EMPTY);
assert_eq!(map.dangling(&"a"), &[2]);
assert_eq!(map.trailing(&"a"), &[1, 3]);
assert_eq!(map.parts(&"a").copied().collect::<Vec<_>>(), vec![2, 1, 3]);
assert!(map.has(&"a"));
}
#[test]
fn keys_out_of_order() {
let mut map = CommentsMap::new();
map.push_leading("a", 1);
map.push_dangling("b", 2);
map.push_leading("a", 3);
map.push_trailing("c", 4);
map.push_dangling("b", 5);
map.push_leading("d", 6);
map.push_trailing("c", 7);
assert_eq!(map.leading(&"a"), &[1, 3]);
assert_eq!(map.dangling(&"b"), &[2, 5]);
assert_eq!(map.trailing(&"c"), &[4, 7]);
assert!(map.has(&"a"));
assert!(map.has(&"b"));
assert!(map.has(&"c"));
}
}
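One detail worth pulling out of the map above: `PartIndex` stores `value + 1` in a `NonZeroU32` so that `Option<PartIndex>` is no larger than `PartIndex` itself. A standalone sketch of that niche optimization:

use std::num::NonZeroU32;

#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct PartIndex(NonZeroU32);

impl PartIndex {
    // Store `value + 1` so the zero bit pattern is free to act as `None`.
    fn from_len(value: usize) -> Self {
        Self(NonZeroU32::new(value as u32 + 1).unwrap())
    }

    fn value(self) -> usize {
        (self.0.get() - 1) as usize
    }
}

fn main() {
    // The niche makes the `Option` cost no extra space.
    assert_eq!(std::mem::size_of::<Option<PartIndex>>(), 4);
    assert_eq!(PartIndex::from_len(0).value(), 0);
}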


@@ -1,5 +1,5 @@
use crate::prelude::TagKind;
use ruff_rowan::{SyntaxError, TextRange};
use ruff_text_size::TextRange;
use std::error::Error;
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
@@ -43,20 +43,6 @@ impl std::fmt::Display for FormatError {
impl Error for FormatError {}
impl From<SyntaxError> for FormatError {
fn from(error: SyntaxError) -> Self {
FormatError::from(&error)
}
}
impl From<&SyntaxError> for FormatError {
fn from(syntax_error: &SyntaxError) -> Self {
match syntax_error {
SyntaxError::MissingRequiredChild => FormatError::SyntaxError,
}
}
}
impl From<PrintError> for FormatError {
fn from(error: PrintError) -> Self {
FormatError::from(&error)
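With the `SyntaxError` conversions removed, the remaining impls keep the same delegation shape: the owned conversion forwards to the by-reference one so the actual mapping lives in a single place. A simplified sketch (these error types are stand-ins, not the crate's full definitions):

#[derive(Debug)]
struct PrintError;

#[derive(Debug, PartialEq)]
enum FormatError {
    // Stand-in variant; the real enum carries more detail.
    PrintError,
}

impl From<PrintError> for FormatError {
    fn from(error: PrintError) -> Self {
        // Forward to the reference impl so both conversions share one body.
        FormatError::from(&error)
    }
}

impl From<&PrintError> for FormatError {
    fn from(_: &PrintError) -> Self {
        FormatError::PrintError
    }
}

fn main() {
    assert_eq!(FormatError::from(PrintError), FormatError::PrintError);
}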


@@ -4,10 +4,10 @@ pub mod tag;
use crate::format_element::tag::{LabelId, Tag};
use std::borrow::Cow;
use crate::{TagKind, TextSize};
#[cfg(target_pointer_width = "64")]
use ruff_rowan::static_assert;
use ruff_rowan::{SyntaxTokenText, TextRange};
use crate::static_assert;
use crate::{TagKind, TextSize};
use ruff_text_size::TextRange;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::rc::Rc;
@@ -41,15 +41,6 @@ pub enum FormatElement {
/// Token constructed by slicing a defined range from a static string.
StaticTextSlice { text: Rc<str>, range: TextRange },
/// A token for a text that is taken as is from the source code (input text and formatted representation are identical).
/// Implemented by taking a slice from a `SyntaxToken` to avoid allocating a new string.
SyntaxTokenTextSlice {
/// The start position of the token in the unformatted source code
source_position: TextSize,
/// The token text
slice: SyntaxTokenText,
},
/// Prevents line suffixes from moving past this boundary. Forces the printer to print any pending
/// line suffixes, potentially by inserting a hard line break.
LineSuffixBoundary,
@@ -81,10 +72,6 @@ impl std::fmt::Debug for FormatElement {
FormatElement::StaticTextSlice { text, .. } => {
fmt.debug_tuple("Text").field(text).finish()
}
FormatElement::SyntaxTokenTextSlice { slice, .. } => fmt
.debug_tuple("SyntaxTokenTextSlice")
.field(slice)
.finish(),
FormatElement::LineSuffixBoundary => write!(fmt, "LineSuffixBoundary"),
FormatElement::BestFitting(best_fitting) => {
fmt.debug_tuple("BestFitting").field(&best_fitting).finish()
@@ -230,8 +217,7 @@ impl FormatElement {
pub const fn is_text(&self) -> bool {
matches!(
self,
FormatElement::SyntaxTokenTextSlice { .. }
| FormatElement::StaticTextSlice { .. }
FormatElement::StaticTextSlice { .. }
| FormatElement::DynamicText { .. }
| FormatElement::StaticText { .. }
)
@@ -251,7 +237,6 @@ impl FormatElements for FormatElement {
FormatElement::StaticText { text } => text.contains('\n'),
FormatElement::DynamicText { text, .. } => text.contains('\n'),
FormatElement::StaticTextSlice { text, range } => text[*range].contains('\n'),
FormatElement::SyntaxTokenTextSlice { slice, .. } => slice.contains('\n'),
FormatElement::Interned(interned) => interned.will_break(),
// Traverse into the most flat version because the content is guaranteed to expand when even
// the most flat version contains some content that forces a break.
@@ -384,7 +369,7 @@ mod tests {
}
#[cfg(target_pointer_width = "64")]
static_assert!(std::mem::size_of::<ruff_rowan::TextRange>() == 8usize);
static_assert!(std::mem::size_of::<ruff_text_size::TextRange>() == 8usize);
#[cfg(target_pointer_width = "64")]
static_assert!(std::mem::size_of::<crate::format_element::tag::VerbatimKind>() == 8usize);
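The retained assertion is easy to check directly: `ruff_text_size::TextRange` is a pair of `u32` offsets, so it stays at 8 bytes on 64-bit targets. A quick verification sketch:

use ruff_text_size::{TextRange, TextSize};

fn main() {
    // Two u32 offsets (start and end) pack into 8 bytes.
    assert_eq!(std::mem::size_of::<TextRange>(), 8);
    let range = TextRange::at(TextSize::from(10), TextSize::from(5));
    assert_eq!(range.end(), TextSize::from(15));
}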


@@ -6,9 +6,9 @@ use crate::printer::LineEnding;
use crate::{format, write};
use crate::{
BufferExtensions, Format, FormatContext, FormatElement, FormatOptions, FormatResult, Formatter,
IndentStyle, LineWidth, PrinterOptions, TransformSourceMap,
IndentStyle, LineWidth, PrinterOptions,
};
use ruff_rowan::TextSize;
use ruff_text_size::TextSize;
use rustc_hash::FxHashMap;
use std::collections::HashMap;
use std::ops::Deref;
@@ -82,7 +82,6 @@ impl Document {
FormatElement::StaticText { text } => text.contains('\n'),
FormatElement::DynamicText { text, .. } => text.contains('\n'),
FormatElement::StaticTextSlice { text, range } => text[*range].contains('\n'),
FormatElement::SyntaxTokenTextSlice { slice, .. } => slice.contains('\n'),
FormatElement::ExpandParent
| FormatElement::Line(LineMode::Hard | LineMode::Empty) => true,
_ => false,
@@ -143,10 +142,6 @@ impl FormatContext for IrFormatContext {
fn options(&self) -> &Self::Options {
&IrFormatOptions
}
fn source_map(&self) -> Option<&TransformSourceMap> {
None
}
}
#[derive(Debug, Clone, Default)]
@@ -195,8 +190,7 @@ impl Format<IrFormatContext> for &[FormatElement] {
element @ FormatElement::Space
| element @ FormatElement::StaticText { .. }
| element @ FormatElement::DynamicText { .. }
| element @ FormatElement::StaticTextSlice { .. }
| element @ FormatElement::SyntaxTokenTextSlice { .. } => {
| element @ FormatElement::StaticTextSlice { .. } => {
if !in_text {
write!(f, [text("\"")])?;
}

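Both hunks in this file drop the `SyntaxTokenTextSlice` arm from the same kind of predicate: an element "will break" if its text contains a newline, or if it is a hard or empty line. A reduced sketch over hypothetical, simplified variants:

```rust
enum FormatElement {
    StaticText { text: &'static str },
    HardLine,
    Space,
}

/// An element forces its enclosing group to break if it always
/// produces a line break when printed.
fn will_break(element: &FormatElement) -> bool {
    match element {
        FormatElement::StaticText { text } => text.contains('\n'),
        FormatElement::HardLine => true,
        FormatElement::Space => false,
    }
}

fn main() {
    assert!(will_break(&FormatElement::StaticText { text: "a\nb" }));
    assert!(will_break(&FormatElement::HardLine));
    assert!(!will_break(&FormatElement::Space));
}
```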

@@ -1,3 +1,5 @@
#![allow(dead_code)]
use crate::prelude::*;
use std::cell::RefCell;
use std::marker::PhantomData;
@@ -5,7 +7,7 @@ use std::ops::Deref;
use crate::Buffer;
/// Utility trait that allows memorizing the output of a [Format].
/// Utility trait that allows memoizing the output of a [`Format`].
/// Useful to avoid re-formatting the same object twice.
pub trait MemoizeFormat<Context> {
/// Returns a formattable object that memoizes the result of `Format` by cloning.
@@ -16,7 +18,7 @@ pub trait MemoizeFormat<Context> {
/// use std::cell::Cell;
/// use ruff_formatter::{format, write};
/// use ruff_formatter::prelude::*;
/// use ruff_rowan::TextSize;
/// use ruff_text_size::TextSize;
///
/// struct MyFormat {
/// value: Cell<u64>
@@ -66,7 +68,7 @@ pub trait MemoizeFormat<Context> {
impl<T, Context> MemoizeFormat<Context> for T where T: Format<Context> {}
/// Memoizes the output of its inner [Format] to avoid re-formatting a potential expensive object.
/// Memoizes the output of its inner [`Format`] to avoid re-formatting a potentially expensive object.
#[derive(Debug)]
pub struct Memoized<F, Context> {
inner: F,
@@ -98,7 +100,7 @@ where
/// use std::cell::Cell;
/// use ruff_formatter::{format, write};
/// use ruff_formatter::prelude::*;
/// use ruff_rowan::TextSize;
/// use ruff_text_size::TextSize;
///
/// #[derive(Default)]
/// struct Counter {

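`Memoized` caches the first formatting result so that repeated measuring (for example, during best-fitting) doesn't re-run an expensive `Format` implementation. A minimal sketch of the caching idea, with formatting reduced to a closure that produces a `String`:

```rust
use std::cell::RefCell;

/// Caches the output of `produce` after the first call.
struct Memoized<F: Fn() -> String> {
    produce: F,
    memory: RefCell<Option<String>>,
}

impl<F: Fn() -> String> Memoized<F> {
    fn new(produce: F) -> Self {
        Self { produce, memory: RefCell::new(None) }
    }

    fn get(&self) -> String {
        self.memory
            .borrow_mut()
            .get_or_insert_with(|| (self.produce)())
            .clone()
    }
}

fn main() {
    let calls = RefCell::new(0);
    let memoized = Memoized::new(|| {
        *calls.borrow_mut() += 1;
        String::from("formatted output")
    });
    assert_eq!(memoized.get(), "formatted output");
    assert_eq!(memoized.get(), "formatted output");
    assert_eq!(*calls.borrow(), 1); // the closure ran only once
}
```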

@@ -1,10 +1,7 @@
use crate::buffer::BufferSnapshot;
use crate::builders::{FillBuilder, JoinBuilder, JoinNodesBuilder, Line};
use crate::builders::{FillBuilder, JoinBuilder};
use crate::prelude::*;
use crate::{
Arguments, Buffer, Comments, CstFormatContext, FormatContext, FormatState, FormatStateSnapshot,
GroupId, VecBuffer,
};
use crate::{Arguments, Buffer, FormatContext, FormatState, GroupId, VecBuffer};
/// Handles the formatting of a CST and stores the context for how the CST should be formatted (user preferences).
/// The formatter is passed to the [Format] implementation of every node in the CST so that they
@@ -111,28 +108,6 @@ impl<'buf, Context> Formatter<'buf, Context> {
JoinBuilder::with_separator(self, joiner)
}
/// Specialized version of [crate::Formatter::join_with] for joining SyntaxNodes separated by a space, soft
/// line break or empty line depending on the input file.
///
/// This functions inspects the input source and separates consecutive elements with either
/// a [crate::builders::soft_line_break_or_space] or [crate::builders::empty_line] depending on how many line breaks were
/// separating the elements in the original file.
pub fn join_nodes_with_soft_line<'a>(
&'a mut self,
) -> JoinNodesBuilder<'a, 'buf, Line, Context> {
JoinNodesBuilder::new(soft_line_break_or_space(), self)
}
/// Specialized version of [crate::Formatter::join_with] for joining SyntaxNodes separated by one or more
/// line breaks depending on the input file.
///
/// This functions inspects the input source and separates consecutive elements with either
/// a [crate::builders::hard_line_break] or [crate::builders::empty_line] depending on how many line breaks were separating the
/// elements in the original file.
pub fn join_nodes_with_hardline<'a>(&'a mut self) -> JoinNodesBuilder<'a, 'buf, Line, Context> {
JoinNodesBuilder::new(hard_line_break(), self)
}
/// Concatenates a list of [crate::Format] objects with spaces and line breaks to fit
/// them on as few lines as possible. Each element introduces a conceptual group. The printer
/// first tries to print the item in flat mode but then prints it in expanded mode if it doesn't fit.
@@ -217,28 +192,16 @@ where
pub fn state_snapshot(&self) -> FormatterSnapshot {
FormatterSnapshot {
buffer: self.buffer.snapshot(),
state: self.state().snapshot(),
}
}
#[inline]
/// Restore the state of the formatter to a previous snapshot
pub fn restore_state_snapshot(&mut self, snapshot: FormatterSnapshot) {
self.state_mut().restore_snapshot(snapshot.state);
self.buffer.restore_snapshot(snapshot.buffer);
}
}
impl<Context> Formatter<'_, Context>
where
Context: CstFormatContext,
{
/// Returns the comments from the context.
pub fn comments(&self) -> &Comments<Context::Language> {
self.context().comments()
}
}
impl<Context> Buffer for Formatter<'_, Context> {
type Context = Context;
@@ -284,5 +247,4 @@ impl<Context> Buffer for Formatter<'_, Context> {
/// mode and compiled to nothing in release mode
pub struct FormatterSnapshot {
buffer: BufferSnapshot,
state: FormatStateSnapshot,
}

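With token tracking removed, `FormatterSnapshot` reduces to a single `BufferSnapshot`. A sketch of the snapshot/restore idea, assuming a simple `Vec`-backed buffer (the actual `BufferSnapshot` type is not shown in this diff):

```rust
struct VecBuffer {
    elements: Vec<String>,
}

/// A snapshot only needs to remember how long the buffer was.
struct BufferSnapshot {
    len: usize,
}

impl VecBuffer {
    fn snapshot(&self) -> BufferSnapshot {
        BufferSnapshot { len: self.elements.len() }
    }

    /// Drops everything written after the snapshot was taken.
    fn restore_snapshot(&mut self, snapshot: BufferSnapshot) {
        self.elements.truncate(snapshot.len);
    }
}

fn main() {
    let mut buffer = VecBuffer { elements: vec!["a".into()] };
    let snapshot = buffer.snapshot();
    buffer.elements.push("speculative".into());
    buffer.restore_snapshot(snapshot);
    assert_eq!(buffer.elements, ["a"]);
}
```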

@@ -25,7 +25,6 @@
mod arguments;
mod buffer;
mod builders;
pub mod comments;
pub mod diagnostics;
pub mod format_element;
mod format_extensions;
@@ -33,14 +32,8 @@ pub mod formatter;
pub mod group_id;
pub mod macros;
pub mod prelude;
#[cfg(debug_assertions)]
pub mod printed_tokens;
pub mod printer;
pub mod separated;
mod source_map;
pub mod token;
pub mod trivia;
mod verbatim;
mod utility_types;
use crate::formatter::Formatter;
use crate::group_id::UniqueGroupIdBuilder;
@@ -48,8 +41,6 @@ use crate::prelude::TagKind;
use std::fmt::Debug;
use crate::format_element::document::Document;
#[cfg(debug_assertions)]
use crate::printed_tokens::PrintedTokens;
use crate::printer::{Printer, PrinterOptions};
pub use arguments::{Argument, Arguments};
pub use buffer::{
@@ -58,18 +49,10 @@ pub use buffer::{
};
pub use builders::BestFitting;
use crate::builders::syntax_token_cow_slice;
use crate::comments::{CommentStyle, Comments, SourceComment};
pub use crate::diagnostics::{ActualStart, FormatError, InvalidDocumentError, PrintError};
use crate::trivia::{format_skipped_token_trivia, format_trimmed_token};
pub use format_element::{normalize_newlines, FormatElement, LINE_TERMINATORS};
pub use group_id::GroupId;
use ruff_rowan::{
Language, SyntaxElement, SyntaxNode, SyntaxResult, SyntaxToken, SyntaxTriviaPiece, TextLen,
TextRange, TextSize, TokenAtOffset,
};
pub use source_map::{TransformSourceMap, TransformSourceMapBuilder};
use std::marker::PhantomData;
use ruff_text_size::{TextRange, TextSize};
use std::num::ParseIntError;
use std::str::FromStr;
@@ -220,13 +203,6 @@ pub trait FormatContext {
/// Returns the formatting options
fn options(&self) -> &Self::Options;
/// Returns [None] if the CST has not been pre-processed.
///
/// Returns [Some] if the CST has been pre-processed to simplify formatting.
/// The source map can be used to map positions of the formatted nodes back to their original
/// source locations or to resolve the source text.
fn source_map(&self) -> Option<&TransformSourceMap>;
}
/// Options customizing how the source code should be formatted.
@@ -241,21 +217,6 @@ pub trait FormatOptions {
fn as_print_options(&self) -> PrinterOptions;
}
/// The [CstFormatContext] is an extension of the CST unaware [FormatContext] and must be implemented
/// by every language.
///
/// The context customizes the comments formatting and stores the comments of the CST.
pub trait CstFormatContext: FormatContext {
type Language: Language;
type Style: CommentStyle<Language = Self::Language>;
/// Rule for formatting comments.
type CommentRule: FormatRule<SourceComment<Self::Language>, Context = Self> + Default;
/// Returns a reference to the program's comments.
fn comments(&self) -> &Comments<Self::Language>;
}
#[derive(Debug, Default, Eq, PartialEq)]
pub struct SimpleFormatContext {
options: SimpleFormatOptions,
@@ -273,10 +234,6 @@ impl FormatContext for SimpleFormatContext {
fn options(&self) -> &Self::Options {
&self.options
}
fn source_map(&self) -> Option<&TransformSourceMap> {
None
}
}
#[derive(Debug, Default, Eq, PartialEq)]
@@ -347,14 +304,8 @@ where
{
pub fn print(&self) -> PrintResult<Printed> {
let print_options = self.context.options().as_print_options();
let printed = Printer::new(print_options).print(&self.document)?;
let printed = match self.context.source_map() {
Some(source_map) => source_map.map_printed(printed),
None => printed,
};
Ok(printed)
}
@@ -362,11 +313,6 @@ where
let print_options = self.context.options().as_print_options();
let printed = Printer::new(print_options).print_with_indent(&self.document, indent)?;
let printed = match self.context.source_map() {
Some(source_map) => source_map.map_printed(printed),
None => printed,
};
Ok(printed)
}
}
@@ -474,7 +420,7 @@ pub type FormatResult<F> = Result<F, FormatError>;
/// ```
/// use ruff_formatter::{format, write, IndentStyle, LineWidth};
/// use ruff_formatter::prelude::*;
/// use ruff_rowan::TextSize;
/// use ruff_text_size::TextSize;
///
/// struct Paragraph(String);
///
@@ -533,18 +479,6 @@ where
}
}
impl<T, Context> Format<Context> for SyntaxResult<T>
where
T: Format<Context>,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
match self {
Ok(value) => value.fmt(f),
Err(err) => Err(err.into()),
}
}
}
impl<Context> Format<Context> for () {
#[inline]
fn fmt(&self, _: &mut Formatter<Context>) -> FormatResult<()> {
@@ -569,43 +503,6 @@ pub trait FormatRule<T> {
fn fmt(&self, item: &T, f: &mut Formatter<Self::Context>) -> FormatResult<()>;
}
/// Default implementation for formatting a token
pub struct FormatToken<C> {
context: PhantomData<C>,
}
impl<C> Default for FormatToken<C> {
fn default() -> Self {
Self {
context: PhantomData,
}
}
}
impl<C> FormatRule<SyntaxToken<C::Language>> for FormatToken<C>
where
C: CstFormatContext,
C::Language: 'static,
{
type Context = C;
fn fmt(
&self,
token: &SyntaxToken<C::Language>,
f: &mut Formatter<Self::Context>,
) -> FormatResult<()> {
f.state_mut().track_token(token);
crate::write!(
f,
[
format_skipped_token_trivia(token),
format_trimmed_token(token),
]
)
}
}
/// Rule that supports customizing how it formats an object of type `T`.
pub trait FormatRuleWithOptions<T>: FormatRule<T> {
type Options;
@@ -855,448 +752,6 @@ where
Ok(Formatted::new(document, state.into_context()))
}
/// Entry point for formatting a [SyntaxNode] for a specific language.
pub trait FormatLanguage {
type SyntaxLanguage: Language;
/// The type of the formatting context
type Context: CstFormatContext<Language = Self::SyntaxLanguage>;
/// The rule type that can format a [SyntaxNode] of this language
type FormatRule: FormatRule<SyntaxNode<Self::SyntaxLanguage>, Context = Self::Context> + Default;
/// Performs an optional pre-processing of the tree. This can be useful to remove nodes
/// that otherwise complicate formatting.
///
/// Return [None] if the tree shouldn't be processed. Return [Some] with the transformed
/// tree and the source map otherwise.
fn transform(
&self,
_root: &SyntaxNode<Self::SyntaxLanguage>,
) -> Option<(SyntaxNode<Self::SyntaxLanguage>, TransformSourceMap)> {
None
}
/// This is used to select appropriate "root nodes" for the
/// range formatting process: for instance in JavaScript the function returns
/// true for statement and declaration nodes, to ensure the entire statement
/// gets formatted instead of the smallest sub-expression that fits the range
fn is_range_formatting_node(&self, _node: &SyntaxNode<Self::SyntaxLanguage>) -> bool {
true
}
/// Returns the formatting options
fn options(&self) -> &<Self::Context as FormatContext>::Options;
/// Creates the [FormatContext] with the given `source map` and `comments`
fn create_context(
self,
root: &SyntaxNode<Self::SyntaxLanguage>,
source_map: Option<TransformSourceMap>,
) -> Self::Context;
}
/// Formats a syntax node file based on its features.
///
/// It returns a [Formatted] result, which the user can use to override a file.
pub fn format_node<L: FormatLanguage>(
root: &SyntaxNode<L::SyntaxLanguage>,
language: L,
) -> FormatResult<Formatted<L::Context>> {
tracing::trace_span!("format_node").in_scope(move || {
let (root, source_map) = match language.transform(root) {
Some((root, source_map)) => (root, Some(source_map)),
None => (root.clone(), None),
};
let context = language.create_context(&root, source_map);
let format_node = FormatRefWithRule::new(&root, L::FormatRule::default());
let mut state = FormatState::new(context);
let mut buffer = VecBuffer::new(&mut state);
write!(buffer, [format_node])?;
let mut document = Document::from(buffer.into_vec());
document.propagate_expand();
state.assert_formatted_all_tokens(&root);
let context = state.into_context();
let comments = context.comments();
comments.assert_checked_all_suppressions(&root);
comments.assert_formatted_all_comments();
Ok(Formatted::new(document, context))
})
}
/// Returns the [TextRange] for this [SyntaxElement] with the leading and
/// trailing whitespace trimmed (but keeping comments or skipped trivias)
fn text_non_whitespace_range<E, L>(elem: &E) -> TextRange
where
E: Into<SyntaxElement<L>> + Clone,
L: Language,
{
let elem: SyntaxElement<L> = elem.clone().into();
let start = elem
.leading_trivia()
.into_iter()
.flat_map(|trivia| trivia.pieces())
.find_map(|piece| {
if piece.is_whitespace() || piece.is_newline() {
None
} else {
Some(piece.text_range().start())
}
})
.unwrap_or_else(|| elem.text_trimmed_range().start());
let end = elem
.trailing_trivia()
.into_iter()
.flat_map(|trivia| trivia.pieces().rev())
.find_map(|piece| {
if piece.is_whitespace() || piece.is_newline() {
None
} else {
Some(piece.text_range().end())
}
})
.unwrap_or_else(|| elem.text_trimmed_range().end());
TextRange::new(start, end)
}
/// Formats a range within a file, supported by Rome
///
/// This runs a simple heuristic to determine the initial indentation
/// level of the node based on the provided [FormatContext], which
/// must match currently the current initial of the file. Additionally,
/// because the reformatting happens only locally the resulting code
/// will be indented with the same level as the original selection,
/// even if it's a mismatch from the rest of the block the selection is in
///
/// It returns a [Formatted] result with a range corresponding to the
/// range of the input that was effectively overwritten by the formatter
pub fn format_range<Language: FormatLanguage>(
root: &SyntaxNode<Language::SyntaxLanguage>,
mut range: TextRange,
language: Language,
) -> FormatResult<Printed> {
if range.is_empty() {
return Ok(Printed::new(
String::new(),
Some(range),
Vec::new(),
Vec::new(),
));
}
let root_range = root.text_range();
if range.start() < root_range.start() || range.end() > root_range.end() {
return Err(FormatError::RangeError {
input: range,
tree: root_range,
});
}
// Find the tokens corresponding to the start and end of the range
let start_token = root.token_at_offset(range.start());
let end_token = root.token_at_offset(range.end());
// If these tokens were not found this means either:
// 1. The input [SyntaxNode] was empty
// 2. The input node was not the root [SyntaxNode] of the file
// In the first case we can return an empty result immediately,
// otherwise default to the first and last tokens in the root node
let mut start_token = match start_token {
// If the start of the range lies between two tokens,
// start at the rightmost one
TokenAtOffset::Between(_, token) => token,
TokenAtOffset::Single(token) => token,
TokenAtOffset::None => match root.first_token() {
Some(token) => token,
// root node is empty
None => return Ok(Printed::new_empty()),
},
};
let mut end_token = match end_token {
// If the end of the range lies between two tokens,
// end at the leftmost one
TokenAtOffset::Between(token, _) => token,
TokenAtOffset::Single(token) => token,
TokenAtOffset::None => match root.last_token() {
Some(token) => token,
// root node is empty
None => return Ok(Printed::new_empty()),
},
};
// Trim leading and trailing whitespace off from the formatting range
let mut trimmed_start = range.start();
let start_token_range = text_non_whitespace_range(&start_token);
let start_token_trimmed_start = start_token_range.start();
let start_token_trimmed_end = start_token_range.end();
if start_token_trimmed_start >= range.start() && start_token_trimmed_start <= range.end() {
// If the range starts before the trimmed start of the token, move the
// start towards that position
trimmed_start = start_token_trimmed_start;
} else if start_token_trimmed_end <= range.start() {
// If the range starts after the trimmed end of the token, move the
// start to the trimmed start of the next token if it exists
if let Some(next_token) = start_token.next_token() {
let next_token_start = text_non_whitespace_range(&next_token).start();
if next_token_start <= range.end() {
trimmed_start = next_token_start;
start_token = next_token;
}
}
}
let end_token_range = text_non_whitespace_range(&end_token);
let end_token_trimmed_start = end_token_range.start();
// If the range ends before the trimmed start of the token, move the
// end to the trimmed end of the previous token if it exists
if end_token_trimmed_start >= range.end() {
if let Some(next_token) = end_token.prev_token() {
let next_token_end = text_non_whitespace_range(&next_token).end();
if next_token_end >= trimmed_start {
end_token = next_token;
}
}
}
// Find suitable formatting-root nodes (matching the predicate provided by
// the language implementation) in the ancestors of the start and end tokens
let start_node = start_token
.ancestors()
.find(|node| language.is_range_formatting_node(node))
.unwrap_or_else(|| root.clone());
let end_node = end_token
.ancestors()
.find(|node| language.is_range_formatting_node(node))
.unwrap_or_else(|| root.clone());
let common_root = if start_node == end_node {
range = text_non_whitespace_range(&start_node);
Some(start_node)
} else {
// Find the two highest sibling nodes that satisfy the formatting range
// from the ancestors of the start and end nodes (this is roughly the
// same algorithm as the findSiblingAncestors function in Prettier, see
// https://github.com/prettier/prettier/blob/cae195187f524dd74e60849e0a4392654423415b/src/main/range-util.js#L36)
let start_node_start = start_node.text_range().start();
let end_node_end = end_node.text_range().end();
let result_end_node = end_node
.ancestors()
.take_while(|end_parent| end_parent.text_range().start() >= start_node_start)
.last()
.unwrap_or(end_node);
let result_start_node = start_node
.ancestors()
.take_while(|start_parent| start_parent.text_range().end() <= end_node_end)
.last()
.unwrap_or(start_node);
range = text_non_whitespace_range(&result_start_node)
.cover(text_non_whitespace_range(&result_end_node));
// Find the lowest common ancestor node for the previously selected
// sibling nodes by building the path to the root node from both
// nodes and iterating along the two paths at once to find the first
// divergence (the ancestors have to be collected into vectors first
// since the ancestor iterator isn't double ended)
#[allow(clippy::needless_collect)]
let start_to_root: Vec<_> = result_start_node.ancestors().collect();
#[allow(clippy::needless_collect)]
let end_to_root: Vec<_> = result_end_node.ancestors().collect();
start_to_root
.into_iter()
.rev()
.zip(end_to_root.into_iter().rev())
.map_while(|(lhs, rhs)| if lhs == rhs { Some(lhs) } else { None })
.last()
};
// Logically this should always return at least the root node,
// fallback to said node just in case
let common_root = common_root.as_ref().unwrap_or(root);
// Perform the actual formatting of the root node with
// an appropriate indentation level
let mut printed = format_sub_tree(common_root, language)?;
// This finds the closest marker to the beginning of the source
// starting before or at said starting point, and the closest
// marker to the end of the source range starting after or at
// said ending point respectively
let mut range_start = None;
let mut range_end = None;
let sourcemap = printed.sourcemap();
for marker in sourcemap {
// marker.source <= range.start()
if let Some(start_dist) = range.start().checked_sub(marker.source) {
range_start = match range_start {
Some((prev_marker, prev_dist)) => {
if start_dist < prev_dist {
Some((marker, start_dist))
} else {
Some((prev_marker, prev_dist))
}
}
None => Some((marker, start_dist)),
}
}
// marker.source >= range.end()
if let Some(end_dist) = marker.source.checked_sub(range.end()) {
range_end = match range_end {
Some((prev_marker, prev_dist)) => {
if end_dist <= prev_dist {
Some((marker, end_dist))
} else {
Some((prev_marker, prev_dist))
}
}
None => Some((marker, end_dist)),
}
}
}
// If no start or end were found, this means that the edge of the formatting
// range was near the edge of the input, and no marker were emitted before
// the start (or after the end) of the formatting range: in this case
// the start/end marker default to the start/end of the input
let (start_source, start_dest) = match range_start {
Some((start_marker, _)) => (start_marker.source, start_marker.dest),
None => (common_root.text_range().start(), TextSize::from(0)),
};
let (end_source, end_dest) = match range_end {
Some((end_marker, _)) => (end_marker.source, end_marker.dest),
None => (
common_root.text_range().end(),
TextSize::try_from(printed.as_code().len()).expect("code length out of bounds"),
),
};
let input_range = TextRange::new(start_source, end_source);
let output_range = TextRange::new(start_dest, end_dest);
let sourcemap = printed.take_sourcemap();
let verbatim_ranges = printed.take_verbatim_ranges();
let code = &printed.into_code()[output_range];
Ok(Printed::new(
code.into(),
Some(input_range),
sourcemap,
verbatim_ranges,
))
}
/// Formats a single node within a file, supported by Rome.
///
/// This runs a simple heuristic to determine the initial indentation
/// level of the node based on the provided [FormatContext], which
/// must match currently the current initial of the file. Additionally,
/// because the reformatting happens only locally the resulting code
/// will be indented with the same level as the original selection,
/// even if it's a mismatch from the rest of the block the selection is in
///
/// It returns a [Formatted] result
pub fn format_sub_tree<L: FormatLanguage>(
root: &SyntaxNode<L::SyntaxLanguage>,
language: L,
) -> FormatResult<Printed> {
// Determine the initial indentation level for the printer by inspecting the trivia pieces
// of each token from the first token of the common root towards the start of the file
let mut tokens = std::iter::successors(root.first_token(), |token| token.prev_token());
// From the iterator of tokens, build an iterator of trivia pieces (once again the iterator is
// reversed, starting from the last trailing trivia towards the first leading trivia).
// The first token is handled specially as we only wan to consider its leading trivia pieces
let first_token = tokens.next();
let first_token_trivias = first_token
.into_iter()
.flat_map(|token| token.leading_trivia().pieces().rev());
let next_tokens_trivias = tokens.flat_map(|token| {
token
.trailing_trivia()
.pieces()
.rev()
.chain(token.leading_trivia().pieces().rev())
});
let trivias = first_token_trivias
.chain(next_tokens_trivias)
.filter(|piece| {
// We're only interested in newline and whitespace trivias, skip over comments
let is_newline = piece.is_newline();
let is_whitespace = piece.is_whitespace();
is_newline || is_whitespace
});
// Finally run the iterator until a newline trivia is found, and get the last whitespace trivia before it
let last_whitespace = trivias.map_while(|piece| piece.as_whitespace()).last();
let initial_indent = match last_whitespace {
Some(trivia) => {
// This logic is based on the formatting options passed in
// the be user (or the editor) as we do not have any kind
// of indentation type detection yet. Unfortunately this
// may not actually match the current content of the file
let length = trivia.text().len() as u16;
match language.options().indent_style() {
IndentStyle::Tab => length,
IndentStyle::Space(width) => length / u16::from(width),
}
}
// No whitespace was found between the start of the range
// and the start of the file
None => 0,
};
let formatted = format_node(root, language)?;
let mut printed = formatted.print_with_indent(initial_indent)?;
let sourcemap = printed.take_sourcemap();
let verbatim_ranges = printed.take_verbatim_ranges();
Ok(Printed::new(
printed.into_code(),
Some(root.text_range()),
sourcemap,
verbatim_ranges,
))
}
impl<L: Language, Context> Format<Context> for SyntaxTriviaPiece<L> {
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
let range = self.text_range();
// Trim start/end and update the range
let trimmed = self.text().trim_start();
let trimmed_start = range.start() + (range.len() - trimmed.text_len());
let trimmed = trimmed.trim_end();
write!(
f,
[syntax_token_cow_slice(
normalize_newlines(trimmed, LINE_TERMINATORS),
&self.token(),
trimmed_start
)]
)
}
}
/// This structure stores the state that is relevant for the formatting of the whole document.
///
/// This structure is different from [crate::Formatter] in that the formatting infrastructure
@@ -1306,11 +761,6 @@ pub struct FormatState<Context> {
context: Context,
group_id_builder: UniqueGroupIdBuilder,
// This is using a RefCell as it only exists in debug mode,
// the Formatter is still completely immutable in release builds
#[cfg(debug_assertions)]
pub printed_tokens: PrintedTokens,
}
impl<Context> std::fmt::Debug for FormatState<Context>
@@ -1330,9 +780,6 @@ impl<Context> FormatState<Context> {
Self {
context,
group_id_builder: Default::default(),
#[cfg(debug_assertions)]
printed_tokens: Default::default(),
}
}
@@ -1356,81 +803,4 @@ impl<Context> FormatState<Context> {
pub fn group_id(&self, debug_name: &'static str) -> GroupId {
self.group_id_builder.group_id(debug_name)
}
/// Tracks the given token as formatted
#[inline]
pub fn track_token<L: Language>(&mut self, #[allow(unused_variables)] token: &SyntaxToken<L>) {
cfg_if::cfg_if! {
if #[cfg(debug_assertions)] {
self.printed_tokens.track_token(token);
}
}
}
#[cfg(not(debug_assertions))]
#[inline]
pub fn set_token_tracking_disabled(&mut self, _: bool) {}
/// Disables or enables token tracking for a portion of the code.
///
/// It can be useful to disable the token tracking when it is necessary to re-format a node with different parameters.
#[cfg(debug_assertions)]
pub fn set_token_tracking_disabled(&mut self, enabled: bool) {
self.printed_tokens.set_disabled(enabled)
}
#[cfg(not(debug_assertions))]
#[inline]
pub fn is_token_tracking_disabled(&self) -> bool {
false
}
/// Returns `true` if token tracking is currently disabled.
#[cfg(debug_assertions)]
pub fn is_token_tracking_disabled(&self) -> bool {
self.printed_tokens.is_disabled()
}
/// Asserts in debug builds that all tokens have been printed.
#[inline]
pub fn assert_formatted_all_tokens<L: Language>(
&self,
#[allow(unused_variables)] root: &SyntaxNode<L>,
) {
cfg_if::cfg_if! {
if #[cfg(debug_assertions)] {
self.printed_tokens.assert_all_tracked(root);
}
}
}
}
impl<Context> FormatState<Context>
where
Context: FormatContext,
{
pub fn snapshot(&self) -> FormatStateSnapshot {
FormatStateSnapshot {
#[cfg(debug_assertions)]
printed_tokens: self.printed_tokens.snapshot(),
}
}
pub fn restore_snapshot(&mut self, snapshot: FormatStateSnapshot) {
let FormatStateSnapshot {
#[cfg(debug_assertions)]
printed_tokens,
} = snapshot;
cfg_if::cfg_if! {
if #[cfg(debug_assertions)] {
self.printed_tokens.restore(printed_tokens);
}
}
}
}
pub struct FormatStateSnapshot {
#[cfg(debug_assertions)]
printed_tokens: printed_tokens::PrintedTokensSnapshot,
}

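Of the removals above, `format_range` carried the most intricate logic: after formatting the common root it scans the printer's source markers for the one closest before the range start and the one closest after the range end. A self-contained sketch of that search, with plain `u32` offsets standing in for `TextSize`:

```rust
#[derive(Copy, Clone, Debug, PartialEq)]
struct SourceMarker {
    source: u32,
    dest: u32,
}

fn closest_markers(
    markers: &[SourceMarker],
    start: u32,
    end: u32,
) -> (Option<SourceMarker>, Option<SourceMarker>) {
    let mut best_start: Option<(SourceMarker, u32)> = None;
    let mut best_end: Option<(SourceMarker, u32)> = None;
    for &marker in markers {
        // Candidate for the start: marker.source <= start (checked_sub succeeds).
        if let Some(distance) = start.checked_sub(marker.source) {
            if best_start.map_or(true, |(_, best)| distance < best) {
                best_start = Some((marker, distance));
            }
        }
        // Candidate for the end: marker.source >= end.
        if let Some(distance) = marker.source.checked_sub(end) {
            if best_end.map_or(true, |(_, best)| distance <= best) {
                best_end = Some((marker, distance));
            }
        }
    }
    (best_start.map(|(m, _)| m), best_end.map(|(m, _)| m))
}

fn main() {
    let markers = [
        SourceMarker { source: 0, dest: 0 },
        SourceMarker { source: 10, dest: 8 },
        SourceMarker { source: 20, dest: 15 },
    ];
    let (start, end) = closest_markers(&markers, 12, 14);
    assert_eq!(start, Some(SourceMarker { source: 10, dest: 8 }));
    assert_eq!(end, Some(SourceMarker { source: 20, dest: 15 }));
}
```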

@@ -1,19 +1,10 @@
pub use crate::builders::*;
pub use crate::format_element::document::Document;
pub use crate::format_element::tag::{LabelId, Tag, TagKind};
pub use crate::format_element::*;
pub use crate::format_extensions::{MemoizeFormat, Memoized};
pub use crate::formatter::Formatter;
pub use crate::printer::PrinterOptions;
pub use crate::trivia::{
format_dangling_comments, format_leading_comments, format_only_if_breaks, format_removed,
format_replaced, format_trailing_comments, format_trimmed_token,
};
pub use crate::diagnostics::FormatError;
pub use crate::format_element::document::Document;
pub use crate::format_element::tag::{LabelId, Tag, TagKind};
pub use crate::verbatim::{
format_bogus_node, format_or_verbatim, format_suppressed_node, format_verbatim_node,
};
pub use crate::{
best_fitting, dbg_write, format, format_args, write, Buffer as _, BufferExtensions, Format,


@@ -1,81 +0,0 @@
use indexmap::IndexSet;
use ruff_rowan::{Direction, Language, SyntaxNode, SyntaxToken, TextSize};
/// Tracks the ranges of the formatted (including replaced or tokens formatted as verbatim) tokens.
///
/// This implementation uses the fact that no two tokens can have an overlapping range to avoid the need for an interval tree.
/// Thus, testing if a token has already been formatted only requires testing if a token starting at the same offset has been formatted.
#[derive(Debug, Clone, Default)]
pub struct PrintedTokens {
/// Key: Start of a token's range
offsets: IndexSet<TextSize>,
disabled: bool,
}
#[derive(Copy, Clone)]
pub struct PrintedTokensSnapshot {
len: usize,
disabled: bool,
}
impl PrintedTokens {
/// Tracks a formatted token
///
/// ## Panics
/// If this token has been formatted before.
pub fn track_token<L: Language>(&mut self, token: &SyntaxToken<L>) {
if self.disabled {
return;
}
let range = token.text_trimmed_range();
if !self.offsets.insert(range.start()) {
panic!("You tried to print the token '{token:?}' twice, and this is not valid.");
}
}
/// Enables or disables the assertion tracking
pub(crate) fn set_disabled(&mut self, disabled: bool) {
self.disabled = disabled;
}
pub(crate) fn is_disabled(&self) -> bool {
self.disabled
}
pub(crate) fn snapshot(&self) -> PrintedTokensSnapshot {
PrintedTokensSnapshot {
len: self.offsets.len(),
disabled: self.disabled,
}
}
pub(crate) fn restore(&mut self, snapshot: PrintedTokensSnapshot) {
let PrintedTokensSnapshot { len, disabled } = snapshot;
self.offsets.truncate(len);
self.disabled = disabled
}
/// Asserts that all tokens of the passed in node have been tracked
///
/// ## Panics
/// If any descendant token of `root` hasn't been tracked
pub fn assert_all_tracked<L: Language>(&self, root: &SyntaxNode<L>) {
let mut offsets = self.offsets.clone();
for token in root.descendants_tokens(Direction::Next) {
if !offsets.remove(&token.text_trimmed_range().start()) {
panic!("token has not been seen by the formatter: {token:#?}.\
\nUse `format_replaced` if you want to replace a token from the formatted output.\
\nUse `format_removed` if you want to remove a token from the formatted output.\n\
parent: {:#?}", token.parent())
}
}
for offset in offsets {
panic!("tracked offset {offset:?} doesn't match any token of {root:#?}. Have you passed a token from another tree?");
}
}
}

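The deleted `PrintedTokens` tracker (debug builds only) rests on the observation stated in its doc comment: token ranges never overlap, so recording each token's start offset suffices to catch double printing. A reduced sketch using a `HashSet` in place of `IndexSet` and plain `u32` offsets:

```rust
use std::collections::HashSet;

#[derive(Default)]
struct PrintedTokens {
    /// Start offsets of every token formatted so far.
    offsets: HashSet<u32>,
}

impl PrintedTokens {
    fn track_token(&mut self, start_offset: u32) {
        // `insert` returns false if the offset was already present,
        // i.e. the token has been printed before.
        assert!(
            self.offsets.insert(start_offset),
            "token starting at {start_offset} has already been printed"
        );
    }
}

fn main() {
    let mut printed = PrintedTokens::default();
    printed.track_token(0);
    printed.track_token(4); // fine: a different token
}
```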

@@ -24,7 +24,7 @@ use crate::printer::queue::{
AllPredicate, FitsEndPredicate, FitsQueue, PrintQueue, Queue, SingleEntryPredicate,
};
use drop_bomb::DebugDropBomb;
use ruff_rowan::{TextLen, TextSize};
use ruff_text_size::{TextLen, TextSize};
use std::num::NonZeroU8;
use unicode_width::UnicodeWidthChar;
@@ -100,11 +100,6 @@ impl<'a> Printer<'a> {
source_position,
} => self.print_text(text, Some(*source_position)),
FormatElement::StaticTextSlice { text, range } => self.print_text(&text[*range], None),
FormatElement::SyntaxTokenTextSlice {
slice,
source_position,
} => self.print_text(slice, Some(*source_position)),
FormatElement::Line(line_mode) => {
if args.mode().is_flat()
&& matches!(line_mode, LineMode::Soft | LineMode::SoftOrSpace)
@@ -1001,8 +996,6 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> {
FormatElement::StaticTextSlice { text, range } => {
return Ok(self.fits_text(&text[*range]))
}
FormatElement::SyntaxTokenTextSlice { slice, .. } => return Ok(self.fits_text(slice)),
FormatElement::LineSuffixBoundary => {
if self.state.has_line_suffix {
return Ok(Fits::No);


@@ -1,229 +0,0 @@
use crate::prelude::*;
use crate::{write, CstFormatContext, GroupId};
use ruff_rowan::{AstNode, AstSeparatedElement, SyntaxResult, SyntaxToken};
pub trait FormatSeparatedElementRule<N>
where
N: AstNode,
{
type Context;
type FormatNode<'a>: Format<Self::Context>
where
N: 'a;
type FormatSeparator<'a>: Format<Self::Context>
where
N: 'a;
fn format_node<'a>(&self, node: &'a N) -> Self::FormatNode<'a>;
fn format_separator<'a>(
&self,
separator: &'a SyntaxToken<N::Language>,
) -> Self::FormatSeparator<'a>;
}
/// Formats a single element inside a separated list.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct FormatSeparatedElement<N, R>
where
N: AstNode,
R: FormatSeparatedElementRule<N>,
{
element: AstSeparatedElement<N::Language, N>,
rule: R,
is_last: bool,
/// The separator to write if the element has no separator yet.
separator: &'static str,
options: FormatSeparatedOptions,
}
impl<N, R> FormatSeparatedElement<N, R>
where
N: AstNode,
R: FormatSeparatedElementRule<N>,
{
/// Returns the node belonging to the element.
pub fn node(&self) -> SyntaxResult<&N> {
self.element.node()
}
}
impl<N, R, C> Format<C> for FormatSeparatedElement<N, R>
where
N: AstNode,
N::Language: 'static,
R: FormatSeparatedElementRule<N, Context = C>,
C: CstFormatContext<Language = N::Language>,
{
fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
let node = self.element.node()?;
let separator = self.element.trailing_separator()?;
let format_node = self.rule.format_node(node);
if !self.options.nodes_grouped {
format_node.fmt(f)?;
} else {
group(&format_node).fmt(f)?;
}
// Reuse the existing trailing separator or create it if it wasn't in the
// input source. Only print the last trailing token if the outer group breaks
if let Some(separator) = separator {
let format_separator = self.rule.format_separator(separator);
if self.is_last {
match self.options.trailing_separator {
TrailingSeparator::Allowed => {
// Use format_replaced instead of wrapping the result of format_token
// in order to remove only the token itself when the group doesn't break
// but still print its associated trivia unconditionally
format_only_if_breaks(separator, &format_separator)
.with_group_id(self.options.group_id)
.fmt(f)?;
}
TrailingSeparator::Mandatory => {
write!(f, [format_separator])?;
}
TrailingSeparator::Disallowed => {
// A trailing separator was present where it wasn't allowed, opt out of formatting
return Err(FormatError::SyntaxError);
}
TrailingSeparator::Omit => {
write!(f, [format_removed(separator)])?;
}
}
} else {
write!(f, [format_separator])?;
}
} else if self.is_last {
match self.options.trailing_separator {
TrailingSeparator::Allowed => {
write!(
f,
[if_group_breaks(&text(self.separator))
.with_group_id(self.options.group_id)]
)?;
}
TrailingSeparator::Mandatory => {
text(self.separator).fmt(f)?;
}
TrailingSeparator::Omit | TrailingSeparator::Disallowed => { /* no op */ }
}
} else {
unreachable!(
"This is a syntax error, separator must be present between every two elements"
);
};
Ok(())
}
}
/// Iterator for formatting separated elements. Prints the separator between each element and
/// inserts a trailing separator if necessary
pub struct FormatSeparatedIter<I, Node, Rule>
where
Node: AstNode,
{
next: Option<AstSeparatedElement<Node::Language, Node>>,
rule: Rule,
inner: I,
separator: &'static str,
options: FormatSeparatedOptions,
}
impl<I, Node, Rule> FormatSeparatedIter<I, Node, Rule>
where
Node: AstNode,
{
pub fn new(inner: I, separator: &'static str, rule: Rule) -> Self {
Self {
inner,
rule,
separator,
next: None,
options: FormatSeparatedOptions::default(),
}
}
/// Wraps every node inside of a group
pub fn nodes_grouped(mut self) -> Self {
self.options.nodes_grouped = true;
self
}
pub fn with_trailing_separator(mut self, separator: TrailingSeparator) -> Self {
self.options.trailing_separator = separator;
self
}
#[allow(unused)]
pub fn with_group_id(mut self, group_id: Option<GroupId>) -> Self {
self.options.group_id = group_id;
self
}
}
impl<I, Node, Rule> Iterator for FormatSeparatedIter<I, Node, Rule>
where
Node: AstNode,
I: Iterator<Item = AstSeparatedElement<Node::Language, Node>>,
Rule: FormatSeparatedElementRule<Node> + Clone,
{
type Item = FormatSeparatedElement<Node, Rule>;
fn next(&mut self) -> Option<Self::Item> {
let element = self.next.take().or_else(|| self.inner.next())?;
self.next = self.inner.next();
let is_last = self.next.is_none();
Some(FormatSeparatedElement {
element,
rule: self.rule.clone(),
is_last,
separator: self.separator,
options: self.options,
})
}
}
impl<I, Node, Rule> std::iter::FusedIterator for FormatSeparatedIter<I, Node, Rule>
where
Node: AstNode,
I: Iterator<Item = AstSeparatedElement<Node::Language, Node>> + std::iter::FusedIterator,
Rule: FormatSeparatedElementRule<Node> + Clone,
{
}
impl<I, Node, Rule> std::iter::ExactSizeIterator for FormatSeparatedIter<I, Node, Rule>
where
Node: AstNode,
I: Iterator<Item = AstSeparatedElement<Node::Language, Node>> + ExactSizeIterator,
Rule: FormatSeparatedElementRule<Node> + Clone,
{
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
pub enum TrailingSeparator {
/// A trailing separator is allowed and preferred
#[default]
Allowed,
/// A trailing separator is not allowed
Disallowed,
/// A trailing separator is mandatory for the syntax to be correct
Mandatory,
/// A trailing separator might be present, but the consumer
/// decides to remove it
Omit,
}
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
pub struct FormatSeparatedOptions {
trailing_separator: TrailingSeparator,
group_id: Option<GroupId>,
nodes_grouped: bool,
}

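Most of the deleted `separated.rs` revolves around one decision: what to emit after the last element, given the `TrailingSeparator` policy and whether the source already contained a separator. Condensed into a table, with a hypothetical `Emit` enum standing in for the builder calls (`format_only_if_breaks`, `format_removed`, and so on):

```rust
#[derive(Debug, Default, Copy, Clone)]
enum TrailingSeparator {
    #[default]
    Allowed,
    Disallowed,
    Mandatory,
    Omit,
}

#[derive(Debug, PartialEq)]
enum Emit {
    /// Always print the separator.
    Always,
    /// Print the separator only when the enclosing group breaks.
    IfGroupBreaks,
    /// Remove it from the output (or never insert one).
    Never,
}

fn trailing_separator(
    policy: TrailingSeparator,
    present_in_source: bool,
) -> Result<Emit, &'static str> {
    use TrailingSeparator::*;
    match (policy, present_in_source) {
        (Mandatory, _) => Ok(Emit::Always),
        (Allowed, _) => Ok(Emit::IfGroupBreaks),
        (Disallowed, true) => Err("trailing separator present where disallowed"),
        (Disallowed, false) | (Omit, _) => Ok(Emit::Never),
    }
}

fn main() {
    assert_eq!(
        trailing_separator(TrailingSeparator::default(), false),
        Ok(Emit::IfGroupBreaks)
    );
    assert!(trailing_separator(TrailingSeparator::Disallowed, true).is_err());
}
```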

@@ -1,770 +0,0 @@
use crate::{Printed, SourceMarker, TextRange};
use ruff_rowan::TextLen;
use ruff_rowan::{Language, SyntaxNode, TextSize};
use rustc_hash::FxHashMap;
use std::cmp::Ordering;
use std::iter::FusedIterator;
/// A source map for mapping positions of a pre-processed tree back to the locations in the source tree.
///
/// This is not a generic purpose source map but instead focused on supporting the case where
/// a language removes or re-orders nodes that would otherwise complicate the formatting logic.
/// A common use case for pre-processing is the removal of all parenthesized nodes.
/// Removing parenthesized nodes simplifies the formatting logic when it has different behaviour
/// depending if a child or parent is of a specific node kind. Performing such a test with parenthesized
/// nodes present in the source code means that the formatting logic has to skip over all parenthesized nodes
/// until it finds the first non-parenthesized node and then test if that node is of the expected kind.
///
/// This source map implementation supports removing tokens or re-structuring nodes
/// without changing the order of the tokens in the tree (requires no source map).
///
/// The following section uses parentheses as a concrete example to explain the functionality of the source map.
/// However, the source map implementation isn't restricted to removing parentheses only, it supports mapping
/// transformed to source position for any use case where a transform deletes text from the source tree.
///
/// ## Position Mapping
///
/// The source map internally tracks all the ranges that have been deleted from the source code sorted by the start of the deleted range.
/// It further stores the absolute count of deleted bytes preceding a range. The deleted range together
/// with the absolute count allows to re-compute the source location for every transformed location
/// and has the benefit that it requires significantly fewer memory
/// than source maps that use a source to destination position marker for every token.
///
/// ## Map Node Ranges
///
/// Only having the deleted ranges to resolve the original text of a node isn't sufficient.
/// Resolving the original text of a node is needed when formatting a node as verbatim, either because
/// formatting the node failed because of a syntax error, or formatting is suppressed with a `rome-ignore format:` comment.
///
/// ```text
/// // Source // Transformed
/// (a+b) + (c + d) a + b + c + d;
/// ```
///
/// Using the above example, the following source ranges should be returned when querying with the transformed ranges:
///
/// * `a` -> `a`: Should not include the leading `(`
/// * `b` -> `b`: Should not include the trailing `)`
/// * `a + b` -> `(a + b)`: Should include the leading `(` and trailing `)`.
/// * `a + b + c + d` -> `(a + b) + (c + d)`: Should include the fist `(` token and the last `)` token because the expression statement
/// fully encloses the `a + b` and `c + d` nodes.
///
/// This is why the source map also tracks the mapped trimmed ranges for every node.
#[derive(Debug, Clone)]
pub struct TransformSourceMap {
source_text: String,
/// The mappings stored in increasing order
deleted_ranges: Vec<DeletedRange>,
/// Key: Start or end position of node for which the trimmed range should be extended
/// Value: The trimmed range.
mapped_node_ranges: FxHashMap<TextSize, TrimmedNodeRangeMapping>,
}
impl TransformSourceMap {
/// Returns the text of the source document as it was before the transformation.
pub fn text(&self) -> &str {
&self.source_text
}
/// Maps a range of the transformed document to a range in the source document.
///
/// Complexity: `O(log(n))`
pub fn source_range(&self, transformed_range: TextRange) -> TextRange {
let range = TextRange::new(
self.source_offset(transformed_range.start(), RangePosition::Start),
self.source_offset(transformed_range.end(), RangePosition::End),
);
debug_assert!(range.end() <= self.source_text.text_len(), "Mapped range {:?} exceeds the length of the source document {:?}. Please check if the passed `transformed_range` is a range of the transformed tree and not of the source tree, and that it belongs to the tree for which the source map was created for.", range, self.source_text.len());
range
}
/// Maps the trimmed range of the transformed node to the trimmed range in the source document.
///
/// Average Complexity: `O(log(n))`
pub fn trimmed_source_range<L: Language>(&self, node: &SyntaxNode<L>) -> TextRange {
self.trimmed_source_range_from_transformed_range(node.text_trimmed_range())
}
fn resolve_trimmed_range(&self, mut source_range: TextRange) -> TextRange {
let start_mapping = self.mapped_node_ranges.get(&source_range.start());
if let Some(mapping) = start_mapping {
// If the queried node fully encloses the original range of the node, then extend the range
if source_range.contains_range(mapping.original_range) {
source_range = TextRange::new(mapping.extended_range.start(), source_range.end());
}
}
let end_mapping = self.mapped_node_ranges.get(&source_range.end());
if let Some(mapping) = end_mapping {
// If the queried node fully encloses the original range of the node, then extend the range
if source_range.contains_range(mapping.original_range) {
source_range = TextRange::new(source_range.start(), mapping.extended_range.end());
}
}
source_range
}
fn trimmed_source_range_from_transformed_range(
&self,
transformed_range: TextRange,
) -> TextRange {
let source_range = self.source_range(transformed_range);
let mut mapped_range = source_range;
loop {
let resolved = self.resolve_trimmed_range(mapped_range);
if resolved == mapped_range {
break resolved;
} else {
mapped_range = resolved;
}
}
}
/// Returns the source text of the trimmed range of `node`.
pub fn trimmed_source_text<L: Language>(&self, node: &SyntaxNode<L>) -> &str {
let range = self.trimmed_source_range(node);
&self.source_text[range]
}
/// Returns an iterator over all deleted ranges in increasing order by their start position.
pub fn deleted_ranges(&self) -> DeletedRanges {
DeletedRanges {
source_text: &self.source_text,
deleted_ranges: self.deleted_ranges.iter(),
}
}
#[cfg(test)]
fn trimmed_source_text_from_transformed_range(&self, range: TextRange) -> &str {
let range = self.trimmed_source_range_from_transformed_range(range);
&self.source_text[range]
}
fn source_offset(&self, transformed_offset: TextSize, position: RangePosition) -> TextSize {
let index = self
.deleted_ranges
.binary_search_by_key(&transformed_offset, |range| range.transformed_start());
let range = match index {
Ok(index) => Some(&self.deleted_ranges[index]),
Err(index) => {
if index == 0 {
None
} else {
self.deleted_ranges.get(index - 1)
}
}
};
self.source_offset_with_range(transformed_offset, position, range)
}
fn source_offset_with_range(
&self,
transformed_offset: TextSize,
position: RangePosition,
deleted_range: Option<&DeletedRange>,
) -> TextSize {
match deleted_range {
Some(range) => {
debug_assert!(
range.transformed_start() <= transformed_offset,
"Transformed start {:?} must be less than or equal to transformed offset {:?}.",
range.transformed_start(),
transformed_offset
);
// Transformed position directly falls onto a position where a deleted range starts or ends (depending on the position)
// For example when querying: `a` in `(a)` or (a + b)`, or `b`
if range.transformed_start() == transformed_offset {
match position {
RangePosition::Start => range.source_end(),
// `a)`, deleted range is right after the token. That's why `source_start` is the offset
// that truncates the `)` and `source_end` includes it
RangePosition::End => range.source_start(),
}
}
// The position falls outside of a position that has a leading/trailing deleted range.
// For example, if you get the position of `+` in `(a + b)`.
// That means, the trimmed and non-trimmed offsets are the same
else {
let transformed_delta = transformed_offset - range.transformed_start();
range.source_start() + range.len() + transformed_delta
}
}
None => transformed_offset,
}
}
/// Maps the source code positions relative to the transformed tree of `printed` to the location
/// in the original, untransformed source code.
///
/// The printer creates a source map that allows mapping positions from the newly formatted document
/// back to the locations of the tree. However, the source positions stored in [crate::FormatElement::DynamicText]
/// and [crate::FormatElement::SyntaxTokenTextSlice] are relative to the transformed tree
/// and not the original tree passed to [crate::format_node].
///
/// This function re-maps the positions from the positions in the transformed tree back to the positions
/// in the original, untransformed tree.
pub fn map_printed(&self, mut printed: Printed) -> Printed {
self.map_markers(&mut printed.sourcemap);
printed
}
/// Maps the printers source map marker to the source positions.
fn map_markers(&self, markers: &mut [SourceMarker]) {
if self.deleted_ranges.is_empty() {
return;
}
let mut previous_marker: Option<SourceMarker> = None;
let mut next_range_index = 0;
for marker in markers {
// It's not guaranteed that markers are sorted by source location (line suffix comments).
// It can, therefore, be necessary to navigate backwards again.
// In this case, do a binary search for the index of the next deleted range (`O(log(n)`).
let out_of_order_marker =
previous_marker.map_or(false, |previous| previous.source > marker.source);
if out_of_order_marker {
let index = self
.deleted_ranges
.binary_search_by_key(&marker.source, |range| range.transformed_start());
match index {
// Direct match
Ok(index) => {
next_range_index = index + 1;
}
Err(index) => next_range_index = index,
}
} else {
// Find the range for this mapping. In most cases this is a no-op or only involves a single step
// because markers are most of the time in increasing source order.
while next_range_index < self.deleted_ranges.len() {
let next_range = &self.deleted_ranges[next_range_index];
if next_range.transformed_start() > marker.source {
break;
}
next_range_index += 1;
}
}
previous_marker = Some(*marker);
let current_range = if next_range_index == 0 {
None
} else {
self.deleted_ranges.get(next_range_index - 1)
};
let source =
self.source_offset_with_range(marker.source, RangePosition::Start, current_range);
marker.source = source;
}
}
}
#[derive(Debug, Copy, Clone)]
struct TrimmedNodeRangeMapping {
/// The original trimmed range of the node.
///
/// ```javascript
/// (a + b)
/// ```
///
/// `1..6` `a + b`
original_range: TextRange,
/// The range to which the trimmed range of the node should be extended
/// ```javascript
/// (a + b)
/// ```
///
/// `0..7` for `a + b` if its range should also include the parenthesized range.
extended_range: TextRange,
}
#[derive(Copy, Clone, Debug)]
enum RangePosition {
Start,
End,
}
/// Stores the information about a range in the source document that isn't present in the transformed document
/// and provides means to map the transformed position back to the source position.
///
/// # Examples
///
/// ```javascript
/// (a + b)
/// ```
///
/// A transform that removes the parentheses from the above expression removes the ranges `0..1` (`(` token)
/// and `6..7` (`)` token) and the source map creates one [DeletedRange] for each:
///
/// ```text
/// DeletedRange {
/// source_range: 0..1,
/// total_length_preceding_deleted_ranges: 0,
/// },
/// DeletedRange {
/// source_range: 6..7,
/// total_length_preceding_deleted_ranges: 1,
/// }
/// ```
///
/// The first range indicates that the range `0..1` for the `(` token has been removed. The second range
/// indicates that the range `6..7` for the `)` token has been removed and it stores that, up to this point,
/// but not including, 1 more byte has been removed.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct DeletedRange {
/// The range in the source document of the bytes that have been omitted from the transformed document.
source_range: TextRange,
/// The accumulated count of all removed bytes up to (but not including) the start of this range.
total_length_preceding_deleted_ranges: TextSize,
}
impl DeletedRange {
fn new(source_range: TextRange, total_length_preceding_deleted_ranges: TextSize) -> Self {
debug_assert!(source_range.start() >= total_length_preceding_deleted_ranges, "The total number of deleted bytes ({:?}) can not exceed the offset from the start in the source document ({:?}). This is a bug in the source map implementation.", total_length_preceding_deleted_ranges, source_range.start());
Self {
source_range,
total_length_preceding_deleted_ranges,
}
}
/// The number of deleted characters starting from [source offset](DeletedRange::source_start).
fn len(&self) -> TextSize {
self.source_range.len()
}
/// The start position in bytes in the source document of the omitted sequence in the transformed document.
fn source_start(&self) -> TextSize {
self.source_range.start()
}
/// The end position in bytes in the source document of the omitted sequence in the transformed document.
fn source_end(&self) -> TextSize {
self.source_range.end()
}
/// Returns the byte position of [DeleteRange::source_start] in the transformed document.
fn transformed_start(&self) -> TextSize {
self.source_range.start() - self.total_length_preceding_deleted_ranges
}
}
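The arithmetic in `transformed_start` and `source_offset_with_range` above is easiest to follow with concrete numbers. A compact sketch of the start-position mapping over plain `u32` offsets (the real implementation also handles range-end positions separately):

```rust
#[derive(Copy, Clone)]
struct DeletedRange {
    source_start: u32,
    len: u32,
    /// Deleted bytes accumulated before this range starts.
    deleted_before: u32,
}

impl DeletedRange {
    fn transformed_start(&self) -> u32 {
        self.source_start - self.deleted_before
    }
}

/// Maps a start position in the transformed text back to the source text.
fn source_offset(ranges: &[DeletedRange], transformed: u32) -> u32 {
    match ranges.binary_search_by_key(&transformed, DeletedRange::transformed_start) {
        // The offset sits exactly where a deleted range starts: skip over it.
        Ok(index) => ranges[index].source_start + ranges[index].len,
        // No deleted range precedes the offset: both positions are identical.
        Err(0) => transformed,
        // Shift by everything deleted up to and including the preceding range.
        Err(index) => {
            let range = ranges[index - 1];
            transformed + range.deleted_before + range.len
        }
    }
}

fn main() {
    // "(a + b)" with both parentheses deleted formats as "a + b".
    let ranges = [
        DeletedRange { source_start: 0, len: 1, deleted_before: 0 },
        DeletedRange { source_start: 6, len: 1, deleted_before: 1 },
    ];
    assert_eq!(source_offset(&ranges, 0), 1); // `a`: transformed 0 -> source 1
    assert_eq!(source_offset(&ranges, 4), 5); // `b`: transformed 4 -> source 5
}
```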
/// Builder for creating a source map.
#[derive(Debug, Default)]
pub struct TransformSourceMapBuilder {
/// The original source text of the tree before it was transformed.
source_text: String,
/// The mappings in increasing order by transformed offset.
deleted_ranges: Vec<TextRange>,
/// The keys are a position in the source map where a trimmed node starts or ends.
/// The values are the metadata about a trimmed node range
mapped_node_ranges: FxHashMap<TextSize, TrimmedNodeRangeMapping>,
}
impl TransformSourceMapBuilder {
/// Creates a new builder.
pub fn new() -> Self {
Self {
..Default::default()
}
}
/// Creates a new builder for a document with the given source.
pub fn with_source(source: String) -> Self {
Self {
source_text: source,
..Default::default()
}
}
/// Appends `text` to the source text of the original document.
pub fn push_source_text(&mut self, text: &str) {
self.source_text.push_str(text);
}
/// Adds a new mapping for a deleted character range.
pub fn add_deleted_range(&mut self, source_range: TextRange) {
self.deleted_ranges.push(source_range);
}
/// Adds a mapping to widen a nodes trimmed range.
///
/// The formatter uses the trimmed range when formatting a node in verbatim either because the node
/// failed to format because of a syntax error or because it's formatting is suppressed with a `rome-ignore format:` comment.
///
/// This method adds a mapping to widen a nodes trimmed range to enclose another range instead. This is
/// e.g. useful when removing parentheses around expressions where `(/* comment */ a /* comment */)` because
/// the trimmed range of `a` should now enclose the full range including the `(` and `)` tokens to ensure
/// that the parentheses are retained when printing that node in verbatim style.
pub fn extend_trimmed_node_range(
&mut self,
original_range: TextRange,
extended_range: TextRange,
) {
let mapping = TrimmedNodeRangeMapping {
original_range,
extended_range,
};
self.mapped_node_ranges
.insert(original_range.start(), mapping);
self.mapped_node_ranges
.insert(original_range.end(), mapping);
}
/// Creates a source map that performs single position lookups in `O(log(n))`.
pub fn finish(mut self) -> TransformSourceMap {
let mut merged_mappings = Vec::with_capacity(self.deleted_ranges.len());
if !self.deleted_ranges.is_empty() {
self.deleted_ranges
.sort_by(|a, b| match a.start().cmp(&b.start()) {
Ordering::Equal => a.end().cmp(&b.end()),
ordering => ordering,
});
let mut last_mapping = DeletedRange::new(
// SAFETY: Safe because of the not empty check above
self.deleted_ranges[0],
TextSize::default(),
);
let mut transformed_offset = last_mapping.len();
for range in self.deleted_ranges.drain(1..) {
// Merge adjacent ranges to ensure there's only ever a single mapping starting at the same transformed offset.
if last_mapping.source_range.end() == range.start() {
last_mapping.source_range = last_mapping.source_range.cover(range);
} else {
merged_mappings.push(last_mapping);
last_mapping = DeletedRange::new(range, transformed_offset);
}
transformed_offset += range.len();
}
merged_mappings.push(last_mapping);
}
TransformSourceMap {
source_text: self.source_text,
deleted_ranges: merged_mappings,
mapped_node_ranges: self.mapped_node_ranges,
}
}
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct DeletedRangeEntry<'a> {
/// The start position of the removed range in the source document
pub source: TextSize,
/// The position in the transformed document where the removed range would have been (but is not, because it was removed)
pub transformed: TextSize,
/// The text of the removed range
pub text: &'a str,
}
/// Iterator over all removed ranges in a document.
///
/// Returns the ranges in increased order by their start position.
pub struct DeletedRanges<'a> {
source_text: &'a str,
/// The mappings stored in increasing order
deleted_ranges: std::slice::Iter<'a, DeletedRange>,
}
impl<'a> Iterator for DeletedRanges<'a> {
type Item = DeletedRangeEntry<'a>;
fn next(&mut self) -> Option<Self::Item> {
let next = self.deleted_ranges.next()?;
Some(DeletedRangeEntry {
source: next.source_range.start(),
transformed: next.transformed_start(),
text: &self.source_text[next.source_range],
})
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.deleted_ranges.size_hint()
}
fn last(self) -> Option<Self::Item>
where
Self: Sized,
{
let last = self.deleted_ranges.last()?;
Some(DeletedRangeEntry {
source: last.source_range.start(),
transformed: last.transformed_start(),
text: &self.source_text[last.source_range],
})
}
}
impl DoubleEndedIterator for DeletedRanges<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
let back = self.deleted_ranges.next_back()?;
Some(DeletedRangeEntry {
source: back.source_range.start(),
transformed: back.transformed_start(),
text: &self.source_text[back.source_range],
})
}
}
impl FusedIterator for DeletedRanges<'_> {}
impl ExactSizeIterator for DeletedRanges<'_> {}
#[cfg(test)]
mod tests {
use crate::source_map::DeletedRangeEntry;
use crate::{TextRange, TextSize, TransformSourceMapBuilder};
use ruff_rowan::raw_language::{RawLanguageKind, RawSyntaxTreeBuilder};
#[test]
fn range_mapping() {
let mut cst_builder = RawSyntaxTreeBuilder::new();
cst_builder.start_node(RawLanguageKind::ROOT);
// The shape of the tree doesn't matter for the test case
cst_builder.token(RawLanguageKind::STRING_TOKEN, "(a + (((b + c)) + d)) + e");
cst_builder.finish_node();
let root = cst_builder.finish();
let mut builder = TransformSourceMapBuilder::new();
builder.push_source_text(&root.text().to_string());
// Add mappings for all removed parentheses.
// `(`
builder.add_deleted_range(TextRange::new(TextSize::from(0), TextSize::from(1)));
// `(((`
builder.add_deleted_range(TextRange::new(TextSize::from(5), TextSize::from(6)));
// Ranges can be added out of order
builder.add_deleted_range(TextRange::new(TextSize::from(7), TextSize::from(8)));
builder.add_deleted_range(TextRange::new(TextSize::from(6), TextSize::from(7)));
// `))`
builder.add_deleted_range(TextRange::new(TextSize::from(13), TextSize::from(14)));
builder.add_deleted_range(TextRange::new(TextSize::from(14), TextSize::from(15)));
// `))`
builder.add_deleted_range(TextRange::new(TextSize::from(19), TextSize::from(20)));
builder.add_deleted_range(TextRange::new(TextSize::from(20), TextSize::from(21)));
let source_map = builder.finish();
// The following mappings assume that the transformed string is (including whitespace):
// "a + b + c + d + e";
// `a`
assert_eq!(
source_map.source_range(TextRange::new(TextSize::from(0), TextSize::from(1))),
TextRange::new(TextSize::from(1), TextSize::from(2))
);
// `b`
assert_eq!(
source_map.source_range(TextRange::new(TextSize::from(4), TextSize::from(5))),
TextRange::new(TextSize::from(8), TextSize::from(9))
);
// `c`
assert_eq!(
source_map.source_range(TextRange::new(TextSize::from(8), TextSize::from(9))),
TextRange::new(TextSize::from(12), TextSize::from(13))
);
// `d`
assert_eq!(
source_map.source_range(TextRange::new(TextSize::from(12), TextSize::from(13))),
TextRange::new(TextSize::from(18), TextSize::from(19))
);
// `e`
assert_eq!(
source_map.source_range(TextRange::new(TextSize::from(16), TextSize::from(17))),
TextRange::new(TextSize::from(24), TextSize::from(25))
);
}
#[test]
fn trimmed_range() {
// Build up a tree for `((a))`
// Don't mind the bogus nodes; it doesn't really matter what the node kinds are.
let mut cst_builder = RawSyntaxTreeBuilder::new();
cst_builder.start_node(RawLanguageKind::ROOT);
cst_builder.start_node(RawLanguageKind::BOGUS);
cst_builder.token(RawLanguageKind::STRING_TOKEN, "(");
cst_builder.start_node(RawLanguageKind::BOGUS);
cst_builder.token(RawLanguageKind::BOGUS, "(");
cst_builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
cst_builder.token(RawLanguageKind::STRING_TOKEN, "a");
cst_builder.finish_node();
cst_builder.token(RawLanguageKind::BOGUS, ")");
cst_builder.finish_node();
cst_builder.token(RawLanguageKind::BOGUS, ")");
cst_builder.finish_node();
cst_builder.token(RawLanguageKind::BOGUS, ";");
cst_builder.finish_node();
let root = cst_builder.finish();
assert_eq!(&root.text(), "((a));");
let mut bogus = root
.descendants()
.filter(|node| node.kind() == RawLanguageKind::BOGUS);
// `((a))`
let outer = bogus.next().unwrap();
// `(a)`
let inner = bogus.next().unwrap();
// `a`
let expression = root
.descendants()
.find(|node| node.kind() == RawLanguageKind::LITERAL_EXPRESSION)
.unwrap();
let mut builder = TransformSourceMapBuilder::new();
builder.push_source_text(&root.text().to_string());
// Add mappings for all removed parentheses.
builder.add_deleted_range(TextRange::new(TextSize::from(0), TextSize::from(2)));
builder.add_deleted_range(TextRange::new(TextSize::from(3), TextSize::from(5)));
// Extend `a` to the range of `(a)`
builder
.extend_trimmed_node_range(expression.text_trimmed_range(), inner.text_trimmed_range());
// Extend `(a)` to the range of `((a))`
builder.extend_trimmed_node_range(inner.text_trimmed_range(), outer.text_trimmed_range());
let source_map = builder.finish();
// Query `a`
assert_eq!(
source_map.trimmed_source_text_from_transformed_range(TextRange::new(
TextSize::from(0),
TextSize::from(1)
)),
"((a))"
);
// Query `a;` expression
assert_eq!(
source_map.trimmed_source_text_from_transformed_range(TextRange::new(
TextSize::from(0),
TextSize::from(2)
)),
"((a));"
);
}
#[test]
fn deleted_ranges() {
let mut cst_builder = RawSyntaxTreeBuilder::new();
cst_builder.start_node(RawLanguageKind::ROOT);
// The shape of the tree doesn't matter for the test case
cst_builder.token(RawLanguageKind::STRING_TOKEN, "(a + (((b + c)) + d)) + e");
cst_builder.finish_node();
let root = cst_builder.finish();
let mut builder = TransformSourceMapBuilder::new();
builder.push_source_text(&root.text().to_string());
// Add mappings for all removed parentheses.
// `(`
builder.add_deleted_range(TextRange::new(TextSize::from(0), TextSize::from(1)));
// `(((`
builder.add_deleted_range(TextRange::new(TextSize::from(5), TextSize::from(6)));
// Ranges can be added out of order
builder.add_deleted_range(TextRange::new(TextSize::from(7), TextSize::from(8)));
builder.add_deleted_range(TextRange::new(TextSize::from(6), TextSize::from(7)));
// `))`
builder.add_deleted_range(TextRange::new(TextSize::from(13), TextSize::from(14)));
builder.add_deleted_range(TextRange::new(TextSize::from(14), TextSize::from(15)));
// `))`
builder.add_deleted_range(TextRange::new(TextSize::from(19), TextSize::from(20)));
builder.add_deleted_range(TextRange::new(TextSize::from(20), TextSize::from(21)));
let source_map = builder.finish();
let deleted_ranges = source_map.deleted_ranges().collect::<Vec<_>>();
assert_eq!(
deleted_ranges,
vec![
DeletedRangeEntry {
source: TextSize::from(0),
transformed: TextSize::from(0),
text: "("
},
DeletedRangeEntry {
source: TextSize::from(5),
transformed: TextSize::from(4),
text: "((("
},
DeletedRangeEntry {
source: TextSize::from(13),
transformed: TextSize::from(9),
text: "))"
},
DeletedRangeEntry {
source: TextSize::from(19),
transformed: TextSize::from(13),
text: "))"
},
]
);
assert_eq!(
source_map.deleted_ranges().last(),
Some(DeletedRangeEntry {
source: TextSize::from(19),
transformed: TextSize::from(13),
text: "))"
})
);
}
}

View File

@ -1,2 +0,0 @@
pub mod number;
pub mod string;

View File

@ -1,296 +0,0 @@
use crate::token::string::ToAsciiLowercaseCow;
use ruff_rowan::{Language, SyntaxToken};
use std::borrow::Cow;
use std::num::NonZeroUsize;
use crate::prelude::*;
use crate::{CstFormatContext, Format};
pub fn format_number_token<L>(token: &SyntaxToken<L>) -> CleanedNumberLiteralText<L>
where
L: Language,
{
CleanedNumberLiteralText { token }
}
pub struct CleanedNumberLiteralText<'token, L>
where
L: Language,
{
token: &'token SyntaxToken<L>,
}
impl<L, C> Format<C> for CleanedNumberLiteralText<'_, L>
where
L: Language + 'static,
C: CstFormatContext<Language = L>,
{
fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
format_replaced(
self.token,
&syntax_token_cow_slice(
format_trimmed_number(self.token.text_trimmed()),
self.token,
self.token.text_trimmed_range().start(),
),
)
.fmt(f)
}
}
enum FormatNumberLiteralState {
IntegerPart,
DecimalPart(FormatNumberLiteralDecimalPart),
Exponent(FormatNumberLiteralExponent),
}
struct FormatNumberLiteralDecimalPart {
dot_index: usize,
last_non_zero_index: Option<NonZeroUsize>,
}
struct FormatNumberLiteralExponent {
e_index: usize,
is_negative: bool,
first_digit_index: Option<NonZeroUsize>,
first_non_zero_index: Option<NonZeroUsize>,
}
// Regex-free version of https://github.com/prettier/prettier/blob/ca246afacee8e6d5db508dae01730c9523bbff1d/src/common/util.js#L341-L356
fn format_trimmed_number(text: &str) -> Cow<str> {
use FormatNumberLiteralState::*;
let text = text.to_ascii_lowercase_cow();
let mut copied_or_ignored_chars = 0usize;
let mut iter = text.chars().enumerate();
let mut curr = iter.next();
let mut state = IntegerPart;
// Will be filled only if and when the first place that needs reformatting is detected.
let mut cleaned_text = String::new();
// Look only at the start of the text: ignore any sign and make sure numbers always start with a digit, adding a 0 if one is missing.
if let Some((_, '+' | '-')) = curr {
curr = iter.next();
}
if let Some((curr_index, '.')) = curr {
cleaned_text.push_str(&text[copied_or_ignored_chars..curr_index]);
copied_or_ignored_chars = curr_index;
cleaned_text.push('0');
}
// Loop over the rest of the text, applying the remaining rules.
loop {
// We use a None pseudo-char at the end of the string to simplify the match cases that follow
let curr_or_none_terminator_char = match curr {
Some((curr_index, curr_char)) => (curr_index, Some(curr_char)),
None => (text.len(), None),
};
// Look for termination of the decimal part or exponent and see if we need to print it differently.
match (&state, curr_or_none_terminator_char) {
(
DecimalPart(FormatNumberLiteralDecimalPart {
dot_index,
last_non_zero_index: None,
}),
(curr_index, Some('e') | None),
) => {
// The decimal part equals zero, ignore it completely.
// Caveat: Prettier still prints a single `.0` unless there was *only* a trailing dot.
if curr_index > dot_index + 1 {
cleaned_text.push_str(&text[copied_or_ignored_chars..=*dot_index]);
cleaned_text.push('0');
} else {
cleaned_text.push_str(&text[copied_or_ignored_chars..*dot_index]);
}
copied_or_ignored_chars = curr_index;
}
(
DecimalPart(FormatNumberLiteralDecimalPart {
last_non_zero_index: Some(last_non_zero_index),
..
}),
(curr_index, Some('e') | None),
) if last_non_zero_index.get() < curr_index - 1 => {
// The decimal part ends with at least one zero, ignore them but copy the part from the dot until the last non-zero.
cleaned_text.push_str(&text[copied_or_ignored_chars..=last_non_zero_index.get()]);
copied_or_ignored_chars = curr_index;
}
(
Exponent(FormatNumberLiteralExponent {
e_index,
first_non_zero_index: None,
..
}),
(curr_index, None),
) => {
// The exponent equals zero, ignore it completely.
cleaned_text.push_str(&text[copied_or_ignored_chars..*e_index]);
copied_or_ignored_chars = curr_index;
}
(
Exponent(FormatNumberLiteralExponent {
e_index,
is_negative,
first_digit_index: Some(first_digit_index),
first_non_zero_index: Some(first_non_zero_index),
}),
(curr_index, None),
) if (first_digit_index.get() > e_index + 1 && !is_negative)
|| (first_non_zero_index.get() > first_digit_index.get()) =>
{
// The exponent begins with a plus or at least one zero, ignore them but copy the part from the first non-zero until the end.
cleaned_text.push_str(&text[copied_or_ignored_chars..=*e_index]);
if *is_negative {
cleaned_text.push('-');
}
cleaned_text.push_str(&text[first_non_zero_index.get()..curr_index]);
copied_or_ignored_chars = curr_index;
}
_ => {}
}
// Update state after the current char
match (&state, curr) {
// Cases entering or remaining in decimal part
(_, Some((curr_index, '.'))) => {
state = DecimalPart(FormatNumberLiteralDecimalPart {
dot_index: curr_index,
last_non_zero_index: None,
});
}
(DecimalPart(decimal_part), Some((curr_index, '1'..='9'))) => {
state = DecimalPart(FormatNumberLiteralDecimalPart {
last_non_zero_index: Some(unsafe {
// We've already entered DecimalPart, so curr_index must be > 0
NonZeroUsize::new_unchecked(curr_index)
}),
..*decimal_part
});
}
// Cases entering or remaining in exponent
(_, Some((curr_index, 'e'))) => {
state = Exponent(FormatNumberLiteralExponent {
e_index: curr_index,
is_negative: false,
first_digit_index: None,
first_non_zero_index: None,
});
}
(Exponent(exponent), Some((_, '-'))) => {
state = Exponent(FormatNumberLiteralExponent {
is_negative: true,
..*exponent
});
}
(
Exponent(
exponent @ FormatNumberLiteralExponent {
first_digit_index: None,
..
},
),
Some((curr_index, curr_char @ '0'..='9')),
) => {
state = Exponent(FormatNumberLiteralExponent {
first_digit_index: Some(unsafe {
// We've already entered Exponent, so curr_index must be > 0
NonZeroUsize::new_unchecked(curr_index)
}),
first_non_zero_index: if curr_char != '0' {
Some(unsafe {
// We've already entered Exponent, so curr_index must be > 0
NonZeroUsize::new_unchecked(curr_index)
})
} else {
None
},
..*exponent
});
}
(
Exponent(
exponent @ FormatNumberLiteralExponent {
first_non_zero_index: None,
..
},
),
Some((curr_index, '1'..='9')),
) => {
state = Exponent(FormatNumberLiteralExponent {
first_non_zero_index: Some(unsafe { NonZeroUsize::new_unchecked(curr_index) }),
..*exponent
});
}
_ => {}
}
// Repeat or exit
match curr {
None | Some((_, 'x') /* hex bailout */) => break,
Some(_) => curr = iter.next(),
}
}
if cleaned_text.is_empty() {
text
} else {
// Append any remaining text
cleaned_text.push_str(&text[copied_or_ignored_chars..]);
Cow::Owned(cleaned_text)
}
}
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use super::format_trimmed_number;
#[test]
fn removes_unnecessary_plus_and_zeros_from_scientific_notation() {
assert_eq!("1e2", format_trimmed_number("1e02"));
assert_eq!("1e2", format_trimmed_number("1e+2"));
}
#[test]
fn removes_unnecessary_scientific_notation() {
assert_eq!("1", format_trimmed_number("1e0"));
assert_eq!("1", format_trimmed_number("1e-0"));
}
#[test]
fn does_not_get_bamboozled_by_hex() {
assert_eq!("0xe0", format_trimmed_number("0xe0"));
assert_eq!("0x10e0", format_trimmed_number("0x10e0"));
}
#[test]
fn makes_sure_numbers_always_start_with_a_digit() {
assert_eq!("0.2", format_trimmed_number(".2"));
}
#[test]
fn removes_extraneous_trailing_decimal_zeroes() {
assert_eq!("0.1", format_trimmed_number("0.10"));
}
#[test]
fn keeps_one_trailing_decimal_zero() {
assert_eq!("0.0", format_trimmed_number("0.00"));
}
#[test]
fn removes_trailing_dot() {
assert_eq!("1", format_trimmed_number("1."));
}
#[test]
fn cleans_all_at_once() {
assert_eq!("0.0", format_trimmed_number(".00e-0"));
}
#[test]
fn keeps_the_input_string_if_no_change_needed() {
assert!(matches!(
format_trimmed_number("0.1e2"),
Cow::Borrowed("0.1e2")
));
}
}

View File

@ -1,259 +0,0 @@
use std::borrow::Cow;
pub trait ToAsciiLowercaseCow {
/// Returns the same value as `str::to_ascii_lowercase`. The only difference
/// is that this function returns a `Cow` and does not allocate
/// if the string is already lowercase.
fn to_ascii_lowercase_cow(&self) -> Cow<str>;
}
impl ToAsciiLowercaseCow for str {
fn to_ascii_lowercase_cow(&self) -> Cow<str> {
debug_assert!(self.is_ascii());
let bytes = self.as_bytes();
for idx in 0..bytes.len() {
let chr = bytes[idx];
if chr != chr.to_ascii_lowercase() {
let mut s = bytes.to_vec();
for b in &mut s[idx..] {
b.make_ascii_lowercase();
}
return Cow::Owned(unsafe { String::from_utf8_unchecked(s) });
}
}
Cow::Borrowed(self)
}
}
impl ToAsciiLowercaseCow for String {
#[inline(always)]
fn to_ascii_lowercase_cow(&self) -> Cow<str> {
self.as_str().to_ascii_lowercase_cow()
}
}
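// A hedged usage sketch (added for illustration, not part of the original file):
// `to_ascii_lowercase_cow` should borrow when the input is already lowercase and
// allocate only once an uppercase ASCII byte is found.
#[cfg(test)]
mod to_ascii_lowercase_cow_sketch {
    use super::ToAsciiLowercaseCow;
    use std::borrow::Cow;
    #[test]
    fn borrows_unless_rewriting_is_needed() {
        // Already lowercase: no allocation, the input is borrowed as-is.
        assert!(matches!("abc".to_ascii_lowercase_cow(), Cow::Borrowed("abc")));
        // Mixed case: an owned, lowercased copy is returned.
        assert_eq!("aBc".to_ascii_lowercase_cow(), "abc");
    }
}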
/// This signal is used to tell the next iteration what it should do with the current character
#[derive(Eq, PartialEq)]
pub enum CharSignal {
/// There hasn't been any signal
None,
/// The function decided to keep the previous character
Keep,
/// The function has decided to print the character. Saves the character that was
/// already written
AlreadyPrinted(char),
}
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub enum Quote {
Double,
Single,
}
impl Quote {
pub fn as_char(&self) -> char {
match self {
Quote::Double => '"',
Quote::Single => '\'',
}
}
pub fn as_string(&self) -> &str {
match self {
Quote::Double => "\"",
Quote::Single => "'",
}
}
/// Returns the quote, prepended with a backslash (escaped)
pub fn as_escaped(&self) -> &str {
match self {
Quote::Double => "\\\"",
Quote::Single => "\\'",
}
}
pub fn as_bytes(&self) -> u8 {
self.as_char() as u8
}
/// Given the current quote, it returns the other one
pub fn other(&self) -> Self {
match self {
Quote::Double => Quote::Single,
Quote::Single => Quote::Double,
}
}
}
/// This function is responsible for:
///
/// - reducing the number of escapes
/// - normalizing newlines
///
/// # Escaping
///
/// It works as follows: we scan the content for all the characters that could
/// keep or lose their escape.
///
/// Each time we encounter one of these characters, we push all the content found
/// **before** the current character into a new string.
///
/// After that, the function checks whether the current character itself should also be printed.
/// These characters (like quotes) can have an escape that might be removed. If that happens,
/// we use [CharSignal] to tell the next iteration what it should do with that character.
///
/// For example, take the following:
/// ```js
/// ("hello! \'")
/// ```
///
/// Here, we want to remove the backslash (\) from the content. So when we encounter `\`,
/// the algorithm checks whether a `'` follows, and if so, we push only `'` into the final
/// string and ignore the backslash. We then signal the next iteration with
/// [CharSignal::AlreadyPrinted], so when we process the next `'`, we ignore it and reset the signal.
///
/// Another example is the following:
///
/// ```js
/// (" \\' ")
/// ```
///
/// Here, we need to keep both backslashes. We check the first one, look ahead, and find another
/// `\`, so we keep the first one and signal the next iteration with [CharSignal::Keep].
/// On the next iteration we have the second `\` and, looking ahead, we find a `'`. But,
/// as opposed to the previous example, the signal says we should keep the current
/// character, so we do. The third iteration comes along and we find `'`. The signal is still
/// [CharSignal::Keep], so we keep the quote and then reset the signal to [CharSignal::None].
///
/// # Newlines
///
/// By default the formatter uses `\n` as a newline. The function replaces
/// `\r\n` with `\n`.
pub fn normalize_string(raw_content: &str, preferred_quote: Quote) -> Cow<str> {
let alternate_quote = preferred_quote.other();
// A string should be manipulated only if its raw content contains backslash or quotes
if !raw_content.contains(['\\', preferred_quote.as_char(), alternate_quote.as_char()]) {
return Cow::Borrowed(raw_content);
}
let mut reduced_string = String::new();
let mut signal = CharSignal::None;
let mut chars = raw_content.char_indices().peekable();
while let Some((_, current_char)) = chars.next() {
let next_character = chars.peek();
if let CharSignal::AlreadyPrinted(char) = signal {
if char == current_char {
continue;
}
}
match current_char {
'\\' => {
let bytes = raw_content.as_bytes();
if let Some((next_index, next_character)) = next_character {
// If we encounter an alternate quote that is escaped, we have to
// remove the escape from it.
// This is done because of how the enclosed strings can change.
// Check `computed_preferred_quote` for more details.
if *next_character as u8 == alternate_quote.as_bytes()
// This check is a safety net for cases where the backslash is at the end
// of the raw content:
// ("\\")
// The second backslash is at the end.
&& *next_index < bytes.len()
{
match signal {
CharSignal::Keep => {
reduced_string.push(current_char);
}
_ => {
reduced_string.push(alternate_quote.as_char());
signal = CharSignal::AlreadyPrinted(alternate_quote.as_char());
}
}
} else if signal == CharSignal::Keep {
reduced_string.push(current_char);
signal = CharSignal::None;
}
// The next character is another backslash, or
// a character that should be kept in the next iteration
else if "^\n\r\"'01234567\\bfnrtuvx\u{2028}\u{2029}".contains(*next_character)
{
signal = CharSignal::Keep;
// fallback, keep the backslash
reduced_string.push(current_char);
} else {
// These are usually characters whose escape can be
// removed: "\a" => "a".
// So we ignore the current backslash and continue
// to the next iteration
continue;
}
} else {
// fallback, keep the backslash
reduced_string.push(current_char);
}
}
'\n' | '\t' => {
if let CharSignal::AlreadyPrinted(the_char) = signal {
if matches!(the_char, '\n' | '\t') {
signal = CharSignal::None
}
} else {
reduced_string.push(current_char);
}
}
// If the current character is \r and the
// next is \n, skip over the entire sequence
'\r' if next_character.map_or(false, |(_, c)| *c == '\n') => {
reduced_string.push('\n');
signal = CharSignal::AlreadyPrinted('\n');
}
_ => {
// If we encounter a preferred quote and it's not escaped, we have to replace it with
// an escaped version.
// This is done because of how the enclosed strings can change.
// Check `computed_preferred_quote` for more details.
if current_char == preferred_quote.as_char() {
let last_char = &reduced_string.chars().last();
if let Some('\\') = last_char {
reduced_string.push(preferred_quote.as_char());
} else {
reduced_string.push_str(preferred_quote.as_escaped());
}
} else if current_char == alternate_quote.as_char() {
match signal {
CharSignal::None | CharSignal::Keep => {
reduced_string.push(alternate_quote.as_char());
}
CharSignal::AlreadyPrinted(_) => (),
}
} else {
reduced_string.push(current_char);
}
signal = CharSignal::None;
}
}
}
// Don't allocate a new string if the reduced string is empty
if reduced_string.is_empty() {
Cow::Borrowed(raw_content)
} else {
// Don't allocate a new string if it is still equal to the input string
if reduced_string == raw_content {
Cow::Borrowed(raw_content)
} else {
Cow::Owned(reduced_string)
}
}
}
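// A hedged usage sketch (hypothetical test mirroring the doc-comment examples):
// an escaped alternate quote loses its backslash, and `\r\n` collapses to `\n`
// whenever the string takes the rewrite path at all.
#[cfg(test)]
mod normalize_string_sketch {
    use super::{normalize_string, Quote};
    #[test]
    fn drops_redundant_escapes_and_normalizes_newlines() {
        // `\'` inside a double-quoted string doesn't need its escape.
        assert_eq!(normalize_string("hello! \\'", Quote::Double), "hello! '");
        // `\r\n` becomes `\n` (the quote forces the rewrite path).
        assert_eq!(normalize_string("a'\r\nb", Quote::Double), "a'\nb");
    }
}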

View File

@ -1,597 +0,0 @@
//! Provides builders for comments and skipped token trivia.
use crate::format_element::tag::VerbatimKind;
use crate::prelude::*;
use crate::{
comments::{CommentKind, CommentStyle},
write, Argument, Arguments, CstFormatContext, FormatRefWithRule, GroupId, SourceComment,
TextRange,
};
use ruff_rowan::{Language, SyntaxNode, SyntaxToken};
#[cfg(debug_assertions)]
use std::cell::Cell;
/// Formats the leading comments of `node`
pub const fn format_leading_comments<L: Language>(
node: &SyntaxNode<L>,
) -> FormatLeadingComments<L> {
FormatLeadingComments::Node(node)
}
/// Formats the leading comments of a node.
#[derive(Debug, Copy, Clone)]
pub enum FormatLeadingComments<'a, L: Language> {
Node(&'a SyntaxNode<L>),
Comments(&'a [SourceComment<L>]),
}
impl<Context> Format<Context> for FormatLeadingComments<'_, Context::Language>
where
Context: CstFormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
let comments = f.context().comments().clone();
let leading_comments = match self {
FormatLeadingComments::Node(node) => comments.leading_comments(node),
FormatLeadingComments::Comments(comments) => comments,
};
for comment in leading_comments {
let format_comment = FormatRefWithRule::new(comment, Context::CommentRule::default());
write!(f, [format_comment])?;
match comment.kind() {
CommentKind::Block | CommentKind::InlineBlock => {
match comment.lines_after() {
0 => write!(f, [space()])?,
1 => {
if comment.lines_before() == 0 {
write!(f, [soft_line_break_or_space()])?;
} else {
write!(f, [hard_line_break()])?;
}
}
_ => write!(f, [empty_line()])?,
};
}
CommentKind::Line => match comment.lines_after() {
0 | 1 => write!(f, [hard_line_break()])?,
_ => write!(f, [empty_line()])?,
},
}
comment.mark_formatted()
}
Ok(())
}
}
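// A hedged illustration (hypothetical source) of the match above, since the
// policy is easy to misread: block comments preserve the author's spacing,
// while line comments always force a break.
//
//   /* block */ stmt     lines_after == 0             => separated by a space
//   /* block */
//   stmt                 lines_after == 1             => soft/hard line break
//   // line
//   stmt                 line comment, 0 or 1 lines   => hard line break
//   (two or more lines after any comment              => empty line)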
/// Formats the trailing comments of `node`.
pub const fn format_trailing_comments<L: Language>(
node: &SyntaxNode<L>,
) -> FormatTrailingComments<L> {
FormatTrailingComments::Node(node)
}
/// Formats the trailing comments of `node`
#[derive(Debug, Clone, Copy)]
pub enum FormatTrailingComments<'a, L: Language> {
Node(&'a SyntaxNode<L>),
Comments(&'a [SourceComment<L>]),
}
impl<Context> Format<Context> for FormatTrailingComments<'_, Context::Language>
where
Context: CstFormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
let comments = f.context().comments().clone();
let trailing_comments = match self {
FormatTrailingComments::Node(node) => comments.trailing_comments(node),
FormatTrailingComments::Comments(comments) => comments,
};
let mut total_lines_before = 0;
for comment in trailing_comments {
total_lines_before += comment.lines_before();
let format_comment = FormatRefWithRule::new(comment, Context::CommentRule::default());
// This allows comments at the end of nested structures:
// {
// x: 1,
// y: 2
// // A comment
// }
// Those kinds of comments are almost always leading comments, but
// here it doesn't go "outside" the block and turns it into a
// trailing comment for `2`. We can simulate the above by checking
// if this is a comment on its own line; normal trailing comments are
// always at the end of another expression.
if total_lines_before > 0 {
write!(
f,
[
line_suffix(&format_with(|f| {
match comment.lines_before() {
0 | 1 => write!(f, [hard_line_break()])?,
_ => write!(f, [empty_line()])?,
};
write!(f, [format_comment])
})),
expand_parent()
]
)?;
} else {
let content = format_with(|f| write!(f, [space(), format_comment]));
if comment.kind().is_line() {
write!(f, [line_suffix(&content), expand_parent()])?;
} else {
write!(f, [content])?;
}
}
comment.mark_formatted();
}
Ok(())
}
}
/// Formats the dangling comments of `node`.
pub const fn format_dangling_comments<L: Language>(
node: &SyntaxNode<L>,
) -> FormatDanglingComments<L> {
FormatDanglingComments::Node {
node,
indent: DanglingIndentMode::None,
}
}
/// Formats the dangling comments of a node or an explicit list of comments.
pub enum FormatDanglingComments<'a, L: Language> {
Node {
node: &'a SyntaxNode<L>,
indent: DanglingIndentMode,
},
Comments {
comments: &'a [SourceComment<L>],
indent: DanglingIndentMode,
},
}
#[derive(Copy, Clone, Debug)]
pub enum DanglingIndentMode {
/// Writes every comment on its own line and indents them with a block indent.
///
/// # Examples
/// ```ignore
/// [
/// /* comment */
/// ]
///
/// [
/// /* comment */
/// /* multiple */
/// ]
/// ```
Block,
/// Writes every comment on its own line and indents them with a soft line indent.
/// Guarantees to write a line break if the last formatted comment is a [line](CommentKind::Line) comment.
///
/// # Examples
///
/// ```ignore
/// [/* comment */]
///
/// [
/// /* comment */
/// /* other */
/// ]
///
/// [
/// // line
/// ]
/// ```
Soft,
/// Writes every comment on its own line.
None,
}
impl<L: Language> FormatDanglingComments<'_, L> {
/// Indents the comments with a [block](DanglingIndentMode::Block) indent.
pub fn with_block_indent(self) -> Self {
self.with_indent_mode(DanglingIndentMode::Block)
}
/// Indents the comments with a [soft block](DanglingIndentMode::Soft) indent.
pub fn with_soft_block_indent(self) -> Self {
self.with_indent_mode(DanglingIndentMode::Soft)
}
fn with_indent_mode(mut self, mode: DanglingIndentMode) -> Self {
match &mut self {
FormatDanglingComments::Node { indent, .. } => *indent = mode,
FormatDanglingComments::Comments { indent, .. } => *indent = mode,
}
self
}
const fn indent(&self) -> DanglingIndentMode {
match self {
FormatDanglingComments::Node { indent, .. } => *indent,
FormatDanglingComments::Comments { indent, .. } => *indent,
}
}
}
impl<Context> Format<Context> for FormatDanglingComments<'_, Context::Language>
where
Context: CstFormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
let comments = f.context().comments().clone();
let dangling_comments = match self {
FormatDanglingComments::Node { node, .. } => comments.dangling_comments(node),
FormatDanglingComments::Comments { comments, .. } => *comments,
};
if dangling_comments.is_empty() {
return Ok(());
}
let format_dangling_comments = format_with(|f| {
// Write all comments up to the first skipped token trivia or the token
let mut join = f.join_with(hard_line_break());
for comment in dangling_comments {
let format_comment =
FormatRefWithRule::new(comment, Context::CommentRule::default());
join.entry(&format_comment);
comment.mark_formatted();
}
join.finish()?;
if matches!(self.indent(), DanglingIndentMode::Soft)
&& dangling_comments
.last()
.map_or(false, |comment| comment.kind().is_line())
{
write!(f, [hard_line_break()])?;
}
Ok(())
});
match self.indent() {
DanglingIndentMode::Block => {
write!(f, [block_indent(&format_dangling_comments)])
}
DanglingIndentMode::Soft => {
write!(f, [group(&soft_block_indent(&format_dangling_comments))])
}
DanglingIndentMode::None => {
write!(f, [format_dangling_comments])
}
}
}
}
/// Formats a token without its skipped token trivia
///
/// ## Warning
/// It's your responsibility to format any skipped trivia.
pub const fn format_trimmed_token<L: Language>(token: &SyntaxToken<L>) -> FormatTrimmedToken<L> {
FormatTrimmedToken { token }
}
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub struct FormatTrimmedToken<'a, L: Language> {
token: &'a SyntaxToken<L>,
}
impl<L: Language + 'static, C> Format<C> for FormatTrimmedToken<'_, L>
where
C: CstFormatContext<Language = L>,
{
fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
let trimmed_range = self.token.text_trimmed_range();
syntax_token_text_slice(self.token, trimmed_range).fmt(f)
}
}
/// Formats the skipped token trivia of a removed token and marks the token as tracked.
pub const fn format_removed<L>(token: &SyntaxToken<L>) -> FormatRemoved<L>
where
L: Language,
{
FormatRemoved { token }
}
/// Formats the trivia of a token that is present in the source text but should be omitted in the
/// formatted output.
pub struct FormatRemoved<'a, L>
where
L: Language,
{
token: &'a SyntaxToken<L>,
}
impl<C, L> Format<C> for FormatRemoved<'_, L>
where
L: Language + 'static,
C: CstFormatContext<Language = L>,
{
fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
f.state_mut().track_token(self.token);
write!(f, [format_skipped_token_trivia(self.token)])
}
}
/// Print out a `token` from the original source with a different `content`.
///
/// This prints the skipped token trivia that belongs to `token` before `content`;
/// `token` is then marked as consumed by the formatter.
pub fn format_replaced<'a, 'content, L, Context>(
token: &'a SyntaxToken<L>,
content: &'content impl Format<Context>,
) -> FormatReplaced<'a, 'content, L, Context>
where
L: Language,
{
FormatReplaced {
token,
content: Argument::new(content),
}
}
/// Formats a token's skipped token trivia but uses the provided content instead
/// of the token in the formatted output.
#[derive(Copy, Clone)]
pub struct FormatReplaced<'a, 'content, L, C>
where
L: Language,
{
token: &'a SyntaxToken<L>,
content: Argument<'content, C>,
}
impl<L, C> Format<C> for FormatReplaced<'_, '_, L, C>
where
L: Language + 'static,
C: CstFormatContext<Language = L>,
{
fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
f.state_mut().track_token(self.token);
write!(f, [format_skipped_token_trivia(self.token)])?;
f.write_fmt(Arguments::from(&self.content))
}
}
/// Formats the given token only if the group does break and otherwise retains the token's skipped token trivia.
pub fn format_only_if_breaks<'a, 'content, L, Content, Context>(
token: &'a SyntaxToken<L>,
content: &'content Content,
) -> FormatOnlyIfBreaks<'a, 'content, L, Context>
where
L: Language,
Content: Format<Context>,
{
FormatOnlyIfBreaks {
token,
content: Argument::new(content),
group_id: None,
}
}
/// Formats a token with its skipped token trivia that only gets printed if its enclosing
/// group does break but otherwise gets omitted from the formatted output.
pub struct FormatOnlyIfBreaks<'a, 'content, L, C>
where
L: Language,
{
token: &'a SyntaxToken<L>,
content: Argument<'content, C>,
group_id: Option<GroupId>,
}
impl<'a, 'content, L, C> FormatOnlyIfBreaks<'a, 'content, L, C>
where
L: Language,
{
pub fn with_group_id(mut self, group_id: Option<GroupId>) -> Self {
self.group_id = group_id;
self
}
}
impl<L, C> Format<C> for FormatOnlyIfBreaks<'_, '_, L, C>
where
L: Language + 'static,
C: CstFormatContext<Language = L>,
{
fn fmt(&self, f: &mut Formatter<C>) -> FormatResult<()> {
write!(
f,
[if_group_breaks(&Arguments::from(&self.content)).with_group_id(self.group_id),]
)?;
if f.comments().has_skipped(self.token) {
// Otherwise, print the skipped token trivia if the group fits on the line
write!(
f,
[
if_group_fits_on_line(&format_skipped_token_trivia(self.token))
.with_group_id(self.group_id)
]
)?;
}
Ok(())
}
}
/// Formats the skipped token trivia of `token`.
pub const fn format_skipped_token_trivia<L: Language>(
token: &SyntaxToken<L>,
) -> FormatSkippedTokenTrivia<L> {
FormatSkippedTokenTrivia { token }
}
/// Formats the skipped token trivia of `token`.
pub struct FormatSkippedTokenTrivia<'a, L: Language> {
token: &'a SyntaxToken<L>,
}
impl<L: Language> FormatSkippedTokenTrivia<'_, L> {
#[cold]
fn fmt_skipped<Context>(&self, f: &mut Formatter<Context>) -> FormatResult<()>
where
Context: CstFormatContext<Language = L>,
{
// Lines/spaces before the next token/comment
let (mut lines, mut spaces) = match self.token.prev_token() {
Some(token) => {
let mut lines = 0u32;
let mut spaces = 0u32;
for piece in token.trailing_trivia().pieces().rev() {
if piece.is_whitespace() {
spaces += 1;
} else if piece.is_newline() {
spaces = 0;
lines += 1;
} else {
break;
}
}
(lines, spaces)
}
None => (0, 0),
};
// The comments between the last skipped token trivia and the token
let mut dangling_comments = Vec::new();
let mut skipped_range: Option<TextRange> = None;
// Iterate over the remaining pieces to find the full range from the first to the last skipped token trivia.
// Extract the comments between the last skipped token trivia and the token.
for piece in self.token.leading_trivia().pieces() {
if piece.is_whitespace() {
spaces += 1;
continue;
}
if piece.is_newline() {
lines += 1;
spaces = 0;
} else if let Some(comment) = piece.as_comments() {
let source_comment = SourceComment {
kind: Context::Style::get_comment_kind(&comment),
lines_before: lines,
lines_after: 0,
piece: comment,
#[cfg(debug_assertions)]
formatted: Cell::new(true),
};
dangling_comments.push(source_comment);
lines = 0;
spaces = 0;
} else if piece.is_skipped() {
skipped_range = Some(match skipped_range {
Some(range) => range.cover(piece.text_range()),
None => {
if dangling_comments.is_empty() {
match lines {
0 if spaces == 0 => {
// The token was not separated from the previous token and has no preceding comment. Keep it that way
}
0 => write!(f, [space()])?,
_ => write!(f, [hard_line_break()])?,
};
} else {
match lines {
0 => write!(f, [space()])?,
1 => write!(f, [hard_line_break()])?,
_ => write!(f, [empty_line()])?,
};
}
piece.text_range()
}
});
lines = 0;
spaces = 0;
dangling_comments.clear();
}
}
let skipped_range =
skipped_range.unwrap_or_else(|| TextRange::empty(self.token.text_range().start()));
f.write_element(FormatElement::Tag(Tag::StartVerbatim(
VerbatimKind::Verbatim {
length: skipped_range.len(),
},
)))?;
write!(f, [syntax_token_text_slice(self.token, skipped_range)])?;
f.write_element(FormatElement::Tag(Tag::EndVerbatim))?;
// Write whitespace separator between skipped/last comment and token
if dangling_comments.is_empty() {
match lines {
0 if spaces == 0 => {
// Don't write a space if there was none in the source document
Ok(())
}
0 => write!(f, [space()]),
_ => write!(f, [hard_line_break()]),
}
} else {
match dangling_comments.first().unwrap().lines_before {
0 => write!(f, [space()])?,
1 => write!(f, [hard_line_break()])?,
_ => write!(f, [empty_line()])?,
}
write!(
f,
[FormatDanglingComments::Comments {
comments: &dangling_comments,
indent: DanglingIndentMode::None
}]
)?;
match lines {
0 => write!(f, [space()]),
_ => write!(f, [hard_line_break()]),
}
}
}
}
impl<Context> Format<Context> for FormatSkippedTokenTrivia<'_, Context::Language>
where
Context: CstFormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
if f.comments().has_skipped(self.token) {
self.fmt_skipped(f)
} else {
Ok(())
}
}
}

View File

@ -0,0 +1,7 @@
#[cfg(target_pointer_width = "64")]
#[macro_export]
macro_rules! static_assert {
($expr:expr) => {
const _: i32 = 0 / $expr as i32;
};
}
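// A hedged usage sketch (hypothetical, not part of the new file): `true as i32`
// is 1, so the division const-evaluates to 0; a false expression produces
// `0 / 0`, which is rejected at compile time.
#[cfg(all(test, target_pointer_width = "64"))]
mod static_assert_sketch {
    // Compiles: the expression is true, so this expands to `const _: i32 = 0 / 1;`.
    crate::static_assert!(std::mem::size_of::<u64>() == 8);
    // A false assertion would fail to compile:
    // crate::static_assert!(std::mem::size_of::<u64>() == 4); // error: 0 / 0
}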

View File

@ -1,205 +0,0 @@
use crate::format_element::tag::VerbatimKind;
use crate::prelude::*;
use crate::trivia::{FormatLeadingComments, FormatTrailingComments};
use crate::{write, CstFormatContext, FormatWithRule};
use ruff_rowan::{AstNode, Direction, Language, SyntaxElement, SyntaxNode, TextRange};
/// "Formats" a node according to its original formatting in the source text. Being able to format
/// a node "as is" is useful if a node contains syntax errors. Formatting a node with syntax errors
/// has the risk that Rome misinterprets the structure of the code and formatting it could
/// "mess up" the developers, yet incomplete, work or accidentally introduce new syntax errors.
///
/// You may be inclined to call `node.text` directly. However, using `text` doesn't track the nodes
/// nor its children source mapping information, resulting in incorrect source maps for this subtree.
///
/// These nodes and tokens get tracked as [VerbatimKind::Verbatim], useful to understand
/// if these nodes still need to have their own implementation.
pub fn format_verbatim_node<L: Language>(node: &SyntaxNode<L>) -> FormatVerbatimNode<L> {
FormatVerbatimNode {
node,
kind: VerbatimKind::Verbatim {
length: node.text_range().len(),
},
format_comments: true,
}
}
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct FormatVerbatimNode<'node, L: Language> {
node: &'node SyntaxNode<L>,
kind: VerbatimKind,
format_comments: bool,
}
impl<Context> Format<Context> for FormatVerbatimNode<'_, Context::Language>
where
Context: CstFormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
for element in self.node.descendants_with_tokens(Direction::Next) {
match element {
SyntaxElement::Token(token) => f.state_mut().track_token(&token),
SyntaxElement::Node(node) => {
let comments = f.context().comments();
comments.mark_suppression_checked(&node);
for comment in comments.leading_dangling_trailing_comments(&node) {
comment.mark_formatted();
}
}
}
}
// The trimmed range of a node is its range without any of its leading or trailing trivia.
// Except for nodes that used to be parenthesized, where the range instead covers the source from the
// `(` to the `)` (the trimmed range of the parenthesized expression, not the inner expression)
let trimmed_source_range = f.context().source_map().map_or_else(
|| self.node.text_trimmed_range(),
|source_map| source_map.trimmed_source_range(self.node),
);
f.write_element(FormatElement::Tag(Tag::StartVerbatim(self.kind)))?;
fn source_range<Context>(f: &Formatter<Context>, range: TextRange) -> TextRange
where
Context: CstFormatContext,
{
f.context()
.source_map()
.map_or_else(|| range, |source_map| source_map.source_range(range))
}
// Format all leading comments that are outside of the node's source range.
if self.format_comments {
let comments = f.context().comments().clone();
let leading_comments = comments.leading_comments(self.node);
let outside_trimmed_range = leading_comments.partition_point(|comment| {
comment.piece().text_range().end() <= trimmed_source_range.start()
});
let (outside_trimmed_range, in_trimmed_range) =
leading_comments.split_at(outside_trimmed_range);
write!(f, [FormatLeadingComments::Comments(outside_trimmed_range)])?;
for comment in in_trimmed_range {
comment.mark_formatted();
}
}
// Find the first skipped token trivia, if any, and include it in the verbatim range because
// the comments only format **up to** but not including skipped token trivia.
let start_source = self
.node
.first_leading_trivia()
.into_iter()
.flat_map(|trivia| trivia.pieces())
.filter(|trivia| trivia.is_skipped())
.map(|trivia| source_range(f, trivia.text_range()).start())
.take_while(|start| *start < trimmed_source_range.start())
.next()
.unwrap_or_else(|| trimmed_source_range.start());
let original_source = f.context().source_map().map_or_else(
|| self.node.text_trimmed().to_string(),
|source_map| {
source_map.text()[trimmed_source_range.cover_offset(start_source)].to_string()
},
);
dynamic_text(
&normalize_newlines(&original_source, LINE_TERMINATORS),
self.node.text_trimmed_range().start(),
)
.fmt(f)?;
for comment in f.context().comments().dangling_comments(self.node) {
comment.mark_formatted();
}
// Format all trailing comments that are outside of the trimmed range.
if self.format_comments {
let comments = f.context().comments().clone();
let trailing_comments = comments.trailing_comments(self.node);
let outside_trimmed_range_start = trailing_comments.partition_point(|comment| {
source_range(f, comment.piece().text_range()).end() <= trimmed_source_range.end()
});
let (in_trimmed_range, outside_trimmed_range) =
trailing_comments.split_at(outside_trimmed_range_start);
for comment in in_trimmed_range {
comment.mark_formatted();
}
write!(f, [FormatTrailingComments::Comments(outside_trimmed_range)])?;
}
f.write_element(FormatElement::Tag(Tag::EndVerbatim))
}
}
impl<L: Language> FormatVerbatimNode<'_, L> {
pub fn skip_comments(mut self) -> Self {
self.format_comments = false;
self
}
}
/// Formats bogus nodes. The difference between this method and `format_verbatim` is that this method
/// doesn't track nodes/tokens as [VerbatimKind::Verbatim]. They are just printed as they are.
pub fn format_bogus_node<L: Language>(node: &SyntaxNode<L>) -> FormatVerbatimNode<L> {
FormatVerbatimNode {
node,
kind: VerbatimKind::Bogus,
format_comments: true,
}
}
/// Formats a node that has a formatter suppression comment applied to it.
pub fn format_suppressed_node<L: Language>(node: &SyntaxNode<L>) -> FormatVerbatimNode<L> {
FormatVerbatimNode {
node,
kind: VerbatimKind::Suppressed,
format_comments: true,
}
}
/// Formats an object using its [`Format`] implementation but falls back to printing the object as
/// it is in the source document if formatting it returns a [`FormatError::SyntaxError`].
pub const fn format_or_verbatim<F>(inner: F) -> FormatNodeOrVerbatim<F> {
FormatNodeOrVerbatim { inner }
}
/// Formats a node or falls back to verbatim printing if formatting this node fails.
#[derive(Copy, Clone)]
pub struct FormatNodeOrVerbatim<F> {
inner: F,
}
impl<F, Context, Item> Format<Context> for FormatNodeOrVerbatim<F>
where
F: FormatWithRule<Context, Item = Item>,
Item: AstNode,
Context: CstFormatContext<Language = Item::Language>,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
let snapshot = Formatter::state_snapshot(f);
match self.inner.fmt(f) {
Ok(result) => Ok(result),
Err(FormatError::SyntaxError) => {
f.restore_state_snapshot(snapshot);
// Lists that yield errors are formatted as if they were suppressed nodes.
// Doing so, the formatter formats the nodes/tokens as is.
format_suppressed_node(self.inner.item().syntax()).fmt(f)
}
Err(err) => Err(err),
}
}
}

View File

@ -1,29 +0,0 @@
[package]
name = "ruff_rowan"
version = "0.0.0"
publish = false
edition = "2021"
[dependencies]
countme = { version = "3.0.1" }
hashbrown = { version = "0.12.3", features = ["inline-more"], default-features = false }
memoffset = { version = "0.6.5" }
ruff_text_edit = { path = "../ruff_text_edit" }
ruff_text_size = { path = "../ruff_text_size" }
rustc-hash = { workspace = true }
schemars = { version = "0.8.10", optional = true }
serde = { version = "1.0.133", optional = true, default-features = false }
tracing = { version = "0.1.31", default-features = false, features = ["std"] }
[dev-dependencies]
quickcheck = "1.0.3"
quickcheck_macros = "1.0.0"
serde_json = "1.0.79"
iai = "*"
[features]
serde = ["dep:serde", "schemars", "ruff_text_size/serde"]
[[bench]]
name = "mutation"
harness = false

View File

@ -1,69 +0,0 @@
use ruff_rowan::{
raw_language::{LiteralExpression, RawLanguageKind, RawLanguageRoot, RawSyntaxTreeBuilder},
AstNode, AstNodeExt, BatchMutationExt, SyntaxNodeCast,
};
/// ```
/// 0: ROOT@0..1
/// 0: LITERAL_EXPRESSION@0..1
/// 0: STRING_TOKEN@0..1 "a" [] []
/// ```
fn tree_one(a: &str) -> (RawLanguageRoot, String) {
let mut builder = RawSyntaxTreeBuilder::new();
builder
.start_node(RawLanguageKind::ROOT)
.start_node(RawLanguageKind::LITERAL_EXPRESSION)
.token(RawLanguageKind::STRING_TOKEN, a)
.finish_node()
.finish_node();
let root = builder.finish().cast::<RawLanguageRoot>().unwrap();
let s = format!("{:#?}", root.syntax());
(root, s)
}
fn find(root: &RawLanguageRoot, name: &str) -> LiteralExpression {
root.syntax()
.descendants()
.find(|x| x.kind() == RawLanguageKind::LITERAL_EXPRESSION && x.text_trimmed() == name)
.unwrap()
.cast::<LiteralExpression>()
.unwrap()
}
fn clone_detach(root: &RawLanguageRoot, name: &str) -> LiteralExpression {
root.syntax()
.descendants()
.find(|x| x.kind() == RawLanguageKind::LITERAL_EXPRESSION && x.text_trimmed() == name)
.unwrap()
.detach()
.cast::<LiteralExpression>()
.unwrap()
}
fn mutation_replace_node() -> usize {
let (before, _) = tree_one("a");
let (expected, _) = tree_one("b");
let a = find(&before, "a");
let b = clone_detach(&expected, "b");
let root = before.replace_node(a, b).unwrap();
root.syntax().descendants().count()
}
fn mutation_batch() -> usize {
let (before, _) = tree_one("a");
let (expected, _) = tree_one("b");
let a = find(&before, "a");
let b = clone_detach(&expected, "b");
let mut batch = before.begin();
batch.replace_node(a, b);
let root = batch.commit();
root.descendants().count()
}
iai::main!(mutation_replace_node, mutation_batch);

View File

@ -1,497 +0,0 @@
//! Vendored and stripped down version of triomphe
use std::{
alloc::{self, Layout},
cmp::Ordering,
hash::{Hash, Hasher},
marker::PhantomData,
mem::{self, ManuallyDrop},
ops::Deref,
ptr,
sync::atomic::{
self,
Ordering::{Acquire, Relaxed, Release},
},
};
use memoffset::offset_of;
/// A soft limit on the number of references that may be made to an `Arc`.
///
/// Going above this limit will abort your program, although not
/// necessarily at _exactly_ `MAX_REFCOUNT + 1` references.
const MAX_REFCOUNT: usize = (isize::MAX) as usize;
/// The object allocated by an Arc<T>
#[repr(C)]
pub(crate) struct ArcInner<T: ?Sized> {
pub(crate) count: atomic::AtomicUsize,
pub(crate) data: T,
}
unsafe impl<T: ?Sized + Sync + Send> Send for ArcInner<T> {}
unsafe impl<T: ?Sized + Sync + Send> Sync for ArcInner<T> {}
/// An atomically reference counted shared pointer
///
/// See the documentation for [`Arc`] in the standard library. Unlike the
/// standard library `Arc`, this `Arc` does not support weak reference counting.
///
/// [`Arc`]: https://doc.rust-lang.org/stable/std/sync/struct.Arc.html
#[repr(transparent)]
pub(crate) struct Arc<T: ?Sized> {
pub(crate) p: ptr::NonNull<ArcInner<T>>,
pub(crate) phantom: PhantomData<T>,
}
unsafe impl<T: ?Sized + Sync + Send> Send for Arc<T> {}
unsafe impl<T: ?Sized + Sync + Send> Sync for Arc<T> {}
impl<T> Arc<T> {
/// Reconstruct the Arc<T> from a raw pointer obtained from into_raw()
///
/// Note: This raw pointer will be offset in the allocation and must be preceded
/// by the atomic count.
///
/// It is recommended to use OffsetArc for this
#[inline]
pub(crate) unsafe fn from_raw(ptr: *const T) -> Self {
// To find the corresponding pointer to the `ArcInner` we need
// to subtract the offset of the `data` field from the pointer.
let ptr = (ptr as *const u8).sub(offset_of!(ArcInner<T>, data));
Arc {
p: ptr::NonNull::new_unchecked(ptr as *mut ArcInner<T>),
phantom: PhantomData,
}
}
}
impl<T: ?Sized> Arc<T> {
#[inline]
fn inner(&self) -> &ArcInner<T> {
// This unsafety is ok because while this arc is alive we're guaranteed
// that the inner pointer is valid. Furthermore, we know that the
// `ArcInner` structure itself is `Sync` because the inner data is
// `Sync` as well, so we're ok loaning out an immutable pointer to these
// contents.
unsafe { &*self.ptr() }
}
// Non-inlined part of `drop`. Just invokes the destructor.
#[inline(never)]
unsafe fn drop_slow(&mut self) {
let _ = Box::from_raw(self.ptr());
}
/// Test pointer equality between the two Arcs, i.e. they must be the _same_
/// allocation
#[inline]
pub(crate) fn ptr_eq(this: &Self, other: &Self) -> bool {
this.ptr() == other.ptr()
}
pub(crate) fn ptr(&self) -> *mut ArcInner<T> {
self.p.as_ptr()
}
}
impl<T: ?Sized> Clone for Arc<T> {
#[inline]
fn clone(&self) -> Self {
// Using a relaxed ordering is alright here, as knowledge of the
// original reference prevents other threads from erroneously deleting
// the object.
//
// As explained in the [Boost documentation][1], Increasing the
// reference counter can always be done with memory_order_relaxed: New
// references to an object can only be formed from an existing
// reference, and passing an existing reference from one thread to
// another must already provide any required synchronization.
//
// [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html)
let old_size = self.inner().count.fetch_add(1, Relaxed);
// However we need to guard against massive refcounts in case someone
// is `mem::forget`ing Arcs. If we don't do this the count can overflow
// and users will use-after free. We racily saturate to `isize::MAX` on
// the assumption that there aren't ~2 billion threads incrementing
// the reference count at once. This branch will never be taken in
// any realistic program.
//
// We abort because such a program is incredibly degenerate, and we
// don't care to support it.
if old_size > MAX_REFCOUNT {
std::process::abort();
}
unsafe {
Arc {
p: ptr::NonNull::new_unchecked(self.ptr()),
phantom: PhantomData,
}
}
}
}
impl<T: ?Sized> Deref for Arc<T> {
type Target = T;
#[inline]
fn deref(&self) -> &T {
&self.inner().data
}
}
impl<T: ?Sized> Arc<T> {
/// Provides mutable access to the contents _if_ the `Arc` is uniquely owned.
#[inline]
pub(crate) fn get_mut(this: &mut Self) -> Option<&mut T> {
if this.is_unique() {
unsafe {
// See make_mut() for documentation of the threadsafety here.
Some(&mut (*this.ptr()).data)
}
} else {
None
}
}
/// Whether or not the `Arc` is uniquely owned (is the refcount 1?).
pub(crate) fn is_unique(&self) -> bool {
// See the extensive discussion in [1] for why this needs to be Acquire.
//
// [1] https://github.com/servo/servo/issues/21186
self.inner().count.load(Acquire) == 1
}
}
impl<T: ?Sized> Drop for Arc<T> {
#[inline]
fn drop(&mut self) {
// Because `fetch_sub` is already atomic, we do not need to synchronize
// with other threads unless we are going to delete the object.
if self.inner().count.fetch_sub(1, Release) != 1 {
return;
}
// FIXME(bholley): Use the updated comment when [2] is merged.
//
// This load is needed to prevent reordering of use of the data and
// deletion of the data. Because it is marked `Release`, the decreasing
// of the reference count synchronizes with this `Acquire` load. This
// means that use of the data happens before decreasing the reference
// count, which happens before this load, which happens before the
// deletion of the data.
//
// As explained in the [Boost documentation][1],
//
// > It is important to enforce any possible access to the object in one
// > thread (through an existing reference) to *happen before* deleting
// > the object in a different thread. This is achieved by a "release"
// > operation after dropping a reference (any access to the object
// > through this reference must obviously happened before), and an
// > "acquire" operation before deleting the object.
//
// [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html)
// [2]: https://github.com/rust-lang/rust/pull/41714
self.inner().count.load(Acquire);
unsafe {
self.drop_slow();
}
}
}
impl<T: ?Sized + PartialEq> PartialEq for Arc<T> {
fn eq(&self, other: &Arc<T>) -> bool {
Self::ptr_eq(self, other) || *(*self) == *(*other)
}
}
impl<T: ?Sized + PartialOrd> PartialOrd for Arc<T> {
fn partial_cmp(&self, other: &Arc<T>) -> Option<Ordering> {
(**self).partial_cmp(&**other)
}
fn lt(&self, other: &Arc<T>) -> bool {
*(*self) < *(*other)
}
fn le(&self, other: &Arc<T>) -> bool {
*(*self) <= *(*other)
}
fn gt(&self, other: &Arc<T>) -> bool {
*(*self) > *(*other)
}
fn ge(&self, other: &Arc<T>) -> bool {
*(*self) >= *(*other)
}
}
impl<T: ?Sized + Ord> Ord for Arc<T> {
fn cmp(&self, other: &Arc<T>) -> Ordering {
(**self).cmp(&**other)
}
}
impl<T: ?Sized + Eq> Eq for Arc<T> {}
impl<T: ?Sized + Hash> Hash for Arc<T> {
fn hash<H: Hasher>(&self, state: &mut H) {
(**self).hash(state)
}
}
#[derive(Debug, Eq, PartialEq, Hash, PartialOrd)]
#[repr(C)]
pub(crate) struct HeaderSlice<H, T: ?Sized> {
pub(crate) header: H,
length: usize,
slice: T,
}
impl<H, T> HeaderSlice<H, [T]> {
pub(crate) fn slice(&self) -> &[T] {
&self.slice
}
/// Returns the number of items
pub(crate) fn len(&self) -> usize {
self.length
}
}
impl<H, T> Deref for HeaderSlice<H, [T; 0]> {
type Target = HeaderSlice<H, [T]>;
fn deref(&self) -> &Self::Target {
unsafe {
let len = self.length;
let fake_slice: *const [T] =
ptr::slice_from_raw_parts(self as *const _ as *const T, len);
&*(fake_slice as *const HeaderSlice<H, [T]>)
}
}
}
/// A "thin" `Arc` containing dynamically sized data
///
/// This is functionally equivalent to `Arc<(H, [T])>`
///
/// When you create an `Arc` containing a dynamically sized type
/// like `HeaderSlice<H, [T]>`, the `Arc` is represented on the stack
/// as a "fat pointer", where the length of the slice is stored
/// alongside the `Arc`'s pointer. In some situations you may wish to
/// have a thin pointer instead, perhaps for FFI compatibility
/// or space efficiency.
///
/// Note that we use `[T; 0]` in order to have the right alignment for `T`.
///
/// `ThinArc` solves this by storing the length in the allocation itself,
/// via `HeaderSlice`.
#[repr(transparent)]
pub(crate) struct ThinArc<H, T> {
ptr: ptr::NonNull<ArcInner<HeaderSlice<H, [T; 0]>>>,
phantom: PhantomData<(H, T)>,
}
unsafe impl<H: Sync + Send, T: Sync + Send> Send for ThinArc<H, T> {}
unsafe impl<H: Sync + Send, T: Sync + Send> Sync for ThinArc<H, T> {}
// Synthesize a fat pointer from a thin pointer.
fn thin_to_thick<H, T>(
thin: *mut ArcInner<HeaderSlice<H, [T; 0]>>,
) -> *mut ArcInner<HeaderSlice<H, [T]>> {
let len = unsafe { (*thin).data.length };
let fake_slice: *mut [T] = ptr::slice_from_raw_parts_mut(thin as *mut T, len);
// Transplants metadata.
fake_slice as *mut ArcInner<HeaderSlice<H, [T]>>
}
impl<H, T> ThinArc<H, T> {
/// Temporarily converts `self` into a bona fide `Arc` and exposes it to the
/// provided callback. The refcount is not modified.
#[inline]
pub(crate) fn with_arc<F, U>(&self, f: F) -> U
where
F: FnOnce(&Arc<HeaderSlice<H, [T]>>) -> U,
{
// Synthesize transient Arc, which never touches the refcount of the ArcInner.
let transient = unsafe {
ManuallyDrop::new(Arc {
p: ptr::NonNull::new_unchecked(thin_to_thick(self.ptr.as_ptr())),
phantom: PhantomData,
})
};
// Expose the transient Arc to the callback, which may clone it if it wants.
// Forward the result.
f(&transient)
}
/// Creates a `ThinArc` for a HeaderSlice using the given header struct and
/// iterator to generate the slice.
pub(crate) fn from_header_and_iter<I>(header: H, mut items: I) -> Self
where
I: Iterator<Item = T> + ExactSizeIterator,
{
assert_ne!(mem::size_of::<T>(), 0, "Need to think about ZST");
let num_items = items.len();
// Offset of the start of the slice in the allocation.
let inner_to_data_offset = offset_of!(ArcInner<HeaderSlice<H, [T; 0]>>, data);
let data_to_slice_offset = offset_of!(HeaderSlice<H, [T; 0]>, slice);
let slice_offset = inner_to_data_offset + data_to_slice_offset;
// Compute the size of the real payload.
let slice_size = mem::size_of::<T>()
.checked_mul(num_items)
.expect("size overflows");
let usable_size = slice_offset
.checked_add(slice_size)
.expect("size overflows");
// Round up size to alignment.
let align = mem::align_of::<ArcInner<HeaderSlice<H, [T; 0]>>>();
let size = usable_size.wrapping_add(align - 1) & !(align - 1);
assert!(size >= usable_size, "size overflows");
let layout = Layout::from_size_align(size, align).expect("invalid layout");
let ptr: *mut ArcInner<HeaderSlice<H, [T; 0]>>;
unsafe {
let buffer = alloc::alloc(layout);
if buffer.is_null() {
alloc::handle_alloc_error(layout);
}
// // Synthesize the fat pointer. We do this by claiming we have a direct
// // pointer to a [T], and then changing the type of the borrow. The key
// // point here is that the length portion of the fat pointer applies
// // only to the number of elements in the dynamically-sized portion of
// // the type, so the value will be the same whether it points to a [T]
// // or something else with a [T] as its last member.
// let fake_slice: &mut [T] = slice::from_raw_parts_mut(buffer as *mut T, num_items);
// ptr = fake_slice as *mut [T] as *mut ArcInner<HeaderSlice<H, [T]>>;
ptr = buffer as *mut _;
let count = atomic::AtomicUsize::new(1);
// Write the data.
//
// Note that any panics here (i.e. from the iterator) are safe, since
// we'll just leak the uninitialized memory.
ptr::write(ptr::addr_of_mut!((*ptr).count), count);
ptr::write(ptr::addr_of_mut!((*ptr).data.header), header);
ptr::write(ptr::addr_of_mut!((*ptr).data.length), num_items);
if num_items != 0 {
let mut current = ptr::addr_of_mut!((*ptr).data.slice) as *mut T;
debug_assert_eq!(current as usize - buffer as usize, slice_offset);
for _ in 0..num_items {
ptr::write(
current,
items
.next()
.expect("ExactSizeIterator over-reported length"),
);
current = current.offset(1);
}
assert!(
items.next().is_none(),
"ExactSizeIterator under-reported length"
);
// We should have consumed the buffer exactly.
debug_assert_eq!(current as *mut u8, buffer.add(usable_size));
}
assert!(
items.next().is_none(),
"ExactSizeIterator under-reported length"
);
}
ThinArc {
ptr: unsafe { ptr::NonNull::new_unchecked(ptr) },
phantom: PhantomData,
}
}
}
impl<H, T> Deref for ThinArc<H, T> {
type Target = HeaderSlice<H, [T]>;
#[inline]
fn deref(&self) -> &Self::Target {
unsafe { &(*thin_to_thick(self.ptr.as_ptr())).data }
}
}
impl<H, T> Clone for ThinArc<H, T> {
#[inline]
fn clone(&self) -> Self {
ThinArc::with_arc(self, |a| Arc::into_thin(a.clone()))
}
}
impl<H, T> Drop for ThinArc<H, T> {
#[inline]
fn drop(&mut self) {
let _ = Arc::from_thin(ThinArc {
ptr: self.ptr,
phantom: PhantomData,
});
}
}
impl<H, T> Arc<HeaderSlice<H, [T]>> {
/// Converts an `Arc` into a `ThinArc`. This consumes the `Arc`, so the refcount
/// is not modified.
#[inline]
pub(crate) fn into_thin(a: Self) -> ThinArc<H, T> {
assert_eq!(
a.length,
a.slice.len(),
"Length needs to be correct for ThinArc to work"
);
let fat_ptr: *mut ArcInner<HeaderSlice<H, [T]>> = a.ptr();
mem::forget(a);
let thin_ptr = fat_ptr as *mut [usize] as *mut usize;
ThinArc {
ptr: unsafe {
ptr::NonNull::new_unchecked(thin_ptr as *mut ArcInner<HeaderSlice<H, [T; 0]>>)
},
phantom: PhantomData,
}
}
/// Converts a `ThinArc` into an `Arc`. This consumes the `ThinArc`, so the refcount
/// is not modified.
#[inline]
pub(crate) fn from_thin(a: ThinArc<H, T>) -> Self {
let ptr = thin_to_thick(a.ptr.as_ptr());
mem::forget(a);
unsafe {
Arc {
p: ptr::NonNull::new_unchecked(ptr),
phantom: PhantomData,
}
}
}
}
impl<H: PartialEq, T: PartialEq> PartialEq for ThinArc<H, T> {
#[inline]
fn eq(&self, other: &ThinArc<H, T>) -> bool {
**self == **other
}
}
impl<H: Eq, T: Eq> Eq for ThinArc<H, T> {}
impl<H: Hash, T: Hash> Hash for ThinArc<H, T> {
fn hash<HSR: Hasher>(&self, state: &mut HSR) {
(**self).hash(state)
}
}

View File

@ -1,550 +0,0 @@
use crate::{
chain_trivia_pieces, AstNode, Language, SyntaxElement, SyntaxKind, SyntaxNode, SyntaxSlot,
SyntaxToken,
};
use ruff_text_edit::TextEdit;
use ruff_text_size::TextRange;
use std::{
cmp,
collections::BinaryHeap,
iter::{empty, once},
};
use tracing::debug;
pub trait BatchMutationExt<L>: AstNode<Language = L>
where
L: Language,
{
/// It starts a [BatchMutation]
#[must_use = "This method consumes the node and return the BatchMutation api that returns the new SynytaxNode on commit"]
fn begin(self) -> BatchMutation<L>;
}
impl<L, T> BatchMutationExt<L> for T
where
L: Language,
T: AstNode<Language = L>,
{
#[must_use = "This method consumes the node and return the BatchMutation api that returns the new SynytaxNode on commit"]
fn begin(self) -> BatchMutation<L> {
BatchMutation::new(self.into_syntax())
}
}
/// Stores the changes internally used by the [BatchMutation::commit] algorithm.
/// It needs to be sorted by depth in decreasing order, then by range start and
/// by slot in increasing order.
///
/// This is necessary so we can aggregate all changes to the same node using "peek".
#[derive(Debug, Clone)]
struct CommitChange<L: Language> {
parent_depth: usize,
parent: Option<SyntaxNode<L>>,
parent_range: Option<(u32, u32)>,
new_node_slot: usize,
new_node: Option<SyntaxElement<L>>,
}
impl<L: Language> CommitChange<L> {
/// Returns the "ordering key" for a change, controlling in what order this
/// change will be applied relatively to other changes. The key consists of
/// a tuple of numeric values representing the depth, parent start and slot
/// of the corresponding change
fn key(&self) -> (usize, cmp::Reverse<u32>, cmp::Reverse<usize>) {
(
self.parent_depth,
cmp::Reverse(self.parent_range.map(|(start, _)| start).unwrap_or(0)),
cmp::Reverse(self.new_node_slot),
)
}
}
impl<L: Language> PartialEq for CommitChange<L> {
fn eq(&self, other: &Self) -> bool {
self.key() == other.key()
}
}
impl<L: Language> Eq for CommitChange<L> {}
/// We order first by depth. Then by the range of the node.
///
/// The first is important to guarantee that all nodes that will be changed
/// in the future are still valid when using the SyntaxNodes that we have.
///
/// The second is important to guarantee that the ".peek()" we do below is
/// sufficient to see the same node in case of two or more nodes having the
/// same depth.
impl<L: Language> PartialOrd for CommitChange<L> {
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
Some(self.cmp(other))
}
}
impl<L: Language> Ord for CommitChange<L> {
fn cmp(&self, other: &Self) -> cmp::Ordering {
self.key().cmp(&other.key())
}
}
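// A hedged ordering sketch, assuming the `RawLanguage` test language from
// `crate::raw_language`: the max-heap pops the deepest change first.
#[cfg(test)]
mod commit_change_order_sketch {
    use super::CommitChange;
    use crate::raw_language::RawLanguage;
    use std::collections::BinaryHeap;

    #[test]
    fn deeper_changes_pop_first() {
        let shallow = CommitChange::<RawLanguage> {
            parent_depth: 1,
            parent: None,
            parent_range: Some((10, 20)),
            new_node_slot: 0,
            new_node: None,
        };
        let deep = CommitChange::<RawLanguage> {
            parent_depth: 3,
            parent: None,
            parent_range: Some((12, 14)),
            new_node_slot: 0,
            new_node: None,
        };

        let mut heap = BinaryHeap::new();
        heap.push(shallow);
        heap.push(deep);

        // Deeper changes are applied first, per the `Ord` impl above.
        assert_eq!(heap.pop().unwrap().parent_depth, 3);
        assert_eq!(heap.pop().unwrap().parent_depth, 1);
    }
}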
#[derive(Debug, Clone)]
pub struct BatchMutation<L>
where
L: Language,
{
root: SyntaxNode<L>,
changes: BinaryHeap<CommitChange<L>>,
}
impl<L> BatchMutation<L>
where
L: Language,
{
pub fn new(root: SyntaxNode<L>) -> Self {
Self {
root,
changes: BinaryHeap::new(),
}
}
/// Push a change to replace the "prev_node" with "next_node".
/// Trivia from "prev_node" is automatically copied to "next_node".
///
/// Changes to take effect must be committed.
pub fn replace_node<T>(&mut self, prev_node: T, next_node: T)
where
T: AstNode<Language = L>,
{
self.replace_element(
prev_node.into_syntax().into(),
next_node.into_syntax().into(),
)
}
/// Push a change to replace the "prev_token" with "next_token".
/// Trivia from "prev_token" is automatically copied to "next_token".
///
/// Changes to take effect must be committed.
pub fn replace_token(&mut self, prev_token: SyntaxToken<L>, next_token: SyntaxToken<L>) {
self.replace_element(prev_token.into(), next_token.into())
}
/// Push a change to replace the "prev_element" with "next_element".
/// Trivia from "prev_element" is automatically copied to "next_element".
///
/// Changes to take effect must be committed.
pub fn replace_element(
&mut self,
prev_element: SyntaxElement<L>,
next_element: SyntaxElement<L>,
) {
let (prev_leading_trivia, prev_trailing_trivia) = match &prev_element {
SyntaxElement::Node(node) => (
node.first_token().map(|token| token.leading_trivia()),
node.last_token().map(|token| token.trailing_trivia()),
),
SyntaxElement::Token(token) => {
(Some(token.leading_trivia()), Some(token.trailing_trivia()))
}
};
let next_element = match next_element {
SyntaxElement::Node(mut node) => {
if let Some(token) = node.first_token() {
let new_token = match prev_leading_trivia {
Some(prev_leading_trivia) => {
token.with_leading_trivia_pieces(prev_leading_trivia.pieces())
}
None => token.with_leading_trivia_pieces(empty()),
};
node = node.replace_child(token.into(), new_token.into()).unwrap();
}
if let Some(token) = node.last_token() {
let new_token = match prev_trailing_trivia {
Some(prev_trailing_trivia) => {
token.with_trailing_trivia_pieces(prev_trailing_trivia.pieces())
}
None => token.with_trailing_trivia_pieces(empty()),
};
node = node.replace_child(token.into(), new_token.into()).unwrap();
}
SyntaxElement::Node(node)
}
SyntaxElement::Token(token) => {
let new_token = match prev_leading_trivia {
Some(prev_leading_trivia) => {
token.with_leading_trivia_pieces(prev_leading_trivia.pieces())
}
None => token.with_leading_trivia_pieces(empty()),
};
let new_token = match prev_trailing_trivia {
Some(prev_trailing_trivia) => {
new_token.with_trailing_trivia_pieces(prev_trailing_trivia.pieces())
}
None => new_token.with_trailing_trivia_pieces(empty()),
};
SyntaxElement::Token(new_token)
}
};
self.push_change(prev_element, Some(next_element))
}
/// Push a change to replace the "prev_node" with "next_node".
///
/// Changes to take effect must be committed.
pub fn replace_node_discard_trivia<T>(&mut self, prev_node: T, next_node: T)
where
T: AstNode<Language = L>,
{
self.replace_element_discard_trivia(
prev_node.into_syntax().into(),
next_node.into_syntax().into(),
)
}
/// Push a change to replace the "prev_token" with "next_token".
///
/// Changes to take effect must be committed.
pub fn replace_token_discard_trivia(
&mut self,
prev_token: SyntaxToken<L>,
next_token: SyntaxToken<L>,
) {
self.replace_element_discard_trivia(prev_token.into(), next_token.into())
}
/// Push a change to replace the "prev_token" with "next_token".
///
/// - leading trivia of `prev_token`
/// - leading trivia of `next_token`
/// - trailing trivia of `prev_token`
/// - trailing trivia of `next_token`
pub fn replace_token_transfer_trivia(
&mut self,
prev_token: SyntaxToken<L>,
next_token: SyntaxToken<L>,
) {
let leading_trivia = chain_trivia_pieces(
prev_token.leading_trivia().pieces(),
next_token.leading_trivia().pieces(),
);
let trailing_trivia = chain_trivia_pieces(
prev_token.trailing_trivia().pieces(),
next_token.trailing_trivia().pieces(),
);
let new_token = next_token
.with_leading_trivia_pieces(leading_trivia)
.with_trailing_trivia_pieces(trailing_trivia);
self.replace_token_discard_trivia(prev_token, new_token)
}
/// Push a change to replace the "prev_element" with "next_element".
///
/// Changes to take effect must be committed.
pub fn replace_element_discard_trivia(
&mut self,
prev_element: SyntaxElement<L>,
next_element: SyntaxElement<L>,
) {
self.push_change(prev_element, Some(next_element))
}
/// Push a change to remove the specified token.
///
/// Changes to take effect must be committed.
pub fn remove_token(&mut self, prev_token: SyntaxToken<L>) {
self.remove_element(prev_token.into())
}
/// Push a change to remove the specified node.
///
/// Changes to take effect must be committed.
pub fn remove_node<T>(&mut self, prev_node: T)
where
T: AstNode<Language = L>,
{
self.remove_element(prev_node.into_syntax().into())
}
/// Push a change to remove the specified element.
///
/// Changes to take effect must be committed.
pub fn remove_element(&mut self, prev_element: SyntaxElement<L>) {
self.push_change(prev_element, None)
}
fn push_change(
&mut self,
prev_element: SyntaxElement<L>,
next_element: Option<SyntaxElement<L>>,
) {
let new_node_slot = prev_element.index();
let parent = prev_element.parent();
let parent_range: Option<(u32, u32)> = parent.as_ref().map(|p| {
let range = p.text_range();
(range.start().into(), range.end().into())
});
let parent_depth = parent.as_ref().map(|p| p.ancestors().count()).unwrap_or(0);
debug!("pushing change...");
self.changes.push(CommitChange {
parent_depth,
parent,
parent_range,
new_node_slot,
new_node: next_element,
});
}
/// Returns the range of the document modified by this mutation along with
/// a list of individual text edits to be performed on the source code, or
/// [None] if the mutation is empty
pub fn as_text_edits(&self) -> Option<(TextRange, TextEdit)> {
let mut range = None;
debug!(" changes {:?}", &self.changes);
for change in &self.changes {
let parent = change.parent.as_ref().unwrap_or(&self.root);
let delete = match parent.slots().nth(change.new_node_slot) {
Some(SyntaxSlot::Node(node)) => node.text_range(),
Some(SyntaxSlot::Token(token)) => token.text_range(),
_ => continue,
};
range = match range {
None => Some(delete),
Some(range) => Some(range.cover(delete)),
};
}
let text_range = range?;
let old = self.root.to_string();
let new = self.clone().commit().to_string();
let text_edit = TextEdit::from_unicode_words(&old, &new);
Some((text_range, text_edit))
}
/// The core of the batch mutation algorithm can be summarized as:
/// 1 - Iterate all requested changes;
/// 2 - Insert them into a heap (priority queue) by depth. Deeper changes are done first;
/// 3 - Loop popping requested changes from the heap, taking the deepest change we have for the moment;
/// 4 - Each requested change has a "parent", an "index" and the "new node" (or None);
/// 5 - Clone the current parent's "parent", the "grandparent";
/// 6 - Detach the current "parent" from the tree;
/// 7 - Replace the old node at "index" at the current "parent" with the current "new node";
/// 8 - Insert into the heap the grandparent as the parent and the current "parent" as the "new node";
///
/// This is the simple case. The algorithm also has a more complex case when two changes have a common ancestor,
/// which can actually be one of the changed nodes.
///
/// To address this case at step 3, when we pop a new change to apply it, we actually aggregate all changes to the
/// current parent together. This is done by the heap because we also sort by node and its range.
///
pub fn commit(self) -> SyntaxNode<L> {
let BatchMutation { root, mut changes } = self;
// Drain the heap of requested changes, deepest changes first
while let Some(item) = changes.pop() {
// If parent is None, we reached the root
if let Some(current_parent) = item.parent {
// This must be done before the detachment below
// because we need nodes that are still valid in the old tree
let grandparent = current_parent.parent();
let grandparent_range = grandparent.as_ref().map(|g| {
let range = g.text_range();
(range.start().into(), range.end().into())
});
let current_parent_slot = current_parent.index();
// Aggregate all modifications to the current parent
// This works because of the Ord we defined in the [CommitChange] struct
let mut modifications = vec![(item.new_node_slot, item.new_node)];
loop {
if let Some(next_change_parent) = changes.peek().and_then(|i| i.parent.as_ref())
{
if *next_change_parent == current_parent {
// SAFETY: `changes.pop()` cannot fail because `changes.peek()` above returned `Some`
let next_change = changes.pop().expect("changes.pop");
// If we have two modification to the same slot,
// last write wins
if let Some(last) = modifications.last() {
if last.0 == next_change.new_node_slot {
modifications.pop();
}
}
modifications.push((next_change.new_node_slot, next_change.new_node));
continue;
}
}
break;
}
// Now we detach the current parent, make all the modifications
// and push a pending change to its parent.
let mut current_parent = current_parent.detach();
let is_list = current_parent.kind().is_list();
let mut removed_slots = 0;
for (index, replace_with) in modifications {
debug_assert!(index >= removed_slots);
let index = index.checked_sub(removed_slots)
.unwrap_or_else(|| panic!("cannot replace element in slot {index} with {removed_slots} removed slots"));
current_parent = if is_list && replace_with.is_none() {
removed_slots += 1;
current_parent.clone().splice_slots(index..=index, empty())
} else {
current_parent
.clone()
.splice_slots(index..=index, once(replace_with))
};
}
changes.push(CommitChange {
parent_depth: item.parent_depth - 1,
parent: grandparent,
parent_range: grandparent_range,
new_node_slot: current_parent_slot,
new_node: Some(SyntaxElement::Node(current_parent)),
});
} else {
let root = item
.new_node
.expect("new_node")
.into_node()
.expect("expected root to be a node and not a token");
return root;
}
}
root
}
pub fn root(&self) -> &SyntaxNode<L> {
&self.root
}
}
#[cfg(test)]
pub mod tests {
use crate::{
raw_language::{LiteralExpression, RawLanguageKind, RawLanguageRoot, RawSyntaxTreeBuilder},
AstNode, BatchMutationExt, SyntaxNodeCast,
};
/// ```
/// 0: ROOT@0..1
/// 0: LITERAL_EXPRESSION@0..1
/// 0: STRING_TOKEN@0..1 "a" [] []
/// ```
fn tree_one(a: &str) -> (RawLanguageRoot, String) {
let mut builder = RawSyntaxTreeBuilder::new();
builder
.start_node(RawLanguageKind::ROOT)
.start_node(RawLanguageKind::LITERAL_EXPRESSION)
.token(RawLanguageKind::STRING_TOKEN, a)
.finish_node()
.finish_node();
let root = builder.finish().cast::<RawLanguageRoot>().unwrap();
let s = format!("{:#?}", root.syntax());
(root, s)
}
/// ```
/// 0: ROOT@0..1
/// 0: LITERAL_EXPRESSION@0..1
/// 0: STRING_TOKEN@0..1 "a" [] []
/// 1: LITERAL_EXPRESSION@0..1
/// 0: STRING_TOKEN@0..1 "b" [] []
/// ```
fn tree_two(a: &str, b: &str) -> (RawLanguageRoot, String) {
let mut builder = RawSyntaxTreeBuilder::new();
builder
.start_node(RawLanguageKind::ROOT)
.start_node(RawLanguageKind::LITERAL_EXPRESSION)
.token(RawLanguageKind::STRING_TOKEN, a)
.finish_node()
.start_node(RawLanguageKind::LITERAL_EXPRESSION)
.token(RawLanguageKind::STRING_TOKEN, b)
.finish_node()
.finish_node();
let root = builder.finish().cast::<RawLanguageRoot>().unwrap();
let s = format!("{:#?}", root.syntax());
(root, s)
}
fn find(root: &RawLanguageRoot, name: &str) -> LiteralExpression {
root.syntax()
.descendants()
.find(|x| x.kind() == RawLanguageKind::LITERAL_EXPRESSION && x.text_trimmed() == name)
.unwrap()
.cast::<LiteralExpression>()
.unwrap()
}
fn clone_detach(root: &RawLanguageRoot, name: &str) -> LiteralExpression {
root.syntax()
.descendants()
.find(|x| x.kind() == RawLanguageKind::LITERAL_EXPRESSION && x.text_trimmed() == name)
.unwrap()
.detach()
.cast::<LiteralExpression>()
.unwrap()
}
#[test]
pub fn ok_batch_mutation_no_changes() {
let (before, before_debug) = tree_one("a");
let batch = before.begin();
let after = batch.commit();
assert_eq!(before_debug, format!("{:#?}", after));
}
#[test]
pub fn ok_batch_mutation_one_change() {
let (before, _) = tree_one("a");
let (expected, expected_debug) = tree_one("b");
let a = find(&before, "a");
let b = clone_detach(&expected, "b");
let mut batch = before.begin();
batch.replace_node(a, b);
let root = batch.commit();
assert_eq!(expected_debug, format!("{:#?}", root));
}
#[test]
pub fn ok_batch_mutation_multiple_changes_different_branches() {
let (before, _) = tree_two("a", "b");
let (expected, expected_debug) = tree_two("c", "d");
let a = find(&before, "a");
let b = find(&before, "b");
let c = clone_detach(&expected, "c");
let d = clone_detach(&expected, "d");
let mut batch = before.begin();
batch.replace_node(a, c);
batch.replace_node(b, d);
let after = batch.commit();
assert_eq!(expected_debug, format!("{:#?}", after));
}
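    #[test]
    pub fn ok_batch_mutation_empty_has_no_text_edits() {
        // A minimal sketch: with no pushed changes, `as_text_edits` finds no
        // covered range and returns `None`.
        let (before, _) = tree_one("a");
        let batch = before.begin();
        assert!(batch.as_text_edits().is_none());
    }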
}

File diff suppressed because it is too large

View File

@ -1,228 +0,0 @@
use std::ops;
use crate::{AstNode, AstNodeList, AstSeparatedList, SyntaxToken};
pub trait AstNodeExt: AstNode {
/// Return a new version of this node with the node `prev_node` replaced with `next_node`
///
/// `prev_node` can be a direct child of this node, or an indirect child through any descendant node
///
/// Returns `None` if `prev_node` is not a descendant of this node
fn replace_node_discard_trivia<N>(self, prev_node: N, next_node: N) -> Option<Self>
where
N: AstNode<Language = Self::Language>,
Self: Sized;
/// Return a new version of this node with the node `prev_node` replaced with `next_node`,
/// transferring the leading and trailing trivia of `prev_node` to `next_node`
///
/// `prev_node` can be a direct child of this node, or an indirect child through any descendant node
///
/// Returns `None` if `prev_node` is not a descendant of this node
fn replace_node<N>(self, prev_node: N, next_node: N) -> Option<Self>
where
N: AstNode<Language = Self::Language>,
Self: Sized;
/// Return a new version of this node with the token `prev_token` replaced with `next_token`
///
/// `prev_token` can be a direct child of this node, or an indirect child through any descendant node
///
/// Returns `None` if `prev_token` is not a descendant of this node
fn replace_token_discard_trivia(
self,
prev_token: SyntaxToken<Self::Language>,
next_token: SyntaxToken<Self::Language>,
) -> Option<Self>
where
Self: Sized;
/// Return a new version of this node with the token `prev_token` replaced with `next_token`,
/// transferring the leading and trailing trivia of `prev_token` to `next_token`
///
/// `prev_token` can be a direct child of this node, or an indirect child through any descendant node
///
/// Returns `None` if `prev_token` is not a descendant of this node
fn replace_token(
self,
prev_token: SyntaxToken<Self::Language>,
next_token: SyntaxToken<Self::Language>,
) -> Option<Self>
where
Self: Sized;
fn detach(self) -> Self;
}
impl<T> AstNodeExt for T
where
T: AstNode,
{
fn replace_node_discard_trivia<N>(self, prev_node: N, next_node: N) -> Option<Self>
where
N: AstNode<Language = Self::Language>,
Self: Sized,
{
Some(Self::unwrap_cast(self.into_syntax().replace_child(
prev_node.into_syntax().into(),
next_node.into_syntax().into(),
)?))
}
fn replace_node<N>(self, prev_node: N, mut next_node: N) -> Option<Self>
where
N: AstNode<Language = Self::Language>,
Self: Sized,
{
// Look up the first token of `prev_node` and `next_node`, and transfer the leading
// trivia of the former to the latter
let prev_first = prev_node.syntax().first_token();
let next_first = next_node.syntax().first_token();
if let (Some(prev_first), Some(next_first)) = (prev_first, next_first) {
let pieces: Vec<_> = prev_first.leading_trivia().pieces().collect();
next_node = next_node.replace_token_discard_trivia(
next_first.clone(),
next_first
.with_leading_trivia(pieces.iter().map(|piece| (piece.kind(), piece.text()))),
)?;
}
// Look up the last token of `prev_node` and `next_node`, and transfer the trailing
// trivia of the former to the latter
let prev_last = prev_node.syntax().last_token();
let next_last = next_node.syntax().last_token();
if let (Some(prev_last), Some(next_last)) = (prev_last, next_last) {
next_node = next_node.replace_token_discard_trivia(
next_last.clone(),
next_last.with_trailing_trivia_pieces(prev_last.trailing_trivia().pieces()),
)?;
}
// Call replace node with the modified `next_node`
self.replace_node_discard_trivia(prev_node, next_node)
}
fn replace_token_discard_trivia(
self,
prev_token: SyntaxToken<Self::Language>,
next_token: SyntaxToken<Self::Language>,
) -> Option<Self>
where
Self: Sized,
{
Some(Self::unwrap_cast(
self.into_syntax()
.replace_child(prev_token.into(), next_token.into())?,
))
}
fn replace_token(
self,
prev_token: SyntaxToken<Self::Language>,
next_token: SyntaxToken<Self::Language>,
) -> Option<Self>
where
Self: Sized,
{
let leading_trivia = prev_token.leading_trivia().pieces();
let trailing_trivia = prev_token.trailing_trivia().pieces();
self.replace_token_discard_trivia(
prev_token,
next_token
.with_leading_trivia_pieces(leading_trivia)
.with_trailing_trivia_pieces(trailing_trivia),
)
}
fn detach(self) -> Self {
Self::unwrap_cast(self.into_syntax().detach())
}
}
pub trait AstNodeListExt: AstNodeList {
/// Replace a range of the children of this list with the content of an iterator
fn splice<R, I>(self, range: R, replace_with: I) -> Self
where
Self: AstNode<Language = <Self as AstNodeList>::Language> + Sized,
R: ops::RangeBounds<usize>,
I: IntoIterator<Item = Self::Node>;
}
impl<T> AstNodeListExt for T
where
T: AstNodeList,
{
fn splice<R, I>(self, range: R, replace_with: I) -> Self
where
Self: AstNode<Language = <Self as AstNodeList>::Language> + Sized,
R: ops::RangeBounds<usize>,
I: IntoIterator<Item = Self::Node>,
{
Self::unwrap_cast(
self.into_syntax_list().into_node().splice_slots(
range,
replace_with
.into_iter()
.map(|node| Some(node.into_syntax().into())),
),
)
}
}
pub trait AstSeparatedListExt: AstSeparatedList {
/// Replace a range of the children of this list with the content of an iterator
///
/// Both the range and iterator work on pairs of node and separator token
fn splice<R, I>(self, range: R, replace_with: I) -> Self
where
Self: AstNode<Language = <Self as AstSeparatedList>::Language> + Sized,
R: ops::RangeBounds<usize>,
I: IntoIterator<
Item = (
Self::Node,
Option<SyntaxToken<<Self as AstSeparatedList>::Language>>,
),
>;
}
impl<T> AstSeparatedListExt for T
where
T: AstSeparatedList,
{
fn splice<R, I>(self, range: R, replace_with: I) -> Self
where
Self: AstNode<Language = <Self as AstSeparatedList>::Language> + Sized,
R: ops::RangeBounds<usize>,
I: IntoIterator<
Item = (
Self::Node,
Option<SyntaxToken<<Self as AstSeparatedList>::Language>>,
),
>,
{
let start_bound = match range.start_bound() {
ops::Bound::Included(index) => ops::Bound::Included(*index * 2),
ops::Bound::Excluded(index) => ops::Bound::Excluded(*index * 2),
ops::Bound::Unbounded => ops::Bound::Unbounded,
};
let end_bound = match range.end_bound() {
ops::Bound::Included(index) => ops::Bound::Included(*index * 2),
ops::Bound::Excluded(index) => ops::Bound::Excluded(*index * 2),
ops::Bound::Unbounded => ops::Bound::Unbounded,
};
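        // Slots interleave nodes and separators (`node, sep, node, sep, ...`),
        // so pair index `i` maps to slot index `i * 2`; e.g. in `a, b, c` the
        // pair holding `b` (index 1) starts at slot 2.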
Self::unwrap_cast(self.into_syntax_list().into_node().splice_slots(
(start_bound, end_bound),
replace_with.into_iter().flat_map(|(node, separator)| {
[
Some(node.into_syntax().into()),
separator.map(|token| token.into()),
]
}),
))
}
}

View File

@ -1,30 +0,0 @@
#[derive(Debug)]
pub(crate) enum CowMut<'a, T> {
Owned(T),
Borrowed(&'a mut T),
}
impl<T> std::ops::Deref for CowMut<'_, T> {
type Target = T;
fn deref(&self) -> &T {
match self {
CowMut::Owned(it) => it,
CowMut::Borrowed(it) => it,
}
}
}
impl<T> std::ops::DerefMut for CowMut<'_, T> {
fn deref_mut(&mut self) -> &mut T {
match self {
CowMut::Owned(it) => it,
CowMut::Borrowed(it) => it,
}
}
}
impl<T: Default> Default for CowMut<'_, T> {
fn default() -> Self {
CowMut::Owned(T::default())
}
}
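// A minimal usage sketch: the same routine can mutate an owned value or a
// caller-provided borrow through the common `DerefMut` impl above.
#[cfg(test)]
mod cow_mut_sketch {
    use super::CowMut;

    fn bump(mut counter: CowMut<'_, u32>) {
        *counter += 1;
    }

    #[test]
    fn works_with_owned_and_borrowed() {
        bump(CowMut::Owned(0));

        let mut shared = 41;
        bump(CowMut::Borrowed(&mut shared));
        assert_eq!(shared, 42);
    }
}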

View File

@ -1,382 +0,0 @@
//! Implementation of the cursors -- API for convenient access to syntax trees.
//!
//! Functional programmers will recognize that this module implements a zipper
//! for a purely functional (green) tree.
//!
//! A cursor node (`SyntaxNode`) points to a `GreenNode` and a parent
//! `SyntaxNode`. This allows the cursor to provide iteration over both
//! ancestors and descendants, as well as cheap access to the absolute offset
//! of the node in the file.
//!
// Implementation notes:
//
// The implementation is utterly and horribly unsafe. This whole module is an
// unsafety boundary. It is believed that the API here is, in principle, sound,
// but the implementation might have bugs.
//
// The core type is `NodeData` -- a heap-allocated reference counted object,
// which points to a green node or a green token, and to the parent `NodeData`.
// Publicly-exposed `SyntaxNode` and `SyntaxToken` own a reference to
// `NodeData`.
//
// `NodeData`s are transient, and are created and destroyed during tree
// traversals. In general, only currently referenced nodes and their ancestors
// are alive at any given moment.
//
// More specifically, `NodeData`'s ref count is equal to the number of
// outstanding `SyntaxNode` and `SyntaxToken` plus the number of children with
// non-zero ref counts. For example, if the user has only a single `SyntaxNode`
// pointing somewhere in the middle of the tree, then all `NodeData` on the path
// from that point towards the root have ref count equal to one.
//
// A `NodeData` which doesn't have a parent (is a root) owns the corresponding
// green node or token, and is responsible for freeing it. Child `NodeData`,
// however, hold a strong reference to their parent node and thus to the root,
// so their corresponding green node is guaranteed to be alive: a chain of
// references is known to exist (child `NodeData` -> root `NodeData` -> root
// `GreenNode` -> child `GreenNode`). They can therefore safely use a "weak
// reference" (raw pointer) to the corresponding green node as an optimization,
// avoiding atomic reference counting on the traversal hot path
mod element;
mod node;
mod token;
mod trivia;
use std::{iter, ops};
use std::{ptr, rc::Rc};
use countme::Count;
pub(crate) use trivia::{SyntaxTrivia, SyntaxTriviaPiecesIterator};
use crate::cursor::node::Siblings;
pub(crate) use crate::cursor::token::SyntaxToken;
use crate::green::{self, GreenElement, GreenNodeData, GreenTokenData};
use crate::{
green::{GreenElementRef, RawSyntaxKind},
NodeOrToken, TextRange, TextSize,
};
pub(crate) use element::SyntaxElement;
pub(crate) use node::{
Preorder, PreorderWithTokens, SyntaxElementChildren, SyntaxNode, SyntaxNodeChildren,
SyntaxSlot, SyntaxSlots,
};
#[derive(Debug)]
struct _SyntaxElement;
pub(crate) fn has_live() -> bool {
countme::get::<_SyntaxElement>().live > 0
}
#[derive(Debug)]
struct NodeData {
_c: Count<_SyntaxElement>,
kind: NodeKind,
slot: u32,
/// Absolute offset for immutable nodes, unused for mutable nodes.
offset: TextSize,
}
/// A single NodeData (red node) is either a "root node" (no parent node and
/// holds a strong reference to the root of the green tree) or a "child node"
/// (holds a strong reference to its parent red node and a weak reference to its
/// counterpart green node)
#[derive(Debug)]
enum NodeKind {
Root {
green: GreenElement,
},
Child {
green: WeakGreenElement,
parent: Rc<NodeData>,
},
}
/// Child SyntaxNodes use "unsafe" weak pointers to refer to their green node.
/// Unlike the safe [std::sync::Weak] these are just a raw pointer: the
/// corresponding [ThinArc](crate::arc::ThinArc) doesn't keep a counter of
/// outstanding weak references or defer the release of the underlying memory
/// until the last `Weak` is dropped. On the other hand, a weak reference to a
/// released green node points to deallocated memory, and it is undefined
/// behavior to dereference it; in the context of `NodeData`, however, this
/// is statically known to never happen.
#[derive(Debug, Clone)]
enum WeakGreenElement {
Node { ptr: ptr::NonNull<GreenNodeData> },
Token { ptr: ptr::NonNull<GreenTokenData> },
}
impl WeakGreenElement {
fn new(green: GreenElementRef) -> Self {
match green {
NodeOrToken::Node(ptr) => Self::Node {
ptr: ptr::NonNull::from(ptr),
},
NodeOrToken::Token(ptr) => Self::Token {
ptr: ptr::NonNull::from(ptr),
},
}
}
fn as_deref(&self) -> GreenElementRef {
match self {
WeakGreenElement::Node { ptr } => GreenElementRef::Node(unsafe { ptr.as_ref() }),
WeakGreenElement::Token { ptr } => GreenElementRef::Token(unsafe { ptr.as_ref() }),
}
}
fn to_owned(&self) -> GreenElement {
match self {
WeakGreenElement::Node { ptr } => {
GreenElement::Node(unsafe { ptr.as_ref().to_owned() })
}
WeakGreenElement::Token { ptr } => {
GreenElement::Token(unsafe { ptr.as_ref().to_owned() })
}
}
}
}
impl NodeData {
#[inline]
fn new(kind: NodeKind, slot: u32, offset: TextSize) -> Rc<NodeData> {
let res = NodeData {
_c: Count::new(),
kind,
slot,
offset,
};
Rc::new(res)
}
#[inline]
fn key(&self) -> (ptr::NonNull<()>, TextSize) {
let weak = match &self.kind {
NodeKind::Root { green } => WeakGreenElement::new(green.as_deref()),
NodeKind::Child { green, .. } => green.clone(),
};
let ptr = match weak {
WeakGreenElement::Node { ptr } => ptr.cast(),
WeakGreenElement::Token { ptr } => ptr.cast(),
};
(ptr, self.offset())
}
#[inline]
fn parent_node(&self) -> Option<SyntaxNode> {
debug_assert!(matches!(
self.parent()?.green(),
GreenElementRef::Node { .. }
));
match &self.kind {
NodeKind::Child { parent, .. } => Some(SyntaxNode {
ptr: parent.clone(),
}),
NodeKind::Root { .. } => None,
}
}
#[inline]
fn parent(&self) -> Option<&NodeData> {
match &self.kind {
NodeKind::Child { parent, .. } => Some(&**parent),
NodeKind::Root { .. } => None,
}
}
#[inline]
fn green(&self) -> GreenElementRef<'_> {
match &self.kind {
NodeKind::Root { green } => green.as_deref(),
NodeKind::Child { green, .. } => green.as_deref(),
}
}
/// Returns an iterator over the siblings of this node. The iterator is positioned at the current node.
#[inline]
fn green_siblings(&self) -> Option<Siblings> {
match &self.parent()?.green() {
GreenElementRef::Node(ptr) => Some(Siblings::new(ptr, self.slot())),
GreenElementRef::Token(_) => {
debug_assert!(
false,
"A token should never be a parent of a token or node."
);
None
}
}
}
#[inline]
fn slot(&self) -> u32 {
self.slot
}
#[inline]
fn offset(&self) -> TextSize {
self.offset
}
#[inline]
fn text_range(&self) -> TextRange {
let offset = self.offset();
let len = self.green().text_len();
TextRange::at(offset, len)
}
#[inline]
fn kind(&self) -> RawSyntaxKind {
self.green().kind()
}
fn next_sibling(&self) -> Option<SyntaxNode> {
let siblings = self.green_siblings()?;
siblings.following().find_map(|child| {
child.element().into_node().and_then(|green| {
let parent = self.parent_node()?;
let offset = parent.offset() + child.rel_offset();
Some(SyntaxNode::new_child(green, parent, child.slot(), offset))
})
})
}
fn prev_sibling(&self) -> Option<SyntaxNode> {
let siblings = self.green_siblings()?;
siblings.previous().find_map(|child| {
child.element().into_node().and_then(|green| {
let parent = self.parent_node()?;
let offset = parent.offset() + child.rel_offset();
Some(SyntaxNode::new_child(green, parent, child.slot(), offset))
})
})
}
fn next_sibling_or_token(&self) -> Option<SyntaxElement> {
let siblings = self.green_siblings()?;
siblings.following().next().and_then(|child| {
let parent = self.parent_node()?;
let offset = parent.offset() + child.rel_offset();
Some(SyntaxElement::new(
child.element(),
parent,
child.slot(),
offset,
))
})
}
fn prev_sibling_or_token(&self) -> Option<SyntaxElement> {
let siblings = self.green_siblings()?;
siblings.previous().next().and_then(|child| {
let parent = self.parent_node()?;
let offset = parent.offset() + child.rel_offset();
Some(SyntaxElement::new(
child.element(),
parent,
child.slot(),
offset,
))
})
}
fn into_green(self: Rc<Self>) -> GreenElement {
match Rc::try_unwrap(self) {
Ok(data) => match data.kind {
NodeKind::Root { green } => green,
NodeKind::Child { green, .. } => green.to_owned(),
},
Err(ptr) => ptr.green().to_owned(),
}
}
/// Return a clone of this subtree detached from its parent
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
fn detach(self: Rc<Self>) -> Rc<Self> {
match &self.kind {
NodeKind::Child { green, .. } => Self::new(
NodeKind::Root {
green: green.to_owned(),
},
0,
0.into(),
),
// If this node is already detached, increment the reference count and return a clone
NodeKind::Root { .. } => self.clone(),
}
}
/// Return a clone of this node with the specified range of slots replaced
/// with the elements of the provided iterator
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
fn splice_slots<R, I>(mut self: Rc<Self>, range: R, replace_with: I) -> Rc<Self>
where
R: ops::RangeBounds<usize>,
I: Iterator<Item = Option<green::GreenElement>>,
{
let green = match self.green() {
NodeOrToken::Node(green) => green.splice_slots(range, replace_with).into(),
NodeOrToken::Token(_) => panic!("called splice_slots on a token node"),
};
// Try to reuse the underlying memory allocation if self is the only
// outstanding reference to this NodeData
match Rc::get_mut(&mut self) {
Some(node) => {
node.kind = NodeKind::Root { green };
node.slot = 0;
node.offset = TextSize::from(0);
self
}
None => Self::new(NodeKind::Root { green }, 0, 0.into()),
}
}
/// Return a new version of this node with the element `prev_elem` replaced with `next_elem`
///
/// `prev_elem` can be a direct child of this node, or an indirect child through any descendant node
///
/// Returns `None` if `prev_elem` is not a descendant of this node
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
fn replace_child(
mut self: Rc<Self>,
prev_elem: SyntaxElement,
next_elem: SyntaxElement,
) -> Option<Rc<Self>> {
let mut green = next_elem.into_green();
let mut elem = prev_elem;
loop {
let node = elem.parent()?;
let is_self = node.key() == self.key();
let index = elem.index();
let range = index..=index;
let replace_with = iter::once(Some(green));
green = node.green().splice_slots(range, replace_with).into();
elem = node.into();
if is_self {
break;
}
}
// Try to reuse the underlying memory allocation if self is the only
// outstanding reference to this NodeData
let result = match Rc::get_mut(&mut self) {
Some(node) => {
node.kind = NodeKind::Root { green };
node.slot = 0;
node.offset = TextSize::from(0);
self
}
None => Self::new(NodeKind::Root { green }, 0, 0.into()),
};
Some(result)
}
}

View File

@ -1,129 +0,0 @@
use crate::cursor::{SyntaxNode, SyntaxToken};
use crate::green::{GreenElement, GreenElementRef};
use crate::{NodeOrToken, RawSyntaxKind, TokenAtOffset};
use ruff_text_size::{TextRange, TextSize};
use std::iter;
pub(crate) type SyntaxElement = NodeOrToken<SyntaxNode, SyntaxToken>;
impl SyntaxElement {
pub(super) fn new(
element: GreenElementRef<'_>,
parent: SyntaxNode,
slot: u32,
offset: TextSize,
) -> SyntaxElement {
match element {
NodeOrToken::Node(node) => SyntaxNode::new_child(node, parent, slot, offset).into(),
NodeOrToken::Token(token) => SyntaxToken::new(token, parent, slot, offset).into(),
}
}
#[inline]
pub fn text_range(&self) -> TextRange {
match self {
NodeOrToken::Node(it) => it.text_range(),
NodeOrToken::Token(it) => it.text_range(),
}
}
#[inline]
pub fn index(&self) -> usize {
match self {
NodeOrToken::Node(it) => it.index(),
NodeOrToken::Token(it) => it.index(),
}
}
#[inline]
pub fn kind(&self) -> RawSyntaxKind {
match self {
NodeOrToken::Node(it) => it.kind(),
NodeOrToken::Token(it) => it.kind(),
}
}
#[inline]
pub fn parent(&self) -> Option<SyntaxNode> {
match self {
NodeOrToken::Node(it) => it.parent(),
NodeOrToken::Token(it) => it.parent(),
}
}
#[inline]
pub fn ancestors(&self) -> impl Iterator<Item = SyntaxNode> {
let first = match self {
NodeOrToken::Node(it) => Some(it.clone()),
NodeOrToken::Token(it) => it.parent(),
};
iter::successors(first, SyntaxNode::parent)
}
pub fn first_token(&self) -> Option<SyntaxToken> {
match self {
NodeOrToken::Node(it) => it.first_token(),
NodeOrToken::Token(it) => Some(it.clone()),
}
}
pub fn last_token(&self) -> Option<SyntaxToken> {
match self {
NodeOrToken::Node(it) => it.last_token(),
NodeOrToken::Token(it) => Some(it.clone()),
}
}
pub fn next_sibling_or_token(&self) -> Option<SyntaxElement> {
match self {
NodeOrToken::Node(it) => it.next_sibling_or_token(),
NodeOrToken::Token(it) => it.next_sibling_or_token(),
}
}
pub fn prev_sibling_or_token(&self) -> Option<SyntaxElement> {
match self {
NodeOrToken::Node(it) => it.prev_sibling_or_token(),
NodeOrToken::Token(it) => it.prev_sibling_or_token(),
}
}
pub(super) fn token_at_offset(&self, offset: TextSize) -> TokenAtOffset<SyntaxToken> {
assert!(self.text_range().start() <= offset && offset <= self.text_range().end());
match self {
NodeOrToken::Token(token) => TokenAtOffset::Single(token.clone()),
NodeOrToken::Node(node) => node.token_at_offset(offset),
}
}
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn detach(self) -> Self {
match self {
NodeOrToken::Node(it) => Self::Node(it.detach()),
NodeOrToken::Token(it) => Self::Token(it.detach()),
}
}
pub(crate) fn into_green(self) -> GreenElement {
match self {
NodeOrToken::Node(it) => it.ptr.into_green(),
NodeOrToken::Token(it) => it.into_green(),
}
}
}
// region: impls
impl From<SyntaxNode> for SyntaxElement {
#[inline]
fn from(node: SyntaxNode) -> SyntaxElement {
NodeOrToken::Node(node)
}
}
impl From<SyntaxToken> for SyntaxElement {
#[inline]
fn from(token: SyntaxToken) -> SyntaxElement {
NodeOrToken::Token(token)
}
}
// endregion

View File

@ -1,959 +0,0 @@
use crate::cursor::{NodeData, SyntaxElement, SyntaxToken, SyntaxTrivia};
use crate::green::{Child, Children, GreenElementRef, Slot};
use crate::{
Direction, GreenNode, GreenNodeData, NodeOrToken, RawSyntaxKind, SyntaxNodeText, TokenAtOffset,
WalkEvent,
};
use ruff_text_size::{TextRange, TextSize};
use std::hash::{Hash, Hasher};
use std::iter::FusedIterator;
use std::ops;
use std::ptr::NonNull;
use std::rc::Rc;
use std::{fmt, iter};
use super::{GreenElement, NodeKind, WeakGreenElement};
#[derive(Clone)]
pub(crate) struct SyntaxNode {
pub(super) ptr: Rc<NodeData>,
}
impl SyntaxNode {
pub(crate) fn new_root(green: GreenNode) -> SyntaxNode {
SyntaxNode {
ptr: NodeData::new(
NodeKind::Root {
green: GreenElement::Node(green),
},
0,
0.into(),
),
}
}
pub(super) fn new_child(
green: &GreenNodeData,
parent: SyntaxNode,
slot: u32,
offset: TextSize,
) -> SyntaxNode {
SyntaxNode {
ptr: NodeData::new(
NodeKind::Child {
green: WeakGreenElement::new(GreenElementRef::Node(green)),
parent: parent.ptr,
},
slot,
offset,
),
}
}
pub fn clone_subtree(&self) -> SyntaxNode {
SyntaxNode::new_root(self.green().into())
}
#[inline]
pub(super) fn data(&self) -> &NodeData {
self.ptr.as_ref()
}
#[inline]
pub fn kind(&self) -> RawSyntaxKind {
self.data().kind()
}
#[inline]
pub(super) fn offset(&self) -> TextSize {
self.data().offset()
}
pub(crate) fn element_in_slot(&self, slot_index: u32) -> Option<SyntaxElement> {
let slot = self
.slots()
.nth(slot_index as usize)
.expect("Slot index out of bounds");
slot.map(|element| element)
}
#[inline]
pub(crate) fn slots(&self) -> SyntaxSlots {
SyntaxSlots::new(self.clone())
}
#[inline]
pub fn text_range(&self) -> TextRange {
self.data().text_range()
}
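/// Like `text_range`, but with the leading trivia of the first token and
/// the trailing trivia of the last token excluded (tokens consisting only
/// of trivia are skipped entirely). E.g. for `"  a // c"` parsed as one
/// token with surrounding trivia, the trimmed range covers only `a`.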
pub fn text_trimmed_range(&self) -> TextRange {
let range = self.text_range();
let mut start = range.start();
let mut end = range.end();
// Remove all trivia from the start of the node
let mut token = self.first_token();
while let Some(t) = token.take() {
let (leading_len, trailing_len, total_len) = t.green().leading_trailing_total_len();
let token_len: u32 = (total_len - leading_len - trailing_len).into();
if token_len == 0 {
start += total_len;
token = t.next_token();
} else {
start += leading_len;
}
}
// Remove all trivia from the end of the node
let mut token = self.last_token();
while let Some(t) = token.take() {
let (leading_len, trailing_len, total_len) = t.green().leading_trailing_total_len();
let token_len: u32 = (total_len - leading_len - trailing_len).into();
if token_len == 0 {
end -= total_len;
token = t.prev_token();
} else {
end -= trailing_len;
}
}
TextRange::new(start, end.max(start))
}
pub fn first_leading_trivia(&self) -> Option<SyntaxTrivia> {
self.first_token().map(|x| x.leading_trivia())
}
pub fn last_trailing_trivia(&self) -> Option<SyntaxTrivia> {
self.last_token().map(|x| x.trailing_trivia())
}
#[inline]
pub fn index(&self) -> usize {
self.data().slot() as usize
}
#[inline]
pub fn text(&self) -> SyntaxNodeText {
SyntaxNodeText::new(self.clone())
}
#[inline]
pub fn text_trimmed(&self) -> SyntaxNodeText {
SyntaxNodeText::with_range(self.clone(), self.text_trimmed_range())
}
#[inline]
pub(crate) fn key(&self) -> (NonNull<()>, TextSize) {
self.data().key()
}
#[inline]
pub(crate) fn green(&self) -> &GreenNodeData {
self.data().green().into_node().unwrap()
}
#[inline]
pub fn parent(&self) -> Option<SyntaxNode> {
self.data().parent_node()
}
#[inline]
pub fn ancestors(&self) -> impl Iterator<Item = SyntaxNode> {
iter::successors(Some(self.clone()), SyntaxNode::parent)
}
#[inline]
pub fn children(&self) -> SyntaxNodeChildren {
SyntaxNodeChildren::new(self.clone())
}
#[inline]
pub fn children_with_tokens(&self) -> SyntaxElementChildren {
SyntaxElementChildren::new(self.clone())
}
#[inline]
pub fn tokens(&self) -> impl Iterator<Item = SyntaxToken> + DoubleEndedIterator + '_ {
self.green().children().filter_map(|child| {
child.element().into_token().map(|token| {
SyntaxToken::new(
token,
self.clone(),
child.slot(),
self.offset() + child.rel_offset(),
)
})
})
}
pub fn first_child(&self) -> Option<SyntaxNode> {
self.green().children().find_map(|child| {
child.element().into_node().map(|green| {
SyntaxNode::new_child(
green,
self.clone(),
child.slot(),
self.offset() + child.rel_offset(),
)
})
})
}
pub fn last_child(&self) -> Option<SyntaxNode> {
self.green().children().rev().find_map(|child| {
child.element().into_node().map(|green| {
SyntaxNode::new_child(
green,
self.clone(),
child.slot(),
self.offset() + child.rel_offset(),
)
})
})
}
pub fn first_child_or_token(&self) -> Option<SyntaxElement> {
self.green().children().next().map(|child| {
SyntaxElement::new(
child.element(),
self.clone(),
child.slot(),
self.offset() + child.rel_offset(),
)
})
}
pub fn last_child_or_token(&self) -> Option<SyntaxElement> {
self.green().children().next_back().map(|child| {
SyntaxElement::new(
child.element(),
self.clone(),
child.slot(),
self.offset() + child.rel_offset(),
)
})
}
pub fn next_sibling(&self) -> Option<SyntaxNode> {
self.data().next_sibling()
}
pub fn prev_sibling(&self) -> Option<SyntaxNode> {
self.data().prev_sibling()
}
pub fn next_sibling_or_token(&self) -> Option<SyntaxElement> {
self.data().next_sibling_or_token()
}
pub fn prev_sibling_or_token(&self) -> Option<SyntaxElement> {
self.data().prev_sibling_or_token()
}
pub fn first_token(&self) -> Option<SyntaxToken> {
self.descendants_with_tokens(Direction::Next)
.find_map(|x| x.into_token())
}
pub fn last_token(&self) -> Option<SyntaxToken> {
PreorderWithTokens::new(self.clone(), Direction::Prev)
.filter_map(|event| match event {
WalkEvent::Enter(it) => Some(it),
WalkEvent::Leave(_) => None,
})
.find_map(|x| x.into_token())
}
#[inline]
pub fn siblings(&self, direction: Direction) -> impl Iterator<Item = SyntaxNode> {
iter::successors(Some(self.clone()), move |node| match direction {
Direction::Next => node.next_sibling(),
Direction::Prev => node.prev_sibling(),
})
}
#[inline]
pub fn siblings_with_tokens(
&self,
direction: Direction,
) -> impl Iterator<Item = SyntaxElement> {
let me: SyntaxElement = self.clone().into();
iter::successors(Some(me), move |el| match direction {
Direction::Next => el.next_sibling_or_token(),
Direction::Prev => el.prev_sibling_or_token(),
})
}
#[inline]
pub fn descendants(&self) -> impl Iterator<Item = SyntaxNode> {
self.preorder().filter_map(|event| match event {
WalkEvent::Enter(node) => Some(node),
WalkEvent::Leave(_) => None,
})
}
#[inline]
pub fn descendants_with_tokens(
&self,
direction: Direction,
) -> impl Iterator<Item = SyntaxElement> {
self.preorder_with_tokens(direction)
.filter_map(|event| match event {
WalkEvent::Enter(it) => Some(it),
WalkEvent::Leave(_) => None,
})
}
#[inline]
pub fn preorder(&self) -> Preorder {
Preorder::new(self.clone())
}
#[inline]
pub fn preorder_with_tokens(&self, direction: Direction) -> PreorderWithTokens {
PreorderWithTokens::new(self.clone(), direction)
}
pub(crate) fn preorder_slots(&self) -> SlotsPreorder {
SlotsPreorder::new(self.clone())
}
pub fn token_at_offset(&self, offset: TextSize) -> TokenAtOffset<SyntaxToken> {
// TODO: this could be faster if we first drill-down to node, and only
// then switch to token search. We should also replace explicit
// recursion with a loop.
let range = self.text_range();
assert!(
range.start() <= offset && offset <= range.end(),
"Bad offset: range {:?} offset {:?}",
range,
offset
);
if range.is_empty() {
return TokenAtOffset::None;
}
let mut children = self.children_with_tokens().filter(|child| {
let child_range = child.text_range();
!child_range.is_empty() && child_range.contains_inclusive(offset)
});
let left = children.next().unwrap();
let right = children.next();
assert!(children.next().is_none());
if let Some(right) = right {
match (left.token_at_offset(offset), right.token_at_offset(offset)) {
(TokenAtOffset::Single(left), TokenAtOffset::Single(right)) => {
TokenAtOffset::Between(left, right)
}
_ => unreachable!(),
}
} else {
left.token_at_offset(offset)
}
}
pub fn covering_element(&self, range: TextRange) -> SyntaxElement {
let mut res: SyntaxElement = self.clone().into();
loop {
assert!(
res.text_range().contains_range(range),
"Bad range: node range {:?}, range {:?}",
res.text_range(),
range,
);
res = match &res {
NodeOrToken::Token(_) => return res,
NodeOrToken::Node(node) => match node.child_or_token_at_range(range) {
Some(it) => it,
None => return res,
},
};
}
}
pub fn child_or_token_at_range(&self, range: TextRange) -> Option<SyntaxElement> {
let rel_range = range - self.offset();
self.green()
.slot_at_range(rel_range)
.and_then(|(index, rel_offset, slot)| {
slot.as_ref().map(|green| {
SyntaxElement::new(
green,
self.clone(),
index as u32,
self.offset() + rel_offset,
)
})
})
}
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn detach(self) -> Self {
Self {
ptr: self.ptr.detach(),
}
}
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn splice_slots<R, I>(self, range: R, replace_with: I) -> Self
where
R: ops::RangeBounds<usize>,
I: Iterator<Item = Option<SyntaxElement>>,
{
Self {
ptr: self.ptr.splice_slots(
range,
replace_with.into_iter().map(|element| {
element.map(|child| match child.detach() {
NodeOrToken::Node(it) => it.ptr.into_green(),
NodeOrToken::Token(it) => it.into_green(),
})
}),
),
}
}
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn replace_child(self, prev_elem: SyntaxElement, next_elem: SyntaxElement) -> Option<Self> {
Some(Self {
ptr: self.ptr.replace_child(prev_elem, next_elem)?,
})
}
}
// Identity semantics for hash & eq
impl PartialEq for SyntaxNode {
#[inline]
fn eq(&self, other: &SyntaxNode) -> bool {
self.data().key() == other.data().key()
}
}
impl Eq for SyntaxNode {}
impl Hash for SyntaxNode {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
self.data().key().hash(state);
}
}
impl fmt::Debug for SyntaxNode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("SyntaxNode")
.field("kind", &self.kind())
.field("text_range", &self.text_range())
.finish()
}
}
impl fmt::Display for SyntaxNode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.preorder_with_tokens(Direction::Next)
.filter_map(|event| match event {
WalkEvent::Enter(NodeOrToken::Token(token)) => Some(token),
_ => None,
})
.try_for_each(|it| fmt::Display::fmt(&it, f))
}
}
// region: iterators
#[derive(Clone, Debug)]
pub(crate) struct SyntaxNodeChildren {
next: Option<SyntaxNode>,
}
impl SyntaxNodeChildren {
fn new(parent: SyntaxNode) -> SyntaxNodeChildren {
SyntaxNodeChildren {
next: parent.first_child(),
}
}
}
impl Iterator for SyntaxNodeChildren {
type Item = SyntaxNode;
fn next(&mut self) -> Option<SyntaxNode> {
self.next.take().map(|next| {
self.next = next.next_sibling();
next
})
}
}
impl FusedIterator for SyntaxNodeChildren {}
#[derive(Clone, Debug, Default)]
pub(crate) struct SyntaxElementChildren {
next: Option<SyntaxElement>,
}
impl SyntaxElementChildren {
fn new(parent: SyntaxNode) -> SyntaxElementChildren {
SyntaxElementChildren {
next: parent.first_child_or_token(),
}
}
}
impl Iterator for SyntaxElementChildren {
type Item = SyntaxElement;
fn next(&mut self) -> Option<SyntaxElement> {
self.next.take().map(|next| {
self.next = next.next_sibling_or_token();
next
})
}
}
impl FusedIterator for SyntaxElementChildren {}
pub(crate) struct Preorder {
start: SyntaxNode,
next: Option<WalkEvent<SyntaxNode>>,
skip_subtree: bool,
}
impl Preorder {
fn new(start: SyntaxNode) -> Preorder {
let next = Some(WalkEvent::Enter(start.clone()));
Preorder {
start,
next,
skip_subtree: false,
}
}
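/// Skips the descendants of the node whose `WalkEvent::Enter` was returned
/// last: the iterator's next event becomes that node's `WalkEvent::Leave`
/// instead of entering its first child.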
pub fn skip_subtree(&mut self) {
self.skip_subtree = true;
}
#[cold]
fn do_skip(&mut self) {
self.next = self.next.take().map(|next| match next {
WalkEvent::Enter(first_child) => WalkEvent::Leave(first_child.parent().unwrap()),
WalkEvent::Leave(parent) => WalkEvent::Leave(parent),
})
}
}
impl Iterator for Preorder {
type Item = WalkEvent<SyntaxNode>;
fn next(&mut self) -> Option<WalkEvent<SyntaxNode>> {
if self.skip_subtree {
self.do_skip();
self.skip_subtree = false;
}
let next = self.next.take();
self.next = next.as_ref().and_then(|next| {
Some(match next {
WalkEvent::Enter(node) => match node.first_child() {
Some(child) => WalkEvent::Enter(child),
None => WalkEvent::Leave(node.clone()),
},
WalkEvent::Leave(node) => {
if node == &self.start {
return None;
}
match node.next_sibling() {
Some(sibling) => WalkEvent::Enter(sibling),
None => WalkEvent::Leave(node.parent()?),
}
}
})
});
next
}
}
impl FusedIterator for Preorder {}
pub(crate) struct PreorderWithTokens {
start: SyntaxElement,
next: Option<WalkEvent<SyntaxElement>>,
skip_subtree: bool,
direction: Direction,
}
impl PreorderWithTokens {
fn new(start: SyntaxNode, direction: Direction) -> PreorderWithTokens {
let next = Some(WalkEvent::Enter(start.clone().into()));
PreorderWithTokens {
start: start.into(),
next,
direction,
skip_subtree: false,
}
}
pub fn skip_subtree(&mut self) {
self.skip_subtree = true;
}
#[cold]
fn do_skip(&mut self) {
self.next = self.next.take().map(|next| match next {
WalkEvent::Enter(first_child) => WalkEvent::Leave(first_child.parent().unwrap().into()),
WalkEvent::Leave(parent) => WalkEvent::Leave(parent),
})
}
}
impl Iterator for PreorderWithTokens {
type Item = WalkEvent<SyntaxElement>;
fn next(&mut self) -> Option<WalkEvent<SyntaxElement>> {
if self.skip_subtree {
self.do_skip();
self.skip_subtree = false;
}
let next = self.next.take();
self.next = next.as_ref().and_then(|next| {
Some(match next {
WalkEvent::Enter(el) => match el {
NodeOrToken::Node(node) => {
let next = match self.direction {
Direction::Next => node.first_child_or_token(),
Direction::Prev => node.last_child_or_token(),
};
match next {
Some(child) => WalkEvent::Enter(child),
None => WalkEvent::Leave(node.clone().into()),
}
}
NodeOrToken::Token(token) => WalkEvent::Leave(token.clone().into()),
},
WalkEvent::Leave(el) if el == &self.start => return None,
WalkEvent::Leave(el) => {
let next = match self.direction {
Direction::Next => el.next_sibling_or_token(),
Direction::Prev => el.prev_sibling_or_token(),
};
match next {
Some(sibling) => WalkEvent::Enter(sibling),
None => WalkEvent::Leave(el.parent()?.into()),
}
}
})
});
next
}
}
impl FusedIterator for PreorderWithTokens {}
/// Represents a cursor to a green node slot. A slot either contains an element or is empty
/// if the child isn't present in the source.
#[derive(Debug, Clone)]
pub(crate) enum SyntaxSlot {
Node(SyntaxNode),
Token(SyntaxToken),
Empty { parent: SyntaxNode, index: u32 },
}
impl From<SyntaxElement> for SyntaxSlot {
fn from(element: SyntaxElement) -> Self {
match element {
SyntaxElement::Node(node) => SyntaxSlot::Node(node),
SyntaxElement::Token(token) => SyntaxSlot::Token(token),
}
}
}
impl SyntaxSlot {
#[inline]
pub fn map<F, R>(self, mapper: F) -> Option<R>
where
F: FnOnce(SyntaxElement) -> R,
{
match self {
SyntaxSlot::Node(node) => Some(mapper(SyntaxElement::Node(node))),
SyntaxSlot::Token(token) => Some(mapper(SyntaxElement::Token(token))),
SyntaxSlot::Empty { .. } => None,
}
}
}
/// Iterator over a node's slots
#[derive(Debug, Clone)]
pub(crate) struct SyntaxSlots {
/// Position of the next element to return.
pos: u32,
/// Position of the last element returned from the back (see [nth_back]).
/// Initially points one element past the last slot.
///
/// [nth_back]: https://doc.rust-lang.org/std/iter/trait.DoubleEndedIterator.html#method.nth_back
back_pos: u32,
parent: SyntaxNode,
}
impl SyntaxSlots {
#[inline]
fn new(parent: SyntaxNode) -> Self {
Self {
pos: 0,
back_pos: parent.green().slice().len() as u32,
parent,
}
}
/// Returns a slice containing the remaining elements to iterate over, or
/// an empty slice if the iterator has reached the end.
#[inline]
fn slice(&self) -> &[Slot] {
if self.pos < self.back_pos {
&self.parent.green().slice()[self.pos as usize..self.back_pos as usize]
} else {
&[]
}
}
fn map_slot(&self, slot: &Slot, slot_index: u32) -> SyntaxSlot {
match slot {
Slot::Empty { .. } => SyntaxSlot::Empty {
parent: self.parent.clone(),
index: slot_index,
},
Slot::Token { rel_offset, token } => SyntaxSlot::Token(SyntaxToken::new(
token,
self.parent.clone(),
slot_index,
self.parent.offset() + rel_offset,
)),
Slot::Node { rel_offset, node } => SyntaxSlot::Node(SyntaxNode::new_child(
node,
self.parent.clone(),
slot_index,
self.parent.offset() + rel_offset,
)),
}
}
}
impl Iterator for SyntaxSlots {
type Item = SyntaxSlot;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
let slot = self.slice().first()?;
let mapped = self.map_slot(slot, self.pos);
self.pos += 1;
Some(mapped)
}
#[inline(always)]
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.slice().len();
(len, Some(len))
}
#[inline(always)]
fn count(self) -> usize
where
Self: Sized,
{
self.len()
}
#[inline]
fn last(mut self) -> Option<Self::Item>
where
Self: Sized,
{
self.next_back()
}
#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
self.pos += n as u32;
self.next()
}
}
impl ExactSizeIterator for SyntaxSlots {
#[inline(always)]
fn len(&self) -> usize {
self.slice().len()
}
}
impl FusedIterator for SyntaxSlots {}
impl DoubleEndedIterator for SyntaxSlots {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
let slot = self.slice().last()?;
let mapped = self.map_slot(slot, self.back_pos - 1);
self.back_pos -= 1;
Some(mapped)
}
#[inline]
fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
self.back_pos -= n as u32;
self.next_back()
}
}
/// Iterator to visit a node's slots in pre-order.
pub(crate) struct SlotsPreorder {
start: SyntaxNode,
next: Option<WalkEvent<SyntaxSlot>>,
}
impl SlotsPreorder {
fn new(start: SyntaxNode) -> Self {
let next = Some(WalkEvent::Enter(SyntaxSlot::Node(start.clone())));
SlotsPreorder { start, next }
}
}
impl Iterator for SlotsPreorder {
type Item = WalkEvent<SyntaxSlot>;
fn next(&mut self) -> Option<WalkEvent<SyntaxSlot>> {
let next = self.next.take();
self.next = next.as_ref().and_then(|next| {
Some(match next {
WalkEvent::Enter(slot) => match slot {
SyntaxSlot::Empty { .. } | SyntaxSlot::Token(_) => {
WalkEvent::Leave(slot.clone())
}
SyntaxSlot::Node(node) => match node.slots().next() {
None => WalkEvent::Leave(SyntaxSlot::Node(node.clone())),
Some(first_slot) => WalkEvent::Enter(first_slot),
},
},
WalkEvent::Leave(slot) => {
let (parent, slot_index) = match slot {
SyntaxSlot::Empty { parent, index } => (parent.clone(), *index as usize),
SyntaxSlot::Token(token) => (token.parent()?, token.index()),
SyntaxSlot::Node(node) => {
if node == &self.start {
return None;
}
(node.parent()?, node.index())
}
};
let next_slot = parent.slots().nth(slot_index + 1);
match next_slot {
Some(slot) => WalkEvent::Enter(slot),
None => WalkEvent::Leave(SyntaxSlot::Node(parent)),
}
}
})
});
next
}
}
impl FusedIterator for SlotsPreorder {}
#[derive(Debug, Clone)]
pub(crate) struct Siblings<'a> {
parent: &'a GreenNodeData,
start_slot: u32,
}
impl<'a> Siblings<'a> {
pub fn new(parent: &'a GreenNodeData, start_slot: u32) -> Self {
assert!(
(start_slot as usize) < parent.slots().len(),
"Start slot {} out of bounds {}",
start_slot,
parent.slots().len()
);
Self { parent, start_slot }
}
/// Creates an iterator over the siblings following the start node.
/// For example, the following siblings of the if statement's condition are:
/// * the consequence
/// * potentially the else clause
pub fn following(&self) -> Children<'a> {
let mut slots = self.parent.slots().enumerate();
// Navigate to the start slot so that calling `next` returns the first following sibling
slots.nth(self.start_slot as usize);
Children::new(slots)
}
/// Creates an iterator over the siblings preceding the start node in reverse order.
/// For example, the preceding siblings of the if statement's condition are:
/// * opening parentheses: (
/// * if keyword: if
pub fn previous(&self) -> impl Iterator<Item = Child<'a>> {
let mut slots = self.parent.slots().enumerate();
// Navigate to the start slot from the back so that calling `next_back` (or rev().next()) returns
// the first slot preceding the start node
slots.nth_back(slots.len() - 1 - self.start_slot as usize);
Children::new(slots).rev()
}
}
// endregion
#[cfg(test)]
mod tests {
use crate::raw_language::{RawLanguageKind, RawSyntaxTreeBuilder};
#[test]
fn slots_iter() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::EXPRESSION_LIST);
for number in [1, 2, 3, 4] {
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, &number.to_string());
builder.finish_node();
}
builder.finish_node();
let list = builder.finish();
let mut iter = list.slots();
assert_eq!(iter.size_hint(), (4, Some(4)));
assert_eq!(
iter.next()
.and_then(|slot| slot.into_node())
.map(|node| node.text().to_string())
.as_deref(),
Some("1")
);
assert_eq!(iter.size_hint(), (3, Some(3)));
assert_eq!(
iter.next_back()
.and_then(|slot| slot.into_node())
.map(|node| node.text().to_string())
.as_deref(),
Some("4")
);
assert_eq!(iter.size_hint(), (2, Some(2)));
assert_eq!(
iter.last()
.and_then(|slot| slot.into_node())
.map(|node| node.text().to_string())
.as_deref(),
Some("3")
);
}
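/// A small additional check (a sketch mirroring the setup of `slots_iter`
/// above): `nth` skips slots from the front without visiting them, keeping
/// `size_hint` in sync.
#[test]
fn slots_iter_nth() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::EXPRESSION_LIST);
for number in [1, 2, 3, 4] {
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, &number.to_string());
builder.finish_node();
}
builder.finish_node();
let list = builder.finish();
let mut iter = list.slots();
// `nth(1)` skips slot 0 and yields slot 1 ("2").
assert_eq!(
iter.nth(1)
.and_then(|slot| slot.into_node())
.map(|node| node.text().to_string())
.as_deref(),
Some("2")
);
assert_eq!(iter.size_hint(), (2, Some(2)));
}
/// A small usage sketch for the green-level `Siblings` iterator: starting
/// from slot 1 of a four-element list, two siblings follow and one precedes.
#[test]
fn siblings_following_and_previous() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::EXPRESSION_LIST);
for number in [1, 2, 3, 4] {
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, &number.to_string());
builder.finish_node();
}
builder.finish_node();
let green = builder.finish_green();
let siblings = super::Siblings::new(&green, 1);
assert_eq!(siblings.following().count(), 2); // slots 2 and 3
assert_eq!(siblings.previous().count(), 1); // slot 0
}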
}


@ -1,258 +0,0 @@
use crate::cursor::{NodeData, SyntaxElement, SyntaxNode, SyntaxTrivia};
use crate::green::GreenElementRef;
use crate::{
green, Direction, GreenToken, GreenTokenData, RawSyntaxKind, SyntaxTokenText, WalkEvent,
};
use ruff_text_size::{TextRange, TextSize};
use std::hash::{Hash, Hasher};
use std::ptr::NonNull;
use std::rc::Rc;
use std::{fmt, iter};
use super::{GreenElement, NodeKind, WeakGreenElement};
#[derive(Clone, Debug)]
pub(crate) struct SyntaxToken {
ptr: Rc<NodeData>,
}
impl SyntaxToken {
pub(super) fn new(
green: &GreenTokenData,
parent: SyntaxNode,
index: u32,
offset: TextSize,
) -> SyntaxToken {
SyntaxToken {
ptr: NodeData::new(
NodeKind::Child {
green: WeakGreenElement::new(GreenElementRef::Token(green)),
parent: parent.ptr,
},
index,
offset,
),
}
}
pub(crate) fn new_detached(green: GreenToken) -> SyntaxToken {
SyntaxToken {
ptr: NodeData::new(
NodeKind::Root {
green: GreenElement::Token(green),
},
0,
TextSize::from(0),
),
}
}
#[inline]
pub(crate) fn green(&self) -> &GreenTokenData {
match self.data().green().as_token() {
Some(token) => token,
None => {
panic!(
"corrupted tree: a node thinks it is a token: {:?}",
self.data().green().as_node().unwrap().to_string()
);
}
}
}
pub(crate) fn key(&self) -> (NonNull<()>, TextSize) {
self.data().key()
}
#[inline]
pub(super) fn data(&self) -> &NodeData {
self.ptr.as_ref()
}
#[inline]
pub(super) fn into_green(self) -> green::GreenElement {
self.ptr.into_green()
}
#[inline]
pub fn kind(&self) -> RawSyntaxKind {
self.data().kind()
}
#[inline]
pub fn text_range(&self) -> TextRange {
self.data().text_range()
}
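/// Returns the range of this token's text with leading and trailing trivia
/// stripped. For example, for a token whose full text is `"  let "` (two
/// leading spaces, one trailing), `text_range` covers all six characters
/// while `text_trimmed_range` covers only `let`.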
#[inline]
pub fn text_trimmed_range(&self) -> TextRange {
let green_token = self.green();
let leading_len = green_token.leading_trivia().text_len();
let trailing_len = green_token.trailing_trivia().text_len();
let range = self.text_range();
TextRange::new(range.start() + leading_len, range.end() - trailing_len)
}
#[inline]
pub fn index(&self) -> usize {
self.data().slot() as usize
}
#[inline]
pub fn text(&self) -> &str {
self.green().text()
}
#[inline]
pub fn token_text(&self) -> SyntaxTokenText {
SyntaxTokenText::new(self.green().to_owned())
}
#[inline]
pub fn token_text_trimmed(&self) -> SyntaxTokenText {
let green = self.green().to_owned();
let mut range = self.text_trimmed_range();
range -= self.data().offset;
SyntaxTokenText::with_range(green, range)
}
#[inline]
pub fn text_trimmed(&self) -> &str {
self.green().text_trimmed()
}
#[inline]
pub fn parent(&self) -> Option<SyntaxNode> {
self.data().parent_node()
}
#[inline]
pub fn ancestors(&self) -> impl Iterator<Item = SyntaxNode> {
std::iter::successors(self.parent(), SyntaxNode::parent)
}
pub fn next_sibling_or_token(&self) -> Option<SyntaxElement> {
self.data().next_sibling_or_token()
}
pub fn prev_sibling_or_token(&self) -> Option<SyntaxElement> {
self.data().prev_sibling_or_token()
}
#[inline]
pub fn siblings_with_tokens(
&self,
direction: Direction,
) -> impl Iterator<Item = SyntaxElement> {
let next = move |el: &SyntaxElement| match direction {
Direction::Next => el.next_sibling_or_token(),
Direction::Prev => el.prev_sibling_or_token(),
};
let me: SyntaxElement = self.clone().into();
iter::successors(next(&me), next)
}
pub fn next_token(&self) -> Option<SyntaxToken> {
self.next_token_impl(Direction::Next)
}
pub fn prev_token(&self) -> Option<SyntaxToken> {
self.next_token_impl(Direction::Prev)
}
/// Returns the token preceding or following this token depending on the passed `direction`.
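///
/// For example, in a tree for `let x` (tokens `let` and `x`), calling
/// `next_token` on `let` walks up to the shared parent and down into the next
/// subtree, yielding `x`; `prev_token` on `let` returns `None`.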
fn next_token_impl(&self, direction: Direction) -> Option<SyntaxToken> {
let mut current: WalkEvent<SyntaxElement> =
WalkEvent::Leave(SyntaxElement::Token(self.clone()));
loop {
current = match current {
WalkEvent::Enter(element) => match element {
SyntaxElement::Token(token) => break Some(token),
SyntaxElement::Node(node) => {
let first_child = match direction {
Direction::Next => node.first_child_or_token(),
Direction::Prev => node.last_child_or_token(),
};
match first_child {
// If the node has no children, leave the node itself
None => WalkEvent::Leave(SyntaxElement::Node(node)),
// Otherwise traverse full sub-tree
Some(child) => WalkEvent::Enter(child),
}
}
},
WalkEvent::Leave(element) => {
let mut current_element = element;
loop {
// Only traverse the left (prev) / right (next) siblings of the parent
// to avoid traversing into the same children again.
let sibling = match direction {
Direction::Next => current_element.next_sibling_or_token(),
Direction::Prev => current_element.prev_sibling_or_token(),
};
match sibling {
// Traverse all children of the sibling
Some(sibling) => break WalkEvent::Enter(sibling),
None => {
match current_element.parent() {
Some(node) => {
current_element = SyntaxElement::Node(node);
}
None => {
return None; // Reached root, no token found
}
}
}
}
}
}
}
}
}
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn detach(self) -> Self {
Self {
ptr: self.ptr.detach(),
}
}
#[inline]
pub fn leading_trivia(&self) -> SyntaxTrivia {
SyntaxTrivia::leading(self.clone())
}
#[inline]
pub fn trailing_trivia(&self) -> SyntaxTrivia {
SyntaxTrivia::trailing(self.clone())
}
}
// Identity semantics for hash & eq
impl PartialEq for SyntaxToken {
#[inline]
fn eq(&self, other: &SyntaxToken) -> bool {
self.data().key() == other.data().key()
}
}
impl Eq for SyntaxToken {}
impl Hash for SyntaxToken {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
self.data().key().hash(state);
}
}
impl fmt::Display for SyntaxToken {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.text(), f)
}
}


@ -1,182 +0,0 @@
use crate::cursor::SyntaxToken;
use crate::green::GreenTrivia;
use crate::TriviaPiece;
use ruff_text_size::{TextRange, TextSize};
use std::fmt;
use std::iter::FusedIterator;
#[derive(PartialEq, Eq, Clone, Hash)]
pub(crate) struct SyntaxTrivia {
token: SyntaxToken,
is_leading: bool,
}
impl SyntaxTrivia {
pub(super) fn leading(token: SyntaxToken) -> Self {
Self {
token,
is_leading: true,
}
}
pub(super) fn trailing(token: SyntaxToken) -> Self {
Self {
token,
is_leading: false,
}
}
pub(crate) fn text(&self) -> &str {
let trivia_range = self.text_range();
let relative_range = TextRange::at(
trivia_range.start() - self.token.data().offset,
trivia_range.len(),
);
&self.token.text()[relative_range]
}
pub(crate) fn token(&self) -> &SyntaxToken {
&self.token
}
pub(crate) fn text_range(&self) -> TextRange {
let length = self.green_trivia().text_len();
let token_range = self.token.text_range();
match self.is_leading {
true => TextRange::at(token_range.start(), length),
false => TextRange::at(token_range.end() - length, length),
}
}
/// Returns the number of [TriviaPiece]s inside this trivia
pub(crate) fn len(&self) -> usize {
self.green_trivia().len()
}
/// Gets the `index`-th trivia piece that was recorded when the token
/// associated with this trivia was created.
/// See [SyntaxTriviaPiece].
pub(crate) fn get_piece(&self, index: usize) -> Option<&TriviaPiece> {
self.green_trivia().get_piece(index)
}
fn green_trivia(&self) -> &GreenTrivia {
match self.is_leading {
true => self.token.green().leading_trivia(),
false => self.token.green().trailing_trivia(),
}
}
/// Returns the last trivia piece element
pub(crate) fn last(&self) -> Option<&TriviaPiece> {
self.green_trivia().pieces().last()
}
/// Returns the first trivia piece element
pub(crate) fn first(&self) -> Option<&TriviaPiece> {
self.green_trivia().pieces().first()
}
/// Iterates over all pieces of the trivia. The iterator yields the offset of
/// each piece as a [TextSize] together with the [TriviaPiece] itself, which
/// carries the piece's kind and length.
/// See [SyntaxTriviaPiece].
pub(crate) fn pieces(&self) -> SyntaxTriviaPiecesIterator {
let range = self.text_range();
SyntaxTriviaPiecesIterator {
raw: self.clone(),
next_index: 0,
next_offset: range.start(),
end_index: self.len(),
end_offset: range.end(),
}
}
}
impl fmt::Debug for SyntaxTrivia {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut f = f.debug_struct("SyntaxTrivia");
f.field("text_range", &self.text_range());
f.finish()
}
}
impl fmt::Display for SyntaxTrivia {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.text(), f)
}
}
#[derive(Clone)]
pub struct SyntaxTriviaPiecesIterator {
pub(crate) raw: SyntaxTrivia,
pub(crate) next_index: usize,
pub(crate) next_offset: TextSize,
pub(crate) end_index: usize,
pub(crate) end_offset: TextSize,
}
impl Iterator for SyntaxTriviaPiecesIterator {
type Item = (TextSize, TriviaPiece);
fn next(&mut self) -> Option<Self::Item> {
let trivia = self.raw.get_piece(self.next_index)?;
let piece = (self.next_offset, *trivia);
self.next_index += 1;
self.next_offset += trivia.text_len();
Some(piece)
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.end_index.saturating_sub(self.next_index);
(len, Some(len))
}
}
impl FusedIterator for SyntaxTriviaPiecesIterator {}
impl DoubleEndedIterator for SyntaxTriviaPiecesIterator {
fn next_back(&mut self) -> Option<Self::Item> {
if self.end_index == self.next_index {
return None;
}
self.end_index -= 1;
let trivia = self.raw.get_piece(self.end_index)?;
self.end_offset -= trivia.text_len();
Some((self.end_offset, *trivia))
}
}
impl ExactSizeIterator for SyntaxTriviaPiecesIterator {}
#[cfg(test)]
mod tests {
use crate::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
use crate::{SyntaxNode, TriviaPiece, TriviaPieceKind};
#[test]
fn trivia_text() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT);
builder.token_with_trivia(
RawLanguageKind::WHITESPACE,
"\t let \t\t",
&[TriviaPiece::new(TriviaPieceKind::Whitespace, 2)],
&[TriviaPiece::new(TriviaPieceKind::Whitespace, 3)],
);
builder.finish_node();
let root = builder.finish_green();
let syntax: SyntaxNode<RawLanguage> = SyntaxNode::new_root(root);
let token = syntax.first_token().unwrap();
assert_eq!(token.leading_trivia().text(), "\t ");
assert_eq!(token.trailing_trivia().text(), " \t\t");
}
}


@ -1,47 +0,0 @@
mod element;
mod node;
mod node_cache;
mod token;
mod trivia;
pub(crate) use self::{
element::{GreenElement, GreenElementRef},
node::{Child, Children, GreenNode, GreenNodeData, Slot},
token::{GreenToken, GreenTokenData},
trivia::GreenTrivia,
};
pub use self::node_cache::NodeCache;
pub(crate) use self::node_cache::NodeCacheNodeEntryMut;
/// RawSyntaxKind is a type tag for each token or node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RawSyntaxKind(pub u16);
pub(crate) fn has_live() -> bool {
node::has_live() || token::has_live() || trivia::has_live()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::green::trivia::GreenTrivia;
#[test]
fn assert_send_sync() {
fn f<T: Send + Sync>() {}
f::<GreenNode>();
f::<GreenToken>();
f::<GreenElement>();
}
#[test]
fn test_size_of() {
use std::mem::size_of;
assert_eq!(8, size_of::<GreenNode>());
assert_eq!(8, size_of::<GreenToken>());
assert_eq!(8, size_of::<GreenTrivia>());
assert_eq!(16, size_of::<GreenElement>());
}
}


@ -1,94 +0,0 @@
use crate::{
green::{GreenNode, GreenToken, RawSyntaxKind},
GreenNodeData, NodeOrToken, TextSize,
};
use std::borrow::Cow;
use super::GreenTokenData;
pub(crate) type GreenElement = NodeOrToken<GreenNode, GreenToken>;
pub(crate) type GreenElementRef<'a> = NodeOrToken<&'a GreenNodeData, &'a GreenTokenData>;
impl From<GreenNode> for GreenElement {
#[inline]
fn from(node: GreenNode) -> GreenElement {
NodeOrToken::Node(node)
}
}
impl<'a> From<&'a GreenNode> for GreenElementRef<'a> {
#[inline]
fn from(node: &'a GreenNode) -> GreenElementRef<'a> {
NodeOrToken::Node(node)
}
}
impl From<GreenToken> for GreenElement {
#[inline]
fn from(token: GreenToken) -> GreenElement {
NodeOrToken::Token(token)
}
}
impl From<Cow<'_, GreenNodeData>> for GreenElement {
#[inline]
fn from(cow: Cow<'_, GreenNodeData>) -> Self {
NodeOrToken::Node(cow.into_owned())
}
}
impl<'a> From<&'a GreenToken> for GreenElementRef<'a> {
#[inline]
fn from(token: &'a GreenToken) -> GreenElementRef<'a> {
NodeOrToken::Token(token)
}
}
impl GreenElementRef<'_> {
pub fn to_owned(self) -> GreenElement {
match self {
NodeOrToken::Node(it) => NodeOrToken::Node(it.to_owned()),
NodeOrToken::Token(it) => NodeOrToken::Token(it.to_owned()),
}
}
}
impl GreenElement {
/// Returns kind of this element.
#[inline]
pub fn kind(&self) -> RawSyntaxKind {
match self {
NodeOrToken::Node(node) => node.kind(),
NodeOrToken::Token(token) => token.kind(),
}
}
/// Returns the length of the text covered by this element.
#[inline]
pub fn text_len(&self) -> TextSize {
match self {
NodeOrToken::Token(token) => token.text_len(),
NodeOrToken::Node(node) => node.text_len(),
}
}
}
impl GreenElementRef<'_> {
/// Returns kind of this element.
#[inline]
pub fn kind(&self) -> RawSyntaxKind {
match self {
NodeOrToken::Node(it) => it.kind(),
NodeOrToken::Token(it) => it.kind(),
}
}
/// Returns the length of the text covered by this element.
#[inline]
pub fn text_len(self) -> TextSize {
match self {
NodeOrToken::Node(it) => it.text_len(),
NodeOrToken::Token(it) => it.text_len(),
}
}
}


@ -1,546 +0,0 @@
use std::convert::TryFrom;
use std::fmt::Formatter;
use std::iter::Enumerate;
use std::{
borrow::{Borrow, Cow},
fmt,
iter::FusedIterator,
mem::{self, ManuallyDrop},
ops, ptr, slice,
};
#[cfg(target_pointer_width = "64")]
use crate::utility_types::static_assert;
use countme::Count;
use crate::{
arc::{Arc, HeaderSlice, ThinArc},
green::{GreenElement, GreenElementRef, RawSyntaxKind},
GreenToken, NodeOrToken, TextRange, TextSize,
};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(super) struct GreenNodeHead {
kind: RawSyntaxKind,
text_len: TextSize,
_c: Count<GreenNode>,
}
pub(crate) fn has_live() -> bool {
countme::get::<GreenNode>().live > 0
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) enum Slot {
Node {
rel_offset: TextSize,
node: GreenNode,
},
Token {
rel_offset: TextSize,
token: GreenToken,
},
/// An empty slot for a child that was missing in the source because:
/// * it's an optional child which is missing for this node
/// * it's a mandatory child but it's missing because of a syntax error
Empty { rel_offset: TextSize },
}
impl std::fmt::Display for Slot {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Slot::Empty { .. } => write!(f, ""),
Slot::Node { node, .. } => std::fmt::Display::fmt(node, f),
Slot::Token { token, .. } => std::fmt::Display::fmt(token, f),
}
}
}
#[cfg(target_pointer_width = "64")]
static_assert!(mem::size_of::<Slot>() == mem::size_of::<usize>() * 2);
type Repr = HeaderSlice<GreenNodeHead, [Slot]>;
type ReprThin = HeaderSlice<GreenNodeHead, [Slot; 0]>;
#[repr(transparent)]
pub(crate) struct GreenNodeData {
data: ReprThin,
}
impl PartialEq for GreenNodeData {
fn eq(&self, other: &Self) -> bool {
self.header() == other.header() && self.slice() == other.slice()
}
}
/// Internal node in the immutable tree.
/// It has other nodes and tokens as children.
#[derive(Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub(crate) struct GreenNode {
ptr: ThinArc<GreenNodeHead, Slot>,
}
impl ToOwned for GreenNodeData {
type Owned = GreenNode;
#[inline]
fn to_owned(&self) -> GreenNode {
unsafe {
let green = GreenNode::from_raw(ptr::NonNull::from(self));
let green = ManuallyDrop::new(green);
GreenNode::clone(&green)
}
}
}
impl Borrow<GreenNodeData> for GreenNode {
#[inline]
fn borrow(&self) -> &GreenNodeData {
self
}
}
impl From<Cow<'_, GreenNodeData>> for GreenNode {
#[inline]
fn from(cow: Cow<'_, GreenNodeData>) -> Self {
cow.into_owned()
}
}
impl From<&'_ GreenNodeData> for GreenNode {
#[inline]
fn from(borrow: &'_ GreenNodeData) -> Self {
borrow.to_owned()
}
}
impl fmt::Debug for GreenNodeData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("GreenNode")
.field("kind", &self.kind())
.field("text_len", &self.text_len())
.field("n_slots", &self.slots().len())
.finish()
}
}
impl fmt::Debug for GreenNode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let data: &GreenNodeData = self;
fmt::Debug::fmt(data, f)
}
}
impl fmt::Display for GreenNode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let data: &GreenNodeData = self;
fmt::Display::fmt(data, f)
}
}
impl fmt::Display for GreenNodeData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for child in self.slots() {
write!(f, "{}", child)?;
}
Ok(())
}
}
impl GreenNodeData {
#[inline]
fn header(&self) -> &GreenNodeHead {
&self.data.header
}
#[inline]
pub(crate) fn slice(&self) -> &[Slot] {
self.data.slice()
}
/// Kind of this node.
#[inline]
pub fn kind(&self) -> RawSyntaxKind {
self.header().kind
}
/// Returns the length of the text covered by this node.
#[inline]
pub fn text_len(&self) -> TextSize {
self.header().text_len
}
/// Children of this node.
#[inline]
pub fn children(&self) -> Children<'_> {
Children::new(self.slots().enumerate())
}
/// Returns the slots of this node. Every node of a specific kind has the same number of slots
/// to allow using fixed offsets to retrieve a specific child even if some other child is missing.
#[inline]
pub fn slots(&self) -> Slots<'_> {
Slots {
raw: self.slice().iter(),
}
}
pub(crate) fn slot_at_range(
&self,
rel_range: TextRange,
) -> Option<(usize, TextSize, &'_ Slot)> {
let idx = self
.slice()
.binary_search_by(|it| {
let child_range = it.rel_range();
TextRange::ordering(child_range, rel_range)
})
// XXX: this handles empty ranges
.unwrap_or_else(|it| it.saturating_sub(1));
let slot = &self
.slice()
.get(idx)
.filter(|it| it.rel_range().contains_range(rel_range))?;
Some((idx, slot.rel_offset(), slot))
}
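/// Returns a new node in which the slots in `range` have been replaced by the
/// elements yielded by `replace_with` (`None` producing an empty slot); the
/// original node is left untouched.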
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub(crate) fn splice_slots<R, I>(&self, range: R, replace_with: I) -> GreenNode
where
R: ops::RangeBounds<usize>,
I: Iterator<Item = Option<GreenElement>>,
{
let mut slots: Vec<_> = self
.slots()
.map(|slot| match slot {
Slot::Empty { .. } => None,
Slot::Node { node, .. } => Some(NodeOrToken::Node(node.to_owned())),
Slot::Token { token, .. } => Some(NodeOrToken::Token(token.to_owned())),
})
.collect();
slots.splice(range, replace_with);
GreenNode::new(self.kind(), slots)
}
}
impl ops::Deref for GreenNode {
type Target = GreenNodeData;
#[inline]
fn deref(&self) -> &GreenNodeData {
unsafe {
let repr: &Repr = &self.ptr;
let repr: &ReprThin = &*(repr as *const Repr as *const ReprThin);
mem::transmute::<&ReprThin, &GreenNodeData>(repr)
}
}
}
impl GreenNode {
/// Creates new Node.
#[inline]
pub fn new<I>(kind: RawSyntaxKind, slots: I) -> GreenNode
where
I: IntoIterator<Item = Option<GreenElement>>,
I::IntoIter: ExactSizeIterator,
{
let mut text_len: TextSize = 0.into();
let slots = slots.into_iter().map(|el| {
let rel_offset = text_len;
match el {
Some(el) => {
text_len += el.text_len();
match el {
NodeOrToken::Node(node) => Slot::Node { rel_offset, node },
NodeOrToken::Token(token) => Slot::Token { rel_offset, token },
}
}
None => Slot::Empty { rel_offset },
}
});
let data = ThinArc::from_header_and_iter(
GreenNodeHead {
kind,
text_len: 0.into(),
_c: Count::new(),
},
slots,
);
// XXX: fixup `text_len` after construction, because we can't iterate
// `slots` twice.
let data = {
let mut data = Arc::from_thin(data);
Arc::get_mut(&mut data).unwrap().header.text_len = text_len;
Arc::into_thin(data)
};
GreenNode { ptr: data }
}
#[inline]
pub(crate) unsafe fn from_raw(ptr: ptr::NonNull<GreenNodeData>) -> GreenNode {
let arc = Arc::from_raw(&ptr.as_ref().data as *const ReprThin);
let arc = mem::transmute::<Arc<ReprThin>, ThinArc<GreenNodeHead, Slot>>(arc);
GreenNode { ptr: arc }
}
}
impl Slot {
#[inline]
pub(crate) fn as_ref(&self) -> Option<GreenElementRef> {
match self {
Slot::Node { node, .. } => Some(NodeOrToken::Node(node)),
Slot::Token { token, .. } => Some(NodeOrToken::Token(token)),
Slot::Empty { .. } => None,
}
}
#[inline]
pub(crate) fn rel_offset(&self) -> TextSize {
match self {
Slot::Node { rel_offset, .. }
| Slot::Token { rel_offset, .. }
| Slot::Empty { rel_offset } => *rel_offset,
}
}
#[inline]
fn rel_range(&self) -> TextRange {
let text_len = match self.as_ref() {
None => TextSize::from(0),
Some(element) => element.text_len(),
};
TextRange::at(self.rel_offset(), text_len)
}
}
#[derive(Debug, Clone)]
pub(crate) struct Slots<'a> {
pub(crate) raw: slice::Iter<'a, Slot>,
}
// NB: forward everything stable that `slice::Iter` specializes as of Rust 1.39.0
impl ExactSizeIterator for Slots<'_> {
#[inline(always)]
fn len(&self) -> usize {
self.raw.len()
}
}
impl<'a> Iterator for Slots<'a> {
type Item = &'a Slot;
#[inline]
fn next(&mut self) -> Option<&'a Slot> {
self.raw.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.raw.size_hint()
}
#[inline]
fn count(self) -> usize
where
Self: Sized,
{
self.raw.count()
}
#[inline]
fn last(mut self) -> Option<Self::Item>
where
Self: Sized,
{
self.next_back()
}
#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
self.raw.nth(n)
}
#[inline]
fn fold<Acc, Fold>(self, init: Acc, mut f: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
let mut accum = init;
for x in self {
accum = f(accum, x);
}
accum
}
}
impl<'a> DoubleEndedIterator for Slots<'a> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
self.raw.next_back()
}
#[inline]
fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
self.raw.nth_back(n)
}
#[inline]
fn rfold<Acc, Fold>(mut self, init: Acc, mut f: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
let mut accum = init;
while let Some(x) = self.next_back() {
accum = f(accum, x);
}
accum
}
}
impl FusedIterator for Slots<'_> {}
#[derive(Debug, Clone)]
pub(crate) struct Child<'a> {
slot: u32,
rel_offset: TextSize,
element: GreenElementRef<'a>,
}
impl<'a> Child<'a> {
pub fn slot(&self) -> u32 {
self.slot
}
pub fn rel_offset(&self) -> TextSize {
self.rel_offset
}
pub fn element(&self) -> GreenElementRef<'a> {
self.element
}
}
impl<'a> TryFrom<(usize, &'a Slot)> for Child<'a> {
type Error = ();
fn try_from((index, slot): (usize, &'a Slot)) -> Result<Self, Self::Error> {
match slot {
Slot::Empty { .. } => Err(()),
Slot::Node { node, rel_offset } => Ok(Child {
element: NodeOrToken::Node(node),
slot: index as u32,
rel_offset: *rel_offset,
}),
Slot::Token { token, rel_offset } => Ok(Child {
element: NodeOrToken::Token(token),
slot: index as u32,
rel_offset: *rel_offset,
}),
}
}
}
#[derive(Debug, Clone)]
pub(crate) struct Children<'a> {
slots: Enumerate<Slots<'a>>,
}
impl<'a> Children<'a> {
pub fn new(slots: Enumerate<Slots<'a>>) -> Self {
Self { slots }
}
}
impl<'a> Iterator for Children<'a> {
type Item = Child<'a>;
fn next(&mut self) -> Option<Self::Item> {
self.slots.find_map(|it| Child::try_from(it).ok())
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.slots.size_hint()
}
}
impl<'a> DoubleEndedIterator for Children<'a> {
fn next_back(&mut self) -> Option<Self::Item> {
loop {
let next = self.slots.next_back()?;
if let Ok(child) = Child::try_from(next) {
return Some(child);
}
}
}
}
impl FusedIterator for Children<'_> {}
#[cfg(test)]
mod tests {
use crate::raw_language::{RawLanguageKind, RawSyntaxTreeBuilder};
use crate::GreenNode;
fn build_test_list() -> GreenNode {
let mut builder: RawSyntaxTreeBuilder = RawSyntaxTreeBuilder::new();
// list
builder.start_node(RawLanguageKind::SEPARATED_EXPRESSION_LIST);
// element 1
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::STRING_TOKEN, "a");
builder.finish_node();
// Missing ,
// element 2
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::STRING_TOKEN, "b");
builder.finish_node();
builder.finish_node();
builder.finish_green()
}
#[test]
fn children() {
let root = build_test_list();
// Test that children skips missing
assert_eq!(root.children().count(), 2);
assert_eq!(
root.children()
.map(|child| child.element.to_string())
.collect::<Vec<_>>(),
vec!["a", "b"]
);
// Slot 2 (index 1) is empty
assert_eq!(
root.children().map(|child| child.slot).collect::<Vec<_>>(),
vec![0, 2]
);
// Same when reverse
assert_eq!(
root.children()
.rev()
.map(|child| child.slot)
.collect::<Vec<_>>(),
vec![2, 0]
);
}
#[test]
fn slots() {
let root = build_test_list();
// Has 3 slots, one is missing
assert_eq!(root.slots().len(), 3);
}
}


@ -1,358 +0,0 @@
use hashbrown::hash_map::{RawEntryMut, RawOccupiedEntryMut, RawVacantEntryMut};
use ruff_text_size::TextSize;
use rustc_hash::FxHasher;
use std::hash::{BuildHasherDefault, Hash, Hasher};
use crate::green::Slot;
use crate::syntax::{TriviaPiece, TriviaPieceKind};
use crate::{
green::GreenElementRef, GreenNode, GreenNodeData, GreenToken, GreenTokenData, NodeOrToken,
RawSyntaxKind,
};
use super::element::GreenElement;
use super::trivia::GreenTrivia;
type HashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
/// A token stored in the `NodeCache`.
/// Intentionally does not implement `Hash`, to guarantee at compile time that
/// the `NodeCache` uses the correct hash.
#[derive(Debug)]
struct CachedToken(GreenToken);
/// A node stored in the `NodeCache`. It stores a pre-computed hash
/// because re-computing the hash requires traversing the whole sub-tree.
/// The hash also differs from the `GreenNode` hash implementation as it
/// only hashes occupied slots and excludes empty slots.
///
/// Intentionally does not implement `Hash`, to guarantee at compile time that
/// the `NodeCache` uses the correct hash.
#[derive(Debug)]
struct CachedNode {
node: GreenNode,
// Store the hash as it's expensive to re-compute
// involves re-computing the hash of the whole sub-tree
hash: u64,
}
/// Interner for GreenTokens and GreenNodes
// XXX: the impl is a bit tricky. As usual when writing interners, we want to
// store all values in one HashSet.
//
// However, hashing trees is fun: hash of the tree is recursively defined. We
// maintain an invariant -- if the tree is interned, then all of its children
// are interned as well.
//
// That means that computing the hash naively is wasteful -- we just *know*
// hashes of children, and we can re-use those.
//
// So here we use *raw* API of hashbrown and provide the hashes manually,
// instead of going via a `Hash` impl. Our manual `Hash` and the
// `#[derive(Hash)]` are actually different! At some point we had a fun bug,
// where we accidentally mixed the two hashes, which made the cache much less
// efficient.
//
// To fix that, we additionally wrap the data in `Cached*` wrappers, to make sure
// we don't accidentally use the wrong hash!
#[derive(Default, Debug)]
pub struct NodeCache {
nodes: HashMap<CachedNode, ()>,
tokens: HashMap<CachedToken, ()>,
trivia: TriviaCache,
}
fn token_hash_of(kind: RawSyntaxKind, text: &str) -> u64 {
let mut h = FxHasher::default();
kind.hash(&mut h);
text.hash(&mut h);
h.finish()
}
fn token_hash(token: &GreenTokenData) -> u64 {
token_hash_of(token.kind(), token.text())
}
fn element_id(elem: GreenElementRef<'_>) -> *const () {
match elem {
NodeOrToken::Node(it) => it as *const GreenNodeData as *const (),
NodeOrToken::Token(it) => it as *const GreenTokenData as *const (),
}
}
impl NodeCache {
/// Hash used for nodes that haven't been cached because they have too many
/// slots or because one of their children wasn't cached.
const UNCACHED_NODE_HASH: u64 = 0;
/// Tries to retrieve a node with the given `kind` and `children` from the cache.
///
/// Returns an entry that allows the caller to:
/// * Retrieve the cached node if it is present in the cache
/// * Insert a node if it isn't present in the cache
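///
/// A typical caller computes child hashes while building bottom-up, then
/// matches on the returned entry: reusing `CachedNodeEntry::node` when the
/// entry is occupied, and calling `VacantNodeEntry::cache` with the freshly
/// built node otherwise (see `cache` below).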
pub(crate) fn node(
&mut self,
kind: RawSyntaxKind,
children: &[(u64, GreenElement)],
) -> NodeCacheNodeEntryMut {
if children.len() > 3 {
return NodeCacheNodeEntryMut::NoCache(Self::UNCACHED_NODE_HASH);
}
let hash = {
let mut h = FxHasher::default();
kind.hash(&mut h);
for &(hash, _) in children {
if hash == Self::UNCACHED_NODE_HASH {
return NodeCacheNodeEntryMut::NoCache(Self::UNCACHED_NODE_HASH);
}
hash.hash(&mut h);
}
h.finish()
};
// Green nodes are fully immutable, so it's ok to deduplicate them.
// This is the same optimization that Roslyn does
// https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
//
// For example, all `#[inline]` in this file share the same green node!
// For `libsyntax/parse/parser.rs`, measurements show that deduping saves
// 17% of the memory for green nodes!
let entry = self.nodes.raw_entry_mut().from_hash(hash, |no_hash| {
no_hash.node.kind() == kind && {
let lhs = no_hash.node.slots().filter_map(|slot| match slot {
// Ignore empty slots. The queried node only has the present children
Slot::Empty { .. } => None,
Slot::Node { node, .. } => Some(element_id(NodeOrToken::Node(node))),
Slot::Token { token, .. } => Some(element_id(NodeOrToken::Token(token))),
});
let rhs = children
.iter()
.map(|(_, element)| element_id(element.as_deref()));
lhs.eq(rhs)
}
});
match entry {
RawEntryMut::Occupied(entry) => NodeCacheNodeEntryMut::Cached(CachedNodeEntry {
hash,
raw_entry: entry,
}),
RawEntryMut::Vacant(entry) => NodeCacheNodeEntryMut::Vacant(VacantNodeEntry {
raw_entry: entry,
original_kind: kind,
hash,
}),
}
}
pub(crate) fn token(&mut self, kind: RawSyntaxKind, text: &str) -> (u64, GreenToken) {
self.token_with_trivia(kind, text, &[], &[])
}
pub(crate) fn token_with_trivia(
&mut self,
kind: RawSyntaxKind,
text: &str,
leading: &[TriviaPiece],
trailing: &[TriviaPiece],
) -> (u64, GreenToken) {
let hash = token_hash_of(kind, text);
let entry = self.tokens.raw_entry_mut().from_hash(hash, |token| {
token.0.kind() == kind && token.0.text() == text
});
let token = match entry {
RawEntryMut::Occupied(entry) => entry.key().0.clone(),
RawEntryMut::Vacant(entry) => {
let leading = self.trivia.get(leading);
let trailing = self.trivia.get(trailing);
let token = GreenToken::with_trivia(kind, text, leading, trailing);
entry
.insert_with_hasher(hash, CachedToken(token.clone()), (), |t| token_hash(&t.0));
token
}
};
(hash, token)
}
}
pub(crate) enum NodeCacheNodeEntryMut<'a> {
Cached(CachedNodeEntry<'a>),
/// A node that should not be cached
NoCache(u64),
Vacant(VacantNodeEntry<'a>),
}
/// Represents a vacant entry, a node that hasn't been cached yet.
/// The `cache` method places a node inside the vacant entry. The inserted node
/// may have a different representation (kind or children) than the originally queried node.
/// For example, a node may change its kind to bogus or add empty slots. What matters is only
/// that these changes apply to all nodes that have the same shape as the originally queried node.
pub(crate) struct VacantNodeEntry<'a> {
hash: u64,
original_kind: RawSyntaxKind,
raw_entry: RawVacantEntryMut<'a, CachedNode, (), BuildHasherDefault<FxHasher>>,
}
/// Represents an entry of a cached node.
pub(crate) struct CachedNodeEntry<'a> {
hash: u64,
raw_entry: RawOccupiedEntryMut<'a, CachedNode, (), BuildHasherDefault<FxHasher>>,
}
impl<'a> CachedNodeEntry<'a> {
pub fn node(&self) -> &GreenNode {
&self.raw_entry.key().node
}
pub fn hash(&self) -> u64 {
self.hash
}
}
impl<'a> VacantNodeEntry<'a> {
/// Inserts the `node` into the cache so that future queries for the same kind and children resolve to the passed `node`.
///
/// Returns the hash of the node.
///
/// The `node` is not cached if its kind doesn't match the `kind` of the queried node,
/// because cache lookups would never succeed: the hash-collision check compares the
/// kinds of the cached and queried nodes.
pub fn cache(self, node: GreenNode) -> u64 {
if self.original_kind != node.kind() {
// The kind has changed since it has been queried. For example, the node has been converted to an
// unknown node. Never cache these nodes because cache lookups will never match.
NodeCache::UNCACHED_NODE_HASH
} else {
self.raw_entry.insert_with_hasher(
self.hash,
CachedNode {
node,
hash: self.hash,
},
(),
|n| n.hash,
);
self.hash
}
}
}
/// A cached [GreenTrivia].
/// Deliberately doesn't implement `Hash` to make sure all
/// usages go through the custom `FxHasher`.
#[derive(Debug)]
struct CachedTrivia(GreenTrivia);
#[derive(Debug)]
struct TriviaCache {
/// Generic cache for trivia
cache: HashMap<CachedTrivia, ()>,
/// Cached single whitespace trivia.
whitespace: GreenTrivia,
}
impl Default for TriviaCache {
fn default() -> Self {
Self {
cache: Default::default(),
whitespace: GreenTrivia::new([TriviaPiece::whitespace(1)]),
}
}
}
impl TriviaCache {
/// Tries to retrieve a [GreenTrivia] with the given pieces from the cache or creates a new one and caches
/// it for future calls.
fn get(&mut self, pieces: &[TriviaPiece]) -> GreenTrivia {
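// Fast paths: empty trivia allocates nothing, and a single one-character
// whitespace piece (by far the most common trivia) is interned once up
// front; everything else goes through the hash-based cache below.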
match pieces {
[] => GreenTrivia::empty(),
[TriviaPiece {
kind: TriviaPieceKind::Whitespace,
length,
}] if *length == TextSize::from(1) => self.whitespace.clone(),
_ => {
let hash = Self::trivia_hash_of(pieces);
let entry = self
.cache
.raw_entry_mut()
.from_hash(hash, |trivia| trivia.0.pieces() == pieces);
match entry {
RawEntryMut::Occupied(entry) => entry.key().0.clone(),
RawEntryMut::Vacant(entry) => {
let trivia = GreenTrivia::new(pieces.iter().copied());
entry.insert_with_hasher(
hash,
CachedTrivia(trivia.clone()),
(),
|cached| Self::trivia_hash_of(cached.0.pieces()),
);
trivia
}
}
}
}
}
fn trivia_hash_of(pieces: &[TriviaPiece]) -> u64 {
let mut h = FxHasher::default();
pieces.len().hash(&mut h);
for piece in pieces {
piece.hash(&mut h);
}
h.finish()
}
}
#[cfg(test)]
mod tests {
use crate::green::node_cache::token_hash;
use crate::green::trivia::GreenTrivia;
use crate::{GreenToken, RawSyntaxKind};
use ruff_text_size::TextSize;
#[test]
fn green_token_hash() {
let kind = RawSyntaxKind(0);
let text = " let ";
let t1 = GreenToken::with_trivia(
kind,
text,
GreenTrivia::whitespace(TextSize::from(1)),
GreenTrivia::whitespace(TextSize::from(1)),
);
let t2 = GreenToken::with_trivia(
kind,
text,
GreenTrivia::whitespace(1),
GreenTrivia::whitespace(1),
);
assert_eq!(token_hash(&t1), token_hash(&t2));
let t3 = GreenToken::new(kind, "let");
assert_ne!(token_hash(&t1), token_hash(&t3));
let t4 = GreenToken::with_trivia(
kind,
"\tlet ",
GreenTrivia::whitespace(1),
GreenTrivia::whitespace(1),
);
assert_ne!(token_hash(&t1), token_hash(&t4));
}
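/// A small additional sketch: the cache interns tokens, so two lookups with
/// the same kind and text return the same hash and an identical token.
#[test]
fn node_cache_token_dedup() {
let mut cache = super::NodeCache::default();
let (h1, t1) = cache.token(RawSyntaxKind(0), "let");
let (h2, t2) = cache.token(RawSyntaxKind(0), "let");
assert_eq!(h1, h2);
assert_eq!(token_hash(&t1), token_hash(&t2));
assert_eq!(t1.text(), t2.text());
}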
}


@ -1,235 +0,0 @@
use std::{
borrow::Borrow,
fmt,
mem::{self, ManuallyDrop},
ops, ptr,
};
use countme::Count;
use crate::green::trivia::GreenTrivia;
use crate::{
arc::{Arc, HeaderSlice, ThinArc},
green::RawSyntaxKind,
TextSize,
};
#[derive(PartialEq, Eq, Hash)]
struct GreenTokenHead {
kind: RawSyntaxKind,
leading: GreenTrivia,
trailing: GreenTrivia,
_c: Count<GreenToken>,
}
pub(crate) fn has_live() -> bool {
countme::get::<GreenToken>().live > 0
}
type Repr = HeaderSlice<GreenTokenHead, [u8]>;
type ReprThin = HeaderSlice<GreenTokenHead, [u8; 0]>;
#[repr(transparent)]
pub(crate) struct GreenTokenData {
data: ReprThin,
}
impl PartialEq for GreenTokenData {
fn eq(&self, other: &Self) -> bool {
self.kind() == other.kind() && self.text() == other.text()
}
}
/// Leaf node in the immutable tree.
#[derive(PartialEq, Eq, Hash, Clone)]
#[repr(transparent)]
pub(crate) struct GreenToken {
ptr: ThinArc<GreenTokenHead, u8>,
}
impl ToOwned for GreenTokenData {
type Owned = GreenToken;
#[inline]
fn to_owned(&self) -> GreenToken {
unsafe {
let green = GreenToken::from_raw(ptr::NonNull::from(self));
let green = ManuallyDrop::new(green);
GreenToken::clone(&green)
}
}
}
impl Borrow<GreenTokenData> for GreenToken {
#[inline]
fn borrow(&self) -> &GreenTokenData {
self
}
}
impl fmt::Debug for GreenTokenData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("GreenToken")
.field("kind", &self.kind())
.field("text", &self.text())
.field("leading", &self.leading_trivia())
.field("trailing", &self.trailing_trivia())
.finish()
}
}
impl fmt::Debug for GreenToken {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let data: &GreenTokenData = self;
fmt::Debug::fmt(data, f)
}
}
impl fmt::Display for GreenToken {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let data: &GreenTokenData = self;
fmt::Display::fmt(data, f)
}
}
impl fmt::Display for GreenTokenData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.text())
}
}
impl GreenTokenData {
/// Kind of this Token.
#[inline]
pub fn kind(&self) -> RawSyntaxKind {
self.data.header.kind
}
/// Whole text of this Token, including all trivia.
#[inline]
pub fn text(&self) -> &str {
unsafe { std::str::from_utf8_unchecked(self.data.slice()) }
}
pub(crate) fn leading_trailing_total_len(&self) -> (TextSize, TextSize, TextSize) {
let leading_len = self.data.header.leading.text_len();
let trailing_len = self.data.header.trailing.text_len();
let total_len = self.data.slice().len() as u32;
(leading_len, trailing_len, total_len.into())
}
/// Text of this Token, excluding all trivia.
#[inline]
pub fn text_trimmed(&self) -> &str {
let (leading_len, trailing_len, total_len) = self.leading_trailing_total_len();
let start: usize = leading_len.into();
let end: usize = (total_len - trailing_len).into();
let text = unsafe { std::str::from_utf8_unchecked(self.data.slice()) };
&text[start..end]
}
/// Returns the length of the text covered by this token.
#[inline]
pub fn text_len(&self) -> TextSize {
TextSize::of(self.text())
}
#[inline]
pub fn leading_trivia(&self) -> &GreenTrivia {
&self.data.header.leading
}
#[inline]
pub fn trailing_trivia(&self) -> &GreenTrivia {
&self.data.header.trailing
}
}
impl GreenToken {
#[inline]
#[cfg(test)]
pub fn new(kind: RawSyntaxKind, text: &str) -> GreenToken {
let leading = GreenTrivia::empty();
let trailing = leading.clone();
Self::with_trivia(kind, text, leading, trailing)
}
#[inline]
pub fn with_trivia(
kind: RawSyntaxKind,
text: &str,
leading: GreenTrivia,
trailing: GreenTrivia,
) -> GreenToken {
let head = GreenTokenHead {
kind,
leading,
trailing,
_c: Count::new(),
};
let ptr = ThinArc::from_header_and_iter(head, text.bytes());
GreenToken { ptr }
}
#[inline]
pub(crate) unsafe fn from_raw(ptr: ptr::NonNull<GreenTokenData>) -> GreenToken {
let arc = Arc::from_raw(&ptr.as_ref().data as *const ReprThin);
let arc = mem::transmute::<Arc<ReprThin>, ThinArc<GreenTokenHead, u8>>(arc);
GreenToken { ptr: arc }
}
}
impl ops::Deref for GreenToken {
type Target = GreenTokenData;
#[inline]
fn deref(&self) -> &GreenTokenData {
unsafe {
let repr: &Repr = &self.ptr;
let repr: &ReprThin = &*(repr as *const Repr as *const ReprThin);
mem::transmute::<&ReprThin, &GreenTokenData>(repr)
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use quickcheck_macros::*;
#[test]
fn green_token_text_and_len() {
let t = GreenToken::with_trivia(
RawSyntaxKind(0),
"\n\t let \t\t",
GreenTrivia::whitespace(3),
GreenTrivia::whitespace(3),
);
assert_eq!("\n\t let \t\t", t.text());
assert_eq!(TextSize::from(9), t.text_len());
assert_eq!("let", t.text_trimmed());
assert_eq!("\n\t let \t\t", format!("{}", t));
}
#[test]
fn empty_text_len() {
assert_eq!(TextSize::from(0), GreenTrivia::empty().text_len());
}
#[quickcheck]
fn whitespace_and_comments_text_len(len: u32) {
let len = TextSize::from(len);
assert_eq!(len, GreenTrivia::whitespace(len).text_len());
assert_eq!(len, GreenTrivia::single_line_comment(len).text_len());
}
#[test]
fn sizes() {
assert_eq!(24, std::mem::size_of::<GreenTokenHead>());
assert_eq!(8, std::mem::size_of::<GreenToken>());
}
}


@ -1,148 +0,0 @@
use crate::arc::{HeaderSlice, ThinArc};
use crate::TriviaPiece;
use countme::Count;
use ruff_text_size::TextSize;
use std::fmt;
use std::fmt::Formatter;
#[derive(PartialEq, Eq, Hash)]
pub(crate) struct GreenTriviaHead {
_c: Count<GreenTrivia>,
}
pub(crate) fn has_live() -> bool {
countme::get::<GreenTrivia>().live > 0
}
type ReprThin = HeaderSlice<GreenTriviaHead, [TriviaPiece; 0]>;
#[repr(transparent)]
pub(crate) struct GreenTriviaData {
data: ReprThin,
}
impl GreenTriviaData {
#[allow(unused)]
#[inline]
pub fn header(&self) -> &GreenTriviaHead {
&self.data.header
}
#[inline]
pub fn pieces(&self) -> &[TriviaPiece] {
self.data.slice()
}
}
impl PartialEq for GreenTriviaData {
fn eq(&self, other: &Self) -> bool {
self.pieces() == other.pieces()
}
}
impl fmt::Debug for GreenTriviaData {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.debug_list().entries(self.pieces().iter()).finish()
}
}
/// List of trivia. Used to store either the leading or trailing trivia of a token.
/// The identity of a trivia is defined by the kinds and lengths of its pieces, but not by
/// the text of any individual piece. That means that `\r` and `\n` can both be represented
/// by the same trivia: a trivia with a single `LINEBREAK` piece of length 1.
/// This is safe because the text is stored on the token to which the trivia belongs, and
/// `a\n` and `a\r` never resolve to the same token. Thus, the two tokens share the trivia
/// but are otherwise distinct.
#[derive(Eq, PartialEq, Hash, Clone)]
#[repr(transparent)]
pub(crate) struct GreenTrivia {
ptr: Option<ThinArc<GreenTriviaHead, TriviaPiece>>,
}
impl fmt::Debug for GreenTrivia {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.pieces(), f)
}
}
impl GreenTrivia {
/// Creates a new trivia containing the passed in pieces
pub fn new<I>(pieces: I) -> Self
where
I: IntoIterator<Item = TriviaPiece>,
I::IntoIter: ExactSizeIterator,
{
let data =
ThinArc::from_header_and_iter(GreenTriviaHead { _c: Count::new() }, pieces.into_iter());
GreenTrivia { ptr: Some(data) }
}
/// Creates an empty trivia
pub fn empty() -> Self {
GreenTrivia { ptr: None }
}
/// Returns the total length of all pieces
pub fn text_len(&self) -> TextSize {
let mut len = TextSize::default();
for piece in self.pieces() {
len += piece.length
}
len
}
/// Returns the number of pieces
pub fn len(&self) -> usize {
match &self.ptr {
None => 0,
Some(ptr) => ptr.len(),
}
}
/// Returns the pieces of the trivia
pub fn pieces(&self) -> &[TriviaPiece] {
match &self.ptr {
None => &[],
Some(ptr) => ptr.slice(),
}
}
/// Returns the piece at the given index.
pub fn get_piece(&self, index: usize) -> Option<&TriviaPiece> {
self.pieces().get(index)
}
}
#[cfg(test)]
mod tests {
use crate::green::trivia::{GreenTrivia, GreenTriviaHead};
use crate::syntax::TriviaPieceKind;
use crate::TriviaPiece;
use ruff_text_size::TextSize;
impl GreenTrivia {
/// Creates a trivia with a single whitespace piece
pub fn whitespace<L: Into<TextSize>>(len: L) -> Self {
Self::single(TriviaPieceKind::Whitespace, len.into())
}
/// Creates a trivia with one single line comment piece
pub fn single_line_comment<L: Into<TextSize>>(len: L) -> Self {
Self::single(TriviaPieceKind::SingleLineComment, len.into())
}
/// Creates a trivia containing a single piece
pub fn single<L: Into<TextSize>>(kind: TriviaPieceKind, len: L) -> Self {
Self::new(std::iter::once(TriviaPiece::new(kind, len)))
}
}
#[test]
fn sizes() {
assert_eq!(0, std::mem::size_of::<GreenTriviaHead>());
assert_eq!(8, std::mem::size_of::<GreenTrivia>());
}
}


@ -1,63 +0,0 @@
//! A generic library for lossless syntax trees.
//! See `examples/s_expressions.rs` for a tutorial.
#![allow(clippy::pedantic)]
#![forbid(
// missing_debug_implementations,
unconditional_recursion,
future_incompatible,
// missing_docs,
)]
#![deny(unsafe_code)]
#![deny(rustdoc::broken_intra_doc_links)]
#[doc(hidden)]
pub mod macros;
#[allow(unsafe_code)]
pub mod cursor;
#[allow(unsafe_code)]
mod green;
pub mod syntax;
mod syntax_node_text;
mod utility_types;
#[allow(unsafe_code)]
mod arc;
mod ast;
mod cow_mut;
pub mod raw_language;
#[cfg(feature = "serde")]
mod serde_impls;
mod syntax_factory;
mod syntax_token_text;
mod tree_builder;
pub use ruff_text_size::{TextLen, TextRange, TextSize};
pub use crate::{
ast::*,
green::RawSyntaxKind,
syntax::{
chain_trivia_pieces, ChainTriviaPiecesIterator, Language, SendNode, SyntaxElement,
SyntaxElementChildren, SyntaxKind, SyntaxList, SyntaxNode, SyntaxNodeChildren,
SyntaxNodeOptionExt, SyntaxRewriter, SyntaxSlot, SyntaxToken, SyntaxTriviaPiece,
SyntaxTriviaPieceComments, TriviaPiece, TriviaPieceKind, VisitNodeSignal,
},
syntax_factory::*,
syntax_node_text::SyntaxNodeText,
syntax_token_text::SyntaxTokenText,
tree_builder::{Checkpoint, TreeBuilder},
utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent},
};
pub(crate) use crate::green::{GreenNode, GreenNodeData, GreenToken, GreenTokenData};
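/// Reports all live (not yet dropped) syntax objects, as tracked by `countme`,
/// or returns `None` when every node, token, and trivia has been released.
/// Useful for detecting leaks in tests.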
pub fn check_live() -> Option<String> {
if cursor::has_live() || green::has_live() {
Some(countme::get_all().to_string())
} else {
None
}
}


@ -1,168 +0,0 @@
use crate::{AstNode, Language};
/// Matches a `SyntaxNode` against an `ast` type.
///
/// # Example:
///
/// ```
/// use ruff_rowan::{match_ast, AstNode};
/// use ruff_rowan::raw_language::{LiteralExpression, RawLanguageRoot, RawLanguageKind, RawSyntaxTreeBuilder};
///
/// let mut builder = RawSyntaxTreeBuilder::new();
/// builder.start_node(RawLanguageKind::ROOT);
/// builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
/// builder.token(RawLanguageKind::NUMBER_TOKEN, "5");
/// builder.finish_node();
/// builder.finish_node();
///
/// let root = builder.finish();
///
/// let text = match_ast! {
/// match &root {
/// RawLanguageRoot(root) => { format!("root: {}", root.text()) },
/// LiteralExpression(literal) => { format!("literal: {}", literal.text()) },
/// _ => {
/// root.text().to_string()
/// }
/// }
/// };
///
/// assert_eq!(text, "root: 5");
/// ```
#[macro_export]
macro_rules! match_ast {
// Necessary because expressions aren't allowed in front of `{`
(match &$node:ident { $($tt:tt)* }) => { match_ast!(match (&$node) { $($tt)* }) };
(match $node:ident { $($tt:tt)* }) => { match_ast!(match ($node) { $($tt)* }) };
(match ($node:expr) {
$( $( $path:ident )::+ ($it:pat) => $res:expr, )*
_ => $catch_all:expr $(,)?
}) => {{
$( if let Some($it) = $($path::)+cast_ref($node) { $res } else )*
{ $catch_all }
}};
}
/// Declares a custom union AstNode type with an ungram-like syntax
///
/// # Example
///
/// ```ignore
/// declare_node_union! {
/// /// Matches an if statement or a conditional expression
/// pub(crate) JsAnyConditional = JsIfStatement | JsConditionalExpression
/// }
/// ```
#[macro_export]
macro_rules! declare_node_union {
(@merge_kind $head:ident ) => {
$head::KIND_SET
};
(@merge_kind $head:ident $( $rest:ident )* ) => {
$head::KIND_SET.union($crate::declare_node_union!( @merge_kind $( $rest )* ))
};
( $( #[$attr:meta] )* $vis:vis $name:ident = $( $variant:ident )|* ) => {
$( #[$attr] )*
#[allow(clippy::enum_variant_names)]
#[derive(Clone, PartialEq, Eq, Hash)]
$vis enum $name {
$( $variant($variant), )*
}
impl $crate::AstNode for $name {
type Language = <( $( $variant, )* ) as $crate::macros::UnionLanguage>::Language;
const KIND_SET: $crate::SyntaxKindSet<Self::Language> = $crate::declare_node_union!( @merge_kind $( $variant )* );
fn can_cast(kind: <Self::Language as $crate::Language>::Kind) -> bool {
$( $variant::can_cast(kind) )||*
}
fn cast(syntax: $crate::SyntaxNode<Self::Language>) -> Option<Self>
where
Self: Sized,
{
$( if $variant::can_cast(syntax.kind()) {
return Some(Self::$variant($variant::unwrap_cast(syntax)));
} )*
None
}
fn syntax(&self) -> &$crate::SyntaxNode<Self::Language> {
match self {
$( Self::$variant(node) => node.syntax() ),*
}
}
fn into_syntax(self) -> $crate::SyntaxNode<Self::Language> {
match self {
$( Self::$variant(node) => node.into_syntax() ),*
}
}
}
impl std::fmt::Debug for $name {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
$( Self::$variant(it) => std::fmt::Debug::fmt(it, f), )*
}
}
}
impl std::fmt::Display for $name {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt($crate::AstNode::syntax(self), f)
}
}
$( impl From<$variant> for $name {
fn from(node: $variant) -> Self {
Self::$variant(node)
}
} )*
impl From<$name> for $crate::SyntaxNode<<$name as $crate::AstNode>::Language> {
fn from(n: $name) -> $crate::SyntaxNode<<$name as $crate::AstNode>::Language> {
match n {
$( $name::$variant(it) => it.into(), )*
}
}
}
impl From<$name> for $crate::SyntaxElement<<$name as $crate::AstNode>::Language> {
fn from(n: $name) -> $crate::SyntaxElement<<$name as $crate::AstNode>::Language> {
$crate::SyntaxNode::<<$name as $crate::AstNode>::Language>::from(n).into()
}
}
};
}
/// This trait is implemented for tuples of AstNode types of size 1 to 30 if
/// all node types share the same associated language (which is then aliased as
/// the `Language` associated type on [UnionLanguage] itself).
pub trait UnionLanguage {
type Language: Language;
}
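// Implemented recursively: each expansion emits the impl for the current tuple
// arity, constraining every element to share the head's language, then recurses
// with the head dropped until the tuple is empty.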
macro_rules! impl_union_language {
( $head:ident $( , $rest:ident )* ) => {
impl<$head $( , $rest )*> UnionLanguage for ($head, $( $rest ),*)
where
$head: AstNode $( , $rest: AstNode<Language = <$head as AstNode>::Language> )*
{
type Language = <$head as AstNode>::Language;
}
impl_union_language!( $( $rest ),* );
};
() => {};
}
impl_union_language!(
T00, T01, T02, T03, T04, T05, T06, T07, T08, T09, T10, T11, T12, T13, T14, T15, T16, T17, T18,
T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29
);


@ -1,284 +0,0 @@
//! Provides a sample language implementation that is useful in API explanations or tests.
use crate::raw_language::RawLanguageKind::{COMMA_TOKEN, LITERAL_EXPRESSION, ROOT};
use crate::{
AstNode, AstSeparatedList, Language, ParsedChildren, RawNodeSlots, RawSyntaxKind,
RawSyntaxNode, SyntaxFactory, SyntaxKind, SyntaxKindSet, SyntaxList, SyntaxNode, TreeBuilder,
};
#[doc(hidden)]
#[derive(Debug, Default, Hash, Copy, Eq, Ord, PartialEq, PartialOrd, Clone)]
pub struct RawLanguage;
impl Language for RawLanguage {
type Kind = RawLanguageKind;
type Root = RawLanguageRoot;
}
#[doc(hidden)]
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
#[repr(u16)]
#[allow(bad_style)]
pub enum RawLanguageKind {
ROOT = 0,
EXPRESSION_LIST = 1,
SEPARATED_EXPRESSION_LIST = 2,
COMMA_TOKEN = 3,
STRING_TOKEN = 4,
NUMBER_TOKEN = 5,
LITERAL_EXPRESSION = 6,
BOGUS = 7,
FOR_KW = 8,
L_PAREN_TOKEN = 9,
SEMICOLON_TOKEN = 10,
R_PAREN_TOKEN = 11,
EQUAL_TOKEN = 12,
LET_TOKEN = 13,
CONDITION = 14,
PLUS_TOKEN = 15,
WHITESPACE = 16,
TOMBSTONE = 17,
EOF = 18,
__LAST,
}
impl SyntaxKind for RawLanguageKind {
const TOMBSTONE: Self = RawLanguageKind::TOMBSTONE;
const EOF: Self = RawLanguageKind::EOF;
fn is_bogus(&self) -> bool {
self == &RawLanguageKind::BOGUS
}
fn to_bogus(&self) -> Self {
RawLanguageKind::BOGUS
}
fn to_raw(&self) -> RawSyntaxKind {
RawSyntaxKind(*self as u16)
}
#[allow(unsafe_code)]
fn from_raw(raw: RawSyntaxKind) -> Self {
assert!(raw.0 < RawLanguageKind::__LAST as u16);
unsafe { std::mem::transmute::<u16, RawLanguageKind>(raw.0) }
}
fn is_root(&self) -> bool {
self == &RawLanguageKind::ROOT
}
fn is_list(&self) -> bool {
matches!(
self,
RawLanguageKind::EXPRESSION_LIST | RawLanguageKind::SEPARATED_EXPRESSION_LIST
)
}
fn to_string(&self) -> Option<&'static str> {
let str = match self {
COMMA_TOKEN => ",",
RawLanguageKind::FOR_KW => "for",
RawLanguageKind::L_PAREN_TOKEN => "(",
RawLanguageKind::SEMICOLON_TOKEN => ";",
RawLanguageKind::R_PAREN_TOKEN => ")",
RawLanguageKind::EQUAL_TOKEN => "=",
RawLanguageKind::LET_TOKEN => "let",
RawLanguageKind::PLUS_TOKEN => "+",
_ => return None,
};
Some(str)
}
}
#[doc(hidden)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RawLanguageRoot {
node: SyntaxNode<RawLanguage>,
}
impl AstNode for RawLanguageRoot {
type Language = RawLanguage;
const KIND_SET: SyntaxKindSet<RawLanguage> =
SyntaxKindSet::from_raw(RawSyntaxKind(ROOT as u16));
fn can_cast(kind: RawLanguageKind) -> bool {
kind == ROOT
}
fn cast(syntax: SyntaxNode<RawLanguage>) -> Option<Self>
where
Self: Sized,
{
if syntax.kind() == ROOT {
Some(RawLanguageRoot { node: syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxNode<RawLanguage> {
&self.node
}
fn into_syntax(self) -> SyntaxNode<RawLanguage> {
self.node
}
}
#[doc(hidden)]
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct LiteralExpression {
node: SyntaxNode<RawLanguage>,
}
impl AstNode for LiteralExpression {
type Language = RawLanguage;
const KIND_SET: SyntaxKindSet<RawLanguage> =
SyntaxKindSet::from_raw(RawSyntaxKind(LITERAL_EXPRESSION as u16));
fn can_cast(kind: RawLanguageKind) -> bool {
kind == LITERAL_EXPRESSION
}
fn cast(syntax: SyntaxNode<RawLanguage>) -> Option<Self>
where
Self: Sized,
{
if syntax.kind() == LITERAL_EXPRESSION {
Some(LiteralExpression { node: syntax })
} else {
None
}
}
fn syntax(&self) -> &SyntaxNode<RawLanguage> {
&self.node
}
fn into_syntax(self) -> SyntaxNode<RawLanguage> {
self.node
}
}
#[doc(hidden)]
pub struct SeparatedExpressionList {
syntax_list: SyntaxList<RawLanguage>,
}
impl SeparatedExpressionList {
pub fn new(list: SyntaxList<RawLanguage>) -> Self {
Self { syntax_list: list }
}
}
impl AstSeparatedList for SeparatedExpressionList {
type Language = RawLanguage;
type Node = LiteralExpression;
fn syntax_list(&self) -> &SyntaxList<RawLanguage> {
&self.syntax_list
}
fn into_syntax_list(self) -> SyntaxList<RawLanguage> {
self.syntax_list
}
}
#[doc(hidden)]
#[derive(Debug)]
pub struct RawLanguageSyntaxFactory;
impl SyntaxFactory for RawLanguageSyntaxFactory {
type Kind = RawLanguageKind;
fn make_syntax(
kind: Self::Kind,
children: ParsedChildren<Self::Kind>,
) -> RawSyntaxNode<Self::Kind> {
match kind {
RawLanguageKind::BOGUS | RawLanguageKind::ROOT => {
RawSyntaxNode::new(kind, children.into_iter().map(Some))
}
RawLanguageKind::EXPRESSION_LIST => {
Self::make_node_list_syntax(kind, children, |kind| kind == LITERAL_EXPRESSION)
}
RawLanguageKind::SEPARATED_EXPRESSION_LIST => Self::make_separated_list_syntax(
kind,
children,
|kind| kind == LITERAL_EXPRESSION,
COMMA_TOKEN,
true,
),
RawLanguageKind::LITERAL_EXPRESSION => {
let actual_len = children.len();
if actual_len > 1 {
return RawSyntaxNode::new(kind.to_bogus(), children.into_iter().map(Some));
}
let mut elements = children.into_iter();
let current_element = elements.next();
if let Some(element) = &current_element {
if !matches!(
element.kind(),
RawLanguageKind::STRING_TOKEN | RawLanguageKind::NUMBER_TOKEN
) {
return RawSyntaxNode::new(
kind.to_bogus(),
std::iter::once(current_element),
);
}
} else {
return RawSyntaxNode::new(kind, std::iter::once(None));
}
RawSyntaxNode::new(kind, std::iter::once(current_element))
}
RawLanguageKind::CONDITION => {
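// A CONDITION node has exactly three slots: `(`, a literal expression,
// and `)`. Each expected child that is present is marked in `slots`;
// any leftover element makes the whole node bogus.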
let mut elements = (&children).into_iter();
let mut current_element = elements.next();
let mut slots: RawNodeSlots<3> = Default::default();
if let Some(element) = &current_element {
if element.kind() == RawLanguageKind::L_PAREN_TOKEN {
slots.mark_present();
current_element = elements.next();
}
}
slots.next_slot();
if let Some(element) = &current_element {
if element.kind() == RawLanguageKind::LITERAL_EXPRESSION {
slots.mark_present();
current_element = elements.next();
}
}
slots.next_slot();
if let Some(element) = &current_element {
if element.kind() == RawLanguageKind::R_PAREN_TOKEN {
slots.mark_present();
current_element = elements.next();
}
}
slots.next_slot();
if current_element.is_some() {
return RawSyntaxNode::new(kind.to_bogus(), children.into_iter().map(Some));
}
slots.into_node(kind, children)
}
_ => unreachable!("{:?} is not a node kind", kind),
}
}
}
#[doc(hidden)]
pub type RawSyntaxTreeBuilder<'a> = TreeBuilder<'a, RawLanguage, RawLanguageSyntaxFactory>;
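// A hedged sketch, not part of the original file: how `make_syntax` treats the
// fixed slots of `CONDITION`, assuming `TreeBuilder` dispatches node creation
// through `RawLanguageSyntaxFactory` and that `CONDITION`'s `to_bogus` maps to
// `BOGUS`. Expected children fill their slots in order; any leftover child
// demotes the node to its bogus kind.
#[cfg(test)]
mod factory_sketch {
    use super::*;

    #[test]
    fn condition_slots_accept_expected_children() {
        let mut builder = RawSyntaxTreeBuilder::new();
        builder.start_node(RawLanguageKind::CONDITION);
        builder.token(RawLanguageKind::L_PAREN_TOKEN, "(");
        builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
        builder.token(RawLanguageKind::NUMBER_TOKEN, "1");
        builder.finish_node();
        builder.token(RawLanguageKind::R_PAREN_TOKEN, ")");
        builder.finish_node();
        assert_eq!(builder.finish().kind(), RawLanguageKind::CONDITION);
    }

    #[test]
    fn leftover_children_demote_condition_to_bogus() {
        let mut builder = RawSyntaxTreeBuilder::new();
        builder.start_node(RawLanguageKind::CONDITION);
        builder.token(RawLanguageKind::L_PAREN_TOKEN, "(");
        builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
        builder.token(RawLanguageKind::NUMBER_TOKEN, "1");
        builder.finish_node();
        builder.token(RawLanguageKind::R_PAREN_TOKEN, ")");
        // A fourth child has no slot to occupy, so the factory falls back to BOGUS.
        builder.token(RawLanguageKind::SEMICOLON_TOKEN, ";");
        builder.finish_node();
        assert_eq!(builder.finish().kind(), RawLanguageKind::BOGUS);
    }
}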

View File

@ -1,89 +0,0 @@
use serde::ser::{Serialize, SerializeMap, SerializeSeq, Serializer};
use std::fmt;
use crate::{
syntax::{Language, SyntaxNode, SyntaxToken},
NodeOrToken,
};
struct SerDisplay<T>(T);
impl<T: fmt::Display> Serialize for SerDisplay<T> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.collect_str(&self.0)
}
}
struct DisplayDebug<T>(T);
impl<T: fmt::Debug> fmt::Display for DisplayDebug<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.0, f)
}
}
impl<L: Language> Serialize for SyntaxNode<L> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut state = serializer.serialize_map(Some(3))?;
state.serialize_entry("kind", &SerDisplay(DisplayDebug(self.kind())))?;
state.serialize_entry("text_range", &self.text_range())?;
state.serialize_entry("children", &Children(self))?;
state.end()
}
}
impl<L: Language> Serialize for SyntaxToken<L> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut state = serializer.serialize_map(Some(3))?;
state.serialize_entry("kind", &SerDisplay(DisplayDebug(self.kind())))?;
state.serialize_entry("text_range", &self.text_range())?;
state.serialize_entry("text", &self.text())?;
// To implement this, SyntaxTrivia will need to expose the kind and the length of each trivia piece
// state.serialize_entry("leading", &self.leading())?;
// state.serialize_entry("trailing", &self.trailing())?;
state.end()
}
}
struct Children<T>(T);
impl<L: Language> Serialize for Children<&'_ SyntaxNode<L>> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut state = serializer.serialize_seq(None)?;
self.0
.children_with_tokens()
.try_for_each(|element| match element {
NodeOrToken::Node(it) => state.serialize_element(&it),
NodeOrToken::Token(it) => state.serialize_element(&it),
})?;
state.end()
}
}
#[cfg(test)]
mod test {
use crate::raw_language::{RawLanguage, RawLanguageKind, RawLanguageSyntaxFactory};
#[test]
pub fn serialization() {
let mut builder: crate::TreeBuilder<RawLanguage, RawLanguageSyntaxFactory> =
crate::TreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT);
builder.token(RawLanguageKind::LET_TOKEN, "\n\tlet ");
builder.finish_node();
let root = builder.finish();
assert!(serde_json::to_string(&root).is_ok());
}
}
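// A hedged sketch, not part of the original file: checks only the field names
// emitted by the `Serialize` impls above. The exact encoding of `text_range`
// depends on `TextRange`'s own Serialize impl, so no values are asserted.
#[cfg(test)]
mod shape_test {
    use crate::raw_language::{RawLanguage, RawLanguageKind, RawLanguageSyntaxFactory};

    #[test]
    pub fn serialized_node_has_expected_fields() {
        let mut builder: crate::TreeBuilder<RawLanguage, RawLanguageSyntaxFactory> =
            crate::TreeBuilder::new();
        builder.start_node(RawLanguageKind::ROOT);
        builder.token(RawLanguageKind::LET_TOKEN, "let ");
        builder.finish_node();
        let root = builder.finish();
        let value: serde_json::Value =
            serde_json::from_str(&serde_json::to_string(&root).unwrap()).unwrap();
        // Mirrors the three entries written by `Serialize for SyntaxNode`.
        assert!(value.get("kind").is_some());
        assert!(value.get("text_range").is_some());
        assert!(value.get("children").is_some());
    }
}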

View File

@ -1,502 +0,0 @@
mod element;
mod node;
mod rewriter;
mod token;
mod trivia;
use crate::{AstNode, RawSyntaxKind};
pub use element::{SyntaxElement, SyntaxElementKey};
pub(crate) use node::SyntaxSlots;
pub use node::{
Preorder, PreorderWithTokens, SendNode, SyntaxElementChildren, SyntaxNode, SyntaxNodeChildren,
SyntaxNodeOptionExt, SyntaxSlot,
};
pub use rewriter::{SyntaxRewriter, VisitNodeSignal};
use std::fmt;
use std::fmt::Debug;
pub use token::SyntaxToken;
pub use trivia::{
chain_trivia_pieces, ChainTriviaPiecesIterator, SyntaxTrivia, SyntaxTriviaPiece,
SyntaxTriviaPieceComments, SyntaxTriviaPieceNewline, SyntaxTriviaPieceSkipped,
SyntaxTriviaPieceWhitespace, SyntaxTriviaPiecesIterator, TriviaPiece, TriviaPieceKind,
};
/// Type tag for each node or token of a language
pub trait SyntaxKind: fmt::Debug + PartialEq + Copy {
const TOMBSTONE: Self;
const EOF: Self;
/// Returns `true` if this kind represents a bogus node.
fn is_bogus(&self) -> bool;
/// Converts this kind into the best matching bogus node kind.
fn to_bogus(&self) -> Self;
/// Converts this kind to a raw syntax kind.
fn to_raw(&self) -> RawSyntaxKind;
/// Creates a syntax kind from a raw kind.
fn from_raw(raw: RawSyntaxKind) -> Self;
/// Returns `true` if this kind is for a root node.
fn is_root(&self) -> bool;
/// Returns `true` if this kind is a list node.
fn is_list(&self) -> bool;
/// Returns a string for keywords and punctuation tokens or `None` otherwise.
fn to_string(&self) -> Option<&'static str>;
}
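// A hedged sketch, not part of the original file: the minimal shape of a
// hand-written `SyntaxKind` implementation for a hypothetical `TinyKind`.
// Real languages generate this from their grammar; the sketch also assumes
// `RawSyntaxKind`'s `u16` payload is accessible here, as it is in
// `raw_language`.
#[cfg(test)]
mod syntax_kind_sketch {
    use super::SyntaxKind;
    use crate::RawSyntaxKind;

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum TinyKind {
        Tombstone,
        Eof,
        Root,
        Bogus,
    }

    impl SyntaxKind for TinyKind {
        const TOMBSTONE: Self = TinyKind::Tombstone;
        const EOF: Self = TinyKind::Eof;

        fn is_bogus(&self) -> bool {
            matches!(self, TinyKind::Bogus)
        }

        fn to_bogus(&self) -> Self {
            // Every kind degrades to the single bogus kind of this toy language.
            TinyKind::Bogus
        }

        fn to_raw(&self) -> RawSyntaxKind {
            RawSyntaxKind(*self as u16)
        }

        fn from_raw(raw: RawSyntaxKind) -> Self {
            match raw.0 {
                0 => TinyKind::Tombstone,
                1 => TinyKind::Eof,
                2 => TinyKind::Root,
                _ => TinyKind::Bogus,
            }
        }

        fn is_root(&self) -> bool {
            matches!(self, TinyKind::Root)
        }

        fn is_list(&self) -> bool {
            // The toy language has no list nodes.
            false
        }

        fn to_string(&self) -> Option<&'static str> {
            // No keywords or punctuation in the toy language.
            None
        }
    }
}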
pub trait Language: Sized + Clone + Copy + fmt::Debug + Eq + Ord + std::hash::Hash {
type Kind: SyntaxKind;
type Root: AstNode<Language = Self> + Clone + Eq + fmt::Debug;
}
/// A list of `SyntaxNode`s and/or `SyntaxToken`s
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct SyntaxList<L: Language> {
list: SyntaxNode<L>,
}
impl<L: Language> SyntaxList<L> {
/// Creates a new list wrapping a List `SyntaxNode`
fn new(node: SyntaxNode<L>) -> Self {
Self { list: node }
}
/// Iterates over the elements in the list.
pub fn iter(&self) -> SyntaxSlots<L> {
self.list.slots()
}
/// Returns the number of items in this list
pub fn len(&self) -> usize {
self.list.slots().len()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn first(&self) -> Option<SyntaxSlot<L>> {
self.list.slots().next()
}
pub fn last(&self) -> Option<SyntaxSlot<L>> {
self.list.slots().last()
}
pub fn node(&self) -> &SyntaxNode<L> {
&self.list
}
pub fn into_node(self) -> SyntaxNode<L> {
self.list
}
}
impl<L: Language> IntoIterator for &SyntaxList<L> {
type Item = SyntaxSlot<L>;
type IntoIter = SyntaxSlots<L>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl<L: Language> IntoIterator for SyntaxList<L> {
type Item = SyntaxSlot<L>;
type IntoIter = SyntaxSlots<L>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
#[cfg(test)]
mod tests {
use ruff_text_size::TextRange;
use crate::raw_language::{RawLanguageKind, RawSyntaxTreeBuilder};
use crate::syntax::TriviaPiece;
use crate::Direction;
#[test]
fn empty_list() {
let mut builder: RawSyntaxTreeBuilder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::EXPRESSION_LIST);
builder.finish_node();
let list = builder.finish().into_list();
assert!(list.is_empty());
assert_eq!(list.len(), 0);
assert_eq!(list.first(), None);
assert_eq!(list.last(), None);
assert_eq!(list.iter().collect::<Vec<_>>(), Vec::default());
}
#[test]
fn node_list() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::EXPRESSION_LIST);
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, "1");
builder.finish_node();
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, "2");
builder.finish_node();
builder.finish_node();
let node = builder.finish();
let list = node.into_list();
assert!(!list.is_empty());
assert_eq!(list.len(), 2);
let first = list.first().and_then(|e| e.into_node()).unwrap();
assert_eq!(first.kind(), RawLanguageKind::LITERAL_EXPRESSION);
assert_eq!(first.text(), "1");
let last = list.last().and_then(|e| e.into_node()).unwrap();
assert_eq!(last.kind(), RawLanguageKind::LITERAL_EXPRESSION);
assert_eq!(last.text(), "2");
let node_texts: Vec<_> = list
.iter()
.map(|e| e.into_node().map(|n| n.text().to_string()))
.collect();
assert_eq!(
node_texts,
vec![Some(String::from("1")), Some(String::from("2"))]
)
}
#[test]
fn node_or_token_list() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::SEPARATED_EXPRESSION_LIST);
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, "1");
builder.finish_node();
builder.token(RawLanguageKind::NUMBER_TOKEN, ",");
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, "2");
builder.finish_node();
builder.finish_node();
let node = builder.finish();
let list = node.into_list();
assert!(!list.is_empty());
assert_eq!(list.len(), 3);
let first = list.first().and_then(|e| e.into_node()).unwrap();
assert_eq!(first.kind(), RawLanguageKind::LITERAL_EXPRESSION);
assert_eq!(first.text(), "1");
let last = list.last().and_then(|e| e.into_node()).unwrap();
assert_eq!(last.kind(), RawLanguageKind::LITERAL_EXPRESSION);
assert_eq!(last.text(), "2");
let kinds: Vec<_> = list.iter().map(|e| e.kind()).collect();
assert_eq!(
kinds,
vec![
Some(RawLanguageKind::LITERAL_EXPRESSION),
Some(RawLanguageKind::NUMBER_TOKEN),
Some(RawLanguageKind::LITERAL_EXPRESSION)
]
)
}
#[test]
fn siblings() {
let mut builder = RawSyntaxTreeBuilder::new();
// list
builder.start_node(RawLanguageKind::SEPARATED_EXPRESSION_LIST);
// element 1
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, "a");
builder.finish_node();
// element 2
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, "b");
builder.finish_node();
// Missing ,
// element 3
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, "c");
builder.finish_node();
builder.finish_node();
let root = builder.finish();
let first = root.children().next().unwrap();
assert_eq!(first.text().to_string(), "a");
assert_eq!(
first.next_sibling().map(|e| e.text().to_string()),
Some(String::from("b"))
);
let second = root.children().nth(1).unwrap();
assert_eq!(second.text().to_string(), "b");
// Skips the missing element
assert_eq!(
second.next_sibling().map(|e| e.text().to_string()),
Some(String::from("c"))
);
assert_eq!(
second.prev_sibling().map(|e| e.text().to_string()),
Some(String::from("a"))
);
let last = root.children().last().unwrap();
assert_eq!(last.text(), "c");
assert_eq!(last.next_sibling(), None);
assert_eq!(
last.prev_sibling().map(|e| e.text().to_string()),
Some(String::from("b"))
);
assert_eq!(
first
.siblings(Direction::Next)
.map(|s| s.text().to_string())
.collect::<Vec<_>>(),
vec!["a", "b", "c"]
);
assert_eq!(
last.siblings(Direction::Prev)
.map(|s| s.text().to_string())
.collect::<Vec<_>>(),
vec!["c", "b", "a"]
);
}
#[test]
fn siblings_with_tokens() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT);
builder.token(RawLanguageKind::FOR_KW, "for");
builder.token(RawLanguageKind::L_PAREN_TOKEN, "(");
builder.token(RawLanguageKind::SEMICOLON_TOKEN, ";");
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::STRING_TOKEN, "x");
builder.finish_node();
builder.token(RawLanguageKind::SEMICOLON_TOKEN, ";");
builder.token(RawLanguageKind::R_PAREN_TOKEN, ")");
builder.finish_node();
let root = builder.finish();
let first_semicolon = root
.children_with_tokens()
.nth(2)
.and_then(|e| e.into_token())
.unwrap();
assert_eq!(first_semicolon.text(), ";");
assert_eq!(
first_semicolon
.siblings_with_tokens(Direction::Next)
.map(|e| e.to_string())
.collect::<Vec<_>>(),
vec!["x", ";", ")"]
);
assert_eq!(
first_semicolon.next_sibling_or_token(),
first_semicolon.siblings_with_tokens(Direction::Next).next()
);
assert_eq!(
first_semicolon.prev_sibling_or_token(),
first_semicolon.siblings_with_tokens(Direction::Prev).next()
);
}
#[test]
pub fn syntax_text_and_len() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT);
builder.token_with_trivia(
RawLanguageKind::LET_TOKEN,
"\n\t let \t\t",
&[TriviaPiece::whitespace(3)],
&[TriviaPiece::whitespace(3)],
);
builder.finish_node();
// Node texts
let node = builder.finish();
assert_eq!("\n\t let \t\t", node.text());
assert_eq!("let", node.text_trimmed());
assert_eq!("\n\t ", node.first_leading_trivia().unwrap().text());
assert_eq!(" \t\t", node.last_trailing_trivia().unwrap().text());
// Token texts
let token = node.first_token().unwrap();
assert_eq!("\n\t let \t\t", token.text());
assert_eq!("let", token.text_trimmed());
assert_eq!("\n\t ", token.leading_trivia().text());
assert_eq!(" \t\t", token.trailing_trivia().text());
}
#[test]
pub fn syntax_range() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT);
builder.token_with_trivia(
RawLanguageKind::LET_TOKEN,
"\n\t let \t\t",
&[TriviaPiece::whitespace(3)],
&[TriviaPiece::whitespace(3)],
);
builder.token_with_trivia(
RawLanguageKind::LET_TOKEN,
"a ",
&[TriviaPiece::whitespace(0)],
&[TriviaPiece::whitespace(1)],
);
builder.token_with_trivia(
RawLanguageKind::EQUAL_TOKEN,
"\n=\n",
&[TriviaPiece::whitespace(1)],
&[TriviaPiece::whitespace(1)],
);
builder.token(RawLanguageKind::NUMBER_TOKEN, "1");
builder.token_with_trivia(
RawLanguageKind::SEMICOLON_TOKEN,
";\t\t",
&[],
&[TriviaPiece::whitespace(2)],
);
builder.finish_node();
let node = builder.finish();
// Node Ranges
assert_eq!(TextRange::new(0.into(), 18.into()), node.text_range());
assert_eq!(
TextRange::new(3.into(), 16.into()),
node.text_trimmed_range()
);
assert_eq!(
TextRange::new(0.into(), 3.into()),
node.first_leading_trivia().unwrap().text_range()
);
assert_eq!(
TextRange::new(16.into(), 18.into()),
node.last_trailing_trivia().unwrap().text_range()
);
// as NodeOrToken
let eq_token = node
.descendants_with_tokens(Direction::Next)
.find(|x| x.kind() == RawLanguageKind::EQUAL_TOKEN)
.unwrap();
assert_eq!(TextRange::new(11.into(), 14.into()), eq_token.text_range());
assert_eq!(
TextRange::new(12.into(), 13.into()),
eq_token.text_trimmed_range()
);
assert_eq!(
TextRange::new(11.into(), 12.into()),
eq_token.leading_trivia().unwrap().text_range()
);
assert_eq!(
TextRange::new(13.into(), 14.into()),
eq_token.trailing_trivia().unwrap().text_range()
);
// as Token
let eq_token = eq_token.as_token().unwrap();
assert_eq!(TextRange::new(11.into(), 14.into()), eq_token.text_range());
assert_eq!(
TextRange::new(12.into(), 13.into()),
eq_token.text_trimmed_range()
);
assert_eq!(
TextRange::new(11.into(), 12.into()),
eq_token.leading_trivia().text_range()
);
assert_eq!(
TextRange::new(13.into(), 14.into()),
eq_token.trailing_trivia().text_range()
);
}
#[test]
pub fn syntax_trivia_pieces() {
use crate::*;
let node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
builder.token_with_trivia(
RawLanguageKind::LET_TOKEN,
"\n\t /**/let \t\t",
&[
TriviaPiece::whitespace(3),
TriviaPiece::single_line_comment(4),
],
&[TriviaPiece::whitespace(3)],
);
});
let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
assert_eq!(2, pieces.len());
assert_eq!("\n\t ", pieces[0].text());
assert_eq!(TextSize::from(3), pieces[0].text_len());
assert_eq!(TextRange::new(0.into(), 3.into()), pieces[0].text_range());
assert!(pieces[0].is_whitespace());
assert_eq!("/**/", pieces[1].text());
assert_eq!(TextSize::from(4), pieces[1].text_len());
assert_eq!(TextRange::new(3.into(), 7.into()), pieces[1].text_range());
assert!(pieces[1].is_comments());
let pieces_rev: Vec<_> = node
.first_leading_trivia()
.unwrap()
.pieces()
.rev()
.collect();
assert_eq!(2, pieces_rev.len());
assert_eq!("/**/", pieces_rev[0].text());
assert_eq!("\n\t ", pieces_rev[1].text());
}
}

View File

@ -1,137 +0,0 @@
use crate::syntax::SyntaxTrivia;
use crate::{cursor, Language, NodeOrToken, SyntaxNode, SyntaxToken};
use ruff_text_size::{TextRange, TextSize};
use std::iter;
use std::ptr::NonNull;
pub type SyntaxElement<L> = NodeOrToken<SyntaxNode<L>, SyntaxToken<L>>;
impl<L: Language> SyntaxElement<L> {
pub fn key(&self) -> SyntaxElementKey {
match self {
NodeOrToken::Node(it) => it.key(),
NodeOrToken::Token(it) => it.key(),
}
}
pub fn text_range(&self) -> TextRange {
match self {
NodeOrToken::Node(it) => it.text_range(),
NodeOrToken::Token(it) => it.text_range(),
}
}
pub fn text_trimmed_range(&self) -> TextRange {
match self {
NodeOrToken::Node(it) => it.text_trimmed_range(),
NodeOrToken::Token(it) => it.text_trimmed_range(),
}
}
pub fn leading_trivia(&self) -> Option<SyntaxTrivia<L>> {
match self {
NodeOrToken::Node(it) => it.first_leading_trivia(),
NodeOrToken::Token(it) => Some(it.leading_trivia()),
}
}
pub fn trailing_trivia(&self) -> Option<SyntaxTrivia<L>> {
match self {
NodeOrToken::Node(it) => it.last_trailing_trivia(),
NodeOrToken::Token(it) => Some(it.trailing_trivia()),
}
}
pub fn kind(&self) -> L::Kind {
match self {
NodeOrToken::Node(it) => it.kind(),
NodeOrToken::Token(it) => it.kind(),
}
}
pub fn parent(&self) -> Option<SyntaxNode<L>> {
match self {
NodeOrToken::Node(it) => it.parent(),
NodeOrToken::Token(it) => it.parent(),
}
}
pub(crate) fn index(&self) -> usize {
match self {
NodeOrToken::Node(it) => it.index(),
NodeOrToken::Token(it) => it.index(),
}
}
pub fn ancestors(&self) -> impl Iterator<Item = SyntaxNode<L>> {
let first = match self {
NodeOrToken::Node(it) => Some(it.clone()),
NodeOrToken::Token(it) => it.parent(),
};
iter::successors(first, SyntaxNode::parent)
}
pub fn next_sibling_or_token(&self) -> Option<SyntaxElement<L>> {
match self {
NodeOrToken::Node(it) => it.next_sibling_or_token(),
NodeOrToken::Token(it) => it.next_sibling_or_token(),
}
}
pub fn prev_sibling_or_token(&self) -> Option<SyntaxElement<L>> {
match self {
NodeOrToken::Node(it) => it.prev_sibling_or_token(),
NodeOrToken::Token(it) => it.prev_sibling_or_token(),
}
}
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn detach(self) -> Self {
match self {
NodeOrToken::Node(it) => Self::Node(it.detach()),
NodeOrToken::Token(it) => Self::Token(it.detach()),
}
}
}
impl<L: Language> From<cursor::SyntaxElement> for SyntaxElement<L> {
fn from(raw: cursor::SyntaxElement) -> SyntaxElement<L> {
match raw {
NodeOrToken::Node(it) => NodeOrToken::Node(it.into()),
NodeOrToken::Token(it) => NodeOrToken::Token(it.into()),
}
}
}
impl<L: Language> From<SyntaxElement<L>> for cursor::SyntaxElement {
fn from(element: SyntaxElement<L>) -> cursor::SyntaxElement {
match element {
NodeOrToken::Node(it) => NodeOrToken::Node(it.into()),
NodeOrToken::Token(it) => NodeOrToken::Token(it.into()),
}
}
}
impl<L: Language> From<SyntaxToken<L>> for SyntaxElement<L> {
fn from(token: SyntaxToken<L>) -> SyntaxElement<L> {
NodeOrToken::Token(token)
}
}
impl<L: Language> From<SyntaxNode<L>> for SyntaxElement<L> {
fn from(node: SyntaxNode<L>) -> SyntaxElement<L> {
NodeOrToken::Node(node)
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct SyntaxElementKey {
node_data: NonNull<()>,
offset: TextSize,
}
impl SyntaxElementKey {
pub(crate) fn new(node_data: NonNull<()>, offset: TextSize) -> Self {
Self { node_data, offset }
}
}
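// A hedged sketch, not part of the original file: `SyntaxElement::ancestors`
// starts at the element itself for nodes, but at the parent for tokens.
#[cfg(test)]
mod ancestors_sketch {
    use super::SyntaxElement;
    use crate::raw_language::{RawLanguageKind, RawSyntaxTreeBuilder};

    #[test]
    fn ancestors_of_nodes_and_tokens() {
        let root = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
            builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
            builder.token(RawLanguageKind::NUMBER_TOKEN, "1");
            builder.finish_node();
        });

        // A node element yields itself first.
        let node_chain: Vec<_> = SyntaxElement::from(root.clone())
            .ancestors()
            .map(|node| node.kind())
            .collect();
        assert_eq!(node_chain, vec![RawLanguageKind::ROOT]);

        // A token element starts at its parent node.
        let token = root.first_token().unwrap();
        let token_chain: Vec<_> = SyntaxElement::from(token)
            .ancestors()
            .map(|node| node.kind())
            .collect();
        assert_eq!(
            token_chain,
            vec![RawLanguageKind::LITERAL_EXPRESSION, RawLanguageKind::ROOT]
        );
    }
}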

View File

@ -1,817 +0,0 @@
use crate::green::GreenElement;
use crate::syntax::element::{SyntaxElement, SyntaxElementKey};
use crate::syntax::SyntaxTrivia;
use crate::{
cursor, Direction, GreenNode, Language, NodeOrToken, SyntaxKind, SyntaxList, SyntaxNodeText,
SyntaxToken, TokenAtOffset, WalkEvent,
};
use ruff_text_size::{TextRange, TextSize};
#[cfg(feature = "serde")]
use serde::Serialize;
use std::any::TypeId;
use std::fmt::{Debug, Formatter};
use std::iter::FusedIterator;
use std::marker::PhantomData;
use std::{fmt, ops};
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct SyntaxNode<L: Language> {
raw: cursor::SyntaxNode,
_p: PhantomData<L>,
}
impl<L: Language> SyntaxNode<L> {
pub(crate) fn new_root(green: GreenNode) -> SyntaxNode<L> {
SyntaxNode::from(cursor::SyntaxNode::new_root(green))
}
/// Create a new detached (root) node from a syntax kind and an iterator of slots
///
/// In general, this function should not be used directly, but through the
/// type-checked factory functions / builders generated from the grammar of
/// the corresponding language (e.g. `ruff_js_factory::make`)
pub fn new_detached<I>(kind: L::Kind, slots: I) -> SyntaxNode<L>
where
I: IntoIterator<Item = Option<SyntaxElement<L>>>,
I::IntoIter: ExactSizeIterator,
{
SyntaxNode::from(cursor::SyntaxNode::new_root(GreenNode::new(
kind.to_raw(),
slots.into_iter().map(|slot| {
slot.map(|element| match element {
NodeOrToken::Node(node) => GreenElement::Node(node.green_node()),
NodeOrToken::Token(token) => GreenElement::Token(token.green_token()),
})
}),
)))
}
fn green_node(&self) -> GreenNode {
self.raw.green().to_owned()
}
pub fn key(&self) -> SyntaxElementKey {
let (node_data, offset) = self.raw.key();
SyntaxElementKey::new(node_data, offset)
}
/// Returns the element stored in the slot with the given index. Returns [None] if the slot is empty.
///
/// ## Panics
/// If the slot index is out of bounds
#[inline]
pub fn element_in_slot(&self, slot: u32) -> Option<SyntaxElement<L>> {
self.raw.element_in_slot(slot).map(SyntaxElement::from)
}
pub fn kind(&self) -> L::Kind {
L::Kind::from_raw(self.raw.kind())
}
/// Returns the text of all descendant tokens combined, including all trivia.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// builder.token(RawLanguageKind::STRING_TOKEN, "a");
/// builder.token_with_trivia(
/// RawLanguageKind::SEMICOLON_TOKEN,
/// "; \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// assert_eq!("\n\t let \t\ta; \t\t", node.text());
/// ```
pub fn text(&self) -> SyntaxNodeText {
self.raw.text()
}
/// Returns the text of all descendant tokens combined,
/// excluding the first token's leading trivia and the last token's trailing trivia.
/// All other trivia is included.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// builder.token(RawLanguageKind::STRING_TOKEN, "a");
/// builder.token_with_trivia(
/// RawLanguageKind::SEMICOLON_TOKEN,
/// "; \t\t",
/// &[],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// assert_eq!("let \t\ta;", node.text_trimmed());
/// ```
pub fn text_trimmed(&self) -> SyntaxNodeText {
self.raw.text_trimmed()
}
/// Returns the range corresponding to the text of all descendant tokens combined, including all trivia.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// builder.token(RawLanguageKind::STRING_TOKEN, "a");
/// builder.token_with_trivia(
/// RawLanguageKind::SEMICOLON_TOKEN,
/// "; \t\t",
/// &[],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let range = node.text_range();
/// assert_eq!(0u32, u32::from(range.start()));
/// assert_eq!(14u32, u32::from(range.end()));
/// ```
pub fn text_range(&self) -> TextRange {
self.raw.text_range()
}
/// Returns the range corresponding to the text of all descendant tokens combined,
/// excluding the first token's leading trivia and the last token's trailing trivia.
/// All other trivia is included.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// builder.token(RawLanguageKind::STRING_TOKEN, "a");
/// builder.token_with_trivia(
/// RawLanguageKind::SEMICOLON_TOKEN,
/// "; \t\t",
/// &[],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let range = node.text_trimmed_range();
/// assert_eq!(3u32, u32::from(range.start()));
/// assert_eq!(11u32, u32::from(range.end()));
/// ```
pub fn text_trimmed_range(&self) -> TextRange {
self.raw.text_trimmed_range()
}
/// Returns the leading trivia of the [first_token](SyntaxNode::first_token), or [None] if the node does not have any descendant tokens.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// builder.token(RawLanguageKind::STRING_TOKEN, "a");
/// builder.token_with_trivia(
/// RawLanguageKind::SEMICOLON_TOKEN,
/// "; \t\t",
/// &[],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let trivia = node.first_leading_trivia();
/// assert!(trivia.is_some());
/// assert_eq!("\n\t ", trivia.unwrap().text());
///
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {});
/// let trivia = node.first_leading_trivia();
/// assert!(trivia.is_none());
/// ```
pub fn first_leading_trivia(&self) -> Option<SyntaxTrivia<L>> {
self.raw.first_leading_trivia().map(SyntaxTrivia::new)
}
/// Returns the trailing trivia of the [last_token](SyntaxNode::last_token), or [None] if the node does not have any descendant tokens.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// builder.token(RawLanguageKind::STRING_TOKEN, "a");
/// builder.token_with_trivia(
/// RawLanguageKind::SEMICOLON_TOKEN,
/// "; \t\t",
/// &[],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let trivia = node.last_trailing_trivia();
/// assert!(trivia.is_some());
/// assert_eq!(" \t\t", trivia.unwrap().text());
///
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {});
/// let trivia = node.last_trailing_trivia();
/// assert!(trivia.is_none());
/// ```
pub fn last_trailing_trivia(&self) -> Option<SyntaxTrivia<L>> {
self.raw.last_trailing_trivia().map(SyntaxTrivia::new)
}
pub fn parent(&self) -> Option<SyntaxNode<L>> {
self.raw.parent().map(Self::from)
}
/// Returns the grandparent.
pub fn grand_parent(&self) -> Option<SyntaxNode<L>> {
self.parent().and_then(|parent| parent.parent())
}
/// Returns the index of this node inside its parent
#[inline]
pub fn index(&self) -> usize {
self.raw.index()
}
pub fn ancestors(&self) -> impl Iterator<Item = SyntaxNode<L>> {
self.raw.ancestors().map(SyntaxNode::from)
}
pub fn children(&self) -> SyntaxNodeChildren<L> {
SyntaxNodeChildren {
raw: self.raw.children(),
_p: PhantomData,
}
}
/// Returns an iterator over all the slots of this syntax node.
pub fn slots(&self) -> SyntaxSlots<L> {
SyntaxSlots {
raw: self.raw.slots(),
_p: PhantomData,
}
}
pub fn children_with_tokens(&self) -> SyntaxElementChildren<L> {
SyntaxElementChildren {
raw: self.raw.children_with_tokens(),
_p: PhantomData,
}
}
pub fn tokens(&self) -> impl Iterator<Item = SyntaxToken<L>> + DoubleEndedIterator + '_ {
self.raw.tokens().map(SyntaxToken::from)
}
pub fn first_child(&self) -> Option<SyntaxNode<L>> {
self.raw.first_child().map(Self::from)
}
pub fn last_child(&self) -> Option<SyntaxNode<L>> {
self.raw.last_child().map(Self::from)
}
pub fn first_child_or_token(&self) -> Option<SyntaxElement<L>> {
self.raw.first_child_or_token().map(NodeOrToken::from)
}
pub fn last_child_or_token(&self) -> Option<SyntaxElement<L>> {
self.raw.last_child_or_token().map(NodeOrToken::from)
}
pub fn next_sibling(&self) -> Option<SyntaxNode<L>> {
self.raw.next_sibling().map(Self::from)
}
pub fn prev_sibling(&self) -> Option<SyntaxNode<L>> {
self.raw.prev_sibling().map(Self::from)
}
pub fn next_sibling_or_token(&self) -> Option<SyntaxElement<L>> {
self.raw.next_sibling_or_token().map(NodeOrToken::from)
}
pub fn prev_sibling_or_token(&self) -> Option<SyntaxElement<L>> {
self.raw.prev_sibling_or_token().map(NodeOrToken::from)
}
/// Return the leftmost token in the subtree of this node.
pub fn first_token(&self) -> Option<SyntaxToken<L>> {
self.raw.first_token().map(SyntaxToken::from)
}
/// Return the rightmost token in the subtree of this node.
pub fn last_token(&self) -> Option<SyntaxToken<L>> {
self.raw.last_token().map(SyntaxToken::from)
}
pub fn siblings(&self, direction: Direction) -> impl Iterator<Item = SyntaxNode<L>> {
self.raw.siblings(direction).map(SyntaxNode::from)
}
pub fn siblings_with_tokens(
&self,
direction: Direction,
) -> impl Iterator<Item = SyntaxElement<L>> {
self.raw
.siblings_with_tokens(direction)
.map(SyntaxElement::from)
}
pub fn descendants(&self) -> impl Iterator<Item = SyntaxNode<L>> {
self.raw.descendants().map(SyntaxNode::from)
}
pub fn descendants_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxToken<L>> {
self.descendants_with_tokens(direction)
.filter_map(|x| x.as_token().cloned())
}
pub fn descendants_with_tokens(
&self,
direction: Direction,
) -> impl Iterator<Item = SyntaxElement<L>> {
self.raw
.descendants_with_tokens(direction)
.map(NodeOrToken::from)
}
/// Traverse the subtree rooted at the current node (including the current
/// node) in preorder, excluding tokens.
pub fn preorder(&self) -> Preorder<L> {
Preorder {
raw: self.raw.preorder(),
_p: PhantomData,
}
}
/// Traverse the subtree rooted at the current node (including the current
/// node) in preorder, including tokens.
pub fn preorder_with_tokens(&self, direction: Direction) -> PreorderWithTokens<L> {
PreorderWithTokens {
raw: self.raw.preorder_with_tokens(direction),
_p: PhantomData,
}
}
/// Find a token in the subtree corresponding to this node, which covers the offset.
/// Precondition: offset must be within node's range.
pub fn token_at_offset(&self, offset: TextSize) -> TokenAtOffset<SyntaxToken<L>> {
self.raw.token_at_offset(offset).map(SyntaxToken::from)
}
/// Return the deepest node or token in the current subtree that fully
/// contains the range. If the range is empty and is contained in two leaf
/// nodes, either one can be returned. Precondition: range must be contained
/// within the current node
pub fn covering_element(&self, range: TextRange) -> SyntaxElement<L> {
NodeOrToken::from(self.raw.covering_element(range))
}
/// Finds a [`SyntaxElement`] which intersects with a given `range`. If
/// there are several intersecting elements, any one can be returned.
///
/// The method uses binary search internally, so its complexity is
/// `O(log(N))` where `N = self.children_with_tokens().count()`.
pub fn child_or_token_at_range(&self, range: TextRange) -> Option<SyntaxElement<L>> {
self.raw
.child_or_token_at_range(range)
.map(SyntaxElement::from)
}
/// Returns an independent copy of the subtree rooted at this node.
///
/// The parent of the returned node will be `None`, the start offset will be
/// zero, but, otherwise, it'll be equivalent to the source node.
pub fn clone_subtree(&self) -> SyntaxNode<L> {
SyntaxNode::from(self.raw.clone_subtree())
}
/// Return a new version of this node detached from its parent node
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn detach(self) -> Self {
Self {
raw: self.raw.detach(),
_p: PhantomData,
}
}
/// Return a clone of this node with the specified range of slots replaced
/// with the elements of the provided iterator
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn splice_slots<R, I>(self, range: R, replace_with: I) -> Self
where
R: ops::RangeBounds<usize>,
I: IntoIterator<Item = Option<SyntaxElement<L>>>,
{
Self {
raw: self.raw.splice_slots(
range,
replace_with
.into_iter()
.map(|element| element.map(cursor::SyntaxElement::from)),
),
_p: PhantomData,
}
}
/// Return a new version of this node with the element `prev_elem` replaced with `next_elem`
///
/// `prev_elem` can be a direct child of this node, or an indirect child through any descendant node
///
/// Returns `None` if `prev_elem` is not a descendant of this node
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn replace_child(
self,
prev_elem: SyntaxElement<L>,
next_elem: SyntaxElement<L>,
) -> Option<Self> {
Some(Self {
raw: self.raw.replace_child(prev_elem.into(), next_elem.into())?,
_p: PhantomData,
})
}
pub fn into_list(self) -> SyntaxList<L> {
SyntaxList::new(self)
}
/// Whether the node contains any comments. This function checks
/// **all the descendants** of the current node.
pub fn has_comments_descendants(&self) -> bool {
self.descendants_tokens(Direction::Next)
.any(|tok| tok.has_trailing_comments() || tok.has_leading_comments())
}
/// Checks whether the current node has leading or trailing comments
pub fn has_comments_direct(&self) -> bool {
self.has_trailing_comments() || self.has_leading_comments()
}
/// Checks whether the node has comments at its edges, i.e. whether the
/// first or last token carries comments (leading or trailing)
pub fn first_or_last_token_have_comments(&self) -> bool {
self.first_token_has_comments() || self.last_token_has_comments()
}
/// Whether the node contains trailing comments.
pub fn has_trailing_comments(&self) -> bool {
self.last_token()
.map_or(false, |tok| tok.has_trailing_comments())
}
/// Whether the last token of a node has comments (leading or trailing)
pub fn last_token_has_comments(&self) -> bool {
self.last_token().map_or(false, |tok| {
tok.has_trailing_comments() || tok.has_leading_comments()
})
}
/// Whether the first token of a node has comments (leading or trailing)
pub fn first_token_has_comments(&self) -> bool {
self.first_token().map_or(false, |tok| {
tok.has_trailing_comments() || tok.has_leading_comments()
})
}
/// Whether the node contains leading comments.
pub fn has_leading_comments(&self) -> bool {
self.first_token()
.map_or(false, |tok| tok.has_leading_comments())
}
/// Whether the node contains leading newlines.
pub fn has_leading_newline(&self) -> bool {
self.first_token()
.map_or(false, |tok| tok.has_leading_newline())
}
}
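// A hedged sketch, not part of the original file: `splice_slots` produces a
// new tree and leaves the original untouched, which is why the `#[must_use]`
// annotations above insist that the result be propagated.
#[cfg(test)]
mod update_sketch {
    use crate::raw_language::{RawLanguageKind, RawSyntaxTreeBuilder};

    #[test]
    fn splice_slots_replaces_a_child() {
        let mut builder = RawSyntaxTreeBuilder::new();
        builder.start_node(RawLanguageKind::EXPRESSION_LIST);
        builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
        builder.token(RawLanguageKind::NUMBER_TOKEN, "1");
        builder.finish_node();
        builder.finish_node();
        let list = builder.finish();

        // Build a detached replacement node through the raw-language helpers.
        let replacement =
            RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::LITERAL_EXPRESSION, |builder| {
                builder.token(RawLanguageKind::NUMBER_TOKEN, "2");
            });

        let updated = list
            .clone()
            .splice_slots(0..=0, std::iter::once(Some(replacement.into())));

        assert_eq!("2", updated.text());
        // The original tree is immutable and unchanged.
        assert_eq!("1", list.text());
    }
}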
impl<L> SyntaxNode<L>
where
L: Language + 'static,
{
/// Create a [Send] + [Sync] handle to this node
///
/// Returns `None` if self is not a root node
pub fn as_send(&self) -> Option<SendNode> {
if self.parent().is_none() {
Some(SendNode {
language: TypeId::of::<L>(),
green: self.green_node(),
})
} else {
None
}
}
}
impl<L: Language> fmt::Debug for SyntaxNode<L> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if f.alternate() {
let mut level = 0;
for event in self.raw.preorder_slots() {
match event {
WalkEvent::Enter(element) => {
for _ in 0..level {
write!(f, " ")?;
}
match element {
cursor::SyntaxSlot::Node(node) => {
writeln!(f, "{}: {:?}", node.index(), SyntaxNode::<L>::from(node))?
}
cursor::SyntaxSlot::Token(token) => writeln!(
f,
"{}: {:?}",
token.index(),
SyntaxToken::<L>::from(token)
)?,
cursor::SyntaxSlot::Empty { index, .. } => {
writeln!(f, "{}: (empty)", index)?
}
}
level += 1;
}
WalkEvent::Leave(_) => level -= 1,
}
}
assert_eq!(level, 0);
Ok(())
} else {
write!(f, "{:?}@{:?}", self.kind(), self.text_range())
}
}
}
impl<L: Language> fmt::Display for SyntaxNode<L> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.raw, f)
}
}
impl<L: Language> From<SyntaxNode<L>> for cursor::SyntaxNode {
fn from(node: SyntaxNode<L>) -> cursor::SyntaxNode {
node.raw
}
}
impl<L: Language> From<cursor::SyntaxNode> for SyntaxNode<L> {
fn from(raw: cursor::SyntaxNode) -> SyntaxNode<L> {
SyntaxNode {
raw,
_p: PhantomData,
}
}
}
/// Language-agnostic representation of the root node of a syntax tree that can
/// be sent or shared between threads
#[derive(Clone)]
pub struct SendNode {
language: TypeId,
green: GreenNode,
}
impl SendNode {
/// Downcast this handle back into a [SyntaxNode]
///
/// Returns `None` if the specified language `L` is not the one this node
/// was created with
pub fn into_node<L>(self) -> Option<SyntaxNode<L>>
where
L: Language + 'static,
{
if TypeId::of::<L>() == self.language {
Some(SyntaxNode::new_root(self.green))
} else {
None
}
}
}
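// A hedged sketch, not part of the original file: round-tripping a root
// through `SendNode`, assuming `SendNode` is `Send` as its documentation
// states. The downcast happens on the receiving thread; only a plain
// `String` crosses back.
#[cfg(test)]
mod send_sketch {
    use crate::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};

    #[test]
    fn send_node_round_trip() {
        let root = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
            builder.token(RawLanguageKind::LET_TOKEN, "let ");
        });
        let handle = root.as_send().expect("only root nodes can be sent");
        let text = std::thread::spawn(move || {
            let node = handle
                .into_node::<RawLanguage>()
                .expect("the language matches, so the downcast succeeds");
            node.text().to_string()
        })
        .join()
        .unwrap();
        assert_eq!(text, root.text().to_string());
    }
}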
#[derive(Debug, Clone)]
pub struct SyntaxNodeChildren<L: Language> {
raw: cursor::SyntaxNodeChildren,
_p: PhantomData<L>,
}
impl<L: Language> Iterator for SyntaxNodeChildren<L> {
type Item = SyntaxNode<L>;
fn next(&mut self) -> Option<Self::Item> {
self.raw.next().map(SyntaxNode::from)
}
}
#[derive(Clone)]
pub struct SyntaxElementChildren<L: Language> {
raw: cursor::SyntaxElementChildren,
_p: PhantomData<L>,
}
impl<L: Language> Debug for SyntaxElementChildren<L> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.debug_list().entries(self.clone()).finish()
}
}
impl<L: Language> Default for SyntaxElementChildren<L> {
fn default() -> Self {
SyntaxElementChildren {
raw: cursor::SyntaxElementChildren::default(),
_p: PhantomData,
}
}
}
impl<L: Language> Iterator for SyntaxElementChildren<L> {
type Item = SyntaxElement<L>;
fn next(&mut self) -> Option<Self::Item> {
self.raw.next().map(NodeOrToken::from)
}
}
pub struct Preorder<L: Language> {
raw: cursor::Preorder,
_p: PhantomData<L>,
}
impl<L: Language> Preorder<L> {
pub fn skip_subtree(&mut self) {
self.raw.skip_subtree()
}
}
impl<L: Language> Iterator for Preorder<L> {
type Item = WalkEvent<SyntaxNode<L>>;
fn next(&mut self) -> Option<Self::Item> {
self.raw.next().map(|it| it.map(SyntaxNode::from))
}
}
pub struct PreorderWithTokens<L: Language> {
raw: cursor::PreorderWithTokens,
_p: PhantomData<L>,
}
impl<L: Language> PreorderWithTokens<L> {
pub fn skip_subtree(&mut self) {
self.raw.skip_subtree()
}
}
impl<L: Language> Iterator for PreorderWithTokens<L> {
type Item = WalkEvent<SyntaxElement<L>>;
fn next(&mut self) -> Option<Self::Item> {
self.raw.next().map(|it| it.map(SyntaxElement::from))
}
}
/// Each node has a slot for each of its children, regardless of whether the child is present.
/// A child that isn't present, either because it's optional or because of a syntax error,
/// is stored as a [SyntaxSlot::Empty] to preserve the index of each child.
#[derive(Debug, Clone, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize))]
pub enum SyntaxSlot<L: Language> {
/// Slot that stores a node child
Node(SyntaxNode<L>),
/// Slot that stores a token child
Token(SyntaxToken<L>),
/// Slot that marks that the child in this position isn't present in the source code.
Empty,
}
impl<L: Language> SyntaxSlot<L> {
pub fn into_node(self) -> Option<SyntaxNode<L>> {
match self {
SyntaxSlot::Node(node) => Some(node),
_ => None,
}
}
pub fn into_token(self) -> Option<SyntaxToken<L>> {
match self {
SyntaxSlot::Token(token) => Some(token),
_ => None,
}
}
pub fn into_syntax_element(self) -> Option<SyntaxElement<L>> {
match self {
SyntaxSlot::Node(node) => Some(SyntaxElement::Node(node)),
SyntaxSlot::Token(token) => Some(SyntaxElement::Token(token)),
_ => None,
}
}
pub fn kind(&self) -> Option<L::Kind> {
match self {
SyntaxSlot::Node(node) => Some(node.kind()),
SyntaxSlot::Token(token) => Some(token.kind()),
SyntaxSlot::Empty => None,
}
}
}
impl<L: Language> From<cursor::SyntaxSlot> for SyntaxSlot<L> {
fn from(raw: cursor::SyntaxSlot) -> Self {
match raw {
cursor::SyntaxSlot::Node(node) => SyntaxSlot::Node(node.into()),
cursor::SyntaxSlot::Token(token) => SyntaxSlot::Token(token.into()),
cursor::SyntaxSlot::Empty { .. } => SyntaxSlot::Empty,
}
}
}
/// Iterator over the slots of a node.
#[derive(Debug, Clone)]
pub struct SyntaxSlots<L> {
raw: cursor::SyntaxSlots,
_p: PhantomData<L>,
}
impl<L: Language> Iterator for SyntaxSlots<L> {
type Item = SyntaxSlot<L>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.raw.next().map(SyntaxSlot::from)
}
#[inline(always)]
fn size_hint(&self) -> (usize, Option<usize>) {
self.raw.size_hint()
}
#[inline]
fn last(self) -> Option<Self::Item>
where
Self: Sized,
{
self.raw.last().map(SyntaxSlot::from)
}
#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
self.raw.nth(n).map(SyntaxSlot::from)
}
}
impl<L: Language> FusedIterator for SyntaxSlots<L> {}
impl<L: Language> ExactSizeIterator for SyntaxSlots<L> {
#[inline(always)]
fn len(&self) -> usize {
self.raw.len()
}
}
impl<L: Language> DoubleEndedIterator for SyntaxSlots<L> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
self.raw.next_back().map(SyntaxSlot::from)
}
#[inline]
fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
self.raw.nth_back(n).map(SyntaxSlot::from)
}
}
/// Trait with extension methods for [Option<SyntaxNode>].
pub trait SyntaxNodeOptionExt<L: Language> {
/// Returns the kind of the node if self is [Some], [None] otherwise.
fn kind(&self) -> Option<L::Kind>;
}
impl<L: Language> SyntaxNodeOptionExt<L> for Option<&SyntaxNode<L>> {
fn kind(&self) -> Option<L::Kind> {
self.map(|node| node.kind())
}
}
impl<L: Language> SyntaxNodeOptionExt<L> for Option<SyntaxNode<L>> {
fn kind(&self) -> Option<L::Kind> {
self.as_ref().kind()
}
}

View File

@ -1,259 +0,0 @@
//! A module that exports utilities to rewrite syntax trees
use crate::{Language, SyntaxNode, SyntaxSlot, SyntaxToken};
use std::iter::once;
/// A visitor that re-writes a syntax tree while visiting the nodes.
///
/// The rewriter visits the nodes in pre-order, top-down: it first visits the `root`, then visits
/// the children of the root from left to right, recursively traversing into child nodes and
/// calling [`visit_node`](SyntaxRewriter::visit_node) for every node.
///
/// Inspired by Roslyn's [`CSharpSyntaxRewriter`](https://docs.microsoft.com/en-us/dotnet/api/microsoft.codeanalysis.csharp.csharpsyntaxrewriter?view=roslyn-dotnet-4.2.0)
///
/// # Unsupported
///
/// The current implementation does not yet support node removal.
///
/// # Examples
///
/// Implementation of a rewriter that replaces every literal expression node that contains a number token
/// with a bogus node.
///
/// ```
/// # use std::iter::once;
/// # use ruff_rowan::{AstNode, SyntaxNode, SyntaxRewriter, VisitNodeSignal};
/// # use ruff_rowan::raw_language::{LiteralExpression, RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
///
/// struct ReplaceNumberLiteralRewriter;
///
/// impl SyntaxRewriter for ReplaceNumberLiteralRewriter {
/// type Language = RawLanguage;
///
/// fn visit_node(
/// &mut self,
/// node: SyntaxNode<Self::Language>,
/// ) -> VisitNodeSignal<Self::Language> {
/// match node.kind() {
/// RawLanguageKind::LITERAL_EXPRESSION => {
/// let expression = LiteralExpression::unwrap_cast(node);
///
/// let mut token = expression
/// .syntax()
/// .slots()
/// .nth(0)
/// .unwrap()
/// .into_token()
/// .unwrap();
///
/// match token.kind() {
/// RawLanguageKind::NUMBER_TOKEN => {
/// // Use your language's syntax factory instead
/// let bogus_node = SyntaxNode::new_detached(
/// RawLanguageKind::BOGUS,
/// once(Some(token.into())),
/// );
///
/// VisitNodeSignal::Replace(bogus_node)
/// }
/// // Not interested in string literal expressions, continue traversal
/// _ => VisitNodeSignal::Traverse(expression.into_syntax()),
/// }
/// }
/// _ => {
/// // Traverse into the children of `node`
/// VisitNodeSignal::Traverse(node)
/// }
/// }
/// }
/// }
///
/// let mut builder = RawSyntaxTreeBuilder::new();
///
/// builder.start_node(RawLanguageKind::ROOT);
/// builder.start_node(RawLanguageKind::SEPARATED_EXPRESSION_LIST);
///
/// builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
/// builder.token(RawLanguageKind::NUMBER_TOKEN, "5");
/// builder.finish_node();
///
/// builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
/// builder.token(RawLanguageKind::STRING_TOKEN, "'abcd'");
/// builder.finish_node();
///
/// builder.finish_node();
/// builder.finish_node();
///
/// let root = builder.finish();
///
/// let transformed = ReplaceNumberLiteralRewriter.transform(root.clone());
///
/// let original_literal_expressions: Vec<_> = root
/// .descendants()
/// .filter(|p| p.kind() == RawLanguageKind::LITERAL_EXPRESSION)
/// .collect();
///
/// assert_ne!(
/// &root, &transformed,
/// "It returns a new root with the updated children"
/// );
///
/// let literal_expressions: Vec<_> = transformed
/// .descendants()
/// .filter(|p| p.kind() == RawLanguageKind::LITERAL_EXPRESSION)
/// .collect();
///
/// // The literal expression containing a string token should be unchanged
/// assert_eq!(&literal_expressions, &original_literal_expressions[1..]);
///
/// let mut bogus: Vec<_> = transformed
/// .descendants()
/// .filter(|p| p.kind() == RawLanguageKind::BOGUS)
/// .collect();
///
/// // It replaced the number literal expression with a bogus node.
/// assert_eq!(bogus.len(), 1);
/// assert_eq!(bogus.pop().unwrap().text(), "5");
/// ```
pub trait SyntaxRewriter {
type Language: Language;
/// Recursively transforms the subtree of `node` by calling [`visit_node`](SyntaxRewriter::visit_node)
/// for every node and [`visit_token`](SyntaxRewriter::visit_token) for every token in the subtree.
///
/// Returns a new syntax tree reflecting the rewriter's changes if it replaced any node, and
/// returns `node` unchanged if no changes were made.
fn transform(&mut self, node: SyntaxNode<Self::Language>) -> SyntaxNode<Self::Language>
where
Self: Sized,
{
match self.visit_node(node) {
VisitNodeSignal::Replace(updated) => updated,
VisitNodeSignal::Traverse(node) => traverse(node, self),
}
}
/// Called for every node in the tree. The method should return a signal specifying what should be done with the node.
///
/// * [VisitNodeSignal::Traverse]: Recurse into `node` so that [`visit_node`](SyntaxRewriter::visit_node)
/// gets called for all children of `node`. The `node` will only be replaced if any node in its subtree changes.
/// * [VisitNodeSignal::Replace]: Replaces `node` with the node specified in the [`Replace`](VisitNodeSignal::Replace) variant.
/// It's your responsibility to call [`transform`](SyntaxRewriter::transform) for any child of `node` into whose content
/// you want the rewriter to recurse.
fn visit_node(&mut self, node: SyntaxNode<Self::Language>) -> VisitNodeSignal<Self::Language> {
VisitNodeSignal::Traverse(node)
}
/// Called for every token in the tree. Returning a new token changes the token in the parent node.
fn visit_token(&mut self, token: SyntaxToken<Self::Language>) -> SyntaxToken<Self::Language> {
token
}
}
#[derive(Debug, Clone)]
pub enum VisitNodeSignal<L: Language> {
/// Signals the [SyntaxRewriter] to replace the current node with the specified node.
Replace(SyntaxNode<L>),
/// Signals the [SyntaxRewriter] to traverse into the children of the specified node.
Traverse(SyntaxNode<L>),
}
fn traverse<R>(mut parent: SyntaxNode<R::Language>, rewriter: &mut R) -> SyntaxNode<R::Language>
where
R: SyntaxRewriter,
{
for slot in parent.slots() {
match slot {
SyntaxSlot::Node(node) => {
let original_key = node.key();
let index = node.index();
let updated = rewriter.transform(node);
if updated.key() != original_key {
parent = parent.splice_slots(index..=index, once(Some(updated.into())));
}
}
SyntaxSlot::Token(token) => {
let original_key = token.key();
let index = token.index();
let updated = rewriter.visit_token(token);
if updated.key() != original_key {
parent = parent.splice_slots(index..=index, once(Some(updated.into())));
}
}
SyntaxSlot::Empty => {
// Nothing to visit
}
}
}
parent
}
#[cfg(test)]
mod tests {
use crate::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
use crate::{SyntaxNode, SyntaxRewriter, SyntaxToken, VisitNodeSignal};
#[test]
pub fn test_visits_each_node() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT);
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::NUMBER_TOKEN, "5");
builder.finish_node();
builder.finish_node();
let root = builder.finish();
let mut recorder = RecordRewriter::default();
let transformed = recorder.transform(root.clone());
assert_eq!(
&root, &transformed,
"It should return the same node if the rewriter doesn't replace a node."
);
let literal_expression = root
.descendants()
.find(|node| node.kind() == RawLanguageKind::LITERAL_EXPRESSION)
.unwrap();
assert_eq!(&recorder.nodes, &[root.clone(), literal_expression]);
let number_literal = root.first_token().unwrap();
assert_eq!(&recorder.tokens, &[number_literal]);
}
/// Visitor that records every `visit_node` and `visit_token` call.
#[derive(Default)]
struct RecordRewriter {
nodes: Vec<SyntaxNode<RawLanguage>>,
tokens: Vec<SyntaxToken<RawLanguage>>,
}
impl SyntaxRewriter for RecordRewriter {
type Language = RawLanguage;
fn visit_node(
&mut self,
node: SyntaxNode<Self::Language>,
) -> VisitNodeSignal<Self::Language> {
self.nodes.push(node.clone());
VisitNodeSignal::Traverse(node)
}
fn visit_token(
&mut self,
token: SyntaxToken<Self::Language>,
) -> SyntaxToken<Self::Language> {
self.tokens.push(token.clone());
token
}
}
}
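// A hedged sketch, not part of the original tests: a rewriter that only
// overrides `visit_token`. The replacement token is detached, so its key
// differs from the original's and `traverse` splices it into the parent.
#[cfg(test)]
mod token_rewrite_sketch {
    use crate::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
    use crate::{SyntaxRewriter, SyntaxToken, TriviaPiece};

    struct RenumberRewriter;

    impl SyntaxRewriter for RenumberRewriter {
        type Language = RawLanguage;

        fn visit_token(&mut self, token: SyntaxToken<RawLanguage>) -> SyntaxToken<RawLanguage> {
            if token.kind() == RawLanguageKind::NUMBER_TOKEN {
                // Create a detached replacement without any trivia.
                SyntaxToken::new_detached(
                    RawLanguageKind::NUMBER_TOKEN,
                    "42",
                    std::iter::empty::<TriviaPiece>(),
                    std::iter::empty::<TriviaPiece>(),
                )
            } else {
                token
            }
        }
    }

    #[test]
    fn rewrites_number_tokens() {
        let mut builder = RawSyntaxTreeBuilder::new();
        builder.start_node(RawLanguageKind::ROOT);
        builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
        builder.token(RawLanguageKind::NUMBER_TOKEN, "5");
        builder.finish_node();
        builder.finish_node();
        let root = builder.finish();

        let mut rewriter = RenumberRewriter;
        let transformed = rewriter.transform(root);
        assert_eq!("42", transformed.text());
    }
}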

View File

@ -1,425 +0,0 @@
use crate::green::{GreenToken, GreenTrivia};
use crate::syntax::element::SyntaxElementKey;
use crate::syntax::SyntaxTrivia;
use crate::syntax_token_text::SyntaxTokenText;
use crate::{
cursor, Direction, Language, NodeOrToken, SyntaxElement, SyntaxKind, SyntaxNode,
SyntaxTriviaPiece, TriviaPiece, TriviaPieceKind,
};
use ruff_text_size::{TextLen, TextRange, TextSize};
use std::fmt;
use std::marker::PhantomData;
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct SyntaxToken<L: Language> {
raw: cursor::SyntaxToken,
_p: PhantomData<L>,
}
impl<L: Language> SyntaxToken<L> {
/// Create a new token detached from any tree
///
/// This is mainly useful for creating a small number of individual tokens
/// when mutating an existing tree; the bulk of the tokens in a given file
/// should be created through the [crate::TreeBuilder] abstraction instead,
/// as it efficiently caches and reuses the created tokens
pub fn new_detached<Leading, Trailing>(
kind: L::Kind,
text: &str,
leading: Leading,
trailing: Trailing,
) -> Self
where
Leading: IntoIterator<Item = TriviaPiece>,
Leading::IntoIter: ExactSizeIterator,
Trailing: IntoIterator<Item = TriviaPiece>,
Trailing::IntoIter: ExactSizeIterator,
{
Self {
raw: cursor::SyntaxToken::new_detached(GreenToken::with_trivia(
kind.to_raw(),
text,
GreenTrivia::new(leading),
GreenTrivia::new(trailing),
)),
_p: PhantomData,
}
}
pub(super) fn green_token(&self) -> GreenToken {
self.raw.green().to_owned()
}
pub fn key(&self) -> SyntaxElementKey {
let (node_data, offset) = self.raw.key();
SyntaxElementKey::new(node_data, offset)
}
pub fn kind(&self) -> L::Kind {
L::Kind::from_raw(self.raw.kind())
}
pub fn text_range(&self) -> TextRange {
self.raw.text_range()
}
pub fn text_trimmed_range(&self) -> TextRange {
self.raw.text_trimmed_range()
}
pub(crate) fn index(&self) -> usize {
self.raw.index()
}
/// Returns the text of the token, including all trivia.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut token = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// })
/// .first_token()
/// .unwrap();
/// assert_eq!("\n\t let \t\t", token.text());
/// ```
pub fn text(&self) -> &str {
self.raw.text()
}
/// Returns the text of the token, including all trivia, as an owned value.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut token = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// })
/// .first_token()
/// .unwrap();
/// assert_eq!("\n\t let \t\t", token.token_text());
/// assert_eq!(
/// format!("{}", "\n\t let \t\t"),
/// format!("{}", token.token_text())
/// );
/// assert_eq!(
/// format!("{:?}", "\n\t let \t\t"),
/// format!("{:?}", token.token_text())
/// );
/// ```
pub fn token_text(&self) -> SyntaxTokenText {
self.raw.token_text()
}
pub fn token_text_trimmed(&self) -> SyntaxTokenText {
self.raw.token_text_trimmed()
}
/// Returns the text of the token, excluding all trivia.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut token = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// })
/// .first_token()
/// .unwrap();
/// assert_eq!("let", token.text_trimmed());
/// ```
pub fn text_trimmed(&self) -> &str {
self.raw.text_trimmed()
}
pub fn parent(&self) -> Option<SyntaxNode<L>> {
self.raw.parent().map(SyntaxNode::from)
}
pub fn ancestors(&self) -> impl Iterator<Item = SyntaxNode<L>> {
self.raw.ancestors().map(SyntaxNode::from)
}
pub fn next_sibling_or_token(&self) -> Option<SyntaxElement<L>> {
self.raw.next_sibling_or_token().map(NodeOrToken::from)
}
pub fn prev_sibling_or_token(&self) -> Option<SyntaxElement<L>> {
self.raw.prev_sibling_or_token().map(NodeOrToken::from)
}
pub fn siblings_with_tokens(
&self,
direction: Direction,
) -> impl Iterator<Item = SyntaxElement<L>> {
self.raw
.siblings_with_tokens(direction)
.map(SyntaxElement::from)
}
/// Next token in the tree (i.e., not necessarily a sibling).
pub fn next_token(&self) -> Option<SyntaxToken<L>> {
self.raw.next_token().map(SyntaxToken::from)
}
/// Previous token in the tree (i.e., not necessarily a sibling).
pub fn prev_token(&self) -> Option<SyntaxToken<L>> {
self.raw.prev_token().map(SyntaxToken::from)
}
/// Return a new version of this token detached from its parent node
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn detach(self) -> Self {
Self {
raw: self.raw.detach(),
_p: PhantomData,
}
}
/// Return a new version of this token with its leading trivia replaced with `trivia`
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn with_leading_trivia<'a, I>(&self, trivia: I) -> Self
where
I: IntoIterator<Item = (TriviaPieceKind, &'a str)>,
I::IntoIter: ExactSizeIterator,
{
let mut token_text = String::new();
let trivia = trivia.into_iter().map(|(kind, text)| {
token_text.push_str(text);
TriviaPiece::new(kind, TextSize::of(text))
});
let leading = GreenTrivia::new(trivia);
// Copy over token text and trailing trivia
let leading_len = self.raw.green().leading_trivia().text_len();
token_text.push_str(&self.text()[usize::from(leading_len)..]);
Self {
raw: cursor::SyntaxToken::new_detached(GreenToken::with_trivia(
self.kind().to_raw(),
&token_text,
leading,
self.green_token().trailing_trivia().clone(),
)),
_p: PhantomData,
}
}
/// Return a new version of this token with its leading trivia replaced with `trivia`
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn with_leading_trivia_pieces<I>(&self, trivia: I) -> Self
where
I: IntoIterator<Item = SyntaxTriviaPiece<L>>,
I::IntoIter: ExactSizeIterator,
{
let mut token_text = String::new();
let trivia = trivia.into_iter().map(|piece| {
token_text.push_str(piece.text());
piece.into_raw_piece()
});
let leading = GreenTrivia::new(trivia);
// Copy over token text and trailing trivia
let leading_len = self.raw.green().leading_trivia().text_len();
token_text.push_str(&self.text()[usize::from(leading_len)..]);
Self {
raw: cursor::SyntaxToken::new_detached(GreenToken::with_trivia(
self.kind().to_raw(),
&token_text,
leading,
self.green_token().trailing_trivia().clone(),
)),
_p: PhantomData,
}
}
/// Return a new version of this token with its trailing trivia replaced with `trivia`
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn with_trailing_trivia<'a, I>(&self, trivia: I) -> Self
where
I: IntoIterator<Item = (TriviaPieceKind, &'a str)>,
I::IntoIter: ExactSizeIterator,
{
let mut token_text = String::new();
// copy over leading trivia and token text
let trailing_len = self.green_token().trailing_trivia().text_len();
token_text.push_str(&self.text()[..usize::from(self.text().text_len() - trailing_len)]);
let trivia = trivia.into_iter().map(|(kind, text)| {
token_text.push_str(text);
TriviaPiece::new(kind, TextSize::of(text))
});
let trailing = GreenTrivia::new(trivia);
Self {
raw: cursor::SyntaxToken::new_detached(GreenToken::with_trivia(
self.kind().to_raw(),
&token_text,
self.green_token().leading_trivia().clone(),
trailing,
)),
_p: PhantomData,
}
}
/// Return a new version of this token with its trailing trivia replaced with `trivia`
#[must_use = "syntax elements are immutable, the result of update methods must be propagated to have any effect"]
pub fn with_trailing_trivia_pieces<I>(&self, trivia: I) -> Self
where
I: IntoIterator<Item = SyntaxTriviaPiece<L>>,
I::IntoIter: ExactSizeIterator,
{
let mut token_text = String::new();
// copy over leading trivia and token text
let trailing_len = self.green_token().trailing_trivia().text_len();
token_text.push_str(&self.text()[..usize::from(self.text().text_len() - trailing_len)]);
let trivia = trivia.into_iter().map(|piece| {
token_text.push_str(piece.text());
piece.into_raw_piece()
});
let trailing = GreenTrivia::new(trivia);
Self {
raw: cursor::SyntaxToken::new_detached(GreenToken::with_trivia(
self.kind().to_raw(),
&token_text,
self.green_token().leading_trivia().clone(),
trailing,
)),
_p: PhantomData,
}
}
/// Returns the token's leading trivia.
///
/// Looking backward in the text, a token owns all of its preceding trivia up to and including the first newline character.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut token = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// })
/// .first_token()
/// .unwrap();
/// assert_eq!("\n\t ", token.leading_trivia().text());
/// ```
#[inline]
pub fn leading_trivia(&self) -> SyntaxTrivia<L> {
SyntaxTrivia::new(self.raw.leading_trivia())
}
/// Returns the token's trailing trivia.
///
/// A token owns all of its following trivia up to, but not including, the next newline character.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// let mut token = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// &[TriviaPiece::whitespace(3)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// })
/// .first_token()
/// .unwrap();
/// assert_eq!(" \t\t", token.trailing_trivia().text());
/// ```
#[inline]
pub fn trailing_trivia(&self) -> SyntaxTrivia<L> {
SyntaxTrivia::new(self.raw.trailing_trivia())
}
/// Checks if the current token has trailing comments
pub fn has_trailing_comments(&self) -> bool {
self.trailing_trivia()
.pieces()
.any(|piece| piece.is_comments())
}
/// Checks if the current token has leading comments
pub fn has_leading_comments(&self) -> bool {
self.leading_trivia()
.pieces()
.any(|piece| piece.is_comments())
}
/// Checks if the token has any leading trivia that is neither whitespace nor a line break
pub fn has_leading_non_whitespace_trivia(&self) -> bool {
self.leading_trivia()
.pieces()
.any(|piece| !piece.is_whitespace() && !piece.is_newline())
}
/// Checks if the current token has a leading newline
pub fn has_leading_newline(&self) -> bool {
self.leading_trivia()
.pieces()
.any(|piece| piece.is_newline())
}
}
impl<L: Language> fmt::Debug for SyntaxToken<L> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{:?}@{:?} {:?} ",
self.kind(),
self.text_range(),
self.text_trimmed()
)?;
self.leading_trivia().fmt(f)?;
write!(f, " ")?;
self.trailing_trivia().fmt(f)
}
}
impl<L: Language> fmt::Display for SyntaxToken<L> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.raw, f)
}
}
impl<L: Language> From<SyntaxToken<L>> for cursor::SyntaxToken {
fn from(token: SyntaxToken<L>) -> cursor::SyntaxToken {
token.raw
}
}
impl<L: Language> From<cursor::SyntaxToken> for SyntaxToken<L> {
fn from(raw: cursor::SyntaxToken) -> SyntaxToken<L> {
SyntaxToken {
raw,
_p: PhantomData,
}
}
}


@ -1,810 +0,0 @@
use crate::{cursor, Language, SyntaxToken};
use ruff_text_size::{TextRange, TextSize};
use std::fmt;
use std::fmt::Formatter;
use std::iter::FusedIterator;
use std::marker::PhantomData;
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum TriviaPieceKind {
/// A line break (`\n`, `\r`, `\r\n`, ...)
Newline,
/// Any whitespace character
Whitespace,
/// Comment that does not contain any line breaks
SingleLineComment,
/// Comment that contains at least one line break
MultiLineComment,
/// Token that the parser skipped for some reason.
Skipped,
}
impl TriviaPieceKind {
pub const fn is_newline(&self) -> bool {
matches!(self, TriviaPieceKind::Newline)
}
pub const fn is_whitespace(&self) -> bool {
matches!(self, TriviaPieceKind::Whitespace)
}
pub const fn is_single_line_comment(&self) -> bool {
matches!(self, TriviaPieceKind::SingleLineComment)
}
pub const fn is_multiline_comment(&self) -> bool {
matches!(self, TriviaPieceKind::MultiLineComment)
}
pub const fn is_skipped(&self) -> bool {
matches!(self, TriviaPieceKind::Skipped)
}
}
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct TriviaPiece {
pub(crate) kind: TriviaPieceKind,
pub(crate) length: TextSize,
}
impl TriviaPiece {
/// Creates a new whitespace trivia piece with the given length
pub fn whitespace<L: Into<TextSize>>(len: L) -> Self {
Self::new(TriviaPieceKind::Whitespace, len)
}
/// Creates a new newline trivia piece with the given text length
pub fn newline<L: Into<TextSize>>(len: L) -> Self {
Self::new(TriviaPieceKind::Newline, len)
}
/// Creates a new comment trivia piece that does not contain any line breaks.
/// For example, JavaScript's `//` comments are guaranteed not to span multiple lines. However,
/// this can also be a `/* ... */` comment if it doesn't contain any line break characters.
pub fn single_line_comment<L: Into<TextSize>>(len: L) -> Self {
Self::new(TriviaPieceKind::SingleLineComment, len)
}
/// Creates a new comment trivia piece that contains at least one line break.
/// For example, a JavaScript `/* ... */` comment that spans at least two lines (contains at least one line break character).
pub fn multi_line_comment<L: Into<TextSize>>(len: L) -> Self {
Self::new(TriviaPieceKind::MultiLineComment, len)
}
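/// Creates a new trivia piece of the given `kind` and `length`.
///
/// A minimal sketch of how pieces describe a token's text (marked `no_test`):
/// for the text `"\n\tlet"`, the leading trivia could be described as:
///
/// ```no_test
/// let leading = [
///     TriviaPiece::new(TriviaPieceKind::Newline, TextSize::from(1)),    // "\n"
///     TriviaPiece::new(TriviaPieceKind::Whitespace, TextSize::from(1)), // "\t"
/// ];
/// ```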
pub fn new<L: Into<TextSize>>(kind: TriviaPieceKind, length: L) -> Self {
Self {
kind,
length: length.into(),
}
}
/// Returns the trivia's length
pub fn text_len(&self) -> TextSize {
self.length
}
/// Returns the trivia's kind
pub fn kind(&self) -> TriviaPieceKind {
self.kind
}
}
#[derive(Debug, Clone)]
pub struct SyntaxTriviaPieceNewline<L: Language>(SyntaxTriviaPiece<L>);
#[derive(Debug, Clone)]
pub struct SyntaxTriviaPieceWhitespace<L: Language>(SyntaxTriviaPiece<L>);
#[derive(Debug, Clone)]
pub struct SyntaxTriviaPieceComments<L: Language>(SyntaxTriviaPiece<L>);
#[derive(Debug, Clone)]
pub struct SyntaxTriviaPieceSkipped<L: Language>(SyntaxTriviaPiece<L>);
impl<L: Language> SyntaxTriviaPieceNewline<L> {
pub fn text(&self) -> &str {
self.0.text()
}
pub fn text_len(&self) -> TextSize {
self.0.text_len()
}
pub fn text_range(&self) -> TextRange {
self.0.text_range()
}
/// Returns a reference to its [SyntaxTriviaPiece]
pub fn as_piece(&self) -> &SyntaxTriviaPiece<L> {
&self.0
}
/// Returns its [SyntaxTriviaPiece]
pub fn into_piece(self) -> SyntaxTriviaPiece<L> {
self.0
}
}
impl<L: Language> SyntaxTriviaPieceWhitespace<L> {
pub fn text(&self) -> &str {
self.0.text()
}
pub fn text_len(&self) -> TextSize {
self.0.text_len()
}
pub fn text_range(&self) -> TextRange {
self.0.text_range()
}
/// Returns a reference to its [SyntaxTriviaPiece]
pub fn as_piece(&self) -> &SyntaxTriviaPiece<L> {
&self.0
}
/// Returns its [SyntaxTriviaPiece]
pub fn into_piece(self) -> SyntaxTriviaPiece<L> {
self.0
}
}
impl<L: Language> SyntaxTriviaPieceComments<L> {
pub fn text(&self) -> &str {
self.0.text()
}
pub fn text_len(&self) -> TextSize {
self.0.text_len()
}
pub fn text_range(&self) -> TextRange {
self.0.text_range()
}
pub fn has_newline(&self) -> bool {
self.0.trivia.kind.is_multiline_comment()
}
/// Returns a reference to its [SyntaxTriviaPiece]
pub fn as_piece(&self) -> &SyntaxTriviaPiece<L> {
&self.0
}
/// Returns its [SyntaxTriviaPiece]
pub fn into_piece(self) -> SyntaxTriviaPiece<L> {
self.0
}
}
impl<L: Language> SyntaxTriviaPieceSkipped<L> {
pub fn text(&self) -> &str {
self.0.text()
}
pub fn text_len(&self) -> TextSize {
self.0.text_len()
}
pub fn text_range(&self) -> TextRange {
self.0.text_range()
}
/// Returns a reference to its [SyntaxTriviaPiece]
pub fn as_piece(&self) -> &SyntaxTriviaPiece<L> {
&self.0
}
/// Returns its [SyntaxTriviaPiece]
pub fn into_piece(self) -> SyntaxTriviaPiece<L> {
self.0
}
}
/// [SyntaxTriviaPiece] gives access to the most granular information about the trivia
/// that the lexer specified at token creation time.
///
/// For example:
///
/// ```no_test
/// builder.token_with_trivia(
/// RawSyntaxKind(1),
/// "\n\t /**/let \t\t",
/// &[TriviaPiece::whitespace(3), TriviaPiece::single_line_comment(4)],
/// &[TriviaPiece::whitespace(3)],
/// );
/// ```
/// This token has two pieces in its leading trivia and one piece in its trailing trivia. Each
/// piece is defined by a [TriviaPiece]; its content is irrelevant.
///
#[derive(Clone)]
pub struct SyntaxTriviaPiece<L: Language> {
raw: cursor::SyntaxTrivia,
/// Absolute offset from the beginning of the file
offset: TextSize,
trivia: TriviaPiece,
_p: PhantomData<L>,
}
impl<L: Language> SyntaxTriviaPiece<L> {
pub(crate) fn into_raw_piece(self) -> TriviaPiece {
self.trivia
}
/// Returns the internal kind of this trivia piece
pub fn kind(&self) -> TriviaPieceKind {
self.trivia.kind()
}
/// Returns the associated text just for this trivia piece. This is different from [SyntaxTrivia::text()],
/// which returns the text of the whole trivia.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t /**/let \t\t",
/// &[
/// TriviaPiece::whitespace(3),
/// TriviaPiece::single_line_comment(4),
/// ],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let leading: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// assert_eq!("\n\t ", leading[0].text());
/// assert_eq!("/**/", leading[1].text());
///
/// let trailing: Vec<_> = node.last_trailing_trivia().unwrap().pieces().collect();
/// assert_eq!(" \t\t", trailing[0].text());
/// ```
pub fn text(&self) -> &str {
let token = self.raw.token();
let txt = token.text();
// Compute the offset relative to the token
let start = self.offset - token.text_range().start();
let end = start + self.text_len();
// Don't use self.raw.text(). It iterates over all pieces
&txt[start.into()..end.into()]
}
/// Returns the associated text length just for this trivia piece. This is different from `SyntaxTrivia::len()`,
/// which returns the text length of the whole trivia.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t /**/let \t\t",
/// &[
/// TriviaPiece::whitespace(3),
/// TriviaPiece::single_line_comment(4),
/// ],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// assert_eq!(TextSize::from(3), pieces[0].text_len());
/// ```
pub fn text_len(&self) -> TextSize {
self.trivia.text_len()
}
/// Returns the associated text range just for this trivia piece. This is different from [SyntaxTrivia::text_range()],
/// which returns the text range of the whole trivia.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t /**/let \t\t",
/// &[
/// TriviaPiece::whitespace(3),
/// TriviaPiece::single_line_comment(4),
/// ],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// assert_eq!(TextRange::new(0.into(), 3.into()), pieces[0].text_range());
/// ```
pub fn text_range(&self) -> TextRange {
TextRange::at(self.offset, self.text_len())
}
/// Returns true if this trivia piece is a [SyntaxTriviaPieceNewline].
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t/**/let",
/// &[
/// TriviaPiece::newline(1),
/// TriviaPiece::whitespace(1),
/// TriviaPiece::single_line_comment(4),
/// ],
/// &[],
/// );
/// });
/// let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// assert!(pieces[0].is_newline())
/// ```
pub fn is_newline(&self) -> bool {
self.trivia.kind.is_newline()
}
/// Returns true if this trivia piece is a [SyntaxTriviaPieceWhitespace].
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t/**/let",
/// &[
/// TriviaPiece::newline(1),
/// TriviaPiece::whitespace(1),
/// TriviaPiece::single_line_comment(4),
/// ],
/// &[],
/// );
/// });
/// let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// assert!(pieces[1].is_whitespace())
/// ```
pub fn is_whitespace(&self) -> bool {
self.trivia.kind.is_whitespace()
}
/// Returns true if this trivia piece is a [SyntaxTriviaPieceComments].
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t/**/let",
/// &[
/// TriviaPiece::newline(1),
/// TriviaPiece::whitespace(1),
/// TriviaPiece::single_line_comment(4),
/// ],
/// &[],
/// );
/// });
/// let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// assert!(pieces[2].is_comments())
/// ```
pub const fn is_comments(&self) -> bool {
matches!(
self.trivia.kind,
TriviaPieceKind::SingleLineComment | TriviaPieceKind::MultiLineComment
)
}
/// Returns true if this trivia piece is a [SyntaxTriviaPieceSkipped].
pub fn is_skipped(&self) -> bool {
self.trivia.kind.is_skipped()
}
/// Cast this trivia piece to [SyntaxTriviaPieceNewline].
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n/**/let \t\t",
/// &[TriviaPiece::newline(1), TriviaPiece::single_line_comment(4)],
/// &[TriviaPiece::newline(3)],
/// );
/// });
/// let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// let w = pieces[0].as_newline();
/// assert!(w.is_some());
/// let w = pieces[1].as_newline();
/// assert!(w.is_none());
/// ```
pub fn as_newline(&self) -> Option<SyntaxTriviaPieceNewline<L>> {
match &self.trivia.kind {
TriviaPieceKind::Newline => Some(SyntaxTriviaPieceNewline(self.clone())),
_ => None,
}
}
/// Cast this trivia piece to [SyntaxTriviaPieceWhitespace].
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\t /**/let \t\t",
/// &[
/// TriviaPiece::whitespace(2),
/// TriviaPiece::single_line_comment(4),
/// ],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// let w = pieces[0].as_whitespace();
/// assert!(w.is_some());
/// let w = pieces[1].as_whitespace();
/// assert!(w.is_none());
/// ```
pub fn as_whitespace(&self) -> Option<SyntaxTriviaPieceWhitespace<L>> {
match &self.trivia.kind {
TriviaPieceKind::Whitespace => Some(SyntaxTriviaPieceWhitespace(self.clone())),
_ => None,
}
}
/// Cast this trivia piece to [SyntaxTriviaPieceComments].
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t /**/let \t\t",
/// &[
/// TriviaPiece::whitespace(3),
/// TriviaPiece::single_line_comment(4),
/// ],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// let w = pieces[0].as_comments();
/// assert!(w.is_none());
/// let w = pieces[1].as_comments();
/// assert!(w.is_some());
/// ```
pub fn as_comments(&self) -> Option<SyntaxTriviaPieceComments<L>> {
match &self.trivia.kind {
TriviaPieceKind::SingleLineComment | TriviaPieceKind::MultiLineComment => {
Some(SyntaxTriviaPieceComments(self.clone()))
}
_ => None,
}
}
/// Casts this piece to a skipped trivia piece.
pub fn as_skipped(&self) -> Option<SyntaxTriviaPieceSkipped<L>> {
match &self.trivia.kind {
TriviaPieceKind::Skipped => Some(SyntaxTriviaPieceSkipped(self.clone())),
_ => None,
}
}
pub fn token(&self) -> SyntaxToken<L> {
SyntaxToken::from(self.raw.token().clone())
}
}
impl<L: Language> fmt::Debug for SyntaxTriviaPiece<L> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.trivia.kind {
TriviaPieceKind::Newline => write!(f, "Newline(")?,
TriviaPieceKind::Whitespace => write!(f, "Whitespace(")?,
TriviaPieceKind::SingleLineComment | TriviaPieceKind::MultiLineComment => {
write!(f, "Comments(")?
}
TriviaPieceKind::Skipped => write!(f, "Skipped(")?,
}
print_debug_str(self.text(), f)?;
write!(f, ")")
}
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct SyntaxTrivia<L: Language> {
raw: cursor::SyntaxTrivia,
_p: PhantomData<L>,
}
#[derive(Clone)]
pub struct SyntaxTriviaPiecesIterator<L: Language> {
iter: cursor::SyntaxTriviaPiecesIterator,
_p: PhantomData<L>,
}
impl<L: Language> Iterator for SyntaxTriviaPiecesIterator<L> {
type Item = SyntaxTriviaPiece<L>;
fn next(&mut self) -> Option<Self::Item> {
let (offset, trivia) = self.iter.next()?;
Some(SyntaxTriviaPiece {
raw: self.iter.raw.clone(),
offset,
trivia,
_p: PhantomData,
})
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
impl<L: Language> DoubleEndedIterator for SyntaxTriviaPiecesIterator<L> {
fn next_back(&mut self) -> Option<Self::Item> {
let (offset, trivia) = self.iter.next_back()?;
Some(SyntaxTriviaPiece {
raw: self.iter.raw.clone(),
offset,
trivia,
_p: PhantomData,
})
}
}
impl<L: Language> ExactSizeIterator for SyntaxTriviaPiecesIterator<L> {}
impl<L: Language> SyntaxTrivia<L> {
pub(super) fn new(raw: cursor::SyntaxTrivia) -> Self {
Self {
raw,
_p: PhantomData,
}
}
/// Returns all [SyntaxTriviaPiece] of this trivia.
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::*;
/// use std::iter::Iterator;
/// let mut node = RawSyntaxTreeBuilder::wrap_with_node(RawLanguageKind::ROOT, |builder| {
/// builder.token_with_trivia(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t /**/let \t\t",
/// &[
/// TriviaPiece::whitespace(3),
/// TriviaPiece::single_line_comment(4),
/// ],
/// &[TriviaPiece::whitespace(3)],
/// );
/// });
/// let pieces: Vec<_> = node.first_leading_trivia().unwrap().pieces().collect();
/// assert_eq!(2, pieces.len());
/// let pieces: Vec<_> = node.last_trailing_trivia().unwrap().pieces().collect();
/// assert_eq!(1, pieces.len());
/// ```
pub fn pieces(&self) -> SyntaxTriviaPiecesIterator<L> {
SyntaxTriviaPiecesIterator {
iter: self.raw.pieces(),
_p: PhantomData,
}
}
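/// Returns the last piece of this trivia, if any.
///
/// For example, for leading trivia built from a newline, a whitespace, and a
/// comment piece, `first()` returns the newline piece and `last()` returns the
/// comment piece.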
pub fn last(&self) -> Option<SyntaxTriviaPiece<L>> {
let piece = self.raw.last()?;
Some(SyntaxTriviaPiece {
raw: self.raw.clone(),
offset: self.raw.text_range().end() - piece.length,
trivia: *piece,
_p: Default::default(),
})
}
pub fn first(&self) -> Option<SyntaxTriviaPiece<L>> {
let piece = self.raw.first()?;
Some(SyntaxTriviaPiece {
raw: self.raw.clone(),
offset: self.raw.text_range().start(),
trivia: *piece,
_p: Default::default(),
})
}
pub fn text(&self) -> &str {
self.raw.text()
}
pub fn text_range(&self) -> TextRange {
self.raw.text_range()
}
pub fn is_empty(&self) -> bool {
self.raw.len() == 0
}
pub fn has_skipped(&self) -> bool {
self.pieces().any(|piece| piece.is_skipped())
}
}
fn print_debug_str<S: AsRef<str>>(text: S, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let text = text.as_ref();
if text.len() < 25 {
write!(f, "{:?}", text)
} else {
for idx in 21..25 {
if text.is_char_boundary(idx) {
let text = format!("{} ...", &text[..idx]);
return write!(f, "{:?}", text);
}
}
write!(f, "")
}
}
impl<L: Language> std::fmt::Debug for SyntaxTrivia<L> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "[")?;
let mut first_piece = true;
for piece in self.pieces() {
if !first_piece {
write!(f, ", ")?;
}
first_piece = false;
write!(f, "{:?}", piece)?;
}
write!(f, "]")
}
}
/// It creates an iterator by chaining two trivia pieces. This iterator
/// of trivia can be attached to a token using `*_pieces` APIs.
///
/// ## Examples
///
/// ```
/// use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// use ruff_rowan::{chain_trivia_pieces, RawSyntaxToken, SyntaxToken, TriviaPiece, TriviaPieceKind};
///
/// let first_token = SyntaxToken::<RawLanguage>::new_detached(
/// RawLanguageKind::LET_TOKEN,
/// "\n\t let \t\t",
/// [TriviaPiece::whitespace(3)],
/// [TriviaPiece::whitespace(3)]
/// );
/// let second_token = SyntaxToken::<RawLanguage>::new_detached(
/// RawLanguageKind::SEMICOLON_TOKEN,
/// "; \t\t",
/// [TriviaPiece::whitespace(1)],
/// [TriviaPiece::whitespace(1)],
/// );
///
/// let leading_trivia = chain_trivia_pieces(
/// first_token.leading_trivia().pieces(),
/// second_token.leading_trivia().pieces()
/// );
///
/// let new_first_token = first_token.with_leading_trivia_pieces(leading_trivia);
///
/// let new_token = format!("{:?}", new_first_token);
/// assert_eq!(new_token, "LET_TOKEN@0..10 \"let\" [Whitespace(\"\\n\\t \"), Whitespace(\";\")] [Whitespace(\" \\t\\t\")]");
///
/// ```
///
pub fn chain_trivia_pieces<L, F, S>(first: F, second: S) -> ChainTriviaPiecesIterator<F, S>
where
L: Language,
F: Iterator<Item = SyntaxTriviaPiece<L>>,
S: Iterator<Item = SyntaxTriviaPiece<L>>,
{
ChainTriviaPiecesIterator::new(first, second)
}
/// Chain iterator that chains two iterators over syntax trivia together.
///
/// This is the same as Rust's [std::iter::Chain] iterator but implements [ExactSizeIterator].
/// Rust doesn't implement [ExactSizeIterator] for [std::iter::Chain] because adding the lengths of the two iterators may overflow.
///
/// Implementing [ExactSizeIterator] is safe in our case because the sum could only overflow if
/// a source document had more than 2^32 trivia pieces, which isn't possible because source documents
/// are limited to 2^32 bytes in length.
pub struct ChainTriviaPiecesIterator<F, S> {
first: Option<F>,
second: S,
}
impl<F, S> ChainTriviaPiecesIterator<F, S> {
fn new(first: F, second: S) -> Self {
Self {
first: Some(first),
second,
}
}
}
impl<L, F, S> Iterator for ChainTriviaPiecesIterator<F, S>
where
L: Language,
F: Iterator<Item = SyntaxTriviaPiece<L>>,
S: Iterator<Item = SyntaxTriviaPiece<L>>,
{
type Item = SyntaxTriviaPiece<L>;
fn next(&mut self) -> Option<Self::Item> {
match &mut self.first {
Some(first) => match first.next() {
Some(next) => Some(next),
None => {
self.first.take();
self.second.next()
}
},
None => self.second.next(),
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
match &self.first {
Some(first) => {
let (first_lower, first_upper) = first.size_hint();
let (second_lower, second_upper) = self.second.size_hint();
let lower = first_lower.saturating_add(second_lower);
let upper = match (first_upper, second_upper) {
(Some(first), Some(second)) => first.checked_add(second),
_ => None,
};
(lower, upper)
}
None => self.second.size_hint(),
}
}
}
impl<L, F, S> FusedIterator for ChainTriviaPiecesIterator<F, S>
where
L: Language,
F: Iterator<Item = SyntaxTriviaPiece<L>>,
S: Iterator<Item = SyntaxTriviaPiece<L>>,
{
}
impl<L, F, S> ExactSizeIterator for ChainTriviaPiecesIterator<F, S>
where
L: Language,
F: ExactSizeIterator<Item = SyntaxTriviaPiece<L>>,
S: ExactSizeIterator<Item = SyntaxTriviaPiece<L>>,
{
fn len(&self) -> usize {
match &self.first {
Some(first) => {
let first_len = first.len();
let second_len = self.second.len();
// SAFETY: Should be safe because a program can never contain more than u32::MAX pieces,
// since text ranges are represented as u32 (and each piece contains at least one character).
first_len + second_len
}
None => self.second.len(),
}
}
}


@ -1,275 +0,0 @@
mod parsed_children;
mod raw_syntax;
use crate::SyntaxKind;
use std::fmt;
use std::iter::{FusedIterator, Peekable};
pub use self::parsed_children::{
ParsedChildren, ParsedChildrenIntoIterator, ParsedChildrenIterator,
};
pub use self::raw_syntax::{
RawSyntaxElement, RawSyntaxElementRef, RawSyntaxNode, RawSyntaxNodeRef, RawSyntaxToken,
RawSyntaxTokenRef,
};
/// Factory for creating syntax nodes of a particular kind.
pub trait SyntaxFactory: fmt::Debug {
/// The syntax kind used by the nodes constructed by this syntax factory.
type Kind: SyntaxKind;
/// Creates a new syntax node of the passed `kind` with the given children.
///
/// The `children` contains the parsed direct children of the node. There may be fewer children
/// in case there's a syntax error and a required child or an optional child isn't present in the source code.
/// The `make_syntax` implementation must then fill in empty slots to match the slots as they're defined in the grammar.
///
/// The implementation is free to change the `kind` of the node but that has the consequence that
/// such a node will not be cached. The reason for not caching these nodes is that the cache lookup is performed
/// before calling `make_syntax`, thus querying the cache with the old kind.
///
/// It's important that the factory function is deterministic: calling the function
/// multiple times with the same `kind` and `children` must return syntax nodes with the same structure.
/// This is important because the returned nodes may be cached by `kind` and by which `children` are present.
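///
/// A minimal sketch of an implementation (marked `no_test`; the language, the
/// kinds, and the two-slot grammar are hypothetical):
///
/// ```no_test
/// #[derive(Debug)]
/// struct MyFactory;
///
/// impl SyntaxFactory for MyFactory {
///     type Kind = MyKind;
///
///     fn make_syntax(kind: MyKind, children: ParsedChildren<MyKind>) -> RawSyntaxNode<MyKind> {
///         match kind {
///             // A node with two required slots: `(` and `)`. A missing child
///             // becomes an empty slot, so the shape always matches the grammar.
///             MyKind::PARENS => {
///                 let mut slots: RawNodeSlots<2> = RawNodeSlots::default();
///                 let mut current = (&children).into_iter().peekable();
///                 for expected in [MyKind::L_PAREN, MyKind::R_PAREN] {
///                     if current.peek().map(|child| child.kind()) == Some(expected) {
///                         current.next();
///                         slots.mark_present();
///                     }
///                     slots.next_slot();
///                 }
///                 slots.into_node(kind, children)
///             }
///             _ => RawSyntaxNode::new(kind.to_bogus(), children.into_iter().map(Some)),
///         }
///     }
/// }
/// ```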
fn make_syntax(
kind: Self::Kind,
children: ParsedChildren<Self::Kind>,
) -> RawSyntaxNode<Self::Kind>;
/// Creates a *node list* syntax node. Validates that all elements are valid and changes the node's kind to
/// [SyntaxKind::to_bogus] if that's not the case.
fn make_node_list_syntax<F>(
kind: Self::Kind,
children: ParsedChildren<Self::Kind>,
can_cast: F,
) -> RawSyntaxNode<Self::Kind>
where
F: Fn(Self::Kind) -> bool,
{
let valid = (&children)
.into_iter()
.all(|element| can_cast(element.kind()));
let kind = if valid { kind } else { kind.to_bogus() };
RawSyntaxNode::new(kind, children.into_iter().map(Some))
}
/// Creates a *separated list* syntax node. Validates that the elements are valid and correctly
/// separated by the specified separator token.
///
/// It changes the kind of the node to [SyntaxKind::to_bogus] if an element is neither a valid list node
/// nor a separator.
///
/// It inserts empty slots for missing elements or missing separators.
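///
/// A minimal sketch of how a generated factory might call this helper (marked
/// `no_test`; the kinds are hypothetical):
///
/// ```no_test
/// Self::make_separated_list_syntax(
///     kind,
///     children,
///     |kind| kind == MyKind::EXPRESSION,
///     MyKind::COMMA,
///     /* allow_trailing */ true,
/// )
/// ```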
fn make_separated_list_syntax<F>(
kind: Self::Kind,
children: ParsedChildren<Self::Kind>,
can_cast: F,
separator: Self::Kind,
allow_trailing: bool,
) -> RawSyntaxNode<Self::Kind>
where
F: Fn(Self::Kind) -> bool,
{
let mut next_node = true;
let mut missing_count = 0;
let mut valid = true;
for child in &children {
let kind = child.kind();
if next_node {
if can_cast(kind) {
next_node = false;
} else if kind == separator {
// a missing element
missing_count += 1;
} else {
// an invalid element
valid = false;
break;
}
} else if kind == separator {
next_node = true;
} else if can_cast(kind) {
// a missing separator
missing_count += 1;
} else {
// something unexpected
valid = false;
}
}
if next_node && !allow_trailing && !children.is_empty() {
// a trailing comma in a list that doesn't support trailing commas
missing_count += 1;
}
if !valid {
RawSyntaxNode::new(kind.to_bogus(), children.into_iter().map(Some))
} else if missing_count > 0 {
RawSyntaxNode::new(
kind,
SeparatedListWithMissingNodesOrSeparatorSlotsIterator {
inner: children.into_iter().peekable(),
missing_count,
next_node: true,
separator,
},
)
} else {
RawSyntaxNode::new(kind, children.into_iter().map(Some))
}
}
}
/// Iterator that "fixes up" a separated list by inserting empty slots for any missing
/// separator or element.
struct SeparatedListWithMissingNodesOrSeparatorSlotsIterator<'a, K: SyntaxKind> {
inner: Peekable<ParsedChildrenIntoIterator<'a, K>>,
missing_count: usize,
next_node: bool,
separator: K,
}
impl<'a, K: SyntaxKind> Iterator for SeparatedListWithMissingNodesOrSeparatorSlotsIterator<'a, K> {
type Item = Option<RawSyntaxElement<K>>;
#[cold]
fn next(&mut self) -> Option<Self::Item> {
let peeked = self.inner.peek();
if let Some(peeked) = peeked {
let is_separator = self.separator == peeked.kind();
if self.next_node {
self.next_node = false;
if !is_separator {
Some(self.inner.next())
} else {
self.missing_count -= 1;
Some(None) // Missing separator
}
} else if is_separator {
self.next_node = true;
Some(self.inner.next())
} else {
// Missing node
self.missing_count -= 1;
self.next_node = true;
Some(None)
}
} else if self.missing_count > 0 {
// at a trailing comma in a list that doesn't allow trailing commas.
self.missing_count -= 1;
Some(None)
} else {
None
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.len();
(len, Some(len))
}
}
impl<'a, K: SyntaxKind> FusedIterator
for SeparatedListWithMissingNodesOrSeparatorSlotsIterator<'a, K>
{
}
impl<'a, K: SyntaxKind> ExactSizeIterator
for SeparatedListWithMissingNodesOrSeparatorSlotsIterator<'a, K>
{
fn len(&self) -> usize {
self.inner.len() + self.missing_count
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SlotContent {
Present,
Absent,
}
/// Description of the slots of a node, used in combination with [ParsedChildren].
/// It stores, for each slot, whether the node is present in [ParsedChildren] or not, allowing
/// generation of a node with the right number of empty slots.
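///
/// A minimal usage sketch for a node with three slots where the middle slot is
/// absent (marked `no_test`; `kind` and `children` are assumed to be in scope):
///
/// ```no_test
/// let mut slots: RawNodeSlots<3> = RawNodeSlots::default();
/// slots.mark_present(); // slot 0: present
/// slots.next_slot();
/// slots.next_slot();    // slot 1: absent, left unmarked
/// slots.mark_present(); // slot 2: present
/// slots.next_slot();
/// let node = slots.into_node(kind, children);
/// ```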
#[derive(Debug)]
pub struct RawNodeSlots<const COUNT: usize> {
slots: [SlotContent; COUNT],
current_slot: usize,
}
impl<const COUNT: usize> Default for RawNodeSlots<COUNT> {
fn default() -> Self {
Self {
slots: [SlotContent::Absent; COUNT],
current_slot: 0,
}
}
}
impl<const COUNT: usize> RawNodeSlots<COUNT> {
/// Progresses to the next slot
pub fn next_slot(&mut self) {
debug_assert!(self.current_slot < COUNT);
self.current_slot += 1;
}
/// Marks that the node for the current slot is *present* in the source code.
pub fn mark_present(&mut self) {
debug_assert!(self.current_slot < COUNT);
self.slots[self.current_slot] = SlotContent::Present;
}
/// Creates a node with the kind `kind`, filling in the nodes from the `children`.
pub fn into_node<K: SyntaxKind>(
self,
kind: K,
children: ParsedChildren<K>,
) -> RawSyntaxNode<K> {
debug_assert!(self.current_slot == COUNT, "Missing slots");
RawSyntaxNode::new(
kind,
RawNodeSlotIterator {
children: children.into_iter(),
slots: self.slots.as_slice().iter(),
},
)
}
}
struct RawNodeSlotIterator<'a, K: SyntaxKind> {
children: ParsedChildrenIntoIterator<'a, K>,
slots: std::slice::Iter<'a, SlotContent>,
}
impl<'a, K: SyntaxKind> Iterator for RawNodeSlotIterator<'a, K> {
type Item = Option<RawSyntaxElement<K>>;
fn next(&mut self) -> Option<Self::Item> {
let slot = self.slots.next()?;
match slot {
SlotContent::Present => {
Some(Some(self.children.next().expect(
"Expected a present node according to the slot description",
)))
}
SlotContent::Absent => Some(None),
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.slots.len(), Some(self.slots.len()))
}
}
impl<'a, K: SyntaxKind> FusedIterator for RawNodeSlotIterator<'a, K> {}
impl<'a, K: SyntaxKind> ExactSizeIterator for RawNodeSlotIterator<'a, K> {}


@ -1,132 +0,0 @@
use crate::green::GreenElement;
use crate::syntax_factory::raw_syntax::{RawSyntaxElement, RawSyntaxElementRef};
use crate::SyntaxKind;
use std::iter::FusedIterator;
use std::marker::PhantomData;
/// The parsed children of a node, not accounting for any missing children (required or optional)
#[derive(Debug)]
pub struct ParsedChildren<'a, K> {
/// Reference to an array containing all children of this node or any of its parents
all_children: &'a mut Vec<(u64, GreenElement)>,
/// The index of the first child of this node in the `all_children` array
first_child: usize,
ph: PhantomData<K>,
}
impl<'a, K: SyntaxKind> ParsedChildren<'a, K> {
pub(crate) fn new(all_children: &'a mut Vec<(u64, GreenElement)>, first_child: usize) -> Self {
Self {
all_children,
first_child,
ph: PhantomData,
}
}
pub fn len(&self) -> usize {
(self.first_child..self.all_children.len()).len()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
}
impl<'a, K: SyntaxKind> IntoIterator for ParsedChildren<'a, K> {
type Item = RawSyntaxElement<K>;
type IntoIter = ParsedChildrenIntoIterator<'a, K>;
fn into_iter(self) -> Self::IntoIter {
ParsedChildrenIntoIterator {
inner: self.all_children.drain(self.first_child..),
ph: PhantomData,
}
}
}
#[derive(Debug)]
pub struct ParsedChildrenIntoIterator<'a, K> {
inner: std::vec::Drain<'a, (u64, GreenElement)>,
ph: PhantomData<K>,
}
impl<'a, K: SyntaxKind> Iterator for ParsedChildrenIntoIterator<'a, K> {
type Item = RawSyntaxElement<K>;
fn next(&mut self) -> Option<Self::Item> {
self.inner
.next()
.map(|(_, raw)| RawSyntaxElement::from(raw))
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.len();
(len, Some(len))
}
}
impl<'a, K: SyntaxKind> FusedIterator for ParsedChildrenIntoIterator<'a, K> {}
impl<'a, K: SyntaxKind> ExactSizeIterator for ParsedChildrenIntoIterator<'a, K> {
fn len(&self) -> usize {
self.inner.len()
}
}
impl<'a, K: SyntaxKind> DoubleEndedIterator for ParsedChildrenIntoIterator<'a, K> {
fn next_back(&mut self) -> Option<Self::Item> {
self.inner
.next_back()
.map(|(_, raw)| RawSyntaxElement::from(raw))
}
}
impl<'a, K: SyntaxKind> IntoIterator for &'a ParsedChildren<'a, K> {
type Item = RawSyntaxElementRef<'a, K>;
type IntoIter = ParsedChildrenIterator<'a, K>;
fn into_iter(self) -> Self::IntoIter {
ParsedChildrenIterator {
inner: self.all_children[self.first_child..].iter(),
ph: PhantomData,
}
}
}
#[derive(Debug)]
pub struct ParsedChildrenIterator<'a, K> {
inner: std::slice::Iter<'a, (u64, GreenElement)>,
ph: PhantomData<K>,
}
impl<'a, K: SyntaxKind> Iterator for ParsedChildrenIterator<'a, K> {
type Item = RawSyntaxElementRef<'a, K>;
fn next(&mut self) -> Option<Self::Item> {
self.inner
.next()
.map(|(_, raw)| RawSyntaxElementRef::from(raw))
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.len();
(len, Some(len))
}
}
impl<'a, K: SyntaxKind> FusedIterator for ParsedChildrenIterator<'a, K> {}
impl<'a, K: SyntaxKind> ExactSizeIterator for ParsedChildrenIterator<'a, K> {
fn len(&self) -> usize {
self.inner.len()
}
}
impl<'a, K: SyntaxKind> DoubleEndedIterator for ParsedChildrenIterator<'a, K> {
fn next_back(&mut self) -> Option<Self::Item> {
self.inner
.next_back()
.map(|(_, raw)| RawSyntaxElementRef::from(raw))
}
}


@ -1,191 +0,0 @@
use crate::green::GreenElement;
use crate::{GreenNode, GreenToken, NodeOrToken, SyntaxKind};
use std::marker::PhantomData;
/// New-type wrapper around a `GreenNode`.
///
/// Allows third-party crates to access limited information about a `GreenNode` or construct
/// a `GreenNode` in a limited set of places.
#[derive(Debug)]
pub struct RawSyntaxNode<K: SyntaxKind> {
raw: GreenNode,
ph: PhantomData<K>,
}
impl<K: SyntaxKind> RawSyntaxNode<K> {
/// Creates a new node with the given `kind` and `slots`.
#[inline]
pub fn new<I>(kind: K, slots: I) -> Self
where
I: IntoIterator<Item = Option<RawSyntaxElement<K>>>,
I::IntoIter: ExactSizeIterator,
{
Self {
raw: GreenNode::new(
kind.to_raw(),
slots
.into_iter()
.map(|slot| slot.map(|element| element.into_green())),
),
ph: PhantomData,
}
}
#[inline]
pub fn kind(&self) -> K {
K::from_raw(self.raw.kind())
}
/// Unwraps this raw syntax into its underlying green node.
#[inline]
pub(crate) fn into_green(self) -> GreenNode {
self.raw
}
}
impl<K: SyntaxKind> From<GreenNode> for RawSyntaxNode<K> {
#[inline]
fn from(node: GreenNode) -> Self {
Self {
raw: node,
ph: PhantomData,
}
}
}
/// New-type wrapper around a `GreenToken`. Allows third-party crates to access limited information
/// about nodes that are not yet fully constructed.
#[derive(Debug)]
pub struct RawSyntaxToken<K: SyntaxKind> {
raw: GreenToken,
ph: PhantomData<K>,
}
impl<K: SyntaxKind> RawSyntaxToken<K> {
#[inline]
pub fn kind(&self) -> K {
K::from_raw(self.raw.kind())
}
}
impl<K: SyntaxKind> From<GreenToken> for RawSyntaxToken<K> {
fn from(token: GreenToken) -> Self {
Self {
raw: token,
ph: PhantomData,
}
}
}
pub type RawSyntaxElement<K> = NodeOrToken<RawSyntaxNode<K>, RawSyntaxToken<K>>;
impl<K: SyntaxKind> RawSyntaxElement<K> {
#[inline]
pub fn kind(&self) -> K {
match self {
NodeOrToken::Node(node) => node.kind(),
NodeOrToken::Token(token) => token.kind(),
}
}
#[inline]
fn into_green(self) -> GreenElement {
match self {
NodeOrToken::Node(node) => NodeOrToken::Node(node.raw),
NodeOrToken::Token(token) => NodeOrToken::Token(token.raw),
}
}
}
impl<K: SyntaxKind> From<GreenElement> for RawSyntaxElement<K> {
#[inline]
fn from(element: GreenElement) -> Self {
match element {
NodeOrToken::Node(node) => NodeOrToken::Node(RawSyntaxNode::from(node)),
NodeOrToken::Token(token) => NodeOrToken::Token(RawSyntaxToken::from(token)),
}
}
}
/// New-type wrapper around a reference to a `GreenNode`.
#[derive(Debug)]
pub struct RawSyntaxNodeRef<'a, K: SyntaxKind> {
raw: &'a GreenNode,
ph: PhantomData<K>,
}
impl<'a, K: SyntaxKind> RawSyntaxNodeRef<'a, K> {
#[inline]
pub fn kind(&self) -> K {
K::from_raw(self.raw.kind())
}
}
impl<'a, K: SyntaxKind> From<&'a GreenNode> for RawSyntaxNodeRef<'a, K> {
#[inline]
fn from(node: &'a GreenNode) -> Self {
Self {
raw: node,
ph: PhantomData,
}
}
}
/// New-type wrapper around a reference to a `GreenToken`.
#[derive(Debug)]
pub struct RawSyntaxTokenRef<'a, K: SyntaxKind> {
raw: &'a GreenToken,
ph: PhantomData<K>,
}
impl<'a, K: SyntaxKind> RawSyntaxTokenRef<'a, K> {
#[inline]
pub fn kind(&self) -> K {
K::from_raw(self.raw.kind())
}
}
impl<'a, K: SyntaxKind> From<&'a GreenToken> for RawSyntaxTokenRef<'a, K> {
#[inline]
fn from(token: &'a GreenToken) -> Self {
Self {
raw: token,
ph: PhantomData,
}
}
}
pub type RawSyntaxElementRef<'a, K> =
NodeOrToken<RawSyntaxNodeRef<'a, K>, RawSyntaxTokenRef<'a, K>>;
impl<'a, K: SyntaxKind> RawSyntaxElementRef<'a, K> {
#[inline]
pub fn kind(&self) -> K {
match self {
NodeOrToken::Node(node) => node.kind(),
NodeOrToken::Token(token) => token.kind(),
}
}
}
impl<'a, K: SyntaxKind> From<NodeOrToken<&'a GreenNode, &'a GreenToken>>
for RawSyntaxElementRef<'a, K>
{
#[inline]
fn from(element: NodeOrToken<&'a GreenNode, &'a GreenToken>) -> Self {
match element {
NodeOrToken::Node(node) => NodeOrToken::Node(RawSyntaxNodeRef::from(node)),
NodeOrToken::Token(token) => NodeOrToken::Token(RawSyntaxTokenRef::from(token)),
}
}
}
impl<'a, K: SyntaxKind> From<&'a GreenElement> for RawSyntaxElementRef<'a, K> {
#[inline]
fn from(element: &'a GreenElement) -> Self {
match element {
NodeOrToken::Node(node) => NodeOrToken::Node(RawSyntaxNodeRef::from(node)),
NodeOrToken::Token(token) => NodeOrToken::Token(RawSyntaxTokenRef::from(token)),
}
}
}


@ -1,444 +0,0 @@
use crate::{
cursor::{SyntaxNode, SyntaxToken},
TextRange, TextSize, TokenAtOffset,
};
use ruff_text_size::TextLen;
use std::fmt;
use std::iter::FusedIterator;
#[derive(Clone)]
pub struct SyntaxNodeText {
node: SyntaxNode,
range: TextRange,
}
impl SyntaxNodeText {
pub(crate) fn new(node: SyntaxNode) -> SyntaxNodeText {
let range = node.text_range();
SyntaxNodeText { node, range }
}
pub(crate) fn with_range(node: SyntaxNode, range: TextRange) -> SyntaxNodeText {
SyntaxNodeText { node, range }
}
pub fn len(&self) -> TextSize {
self.range.len()
}
pub fn is_empty(&self) -> bool {
self.range.is_empty()
}
pub fn contains_char(&self, c: char) -> bool {
self.try_for_each_chunk(|chunk| if chunk.contains(c) { Err(()) } else { Ok(()) })
.is_err()
}
pub fn find_char(&self, c: char) -> Option<TextSize> {
let mut acc: TextSize = 0.into();
let res = self.try_for_each_chunk(|chunk| {
if let Some(pos) = chunk.find(c) {
let pos: TextSize = (pos as u32).into();
return Err(acc + pos);
}
acc += TextSize::of(chunk);
Ok(())
});
found(res)
}
pub fn char_at(&self, offset: TextSize) -> Option<char> {
let mut start: TextSize = 0.into();
let res = self.try_for_each_chunk(|chunk| {
let end = start + TextSize::of(chunk);
if start <= offset && offset < end {
let off: usize = u32::from(offset - start) as usize;
return Err(chunk[off..].chars().next().unwrap());
}
start = end;
Ok(())
});
found(res)
}
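/// Returns a sub-text covering `range`, which may be a [TextRange] or any of the
/// standard range forms over [TextSize] (`a..b`, `a..`, `..b`, `..`).
///
/// A minimal sketch (marked `no_test`; `node` is assumed to be a node whose
/// text is `"helloworld"`):
///
/// ```no_test
/// let text = node.text();
/// let hello = text.slice(TextRange::new(0.into(), 5.into()));
/// assert_eq!(hello, "hello");
/// ```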
pub fn slice<R: private::SyntaxTextRange>(&self, range: R) -> SyntaxNodeText {
let start = range.start().unwrap_or_default();
let end = range.end().unwrap_or_else(|| self.len());
assert!(start <= end);
let len = end - start;
let start = self.range.start() + start;
let end = start + len;
assert!(
start <= end,
"invalid slice, range: {:?}, slice: {:?}",
self.range,
(range.start(), range.end()),
);
let range = TextRange::new(start, end);
assert!(
self.range.contains_range(range),
"invalid slice, range: {:?}, slice: {:?}",
self.range,
range,
);
SyntaxNodeText {
node: self.node.clone(),
range,
}
}
pub fn try_fold_chunks<T, F, E>(&self, init: T, mut f: F) -> Result<T, E>
where
F: FnMut(T, &str) -> Result<T, E>,
{
self.tokens_with_ranges()
.try_fold(init, move |acc, (token, range)| {
f(acc, &token.text()[range])
})
}
pub fn try_for_each_chunk<F: FnMut(&str) -> Result<(), E>, E>(
&self,
mut f: F,
) -> Result<(), E> {
self.try_fold_chunks((), move |(), chunk| f(chunk))
}
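/// Calls `f` once for each contiguous `&str` chunk of this text. Because a
/// node's text is stored across multiple tokens, it is exposed as a sequence of
/// chunks rather than as a single `&str`.
///
/// A minimal sketch (marked `no_test`; `node` is assumed to be in scope):
///
/// ```no_test
/// let mut out = String::new();
/// node.text().for_each_chunk(|chunk| out.push_str(chunk));
/// assert_eq!(out, node.text().to_string());
/// ```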
pub fn for_each_chunk<F: FnMut(&str)>(&self, mut f: F) {
enum Void {}
match self.try_for_each_chunk(|chunk| {
f(chunk);
Ok::<(), Void>(())
}) {
Ok(()) => (),
Err(void) => match void {},
}
}
fn tokens_with_ranges(&self) -> impl Iterator<Item = (SyntaxToken, TextRange)> + FusedIterator {
SyntaxNodeTokenWithRanges::new(self)
}
pub fn chars(&self) -> impl Iterator<Item = char> + FusedIterator {
SyntaxNodeTextChars::new(self)
}
}
#[derive(Clone)]
struct SyntaxNodeTokenWithRanges {
text_range: TextRange,
next_token: Option<(SyntaxToken, TextRange)>,
}
impl SyntaxNodeTokenWithRanges {
fn new(text: &SyntaxNodeText) -> Self {
let text_range = text.range;
let token = match text.node.token_at_offset(text_range.start()) {
TokenAtOffset::None => None,
TokenAtOffset::Single(token) => Some(token),
TokenAtOffset::Between(_, next) => Some(next),
};
Self {
next_token: token.and_then(|token| Self::with_intersecting_range(token, text_range)),
text_range,
}
}
fn with_intersecting_range(
token: SyntaxToken,
text_range: TextRange,
) -> Option<(SyntaxToken, TextRange)> {
let token_range = token.text_range();
let range = text_range.intersect(token_range)?;
Some((token, range - token_range.start()))
}
}
impl Iterator for SyntaxNodeTokenWithRanges {
type Item = (SyntaxToken, TextRange);
fn next(&mut self) -> Option<Self::Item> {
let (token, range) = self.next_token.take()?;
self.next_token = token
.next_token()
.and_then(|token| Self::with_intersecting_range(token, self.text_range));
Some((token, range))
}
}
impl FusedIterator for SyntaxNodeTokenWithRanges {}
#[derive(Clone)]
struct SyntaxNodeTextChars {
head: Option<(SyntaxToken, TextRange)>,
tail: SyntaxNodeTokenWithRanges,
index: TextSize,
}
impl SyntaxNodeTextChars {
fn new(text: &SyntaxNodeText) -> Self {
let mut chunks = SyntaxNodeTokenWithRanges::new(text);
Self {
head: chunks.next(),
tail: chunks,
index: TextSize::default(),
}
}
}
impl Iterator for SyntaxNodeTextChars {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
loop {
let (token, range) = self.head.as_ref()?;
if self.index >= range.end() {
self.head = self.tail.next();
self.index = TextSize::default();
continue;
}
let text = token.text();
// SAFETY: Index check above guarantees that there's at least some text left
let next_char = text[TextRange::new(self.index, range.end())]
.chars()
.next()
.unwrap();
self.index += next_char.text_len();
break Some(next_char);
}
}
}
impl FusedIterator for SyntaxNodeTextChars {}
fn found<T>(res: Result<(), T>) -> Option<T> {
match res {
Ok(()) => None,
Err(it) => Some(it),
}
}
impl fmt::Debug for SyntaxNodeText {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.to_string(), f)
}
}
impl fmt::Display for SyntaxNodeText {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.try_for_each_chunk(|chunk| fmt::Display::fmt(chunk, f))
}
}
impl From<SyntaxNodeText> for String {
fn from(text: SyntaxNodeText) -> String {
text.to_string()
}
}
impl PartialEq<str> for SyntaxNodeText {
fn eq(&self, mut rhs: &str) -> bool {
self.try_for_each_chunk(|chunk| {
if !rhs.starts_with(chunk) {
return Err(());
}
rhs = &rhs[chunk.len()..];
Ok(())
})
.is_ok()
&& rhs.is_empty()
}
}
impl PartialEq<SyntaxNodeText> for str {
fn eq(&self, rhs: &SyntaxNodeText) -> bool {
rhs == self
}
}
impl PartialEq<&'_ str> for SyntaxNodeText {
fn eq(&self, rhs: &&str) -> bool {
self == *rhs
}
}
impl PartialEq<SyntaxNodeText> for &'_ str {
fn eq(&self, rhs: &SyntaxNodeText) -> bool {
rhs == self
}
}
impl PartialEq for SyntaxNodeText {
fn eq(&self, other: &SyntaxNodeText) -> bool {
if self.range.len() != other.range.len() {
return false;
}
let mut lhs = self.tokens_with_ranges();
let mut rhs = other.tokens_with_ranges();
zip_texts(&mut lhs, &mut rhs).is_none()
&& lhs.all(|it| it.1.is_empty())
&& rhs.all(|it| it.1.is_empty())
}
}
fn zip_texts<I: Iterator<Item = (SyntaxToken, TextRange)>>(xs: &mut I, ys: &mut I) -> Option<()> {
let mut x = xs.next()?;
let mut y = ys.next()?;
loop {
while x.1.is_empty() {
x = xs.next()?;
}
while y.1.is_empty() {
y = ys.next()?;
}
let x_text = &x.0.text()[x.1];
let y_text = &y.0.text()[y.1];
if !(x_text.starts_with(y_text) || y_text.starts_with(x_text)) {
return Some(());
}
let advance = std::cmp::min(x.1.len(), y.1.len());
x.1 = TextRange::new(x.1.start() + advance, x.1.end());
y.1 = TextRange::new(y.1.start() + advance, y.1.end());
}
}
impl Eq for SyntaxNodeText {}
mod private {
use std::ops;
use crate::{TextRange, TextSize};
pub trait SyntaxTextRange {
fn start(&self) -> Option<TextSize>;
fn end(&self) -> Option<TextSize>;
}
impl SyntaxTextRange for TextRange {
fn start(&self) -> Option<TextSize> {
Some(TextRange::start(*self))
}
fn end(&self) -> Option<TextSize> {
Some(TextRange::end(*self))
}
}
impl SyntaxTextRange for ops::Range<TextSize> {
fn start(&self) -> Option<TextSize> {
Some(self.start)
}
fn end(&self) -> Option<TextSize> {
Some(self.end)
}
}
impl SyntaxTextRange for ops::RangeFrom<TextSize> {
fn start(&self) -> Option<TextSize> {
Some(self.start)
}
fn end(&self) -> Option<TextSize> {
None
}
}
impl SyntaxTextRange for ops::RangeTo<TextSize> {
fn start(&self) -> Option<TextSize> {
None
}
fn end(&self) -> Option<TextSize> {
Some(self.end)
}
}
impl SyntaxTextRange for ops::RangeFull {
fn start(&self) -> Option<TextSize> {
None
}
fn end(&self) -> Option<TextSize> {
None
}
}
}
#[cfg(test)]
mod tests {
use crate::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
use crate::SyntaxNode;
fn build_tree(chunks: &[&str]) -> SyntaxNode<RawLanguage> {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT);
for &chunk in chunks.iter() {
builder.token(RawLanguageKind::STRING_TOKEN, chunk);
}
builder.finish_node();
builder.finish()
}
#[test]
fn test_text_equality() {
fn do_check(t1: &[&str], t2: &[&str]) {
let t1 = build_tree(t1).text();
let t2 = build_tree(t2).text();
let expected = t1.to_string() == t2.to_string();
let actual = t1 == t2;
assert_eq!(
expected, actual,
"`{}` (SyntaxText) `{}` (SyntaxText)",
t1, t2
);
let actual = t1 == *t2.to_string();
assert_eq!(expected, actual, "`{}` (SyntaxText) `{}` (&str)", t1, t2);
}
fn check(t1: &[&str], t2: &[&str]) {
do_check(t1, t2);
do_check(t2, t1)
}
check(&[""], &[""]);
check(&["a"], &[""]);
check(&["a"], &["a"]);
check(&["abc"], &["def"]);
check(&["hello", "world"], &["hello", "world"]);
check(&["hellowo", "rld"], &["hell", "oworld"]);
check(&["hel", "lowo", "rld"], &["helloworld"]);
check(&["{", "abc", "}"], &["{", "123", "}"]);
check(&["{", "abc", "}", "{"], &["{", "123", "}"]);
check(&["{", "abc", "}"], &["{", "123", "}", "{"]);
check(&["{", "abc", "}ab"], &["{", "abc", "}", "ab"]);
}
#[test]
fn test_chars() {
fn check(t1: &[&str], expected: &str) {
let t1 = build_tree(t1).text();
let actual = t1.chars().collect::<String>();
assert_eq!(
expected, &actual,
"`{}` (SyntaxText) `{}` (SyntaxText)",
actual, expected
);
}
check(&[""], "");
check(&["a"], "a");
check(&["hello", "world"], "helloworld");
check(&["hellowo", "rld"], "helloworld");
check(&["hel", "lowo", "rld"], "helloworld");
check(&["{", "abc", "}"], "{abc}");
check(&["{", "abc", "}", "{"], "{abc}{");
check(&["{", "abc", "}ab"], "{abc}ab");
}
}


@ -1,105 +0,0 @@
use crate::GreenToken;
use ruff_text_size::{TextRange, TextSize};
use std::ops::Deref;
use std::{borrow::Borrow, fmt::Formatter};
/// Reference to the text of a SyntaxToken, without being tied to the lifetime of a `&str`.
#[derive(Eq, Clone)]
pub struct SyntaxTokenText {
// Using a green token to ensure this type is Send + Sync.
token: GreenToken,
/// Relative range of the "selected" token text.
range: TextRange,
}
impl std::hash::Hash for SyntaxTokenText {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.text().hash(state);
}
}
impl SyntaxTokenText {
pub(crate) fn new(token: GreenToken) -> SyntaxTokenText {
let range = TextRange::at(TextSize::default(), token.text_len());
Self { token, range }
}
pub(crate) fn with_range(token: GreenToken, range: TextRange) -> SyntaxTokenText {
debug_assert!(range.end() <= token.text_len());
Self { token, range }
}
/// Returns the length of the text
pub fn len(&self) -> TextSize {
self.range.len()
}
/// Returns `true` if the text is empty
pub fn is_empty(&self) -> bool {
self.range.is_empty()
}
/// Returns a subslice of the text.
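///
/// A minimal sketch (marked `no_test`; `text` is assumed to be a
/// `SyntaxTokenText` over the string `"hello"`):
///
/// ```no_test
/// let hell = text.slice(TextRange::new(0.into(), 4.into()));
/// assert_eq!(hell, "hell");
/// ```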
pub fn slice(mut self, range: TextRange) -> SyntaxTokenText {
assert!(
self.range.contains_range(range),
"Range {range:?} exceeds bounds {:?}",
self.range
);
self.range = range;
self
}
pub fn range(&self) -> TextRange {
self.range
}
pub fn text(&self) -> &str {
&self.token.text()[self.range]
}
}
impl Deref for SyntaxTokenText {
type Target = str;
fn deref(&self) -> &Self::Target {
self.text()
}
}
impl std::fmt::Display for SyntaxTokenText {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.text())
}
}
impl std::fmt::Debug for SyntaxTokenText {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self.text())
}
}
impl PartialEq for SyntaxTokenText {
fn eq(&self, other: &Self) -> bool {
**self == **other
}
}
impl PartialEq<&'_ str> for SyntaxTokenText {
fn eq(&self, rhs: &&'_ str) -> bool {
**self == **rhs
}
}
impl PartialEq<SyntaxTokenText> for &'_ str {
fn eq(&self, other: &SyntaxTokenText) -> bool {
**self == **other
}
}
impl Borrow<str> for SyntaxTokenText {
fn borrow(&self) -> &str {
self.text()
}
}


@ -1,282 +0,0 @@
use crate::green::NodeCacheNodeEntryMut;
use crate::{
cow_mut::CowMut,
green::{GreenElement, NodeCache},
syntax::TriviaPiece,
GreenNode, Language, NodeOrToken, ParsedChildren, SyntaxFactory, SyntaxKind, SyntaxNode,
};
use std::marker::PhantomData;
/// A checkpoint for maybe wrapping a node. See [TreeBuilder::checkpoint] for details.
#[derive(Clone, Copy, Debug)]
pub struct Checkpoint(usize);
/// A builder for a syntax tree.
#[derive(Debug)]
pub struct TreeBuilder<'cache, L: Language, S: SyntaxFactory<Kind = L::Kind>> {
cache: CowMut<'cache, NodeCache>,
parents: Vec<(L::Kind, usize)>,
children: Vec<(u64, GreenElement)>,
ph: PhantomData<S>,
}
impl<L: Language, S: SyntaxFactory<Kind = L::Kind>> Default for TreeBuilder<'_, L, S> {
fn default() -> Self {
Self {
cache: CowMut::default(),
parents: Vec::default(),
children: Vec::default(),
ph: PhantomData,
}
}
}
impl<L: Language, S: SyntaxFactory<Kind = L::Kind>> TreeBuilder<'_, L, S> {
/// Creates a new builder.
pub fn new() -> TreeBuilder<'static, L, S> {
TreeBuilder::default()
}
/// Reusing a `NodeCache` between different [TreeBuilder]s saves memory.
/// It allows structurally sharing the underlying trees.
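///
/// A minimal sketch (marked `no_test`; it assumes `NodeCache` implements
/// `Default` and uses the `RawLanguage` helpers from the doc tests):
///
/// ```no_test
/// let mut cache = NodeCache::default();
/// let first_tree = {
///     let mut builder = RawSyntaxTreeBuilder::with_cache(&mut cache);
///     builder.start_node(RawLanguageKind::ROOT);
///     builder.token(RawLanguageKind::LET_TOKEN, "let");
///     builder.finish_node();
///     builder.finish()
/// };
/// // A second builder sharing the same cache reuses identical green subtrees.
/// let mut builder = RawSyntaxTreeBuilder::with_cache(&mut cache);
/// ```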
pub fn with_cache(cache: &mut NodeCache) -> TreeBuilder<'_, L, S> {
TreeBuilder {
cache: CowMut::Borrowed(cache),
parents: Vec::new(),
children: Vec::new(),
ph: PhantomData,
}
}
/// Method to quickly wrap a tree with a node.
///
/// ```no_test
/// TreeBuilder::<RawLanguage>::wrap_with_node(RawSyntaxKind(0), |builder| {
///     builder.token(RawSyntaxKind(1), "let");
/// });
/// ```
pub fn wrap_with_node<F>(kind: L::Kind, build: F) -> SyntaxNode<L>
where
F: Fn(&mut Self),
{
let mut builder = TreeBuilder::<L, S>::new();
builder.start_node(kind);
build(&mut builder);
builder.finish_node();
builder.finish()
}
/// Adds a new token to the current branch.
#[inline]
pub fn token(&mut self, kind: L::Kind, text: &str) -> &mut Self {
let (hash, token) = self.cache.token(kind.to_raw(), text);
self.children.push((hash, token.into()));
self
}
/// Adds a new token, with the given leading and trailing trivia, to the current branch.
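///
/// `text` must contain the leading trivia, the token's own text, and the
/// trailing trivia, in that order. A minimal sketch (marked `no_test`):
///
/// ```no_test
/// // "\n" (leading) + "let" (token) + " " (trailing)
/// builder.token_with_trivia(
///     RawLanguageKind::LET_TOKEN,
///     "\nlet ",
///     &[TriviaPiece::newline(1)],
///     &[TriviaPiece::whitespace(1)],
/// );
/// ```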
#[inline]
pub fn token_with_trivia(
&mut self,
kind: L::Kind,
text: &str,
leading: &[TriviaPiece],
trailing: &[TriviaPiece],
) {
let (hash, token) = self
.cache
.token_with_trivia(kind.to_raw(), text, leading, trailing);
self.children.push((hash, token.into()));
}
/// Start new node and make it current.
#[inline]
pub fn start_node(&mut self, kind: L::Kind) -> &mut Self {
let len = self.children.len();
self.parents.push((kind, len));
self
}
/// Finish current branch and restore previous
/// branch as current.
#[inline]
pub fn finish_node(&mut self) -> &mut Self {
let (kind, first_child) = self.parents.pop().unwrap();
let raw_kind = kind.to_raw();
let slots = &self.children[first_child..];
let node_entry = self.cache.node(raw_kind, slots);
let mut build_node = || {
let children = ParsedChildren::new(&mut self.children, first_child);
S::make_syntax(kind, children).into_green()
};
let (hash, node) = match node_entry {
NodeCacheNodeEntryMut::NoCache(hash) => (hash, build_node()),
NodeCacheNodeEntryMut::Vacant(entry) => {
let node = build_node();
let hash = entry.cache(node.clone());
(hash, node)
}
NodeCacheNodeEntryMut::Cached(cached) => {
self.children.truncate(first_child);
(cached.hash(), cached.node().clone())
}
};
self.children.push((hash, node.into()));
self
}
/// Prepare for maybe wrapping the next node.
/// The way wrapping works is that you first of all get a checkpoint,
/// then you place all tokens you want to wrap, and then *maybe* call
/// `start_node_at`.
/// Example:
/// ```rust
/// # use ruff_rowan::raw_language::{RawLanguage, RawLanguageKind, RawSyntaxTreeBuilder};
/// # const PLUS: RawLanguageKind = RawLanguageKind::PLUS_TOKEN;
/// # const OPERATION: RawLanguageKind = RawLanguageKind::ROOT;
/// # struct Parser;
/// # impl Parser {
/// # fn peek(&self) -> Option<RawLanguageKind> { None }
/// # fn parse_expr(&mut self) {}
/// # }
/// # let mut builder = RawSyntaxTreeBuilder::new();
/// # let mut parser = Parser;
/// let checkpoint = builder.checkpoint();
/// parser.parse_expr();
/// if parser.peek() == Some(PLUS) {
/// // 1 + 2 = Add(1, 2)
/// builder.start_node_at(checkpoint, OPERATION);
/// parser.parse_expr();
/// builder.finish_node();
/// }
/// ```
#[inline]
pub fn checkpoint(&self) -> Checkpoint {
Checkpoint(self.children.len())
}
/// Wrap the previous branch marked by `checkpoint` in a new branch and
/// make it current.
#[inline]
pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: L::Kind) {
let Checkpoint(checkpoint) = checkpoint;
assert!(
checkpoint <= self.children.len(),
"checkpoint no longer valid, was finish_node called early?"
);
if let Some(&(_, first_child)) = self.parents.last() {
assert!(
checkpoint >= first_child,
"checkpoint no longer valid, was an unmatched start_node_at called?"
);
}
self.parents.push((kind, checkpoint));
}
/// Complete tree building. Make sure that
/// `start_node` / `start_node_at` and `finish_node` calls
/// are paired!
#[inline]
#[must_use]
pub fn finish(self) -> SyntaxNode<L> {
SyntaxNode::new_root(self.finish_green())
}
// For tests
#[must_use]
pub(crate) fn finish_green(mut self) -> GreenNode {
assert_eq!(self.children.len(), 1);
match self.children.pop().unwrap().1 {
NodeOrToken::Node(node) => node,
_ => panic!(),
}
}
}
#[cfg(test)]
mod tests {
use crate::green::GreenElementRef;
use crate::raw_language::{RawLanguageKind, RawSyntaxTreeBuilder};
use crate::{GreenNodeData, GreenTokenData, NodeOrToken};
// Builds a "Condition" like structure where the closing ) is missing
fn build_condition_with_missing_closing_parenthesis(builder: &mut RawSyntaxTreeBuilder) {
builder.start_node(RawLanguageKind::CONDITION);
builder.token(RawLanguageKind::L_PAREN_TOKEN, "(");
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::STRING_TOKEN, "a");
builder.finish_node();
// missing )
builder.finish_node();
}
#[test]
fn caches_identical_nodes_with_empty_slots() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT); // Root
build_condition_with_missing_closing_parenthesis(&mut builder);
build_condition_with_missing_closing_parenthesis(&mut builder);
builder.finish_node();
let root = builder.finish_green();
let first = root.children().next().unwrap();
let last = root.children().last().unwrap();
assert_eq!(first.element(), last.element());
assert_same_elements(first.element(), last.element());
}
#[test]
fn doesnt_cache_node_if_empty_slots_differ() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT); // Root
build_condition_with_missing_closing_parenthesis(&mut builder); // misses the ')'
// Create a well-formed condition
builder.start_node(RawLanguageKind::CONDITION);
builder.token(RawLanguageKind::L_PAREN_TOKEN, "(");
builder.start_node(RawLanguageKind::LITERAL_EXPRESSION);
builder.token(RawLanguageKind::STRING_TOKEN, "a");
builder.finish_node();
builder.token(RawLanguageKind::R_PAREN_TOKEN, ")");
builder.finish_node();
// finish root
builder.finish_node();
let root = builder.finish_green();
let first_condition = root.children().next().unwrap();
let last_condition = root.children().last().unwrap();
assert_ne!(first_condition.element(), last_condition.element());
}
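// A sketch of retroactive wrapping with `checkpoint`/`start_node_at`,
// mirroring the builder's doc example; node kinds reuse the raw-language
// fixtures above.
#[test]
fn wraps_checkpointed_tokens_in_new_node() {
let mut builder = RawSyntaxTreeBuilder::new();
builder.start_node(RawLanguageKind::ROOT);
let checkpoint = builder.checkpoint();
builder.token(RawLanguageKind::STRING_TOKEN, "a");
// Retroactively wrap the token in a LITERAL_EXPRESSION node.
builder.start_node_at(checkpoint, RawLanguageKind::LITERAL_EXPRESSION);
builder.finish_node();
builder.finish_node();
let root = builder.finish_green();
// The root's single child is the wrapping node, not the raw token.
assert!(root.children().next().unwrap().element().as_node().is_some());
}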
fn assert_same_elements(left: GreenElementRef<'_>, right: GreenElementRef<'_>) {
fn element_id(element: GreenElementRef<'_>) -> *const () {
match element {
NodeOrToken::Node(node) => node as *const GreenNodeData as *const (),
NodeOrToken::Token(token) => token as *const GreenTokenData as *const (),
}
}
assert_eq!(element_id(left), element_id(right));
}
}


@@ -1,157 +0,0 @@
use std::{fmt, ops::Deref};
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NodeOrToken<N, T> {
Node(N),
Token(T),
}
impl<N, T> NodeOrToken<N, T> {
pub fn into_node(self) -> Option<N> {
match self {
NodeOrToken::Node(node) => Some(node),
NodeOrToken::Token(_) => None,
}
}
pub fn into_token(self) -> Option<T> {
match self {
NodeOrToken::Node(_) => None,
NodeOrToken::Token(token) => Some(token),
}
}
pub fn as_node(&self) -> Option<&N> {
match self {
NodeOrToken::Node(node) => Some(node),
NodeOrToken::Token(_) => None,
}
}
pub fn as_token(&self) -> Option<&T> {
match self {
NodeOrToken::Node(_) => None,
NodeOrToken::Token(token) => Some(token),
}
}
}
impl<N: Deref, T: Deref> NodeOrToken<N, T> {
pub(crate) fn as_deref(&self) -> NodeOrToken<&N::Target, &T::Target> {
match self {
NodeOrToken::Node(node) => NodeOrToken::Node(&**node),
NodeOrToken::Token(token) => NodeOrToken::Token(&**token),
}
}
}
impl<N: fmt::Display, T: fmt::Display> fmt::Display for NodeOrToken<N, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
NodeOrToken::Node(node) => fmt::Display::fmt(node, f),
NodeOrToken::Token(token) => fmt::Display::fmt(token, f),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Direction {
Next,
Prev,
}
/// `WalkEvent` describes the tree-walking process.
#[derive(Debug, Copy, Clone)]
pub enum WalkEvent<T> {
/// Fired before traversing the node.
Enter(T),
/// Fired after the node is traversed.
Leave(T),
}
impl<T> WalkEvent<T> {
pub fn map<F: FnOnce(T) -> U, U>(self, f: F) -> WalkEvent<U> {
match self {
WalkEvent::Enter(it) => WalkEvent::Enter(f(it)),
WalkEvent::Leave(it) => WalkEvent::Leave(f(it)),
}
}
}
/// There might be zero, one or two leaves at a given offset.
#[derive(Clone, Debug)]
pub enum TokenAtOffset<T> {
/// No leaves at offset -- possible for the empty file.
None,
/// Only a single leaf at offset.
Single(T),
/// Offset is exactly between two leaves.
Between(T, T),
}
impl<T> TokenAtOffset<T> {
pub fn map<F: Fn(T) -> U, U>(self, f: F) -> TokenAtOffset<U> {
match self {
TokenAtOffset::None => TokenAtOffset::None,
TokenAtOffset::Single(it) => TokenAtOffset::Single(f(it)),
TokenAtOffset::Between(l, r) => TokenAtOffset::Between(f(l), f(r)),
}
}
/// Convert to an `Option`, preferring the right leaf in case of a tie.
pub fn right_biased(self) -> Option<T> {
match self {
TokenAtOffset::None => None,
TokenAtOffset::Single(node) => Some(node),
TokenAtOffset::Between(_, right) => Some(right),
}
}
/// Convert to an `Option`, preferring the left leaf in case of a tie.
pub fn left_biased(self) -> Option<T> {
match self {
TokenAtOffset::None => None,
TokenAtOffset::Single(node) => Some(node),
TokenAtOffset::Between(left, _) => Some(left),
}
}
}
impl<T> Iterator for TokenAtOffset<T> {
type Item = T;
fn next(&mut self) -> Option<T> {
match std::mem::replace(self, TokenAtOffset::None) {
TokenAtOffset::None => None,
TokenAtOffset::Single(node) => {
*self = TokenAtOffset::None;
Some(node)
}
TokenAtOffset::Between(left, right) => {
*self = TokenAtOffset::Single(right);
Some(left)
}
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
match self {
TokenAtOffset::None => (0, Some(0)),
TokenAtOffset::Single(_) => (1, Some(1)),
TokenAtOffset::Between(_, _) => (2, Some(2)),
}
}
}
impl<T> ExactSizeIterator for TokenAtOffset<T> {}
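/// Asserts at compile time that `$expr` is non-zero (i.e. true): a zero
/// expression forces a constant divide-by-zero error.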
#[cfg(target_pointer_width = "64")]
#[macro_export]
macro_rules! static_assert {
($expr:expr) => {
const _: i32 = 0 / $expr as i32;
};
}
#[cfg(target_pointer_width = "64")]
pub use static_assert;
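#[cfg(test)]
mod tests {
use super::TokenAtOffset;
// A small sketch of the iteration order and the biasing helpers; the
// `&str` payloads stand in for real token types.
#[test]
fn token_at_offset_between() {
let at = TokenAtOffset::Between("left", "right");
assert_eq!(at.clone().left_biased(), Some("left"));
assert_eq!(at.clone().right_biased(), Some("right"));
assert_eq!(at.collect::<Vec<_>>(), vec!["left", "right"]);
}
}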


@@ -1,14 +0,0 @@
[package]
name = "ruff_text_edit"
version = "0.0.0"
publish = false
edition = "2021"
[dependencies]
ruff_text_size = { path = "../ruff_text_size", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
schemars = { version = "0.8.10", optional = true }
similar = { version = "2.1.0", features = ["unicode"] }
[features]
schemars = ["dep:schemars", "ruff_text_size/schemars"]


@@ -1,364 +0,0 @@
//! Representation of a `TextEdit`.
//!
//! This is taken from [rust-analyzer's `text_edit` crate](https://rust-analyzer.github.io/rust-analyzer/text_edit/index.html)
#![warn(
rust_2018_idioms,
unused_lifetimes,
semicolon_in_expressions_from_macros
)]
use std::{cmp::Ordering, num::NonZeroU32};
use ruff_text_size::{TextRange, TextSize};
use serde::{Deserialize, Serialize};
pub use similar::ChangeTag;
use similar::{utils::TextDiffRemapper, TextDiff};
#[derive(Default, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct TextEdit {
dictionary: String,
ops: Vec<CompressedOp>,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub enum CompressedOp {
DiffOp(DiffOp),
EqualLines { line_count: NonZeroU32 },
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub enum DiffOp {
Equal { range: TextRange },
Insert { range: TextRange },
Delete { range: TextRange },
}
impl DiffOp {
pub fn tag(self) -> ChangeTag {
match self {
DiffOp::Equal { .. } => ChangeTag::Equal,
DiffOp::Insert { .. } => ChangeTag::Insert,
DiffOp::Delete { .. } => ChangeTag::Delete,
}
}
pub fn text(self, diff: &TextEdit) -> &str {
let range = match self {
DiffOp::Equal { range } => range,
DiffOp::Insert { range } => range,
DiffOp::Delete { range } => range,
};
diff.get_text(range)
}
}
#[derive(Debug, Default, Clone)]
pub struct TextEditBuilder {
index: Vec<TextRange>,
edit: TextEdit,
}
impl TextEdit {
/// Convenience method for creating a new [`TextEditBuilder`]
pub fn builder() -> TextEditBuilder {
TextEditBuilder::default()
}
/// Create a diff of `old` to `new`, tokenized by Unicode words
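///
/// An illustrative round-trip (the strings are arbitrary):
///
/// ```rust
/// use ruff_text_edit::TextEdit;
///
/// let edit = TextEdit::from_unicode_words("foo bar", "foo baz");
/// assert_eq!(edit.new_string("foo bar"), "foo baz");
/// ```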
pub fn from_unicode_words(old: &str, new: &str) -> Self {
let mut builder = Self::builder();
let diff = TextDiff::configure()
.newline_terminated(true)
.diff_unicode_words(old, new);
let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
for (tag, text) in diff.ops().iter().flat_map(|op| remapper.iter_slices(op)) {
match tag {
ChangeTag::Equal => {
builder.equal(text);
}
ChangeTag::Delete => {
builder.delete(text);
}
ChangeTag::Insert => {
builder.insert(text);
}
}
}
builder.finish()
}
/// Returns the number of [`CompressedOp`]s in this [`TextEdit`]
pub fn len(&self) -> usize {
self.ops.len()
}
/// Returns `true` if this [`TextEdit`] doesn't contain any [`CompressedOp`]
pub fn is_empty(&self) -> bool {
self.ops.is_empty()
}
/// Returns an [`Iterator`] over the [`CompressedOp`]s of this [`TextEdit`]
pub fn iter(&self) -> std::slice::Iter<'_, CompressedOp> {
self.into_iter()
}
/// Returns the text value of a range interned in this [`TextEdit`]'s dictionary
pub fn get_text(&self, range: TextRange) -> &str {
&self.dictionary[range]
}
/// Return the content of the "new" revision of the text represented in
/// this [`TextEdit`]. This methods needs to be provided with the "old"
/// revision of the string since [`TextEdit`] doesn't store the content of
/// text sections that are equal between revisions
pub fn new_string(&self, old_string: &str) -> String {
let mut output = String::new();
let mut input_position = TextSize::from(0);
for op in &self.ops {
match op {
CompressedOp::DiffOp(DiffOp::Equal { range }) => {
output.push_str(&self.dictionary[*range]);
input_position += range.len();
}
CompressedOp::DiffOp(DiffOp::Insert { range }) => {
output.push_str(&self.dictionary[*range]);
}
CompressedOp::DiffOp(DiffOp::Delete { range }) => {
input_position += range.len();
}
CompressedOp::EqualLines { line_count } => {
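// The equal lines were compressed out of the dictionary, so
// recover them from the old revision; the `+ 1` accounts for
// the newline that closes the preceding context line.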
let start = u32::from(input_position) as usize;
let input = &old_string[start..];
let line_break_count = line_count.get() as usize + 1;
for line in input.split_inclusive('\n').take(line_break_count) {
output.push_str(line);
input_position += TextSize::of(line);
}
}
}
}
output
}
}
impl IntoIterator for TextEdit {
type Item = CompressedOp;
type IntoIter = std::vec::IntoIter<CompressedOp>;
fn into_iter(self) -> Self::IntoIter {
self.ops.into_iter()
}
}
impl<'a> IntoIterator for &'a TextEdit {
type Item = &'a CompressedOp;
type IntoIter = std::slice::Iter<'a, CompressedOp>;
fn into_iter(self) -> Self::IntoIter {
self.ops.iter()
}
}
impl TextEditBuilder {
pub fn is_empty(&self) -> bool {
self.edit.ops.is_empty()
}
/// Adds a string to the dictionary, returning the corresponding
/// range within the dictionary string
fn intern(&mut self, value: &str) -> TextRange {
let value_bytes = value.as_bytes();
let value_len = TextSize::of(value);
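// Binary-search the sorted range index; the comparator counts a
// dictionary entry that `value` is a prefix of as a match, so new
// values can reuse the beginning of existing entries.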
let index = self.index.binary_search_by(|range| {
let entry = self.edit.dictionary[*range].as_bytes();
for (lhs, rhs) in entry.iter().zip(value_bytes) {
match lhs.cmp(rhs) {
Ordering::Equal => continue,
ordering => return ordering,
}
}
match entry.len().cmp(&value_bytes.len()) {
// If all bytes in the shared sub-slice match, the dictionary
// entry is allowed to be longer than the text being inserted
Ordering::Greater => Ordering::Equal,
ordering => ordering,
}
});
match index {
Ok(index) => {
let range = self.index[index];
let len = value_len.min(range.len());
TextRange::at(range.start(), len)
}
Err(index) => {
let start = TextSize::of(&self.edit.dictionary);
self.edit.dictionary.push_str(value);
let range = TextRange::at(start, value_len);
self.index.insert(index, range);
range
}
}
}
pub fn equal(&mut self, text: &str) {
if let Some((start, mid, end)) = compress_equal_op(text) {
let start = self.intern(start);
self.edit
.ops
.push(CompressedOp::DiffOp(DiffOp::Equal { range: start }));
self.edit
.ops
.push(CompressedOp::EqualLines { line_count: mid });
let end = self.intern(end);
self.edit
.ops
.push(CompressedOp::DiffOp(DiffOp::Equal { range: end }));
} else {
let range = self.intern(text);
self.edit
.ops
.push(CompressedOp::DiffOp(DiffOp::Equal { range }));
}
}
pub fn insert(&mut self, text: &str) {
let range = self.intern(text);
self.edit
.ops
.push(CompressedOp::DiffOp(DiffOp::Insert { range }));
}
pub fn delete(&mut self, text: &str) {
let range = self.intern(text);
self.edit
.ops
.push(CompressedOp::DiffOp(DiffOp::Delete { range }));
}
pub fn replace(&mut self, old: &str, new: &str) {
self.delete(old);
self.insert(new);
}
pub fn finish(self) -> TextEdit {
self.edit
}
}
/// Number of lines to keep as [`DiffOp::Equal`] operations around a
/// [`CompressedOp::EqualLines`] operation. This has the effect of
/// making the compressed diff retain a few lines of equal content around
/// changes, which is useful for display: it makes it possible to print a
/// few context lines around changes without having to keep the full
/// original text around.
const COMPRESSED_DIFFS_CONTEXT_LINES: usize = 2;
fn compress_equal_op(text: &str) -> Option<(&str, NonZeroU32, &str)> {
let mut iter = text.split('\n');
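// Each loop below consumes `COMPRESSED_DIFFS_CONTEXT_LINES + 1` splits;
// seeding the length with `COMPRESSED_DIFFS_CONTEXT_LINES` accounts for
// the '\n' separators between those splits.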
let mut leading_len = COMPRESSED_DIFFS_CONTEXT_LINES;
for _ in 0..=COMPRESSED_DIFFS_CONTEXT_LINES {
leading_len += iter.next()?.len();
}
let mut trailing_len = COMPRESSED_DIFFS_CONTEXT_LINES;
for _ in 0..=COMPRESSED_DIFFS_CONTEXT_LINES {
trailing_len += iter.next_back()?.len();
}
let mid_count = iter.count();
let mid_count = u32::try_from(mid_count).ok()?;
let mid_count = NonZeroU32::new(mid_count)?;
let trailing_start = text.len().saturating_sub(trailing_len);
Some((&text[..leading_len], mid_count, &text[trailing_start..]))
}
#[cfg(test)]
mod tests {
use std::num::NonZeroU32;
use crate::{compress_equal_op, TextEdit};
#[test]
fn compress_short() {
let output = compress_equal_op(
"
start 1
start 2
end 1
end 2
",
);
assert_eq!(output, None);
}
#[test]
fn compress_long() {
let output = compress_equal_op(
"
start 1
start 2
mid 1
mid 2
mid 3
end 1
end 2
",
);
assert_eq!(
output,
Some((
"\nstart 1\nstart 2",
NonZeroU32::new(3).unwrap(),
"end 1\nend 2\n"
))
);
}
#[test]
fn new_string_compressed() {
const OLD: &str = "line 1 old
line 2
line 3
line 4
line 5
line 6
line 7 old";
const NEW: &str = "line 1 new
line 2
line 3
line 4
line 5
line 6
line 7 new";
let diff = TextEdit::from_unicode_words(OLD, NEW);
let new_string = diff.new_string(OLD);
assert_eq!(new_string, NEW);
}
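// A sketch of driving `TextEditBuilder` by hand; `replace` records a
// delete of the old text followed by an insert of the new text.
#[test]
fn builder_replace_roundtrip() {
let mut builder = TextEdit::builder();
builder.equal("fn main() ");
builder.replace("{}", "{ }");
let edit = builder.finish();
assert_eq!(edit.new_string("fn main() {}"), "fn main() { }");
}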
}