From 4dc030ba9dfc1c065211151959fa0901dbd4c90b Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 10 May 2023 15:27:28 +0900 Subject: [PATCH] Vendor SourceLocation from ruff --- Cargo.toml | 7 +- core/Cargo.toml | 10 +- core/src/source_code.rs | 4 +- ruff_source_location/Cargo.toml | 17 + ruff_source_location/src/lib.rs | 227 +++++++++ ruff_source_location/src/line_index.rs | 630 +++++++++++++++++++++++++ ruff_source_location/src/locator.rs | 409 ++++++++++++++++ ruff_source_location/src/newlines.rs | 446 +++++++++++++++++ 8 files changed, 1741 insertions(+), 9 deletions(-) create mode 100644 ruff_source_location/Cargo.toml create mode 100644 ruff_source_location/src/lib.rs create mode 100644 ruff_source_location/src/line_index.rs create mode 100644 ruff_source_location/src/locator.rs create mode 100644 ruff_source_location/src/newlines.rs diff --git a/Cargo.toml b/Cargo.toml index 974daa24f0..a452175894 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,13 +12,15 @@ include = ["LICENSE", "Cargo.toml", "src/**/*.rs"] resolver = "2" members = [ "ast", "core", "literal", "parser", - "ruff_text_size", + "ruff_text_size", "ruff_source_location", ] [workspace.dependencies] rustpython-ast = { path = "ast", version = "0.2.0" } rustpython-parser-core = { path = "core", version = "0.2.0" } rustpython-literal = { path = "literal", version = "0.2.0" } +ruff_text_size = { path = "ruff_text_size" } +ruff_source_location = { path = "ruff_source_location" } ahash = "0.7.6" anyhow = "1.0.45" @@ -32,9 +34,8 @@ num-traits = "0.2" rand = "0.8.5" serde = "1.0" static_assertions = "1.1" +once_cell = "1.17.1" unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } -ruff_python_ast = { git = "https://github.com/youknowone/ruff.git", rev = "088958e8fda2f74f1ebf315c75db13c232409b13" } -# ruff_python_ast = { path = "../ruff/crates/ruff_python_ast" } [profile.dev.package."*"] opt-level = 3 diff --git 
a/core/Cargo.toml b/core/Cargo.toml index e6af21832e..8269c44029 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -11,12 +11,14 @@ license = "MIT" itertools = { workspace = true } num-bigint = { workspace = true } num-complex = { workspace = true } -serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] } -ruff_text_size = { path = "../ruff_text_size" } -ruff_python_ast = { workspace = true } +# ruff dependency shouldn't be placed out of this crate +ruff_text_size = { path = "../ruff_text_size" } +ruff_source_location = { path = "../ruff_source_location", optional = true } + +serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] } lz4_flex = "0.9.2" [features] default = ["source-code"] -source-code = [] +source-code = ["ruff_source_location"] diff --git a/core/src/source_code.rs b/core/src/source_code.rs index c2134f7130..4440fd8215 100644 --- a/core/src/source_code.rs +++ b/core/src/source_code.rs @@ -1,7 +1,7 @@ // re-export our public interface -pub use ruff_python_ast::source_code::*; +pub use ruff_source_location::*; -pub type LineNumber = ruff_python_ast::source_code::OneIndexed; +pub type LineNumber = OneIndexed; #[derive(Debug)] pub struct SourceRange { diff --git a/ruff_source_location/Cargo.toml b/ruff_source_location/Cargo.toml new file mode 100644 index 0000000000..d9151693c8 --- /dev/null +++ b/ruff_source_location/Cargo.toml @@ -0,0 +1,17 @@ +# NOTE: RUSTPYTHON +# This crate is not a real crate of ruff, but cut off a part of `ruff_python_ast` and vendored it to avoid cross dependency + +[package] +name = "ruff_source_location" +version = "0.0.0" +publish = false +edition = { workspace = true } +rust-version = { workspace = true } + +[lib] + +[dependencies] +ruff_text_size = { workspace = true, features = ["serde"] } + +memchr = "2.5.0" +once_cell = { workspace = true } diff --git a/ruff_source_location/src/lib.rs b/ruff_source_location/src/lib.rs new file mode 
100644 index 0000000000..04c134061e --- /dev/null +++ b/ruff_source_location/src/lib.rs @@ -0,0 +1,227 @@ +mod line_index; +// mod locator; +// pub mod newline; + +pub use crate::line_index::{LineIndex, OneIndexed}; +// TODO: RUSTPYTHON; import it later +// pub use locator::Locator; +use ruff_text_size::{TextRange, TextSize}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; + +/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`]. +#[derive(Debug)] +pub struct SourceCode<'src, 'index> { + text: &'src str, + index: &'index LineIndex, +} + +impl<'src, 'index> SourceCode<'src, 'index> { + pub fn new(content: &'src str, index: &'index LineIndex) -> Self { + Self { + text: content, + index, + } + } + + /// Computes the one indexed row and column numbers for `offset`. + #[inline] + pub fn source_location(&self, offset: TextSize) -> SourceLocation { + self.index.source_location(offset, self.text) + } + + #[inline] + pub fn line_index(&self, offset: TextSize) -> OneIndexed { + self.index.line_index(offset) + } + + /// Take the source code up to the given [`TextSize`]. + #[inline] + pub fn up_to(&self, offset: TextSize) -> &'src str { + &self.text[TextRange::up_to(offset)] + } + + /// Take the source code after the given [`TextSize`]. + #[inline] + pub fn after(&self, offset: TextSize) -> &'src str { + &self.text[usize::from(offset)..] + } + + /// Take the source code between the given [`TextRange`]. 
+ pub fn slice(&self, range: TextRange) -> &'src str { + &self.text[range] + } + + pub fn line_start(&self, line: OneIndexed) -> TextSize { + self.index.line_start(line, self.text) + } + + pub fn line_end(&self, line: OneIndexed) -> TextSize { + self.index.line_end(line, self.text) + } + + pub fn line_range(&self, line: OneIndexed) -> TextRange { + self.index.line_range(line, self.text) + } + + /// Returns the source text of the line with the given index + #[inline] + pub fn line_text(&self, index: OneIndexed) -> &'src str { + let range = self.index.line_range(index, self.text); + &self.text[range] + } + + /// Returns the source text + pub fn text(&self) -> &'src str { + self.text + } + + /// Returns the number of lines + #[inline] + pub fn line_count(&self) -> usize { + self.index.line_count() + } +} + +impl PartialEq for SourceCode<'_, '_> { + fn eq(&self, other: &Self) -> bool { + self.text == other.text + } +} + +impl Eq for SourceCode<'_, '_> {} + +/// A Builder for constructing a [`SourceFile`] +pub struct SourceFileBuilder { + name: Box, + code: Box, + index: Option, +} + +impl SourceFileBuilder { + /// Creates a new builder for a file named `name`. + pub fn new>, Code: Into>>(name: Name, code: Code) -> Self { + Self { + name: name.into(), + code: code.into(), + index: None, + } + } + + #[must_use] + pub fn line_index(mut self, index: LineIndex) -> Self { + self.index = Some(index); + self + } + + pub fn set_line_index(&mut self, index: LineIndex) { + self.index = Some(index); + } + + /// Consumes `self` and returns the [`SourceFile`]. + pub fn finish(self) -> SourceFile { + let index = if let Some(index) = self.index { + once_cell::sync::OnceCell::with_value(index) + } else { + once_cell::sync::OnceCell::new() + }; + + SourceFile { + inner: Arc::new(SourceFileInner { + name: self.name, + code: self.code, + line_index: index, + }), + } + } +} + +/// A source file that is identified by its name. Optionally stores the source code and [`LineIndex`]. 
+/// +/// Cloning a [`SourceFile`] is cheap, because it only requires bumping a reference count. +#[derive(Clone, Eq, PartialEq)] +pub struct SourceFile { + inner: Arc, +} + +impl Debug for SourceFile { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SourceFile") + .field("name", &self.name()) + .field("code", &self.source_text()) + .finish() + } +} + +impl SourceFile { + /// Returns the name of the source file (filename). + #[inline] + pub fn name(&self) -> &str { + &self.inner.name + } + + #[inline] + pub fn slice(&self, range: TextRange) -> &str { + &self.source_text()[range] + } + + pub fn to_source_code(&self) -> SourceCode { + SourceCode { + text: self.source_text(), + index: self.index(), + } + } + + fn index(&self) -> &LineIndex { + self.inner + .line_index + .get_or_init(|| LineIndex::from_source_text(self.source_text())) + } + + /// Returns `Some` with the source text if set, or `None`. + #[inline] + pub fn source_text(&self) -> &str { + &self.inner.code + } +} + +struct SourceFileInner { + name: Box, + code: Box, + line_index: once_cell::sync::OnceCell, +} + +impl PartialEq for SourceFileInner { + fn eq(&self, other: &Self) -> bool { + self.name == other.name && self.code == other.code + } +} + +impl Eq for SourceFileInner {} + +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Copy)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct SourceLocation { + pub row: OneIndexed, + pub column: OneIndexed, +} + +impl Default for SourceLocation { + fn default() -> Self { + Self { + row: OneIndexed::MIN, + column: OneIndexed::MIN, + } + } +} + +impl Debug for SourceLocation { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SourceLocation") + .field("row", &self.row.get()) + .field("column", &self.column.get()) + .finish() + } +} diff --git a/ruff_source_location/src/line_index.rs b/ruff_source_location/src/line_index.rs new file mode 100644 index 0000000000..35041b00b9 --- 
/dev/null +++ b/ruff_source_location/src/line_index.rs @@ -0,0 +1,630 @@ +use crate::SourceLocation; +use ruff_text_size::{TextLen, TextRange, TextSize}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::fmt::{Debug, Formatter}; +use std::num::NonZeroU32; +use std::ops::Deref; +use std::sync::Arc; + +/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions. +/// +/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count. +#[derive(Clone)] +pub struct LineIndex { + inner: Arc, +} + +struct LineIndexInner { + line_starts: Vec, + kind: IndexKind, +} + +impl LineIndex { + /// Builds the [`LineIndex`] from the source text of a file. + pub fn from_source_text(text: &str) -> Self { + let mut line_starts: Vec = Vec::with_capacity(text.len() / 88); + line_starts.push(TextSize::default()); + + let bytes = text.as_bytes(); + let mut utf8 = false; + + assert!(u32::try_from(bytes.len()).is_ok()); + + for (i, byte) in bytes.iter().enumerate() { + utf8 |= !byte.is_ascii(); + + match byte { + // Only track one line break for `\r\n`. + b'\r' if bytes.get(i + 1) == Some(&b'\n') => continue, + b'\n' | b'\r' => { + // SAFETY: Assertion above guarantees `i <= u32::MAX` + #[allow(clippy::cast_possible_truncation)] + line_starts.push(TextSize::from(i as u32) + TextSize::from(1)); + } + _ => {} + } + } + + let kind = if utf8 { + IndexKind::Utf8 + } else { + IndexKind::Ascii + }; + + Self { + inner: Arc::new(LineIndexInner { line_starts, kind }), + } + } + + fn kind(&self) -> IndexKind { + self.inner.kind + } + + /// Returns the row and column index for an offset. 
+ /// + /// ## Examples + /// + /// ``` + /// # use ruff_text_size::TextSize; + /// # use ruff_source_location::{LineIndex, OneIndexed, SourceLocation}; + /// let source = "def a():\n pass"; + /// let index = LineIndex::from_source_text(source); + /// + /// assert_eq!( + /// index.source_location(TextSize::from(0), source), + /// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) } + /// ); + /// + /// assert_eq!( + /// index.source_location(TextSize::from(4), source), + /// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) } + /// ); + /// assert_eq!( + /// index.source_location(TextSize::from(13), source), + /// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) } + /// ); + /// ``` + /// + /// ## Panics + /// + /// If the offset is out of bounds. + pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation { + match self.binary_search_line(&offset) { + // Offset is at the start of a line + Ok(row) => SourceLocation { + row: OneIndexed::from_zero_indexed(row), + column: OneIndexed::from_zero_indexed(0), + }, + Err(next_row) => { + // SAFETY: Safe because the index always contains an entry for the offset 0 + let row = next_row - 1; + let mut line_start = self.line_starts()[row as usize]; + + let column = if self.kind().is_ascii() { + u32::from(offset - line_start) + } else { + // Don't count the BOM character as a column. + if line_start == TextSize::from(0) && content.starts_with('\u{feff}') { + line_start = '\u{feff}'.text_len(); + } + + let range = TextRange::new(line_start, offset); + content[range].chars().count().try_into().unwrap() + }; + + SourceLocation { + row: OneIndexed::from_zero_indexed(row), + column: OneIndexed::from_zero_indexed(column), + } + } + } + } + + /// Return the number of lines in the source code. 
+ pub(crate) fn line_count(&self) -> usize { + self.line_starts().len() + } + + /// Returns the row number for a given offset. + /// + /// ## Examples + /// + /// ``` + /// # use ruff_text_size::TextSize; + /// # use ruff_source_location::{LineIndex, OneIndexed, SourceLocation}; + /// let source = "def a():\n pass"; + /// let index = LineIndex::from_source_text(source); + /// + /// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0)); + /// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0)); + /// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1)); + /// ``` + /// + /// ## Panics + /// + /// If the offset is out of bounds. + pub fn line_index(&self, offset: TextSize) -> OneIndexed { + match self.binary_search_line(&offset) { + // Offset is at the start of a line + Ok(row) => OneIndexed::from_zero_indexed(row), + Err(row) => { + // SAFETY: Safe because the index always contains an entry for the offset 0 + OneIndexed::from_zero_indexed(row - 1) + } + } + } + + /// Returns the [byte offset](TextSize) for the `line` with the given index. + pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize { + let row_index = line.to_zero_indexed_usize(); + let starts = self.line_starts(); + + // If start-of-line position after last line + if row_index == starts.len() { + contents.text_len() + } else { + starts[row_index] + } + } + + /// Returns the [byte offset](TextSize) of the `line`'s end. + /// The offset is the end of the line, up to and including the newline character ending the line (if any). 
+ pub(crate) fn line_end(&self, line: OneIndexed, contents: &str) -> TextSize { + let row_index = line.to_zero_indexed_usize(); + let starts = self.line_starts(); + + // If start-of-line position after last line + if row_index.saturating_add(1) >= starts.len() { + contents.text_len() + } else { + starts[row_index + 1] + } + } + + /// Returns the [`TextRange`] of the `line` with the given index. + /// The start points to the first character's [byte offset](TextSize), the end up to, and including + /// the newline character ending the line (if any). + pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange { + let starts = self.line_starts(); + + if starts.len() == line.to_zero_indexed_usize() { + TextRange::empty(contents.text_len()) + } else { + TextRange::new( + self.line_start(line, contents), + self.line_start(line.saturating_add(1), contents), + ) + } + } + + /// Returns the [byte offsets](TextSize) for every line + pub fn line_starts(&self) -> &[TextSize] { + &self.inner.line_starts + } + + #[allow(clippy::trivially_copy_pass_by_ref)] // to keep same interface as `[T]::binary_search` + fn binary_search_line(&self, offset: &TextSize) -> Result { + // `try_into()` always success as long as TextSize is u32 + match self.line_starts().binary_search(offset) { + Ok(index) => Ok(index.try_into().unwrap()), + Err(index) => Err(index.try_into().unwrap()), + } + } +} + +impl Deref for LineIndex { + type Target = [TextSize]; + + fn deref(&self) -> &Self::Target { + self.line_starts() + } +} + +impl Debug for LineIndex { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.line_starts()).finish() + } +} + +#[derive(Debug, Clone, Copy)] +enum IndexKind { + /// Optimized index for an ASCII only document + Ascii, + + /// Index for UTF8 documents + Utf8, +} + +impl IndexKind { + const fn is_ascii(self) -> bool { + matches!(self, IndexKind::Ascii) + } +} + +/// Type-safe wrapper for a value whose logical range starts at 
`1`, for +/// instance the line or column numbers in a file +/// +/// Internally this is represented as a [`NonZeroU32`], this enables some +/// memory optimizations +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct OneIndexed(NonZeroU32); + +#[allow(clippy::cast_possible_truncation)] // manually checked +const fn try_to_u32(value: usize) -> Result { + if value <= u32::MAX as usize { + Ok(value as u32) + } else { + Err(value) + } +} + +impl OneIndexed { + // SAFETY: These constants are being initialized with non-zero values + /// The smallest value that can be represented by this integer type. + pub const MIN: Self = unwrap(Self::new(1)); + /// The largest value that can be represented by this integer type + pub const MAX: Self = unwrap(Self::new(u32::MAX)); + + const ONE: NonZeroU32 = unwrap(NonZeroU32::new(1)); + + /// Creates a non-zero if the given value is not zero. + pub const fn new(value: u32) -> Option { + match NonZeroU32::new(value) { + Some(value) => Some(Self(value)), + None => None, + } + } + + /// Construct a new [`OneIndexed`] from a zero-indexed value + pub const fn from_zero_indexed(value: u32) -> Self { + Self(Self::ONE.saturating_add(value)) + } + + /// Construct a new [`OneIndexed`] from a zero-indexed usize value + pub const fn try_from_zero_indexed(value: usize) -> Result { + match try_to_u32(value) { + Ok(value) => Ok(Self(Self::ONE.saturating_add(value))), + Err(value) => Err(value), + } + } + + /// Returns the value as a primitive type. 
+ pub const fn get(self) -> u32 { + self.0.get() + } + + /// Return the usize value for this [`OneIndexed`] + pub const fn to_usize(self) -> usize { + self.get() as _ + } + + /// Return the zero-indexed primitive value for this [`OneIndexed`] + pub const fn to_zero_indexed(self) -> u32 { + self.0.get() - 1 + } + + /// Return the zero-indexed usize value for this [`OneIndexed`] + pub const fn to_zero_indexed_usize(self) -> usize { + self.to_zero_indexed() as _ + } + + /// Saturating integer addition. Computes `self + rhs`, saturating at + /// the numeric bounds instead of overflowing. + #[must_use] + pub const fn saturating_add(self, rhs: u32) -> Self { + match NonZeroU32::new(self.0.get().saturating_add(rhs)) { + Some(value) => Self(value), + None => Self::MAX, + } + } + + /// Saturating integer subtraction. Computes `self - rhs`, saturating + /// at the numeric bounds instead of overflowing. + #[must_use] + pub const fn saturating_sub(self, rhs: u32) -> Self { + match NonZeroU32::new(self.0.get().saturating_sub(rhs)) { + Some(value) => Self(value), + None => Self::MIN, + } + } +} + +impl std::fmt::Display for OneIndexed { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(&self.0.get(), f) + } +} + +/// A const `Option::unwrap` without nightly features: +/// [Tracking issue](https://github.com/rust-lang/rust/issues/67441) +const fn unwrap(option: Option) -> T { + match option { + Some(value) => value, + None => panic!("unwrapping None"), + } +} + +#[cfg(test)] +mod tests { + use crate::line_index::LineIndex; + use crate::{OneIndexed, SourceLocation}; + use ruff_text_size::TextSize; + + #[test] + fn ascii_index() { + let index = LineIndex::from_source_text(""); + assert_eq!(index.line_starts(), &[TextSize::from(0)]); + + let index = LineIndex::from_source_text("x = 1"); + assert_eq!(index.line_starts(), &[TextSize::from(0)]); + + let index = LineIndex::from_source_text("x = 1\n"); + assert_eq!(index.line_starts(), 
&[TextSize::from(0), TextSize::from(6)]); + + let index = LineIndex::from_source_text("x = 1\ny = 2\nz = x + y\n"); + assert_eq!( + index.line_starts(), + &[ + TextSize::from(0), + TextSize::from(6), + TextSize::from(12), + TextSize::from(22) + ] + ); + } + + #[test] + fn ascii_source_location() { + let contents = "x = 1\ny = 2"; + let index = LineIndex::from_source_text(contents); + + // First row. + let loc = index.source_location(TextSize::from(2), contents); + assert_eq!( + loc, + SourceLocation { + row: OneIndexed::from_zero_indexed(0), + column: OneIndexed::from_zero_indexed(2) + } + ); + + // Second row. + let loc = index.source_location(TextSize::from(6), contents); + assert_eq!( + loc, + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(0) + } + ); + + let loc = index.source_location(TextSize::from(11), contents); + assert_eq!( + loc, + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(5) + } + ); + } + + #[test] + fn ascii_carriage_return() { + let contents = "x = 4\ry = 3"; + let index = LineIndex::from_source_text(contents); + assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]); + + assert_eq!( + index.source_location(TextSize::from(4), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(0), + column: OneIndexed::from_zero_indexed(4) + } + ); + assert_eq!( + index.source_location(TextSize::from(6), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(0) + } + ); + assert_eq!( + index.source_location(TextSize::from(7), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(1) + } + ); + } + + #[test] + fn ascii_carriage_return_newline() { + let contents = "x = 4\r\ny = 3"; + let index = LineIndex::from_source_text(contents); + assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]); + + 
assert_eq!( + index.source_location(TextSize::from(4), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(0), + column: OneIndexed::from_zero_indexed(4) + } + ); + assert_eq!( + index.source_location(TextSize::from(7), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(0) + } + ); + assert_eq!( + index.source_location(TextSize::from(8), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(1) + } + ); + } + + #[test] + fn utf8_index() { + let index = LineIndex::from_source_text("x = '🫣'"); + assert_eq!(index.line_count(), 1); + assert_eq!(index.line_starts(), &[TextSize::from(0)]); + + let index = LineIndex::from_source_text("x = '🫣'\n"); + assert_eq!(index.line_count(), 2); + assert_eq!( + index.line_starts(), + &[TextSize::from(0), TextSize::from(11)] + ); + + let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n"); + assert_eq!(index.line_count(), 4); + assert_eq!( + index.line_starts(), + &[ + TextSize::from(0), + TextSize::from(11), + TextSize::from(17), + TextSize::from(27) + ] + ); + + let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\""); + assert_eq!(index.line_count(), 3); + assert_eq!( + index.line_starts(), + &[TextSize::from(0), TextSize::from(7), TextSize::from(18)] + ); + } + + #[test] + fn utf8_carriage_return() { + let contents = "x = '🫣'\ry = 3"; + let index = LineIndex::from_source_text(contents); + assert_eq!(index.line_count(), 2); + assert_eq!( + index.line_starts(), + &[TextSize::from(0), TextSize::from(11)] + ); + + // Second ' + assert_eq!( + index.source_location(TextSize::from(9), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(0), + column: OneIndexed::from_zero_indexed(6) + } + ); + assert_eq!( + index.source_location(TextSize::from(11), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(0) + 
} + ); + assert_eq!( + index.source_location(TextSize::from(12), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(1) + } + ); + } + + #[test] + fn utf8_carriage_return_newline() { + let contents = "x = '🫣'\r\ny = 3"; + let index = LineIndex::from_source_text(contents); + assert_eq!(index.line_count(), 2); + assert_eq!( + index.line_starts(), + &[TextSize::from(0), TextSize::from(12)] + ); + + // Second ' + assert_eq!( + index.source_location(TextSize::from(9), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(0), + column: OneIndexed::from_zero_indexed(6) + } + ); + assert_eq!( + index.source_location(TextSize::from(12), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(0) + } + ); + assert_eq!( + index.source_location(TextSize::from(13), contents), + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(1) + } + ); + } + + #[test] + fn utf8_byte_offset() { + let contents = "x = '☃'\ny = 2"; + let index = LineIndex::from_source_text(contents); + assert_eq!( + index.line_starts(), + &[TextSize::from(0), TextSize::from(10)] + ); + + // First row. + let loc = index.source_location(TextSize::from(0), contents); + assert_eq!( + loc, + SourceLocation { + row: OneIndexed::from_zero_indexed(0), + column: OneIndexed::from_zero_indexed(0) + } + ); + + let loc = index.source_location(TextSize::from(5), contents); + assert_eq!( + loc, + SourceLocation { + row: OneIndexed::from_zero_indexed(0), + column: OneIndexed::from_zero_indexed(5) + } + ); + + let loc = index.source_location(TextSize::from(8), contents); + assert_eq!( + loc, + SourceLocation { + row: OneIndexed::from_zero_indexed(0), + column: OneIndexed::from_zero_indexed(6) + } + ); + + // Second row. 
+ let loc = index.source_location(TextSize::from(10), contents); + assert_eq!( + loc, + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(0) + } + ); + + // One-past-the-end. + let loc = index.source_location(TextSize::from(15), contents); + assert_eq!( + loc, + SourceLocation { + row: OneIndexed::from_zero_indexed(1), + column: OneIndexed::from_zero_indexed(5) + } + ); + } +} diff --git a/ruff_source_location/src/locator.rs b/ruff_source_location/src/locator.rs new file mode 100644 index 0000000000..a19b51af3a --- /dev/null +++ b/ruff_source_location/src/locator.rs @@ -0,0 +1,409 @@ +//! Struct used to efficiently slice source code at (row, column) Locations. + +use crate::newlines::find_newline; +use crate::{LineIndex, OneIndexed, SourceCode, SourceLocation}; +use memchr::{memchr2, memrchr2}; +use once_cell::unsync::OnceCell; +use ruff_text_size::{TextLen, TextRange, TextSize}; +use std::ops::Add; + +pub struct Locator<'a> { + contents: &'a str, + index: OnceCell, +} + +impl<'a> Locator<'a> { + pub const fn new(contents: &'a str) -> Self { + Self { + contents, + index: OnceCell::new(), + } + } + + #[deprecated( + note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead." + )] + pub fn compute_line_index(&self, offset: TextSize) -> OneIndexed { + self.to_index().line_index(offset) + } + + #[deprecated( + note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead." 
+ )] + pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation { + self.to_source_code().source_location(offset) + } + + fn to_index(&self) -> &LineIndex { + self.index + .get_or_init(|| LineIndex::from_source_text(self.contents)) + } + + pub fn line_index(&self) -> Option<&LineIndex> { + self.index.get() + } + + pub fn to_source_code(&self) -> SourceCode { + SourceCode { + index: self.to_index(), + text: self.contents, + } + } + + /// Computes the start position of the line of `offset`. + /// + /// ## Examples + /// + /// ``` + /// # use ruff_text_size::TextSize; + /// # use ruff_python_ast::source_code::Locator; + /// + /// let locator = Locator::new("First line\nsecond line\rthird line"); + /// + /// assert_eq!(locator.line_start(TextSize::from(0)), TextSize::from(0)); + /// assert_eq!(locator.line_start(TextSize::from(4)), TextSize::from(0)); + /// + /// assert_eq!(locator.line_start(TextSize::from(14)), TextSize::from(11)); + /// assert_eq!(locator.line_start(TextSize::from(28)), TextSize::from(23)); + /// ``` + /// + /// ## Panics + /// If `offset` is out of bounds. + pub fn line_start(&self, offset: TextSize) -> TextSize { + let bytes = self.contents[TextRange::up_to(offset)].as_bytes(); + if let Some(index) = memrchr2(b'\n', b'\r', bytes) { + // SAFETY: Safe because `index < offset` + TextSize::try_from(index).unwrap().add(TextSize::from(1)) + } else { + TextSize::default() + } + } + + pub fn is_at_start_of_line(&self, offset: TextSize) -> bool { + offset == TextSize::from(0) + || self.contents[TextRange::up_to(offset)].ends_with(['\n', '\r']) + } + + /// Computes the offset that is right after the newline character that ends `offset`'s line. 
+ /// + /// ## Examples + /// + /// ``` + /// # use ruff_text_size::{TextRange, TextSize}; + /// # use ruff_python_ast::source_code::Locator; + /// + /// let locator = Locator::new("First line\nsecond line\r\nthird line"); + /// + /// assert_eq!(locator.full_line_end(TextSize::from(3)), TextSize::from(11)); + /// assert_eq!(locator.full_line_end(TextSize::from(14)), TextSize::from(24)); + /// assert_eq!(locator.full_line_end(TextSize::from(28)), TextSize::from(34)); + /// ``` + /// + /// ## Panics + /// + /// If `offset` is passed the end of the content. + pub fn full_line_end(&self, offset: TextSize) -> TextSize { + let slice = &self.contents[usize::from(offset)..]; + if let Some((index, line_ending)) = find_newline(slice) { + offset + TextSize::try_from(index).unwrap() + line_ending.text_len() + } else { + self.contents.text_len() + } + } + + /// Computes the offset that is right before the newline character that ends `offset`'s line. + /// + /// ## Examples + /// + /// ``` + /// # use ruff_text_size::{TextRange, TextSize}; + /// # use ruff_python_ast::source_code::Locator; + /// + /// let locator = Locator::new("First line\nsecond line\r\nthird line"); + /// + /// assert_eq!(locator.line_end(TextSize::from(3)), TextSize::from(10)); + /// assert_eq!(locator.line_end(TextSize::from(14)), TextSize::from(22)); + /// assert_eq!(locator.line_end(TextSize::from(28)), TextSize::from(34)); + /// ``` + /// + /// ## Panics + /// + /// If `offset` is passed the end of the content. + pub fn line_end(&self, offset: TextSize) -> TextSize { + let slice = &self.contents[usize::from(offset)..]; + if let Some(index) = memchr2(b'\n', b'\r', slice.as_bytes()) { + offset + TextSize::try_from(index).unwrap() + } else { + self.contents.text_len() + } + } + + /// Computes the range of this `offset`s line. + /// + /// The range starts at the beginning of the line and goes up to, and including, the new line character + /// at the end of the line. 
+ /// + /// ## Examples + /// + /// ``` + /// # use ruff_text_size::{TextRange, TextSize}; + /// # use ruff_python_ast::source_code::Locator; + /// + /// let locator = Locator::new("First line\nsecond line\r\nthird line"); + /// + /// assert_eq!(locator.full_line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(11))); + /// assert_eq!(locator.full_line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(24))); + /// assert_eq!(locator.full_line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34))); + /// ``` + /// + /// ## Panics + /// If `offset` is out of bounds. + pub fn full_line_range(&self, offset: TextSize) -> TextRange { + TextRange::new(self.line_start(offset), self.full_line_end(offset)) + } + + /// Computes the range of this `offset`s line ending before the newline character. + /// + /// The range starts at the beginning of the line and goes up to, but excluding, the new line character + /// at the end of the line. + /// + /// ## Examples + /// + /// ``` + /// # use ruff_text_size::{TextRange, TextSize}; + /// # use ruff_python_ast::source_code::Locator; + /// + /// let locator = Locator::new("First line\nsecond line\r\nthird line"); + /// + /// assert_eq!(locator.line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(10))); + /// assert_eq!(locator.line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(22))); + /// assert_eq!(locator.line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34))); + /// ``` + /// + /// ## Panics + /// If `offset` is out of bounds. + pub fn line_range(&self, offset: TextSize) -> TextRange { + TextRange::new(self.line_start(offset), self.line_end(offset)) + } + + /// Returns the text of the `offset`'s line. + /// + /// The line includes the newline characters at the end of the line. 
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// # use ruff_text_size::{TextRange, TextSize};
+    /// # use ruff_python_ast::source_code::Locator;
+    ///
+    /// let locator = Locator::new("First line\nsecond line\r\nthird line");
+    ///
+    /// assert_eq!(locator.full_line(TextSize::from(3)), "First line\n");
+    /// assert_eq!(locator.full_line(TextSize::from(14)), "second line\r\n");
+    /// assert_eq!(locator.full_line(TextSize::from(28)), "third line");
+    /// ```
+    ///
+    /// ## Panics
+    /// If `offset` is out of bounds.
+    pub fn full_line(&self, offset: TextSize) -> &'a str {
+        &self.contents[self.full_line_range(offset)]
+    }
+
+    /// Returns the text of the `offset`'s line.
+    ///
+    /// Excludes the newline characters at the end of the line.
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// # use ruff_text_size::{TextRange, TextSize};
+    /// # use ruff_python_ast::source_code::Locator;
+    ///
+    /// let locator = Locator::new("First line\nsecond line\r\nthird line");
+    ///
+    /// assert_eq!(locator.line(TextSize::from(3)), "First line");
+    /// assert_eq!(locator.line(TextSize::from(14)), "second line");
+    /// assert_eq!(locator.line(TextSize::from(28)), "third line");
+    /// ```
+    ///
+    /// ## Panics
+    /// If `offset` is out of bounds.
+    pub fn line(&self, offset: TextSize) -> &'a str {
+        &self.contents[self.line_range(offset)]
+    }
+
+    /// Computes the range of all lines that this `range` covers.
+    ///
+    /// The range starts at the beginning of the line at `range.start()` and goes up to, and including, the new line character
+    /// at the end of `range.end()`'s line.
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// # use ruff_text_size::{TextRange, TextSize};
+    /// # use ruff_python_ast::source_code::Locator;
+    ///
+    /// let locator = Locator::new("First line\nsecond line\r\nthird line");
+    ///
+    /// assert_eq!(
+    ///     locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
+    ///     TextRange::new(TextSize::from(0), TextSize::from(11))
+    /// );
+    /// assert_eq!(
+    ///     locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
+    ///     TextRange::new(TextSize::from(0), TextSize::from(24))
+    /// );
+    /// ```
+    ///
+    /// ## Panics
+    /// If the start or end of `range` is out of bounds.
+    pub fn full_lines_range(&self, range: TextRange) -> TextRange {
+        TextRange::new(
+            self.line_start(range.start()),
+            self.full_line_end(range.end()),
+        )
+    }
+
+    /// Computes the range of all lines that this `range` covers.
+    ///
+    /// The range starts at the beginning of the line at `range.start()` and goes up to, but excluding, the new line character
+    /// at the end of `range.end()`'s line.
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// # use ruff_text_size::{TextRange, TextSize};
+    /// # use ruff_python_ast::source_code::Locator;
+    ///
+    /// let locator = Locator::new("First line\nsecond line\r\nthird line");
+    ///
+    /// assert_eq!(
+    ///     locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
+    ///     TextRange::new(TextSize::from(0), TextSize::from(10))
+    /// );
+    /// assert_eq!(
+    ///     locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
+    ///     TextRange::new(TextSize::from(0), TextSize::from(22))
+    /// );
+    /// ```
+    ///
+    /// ## Panics
+    /// If the start or end of `range` is out of bounds.
+    pub fn lines_range(&self, range: TextRange) -> TextRange {
+        TextRange::new(self.line_start(range.start()), self.line_end(range.end()))
+    }
+
+    /// Returns `true` if the text of `range` contains any line break.
+    ///
+    /// ```
+    /// # use ruff_text_size::{TextRange, TextSize};
+    /// # use ruff_python_ast::source_code::Locator;
+    ///
+    /// let locator = Locator::new("First line\nsecond line\r\nthird line");
+    ///
+    /// assert!(
+    ///     !locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(5))),
+    /// );
+    /// assert!(
+    ///     locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(14))),
+    /// );
+    /// ```
+    ///
+    /// ## Panics
+    /// If the `range` is out of bounds.
+    pub fn contains_line_break(&self, range: TextRange) -> bool {
+        let text = &self.contents[range];
+        text.contains(['\n', '\r'])
+    }
+
+    /// Returns the text of all lines that include `range`.
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// # use ruff_text_size::{TextRange, TextSize};
+    /// # use ruff_python_ast::source_code::Locator;
+    ///
+    /// let locator = Locator::new("First line\nsecond line\r\nthird line");
+    ///
+    /// assert_eq!(
+    ///     locator.lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
+    ///     "First line"
+    /// );
+    /// assert_eq!(
+    ///     locator.lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
+    ///     "First line\nsecond line"
+    /// );
+    /// ```
+    ///
+    /// ## Panics
+    /// If the start or end of `range` is out of bounds.
+    pub fn lines(&self, range: TextRange) -> &'a str {
+        &self.contents[self.lines_range(range)]
+    }
+
+    /// Returns the text of all lines that include `range`.
+    ///
+    /// Includes the newline characters of the last line.
+    ///
+    /// ## Examples
+    ///
+    /// ```
+    /// # use ruff_text_size::{TextRange, TextSize};
+    /// # use ruff_python_ast::source_code::Locator;
+    ///
+    /// let locator = Locator::new("First line\nsecond line\r\nthird line");
+    ///
+    /// assert_eq!(
+    ///     locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
+    ///     "First line\n"
+    /// );
+    /// assert_eq!(
+    ///     locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
+    ///     "First line\nsecond line\r\n"
+    /// );
+    /// ```
+    ///
+    /// ## Panics
+    /// If the start or end of `range` is out of bounds.
+    pub fn full_lines(&self, range: TextRange) -> &'a str {
+        &self.contents[self.full_lines_range(range)]
+    }
+
+    /// Take the source code up to the given [`TextSize`].
+    #[inline]
+    pub fn up_to(&self, offset: TextSize) -> &'a str {
+        &self.contents[TextRange::up_to(offset)]
+    }
+
+    /// Take the source code after the given [`TextSize`].
+    #[inline]
+    pub fn after(&self, offset: TextSize) -> &'a str {
+        &self.contents[usize::from(offset)..]
+    }
+
+    /// Take the source code covered by the given [`TextRange`].
+    #[inline]
+    pub fn slice(&self, range: TextRange) -> &'a str {
+        &self.contents[range]
+    }
+
+    /// Return the underlying source code.
+    pub fn contents(&self) -> &'a str {
+        self.contents
+    }
+
+    /// Return the number of bytes in the source code.
+    pub const fn len(&self) -> usize {
+        self.contents.len()
+    }
+
+    pub fn text_len(&self) -> TextSize {
+        self.contents.text_len()
+    }
+
+    /// Return `true` if the source code is empty.
+    pub const fn is_empty(&self) -> bool {
+        self.contents.is_empty()
+    }
+}
diff --git a/ruff_source_location/src/newlines.rs b/ruff_source_location/src/newlines.rs
new file mode 100644
index 0000000000..5e3b97f85a
--- /dev/null
+++ b/ruff_source_location/src/newlines.rs
@@ -0,0 +1,446 @@
+use memchr::{memchr2, memrchr2};
+use ruff_text_size::{TextLen, TextRange, TextSize};
+use std::iter::FusedIterator;
+use std::ops::Deref;
+
+/// Extension trait for [`str`] that provides a [`UniversalNewlineIterator`].
+pub trait StrExt {
+    fn universal_newlines(&self) -> UniversalNewlineIterator<'_>;
+}
+
+impl StrExt for str {
+    fn universal_newlines(&self) -> UniversalNewlineIterator<'_> {
+        UniversalNewlineIterator::from(self)
+    }
+}
+
+/// Like [`str#lines`], but accommodates LF, CRLF, and CR line endings,
+/// the last of which is not supported by [`str#lines`].
+///
+/// ## Examples
+///
+/// ```rust
+/// # use ruff_text_size::TextSize;
+/// # use ruff_python_ast::newlines::{Line, UniversalNewlineIterator};
+/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
+///
+/// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14))));
+/// assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
+/// assert_eq!(lines.next_back(), Some(Line::new("baz\r", TextSize::from(10))));
+/// assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
+/// assert_eq!(lines.next_back(), Some(Line::new("\r\n", TextSize::from(8))));
+/// assert_eq!(lines.next(), None);
+/// ```
+pub struct UniversalNewlineIterator<'a> {
+    text: &'a str,
+    offset: TextSize,
+    offset_back: TextSize,
+}
+
+impl<'a> UniversalNewlineIterator<'a> {
+    pub fn with_offset(text: &'a str, offset: TextSize) -> UniversalNewlineIterator<'a> {
+        UniversalNewlineIterator {
+            text,
+            offset,
+            offset_back: offset + text.text_len(),
+        }
+    }
+
+    pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> {
+        Self::with_offset(text, TextSize::default())
+    }
+}
+
+/// Finds the next newline character. Returns its position and the [`LineEnding`].
+#[inline]
+pub fn find_newline(text: &str) -> Option<(usize, LineEnding)> {
+    let bytes = text.as_bytes();
+    if let Some(position) = memchr2(b'\n', b'\r', bytes) {
+        // SAFETY: memchr guarantees to return valid positions
+        #[allow(unsafe_code)]
+        let newline_character = unsafe { *bytes.get_unchecked(position) };
+
+        let line_ending = match newline_character {
+            // Explicit branch for `\n` as this is the most likely path
+            b'\n' => LineEnding::Lf,
+            // '\r\n'
+            b'\r' if bytes.get(position.saturating_add(1)) == Some(&b'\n') => LineEnding::CrLf,
+            // '\r'
+            _ => LineEnding::Cr,
+        };
+
+        Some((position, line_ending))
+    } else {
+        None
+    }
+}
+
+impl<'a> Iterator for UniversalNewlineIterator<'a> {
+    type Item = Line<'a>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Line<'a>> {
+        if self.text.is_empty() {
+            return None;
+        }
+
+        let line = if let Some((newline_position, line_ending)) = find_newline(self.text) {
+            let (text, remainder) = self.text.split_at(newline_position + line_ending.len());
+
+            let line = Line {
+                offset: self.offset,
+                text,
+            };
+
+            self.text = remainder;
+            self.offset += text.text_len();
+
+            line
+        }
+        // Last line
+        else {
+            Line {
+                offset: self.offset,
+                text: std::mem::take(&mut self.text),
+            }
+        };
+
+        Some(line)
+    }
+
+    fn last(mut self) -> Option<Self::Item> {
+        self.next_back()
+    }
+}
+
+impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.text.is_empty() {
+            return None;
+        }
+
+        let len = self.text.len();
+
+        // Trim the trailing line terminator (if any) so the search below skips it.
+        let haystack = match self.text.as_bytes()[len - 1] {
+            b'\n' if len > 1 && self.text.as_bytes()[len - 2] == b'\r' => &self.text[..len - 2],
+            b'\n' | b'\r' => &self.text[..len - 1],
+            _ => self.text,
+        };
+
+        // Find the end of the previous line. The previous line is the text up to, but not including
+        // the newline character.
+        let line = if let Some(line_end) = memrchr2(b'\n', b'\r', haystack.as_bytes()) {
+            // '\n' or '\r' or '\r\n'
+            let (remainder, line) = self.text.split_at(line_end + 1);
+            self.text = remainder;
+            self.offset_back -= line.text_len();
+
+            Line {
+                text: line,
+                offset: self.offset_back,
+            }
+        } else {
+            // No earlier line break: the remaining text is the final item to yield
+            let offset = self.offset_back - self.text.text_len();
+            Line {
+                text: std::mem::take(&mut self.text),
+                offset,
+            }
+        };
+
+        Some(line)
+    }
+}
+
+impl FusedIterator for UniversalNewlineIterator<'_> {}
+
+/// Like [`UniversalNewlineIterator`], but includes a trailing newline as an empty line.
+pub struct NewlineWithTrailingNewline<'a> {
+    trailing: Option<Line<'a>>,
+    underlying: UniversalNewlineIterator<'a>,
+}
+
+impl<'a> NewlineWithTrailingNewline<'a> {
+    pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> {
+        Self::with_offset(input, TextSize::default())
+    }
+
+    pub fn with_offset(input: &'a str, offset: TextSize) -> Self {
+        NewlineWithTrailingNewline {
+            underlying: UniversalNewlineIterator::with_offset(input, offset),
+            trailing: if input.ends_with(['\r', '\n']) {
+                Some(Line {
+                    text: "",
+                    offset: offset + input.text_len(),
+                })
+            } else {
+                None
+            },
+        }
+    }
+}
+
+impl<'a> Iterator for NewlineWithTrailingNewline<'a> {
+    type Item = Line<'a>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Line<'a>> {
+        self.underlying.next().or_else(|| self.trailing.take())
+    }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct Line<'a> {
+    text: &'a str,
+    offset: TextSize,
+}
+
+impl<'a> Line<'a> {
+    pub fn new(text: &'a str, offset: TextSize) -> Self {
+        Self { text, offset }
+    }
+
+    #[inline]
+    pub const fn start(&self) -> TextSize {
+        self.offset
+    }
+
+    /// Returns the byte offset where the line ends, including its terminating new line character.
+    #[inline]
+    pub fn full_end(&self) -> TextSize {
+        self.offset + self.full_text_len()
+    }
+
+    /// Returns the byte offset where the line ends, excluding its new line character.
+    #[inline]
+    pub fn end(&self) -> TextSize {
+        self.offset + self.as_str().text_len()
+    }
+
+    /// Returns the range of the line, including its terminating new line character.
+    #[inline]
+    pub fn full_range(&self) -> TextRange {
+        TextRange::at(self.offset, self.text.text_len())
+    }
+
+    /// Returns the range of the line, excluding its terminating new line character.
+    #[inline]
+    pub fn range(&self) -> TextRange {
+        TextRange::new(self.start(), self.end())
+    }
+
+    /// Returns the text of the line, excluding the terminating new line character.
+    #[inline]
+    pub fn as_str(&self) -> &'a str {
+        let mut bytes = self.text.bytes().rev();
+
+        let newline_len = match bytes.next() {
+            Some(b'\n') => {
+                if bytes.next() == Some(b'\r') {
+                    2
+                } else {
+                    1
+                }
+            }
+            Some(b'\r') => 1,
+            _ => 0,
+        };
+
+        &self.text[..self.text.len() - newline_len]
+    }
+
+    /// Returns the line's text, including the terminating new line character.
+    #[inline]
+    pub fn as_full_str(&self) -> &'a str {
+        self.text
+    }
+
+    #[inline]
+    pub fn full_text_len(&self) -> TextSize {
+        self.text.text_len()
+    }
+}
+
+impl Deref for Line<'_> {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        self.as_str()
+    }
+}
+
+impl PartialEq<&str> for Line<'_> {
+    fn eq(&self, other: &&str) -> bool {
+        self.as_str() == *other
+    }
+}
+
+impl PartialEq<Line<'_>> for &str {
+    fn eq(&self, other: &Line<'_>) -> bool {
+        *self == other.as_str()
+    }
+}
+
+/// The line ending style used in Python source code.
+/// See <https://docs.python.org/3/reference/lexical_analysis.html#physical-lines>
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub enum LineEnding {
+    Lf,
+    Cr,
+    CrLf,
+}
+
+impl Default for LineEnding {
+    fn default() -> Self {
+        if cfg!(windows) {
+            LineEnding::CrLf
+        } else {
+            LineEnding::Lf
+        }
+    }
+}
+
+impl LineEnding {
+    pub const fn as_str(&self) -> &'static str {
+        match self {
+            LineEnding::Lf => "\n",
+            LineEnding::CrLf => "\r\n",
+            LineEnding::Cr => "\r",
+        }
+    }
+
+    #[allow(clippy::len_without_is_empty)]
+    pub const fn len(&self) -> usize {
+        match self {
+            LineEnding::Lf | LineEnding::Cr => 1,
+            LineEnding::CrLf => 2,
+        }
+    }
+
+    pub const fn text_len(&self) -> TextSize {
+        match self {
+            LineEnding::Lf | LineEnding::Cr => TextSize::new(1),
+            LineEnding::CrLf => TextSize::new(2),
+        }
+    }
+}
+
+impl Deref for LineEnding {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        self.as_str()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::UniversalNewlineIterator;
+    use crate::newlines::Line;
+    use ruff_text_size::TextSize;
+
+    #[test]
+    fn universal_newlines_empty_str() {
+        let lines: Vec<_> = UniversalNewlineIterator::from("").collect();
+        assert_eq!(lines, Vec::<Line>::new());
+
+        let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect();
+        assert_eq!(lines, Vec::<Line>::new());
+    }
+
+    #[test]
+    fn universal_newlines_forward() {
+        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect();
+        assert_eq!(
+            lines,
+            vec![
+                Line::new("foo\n", TextSize::from(0)),
+                Line::new("bar\n", TextSize::from(4)),
+                Line::new("\r\n", TextSize::from(8)),
+                Line::new("baz\r", TextSize::from(10)),
+                Line::new("bop", TextSize::from(14)),
+            ]
+        );
+
+        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect();
+        assert_eq!(
+            lines,
+            vec![
+                Line::new("foo\n", TextSize::from(0)),
+                Line::new("bar\n", TextSize::from(4)),
+                Line::new("\r\n", TextSize::from(8)),
+                Line::new("baz\r", TextSize::from(10)),
+                Line::new("bop\n", TextSize::from(14)),
+            ]
+        );
+
+        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect();
+        assert_eq!(
+            lines,
+            vec![
+                Line::new("foo\n", TextSize::from(0)),
+                Line::new("bar\n", TextSize::from(4)),
+                Line::new("\r\n", TextSize::from(8)),
+                Line::new("baz\r", TextSize::from(10)),
+                Line::new("bop\n", TextSize::from(14)),
+                Line::new("\n", TextSize::from(18)),
+            ]
+        );
+    }
+
+    #[test]
+    fn universal_newlines_backwards() {
+        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop")
+            .rev()
+            .collect();
+        assert_eq!(
+            lines,
+            vec![
+                Line::new("bop", TextSize::from(14)),
+                Line::new("baz\r", TextSize::from(10)),
+                Line::new("\r\n", TextSize::from(8)),
+                Line::new("bar\n", TextSize::from(4)),
+                Line::new("foo\n", TextSize::from(0)),
+            ]
+        );
+
+        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n")
+            .rev()
+            .map(|line| line.as_str())
+            .collect();
+
+        assert_eq!(
+            lines,
+            vec![
+                Line::new("bop\n", TextSize::from(13)),
+                Line::new("baz\r", TextSize::from(9)),
+                Line::new("\n", TextSize::from(8)),
+                Line::new("bar\n", TextSize::from(4)),
+                Line::new("foo\n", TextSize::from(0)),
+            ]
+        );
+    }
+
+    #[test]
+    fn universal_newlines_mixed() {
+        let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
+
+        assert_eq!(
+            lines.next_back(),
+            Some(Line::new("bop", TextSize::from(14)))
+        );
+        assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
+        assert_eq!(
+            lines.next_back(),
+            Some(Line::new("baz\r", TextSize::from(10)))
+        );
+        assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
+        assert_eq!(
+            lines.next_back(),
+            Some(Line::new("\r\n", TextSize::from(8)))
+        );
+        assert_eq!(lines.next(), None);
+    }
+}