mirror of https://github.com/astral-sh/ruff

Box other strings

parent 0a5a4f6d92
commit 56b148bb43
@@ -217,12 +217,12 @@ checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"

 [[package]]
 name = "bstr"
-version = "1.6.2"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a"
+checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc"
 dependencies = [
  "memchr",
- "regex-automata 0.3.9",
+ "regex-automata 0.4.3",
  "serde",
 ]

@@ -1921,12 +1921,6 @@ dependencies = [
  "regex-syntax 0.6.29",
 ]

-[[package]]
-name = "regex-automata"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"
-
 [[package]]
 name = "regex-automata"
 version = "0.4.3"
@@ -2342,6 +2336,7 @@ version = "0.0.0"
 dependencies = [
  "anyhow",
  "bitflags 2.4.1",
+ "bstr",
  "insta",
  "is-macro",
  "itertools 0.12.1",
@@ -19,6 +19,7 @@ argfile = { version = "0.1.6" }
 assert_cmd = { version = "2.0.13" }
 bincode = { version = "1.3.3" }
 bitflags = { version = "2.4.1" }
+bstr = { version = "1.9.0" }
 cachedir = { version = "0.3.1" }
 chrono = { version = "0.4.33", default-features = false, features = ["clock"] }
 clap = { version = "4.4.18", features = ["derive"] }
@@ -40,7 +40,9 @@ impl Violation for HardcodedBindAllInterfaces {
 pub(crate) fn hardcoded_bind_all_interfaces(checker: &mut Checker, string: StringLike) {
     let is_bind_all_interface = match string {
         StringLike::StringLiteral(ast::ExprStringLiteral { value, .. }) => value == "0.0.0.0",
-        StringLike::FStringLiteral(ast::FStringLiteralElement { value, .. }) => value == "0.0.0.0",
+        StringLike::FStringLiteral(ast::FStringLiteralElement { value, .. }) => {
+            &**value == "0.0.0.0"
+        }
         StringLike::BytesLiteral(_) => return,
     };

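The f-string arm needs the extra block because the element's value field is now a Box<str> (see the FStringLiteralElement hunk further down): std gives String a PartialEq<str> impl but gives Box<str> none, so the pattern binding has to be dereferenced down to &str before comparing. A minimal sketch of the same situation, using a hypothetical Literal struct rather than ruff's real types:

struct Literal {
    value: Box<str>,
}

fn is_bind_all(lit: &Literal) -> bool {
    // Match ergonomics bind `value` as &Box<str>; `*value` is Box<str>,
    // `**value` is str, so `&**value` is the &str we can compare.
    let Literal { value } = lit;
    &**value == "0.0.0.0"
}

fn main() {
    assert!(is_bind_all(&Literal { value: "0.0.0.0".into() }));
}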
@@ -15,7 +15,7 @@ fn to_f_string_expression_element(inner: &Expr) -> ast::FStringElement {
 /// Convert a string to a [`ast::FStringElement::Literal`].
 pub(super) fn to_f_string_literal_element(s: &str) -> ast::FStringElement {
     ast::FStringElement::Literal(ast::FStringLiteralElement {
-        value: s.to_owned(),
+        value: s.to_string().into_boxed_str(),
         range: TextRange::default(),
     })
 }
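As an aside, `s.to_string().into_boxed_str()` and `Box::<str>::from(s)` are equivalent ways to turn a `&str` into an owned `Box<str>`, and both end up allocating once here; a tiny sketch:

fn main() {
    let s = "hello";
    let a: Box<str> = s.to_string().into_boxed_str();
    let b: Box<str> = Box::from(s);
    assert_eq!(a, b);
}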
@@ -53,7 +53,7 @@ pub(super) fn to_f_string_element(expr: &Expr) -> Option<ast::FStringElement> {
     match expr {
         Expr::StringLiteral(ast::ExprStringLiteral { value, range }) => {
             Some(ast::FStringElement::Literal(ast::FStringLiteralElement {
-                value: value.to_string(),
+                value: value.to_string().into_boxed_str(),
                 range: *range,
             }))
         }
@@ -644,7 +644,7 @@ pub struct ComparableBytesLiteral<'a> {
 impl<'a> From<&'a ast::BytesLiteral> for ComparableBytesLiteral<'a> {
     fn from(bytes_literal: &'a ast::BytesLiteral) -> Self {
         Self {
-            value: bytes_literal.value.as_slice(),
+            value: &bytes_literal.value,
         }
     }
 }
@@ -949,7 +949,7 @@ impl Ranged for FStringExpressionElement {
 #[derive(Clone, Debug, PartialEq)]
 pub struct FStringLiteralElement {
     pub range: TextRange,
-    pub value: String,
+    pub value: Box<str>,
 }

 impl Ranged for FStringLiteralElement {
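The point of boxing (here, and for BytesLiteral below) is that an AST node's text is built once and never grown again, so the capacity field that String and Vec<u8> carry is dead weight in every node. A quick size check, assuming a typical 64-bit target:

use std::mem::size_of;

fn main() {
    // String / Vec<u8> store (pointer, length, capacity);
    // the boxed slices store only (pointer, length).
    assert_eq!(size_of::<String>(), 24);
    assert_eq!(size_of::<Vec<u8>>(), 24);
    assert_eq!(size_of::<Box<str>>(), 16);
    assert_eq!(size_of::<Box<[u8]>>(), 16);
}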
@@ -962,7 +962,7 @@ impl Deref for FStringLiteralElement {
     type Target = str;

     fn deref(&self) -> &Self::Target {
-        self.value.as_str()
+        &self.value
     }
 }

@@ -1607,7 +1607,7 @@ impl Default for BytesLiteralValueInner {
 #[derive(Clone, Debug, Default, PartialEq)]
 pub struct BytesLiteral {
     pub range: TextRange,
-    pub value: Vec<u8>,
+    pub value: Box<[u8]>,
 }

 impl Ranged for BytesLiteral {
@@ -1620,7 +1620,7 @@ impl Deref for BytesLiteral {
     type Target = [u8];

     fn deref(&self) -> &Self::Target {
-        self.value.as_slice()
+        &self.value
     }
 }

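Both Deref bodies (and the ComparableBytesLiteral change above) lean on the same coercion: `&self.value` is an `&Box<[u8]>` or `&Box<str>`, and the compiler derefs through the Box to produce the `&[u8]` / `&str` the signature asks for, so the explicit `.as_slice()` / `.as_str()` calls go away. A minimal sketch of the pattern, not the real node type:

use std::ops::Deref;

struct BytesLiteral {
    value: Box<[u8]>,
}

impl Deref for BytesLiteral {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        // &Box<[u8]> coerces to &[u8] through Box's own Deref.
        &self.value
    }
}

fn main() {
    let lit = BytesLiteral { value: Box::from(&b"abc"[..]) };
    // Callers can then treat the node like a byte slice directly.
    assert_eq!(lit.len(), 3);
    assert_eq!(&*lit, b"abc");
}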
@@ -19,14 +19,15 @@ ruff_text_size = { path = "../ruff_text_size" }

 anyhow = { workspace = true }
 bitflags = { workspace = true }
+bstr = { workspace = true }
 is-macro = { workspace = true }
 itertools = { workspace = true }
 lalrpop-util = { workspace = true, default-features = false }
 memchr = { workspace = true }
-unicode-ident = { workspace = true }
-unicode_names2 = { workspace = true }
 rustc-hash = { workspace = true }
 static_assertions = { workspace = true }
+unicode-ident = { workspace = true }
+unicode_names2 = { workspace = true }

 [dev-dependencies]
 insta = { workspace = true }
@@ -1,345 +0,0 @@
#![allow(
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap,
    clippy::cast_ptr_alignment,
    clippy::inline_always,
    clippy::ptr_as_ptr,
    unsafe_code
)]

//! Source: <https://github.com/BurntSushi/bstr/blob/d4aeee2eac5d5ef6ec4d2206f6ebffe7b3dd3e1f/src/ascii.rs>

// The following ~400 lines of code exists for exactly one purpose, which is
// to optimize this code:
//
// byte_slice.iter().position(|&b| b > 0x7F).unwrap_or(byte_slice.len())
//
// Yes... Overengineered is a word that comes to mind, but this is effectively
// a very similar problem to memchr, and virtually nobody has been able to
// resist optimizing the crap out of that (except for perhaps the BSD and MUSL
// folks). In particular, this routine makes a very common case (ASCII) very
// fast, which seems worth it. We do stop short of adding AVX variants of the
// code below in order to retain our sanity and also to avoid needing to deal
// with runtime target feature detection. RESIST!
//
// In order to understand the SIMD version below, it would be good to read this
// comment describing how my memchr routine works:
// https://github.com/BurntSushi/rust-memchr/blob/b0a29f267f4a7fad8ffcc8fe8377a06498202883/src/x86/sse2.rs#L19-L106
//
// The primary difference with memchr is that for ASCII, we can do a bit less
// work. In particular, we don't need to detect the presence of a specific
// byte, but rather, whether any byte has its most significant bit set. That
// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
// _mm_movemask_epi8.

#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const USIZE_BYTES: usize = core::mem::size_of::<usize>();
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;

// This is a mask where the most significant bit of each byte in the usize
// is set. We test this bit to determine whether a character is ASCII or not.
// Namely, a single byte is regarded as an ASCII codepoint if and only if it's
// most significant bit is not set.
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const ASCII_MASK_U64: u64 = 0x8080_8080_8080_8080;
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const ASCII_MASK: usize = ASCII_MASK_U64 as usize;

/// Returns the index of the first non ASCII byte in the given slice.
///
/// If slice only contains ASCII bytes, then the length of the slice is
/// returned.
pub(crate) fn first_non_ascii_byte(slice: &[u8]) -> usize {
    #[cfg(any(miri, not(target_arch = "x86_64")))]
    {
        first_non_ascii_byte_fallback(slice)
    }

    #[cfg(all(not(miri), target_arch = "x86_64"))]
    {
        first_non_ascii_byte_sse2(slice)
    }
}

#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
    let align = USIZE_BYTES - 1;
    let start_ptr = slice.as_ptr();
    let end_ptr = slice[slice.len()..].as_ptr();
    let mut ptr = start_ptr;

    unsafe {
        if slice.len() < USIZE_BYTES {
            return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
        }

        let chunk = read_unaligned_usize(ptr);
        let mask = chunk & ASCII_MASK;
        if mask != 0 {
            return first_non_ascii_byte_mask(mask);
        }

        ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
        debug_assert!(ptr > start_ptr);
        debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
        if slice.len() >= FALLBACK_LOOP_SIZE {
            while ptr <= ptr_sub(end_ptr, FALLBACK_LOOP_SIZE) {
                debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

                let a = *ptr.cast::<usize>();
                let b = *ptr_add(ptr, USIZE_BYTES).cast::<usize>();
                if (a | b) & ASCII_MASK != 0 {
                    // What a kludge. We wrap the position finding code into
                    // a non-inlineable function, which makes the codegen in
                    // the tight loop above a bit better by avoiding a
                    // couple extra movs. We pay for it by two additional
                    // stores, but only in the case of finding a non-ASCII
                    // byte.
                    #[inline(never)]
                    unsafe fn findpos(start_ptr: *const u8, ptr: *const u8) -> usize {
                        let a = *ptr.cast::<usize>();
                        let b = *ptr_add(ptr, USIZE_BYTES).cast::<usize>();

                        let mut at = sub(ptr, start_ptr);
                        let maska = a & ASCII_MASK;
                        if maska != 0 {
                            return at + first_non_ascii_byte_mask(maska);
                        }

                        at += USIZE_BYTES;
                        let maskb = b & ASCII_MASK;
                        debug_assert!(maskb != 0);
                        at + first_non_ascii_byte_mask(maskb)
                    }
                    return findpos(start_ptr, ptr);
                }
                ptr = ptr_add(ptr, FALLBACK_LOOP_SIZE);
            }
        }
        first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
    }
}

#[cfg(all(not(miri), target_arch = "x86_64"))]
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
    use core::arch::x86_64::{
        __m128i, _mm_load_si128, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128,
    };

    const VECTOR_SIZE: usize = core::mem::size_of::<__m128i>();
    const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
    const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;

    let start_ptr = slice.as_ptr();
    let end_ptr = slice[slice.len()..].as_ptr();
    let mut ptr = start_ptr;

    unsafe {
        if slice.len() < VECTOR_SIZE {
            return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
        }

        let chunk = _mm_loadu_si128(ptr as *const __m128i);
        let mask = _mm_movemask_epi8(chunk);
        if mask != 0 {
            return mask.trailing_zeros() as usize;
        }

        ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
        debug_assert!(ptr > start_ptr);
        debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
        if slice.len() >= VECTOR_LOOP_SIZE {
            while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
                debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);

                let a = _mm_load_si128(ptr as *const __m128i);
                let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
                let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
                let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);

                let or1 = _mm_or_si128(a, b);
                let or2 = _mm_or_si128(c, d);
                let or3 = _mm_or_si128(or1, or2);
                if _mm_movemask_epi8(or3) != 0 {
                    let mut at = sub(ptr, start_ptr);
                    let mask = _mm_movemask_epi8(a);
                    if mask != 0 {
                        return at + mask.trailing_zeros() as usize;
                    }

                    at += VECTOR_SIZE;
                    let mask = _mm_movemask_epi8(b);
                    if mask != 0 {
                        return at + mask.trailing_zeros() as usize;
                    }

                    at += VECTOR_SIZE;
                    let mask = _mm_movemask_epi8(c);
                    if mask != 0 {
                        return at + mask.trailing_zeros() as usize;
                    }

                    at += VECTOR_SIZE;
                    let mask = _mm_movemask_epi8(d);
                    debug_assert!(mask != 0);
                    return at + mask.trailing_zeros() as usize;
                }
                ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
            }
        }
        while ptr <= end_ptr.sub(VECTOR_SIZE) {
            debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);

            let chunk = _mm_loadu_si128(ptr as *const __m128i);
            let mask = _mm_movemask_epi8(chunk);
            if mask != 0 {
                return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
            }
            ptr = ptr.add(VECTOR_SIZE);
        }
        first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
    }
}

#[inline(always)]
unsafe fn first_non_ascii_byte_slow(
    start_ptr: *const u8,
    end_ptr: *const u8,
    mut ptr: *const u8,
) -> usize {
    debug_assert!(start_ptr <= ptr);
    debug_assert!(ptr <= end_ptr);

    while ptr < end_ptr {
        if *ptr > 0x7F {
            return sub(ptr, start_ptr);
        }
        ptr = ptr.offset(1);
    }
    sub(end_ptr, start_ptr)
}

/// Compute the position of the first ASCII byte in the given mask.
///
/// The mask should be computed by `chunk & ASCII_MASK`, where `chunk` is
/// 8 contiguous bytes of the slice being checked where *at least* one of those
/// bytes is not an ASCII byte.
///
/// The position returned is always in the inclusive range [0, 7].
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_mask(mask: usize) -> usize {
    #[cfg(target_endian = "little")]
    {
        mask.trailing_zeros() as usize / 8
    }
    #[cfg(target_endian = "big")]
    {
        mask.leading_zeros() as usize / 8
    }
}

/// Increment the given pointer by the given amount.
unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
    debug_assert!(amt < ::core::isize::MAX as usize);
    ptr.add(amt)
}

/// Decrement the given pointer by the given amount.
unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
    debug_assert!(amt < ::core::isize::MAX as usize);
    ptr.offset((amt as isize).wrapping_neg())
}

#[cfg(any(test, miri, not(target_arch = "x86_64")))]
unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
    use core::ptr;

    let mut n: usize = 0;
    ptr::copy_nonoverlapping(ptr, std::ptr::addr_of_mut!(n) as *mut u8, USIZE_BYTES);
    n
}

/// Subtract `b` from `a` and return the difference. `a` should be greater than
/// or equal to `b`.
fn sub(a: *const u8, b: *const u8) -> usize {
    debug_assert!(a >= b);
    (a as usize) - (b as usize)
}

#[cfg(test)]
mod tests {
    use super::*;

    // Our testing approach here is to try and exhaustively test every case.
    // This includes the position at which a non-ASCII byte occurs in addition
    // to the alignment of the slice that we're searching.

    #[test]
    fn positive_fallback_forward() {
        for i in 0..517 {
            let s = "a".repeat(i);
            assert_eq!(
                i,
                first_non_ascii_byte_fallback(s.as_bytes()),
                "i: {:?}, len: {:?}, s: {:?}",
                i,
                s.len(),
                s
            );
        }
    }

    #[test]
    #[cfg(target_arch = "x86_64")]
    #[cfg(not(miri))]
    fn positive_sse2_forward() {
        for i in 0..517 {
            let b = "a".repeat(i).into_bytes();
            assert_eq!(b.len(), first_non_ascii_byte_sse2(&b));
        }
    }

    #[test]
    #[cfg(not(miri))]
    fn negative_fallback_forward() {
        for i in 0..517 {
            for align in 0..65 {
                let mut s = "a".repeat(i);
                s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
                let s = s.get(align..).unwrap_or("");
                assert_eq!(
                    i.saturating_sub(align),
                    first_non_ascii_byte_fallback(s.as_bytes()),
                    "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
                    i,
                    align,
                    s.len(),
                    s
                );
            }
        }
    }

    #[test]
    #[cfg(target_arch = "x86_64")]
    #[cfg(not(miri))]
    fn negative_sse2_forward() {
        for i in 0..517 {
            for align in 0..65 {
                let mut s = "a".repeat(i);
                s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
                let s = s.get(align..).unwrap_or("");
                assert_eq!(
                    i.saturating_sub(align),
                    first_non_ascii_byte_sse2(s.as_bytes()),
                    "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
                    i,
                    align,
                    s.len(),
                    s
                );
            }
        }
    }
}
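This whole vendored module is deleted in favor of bstr (added as a dependency above). Its portable fallback is the classic word-at-a-time trick; a minimal standalone demonstration of the idea, little-endian case only:

fn main() {
    // AND a word-sized chunk with 0x80 in every byte: the surviving bits mark
    // the non-ASCII bytes, and trailing_zeros() / 8 is the index of the first one.
    let chunk = u64::from_le_bytes(*b"abc\xCEdefg");
    let mask = chunk & 0x8080_8080_8080_8080;
    assert_eq!(mask.trailing_zeros() / 8, 3);
}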
@@ -119,11 +119,10 @@ pub use token::{StringKind, Tok, TokenKind};

 use crate::lexer::LexResult;

-mod function;
-// Skip flattening lexer to distinguish from full ruff_python_parser
-mod ascii;
 mod context;
+mod function;
 mod invalid;
+// Skip flattening lexer to distinguish from full ruff_python_parser
 pub mod lexer;
 mod parser;
 mod soft_keywords;
@@ -1,9 +1,10 @@
 //! Parsing of string literals, bytes literals, and implicit string concatenation.

+use bstr::ByteSlice;
+
 use ruff_python_ast::{self as ast, Expr};
 use ruff_text_size::{Ranged, TextRange, TextSize};

-use crate::ascii::first_non_ascii_byte;
 use crate::lexer::{LexicalError, LexicalErrorType};
 use crate::token::{StringKind, Tok};

@@ -218,9 +219,9 @@ impl StringParser {

         let mut value = String::with_capacity(self.source.len());
         loop {
-            // Add the characters before the escape sequence to the string.
-            let before_with_slash = self.skip_bytes(index + 1);
-            let before = &before_with_slash[..before_with_slash.len() - 1];
+            // Add the characters before the escape sequence (or curly brace) to the string.
+            let before_with_slash_or_brace = self.skip_bytes(index + 1);
+            let before = &before_with_slash_or_brace[..before_with_slash_or_brace.len() - 1];
             value.push_str(before);

             // Add the escaped character to the string.
@@ -284,14 +285,13 @@ impl StringParser {
         }

         Ok(ast::FStringElement::Literal(ast::FStringLiteralElement {
-            value,
+            value: value.into_boxed_str(),
             range: self.range,
         }))
     }

     fn parse_bytes(mut self) -> Result<StringType, LexicalError> {
-        let index = first_non_ascii_byte(self.source.as_bytes());
-        if index < self.source.len() {
+        if let Some(index) = self.source.as_bytes().find_non_ascii_byte() {
             return Err(LexicalError::new(
                 LexicalErrorType::OtherError(
                     "bytes can only contain ASCII literal characters"
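find_non_ascii_byte comes from bstr's ByteSlice extension trait (imported at the top of this file) and returns Some(index) of the first byte at or above 0x80, or None for pure-ASCII input, which is why the old index < len check disappears. A quick usage sketch:

use bstr::ByteSlice;

fn main() {
    assert_eq!(b"hello".find_non_ascii_byte(), None);
    assert_eq!("héllo".as_bytes().find_non_ascii_byte(), Some(1));
}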
@@ -305,7 +305,7 @@ impl StringParser {
         if self.kind.is_raw() {
             // For raw strings, no escaping is necessary.
             return Ok(StringType::Bytes(ast::BytesLiteral {
-                value: self.source.into_bytes(),
+                value: self.source.into_boxed_bytes(),
                 range: self.range,
             }));
         }
@@ -313,7 +313,7 @@
         let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
             // If the string doesn't contain any escape sequences, return the owned string.
             return Ok(StringType::Bytes(ast::BytesLiteral {
-                value: self.source.into_bytes(),
+                value: self.source.into_boxed_bytes(),
                 range: self.range,
             }));
         };
@@ -349,7 +349,7 @@
         }

         Ok(StringType::Bytes(ast::BytesLiteral {
-            value,
+            value: value.into_boxed_slice(),
             range: self.range,
         }))
     }
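The parser still accumulates into String and Vec<u8> while scanning; only the final value handed to the AST node is converted. into_boxed_str and into_boxed_slice (and str's into_boxed_bytes above) shed any excess capacity and drop the capacity field, typically at the cost of one shrinking reallocation per literal. A small sketch of that conversion:

fn main() {
    let mut value = String::with_capacity(64);
    value.push_str("hello");
    // Excess capacity is shrunk away before the buffer is boxed.
    let boxed: Box<str> = value.into_boxed_str();
    assert_eq!(&*boxed, "hello");

    let bytes: Vec<u8> = vec![0x68, 0x69];
    let boxed_bytes: Box<[u8]> = bytes.into_boxed_slice();
    assert_eq!(&*boxed_bytes, b"hi");
}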