From 1c65e0ad255fceeb71ef279a01b6e5d08bdd88dd Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Sun, 27 Apr 2025 11:27:33 +0100 Subject: [PATCH] Split `SourceLocation` into `LineColumn` and `SourceLocation` (#17587) --- crates/red_knot_server/src/document.rs | 10 + crates/red_knot_server/src/document/range.rs | 128 +---- crates/red_knot_test/src/matcher.rs | 2 +- crates/red_knot_wasm/src/lib.rs | 35 +- crates/ruff/src/args.rs | 19 +- crates/ruff_db/src/diagnostic/render.rs | 4 +- .../src/checkers/ast/analyze/definitions.rs | 2 +- crates/ruff_linter/src/locator.rs | 6 +- crates/ruff_linter/src/logging.rs | 22 +- crates/ruff_linter/src/message/azure.rs | 6 +- crates/ruff_linter/src/message/github.rs | 12 +- crates/ruff_linter/src/message/gitlab.rs | 4 +- crates/ruff_linter/src/message/grouped.rs | 12 +- crates/ruff_linter/src/message/json.rs | 62 ++- crates/ruff_linter/src/message/junit.rs | 8 +- crates/ruff_linter/src/message/mod.rs | 14 +- crates/ruff_linter/src/message/pylint.rs | 2 +- crates/ruff_linter/src/message/rdjson.rs | 28 +- crates/ruff_linter/src/message/sarif.rs | 8 +- crates/ruff_linter/src/message/text.rs | 12 +- crates/ruff_notebook/src/index.rs | 25 +- .../ruff_python_formatter/tests/fixtures.rs | 4 +- crates/ruff_server/src/edit.rs | 10 + crates/ruff_server/src/edit/range.rs | 170 ++---- crates/ruff_server/src/server.rs | 3 + crates/ruff_source_file/src/lib.rs | 68 ++- crates/ruff_source_file/src/line_index.rs | 503 +++++++++++++----- crates/ruff_wasm/src/lib.rs | 37 +- crates/ruff_wasm/tests/api.rs | 16 +- 29 files changed, 695 insertions(+), 537 deletions(-) diff --git a/crates/red_knot_server/src/document.rs b/crates/red_knot_server/src/document.rs index 7dca10720f..741a7f7413 100644 --- a/crates/red_knot_server/src/document.rs +++ b/crates/red_knot_server/src/document.rs @@ -27,6 +27,16 @@ pub enum PositionEncoding { UTF8, } +impl From for ruff_source_file::PositionEncoding { + fn from(value: PositionEncoding) -> Self { + match value { + PositionEncoding::UTF8 => Self::Utf8, + PositionEncoding::UTF16 => Self::Utf16, + PositionEncoding::UTF32 => Self::Utf32, + } + } +} + /// A unique document ID, derived from a URL passed as part of an LSP request. /// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook. #[derive(Clone, Debug)] diff --git a/crates/red_knot_server/src/document/range.rs b/crates/red_knot_server/src/document/range.rs index 0e268dcfb1..492f2f86ee 100644 --- a/crates/red_knot_server/src/document/range.rs +++ b/crates/red_knot_server/src/document/range.rs @@ -9,8 +9,8 @@ use red_knot_python_semantic::Db; use ruff_db::files::FileRange; use ruff_db::source::{line_index, source_text}; use ruff_notebook::NotebookIndex; -use ruff_source_file::OneIndexed; -use ruff_source_file::{LineIndex, SourceLocation}; +use ruff_source_file::LineIndex; +use ruff_source_file::{OneIndexed, SourceLocation}; use ruff_text_size::{Ranged, TextRange, TextSize}; #[expect(dead_code)] @@ -46,7 +46,7 @@ impl TextSizeExt for TextSize { index: &LineIndex, encoding: PositionEncoding, ) -> types::Position { - let source_location = offset_to_source_location(self, text, index, encoding); + let source_location = index.source_location(self, text, encoding.into()); source_location_to_position(&source_location) } } @@ -75,36 +75,14 @@ fn u32_index_to_usize(index: u32) -> usize { impl PositionExt for lsp_types::Position { fn to_text_size(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> TextSize { - let start_line = index.line_range( - OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)), + index.offset( + SourceLocation { + line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)), + character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)), + }, text, - ); - - let start_column_offset = match encoding { - PositionEncoding::UTF8 => TextSize::new(self.character), - - PositionEncoding::UTF16 => { - // Fast path for ASCII only documents - if index.is_ascii() { - TextSize::new(self.character) - } else { - // UTF16 encodes characters either as one or two 16 bit words. - // The position in `range` is the 16-bit word offset from the start of the line (and not the character offset) - // UTF-16 with a text that may use variable-length characters. - utf8_column_offset(self.character, &text[start_line]) - } - } - PositionEncoding::UTF32 => { - // UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset. - return index.offset( - OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)), - OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)), - text, - ); - } - }; - - start_line.start() + start_column_offset.clamp(TextSize::new(0), start_line.end()) + encoding.into(), + ) } } @@ -142,26 +120,23 @@ impl ToRangeExt for TextRange { notebook_index: &NotebookIndex, encoding: PositionEncoding, ) -> NotebookRange { - let start = offset_to_source_location(self.start(), text, source_index, encoding); - let mut end = offset_to_source_location(self.end(), text, source_index, encoding); - let starting_cell = notebook_index.cell(start.row); + let start = source_index.source_location(self.start(), text, encoding.into()); + let mut end = source_index.source_location(self.end(), text, encoding.into()); + let starting_cell = notebook_index.cell(start.line); // weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds') // we need to move it one character back (which should place it at the end of the last line). // we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset. - if notebook_index.cell(end.row) != starting_cell { - end.row = end.row.saturating_sub(1); - end.column = offset_to_source_location( - self.end().checked_sub(1.into()).unwrap_or_default(), - text, - source_index, - encoding, - ) - .column; + if notebook_index.cell(end.line) != starting_cell { + end.line = end.line.saturating_sub(1); + let offset = self.end().checked_sub(1.into()).unwrap_or_default(); + end.character_offset = source_index + .source_location(offset, text, encoding.into()) + .character_offset; } - let start = source_location_to_position(¬ebook_index.translate_location(&start)); - let end = source_location_to_position(¬ebook_index.translate_location(&end)); + let start = source_location_to_position(¬ebook_index.translate_source_location(&start)); + let end = source_location_to_position(¬ebook_index.translate_source_location(&end)); NotebookRange { cell: starting_cell @@ -172,67 +147,10 @@ impl ToRangeExt for TextRange { } } -/// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number. -fn utf8_column_offset(utf16_code_unit_offset: u32, line: &str) -> TextSize { - let mut utf8_code_unit_offset = TextSize::new(0); - - let mut i = 0u32; - - for c in line.chars() { - if i >= utf16_code_unit_offset { - break; - } - - // Count characters encoded as two 16 bit words as 2 characters. - { - utf8_code_unit_offset += - TextSize::new(u32::try_from(c.len_utf8()).expect("utf8 len always <=4")); - i += u32::try_from(c.len_utf16()).expect("utf16 len always <=2"); - } - } - - utf8_code_unit_offset -} - -fn offset_to_source_location( - offset: TextSize, - text: &str, - index: &LineIndex, - encoding: PositionEncoding, -) -> SourceLocation { - match encoding { - PositionEncoding::UTF8 => { - let row = index.line_index(offset); - let column = offset - index.line_start(row, text); - - SourceLocation { - column: OneIndexed::from_zero_indexed(column.to_usize()), - row, - } - } - PositionEncoding::UTF16 => { - let row = index.line_index(offset); - - let column = if index.is_ascii() { - (offset - index.line_start(row, text)).to_usize() - } else { - let up_to_line = &text[TextRange::new(index.line_start(row, text), offset)]; - up_to_line.encode_utf16().count() - }; - - SourceLocation { - column: OneIndexed::from_zero_indexed(column), - row, - } - } - PositionEncoding::UTF32 => index.source_location(offset, text), - } -} - fn source_location_to_position(location: &SourceLocation) -> types::Position { types::Position { - line: u32::try_from(location.row.to_zero_indexed()).expect("row usize fits in u32"), - character: u32::try_from(location.column.to_zero_indexed()) + line: u32::try_from(location.line.to_zero_indexed()).expect("line usize fits in u32"), + character: u32::try_from(location.character_offset.to_zero_indexed()) .expect("character usize fits in u32"), } } diff --git a/crates/red_knot_test/src/matcher.rs b/crates/red_knot_test/src/matcher.rs index 63ae8c44af..4be6658b00 100644 --- a/crates/red_knot_test/src/matcher.rs +++ b/crates/red_knot_test/src/matcher.rs @@ -263,7 +263,7 @@ impl Matcher { .and_then(|span| span.range()) .map(|range| { self.line_index - .source_location(range.start(), &self.source) + .line_column(range.start(), &self.source) .column }) .unwrap_or(OneIndexed::from_zero_indexed(0)) diff --git a/crates/red_knot_wasm/src/lib.rs b/crates/red_knot_wasm/src/lib.rs index ed96760409..5f9333d028 100644 --- a/crates/red_knot_wasm/src/lib.rs +++ b/crates/red_knot_wasm/src/lib.rs @@ -19,7 +19,7 @@ use ruff_db::system::{ use ruff_db::Upcast; use ruff_notebook::Notebook; use ruff_python_formatter::formatted_file; -use ruff_source_file::{LineIndex, OneIndexed, SourceLocation}; +use ruff_source_file::{LineColumn, LineIndex, OneIndexed, PositionEncoding, SourceLocation}; use ruff_text_size::{Ranged, TextSize}; use wasm_bindgen::prelude::*; @@ -408,8 +408,8 @@ impl Range { } } -impl From<(SourceLocation, SourceLocation)> for Range { - fn from((start, end): (SourceLocation, SourceLocation)) -> Self { +impl From<(LineColumn, LineColumn)> for Range { + fn from((start, end): (LineColumn, LineColumn)) -> Self { Self { start: start.into(), end: end.into(), @@ -438,29 +438,34 @@ impl Position { impl Position { fn to_text_size(self, text: &str, index: &LineIndex) -> Result { let text_size = index.offset( - OneIndexed::new(self.line).ok_or_else(|| { - Error::new("Invalid value `0` for `position.line`. The line index is 1-indexed.") - })?, - OneIndexed::new(self.column).ok_or_else(|| { - Error::new( - "Invalid value `0` for `position.column`. The column index is 1-indexed.", - ) - })?, + SourceLocation { + line: OneIndexed::new(self.line).ok_or_else(|| { + Error::new( + "Invalid value `0` for `position.line`. The line index is 1-indexed.", + ) + })?, + character_offset: OneIndexed::new(self.column).ok_or_else(|| { + Error::new( + "Invalid value `0` for `position.column`. The column index is 1-indexed.", + ) + })?, + }, text, + PositionEncoding::Utf32, ); Ok(text_size) } fn from_text_size(offset: TextSize, line_index: &LineIndex, source: &str) -> Self { - line_index.source_location(offset, source).into() + line_index.line_column(offset, source).into() } } -impl From for Position { - fn from(location: SourceLocation) -> Self { +impl From for Position { + fn from(location: LineColumn) -> Self { Self { - line: location.row.get(), + line: location.line.get(), column: location.column.get(), } } diff --git a/crates/ruff/src/args.rs b/crates/ruff/src/args.rs index fa4655de57..e28508b381 100644 --- a/crates/ruff/src/args.rs +++ b/crates/ruff/src/args.rs @@ -5,6 +5,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; +use crate::commands::completions::config::{OptionString, OptionStringParser}; use anyhow::bail; use clap::builder::{TypedValueParser, ValueParserFactory}; use clap::{command, Parser, Subcommand}; @@ -22,7 +23,7 @@ use ruff_linter::settings::types::{ }; use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser}; use ruff_python_ast as ast; -use ruff_source_file::{LineIndex, OneIndexed}; +use ruff_source_file::{LineIndex, OneIndexed, PositionEncoding}; use ruff_text_size::TextRange; use ruff_workspace::configuration::{Configuration, RuleSelection}; use ruff_workspace::options::{Options, PycodestyleOptions}; @@ -31,8 +32,6 @@ use ruff_workspace::resolver::ConfigurationTransformer; use rustc_hash::FxHashMap; use toml; -use crate::commands::completions::config::{OptionString, OptionStringParser}; - /// All configuration options that can be passed "globally", /// i.e., can be passed to all subcommands #[derive(Debug, Default, Clone, clap::Args)] @@ -1070,8 +1069,9 @@ impl FormatRange { /// /// Returns an empty range if the start range is past the end of `source`. pub(super) fn to_text_range(self, source: &str, line_index: &LineIndex) -> TextRange { - let start_byte_offset = line_index.offset(self.start.line, self.start.column, source); - let end_byte_offset = line_index.offset(self.end.line, self.end.column, source); + let start_byte_offset = + line_index.offset(self.start.into(), source, PositionEncoding::Utf32); + let end_byte_offset = line_index.offset(self.end.into(), source, PositionEncoding::Utf32); TextRange::new(start_byte_offset, end_byte_offset) } @@ -1142,6 +1142,15 @@ pub struct LineColumn { pub column: OneIndexed, } +impl From for ruff_source_file::SourceLocation { + fn from(value: LineColumn) -> Self { + Self { + line: value.line, + character_offset: value.column, + } + } +} + impl std::fmt::Display for LineColumn { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{line}:{column}", line = self.line, column = self.column) diff --git a/crates/ruff_db/src/diagnostic/render.rs b/crates/ruff_db/src/diagnostic/render.rs index ece51ae381..f69980d068 100644 --- a/crates/ruff_db/src/diagnostic/render.rs +++ b/crates/ruff_db/src/diagnostic/render.rs @@ -71,8 +71,8 @@ impl std::fmt::Display for DisplayDiagnostic<'_> { write!(f, " {path}", path = self.resolver.path(span.file()))?; if let Some(range) = span.range() { let input = self.resolver.input(span.file()); - let start = input.as_source_code().source_location(range.start()); - write!(f, ":{line}:{col}", line = start.row, col = start.column)?; + let start = input.as_source_code().line_column(range.start()); + write!(f, ":{line}:{col}", line = start.line, col = start.column)?; } write!(f, ":")?; } diff --git a/crates/ruff_linter/src/checkers/ast/analyze/definitions.rs b/crates/ruff_linter/src/checkers/ast/analyze/definitions.rs index f0a4cd2f0a..392094ce56 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/definitions.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/definitions.rs @@ -191,7 +191,7 @@ pub(crate) fn definitions(checker: &mut Checker) { warn_user!( "Docstring at {}:{}:{} contains implicit string concatenation; ignoring...", relativize_path(checker.path), - location.row, + location.line, location.column ); continue; diff --git a/crates/ruff_linter/src/locator.rs b/crates/ruff_linter/src/locator.rs index 5aeaced1b3..afc212e6a6 100644 --- a/crates/ruff_linter/src/locator.rs +++ b/crates/ruff_linter/src/locator.rs @@ -2,7 +2,7 @@ use std::cell::OnceCell; -use ruff_source_file::{LineIndex, LineRanges, OneIndexed, SourceCode, SourceLocation}; +use ruff_source_file::{LineColumn, LineIndex, LineRanges, OneIndexed, SourceCode}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; #[derive(Debug)] @@ -36,8 +36,8 @@ impl<'a> Locator<'a> { #[deprecated( note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead." )] - pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation { - self.to_source_code().source_location(offset) + pub fn compute_source_location(&self, offset: TextSize) -> LineColumn { + self.to_source_code().line_column(offset) } pub fn to_index(&self) -> &LineIndex { diff --git a/crates/ruff_linter/src/logging.rs b/crates/ruff_linter/src/logging.rs index 8e07e0e84f..554c4e2c84 100644 --- a/crates/ruff_linter/src/logging.rs +++ b/crates/ruff_linter/src/logging.rs @@ -9,7 +9,7 @@ use log::Level; use ruff_python_parser::{ParseError, ParseErrorType}; use rustc_hash::FxHashSet; -use ruff_source_file::{LineIndex, OneIndexed, SourceCode, SourceLocation}; +use ruff_source_file::{LineColumn, LineIndex, OneIndexed, SourceCode}; use crate::fs; use crate::source_kind::SourceKind; @@ -195,21 +195,21 @@ impl DisplayParseError { // Translate the byte offset to a location in the originating source. let location = if let Some(jupyter_index) = source_kind.as_ipy_notebook().map(Notebook::index) { - let source_location = source_code.source_location(error.location.start()); + let source_location = source_code.line_column(error.location.start()); ErrorLocation::Cell( jupyter_index - .cell(source_location.row) + .cell(source_location.line) .unwrap_or(OneIndexed::MIN), - SourceLocation { - row: jupyter_index - .cell_row(source_location.row) + LineColumn { + line: jupyter_index + .cell_row(source_location.line) .unwrap_or(OneIndexed::MIN), column: source_location.column, }, ) } else { - ErrorLocation::File(source_code.source_location(error.location.start())) + ErrorLocation::File(source_code.line_column(error.location.start())) }; Self { @@ -245,7 +245,7 @@ impl Display for DisplayParseError { write!( f, "{row}{colon}{column}{colon} {inner}", - row = location.row, + row = location.line, column = location.column, colon = ":".cyan(), inner = &DisplayParseErrorType(&self.error.error) @@ -256,7 +256,7 @@ impl Display for DisplayParseError { f, "{cell}{colon}{row}{colon}{column}{colon} {inner}", cell = cell, - row = location.row, + row = location.line, column = location.column, colon = ":".cyan(), inner = &DisplayParseErrorType(&self.error.error) @@ -283,9 +283,9 @@ impl Display for DisplayParseErrorType<'_> { #[derive(Debug)] enum ErrorLocation { /// The error occurred in a Python file. - File(SourceLocation), + File(LineColumn), /// The error occurred in a Jupyter cell. - Cell(OneIndexed, SourceLocation), + Cell(OneIndexed, LineColumn), } /// Truncates the display text before the first newline character to avoid line breaks. diff --git a/crates/ruff_linter/src/message/azure.rs b/crates/ruff_linter/src/message/azure.rs index c7d6049eac..727a427f9b 100644 --- a/crates/ruff_linter/src/message/azure.rs +++ b/crates/ruff_linter/src/message/azure.rs @@ -1,6 +1,6 @@ use std::io::Write; -use ruff_source_file::SourceLocation; +use ruff_source_file::LineColumn; use crate::message::{Emitter, EmitterContext, Message}; @@ -20,7 +20,7 @@ impl Emitter for AzureEmitter { let location = if context.is_notebook(message.filename()) { // We can't give a reasonable location for the structured formats, // so we show one that's clearly a fallback - SourceLocation::default() + LineColumn::default() } else { message.compute_start_location() }; @@ -30,7 +30,7 @@ impl Emitter for AzureEmitter { "##vso[task.logissue type=error\ ;sourcepath={filename};linenumber={line};columnnumber={col};{code}]{body}", filename = message.filename(), - line = location.row, + line = location.line, col = location.column, code = message .rule() diff --git a/crates/ruff_linter/src/message/github.rs b/crates/ruff_linter/src/message/github.rs index 9fd0a5ee6b..a038b17704 100644 --- a/crates/ruff_linter/src/message/github.rs +++ b/crates/ruff_linter/src/message/github.rs @@ -1,6 +1,6 @@ use std::io::Write; -use ruff_source_file::SourceLocation; +use ruff_source_file::LineColumn; use crate::fs::relativize_path; use crate::message::{Emitter, EmitterContext, Message}; @@ -22,9 +22,9 @@ impl Emitter for GithubEmitter { let location = if context.is_notebook(message.filename()) { // We can't give a reasonable location for the structured formats, // so we show one that's clearly a fallback - SourceLocation::default() + LineColumn::default() } else { - source_location.clone() + source_location }; let end_location = message.compute_end_location(); @@ -34,9 +34,9 @@ impl Emitter for GithubEmitter { "::error title=Ruff{code},file={file},line={row},col={column},endLine={end_row},endColumn={end_column}::", code = message.rule().map_or_else(String::new, |rule| format!(" ({})", rule.noqa_code())), file = message.filename(), - row = source_location.row, + row = source_location.line, column = source_location.column, - end_row = end_location.row, + end_row = end_location.line, end_column = end_location.column, )?; @@ -44,7 +44,7 @@ impl Emitter for GithubEmitter { writer, "{path}:{row}:{column}:", path = relativize_path(message.filename()), - row = location.row, + row = location.line, column = location.column, )?; diff --git a/crates/ruff_linter/src/message/gitlab.rs b/crates/ruff_linter/src/message/gitlab.rs index 6433184ac5..6d03a7c803 100644 --- a/crates/ruff_linter/src/message/gitlab.rs +++ b/crates/ruff_linter/src/message/gitlab.rs @@ -71,8 +71,8 @@ impl Serialize for SerializedMessages<'_> { }) } else { json!({ - "begin": start_location.row, - "end": end_location.row + "begin": start_location.line, + "end": end_location.line }) }; diff --git a/crates/ruff_linter/src/message/grouped.rs b/crates/ruff_linter/src/message/grouped.rs index 1dfa5d15e6..b60830c961 100644 --- a/crates/ruff_linter/src/message/grouped.rs +++ b/crates/ruff_linter/src/message/grouped.rs @@ -57,7 +57,7 @@ impl Emitter for GroupedEmitter { let mut max_column_length = OneIndexed::MIN; for message in &messages { - max_row_length = max_row_length.max(message.start_location.row); + max_row_length = max_row_length.max(message.start_location.line); max_column_length = max_column_length.max(message.start_location.column); } @@ -115,8 +115,8 @@ impl Display for DisplayGroupedMessage<'_> { write!( f, " {row_padding}", - row_padding = - " ".repeat(self.row_length.get() - calculate_print_width(start_location.row).get()) + row_padding = " " + .repeat(self.row_length.get() - calculate_print_width(start_location.line).get()) )?; // Check if we're working on a jupyter notebook and translate positions with cell accordingly @@ -125,18 +125,18 @@ impl Display for DisplayGroupedMessage<'_> { f, "cell {cell}{sep}", cell = jupyter_index - .cell(start_location.row) + .cell(start_location.line) .unwrap_or(OneIndexed::MIN), sep = ":".cyan() )?; ( jupyter_index - .cell_row(start_location.row) + .cell_row(start_location.line) .unwrap_or(OneIndexed::MIN), start_location.column, ) } else { - (start_location.row, start_location.column) + (start_location.line, start_location.column) }; writeln!( diff --git a/crates/ruff_linter/src/message/json.rs b/crates/ruff_linter/src/message/json.rs index eaa968c167..b8d91c7db3 100644 --- a/crates/ruff_linter/src/message/json.rs +++ b/crates/ruff_linter/src/message/json.rs @@ -6,7 +6,7 @@ use serde_json::{json, Value}; use ruff_diagnostics::Edit; use ruff_notebook::NotebookIndex; -use ruff_source_file::{OneIndexed, SourceCode, SourceLocation}; +use ruff_source_file::{LineColumn, OneIndexed, SourceCode}; use ruff_text_size::Ranged; use crate::message::{Emitter, EmitterContext, Message}; @@ -60,22 +60,23 @@ pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext) }) }); - let mut start_location = source_code.source_location(message.start()); - let mut end_location = source_code.source_location(message.end()); + let mut start_location = source_code.line_column(message.start()); + let mut end_location = source_code.line_column(message.end()); let mut noqa_location = message .noqa_offset() - .map(|offset| source_code.source_location(offset)); + .map(|offset| source_code.line_column(offset)); let mut notebook_cell_index = None; if let Some(notebook_index) = notebook_index { notebook_cell_index = Some( notebook_index - .cell(start_location.row) + .cell(start_location.line) .unwrap_or(OneIndexed::MIN), ); - start_location = notebook_index.translate_location(&start_location); - end_location = notebook_index.translate_location(&end_location); - noqa_location = noqa_location.map(|location| notebook_index.translate_location(&location)); + start_location = notebook_index.translate_line_column(&start_location); + end_location = notebook_index.translate_line_column(&end_location); + noqa_location = + noqa_location.map(|location| notebook_index.translate_line_column(&location)); } json!({ @@ -84,10 +85,17 @@ pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext) "message": message.body(), "fix": fix, "cell": notebook_cell_index, - "location": start_location, - "end_location": end_location, + "location": location_to_json(start_location), + "end_location": location_to_json(end_location), "filename": message.filename(), - "noqa_row": noqa_location.map(|location| location.row) + "noqa_row": noqa_location.map(|location| location.line) + }) +} + +fn location_to_json(location: LineColumn) -> serde_json::Value { + json!({ + "row": location.line, + "column": location.column }) } @@ -105,8 +113,8 @@ impl Serialize for ExpandedEdits<'_> { let mut s = serializer.serialize_seq(Some(self.edits.len()))?; for edit in self.edits { - let mut location = self.source_code.source_location(edit.start()); - let mut end_location = self.source_code.source_location(edit.end()); + let mut location = self.source_code.line_column(edit.start()); + let mut end_location = self.source_code.line_column(edit.end()); if let Some(notebook_index) = self.notebook_index { // There exists a newline between each cell's source code in the @@ -118,44 +126,44 @@ impl Serialize for ExpandedEdits<'_> { // If it does, we need to translate the end location to the last // character of the previous cell. match ( - notebook_index.cell(location.row), - notebook_index.cell(end_location.row), + notebook_index.cell(location.line), + notebook_index.cell(end_location.line), ) { (Some(start_cell), Some(end_cell)) if start_cell != end_cell => { debug_assert_eq!(end_location.column.get(), 1); - let prev_row = end_location.row.saturating_sub(1); - end_location = SourceLocation { - row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN), + let prev_row = end_location.line.saturating_sub(1); + end_location = LineColumn { + line: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN), column: self .source_code - .source_location(self.source_code.line_end_exclusive(prev_row)) + .line_column(self.source_code.line_end_exclusive(prev_row)) .column, }; } (Some(_), None) => { debug_assert_eq!(end_location.column.get(), 1); - let prev_row = end_location.row.saturating_sub(1); - end_location = SourceLocation { - row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN), + let prev_row = end_location.line.saturating_sub(1); + end_location = LineColumn { + line: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN), column: self .source_code - .source_location(self.source_code.line_end_exclusive(prev_row)) + .line_column(self.source_code.line_end_exclusive(prev_row)) .column, }; } _ => { - end_location = notebook_index.translate_location(&end_location); + end_location = notebook_index.translate_line_column(&end_location); } } - location = notebook_index.translate_location(&location); + location = notebook_index.translate_line_column(&location); } let value = json!({ "content": edit.content().unwrap_or_default(), - "location": location, - "end_location": end_location + "location": location_to_json(location), + "end_location": location_to_json(end_location) }); s.serialize_element(&value)?; diff --git a/crates/ruff_linter/src/message/junit.rs b/crates/ruff_linter/src/message/junit.rs index 7c7e341809..38edfdfca8 100644 --- a/crates/ruff_linter/src/message/junit.rs +++ b/crates/ruff_linter/src/message/junit.rs @@ -3,7 +3,7 @@ use std::path::Path; use quick_junit::{NonSuccessKind, Report, TestCase, TestCaseStatus, TestSuite, XmlString}; -use ruff_source_file::SourceLocation; +use ruff_source_file::LineColumn; use crate::message::{ group_messages_by_filename, Emitter, EmitterContext, Message, MessageWithLocation, @@ -47,14 +47,14 @@ impl Emitter for JunitEmitter { let location = if context.is_notebook(message.filename()) { // We can't give a reasonable location for the structured formats, // so we show one that's clearly a fallback - SourceLocation::default() + LineColumn::default() } else { start_location }; status.set_description(format!( "line {row}, col {col}, {body}", - row = location.row, + row = location.line, col = location.column, body = message.body() )); @@ -72,7 +72,7 @@ impl Emitter for JunitEmitter { case.set_classname(classname.to_str().unwrap()); case.extra.insert( XmlString::new("line"), - XmlString::new(location.row.to_string()), + XmlString::new(location.line.to_string()), ); case.extra.insert( XmlString::new("column"), diff --git a/crates/ruff_linter/src/message/mod.rs b/crates/ruff_linter/src/message/mod.rs index 7f16eeac54..a3146ed93d 100644 --- a/crates/ruff_linter/src/message/mod.rs +++ b/crates/ruff_linter/src/message/mod.rs @@ -18,7 +18,7 @@ pub use rdjson::RdjsonEmitter; use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix}; use ruff_notebook::NotebookIndex; use ruff_python_parser::{ParseError, UnsupportedSyntaxError}; -use ruff_source_file::{SourceFile, SourceLocation}; +use ruff_source_file::{LineColumn, SourceFile}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; pub use sarif::SarifEmitter; pub use text::TextEmitter; @@ -239,17 +239,15 @@ impl Message { } /// Computes the start source location for the message. - pub fn compute_start_location(&self) -> SourceLocation { + pub fn compute_start_location(&self) -> LineColumn { self.source_file() .to_source_code() - .source_location(self.start()) + .line_column(self.start()) } /// Computes the end source location for the message. - pub fn compute_end_location(&self) -> SourceLocation { - self.source_file() - .to_source_code() - .source_location(self.end()) + pub fn compute_end_location(&self) -> LineColumn { + self.source_file().to_source_code().line_column(self.end()) } /// Returns the [`SourceFile`] which the message belongs to. @@ -284,7 +282,7 @@ impl Ranged for Message { struct MessageWithLocation<'a> { message: &'a Message, - start_location: SourceLocation, + start_location: LineColumn, } impl Deref for MessageWithLocation<'_> { diff --git a/crates/ruff_linter/src/message/pylint.rs b/crates/ruff_linter/src/message/pylint.rs index 10b1f81f10..91d79326a7 100644 --- a/crates/ruff_linter/src/message/pylint.rs +++ b/crates/ruff_linter/src/message/pylint.rs @@ -23,7 +23,7 @@ impl Emitter for PylintEmitter { // so we show one that's clearly a fallback OneIndexed::from_zero_indexed(0) } else { - message.compute_start_location().row + message.compute_start_location().line }; let body = if let Some(rule) = message.rule() { diff --git a/crates/ruff_linter/src/message/rdjson.rs b/crates/ruff_linter/src/message/rdjson.rs index 99b3fc481e..26c3e1d091 100644 --- a/crates/ruff_linter/src/message/rdjson.rs +++ b/crates/ruff_linter/src/message/rdjson.rs @@ -8,7 +8,7 @@ use ruff_diagnostics::Edit; use ruff_source_file::SourceCode; use ruff_text_size::Ranged; -use crate::message::{Emitter, EmitterContext, Message, SourceLocation}; +use crate::message::{Emitter, EmitterContext, LineColumn, Message}; #[derive(Default)] pub struct RdjsonEmitter; @@ -59,15 +59,15 @@ impl Serialize for ExpandedMessages<'_> { fn message_to_rdjson_value(message: &Message) -> Value { let source_code = message.source_file().to_source_code(); - let start_location = source_code.source_location(message.start()); - let end_location = source_code.source_location(message.end()); + let start_location = source_code.line_column(message.start()); + let end_location = source_code.line_column(message.end()); if let Some(fix) = message.fix() { json!({ "message": message.body(), "location": { "path": message.filename(), - "range": rdjson_range(&start_location, &end_location), + "range": rdjson_range(start_location, end_location), }, "code": { "value": message.rule().map(|rule| rule.noqa_code().to_string()), @@ -80,7 +80,7 @@ fn message_to_rdjson_value(message: &Message) -> Value { "message": message.body(), "location": { "path": message.filename(), - "range": rdjson_range(&start_location, &end_location), + "range": rdjson_range(start_location, end_location), }, "code": { "value": message.rule().map(|rule| rule.noqa_code().to_string()), @@ -95,11 +95,11 @@ fn rdjson_suggestions(edits: &[Edit], source_code: &SourceCode) -> Value { edits .iter() .map(|edit| { - let location = source_code.source_location(edit.start()); - let end_location = source_code.source_location(edit.end()); + let location = source_code.line_column(edit.start()); + let end_location = source_code.line_column(edit.end()); json!({ - "range": rdjson_range(&location, &end_location), + "range": rdjson_range(location, end_location), "text": edit.content().unwrap_or_default(), }) }) @@ -107,16 +107,10 @@ fn rdjson_suggestions(edits: &[Edit], source_code: &SourceCode) -> Value { ) } -fn rdjson_range(start: &SourceLocation, end: &SourceLocation) -> Value { +fn rdjson_range(start: LineColumn, end: LineColumn) -> Value { json!({ - "start": { - "line": start.row, - "column": start.column, - }, - "end": { - "line": end.row, - "column": end.column, - }, + "start": start, + "end": end, }) } diff --git a/crates/ruff_linter/src/message/sarif.rs b/crates/ruff_linter/src/message/sarif.rs index a04bb44148..b74e293fd4 100644 --- a/crates/ruff_linter/src/message/sarif.rs +++ b/crates/ruff_linter/src/message/sarif.rs @@ -129,9 +129,9 @@ impl SarifResult { uri: url::Url::from_file_path(&path) .map_err(|()| anyhow::anyhow!("Failed to convert path to URL: {}", path.display()))? .to_string(), - start_line: start_location.row, + start_line: start_location.line, start_column: start_location.column, - end_line: end_location.row, + end_line: end_location.line, end_column: end_location.column, }) } @@ -147,9 +147,9 @@ impl SarifResult { level: "error".to_string(), message: message.body().to_string(), uri: path.display().to_string(), - start_line: start_location.row, + start_line: start_location.line, start_column: start_location.column, - end_line: end_location.row, + end_line: end_location.line, end_column: end_location.column, }) } diff --git a/crates/ruff_linter/src/message/text.rs b/crates/ruff_linter/src/message/text.rs index 914f12a075..f1ede8c621 100644 --- a/crates/ruff_linter/src/message/text.rs +++ b/crates/ruff_linter/src/message/text.rs @@ -7,7 +7,7 @@ use colored::Colorize; use ruff_annotate_snippets::{Level, Renderer, Snippet}; use ruff_notebook::NotebookIndex; -use ruff_source_file::{OneIndexed, SourceLocation}; +use ruff_source_file::{LineColumn, OneIndexed}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::fs::relativize_path; @@ -86,14 +86,14 @@ impl Emitter for TextEmitter { writer, "cell {cell}{sep}", cell = notebook_index - .cell(start_location.row) + .cell(start_location.line) .unwrap_or(OneIndexed::MIN), sep = ":".cyan(), )?; - SourceLocation { - row: notebook_index - .cell_row(start_location.row) + LineColumn { + line: notebook_index + .cell_row(start_location.line) .unwrap_or(OneIndexed::MIN), column: start_location.column, } @@ -104,7 +104,7 @@ impl Emitter for TextEmitter { writeln!( writer, "{row}{sep}{col}{sep} {code_and_body}", - row = diagnostic_location.row, + row = diagnostic_location.line, col = diagnostic_location.column, sep = ":".cyan(), code_and_body = RuleCodeAndBody { diff --git a/crates/ruff_notebook/src/index.rs b/crates/ruff_notebook/src/index.rs index 9912b94e62..35e4e07fcb 100644 --- a/crates/ruff_notebook/src/index.rs +++ b/crates/ruff_notebook/src/index.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use ruff_source_file::{OneIndexed, SourceLocation}; +use ruff_source_file::{LineColumn, OneIndexed, SourceLocation}; /// Jupyter Notebook indexing table /// @@ -33,16 +33,29 @@ impl NotebookIndex { self.row_to_row_in_cell.get(row.to_zero_indexed()).copied() } - /// Translates the given source location based on the indexing table. + /// Translates the given [`LineColumn`] based on the indexing table. /// /// This will translate the row/column in the concatenated source code /// to the row/column in the Jupyter Notebook. - pub fn translate_location(&self, source_location: &SourceLocation) -> SourceLocation { - SourceLocation { - row: self - .cell_row(source_location.row) + pub fn translate_line_column(&self, source_location: &LineColumn) -> LineColumn { + LineColumn { + line: self + .cell_row(source_location.line) .unwrap_or(OneIndexed::MIN), column: source_location.column, } } + + /// Translates the given [`SourceLocation`] based on the indexing table. + /// + /// This will translate the line/character in the concatenated source code + /// to the line/character in the Jupyter Notebook. + pub fn translate_source_location(&self, source_location: &SourceLocation) -> SourceLocation { + SourceLocation { + line: self + .cell_row(source_location.line) + .unwrap_or(OneIndexed::MIN), + character_offset: source_location.character_offset, + } + } } diff --git a/crates/ruff_python_formatter/tests/fixtures.rs b/crates/ruff_python_formatter/tests/fixtures.rs index 7e83d969cb..c2489d2d75 100644 --- a/crates/ruff_python_formatter/tests/fixtures.rs +++ b/crates/ruff_python_formatter/tests/fixtures.rs @@ -430,10 +430,10 @@ fn ensure_unchanged_ast( formatted_unsupported_syntax_errors .into_values() .map(|error| { - let location = index.source_location(error.start(), formatted_code); + let location = index.line_column(error.start(), formatted_code); format!( "{row}:{col} {error}", - row = location.row, + row = location.line, col = location.column ) }) diff --git a/crates/ruff_server/src/edit.rs b/crates/ruff_server/src/edit.rs index 3a7ffb4e3e..d0dfb91ae3 100644 --- a/crates/ruff_server/src/edit.rs +++ b/crates/ruff_server/src/edit.rs @@ -31,6 +31,16 @@ pub enum PositionEncoding { UTF8, } +impl From for ruff_source_file::PositionEncoding { + fn from(value: PositionEncoding) -> Self { + match value { + PositionEncoding::UTF8 => Self::Utf8, + PositionEncoding::UTF16 => Self::Utf16, + PositionEncoding::UTF32 => Self::Utf32, + } + } +} + /// A unique document ID, derived from a URL passed as part of an LSP request. /// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook. #[derive(Clone, Debug)] diff --git a/crates/ruff_server/src/edit/range.rs b/crates/ruff_server/src/edit/range.rs index 9ccef9e67d..bde1e454da 100644 --- a/crates/ruff_server/src/edit/range.rs +++ b/crates/ruff_server/src/edit/range.rs @@ -2,9 +2,9 @@ use super::notebook; use super::PositionEncoding; use lsp_types as types; use ruff_notebook::NotebookIndex; -use ruff_source_file::OneIndexed; -use ruff_source_file::{LineIndex, SourceLocation}; -use ruff_text_size::{TextRange, TextSize}; +use ruff_source_file::LineIndex; +use ruff_source_file::{OneIndexed, SourceLocation}; +use ruff_text_size::TextRange; pub(crate) struct NotebookRange { pub(crate) cell: notebook::CellId, @@ -38,76 +38,43 @@ impl RangeExt for lsp_types::Range { index: &LineIndex, encoding: PositionEncoding, ) -> TextRange { - let start_line = index.line_range( - OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)), + let start = index.offset( + SourceLocation { + line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)), + character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize( + self.start.character, + )), + }, text, + encoding.into(), ); - let end_line = index.line_range( - OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)), + let end = index.offset( + SourceLocation { + line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)), + character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize( + self.end.character, + )), + }, text, + encoding.into(), ); - let (start_column_offset, end_column_offset) = match encoding { - PositionEncoding::UTF8 => ( - TextSize::new(self.start.character), - TextSize::new(self.end.character), - ), - - PositionEncoding::UTF16 => { - // Fast path for ASCII only documents - if index.is_ascii() { - ( - TextSize::new(self.start.character), - TextSize::new(self.end.character), - ) - } else { - // UTF16 encodes characters either as one or two 16 bit words. - // The position in `range` is the 16-bit word offset from the start of the line (and not the character offset) - // UTF-16 with a text that may use variable-length characters. - ( - utf8_column_offset(self.start.character, &text[start_line]), - utf8_column_offset(self.end.character, &text[end_line]), - ) - } - } - PositionEncoding::UTF32 => { - // UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset. - return TextRange::new( - index.offset( - OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)), - OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.character)), - text, - ), - index.offset( - OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)), - OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.character)), - text, - ), - ); - } - }; - - TextRange::new( - start_line.start() + start_column_offset.clamp(TextSize::new(0), start_line.end()), - end_line.start() + end_column_offset.clamp(TextSize::new(0), end_line.end()), - ) + TextRange::new(start, end) } } impl ToRangeExt for TextRange { fn to_range(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> types::Range { types::Range { - start: source_location_to_position(&offset_to_source_location( + start: source_location_to_position(&index.source_location( self.start(), text, - index, - encoding, + encoding.into(), )), - end: source_location_to_position(&offset_to_source_location( + end: source_location_to_position(&index.source_location( self.end(), text, - index, - encoding, + encoding.into(), )), } } @@ -119,26 +86,26 @@ impl ToRangeExt for TextRange { notebook_index: &NotebookIndex, encoding: PositionEncoding, ) -> NotebookRange { - let start = offset_to_source_location(self.start(), text, source_index, encoding); - let mut end = offset_to_source_location(self.end(), text, source_index, encoding); - let starting_cell = notebook_index.cell(start.row); + let start = source_index.source_location(self.start(), text, encoding.into()); + let mut end = source_index.source_location(self.end(), text, encoding.into()); + let starting_cell = notebook_index.cell(start.line); // weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds') // we need to move it one character back (which should place it at the end of the last line). // we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset. - if notebook_index.cell(end.row) != starting_cell { - end.row = end.row.saturating_sub(1); - end.column = offset_to_source_location( - self.end().checked_sub(1.into()).unwrap_or_default(), - text, - source_index, - encoding, - ) - .column; + if notebook_index.cell(end.line) != starting_cell { + end.line = end.line.saturating_sub(1); + end.character_offset = source_index + .source_location( + self.end().checked_sub(1.into()).unwrap_or_default(), + text, + encoding.into(), + ) + .character_offset; } - let start = source_location_to_position(¬ebook_index.translate_location(&start)); - let end = source_location_to_position(¬ebook_index.translate_location(&end)); + let start = source_location_to_position(¬ebook_index.translate_source_location(&start)); + let end = source_location_to_position(¬ebook_index.translate_source_location(&end)); NotebookRange { cell: starting_cell @@ -149,67 +116,10 @@ impl ToRangeExt for TextRange { } } -/// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number. -fn utf8_column_offset(utf16_code_unit_offset: u32, line: &str) -> TextSize { - let mut utf8_code_unit_offset = TextSize::new(0); - - let mut i = 0u32; - - for c in line.chars() { - if i >= utf16_code_unit_offset { - break; - } - - // Count characters encoded as two 16 bit words as 2 characters. - { - utf8_code_unit_offset += - TextSize::new(u32::try_from(c.len_utf8()).expect("utf8 len always <=4")); - i += u32::try_from(c.len_utf16()).expect("utf16 len always <=2"); - } - } - - utf8_code_unit_offset -} - -fn offset_to_source_location( - offset: TextSize, - text: &str, - index: &LineIndex, - encoding: PositionEncoding, -) -> SourceLocation { - match encoding { - PositionEncoding::UTF8 => { - let row = index.line_index(offset); - let column = offset - index.line_start(row, text); - - SourceLocation { - column: OneIndexed::from_zero_indexed(column.to_usize()), - row, - } - } - PositionEncoding::UTF16 => { - let row = index.line_index(offset); - - let column = if index.is_ascii() { - (offset - index.line_start(row, text)).to_usize() - } else { - let up_to_line = &text[TextRange::new(index.line_start(row, text), offset)]; - up_to_line.encode_utf16().count() - }; - - SourceLocation { - column: OneIndexed::from_zero_indexed(column), - row, - } - } - PositionEncoding::UTF32 => index.source_location(offset, text), - } -} - fn source_location_to_position(location: &SourceLocation) -> types::Position { types::Position { - line: u32::try_from(location.row.to_zero_indexed()).expect("row usize fits in u32"), - character: u32::try_from(location.column.to_zero_indexed()) + line: u32::try_from(location.line.to_zero_indexed()).expect("row usize fits in u32"), + character: u32::try_from(location.character_offset.to_zero_indexed()) .expect("character usize fits in u32"), } } diff --git a/crates/ruff_server/src/server.rs b/crates/ruff_server/src/server.rs index 88c0273fb6..b515147ca2 100644 --- a/crates/ruff_server/src/server.rs +++ b/crates/ruff_server/src/server.rs @@ -59,6 +59,7 @@ impl Server { let client_capabilities = init_params.capabilities; let position_encoding = Self::find_best_position_encoding(&client_capabilities); + let server_capabilities = Self::server_capabilities(position_encoding); let connection = connection.initialize_finish( @@ -98,6 +99,8 @@ impl Server { workspace_settings.unwrap_or_default(), )?; + tracing::debug!("Negotiated position encoding: {position_encoding:?}"); + Ok(Self { connection, worker_threads, diff --git a/crates/ruff_source_file/src/lib.rs b/crates/ruff_source_file/src/lib.rs index 5bf43e3a1d..51bd8852a1 100644 --- a/crates/ruff_source_file/src/lib.rs +++ b/crates/ruff_source_file/src/lib.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use ruff_text_size::{Ranged, TextRange, TextSize}; -pub use crate::line_index::{LineIndex, OneIndexed}; +pub use crate::line_index::{LineIndex, OneIndexed, PositionEncoding}; pub use crate::line_ranges::LineRanges; pub use crate::newlines::{ find_newline, Line, LineEnding, NewlineWithTrailingNewline, UniversalNewlineIterator, @@ -18,7 +18,7 @@ mod line_index; mod line_ranges; mod newlines; -/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`]. +/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`LineColumn`]. #[derive(Debug)] pub struct SourceCode<'src, 'index> { text: &'src str, @@ -33,10 +33,20 @@ impl<'src, 'index> SourceCode<'src, 'index> { } } - /// Computes the one indexed row and column numbers for `offset`. + /// Computes the one indexed line and column numbers for `offset`, skipping any potential BOM. #[inline] - pub fn source_location(&self, offset: TextSize) -> SourceLocation { - self.index.source_location(offset, self.text) + pub fn line_column(&self, offset: TextSize) -> LineColumn { + self.index.line_column(offset, self.text) + } + + #[inline] + pub fn source_location( + &self, + offset: TextSize, + position_encoding: PositionEncoding, + ) -> SourceLocation { + self.index + .source_location(offset, self.text, position_encoding) } #[inline] @@ -229,34 +239,62 @@ impl PartialEq for SourceFileInner { impl Eq for SourceFileInner {} -#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +/// The line and column of an offset in a source file. +/// +/// See [`LineIndex::line_column`] for more information. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct SourceLocation { - pub row: OneIndexed, +pub struct LineColumn { + /// The line in the source text. + pub line: OneIndexed, + /// The column (UTF scalar values) relative to the start of the line except any + /// potential BOM on the first line. pub column: OneIndexed, } -impl Default for SourceLocation { +impl Default for LineColumn { fn default() -> Self { Self { - row: OneIndexed::MIN, + line: OneIndexed::MIN, column: OneIndexed::MIN, } } } -impl Debug for SourceLocation { +impl Debug for LineColumn { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("SourceLocation") - .field("row", &self.row.get()) + f.debug_struct("LineColumn") + .field("line", &self.line.get()) .field("column", &self.column.get()) .finish() } } -impl std::fmt::Display for SourceLocation { +impl std::fmt::Display for LineColumn { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{row}:{column}", row = self.row, column = self.column) + write!(f, "{line}:{column}", line = self.line, column = self.column) + } +} + +/// A position into a source file represented by the line number and the offset to that character relative to the start of that line. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct SourceLocation { + /// The line in the source text. + pub line: OneIndexed, + /// The offset from the start of the line to the character. + /// + /// This can be a byte offset, the number of UTF16 code points, or the UTF8 code units, depending on the + /// [`PositionEncoding`] used. + pub character_offset: OneIndexed, +} + +impl Default for SourceLocation { + fn default() -> Self { + Self { + line: OneIndexed::MIN, + character_offset: OneIndexed::MIN, + } } } diff --git a/crates/ruff_source_file/src/line_index.rs b/crates/ruff_source_file/src/line_index.rs index e9d211ca5c..893baa7ef4 100644 --- a/crates/ruff_source_file/src/line_index.rs +++ b/crates/ruff_source_file/src/line_index.rs @@ -5,13 +5,12 @@ use std::ops::Deref; use std::str::FromStr; use std::sync::Arc; +use crate::{LineColumn, SourceLocation}; use ruff_text_size::{TextLen, TextRange, TextSize}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::SourceLocation; - -/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions. +/// Index for fast [byte offset](TextSize) to [`LineColumn`] conversions. /// /// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count. #[derive(Clone, Eq, PartialEq)] @@ -66,60 +65,146 @@ impl LineIndex { self.inner.kind } - /// Returns the row and column index for an offset. + /// Returns the line and column number for an UTF-8 byte offset. + /// + /// The `column` number is the nth-character of the line, except for the first line + /// where it doesn't include the UTF-8 BOM marker at the start of the file. + /// + /// ### BOM handling + /// + /// For files starting with a UTF-8 BOM marker, the byte offsets + /// in the range `0...3` are all mapped to line 0 and column 0. + /// Because of this, the conversion isn't losless. /// /// ## Examples /// /// ``` /// # use ruff_text_size::TextSize; - /// # use ruff_source_file::{LineIndex, OneIndexed, SourceLocation}; - /// let source = "def a():\n pass"; - /// let index = LineIndex::from_source_text(source); + /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn}; + /// let source = format!("\u{FEFF}{}", "def a():\n pass"); + /// let index = LineIndex::from_source_text(&source); /// + /// // Before BOM, maps to after BOM /// assert_eq!( - /// index.source_location(TextSize::from(0), source), - /// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) } + /// index.line_column(TextSize::from(0), &source), + /// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) } + /// ); + /// + /// // After BOM, maps to after BOM + /// assert_eq!( + /// index.line_column(TextSize::from(3), &source), + /// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) } /// ); /// /// assert_eq!( - /// index.source_location(TextSize::from(4), source), - /// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) } + /// index.line_column(TextSize::from(7), &source), + /// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) } /// ); /// assert_eq!( - /// index.source_location(TextSize::from(13), source), - /// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) } + /// index.line_column(TextSize::from(16), &source), + /// LineColumn { line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) } /// ); /// ``` /// /// ## Panics /// - /// If the offset is out of bounds. - pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation { - match self.line_starts().binary_search(&offset) { - // Offset is at the start of a line - Ok(row) => SourceLocation { - row: OneIndexed::from_zero_indexed(row), - column: OneIndexed::from_zero_indexed(0), - }, - Err(next_row) => { - // SAFETY: Safe because the index always contains an entry for the offset 0 - let row = next_row - 1; - let mut line_start = self.line_starts()[row]; + /// If the byte offset isn't within the bounds of `content`. + pub fn line_column(&self, offset: TextSize, content: &str) -> LineColumn { + let location = self.source_location(offset, content, PositionEncoding::Utf32); - let column = if self.kind().is_ascii() { - usize::from(offset) - usize::from(line_start) - } else { - // Don't count the BOM character as a column. - if line_start == TextSize::from(0) && content.starts_with('\u{feff}') { - line_start = '\u{feff}'.text_len(); - } + // Don't count the BOM character as a column, but only on the first line. + let column = if location.line.to_zero_indexed() == 0 && content.starts_with('\u{feff}') { + location.character_offset.saturating_sub(1) + } else { + location.character_offset + }; - content[TextRange::new(line_start, offset)].chars().count() - }; + LineColumn { + line: location.line, + column, + } + } + + /// Given a UTF-8 byte offset, returns the line and character offset according to the given encoding. + /// + /// ### BOM handling + /// + /// Unlike [`Self::line_column`], this method does not skip the BOM character at the start of the file. + /// This allows for bidirectional mapping between [`SourceLocation`] and [`TextSize`] (see [`Self::offset`]). + /// + /// ## Examples + /// + /// ``` + /// # use ruff_text_size::TextSize; + /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn, SourceLocation, PositionEncoding, Line}; + /// let source = format!("\u{FEFF}{}", "def a():\n pass"); + /// let index = LineIndex::from_source_text(&source); + /// + /// // Before BOM, maps to character 0 + /// assert_eq!( + /// index.source_location(TextSize::from(0), &source, PositionEncoding::Utf32), + /// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(0) } + /// ); + /// + /// // After BOM, maps to after BOM + /// assert_eq!( + /// index.source_location(TextSize::from(3), &source, PositionEncoding::Utf32), + /// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(1) } + /// ); + /// + /// assert_eq!( + /// index.source_location(TextSize::from(7), &source, PositionEncoding::Utf32), + /// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(5) } + /// ); + /// assert_eq!( + /// index.source_location(TextSize::from(16), &source, PositionEncoding::Utf32), + /// SourceLocation { line: OneIndexed::from_zero_indexed(1), character_offset: OneIndexed::from_zero_indexed(4) } + /// ); + /// ``` + /// + /// ## Panics + /// + /// If the UTF-8 byte offset is out of bounds of `text`. + pub fn source_location( + &self, + offset: TextSize, + text: &str, + encoding: PositionEncoding, + ) -> SourceLocation { + let line = self.line_index(offset); + let line_start = self.line_start(line, text); + + if self.is_ascii() { + return SourceLocation { + line, + character_offset: OneIndexed::from_zero_indexed((offset - line_start).to_usize()), + }; + } + + match encoding { + PositionEncoding::Utf8 => { + let character_offset = offset - line_start; + SourceLocation { + line, + character_offset: OneIndexed::from_zero_indexed(character_offset.to_usize()), + } + } + PositionEncoding::Utf16 => { + let up_to_character = &text[TextRange::new(line_start, offset)]; + let character = up_to_character.encode_utf16().count(); SourceLocation { - row: OneIndexed::from_zero_indexed(row), - column: OneIndexed::from_zero_indexed(column), + line, + character_offset: OneIndexed::from_zero_indexed(character), + } + } + PositionEncoding::Utf32 => { + let up_to_character = &text[TextRange::new(line_start, offset)]; + let character = up_to_character.chars().count(); + + SourceLocation { + line, + character_offset: OneIndexed::from_zero_indexed(character), } } } @@ -141,7 +226,7 @@ impl LineIndex { /// /// ``` /// # use ruff_text_size::TextSize; - /// # use ruff_source_file::{LineIndex, OneIndexed, SourceLocation}; + /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn}; /// let source = "def a():\n pass"; /// let index = LineIndex::from_source_text(source); /// @@ -221,83 +306,211 @@ impl LineIndex { } } - /// Returns the [byte offset](TextSize) at `line` and `column`. + /// Returns the [UTF-8 byte offset](TextSize) at `line` and `character` where character is counted using the given encoding. /// /// ## Examples /// - /// ### ASCII + /// ### ASCII only source text /// /// ``` - /// use ruff_source_file::{LineIndex, OneIndexed}; - /// use ruff_text_size::TextSize; + /// # use ruff_source_file::{SourceLocation, LineIndex, OneIndexed, PositionEncoding}; + /// # use ruff_text_size::TextSize; /// let source = r#"a = 4 /// c = "some string" /// x = b"#; /// /// let index = LineIndex::from_source_text(source); /// - /// // First line, first column - /// assert_eq!(index.offset(OneIndexed::from_zero_indexed(0), OneIndexed::from_zero_indexed(0), source), TextSize::new(0)); + /// // First line, first character + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(0), + /// character_offset: OneIndexed::from_zero_indexed(0) + /// }, + /// source, + /// PositionEncoding::Utf32, + /// ), + /// TextSize::new(0) + /// ); /// - /// // Second line, 4th column - /// assert_eq!(index.offset(OneIndexed::from_zero_indexed(1), OneIndexed::from_zero_indexed(4), source), TextSize::new(10)); + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(1), + /// character_offset: OneIndexed::from_zero_indexed(4) + /// }, + /// source, + /// PositionEncoding::Utf32, + /// ), + /// TextSize::new(10) + /// ); /// /// // Offset past the end of the first line - /// assert_eq!(index.offset(OneIndexed::from_zero_indexed(0), OneIndexed::from_zero_indexed(10), source), TextSize::new(6)); + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(0), + /// character_offset: OneIndexed::from_zero_indexed(10) + /// }, + /// source, + /// PositionEncoding::Utf32, + /// ), + /// TextSize::new(6) + /// ); /// /// // Offset past the end of the file - /// assert_eq!(index.offset(OneIndexed::from_zero_indexed(3), OneIndexed::from_zero_indexed(0), source), TextSize::new(29)); + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(3), + /// character_offset: OneIndexed::from_zero_indexed(0) + /// }, + /// source, + /// PositionEncoding::Utf32, + /// ), + /// TextSize::new(29) + /// ); /// ``` /// - /// ### UTF8 + /// ### Non-ASCII source text /// /// ``` - /// use ruff_source_file::{LineIndex, OneIndexed}; + /// use ruff_source_file::{LineIndex, OneIndexed, SourceLocation, PositionEncoding}; /// use ruff_text_size::TextSize; - /// let source = r#"a = 4 + /// let source = format!("\u{FEFF}{}", r#"a = 4 /// c = "❤️" - /// x = b"#; + /// x = b"#); /// - /// let index = LineIndex::from_source_text(source); + /// let index = LineIndex::from_source_text(&source); /// - /// // First line, first column - /// assert_eq!(index.offset(OneIndexed::from_zero_indexed(0), OneIndexed::from_zero_indexed(0), source), TextSize::new(0)); + /// // First line, first character, points at the BOM + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(0), + /// character_offset: OneIndexed::from_zero_indexed(0) + /// }, + /// &source, + /// PositionEncoding::Utf32, + /// ), + /// TextSize::new(0) + /// ); + /// + /// // First line, after the BOM + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(0), + /// character_offset: OneIndexed::from_zero_indexed(1) + /// }, + /// &source, + /// PositionEncoding::Utf32, + /// ), + /// TextSize::new(3) + /// ); + /// + /// // second line, 7th character, after emoji, UTF32 + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(1), + /// character_offset: OneIndexed::from_zero_indexed(7) + /// }, + /// &source, + /// PositionEncoding::Utf32, + /// ), + /// TextSize::new(20) + /// ); + /// + /// // Second line, 7th character, after emoji, UTF 16 + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(1), + /// character_offset: OneIndexed::from_zero_indexed(7) + /// }, + /// &source, + /// PositionEncoding::Utf16, + /// ), + /// TextSize::new(20) + /// ); /// - /// // Third line, 2nd column, after emoji - /// assert_eq!(index.offset(OneIndexed::from_zero_indexed(2), OneIndexed::from_zero_indexed(1), source), TextSize::new(20)); /// /// // Offset past the end of the second line - /// assert_eq!(index.offset(OneIndexed::from_zero_indexed(1), OneIndexed::from_zero_indexed(10), source), TextSize::new(19)); + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(1), + /// character_offset: OneIndexed::from_zero_indexed(10) + /// }, + /// &source, + /// PositionEncoding::Utf32, + /// ), + /// TextSize::new(22) + /// ); /// /// // Offset past the end of the file - /// assert_eq!(index.offset(OneIndexed::from_zero_indexed(3), OneIndexed::from_zero_indexed(0), source), TextSize::new(24)); + /// assert_eq!( + /// index.offset( + /// SourceLocation { + /// line: OneIndexed::from_zero_indexed(3), + /// character_offset: OneIndexed::from_zero_indexed(0) + /// }, + /// &source, + /// PositionEncoding::Utf32, + /// ), + /// TextSize::new(27) + /// ); /// ``` - /// - pub fn offset(&self, line: OneIndexed, column: OneIndexed, contents: &str) -> TextSize { + pub fn offset( + &self, + position: SourceLocation, + text: &str, + position_encoding: PositionEncoding, + ) -> TextSize { // If start-of-line position after last line - if line.to_zero_indexed() > self.line_starts().len() { - return contents.text_len(); + if position.line.to_zero_indexed() > self.line_starts().len() { + return text.text_len(); } - let line_range = self.line_range(line, contents); + let line_range = self.line_range(position.line, text); - match self.kind() { - IndexKind::Ascii => { - line_range.start() - + TextSize::try_from(column.to_zero_indexed()) - .unwrap_or(line_range.len()) - .clamp(TextSize::new(0), line_range.len()) - } - IndexKind::Utf8 => { - let rest = &contents[line_range]; - let column_offset: TextSize = rest + let character_offset = position.character_offset.to_zero_indexed(); + let character_byte_offset = if self.is_ascii() { + TextSize::try_from(character_offset).unwrap() + } else { + let line = &text[line_range]; + + match position_encoding { + PositionEncoding::Utf8 => { + TextSize::try_from(position.character_offset.to_zero_indexed()).unwrap() + } + PositionEncoding::Utf16 => { + let mut byte_offset = TextSize::new(0); + let mut utf16_code_unit_offset = 0; + + for c in line.chars() { + if utf16_code_unit_offset >= character_offset { + break; + } + + // Count characters encoded as two 16 bit words as 2 characters. + byte_offset += c.text_len(); + utf16_code_unit_offset += c.len_utf16(); + } + + byte_offset + } + PositionEncoding::Utf32 => line .chars() - .take(column.to_zero_indexed()) + .take(position.character_offset.to_zero_indexed()) .map(ruff_text_size::TextLen::text_len) - .sum(); - line_range.start() + column_offset + .sum(), } - } + }; + + line_range.start() + character_byte_offset.clamp(TextSize::new(0), line_range.len()) } /// Returns the [byte offsets](TextSize) for every line @@ -430,12 +643,26 @@ impl FromStr for OneIndexed { } } +#[derive(Default, Copy, Clone, Debug)] +pub enum PositionEncoding { + /// Character offsets count the number of bytes from the start of the line. + #[default] + Utf8, + + /// Character offsets count the number of UTF-16 code units from the start of the line. + Utf16, + + /// Character offsets count the number of UTF-32 code points units (the same as number of characters in Rust) + /// from the start of the line. + Utf32, +} + #[cfg(test)] mod tests { use ruff_text_size::TextSize; use crate::line_index::LineIndex; - use crate::{OneIndexed, SourceLocation}; + use crate::{LineColumn, OneIndexed}; #[test] fn ascii_index() { @@ -466,30 +693,30 @@ mod tests { let index = LineIndex::from_source_text(contents); // First row. - let loc = index.source_location(TextSize::from(2), contents); + let loc = index.line_column(TextSize::from(2), contents); assert_eq!( loc, - SourceLocation { - row: OneIndexed::from_zero_indexed(0), + LineColumn { + line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(2) } ); // Second row. - let loc = index.source_location(TextSize::from(6), contents); + let loc = index.line_column(TextSize::from(6), contents); assert_eq!( loc, - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(0) } ); - let loc = index.source_location(TextSize::from(11), contents); + let loc = index.line_column(TextSize::from(11), contents); assert_eq!( loc, - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(5) } ); @@ -502,23 +729,23 @@ mod tests { assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]); assert_eq!( - index.source_location(TextSize::from(4), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(0), + index.line_column(TextSize::from(4), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) } ); assert_eq!( - index.source_location(TextSize::from(6), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + index.line_column(TextSize::from(6), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(0) } ); assert_eq!( - index.source_location(TextSize::from(7), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + index.line_column(TextSize::from(7), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(1) } ); @@ -531,23 +758,23 @@ mod tests { assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]); assert_eq!( - index.source_location(TextSize::from(4), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(0), + index.line_column(TextSize::from(4), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) } ); assert_eq!( - index.source_location(TextSize::from(7), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + index.line_column(TextSize::from(7), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(0) } ); assert_eq!( - index.source_location(TextSize::from(8), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + index.line_column(TextSize::from(8), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(1) } ); @@ -598,23 +825,23 @@ mod tests { // Second ' assert_eq!( - index.source_location(TextSize::from(9), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(0), + index.line_column(TextSize::from(9), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(6) } ); assert_eq!( - index.source_location(TextSize::from(11), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + index.line_column(TextSize::from(11), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(0) } ); assert_eq!( - index.source_location(TextSize::from(12), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + index.line_column(TextSize::from(12), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(1) } ); @@ -632,23 +859,23 @@ mod tests { // Second ' assert_eq!( - index.source_location(TextSize::from(9), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(0), + index.line_column(TextSize::from(9), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(6) } ); assert_eq!( - index.source_location(TextSize::from(12), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + index.line_column(TextSize::from(12), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(0) } ); assert_eq!( - index.source_location(TextSize::from(13), contents), - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + index.line_column(TextSize::from(13), contents), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(1) } ); @@ -664,49 +891,49 @@ mod tests { ); // First row. - let loc = index.source_location(TextSize::from(0), contents); + let loc = index.line_column(TextSize::from(0), contents); assert_eq!( loc, - SourceLocation { - row: OneIndexed::from_zero_indexed(0), + LineColumn { + line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) } ); - let loc = index.source_location(TextSize::from(5), contents); + let loc = index.line_column(TextSize::from(5), contents); assert_eq!( loc, - SourceLocation { - row: OneIndexed::from_zero_indexed(0), + LineColumn { + line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(5) } ); - let loc = index.source_location(TextSize::from(8), contents); + let loc = index.line_column(TextSize::from(8), contents); assert_eq!( loc, - SourceLocation { - row: OneIndexed::from_zero_indexed(0), + LineColumn { + line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(6) } ); // Second row. - let loc = index.source_location(TextSize::from(10), contents); + let loc = index.line_column(TextSize::from(10), contents); assert_eq!( loc, - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(0) } ); // One-past-the-end. - let loc = index.source_location(TextSize::from(15), contents); + let loc = index.line_column(TextSize::from(15), contents); assert_eq!( loc, - SourceLocation { - row: OneIndexed::from_zero_indexed(1), + LineColumn { + line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(5) } ); diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs index 06b0b6973c..db007349bc 100644 --- a/crates/ruff_wasm/src/lib.rs +++ b/crates/ruff_wasm/src/lib.rs @@ -21,7 +21,7 @@ use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext, use ruff_python_index::Indexer; use ruff_python_parser::{parse, parse_unchecked, Mode, ParseOptions, Parsed}; use ruff_python_trivia::CommentRanges; -use ruff_source_file::SourceLocation; +use ruff_source_file::{LineColumn, OneIndexed}; use ruff_text_size::Ranged; use ruff_workspace::configuration::Configuration; use ruff_workspace::options::{FormatOptions, LintCommonOptions, LintOptions, Options}; @@ -61,8 +61,8 @@ export interface Diagnostic { pub struct ExpandedMessage { pub code: Option, pub message: String, - pub start_location: SourceLocation, - pub end_location: SourceLocation, + pub start_location: Location, + pub end_location: Location, pub fix: Option, } @@ -74,8 +74,8 @@ pub struct ExpandedFix { #[derive(Serialize, Deserialize, Eq, PartialEq, Debug)] struct ExpandedEdit { - location: SourceLocation, - end_location: SourceLocation, + location: Location, + end_location: Location, content: Option, } @@ -214,16 +214,16 @@ impl Workspace { }) => ExpandedMessage { code: Some(kind.rule().noqa_code().to_string()), message: kind.body, - start_location: source_code.source_location(range.start()), - end_location: source_code.source_location(range.end()), + start_location: source_code.line_column(range.start()).into(), + end_location: source_code.line_column(range.end()).into(), fix: fix.map(|fix| ExpandedFix { message: kind.suggestion, edits: fix .edits() .iter() .map(|edit| ExpandedEdit { - location: source_code.source_location(edit.start()), - end_location: source_code.source_location(edit.end()), + location: source_code.line_column(edit.start()).into(), + end_location: source_code.line_column(edit.end()).into(), content: edit.content().map(ToString::to_string), }) .collect(), @@ -233,8 +233,8 @@ impl Workspace { ExpandedMessage { code: None, message, - start_location: source_code.source_location(range.start()), - end_location: source_code.source_location(range.end()), + start_location: source_code.line_column(range.start()).into(), + end_location: source_code.line_column(range.end()).into(), fix: None, } } @@ -316,3 +316,18 @@ impl<'a> ParsedModule<'a> { ) } } + +#[derive(Serialize, Deserialize, Eq, PartialEq, Debug)] +pub struct Location { + pub row: OneIndexed, + pub column: OneIndexed, +} + +impl From for Location { + fn from(value: LineColumn) -> Self { + Self { + row: value.line, + column: value.column, + } + } +} diff --git a/crates/ruff_wasm/tests/api.rs b/crates/ruff_wasm/tests/api.rs index 49864125ff..ee6166e142 100644 --- a/crates/ruff_wasm/tests/api.rs +++ b/crates/ruff_wasm/tests/api.rs @@ -3,8 +3,8 @@ use wasm_bindgen_test::wasm_bindgen_test; use ruff_linter::registry::Rule; -use ruff_source_file::{OneIndexed, SourceLocation}; -use ruff_wasm::{ExpandedMessage, Workspace}; +use ruff_source_file::OneIndexed; +use ruff_wasm::{ExpandedMessage, Location, Workspace}; macro_rules! check { ($source:expr, $config:expr, $expected:expr) => {{ @@ -27,11 +27,11 @@ fn empty_config() { [ExpandedMessage { code: Some(Rule::IfTuple.noqa_code().to_string()), message: "If test is a tuple, which is always `True`".to_string(), - start_location: SourceLocation { + start_location: Location { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(3) }, - end_location: SourceLocation { + end_location: Location { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(9) }, @@ -48,11 +48,11 @@ fn syntax_error() { [ExpandedMessage { code: None, message: "SyntaxError: Expected an expression".to_string(), - start_location: SourceLocation { + start_location: Location { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(3) }, - end_location: SourceLocation { + end_location: Location { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(0) }, @@ -69,11 +69,11 @@ fn unsupported_syntax_error() { [ExpandedMessage { code: None, message: "SyntaxError: Cannot use `match` statement on Python 3.9 (syntax was added in Python 3.10)".to_string(), - start_location: SourceLocation { + start_location: Location { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }, - end_location: SourceLocation { + end_location: Location { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(5) },