From c33c9dc5859b187094c51efda2ce26383f544c62 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 11 Apr 2023 10:28:55 +0200 Subject: [PATCH] Introduce SourceFile to avoid cloning the message filename (#3904) --- crates/ruff/src/linter.rs | 80 +++--- crates/ruff/src/message/azure.rs | 4 +- crates/ruff/src/message/github.rs | 6 +- crates/ruff/src/message/gitlab.rs | 11 +- crates/ruff/src/message/grouped.rs | 2 +- crates/ruff/src/message/json.rs | 2 +- crates/ruff/src/message/junit.rs | 2 +- crates/ruff/src/message/mod.rs | 49 ++-- crates/ruff/src/message/pylint.rs | 4 +- crates/ruff/src/message/text.rs | 11 +- crates/ruff_cli/src/cache.rs | 137 +++++++---- crates/ruff_cli/src/commands/run.rs | 6 +- crates/ruff_cli/src/diagnostics.rs | 4 +- .../src/source_code/locator.rs | 18 +- crates/ruff_python_ast/src/source_code/mod.rs | 231 +++++++++--------- 15 files changed, 296 insertions(+), 271 deletions(-) diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs index 6e487a273a..db42254372 100644 --- a/crates/ruff/src/linter.rs +++ b/crates/ruff/src/linter.rs @@ -1,4 +1,5 @@ use std::borrow::Cow; +use std::ops::Deref; use std::path::Path; use anyhow::{anyhow, Result}; @@ -10,7 +11,7 @@ use rustpython_parser::ParseError; use ruff_diagnostics::Diagnostic; use ruff_python_ast::imports::ImportMap; -use ruff_python_ast::source_code::{Indexer, Locator, Stylist}; +use ruff_python_ast::source_code::{Indexer, Locator, SourceFileBuilder, Stylist}; use ruff_python_stdlib::path::is_python_stub_file; use crate::autofix::fix_file; @@ -353,35 +354,41 @@ pub fn lint_only( autofix, ); - // Convert from diagnostics to messages. - let path_lossy = path.to_string_lossy(); - - result.map(|(messages, imports)| { - let source_code = if settings.show_source && !messages.is_empty() { - Some(locator.to_source_code_buf()) - } else { - None - }; - + result.map(|(diagnostics, imports)| { ( - messages - .into_iter() - .map(|diagnostic| { - let lineno = diagnostic.location.row(); - let noqa_row = *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno); - Message::from_diagnostic( - diagnostic, - path_lossy.to_string(), - source_code.clone(), - noqa_row, - ) - }) - .collect(), + diagnostics_to_messages(diagnostics, path, settings, &locator, &directives), imports, ) }) } +/// Convert from diagnostics to messages. +fn diagnostics_to_messages( + diagnostics: Vec, + path: &Path, + settings: &Settings, + locator: &Locator, + directives: &Directives, +) -> Vec { + let file = once_cell::unsync::Lazy::new(|| { + let mut builder = SourceFileBuilder::new(&path.to_string_lossy()); + if settings.show_source { + builder.set_source_code(&locator.to_source_code()); + } + + builder.finish() + }); + + diagnostics + .into_iter() + .map(|diagnostic| { + let lineno = diagnostic.location.row(); + let noqa_row = *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno); + Message::from_diagnostic(diagnostic, file.deref().clone(), noqa_row) + }) + .collect() +} + /// Generate `Diagnostic`s from source code content, iteratively autofixing /// until stable. pub fn lint_fix<'a>( @@ -503,31 +510,10 @@ This indicates a bug in `{}`. If you could open an issue at: } } - // Convert to messages. - let path_lossy = path.to_string_lossy(); return Ok(FixerResult { - result: result.map(|(messages, imports)| { - let source_code = if settings.show_source && !messages.is_empty() { - Some(locator.to_source_code_buf()) - } else { - None - }; - + result: result.map(|(diagnostics, imports)| { ( - messages - .into_iter() - .map(|diagnostic| { - let lineno = diagnostic.location.row(); - let noqa_row = - *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno); - Message::from_diagnostic( - diagnostic, - path_lossy.to_string(), - source_code.clone(), - noqa_row, - ) - }) - .collect(), + diagnostics_to_messages(diagnostics, path, settings, &locator, &directives), imports, ) }), diff --git a/crates/ruff/src/message/azure.rs b/crates/ruff/src/message/azure.rs index 6563aaa53e..8396dd4e35 100644 --- a/crates/ruff/src/message/azure.rs +++ b/crates/ruff/src/message/azure.rs @@ -15,7 +15,7 @@ impl Emitter for AzureEmitter { context: &EmitterContext, ) -> anyhow::Result<()> { for message in messages { - let (line, col) = if context.is_jupyter_notebook(&message.filename) { + let (line, col) = if context.is_jupyter_notebook(message.filename()) { // We can't give a reasonable location for the structured formats, // so we show one that's clearly a fallback (1, 0) @@ -27,7 +27,7 @@ impl Emitter for AzureEmitter { writer, "##vso[task.logissue type=error\ ;sourcepath={filename};linenumber={line};columnnumber={col};code={code};]{body}", - filename = message.filename, + filename = message.filename(), code = message.kind.rule().noqa_code(), body = message.kind.body, )?; diff --git a/crates/ruff/src/message/github.rs b/crates/ruff/src/message/github.rs index 8aa2daac7a..013bf5426d 100644 --- a/crates/ruff/src/message/github.rs +++ b/crates/ruff/src/message/github.rs @@ -16,7 +16,7 @@ impl Emitter for GithubEmitter { context: &EmitterContext, ) -> anyhow::Result<()> { for message in messages { - let (row, column) = if context.is_jupyter_notebook(&message.filename) { + let (row, column) = if context.is_jupyter_notebook(message.filename()) { // We can't give a reasonable location for the structured formats, // so we show one that's clearly a fallback (1, 0) @@ -29,7 +29,7 @@ impl Emitter for GithubEmitter { "::error title=Ruff \ ({code}),file={file},line={row},col={column},endLine={end_row},endColumn={end_column}::", code = message.kind.rule().noqa_code(), - file = message.filename, + file = message.filename(), row = message.location.row(), column = message.location.column(), end_row = message.end_location.row(), @@ -39,7 +39,7 @@ impl Emitter for GithubEmitter { writeln!( writer, "{path}:{row}:{column}: {code} {body}", - path = relativize_path(&message.filename), + path = relativize_path(message.filename()), code = message.kind.rule().noqa_code(), body = message.kind.body, )?; diff --git a/crates/ruff/src/message/gitlab.rs b/crates/ruff/src/message/gitlab.rs index cbee136ebd..7d70610ee9 100644 --- a/crates/ruff/src/message/gitlab.rs +++ b/crates/ruff/src/message/gitlab.rs @@ -56,7 +56,7 @@ impl Serialize for SerializedMessages<'_> { let mut s = serializer.serialize_seq(Some(self.messages.len()))?; for message in self.messages { - let lines = if self.context.is_jupyter_notebook(&message.filename) { + let lines = if self.context.is_jupyter_notebook(message.filename()) { // We can't give a reasonable location for the structured formats, // so we show one that's clearly a fallback json!({ @@ -71,8 +71,8 @@ impl Serialize for SerializedMessages<'_> { }; let path = self.project_dir.as_ref().map_or_else( - || relativize_path(&message.filename), - |project_dir| relativize_path_to(&message.filename, project_dir), + || relativize_path(message.filename()), + |project_dir| relativize_path_to(message.filename(), project_dir), ); let value = json!({ @@ -99,8 +99,7 @@ fn fingerprint(message: &Message) -> String { location, end_location, fix: _fix, - filename, - source: _source, + file, noqa_row: _noqa_row, } = message; @@ -111,7 +110,7 @@ fn fingerprint(message: &Message) -> String { location.column().hash(&mut hasher); end_location.row().hash(&mut hasher); end_location.column().hash(&mut hasher); - filename.hash(&mut hasher); + file.name().hash(&mut hasher); format!("{:x}", hasher.finish()) } diff --git a/crates/ruff/src/message/grouped.rs b/crates/ruff/src/message/grouped.rs index d35430f423..cec82a5915 100644 --- a/crates/ruff/src/message/grouped.rs +++ b/crates/ruff/src/message/grouped.rs @@ -57,7 +57,7 @@ impl Emitter for GroupedEmitter { show_fix_status: self.show_fix_status, row_length, column_length, - jupyter_index: context.jupyter_index(&message.filename), + jupyter_index: context.jupyter_index(message.filename()), } )?; } diff --git a/crates/ruff/src/message/json.rs b/crates/ruff/src/message/json.rs index 46bdd994cd..68b6c1f2f6 100644 --- a/crates/ruff/src/message/json.rs +++ b/crates/ruff/src/message/json.rs @@ -49,7 +49,7 @@ impl Serialize for ExpandedMessages<'_> { "fix": fix, "location": message.location, "end_location": message.end_location, - "filename": message.filename, + "filename": message.filename(), "noqa_row": message.noqa_row }); diff --git a/crates/ruff/src/message/junit.rs b/crates/ruff/src/message/junit.rs index 52d57c16f9..fbfa7c6467 100644 --- a/crates/ruff/src/message/junit.rs +++ b/crates/ruff/src/message/junit.rs @@ -25,7 +25,7 @@ impl Emitter for JunitEmitter { for message in messages { let mut status = TestCaseStatus::non_success(NonSuccessKind::Failure); status.set_message(message.kind.body.clone()); - let (row, col) = if context.is_jupyter_notebook(&message.filename) { + let (row, col) = if context.is_jupyter_notebook(message.filename()) { // We can't give a reasonable location for the structured formats, // so we show one that's clearly a fallback (1, 0) diff --git a/crates/ruff/src/message/mod.rs b/crates/ruff/src/message/mod.rs index bf1720175b..20020460ac 100644 --- a/crates/ruff/src/message/mod.rs +++ b/crates/ruff/src/message/mod.rs @@ -24,7 +24,7 @@ pub use text::TextEmitter; use crate::jupyter::JupyterIndex; use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix}; -use ruff_python_ast::source_code::SourceCodeBuf; +use ruff_python_ast::source_code::SourceFile; #[derive(Debug, PartialEq, Eq)] pub struct Message { @@ -32,18 +32,12 @@ pub struct Message { pub location: Location, pub end_location: Location, pub fix: Fix, - pub filename: String, - pub source: Option, + pub file: SourceFile, pub noqa_row: usize, } impl Message { - pub fn from_diagnostic( - diagnostic: Diagnostic, - filename: String, - source: Option, - noqa_row: usize, - ) -> Self { + pub fn from_diagnostic(diagnostic: Diagnostic, file: SourceFile, noqa_row: usize) -> Self { Self { kind: diagnostic.kind, location: Location::new(diagnostic.location.row(), diagnostic.location.column() + 1), @@ -52,17 +46,20 @@ impl Message { diagnostic.end_location.column() + 1, ), fix: diagnostic.fix, - filename, - source, + file, noqa_row, } } + + pub fn filename(&self) -> &str { + self.file.name() + } } impl Ord for Message { fn cmp(&self, other: &Self) -> Ordering { - (&self.filename, self.location.row(), self.location.column()).cmp(&( - &other.filename, + (self.filename(), self.location.row(), self.location.column()).cmp(&( + other.filename(), other.location.row(), other.location.column(), )) @@ -79,7 +76,7 @@ fn group_messages_by_filename(messages: &[Message]) -> BTreeMap<&str, Vec<&Messa let mut grouped_messages = BTreeMap::default(); for message in messages { grouped_messages - .entry(message.filename.as_str()) + .entry(message.filename()) .or_insert_with(Vec::new) .push(message); } @@ -125,7 +122,7 @@ mod tests { use crate::message::{Emitter, EmitterContext, Location, Message}; use crate::rules::pyflakes::rules::{UndefinedName, UnusedImport, UnusedVariable}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; - use ruff_python_ast::source_code::SourceCodeBuf; + use ruff_python_ast::source_code::SourceFileBuilder; use ruff_python_ast::types::Range; use rustc_hash::FxHashMap; @@ -153,7 +150,7 @@ def fibonacci(n): Range::new(Location::new(1, 7), Location::new(1, 9)), ); - let fib_source = SourceCodeBuf::from_content(fib); + let fib_source = SourceFileBuilder::new("fib.py").source_text(fib).finish(); let unused_variable = Diagnostic::new( UnusedVariable { @@ -175,22 +172,14 @@ def fibonacci(n): Range::new(Location::new(1, 3), Location::new(1, 4)), ); - let file_2_source = SourceCodeBuf::from_content(file_2); + let file_2_source = SourceFileBuilder::new("undef.py") + .source_text(file_2) + .finish(); vec![ - Message::from_diagnostic( - unused_import, - "fib.py".to_string(), - Some(fib_source.clone()), - 1, - ), - Message::from_diagnostic(unused_variable, "fib.py".to_string(), Some(fib_source), 1), - Message::from_diagnostic( - undefined_name, - "undef.py".to_string(), - Some(file_2_source), - 1, - ), + Message::from_diagnostic(unused_import, fib_source.clone(), 1), + Message::from_diagnostic(unused_variable, fib_source, 1), + Message::from_diagnostic(undefined_name, file_2_source, 1), ] } diff --git a/crates/ruff/src/message/pylint.rs b/crates/ruff/src/message/pylint.rs index 8e4b17607a..0b0e682b40 100644 --- a/crates/ruff/src/message/pylint.rs +++ b/crates/ruff/src/message/pylint.rs @@ -16,7 +16,7 @@ impl Emitter for PylintEmitter { context: &EmitterContext, ) -> anyhow::Result<()> { for message in messages { - let row = if context.is_jupyter_notebook(&message.filename) { + let row = if context.is_jupyter_notebook(message.filename()) { // We can't give a reasonable location for the structured formats, // so we show one that's clearly a fallback 1 @@ -27,7 +27,7 @@ impl Emitter for PylintEmitter { writeln!( writer, "{path}:{row}: [{code}] {body}", - path = relativize_path(&message.filename), + path = relativize_path(message.filename()), code = message.kind.rule().noqa_code(), body = message.kind.body, )?; diff --git a/crates/ruff/src/message/text.rs b/crates/ruff/src/message/text.rs index 70b2ffffec..edf6e3bb8e 100644 --- a/crates/ruff/src/message/text.rs +++ b/crates/ruff/src/message/text.rs @@ -35,12 +35,13 @@ impl Emitter for TextEmitter { write!( writer, "{path}{sep}", - path = relativize_path(&message.filename).bold(), + path = relativize_path(message.filename()).bold(), sep = ":".cyan(), )?; // Check if we're working on a jupyter notebook and translate positions with cell accordingly - let (row, col) = if let Some(jupyter_index) = context.jupyter_index(&message.filename) { + let (row, col) = if let Some(jupyter_index) = context.jupyter_index(message.filename()) + { write!( writer, "cell {cell}{sep}", @@ -66,7 +67,7 @@ impl Emitter for TextEmitter { } )?; - if message.source.is_some() { + if message.file.source_code().is_some() { writeln!(writer, "{}", MessageCodeFrame { message })?; } } @@ -121,13 +122,13 @@ impl Display for MessageCodeFrame<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let Message { kind, - source, + file, location, end_location, .. } = self.message; - if let Some(source_code) = source { + if let Some(source_code) = file.source_code() { let suggestion = kind.suggestion.as_deref(); let footer = if suggestion.is_some() { vec![Annotation { diff --git a/crates/ruff_cli/src/cache.rs b/crates/ruff_cli/src/cache.rs index 046117f0e4..ac69581ee9 100644 --- a/crates/ruff_cli/src/cache.rs +++ b/crates/ruff_cli/src/cache.rs @@ -1,3 +1,4 @@ +use std::cell::RefCell; use std::fs; use std::hash::Hasher; use std::io::Write; @@ -5,7 +6,6 @@ use std::path::Path; use anyhow::Result; use filetime::FileTime; -use itertools::Itertools; use log::error; use path_absolutize::Absolutize; use ruff::message::{Location, Message}; @@ -13,8 +13,7 @@ use ruff::settings::{flags, AllSettings, Settings}; use ruff_cache::{CacheKey, CacheKeyHasher}; use ruff_diagnostics::{DiagnosticKind, Fix}; use ruff_python_ast::imports::ImportMap; -use ruff_python_ast::source_code::{LineIndex, SourceCodeBuf}; -use rustc_hash::FxHashMap; +use ruff_python_ast::source_code::SourceFileBuilder; use serde::ser::{SerializeSeq, SerializeStruct}; use serde::{Deserialize, Serialize, Serializer}; #[cfg(unix)] @@ -22,28 +21,77 @@ use std::os::unix::fs::PermissionsExt; const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION"); -#[derive(Serialize)] +/// Vec storing all source files. The tuple is (filename, source code). +type Files<'a> = Vec<(&'a str, Option<&'a str>)>; +type FilesBuf = Vec<(String, Option)>; + struct CheckResultRef<'a> { - #[serde(serialize_with = "serialize_messages")] - messages: &'a [Message], imports: &'a ImportMap, - sources: Vec<(&'a str, &'a str)>, + messages: &'a [Message], } -fn serialize_messages(messages: &[Message], serializer: S) -> Result -where - S: Serializer, -{ - let mut s = serializer.serialize_seq(Some(messages.len()))?; +impl Serialize for CheckResultRef<'_> { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: Serializer, + { + let mut s = serializer.serialize_struct("CheckResultRef", 3)?; - for message in messages { - s.serialize_element(&SerializeMessage(message))?; + s.serialize_field("imports", &self.imports)?; + + let serialize_messages = SerializeMessages { + messages: self.messages, + files: RefCell::default(), + }; + + s.serialize_field("messages", &serialize_messages)?; + + let files = serialize_messages.files.take(); + + s.serialize_field("files", &files)?; + + s.end() } - - s.end() } -struct SerializeMessage<'a>(&'a Message); +struct SerializeMessages<'a> { + messages: &'a [Message], + files: RefCell>, +} + +impl Serialize for SerializeMessages<'_> { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: Serializer, + { + let mut s = serializer.serialize_seq(Some(self.messages.len()))?; + let mut files = self.files.borrow_mut(); + + for message in self.messages { + // Using a Vec instead of a HashMap because the cache is per file and the large majority of + // files have exactly one source file. + let file_id = if let Some(position) = files + .iter() + .position(|(filename, _)| *filename == message.filename()) + { + position + } else { + let index = files.len(); + files.push((message.filename(), message.file.source_text())); + index + }; + + s.serialize_element(&SerializeMessage { message, file_id })?; + } + + s.end() + } +} + +struct SerializeMessage<'a> { + message: &'a Message, + file_id: usize, +} impl Serialize for SerializeMessage<'_> { fn serialize(&self, serializer: S) -> Result @@ -55,11 +103,10 @@ impl Serialize for SerializeMessage<'_> { location, end_location, fix, - filename, // Serialized manually for all files - source: _source, + file: _, noqa_row, - } = self.0; + } = self.message; let mut s = serializer.serialize_struct("Message", 6)?; @@ -67,7 +114,7 @@ impl Serialize for SerializeMessage<'_> { s.serialize_field("location", &location)?; s.serialize_field("end_location", &end_location)?; s.serialize_field("fix", &fix)?; - s.serialize_field("filename", &filename)?; + s.serialize_field("file_id", &self.file_id)?; s.serialize_field("noqa_row", &noqa_row)?; s.end() @@ -80,15 +127,15 @@ struct MessageHeader { location: Location, end_location: Location, fix: Fix, - filename: String, + file_id: usize, noqa_row: usize, } #[derive(Deserialize)] struct CheckResult { - messages: Vec, imports: ImportMap, - sources: Vec<(String, String)>, + messages: Vec, + files: FilesBuf, } fn content_dir() -> &'static Path { @@ -170,25 +217,35 @@ pub fn get( Ok(CheckResult { messages: headers, imports, - sources, + files: sources, }) => { let mut messages = Vec::with_capacity(headers.len()); - let sources: FxHashMap<_, _> = sources + + let source_files: Vec<_> = sources .into_iter() - .map(|(filename, content)| { - let index = LineIndex::from_source_text(&content); - (filename, SourceCodeBuf::new(&content, index)) + .map(|(filename, text)| { + let mut builder = SourceFileBuilder::from_string(filename); + + if let Some(text) = text { + builder.set_source_text_string(text); + } + + builder.finish() }) .collect(); for header in headers { + let Some(source_file) = source_files.get(header.file_id) else { + error!("Failed to retrieve source file for cached entry"); + return None; + }; + messages.push(Message { kind: header.kind, location: header.location, end_location: header.end_location, fix: header.fix, - source: sources.get(&header.filename).cloned(), - filename: header.filename, + file: source_file.clone(), noqa_row: header.noqa_row, }); } @@ -212,23 +269,7 @@ pub fn set( messages: &[Message], imports: &ImportMap, ) { - // Store the content of the source files, assuming that all files with the same name have the same content - let sources: Vec<_> = messages - .iter() - .filter_map(|message| { - message - .source - .as_ref() - .map(|source| (&*message.filename, source.text())) - }) - .unique_by(|(filename, _)| *filename) - .collect(); - - let check_result = CheckResultRef { - messages, - imports, - sources, - }; + let check_result = CheckResultRef { imports, messages }; if let Err(e) = write_sync( &settings.cli.cache_dir, cache_key(path, package, metadata, &settings.lib, autofix), diff --git a/crates/ruff_cli/src/commands/run.rs b/crates/ruff_cli/src/commands/run.rs index b178dd65fe..bd0ddcb3d2 100644 --- a/crates/ruff_cli/src/commands/run.rs +++ b/crates/ruff_cli/src/commands/run.rs @@ -16,6 +16,7 @@ use ruff::settings::{flags, AllSettings}; use ruff::{fs, packaging, resolver, warn_user_once, IOError, Range}; use ruff_diagnostics::Diagnostic; use ruff_python_ast::imports::ImportMap; +use ruff_python_ast::source_code::SourceFileBuilder; use crate::args::Overrides; use crate::cache; @@ -116,14 +117,15 @@ pub fn run( ); let settings = resolver.resolve(path, pyproject_strategy); if settings.rules.enabled(Rule::IOError) { + let file = SourceFileBuilder::new(&path.to_string_lossy()).finish(); + Diagnostics::new( vec![Message::from_diagnostic( Diagnostic::new( IOError { message }, Range::new(Location::default(), Location::default()), ), - format!("{}", path.display()), - None, + file, 1, )], ImportMap::default(), diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index 57022aa9a9..a63792d19f 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -18,6 +18,7 @@ use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult}; use ruff::message::Message; use ruff::settings::{flags, AllSettings, Settings}; use ruff_python_ast::imports::ImportMap; +use ruff_python_ast::source_code::SourceFileBuilder; use crate::cache; @@ -85,8 +86,7 @@ fn load_jupyter_notebook(path: &Path) -> Result<(String, JupyterIndex), Box Locator<'a> { .get_or_init(|| LineIndex::from_source_text(self.contents)) } - fn source_code(&self) -> SourceCode<'a, '_> { + #[inline] + pub fn to_source_code(&self) -> SourceCode<'a, '_> { SourceCode { index: self.get_or_init_index(), text: self.contents, } } - #[inline] - pub fn to_source_code_buf(&self) -> SourceCodeBuf { - self.source_code().to_owned() - } - /// Take the source code up to the given [`Location`]. #[inline] pub fn up_to(&self, location: Location) -> &'a str { - self.source_code().up_to(location) + self.to_source_code().up_to(location) } /// Take the source code after the given [`Location`]. #[inline] pub fn after(&self, location: Location) -> &'a str { - self.source_code().after(location) + self.to_source_code().after(location) } /// Take the source code between the given [`Range`]. #[inline] pub fn slice>(&self, range: R) -> &'a str { - self.source_code().slice(range) + self.to_source_code().slice(range) } /// Return the byte offset of the given [`Location`]. #[inline] pub fn offset(&self, location: Location) -> TextSize { - self.source_code().offset(location) + self.to_source_code().offset(location) } /// Return the underlying source code. diff --git a/crates/ruff_python_ast/src/source_code/mod.rs b/crates/ruff_python_ast/src/source_code/mod.rs index c956b058ea..68223226b4 100644 --- a/crates/ruff_python_ast/src/source_code/mod.rs +++ b/crates/ruff_python_ast/src/source_code/mod.rs @@ -13,6 +13,7 @@ use ruff_text_size::{TextRange, TextSize}; use rustpython_parser as parser; use rustpython_parser::ast::Location; use rustpython_parser::{lexer, Mode, ParseError}; +use std::fmt::{Debug, Formatter}; use std::sync::Arc; pub use stylist::{LineEnding, Stylist}; @@ -107,22 +108,16 @@ impl<'src, 'index> SourceCode<'src, 'index> { &self.text[range] } + /// Returns the source text pub fn text(&self) -> &'src str { self.text } + /// Returns the number of lines #[inline] pub fn line_count(&self) -> usize { self.index.line_count() } - - pub fn to_source_code_buf(&self) -> SourceCodeBuf { - self.to_owned() - } - - pub fn to_owned(&self) -> SourceCodeBuf { - SourceCodeBuf::new(self.text, self.index.clone()) - } } impl PartialEq for SourceCode<'_, '_> { @@ -133,117 +128,133 @@ impl PartialEq for SourceCode<'_, '_> { impl Eq for SourceCode<'_, '_> {} -impl PartialEq for SourceCode<'_, '_> { - fn eq(&self, other: &SourceCodeBuf) -> bool { - self.text == &*other.text +/// A Builder for constructing a [`SourceFile`] +pub struct SourceFileBuilder { + name: Box, + code: Option, +} + +impl SourceFileBuilder { + /// Creates a new builder for a file named `name`. + pub fn new(name: &str) -> Self { + Self { + name: Box::from(name), + code: None, + } + } + + /// Creates a enw builder for a file named `name` + pub fn from_string(name: String) -> Self { + Self { + name: Box::from(name), + code: None, + } + } + + /// Consumes `self` and returns a builder for a file with the source text and the [`LineIndex`] copied + /// from `source`. + #[must_use] + pub fn source_code(mut self, source: &SourceCode) -> Self { + self.set_source_code(source); + self + } + + /// Copies the source text and [`LineIndex`] from `source`. + pub fn set_source_code(&mut self, source: &SourceCode) { + self.code = Some(FileSourceCode { + text: Box::from(source.text()), + index: source.index.clone(), + }); + } + + /// Consumes `self` and returns a builder for a file with the source text `text`. Builds the [`LineIndex`] from `text`. + #[must_use] + pub fn source_text(self, text: &str) -> Self { + self.source_code(&SourceCode::new(text, &LineIndex::from_source_text(text))) + } + + /// Consumes `self` and returns a builder for a file with the source text `text`. Builds the [`LineIndex`] from `text`. + #[must_use] + pub fn source_text_string(mut self, text: String) -> Self { + self.set_source_text_string(text); + self + } + + /// Copies the source text `text` and builds the [`LineIndex`] from `text`. + pub fn set_source_text_string(&mut self, text: String) { + self.code = Some(FileSourceCode { + index: LineIndex::from_source_text(&text), + text: Box::from(text), + }); + } + + /// Consumes `self` and returns the [`SourceFile`]. + pub fn finish(self) -> SourceFile { + SourceFile { + inner: Arc::new(SourceFileInner { + name: self.name, + code: self.code, + }), + } } } -/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets. +/// A source file that is identified by its name. Optionally stores the source code and [`LineIndex`]. /// -/// This is the owned pendant to [`SourceCode`]. Cloning only requires bumping reference counters. -#[derive(Clone, Debug)] -pub struct SourceCodeBuf { - text: Arc, +/// Cloning a [`SourceFile`] is cheap, because it only requires bumping a reference count. +#[derive(Clone, Eq, PartialEq)] +pub struct SourceFile { + inner: Arc, +} + +impl Debug for SourceFile { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SourceFile") + .field("name", &self.name()) + .field("code", &self.source_code()) + .finish() + } +} + +impl SourceFile { + /// Returns the name of the source file (filename). + #[inline] + pub fn name(&self) -> &str { + &self.inner.name + } + + /// Returns `Some` with the source code if set, or `None`. + #[inline] + pub fn source_code(&self) -> Option { + self.inner.code.as_ref().map(|code| SourceCode { + text: &code.text, + index: &code.index, + }) + } + + /// Returns `Some` with the source text if set, or `None`. + #[inline] + pub fn source_text(&self) -> Option<&str> { + self.inner.code.as_ref().map(|code| &*code.text) + } +} + +#[derive(Eq, PartialEq)] +struct SourceFileInner { + name: Box, + code: Option, +} + +struct FileSourceCode { + text: Box, index: LineIndex, } -impl SourceCodeBuf { - pub fn new(content: &str, index: LineIndex) -> Self { - Self { - text: Arc::from(content), - index, - } - } - - /// Creates the [`LineIndex`] for `text` and returns the [`SourceCodeBuf`]. - pub fn from_content(text: &str) -> Self { - Self::new(text, LineIndex::from_source_text(text)) - } - - #[inline] - fn as_source_code(&self) -> SourceCode { - SourceCode { - text: &self.text, - index: &self.index, - } - } - - /// Take the source code up to the given [`Location`]. - pub fn up_to(&self, location: Location) -> &str { - self.as_source_code().up_to(location) - } - - /// Take the source code after the given [`Location`]. - pub fn after(&self, location: Location) -> &str { - self.as_source_code().after(location) - } - - /// Take the source code between the given [`Range`]. - #[inline] - pub fn slice>(&self, range: R) -> &str { - self.as_source_code().slice(range) - } - - /// Converts a [`Location`] range to a byte offset range - #[inline] - pub fn text_range>(&self, range: R) -> TextRange { - self.as_source_code().text_range(range) - } - - #[inline] - pub fn line_range(&self, line: OneIndexed) -> TextRange { - self.as_source_code().line_range(line) - } - - /// Return the byte offset of the given [`Location`]. - #[inline] - pub fn offset(&self, location: Location) -> TextSize { - self.as_source_code().offset(location) - } - - #[inline] - pub fn line_end(&self, line: OneIndexed) -> TextSize { - self.as_source_code().line_end(line) - } - - #[inline] - pub fn line_start(&self, line: OneIndexed) -> TextSize { - self.as_source_code().line_start(line) - } - - #[inline] - pub fn lines(&self, range: Range) -> &str { - self.as_source_code().lines(range) - } - - /// Returns the source text of the line with the given index - #[inline] - pub fn line_text(&self, index: OneIndexed) -> &str { - self.as_source_code().line_text(index) - } - - #[inline] - pub fn line_count(&self) -> usize { - self.index.line_count() - } - - pub fn text(&self) -> &str { - &self.text - } -} - -impl PartialEq for SourceCodeBuf { - // The same source text should have the same index +impl PartialEq for FileSourceCode { fn eq(&self, other: &Self) -> bool { + // It should be safe to assume that the index for two source files are identical self.text == other.text } } -impl PartialEq> for SourceCodeBuf { - fn eq(&self, other: &SourceCode<'_, '_>) -> bool { - &*self.text == other.text - } -} - -impl Eq for SourceCodeBuf {} +impl Eq for FileSourceCode {}