From 1d756dc3a74f2f421a74fa72e4290bd1ac678edd Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 9 Jun 2023 20:59:57 -0400 Subject: [PATCH] Move Python whitespace utilities into new `ruff_python_whitespace` crate (#4993) ## Summary `ruff_newlines` becomes `ruff_python_whitespace`, and includes the existing "universal newline" handlers alongside the Python whitespace-specific utilities. --- Cargo.lock | 24 ++++++------- crates/ruff/Cargo.toml | 2 +- crates/ruff/src/autofix/edits.rs | 2 +- crates/ruff/src/checkers/physical_lines.rs | 2 +- crates/ruff/src/doc_lines.rs | 2 +- crates/ruff/src/docstrings/sections.rs | 10 +++--- crates/ruff/src/importer/insertion.rs | 4 +-- crates/ruff/src/noqa.rs | 4 +-- .../ruff/src/rules/flake8_return/helpers.rs | 2 +- .../src/rules/flake8_simplify/rules/ast_if.rs | 2 +- .../rules/flake8_simplify/rules/ast_with.rs | 2 +- crates/ruff/src/rules/isort/helpers.rs | 2 +- .../src/rules/isort/rules/organize_imports.rs | 5 ++- crates/ruff/src/rules/pycodestyle/helpers.rs | 2 +- .../pycodestyle/rules/doc_line_too_long.rs | 2 +- .../pycodestyle/rules/lambda_assignment.rs | 5 ++- .../rules/pycodestyle/rules/line_too_long.rs | 2 +- .../rules/mixed_spaces_and_tabs.rs | 5 ++- .../pycodestyle/rules/tab_indentation.rs | 5 ++- .../pycodestyle/rules/trailing_whitespace.rs | 2 +- crates/ruff/src/rules/pydocstyle/helpers.rs | 2 +- .../pydocstyle/rules/blank_after_summary.rs | 2 +- .../rules/blank_before_after_class.rs | 2 +- .../rules/blank_before_after_function.rs | 2 +- .../pydocstyle/rules/ends_with_period.rs | 2 +- .../pydocstyle/rules/ends_with_punctuation.rs | 2 +- .../ruff/src/rules/pydocstyle/rules/indent.rs | 14 ++++---- .../rules/multi_line_summary_start.rs | 2 +- .../rules/newline_after_last_paragraph.rs | 6 ++-- .../rules/pydocstyle/rules/no_signature.rs | 2 +- .../rules/no_surrounding_whitespace.rs | 2 +- .../pydocstyle/rules/non_imperative_mood.rs | 2 +- .../src/rules/pydocstyle/rules/one_liner.rs | 2 +- .../src/rules/pydocstyle/rules/sections.rs | 27 +++++++------- .../rules/pygrep_hooks/rules/blanket_noqa.rs | 2 +- .../pygrep_hooks/rules/blanket_type_ignore.rs | 2 +- .../pylint/rules/bidirectional_unicode.rs | 2 +- .../rules/unnecessary_coding_comment.rs | 2 +- crates/ruff_python_ast/Cargo.toml | 2 +- crates/ruff_python_ast/src/docstrings.rs | 22 ++++++++++++ crates/ruff_python_ast/src/helpers.rs | 2 +- crates/ruff_python_ast/src/lib.rs | 1 + .../src/source_code/generator.rs | 4 +-- .../src/source_code/locator.rs | 2 +- .../src/source_code/stylist.rs | 4 +-- crates/ruff_python_ast/src/whitespace.rs | 36 ++----------------- crates/ruff_python_formatter/Cargo.toml | 2 +- .../src/comments/placement.rs | 2 +- .../src/comments/visitor.rs | 2 +- crates/ruff_python_formatter/src/trivia.rs | 9 +++-- .../Cargo.toml | 2 +- crates/ruff_python_whitespace/src/lib.rs | 5 +++ .../src/newlines.rs} | 6 ++-- .../ruff_python_whitespace/src/whitespace.rs | 15 ++++++++ crates/ruff_textwrap/Cargo.toml | 2 +- crates/ruff_textwrap/src/lib.rs | 2 +- fuzz/Cargo.lock | 10 +++--- 57 files changed, 153 insertions(+), 140 deletions(-) create mode 100644 crates/ruff_python_ast/src/docstrings.rs rename crates/{ruff_newlines => ruff_python_whitespace}/Cargo.toml (86%) create mode 100644 crates/ruff_python_whitespace/src/lib.rs rename crates/{ruff_newlines/src/lib.rs => ruff_python_whitespace/src/newlines.rs} (99%) create mode 100644 crates/ruff_python_whitespace/src/whitespace.rs diff --git a/Cargo.lock b/Cargo.lock index bc32dffa04..d0c654d156 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1775,10 +1775,10 @@ dependencies = [ "ruff_cache", "ruff_diagnostics", "ruff_macros", - "ruff_newlines", "ruff_python_ast", "ruff_python_semantic", "ruff_python_stdlib", + "ruff_python_whitespace", "ruff_rustpython", "ruff_text_size", "ruff_textwrap", @@ -1945,14 +1945,6 @@ dependencies = [ "syn 2.0.18", ] -[[package]] -name = "ruff_newlines" -version = "0.0.0" -dependencies = [ - "memchr", - "ruff_text_size", -] - [[package]] name = "ruff_python_ast" version = "0.0.0" @@ -1967,7 +1959,7 @@ dependencies = [ "num-bigint", "num-traits", "once_cell", - "ruff_newlines", + "ruff_python_whitespace", "ruff_text_size", "rustc-hash", "rustpython-ast", @@ -1989,8 +1981,8 @@ dependencies = [ "itertools", "once_cell", "ruff_formatter", - "ruff_newlines", "ruff_python_ast", + "ruff_python_whitespace", "ruff_testing_macros", "ruff_text_size", "rustc-hash", @@ -2024,6 +2016,14 @@ dependencies = [ "rustc-hash", ] +[[package]] +name = "ruff_python_whitespace" +version = "0.0.0" +dependencies = [ + "memchr", + "ruff_text_size", +] + [[package]] name = "ruff_rustpython" version = "0.0.0" @@ -2055,7 +2055,7 @@ dependencies = [ name = "ruff_textwrap" version = "0.0.0" dependencies = [ - "ruff_newlines", + "ruff_python_whitespace", "ruff_text_size", ] diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 022691acdd..f32364904b 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -17,7 +17,7 @@ name = "ruff" ruff_cache = { path = "../ruff_cache" } ruff_diagnostics = { path = "../ruff_diagnostics", features = ["serde"] } ruff_macros = { path = "../ruff_macros" } -ruff_newlines = { path = "../ruff_newlines" } +ruff_python_whitespace = { path = "../ruff_python_whitespace" } ruff_python_ast = { path = "../ruff_python_ast", features = ["serde"] } ruff_python_semantic = { path = "../ruff_python_semantic" } ruff_python_stdlib = { path = "../ruff_python_stdlib" } diff --git a/crates/ruff/src/autofix/edits.rs b/crates/ruff/src/autofix/edits.rs index e660c0f20b..00a86bbb5c 100644 --- a/crates/ruff/src/autofix/edits.rs +++ b/crates/ruff/src/autofix/edits.rs @@ -5,9 +5,9 @@ use rustpython_parser::ast::{self, Excepthandler, Expr, Keyword, Ranged, Stmt}; use rustpython_parser::{lexer, Mode, Tok}; use ruff_diagnostics::Edit; -use ruff_newlines::NewlineWithTrailingNewline; use ruff_python_ast::helpers; use ruff_python_ast::source_code::{Indexer, Locator, Stylist}; +use ruff_python_whitespace::NewlineWithTrailingNewline; use crate::autofix::codemods; diff --git a/crates/ruff/src/checkers/physical_lines.rs b/crates/ruff/src/checkers/physical_lines.rs index 124821f221..3a5345ae58 100644 --- a/crates/ruff/src/checkers/physical_lines.rs +++ b/crates/ruff/src/checkers/physical_lines.rs @@ -4,8 +4,8 @@ use ruff_text_size::TextSize; use std::path::Path; use ruff_diagnostics::Diagnostic; -use ruff_newlines::StrExt; use ruff_python_ast::source_code::{Indexer, Locator, Stylist}; +use ruff_python_whitespace::UniversalNewlines; use crate::registry::Rule; use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective}; diff --git a/crates/ruff/src/doc_lines.rs b/crates/ruff/src/doc_lines.rs index 3e2de44920..44ca69cc0c 100644 --- a/crates/ruff/src/doc_lines.rs +++ b/crates/ruff/src/doc_lines.rs @@ -8,9 +8,9 @@ use rustpython_parser::ast::{self, Constant, Expr, Ranged, Stmt, Suite}; use rustpython_parser::lexer::LexResult; use rustpython_parser::Tok; -use ruff_newlines::UniversalNewlineIterator; use ruff_python_ast::source_code::Locator; use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor}; +use ruff_python_whitespace::UniversalNewlineIterator; /// Extract doc lines (standalone comments) from a token sequence. pub(crate) fn doc_lines_from_tokens(lxr: &[LexResult]) -> DocLines { diff --git a/crates/ruff/src/docstrings/sections.rs b/crates/ruff/src/docstrings/sections.rs index f1b4ced286..d61697a1c5 100644 --- a/crates/ruff/src/docstrings/sections.rs +++ b/crates/ruff/src/docstrings/sections.rs @@ -1,11 +1,11 @@ use std::fmt::{Debug, Formatter}; use std::iter::FusedIterator; +use ruff_python_ast::docstrings::{leading_space, leading_words}; use ruff_text_size::{TextLen, TextRange, TextSize}; use strum_macros::EnumIter; -use ruff_newlines::{StrExt, UniversalNewlineIterator}; -use ruff_python_ast::whitespace; +use ruff_python_whitespace::{UniversalNewlineIterator, UniversalNewlines}; use crate::docstrings::styles::SectionStyle; use crate::docstrings::{Docstring, DocstringBody}; @@ -154,8 +154,8 @@ impl<'a> SectionContexts<'a> { } if let Some(section_kind) = suspected_as_section(&line, style) { - let indent = whitespace::leading_space(&line); - let section_name = whitespace::leading_words(&line); + let indent = leading_space(&line); + let section_name = leading_words(&line); let section_name_range = TextRange::at(indent.text_len(), section_name.text_len()); @@ -379,7 +379,7 @@ impl Debug for SectionContext<'_> { } fn suspected_as_section(line: &str, style: SectionStyle) -> Option { - if let Some(kind) = SectionKind::from_str(whitespace::leading_words(line)) { + if let Some(kind) = SectionKind::from_str(leading_words(line)) { if style.sections().contains(&kind) { return Some(kind); } diff --git a/crates/ruff/src/importer/insertion.rs b/crates/ruff/src/importer/insertion.rs index 713ca8524e..a347f795fa 100644 --- a/crates/ruff/src/importer/insertion.rs +++ b/crates/ruff/src/importer/insertion.rs @@ -6,9 +6,9 @@ use rustpython_parser::ast::{Ranged, Stmt}; use rustpython_parser::{lexer, Mode, Tok}; use ruff_diagnostics::Edit; -use ruff_newlines::UniversalNewlineIterator; use ruff_python_ast::helpers::is_docstring_stmt; use ruff_python_ast::source_code::{Locator, Stylist}; +use ruff_python_whitespace::UniversalNewlineIterator; use ruff_textwrap::indent; #[derive(Debug, Clone, PartialEq, Eq)] @@ -304,8 +304,8 @@ mod tests { use rustpython_parser::lexer::LexResult; use rustpython_parser::Parse; - use ruff_newlines::LineEnding; use ruff_python_ast::source_code::{Locator, Stylist}; + use ruff_python_whitespace::LineEnding; use super::Insertion; diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index 62fcdf4b77..4dba6aee9d 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -11,8 +11,8 @@ use regex::Regex; use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_diagnostics::Diagnostic; -use ruff_newlines::LineEnding; use ruff_python_ast::source_code::Locator; +use ruff_python_whitespace::LineEnding; use crate::codes::NoqaCode; use crate::registry::{AsRule, Rule, RuleSet}; @@ -514,8 +514,8 @@ mod tests { use ruff_text_size::{TextRange, TextSize}; use ruff_diagnostics::Diagnostic; - use ruff_newlines::LineEnding; use ruff_python_ast::source_code::Locator; + use ruff_python_whitespace::LineEnding; use crate::noqa::{add_noqa_inner, NoqaMapping, NOQA_LINE_REGEX}; use crate::rules::pycodestyle::rules::AmbiguousVariableName; diff --git a/crates/ruff/src/rules/flake8_return/helpers.rs b/crates/ruff/src/rules/flake8_return/helpers.rs index 652ac72050..bfa8508e14 100644 --- a/crates/ruff/src/rules/flake8_return/helpers.rs +++ b/crates/ruff/src/rules/flake8_return/helpers.rs @@ -1,8 +1,8 @@ use ruff_text_size::TextSize; use rustpython_parser::ast::{Expr, Ranged, Stmt}; -use ruff_newlines::StrExt; use ruff_python_ast::source_code::Locator; +use ruff_python_whitespace::UniversalNewlines; /// Return `true` if a function's return statement include at least one /// non-`None` value. diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs index 42caeff588..431838a350 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs @@ -5,12 +5,12 @@ use rustpython_parser::ast::{self, Cmpop, Constant, Expr, ExprContext, Ranged, S use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::StrExt; use ruff_python_ast::comparable::{ComparableConstant, ComparableExpr, ComparableStmt}; use ruff_python_ast::helpers::{ any_over_expr, contains_effect, first_colon_range, has_comments, has_comments_in, }; use ruff_python_semantic::model::SemanticModel; +use ruff_python_whitespace::UniversalNewlines; use crate::checkers::ast::Checker; use crate::line_width::LineWidth; diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs index 84c041d0f8..cf88a3ba94 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs @@ -5,8 +5,8 @@ use rustpython_parser::ast::{self, Ranged, Stmt, Withitem}; use ruff_diagnostics::{AutofixKind, Violation}; use ruff_diagnostics::{Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::StrExt; use ruff_python_ast::helpers::{first_colon_range, has_comments_in}; +use ruff_python_whitespace::UniversalNewlines; use crate::checkers::ast::Checker; use crate::line_width::LineWidth; diff --git a/crates/ruff/src/rules/isort/helpers.rs b/crates/ruff/src/rules/isort/helpers.rs index 0f520d2dce..fe60ccef67 100644 --- a/crates/ruff/src/rules/isort/helpers.rs +++ b/crates/ruff/src/rules/isort/helpers.rs @@ -1,8 +1,8 @@ use rustpython_parser::ast::{Ranged, Stmt}; use rustpython_parser::{lexer, Mode, Tok}; -use ruff_newlines::StrExt; use ruff_python_ast::source_code::Locator; +use ruff_python_whitespace::UniversalNewlines; use crate::rules::isort::types::TrailingComma; diff --git a/crates/ruff/src/rules/isort/rules/organize_imports.rs b/crates/ruff/src/rules/isort/rules/organize_imports.rs index 9b8140d610..e1acc53c49 100644 --- a/crates/ruff/src/rules/isort/rules/organize_imports.rs +++ b/crates/ruff/src/rules/isort/rules/organize_imports.rs @@ -6,12 +6,11 @@ use rustpython_parser::ast::{Ranged, Stmt}; use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::StrExt; use ruff_python_ast::helpers::{ followed_by_multi_statement_line, preceded_by_multi_statement_line, trailing_lines_end, }; use ruff_python_ast::source_code::{Indexer, Locator, Stylist}; -use ruff_python_ast::whitespace::leading_space; +use ruff_python_whitespace::{leading_indentation, UniversalNewlines}; use ruff_textwrap::indent; use crate::line_width::LineWidth; @@ -89,7 +88,7 @@ pub(crate) fn organize_imports( package: Option<&Path>, ) -> Option { let indentation = locator.slice(extract_indentation_range(&block.imports, locator)); - let indentation = leading_space(indentation); + let indentation = leading_indentation(indentation); let range = extract_range(&block.imports); diff --git a/crates/ruff/src/rules/pycodestyle/helpers.rs b/crates/ruff/src/rules/pycodestyle/helpers.rs index 830eed6694..967efe60b2 100644 --- a/crates/ruff/src/rules/pycodestyle/helpers.rs +++ b/crates/ruff/src/rules/pycodestyle/helpers.rs @@ -2,8 +2,8 @@ use ruff_text_size::{TextLen, TextRange}; use rustpython_parser::ast::{self, Cmpop, Expr}; use unicode_width::UnicodeWidthStr; -use ruff_newlines::Line; use ruff_python_ast::source_code::Generator; +use ruff_python_whitespace::Line; use crate::line_width::{LineLength, LineWidth, TabSize}; diff --git a/crates/ruff/src/rules/pycodestyle/rules/doc_line_too_long.rs b/crates/ruff/src/rules/pycodestyle/rules/doc_line_too_long.rs index efca0844b2..68890d0235 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/doc_line_too_long.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/doc_line_too_long.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::Line; +use ruff_python_whitespace::Line; use crate::rules::pycodestyle::helpers::is_overlong; use crate::settings::Settings; diff --git a/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs b/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs index 338a18e6c0..bca493a8d8 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs @@ -3,11 +3,10 @@ use rustpython_parser::ast::{self, Arg, Arguments, Constant, Expr, Ranged, Stmt} use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::StrExt; use ruff_python_ast::helpers::{has_leading_content, has_trailing_content}; use ruff_python_ast::source_code::Generator; -use ruff_python_ast::whitespace::leading_space; use ruff_python_semantic::model::SemanticModel; +use ruff_python_whitespace::{leading_indentation, UniversalNewlines}; use crate::checkers::ast::Checker; use crate::registry::AsRule; @@ -84,7 +83,7 @@ pub(crate) fn lambda_assignment( && !has_trailing_content(stmt, checker.locator) { let first_line = checker.locator.line(stmt.start()); - let indentation = &leading_space(first_line); + let indentation = leading_indentation(first_line); let mut indented = String::new(); for (idx, line) in function( checker.semantic_model(), diff --git a/crates/ruff/src/rules/pycodestyle/rules/line_too_long.rs b/crates/ruff/src/rules/pycodestyle/rules/line_too_long.rs index 3eb1e2b1e5..e59f61f9a0 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/line_too_long.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/line_too_long.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::Line; +use ruff_python_whitespace::Line; use crate::rules::pycodestyle::helpers::is_overlong; use crate::settings::Settings; diff --git a/crates/ruff/src/rules/pycodestyle/rules/mixed_spaces_and_tabs.rs b/crates/ruff/src/rules/pycodestyle/rules/mixed_spaces_and_tabs.rs index cd85e1ed3e..a0d6444a39 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/mixed_spaces_and_tabs.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/mixed_spaces_and_tabs.rs @@ -2,8 +2,7 @@ use ruff_text_size::{TextLen, TextRange}; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::Line; -use ruff_python_ast::whitespace::leading_space; +use ruff_python_whitespace::{leading_indentation, Line}; /// ## What it does /// Checks for mixed tabs and spaces in indentation. @@ -37,7 +36,7 @@ impl Violation for MixedSpacesAndTabs { /// E101 pub(crate) fn mixed_spaces_and_tabs(line: &Line) -> Option { - let indent = leading_space(line.as_str()); + let indent = leading_indentation(line.as_str()); if indent.contains(' ') && indent.contains('\t') { Some(Diagnostic::new( diff --git a/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs b/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs index 2fa18315b6..d1a4b0c15e 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/tab_indentation.rs @@ -2,9 +2,8 @@ use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::Line; use ruff_python_ast::source_code::Indexer; -use ruff_python_ast::whitespace::leading_space; +use ruff_python_whitespace::{leading_indentation, Line}; #[violation] pub struct TabIndentation; @@ -18,7 +17,7 @@ impl Violation for TabIndentation { /// W191 pub(crate) fn tab_indentation(line: &Line, indexer: &Indexer) -> Option { - let indent = leading_space(line); + let indent = leading_indentation(line); if let Some(tab_index) = indent.find('\t') { // If the tab character is within a multi-line string, abort. let tab_offset = line.start() + TextSize::try_from(tab_index).unwrap(); diff --git a/crates/ruff/src/rules/pycodestyle/rules/trailing_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/trailing_whitespace.rs index d79b238efc..836eafe9e9 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/trailing_whitespace.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/trailing_whitespace.rs @@ -2,7 +2,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::Line; +use ruff_python_whitespace::Line; use crate::registry::Rule; use crate::settings::Settings; diff --git a/crates/ruff/src/rules/pydocstyle/helpers.rs b/crates/ruff/src/rules/pydocstyle/helpers.rs index 3c22305049..a6f8ef6cb9 100644 --- a/crates/ruff/src/rules/pydocstyle/helpers.rs +++ b/crates/ruff/src/rules/pydocstyle/helpers.rs @@ -1,12 +1,12 @@ use std::collections::BTreeSet; -use ruff_newlines::StrExt; use ruff_python_ast::call_path::from_qualified_name; use ruff_python_ast::cast; use ruff_python_ast::helpers::map_callable; use ruff_python_ast::str::is_implicit_concatenation; use ruff_python_semantic::definition::{Definition, Member, MemberKind}; use ruff_python_semantic::model::SemanticModel; +use ruff_python_whitespace::UniversalNewlines; /// Return the index of the first logical line in a string. pub(crate) fn logical_line(content: &str) -> Option { diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs index 35381b5a8d..423b77f78d 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::{StrExt, UniversalNewlineIterator}; +use ruff_python_whitespace::{UniversalNewlineIterator, UniversalNewlines}; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs index b5fd96528d..54e5c887e7 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs @@ -3,8 +3,8 @@ use rustpython_parser::ast::Ranged; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::{StrExt, UniversalNewlineIterator}; use ruff_python_semantic::definition::{Definition, Member, MemberKind}; +use ruff_python_whitespace::{UniversalNewlineIterator, UniversalNewlines}; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs index beb33af9d7..f7b4a5182a 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs @@ -5,8 +5,8 @@ use rustpython_parser::ast::Ranged; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::{StrExt, UniversalNewlineIterator}; use ruff_python_semantic::definition::{Definition, Member, MemberKind}; +use ruff_python_whitespace::{UniversalNewlineIterator, UniversalNewlines}; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs index 4dca6ae21f..85045134a8 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs @@ -3,7 +3,7 @@ use strum::IntoEnumIterator; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::{StrExt, UniversalNewlineIterator}; +use ruff_python_whitespace::{UniversalNewlineIterator, UniversalNewlines}; use crate::checkers::ast::Checker; use crate::docstrings::sections::SectionKind; diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs index d4cfa195e3..8341266f01 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs @@ -3,7 +3,7 @@ use strum::IntoEnumIterator; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::{StrExt, UniversalNewlineIterator}; +use ruff_python_whitespace::{UniversalNewlineIterator, UniversalNewlines}; use crate::checkers::ast::Checker; use crate::docstrings::sections::SectionKind; diff --git a/crates/ruff/src/rules/pydocstyle/rules/indent.rs b/crates/ruff/src/rules/pydocstyle/rules/indent.rs index f3fc53a700..3be1583909 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/indent.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/indent.rs @@ -3,8 +3,8 @@ use ruff_text_size::{TextLen, TextRange}; use ruff_diagnostics::{AlwaysAutofixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::NewlineWithTrailingNewline; -use ruff_python_ast::whitespace; +use ruff_python_ast::docstrings::{clean_space, leading_space}; +use ruff_python_whitespace::NewlineWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; @@ -76,7 +76,7 @@ pub(crate) fn indent(checker: &mut Checker, docstring: &Docstring) { continue; } - let line_indent = whitespace::leading_space(line); + let line_indent = leading_space(line); // We only report tab indentation once, so only check if we haven't seen a tab // yet. @@ -93,7 +93,7 @@ pub(crate) fn indent(checker: &mut Checker, docstring: &Docstring) { if checker.patch(diagnostic.kind.rule()) { #[allow(deprecated)] diagnostic.set_fix(Fix::unspecified(Edit::range_replacement( - whitespace::clean(docstring.indentation), + clean_space(docstring.indentation), TextRange::at(line.start(), line_indent.text_len()), ))); } @@ -133,7 +133,7 @@ pub(crate) fn indent(checker: &mut Checker, docstring: &Docstring) { let mut diagnostic = Diagnostic::new(OverIndentation, TextRange::empty(over_indented.start())); if checker.patch(diagnostic.kind.rule()) { - let indent = whitespace::clean(docstring.indentation); + let indent = clean_space(docstring.indentation); let edit = if indent.is_empty() { Edit::range_deletion(over_indented) } else { @@ -148,12 +148,12 @@ pub(crate) fn indent(checker: &mut Checker, docstring: &Docstring) { // If the last line is over-indented... if let Some(last) = lines.last() { - let line_indent = whitespace::leading_space(last); + let line_indent = leading_space(last); if line_indent.len() > docstring.indentation.len() { let mut diagnostic = Diagnostic::new(OverIndentation, TextRange::empty(last.start())); if checker.patch(diagnostic.kind.rule()) { - let indent = whitespace::clean(docstring.indentation); + let indent = clean_space(docstring.indentation); let range = TextRange::at(last.start(), line_indent.text_len()); let edit = if indent.is_empty() { Edit::range_deletion(range) diff --git a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs index c15fe2c40b..8453973125 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs @@ -3,9 +3,9 @@ use rustpython_parser::ast::Ranged; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::{NewlineWithTrailingNewline, UniversalNewlineIterator}; use ruff_python_ast::str::{is_triple_quote, leading_quote}; use ruff_python_semantic::definition::{Definition, Member}; +use ruff_python_whitespace::{NewlineWithTrailingNewline, UniversalNewlineIterator}; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; diff --git a/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs b/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs index 10c3ff222e..499cfb4bae 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs @@ -3,8 +3,8 @@ use rustpython_parser::ast::Ranged; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::{NewlineWithTrailingNewline, StrExt}; -use ruff_python_ast::whitespace; +use ruff_python_ast::docstrings::clean_space; +use ruff_python_whitespace::{NewlineWithTrailingNewline, UniversalNewlines}; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; @@ -56,7 +56,7 @@ pub(crate) fn newline_after_last_paragraph(checker: &mut Checker, docstring: &Do let content = format!( "{}{}", checker.stylist.line_ending().as_str(), - whitespace::clean(docstring.indentation) + clean_space(docstring.indentation) ); #[allow(deprecated)] diagnostic.set_fix(Fix::unspecified(Edit::replacement( diff --git a/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs b/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs index fe6b538637..bbca269d99 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs @@ -2,8 +2,8 @@ use rustpython_parser::ast::{self, Stmt}; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::StrExt; use ruff_python_semantic::definition::{Definition, Member, MemberKind}; +use ruff_python_whitespace::UniversalNewlines; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; diff --git a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs index 1125f0482b..105df09759 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs @@ -2,7 +2,7 @@ use ruff_text_size::{TextLen, TextRange}; use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::NewlineWithTrailingNewline; +use ruff_python_whitespace::NewlineWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; diff --git a/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs b/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs index 7df49c500c..5459f09512 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs @@ -5,11 +5,11 @@ use once_cell::sync::Lazy; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::StrExt; use ruff_python_ast::call_path::{from_qualified_name, CallPath}; use ruff_python_ast::cast; use ruff_python_semantic::analyze::visibility::{is_property, is_test}; use ruff_python_semantic::definition::{Definition, Member, MemberKind}; +use ruff_python_whitespace::UniversalNewlines; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; diff --git a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs index 6403d9578a..616083fc15 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs @@ -1,7 +1,7 @@ use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::NewlineWithTrailingNewline; use ruff_python_ast::str::{leading_quote, trailing_quote}; +use ruff_python_whitespace::NewlineWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::Docstring; diff --git a/crates/ruff/src/rules/pydocstyle/rules/sections.rs b/crates/ruff/src/rules/pydocstyle/rules/sections.rs index 458d87ac4b..f101a18246 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/sections.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/sections.rs @@ -8,11 +8,12 @@ use rustpython_parser::ast::{self, Stmt}; use ruff_diagnostics::{AlwaysAutofixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::NewlineWithTrailingNewline; +use ruff_python_ast::cast; +use ruff_python_ast::docstrings::{clean_space, leading_space}; use ruff_python_ast::helpers::identifier_range; -use ruff_python_ast::{cast, whitespace}; use ruff_python_semantic::analyze::visibility::is_staticmethod; use ruff_python_semantic::definition::{Definition, Member, MemberKind}; +use ruff_python_whitespace::NewlineWithTrailingNewline; use ruff_textwrap::dedent; use crate::checkers::ast::Checker; @@ -400,7 +401,7 @@ fn blanks_and_section_underline( // Replace the existing underline with a line of the appropriate length. let content = format!( "{}{}{}", - whitespace::clean(docstring.indentation), + clean_space(docstring.indentation), "-".repeat(context.section_name().len()), checker.stylist.line_ending().as_str() ); @@ -416,7 +417,7 @@ fn blanks_and_section_underline( } if checker.enabled(Rule::SectionUnderlineNotOverIndented) { - let leading_space = whitespace::leading_space(&non_blank_line); + let leading_space = leading_space(&non_blank_line); if leading_space.len() > docstring.indentation.len() { let mut diagnostic = Diagnostic::new( SectionUnderlineNotOverIndented { @@ -433,7 +434,7 @@ fn blanks_and_section_underline( // Replace the existing indentation with whitespace of the appropriate length. #[allow(deprecated)] diagnostic.set_fix(Fix::unspecified(Edit::range_replacement( - whitespace::clean(docstring.indentation), + clean_space(docstring.indentation), range, ))); }; @@ -503,7 +504,7 @@ fn blanks_and_section_underline( let content = format!( "{}{}{}", checker.stylist.line_ending().as_str(), - whitespace::clean(docstring.indentation), + clean_space(docstring.indentation), "-".repeat(context.section_name().len()), ); #[allow(deprecated)] @@ -548,7 +549,7 @@ fn blanks_and_section_underline( let content = format!( "{}{}{}", checker.stylist.line_ending().as_str(), - whitespace::clean(docstring.indentation), + clean_space(docstring.indentation), "-".repeat(context.section_name().len()), ); @@ -601,7 +602,7 @@ fn common_section( } if checker.enabled(Rule::SectionNotOverIndented) { - let leading_space = whitespace::leading_space(context.summary_line()); + let leading_space = leading_space(context.summary_line()); if leading_space.len() > docstring.indentation.len() { let mut diagnostic = Diagnostic::new( SectionNotOverIndented { @@ -611,7 +612,7 @@ fn common_section( ); if checker.patch(diagnostic.kind.rule()) { // Replace the existing indentation with whitespace of the appropriate length. - let content = whitespace::clean(docstring.indentation); + let content = clean_space(docstring.indentation); let fix_range = TextRange::at(context.range().start(), leading_space.text_len()); #[allow(deprecated)] @@ -775,7 +776,7 @@ fn args_section(context: &SectionContext) -> FxHashSet { // Normalize leading whitespace, by removing any lines with less indentation // than the first. - let leading_space = whitespace::leading_space(first_line.as_str()); + let leading_space = leading_space(first_line.as_str()); let relevant_lines = std::iter::once(first_line) .chain(following_lines) .map(|l| l.as_str()) @@ -819,7 +820,7 @@ fn args_section(context: &SectionContext) -> FxHashSet { fn parameters_section(checker: &mut Checker, docstring: &Docstring, context: &SectionContext) { // Collect the list of arguments documented in the docstring. let mut docstring_args: FxHashSet = FxHashSet::default(); - let section_level_indent = whitespace::leading_space(context.summary_line()); + let section_level_indent = leading_space(context.summary_line()); // Join line continuations, then resplit by line. let adjusted_following_lines = context @@ -830,9 +831,9 @@ fn parameters_section(checker: &mut Checker, docstring: &Docstring, context: &Se let mut lines = NewlineWithTrailingNewline::from(&adjusted_following_lines); if let Some(mut current_line) = lines.next() { for next_line in lines { - let current_leading_space = whitespace::leading_space(current_line.as_str()); + let current_leading_space = leading_space(current_line.as_str()); if current_leading_space == section_level_indent - && (whitespace::leading_space(&next_line).len() > current_leading_space.len()) + && (leading_space(&next_line).len() > current_leading_space.len()) && !next_line.trim().is_empty() { let parameters = if let Some(semi_index) = current_line.find(':') { diff --git a/crates/ruff/src/rules/pygrep_hooks/rules/blanket_noqa.rs b/crates/ruff/src/rules/pygrep_hooks/rules/blanket_noqa.rs index 540ccb6fc1..7a51270a0d 100644 --- a/crates/ruff/src/rules/pygrep_hooks/rules/blanket_noqa.rs +++ b/crates/ruff/src/rules/pygrep_hooks/rules/blanket_noqa.rs @@ -4,7 +4,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::Line; +use ruff_python_whitespace::Line; /// ## What it does /// Check for `noqa` annotations that suppress all diagnostics, as opposed to diff --git a/crates/ruff/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs b/crates/ruff/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs index f4675229e0..7563d26b82 100644 --- a/crates/ruff/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs +++ b/crates/ruff/src/rules/pygrep_hooks/rules/blanket_type_ignore.rs @@ -5,7 +5,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::Line; +use ruff_python_whitespace::Line; /// ## What it does /// Check for `type: ignore` annotations that suppress all type warnings, as diff --git a/crates/ruff/src/rules/pylint/rules/bidirectional_unicode.rs b/crates/ruff/src/rules/pylint/rules/bidirectional_unicode.rs index 1bf4583995..3cabdde303 100644 --- a/crates/ruff/src/rules/pylint/rules/bidirectional_unicode.rs +++ b/crates/ruff/src/rules/pylint/rules/bidirectional_unicode.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::Line; +use ruff_python_whitespace::Line; const BIDI_UNICODE: [char; 10] = [ '\u{202A}', //{LEFT-TO-RIGHT EMBEDDING} diff --git a/crates/ruff/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs b/crates/ruff/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs index 2da53041e4..7b474ab5f0 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs @@ -3,7 +3,7 @@ use regex::Regex; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_newlines::Line; +use ruff_python_whitespace::Line; // TODO: document referencing [PEP 3120]: https://peps.python.org/pep-3120/ #[violation] diff --git a/crates/ruff_python_ast/Cargo.toml b/crates/ruff_python_ast/Cargo.toml index 5f5a53ef35..14726020cd 100644 --- a/crates/ruff_python_ast/Cargo.toml +++ b/crates/ruff_python_ast/Cargo.toml @@ -8,7 +8,7 @@ rust-version = { workspace = true } [lib] [dependencies] -ruff_newlines = { path = "../ruff_newlines" } +ruff_python_whitespace = { path = "../ruff_python_whitespace" } ruff_text_size = { workspace = true } anyhow = { workspace = true } diff --git a/crates/ruff_python_ast/src/docstrings.rs b/crates/ruff_python_ast/src/docstrings.rs new file mode 100644 index 0000000000..3722afe636 --- /dev/null +++ b/crates/ruff_python_ast/src/docstrings.rs @@ -0,0 +1,22 @@ +//! Utilities for parsing Python docstrings. + +/// Extract the leading words from a line of text within a Python docstring. +pub fn leading_words(line: &str) -> &str { + let line = line.trim(); + line.find(|char: char| !char.is_alphanumeric() && !char.is_whitespace()) + .map_or(line, |index| &line[..index]) +} + +/// Extract the leading whitespace from a line of text within a Python docstring. +pub fn leading_space(line: &str) -> &str { + line.find(|char: char| !char.is_whitespace()) + .map_or(line, |index| &line[..index]) +} + +/// Replace any non-whitespace characters from an indentation string within a Python docstring. +pub fn clean_space(indentation: &str) -> String { + indentation + .chars() + .map(|char| if char.is_whitespace() { char } else { ' ' }) + .collect() +} diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index dc620ceded..627b9c2f09 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -4,7 +4,7 @@ use std::path::Path; use itertools::Itertools; use log::error; use num_traits::Zero; -use ruff_newlines::UniversalNewlineIterator; +use ruff_python_whitespace::UniversalNewlineIterator; use ruff_text_size::{TextRange, TextSize}; use rustc_hash::{FxHashMap, FxHashSet}; use rustpython_parser::ast::{ diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 552c7f02bd..be82bf8233 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -2,6 +2,7 @@ pub mod all; pub mod call_path; pub mod cast; pub mod comparable; +pub mod docstrings; pub mod function; pub mod hashable; pub mod helpers; diff --git a/crates/ruff_python_ast/src/source_code/generator.rs b/crates/ruff_python_ast/src/source_code/generator.rs index 718364591f..f7b4e8949c 100644 --- a/crates/ruff_python_ast/src/source_code/generator.rs +++ b/crates/ruff_python_ast/src/source_code/generator.rs @@ -8,7 +8,7 @@ use rustpython_parser::ast::{ Excepthandler, Expr, Identifier, MatchCase, Operator, Pattern, Stmt, Suite, Withitem, }; -use ruff_newlines::LineEnding; +use ruff_python_whitespace::LineEnding; use crate::source_code::stylist::{Indentation, Quote, Stylist}; @@ -1459,7 +1459,7 @@ mod tests { use rustpython_ast::Suite; use rustpython_parser::Parse; - use ruff_newlines::LineEnding; + use ruff_python_whitespace::LineEnding; use crate::source_code::stylist::{Indentation, Quote}; use crate::source_code::Generator; diff --git a/crates/ruff_python_ast/src/source_code/locator.rs b/crates/ruff_python_ast/src/source_code/locator.rs index cf4faedc81..c2884c1522 100644 --- a/crates/ruff_python_ast/src/source_code/locator.rs +++ b/crates/ruff_python_ast/src/source_code/locator.rs @@ -6,7 +6,7 @@ use memchr::{memchr2, memrchr2}; use once_cell::unsync::OnceCell; use ruff_text_size::{TextLen, TextRange, TextSize}; -use ruff_newlines::find_newline; +use ruff_python_whitespace::find_newline; use crate::source_code::{LineIndex, OneIndexed, SourceCode, SourceLocation}; diff --git a/crates/ruff_python_ast/src/source_code/stylist.rs b/crates/ruff_python_ast/src/source_code/stylist.rs index e0c09ad31a..b5eefa702c 100644 --- a/crates/ruff_python_ast/src/source_code/stylist.rs +++ b/crates/ruff_python_ast/src/source_code/stylist.rs @@ -4,7 +4,7 @@ use std::fmt; use std::ops::Deref; use once_cell::unsync::OnceCell; -use ruff_newlines::{find_newline, LineEnding}; +use ruff_python_whitespace::{find_newline, LineEnding}; use rustpython_literal::escape::Quote as StrQuote; use rustpython_parser::lexer::LexResult; use rustpython_parser::Tok; @@ -166,7 +166,7 @@ mod tests { use rustpython_parser::lexer::lex; use rustpython_parser::Mode; - use ruff_newlines::{find_newline, LineEnding}; + use ruff_python_whitespace::{find_newline, LineEnding}; use crate::source_code::stylist::{Indentation, Quote}; use crate::source_code::{Locator, Stylist}; diff --git a/crates/ruff_python_ast/src/whitespace.rs b/crates/ruff_python_ast/src/whitespace.rs index 0fd47276e2..26bc9130aa 100644 --- a/crates/ruff_python_ast/src/whitespace.rs +++ b/crates/ruff_python_ast/src/whitespace.rs @@ -1,6 +1,8 @@ use ruff_text_size::{TextRange, TextSize}; use rustpython_parser::ast::Ranged; +use ruff_python_whitespace::is_python_whitespace; + use crate::source_code::Locator; /// Extract the leading indentation from a line. @@ -17,41 +19,9 @@ pub fn indentation_at_offset<'a>(locator: &'a Locator, offset: TextSize) -> Opti let line_start = locator.line_start(offset); let indentation = &locator.contents()[TextRange::new(line_start, offset)]; - if indentation.chars().all(char::is_whitespace) { + if indentation.chars().all(is_python_whitespace) { Some(indentation) } else { None } } - -/// Extract the leading words from a line of text. -pub fn leading_words(line: &str) -> &str { - let line = line.trim(); - line.find(|char: char| !char.is_alphanumeric() && !char.is_whitespace()) - .map_or(line, |index| &line[..index]) -} - -/// Extract the leading whitespace from a line of text. -pub fn leading_space(line: &str) -> &str { - line.find(|char: char| !char.is_whitespace()) - .map_or(line, |index| &line[..index]) -} - -/// Replace any non-whitespace characters from an indentation string. -pub fn clean(indentation: &str) -> String { - indentation - .chars() - .map(|char| if char.is_whitespace() { char } else { ' ' }) - .collect() -} - -/// Returns `true` for [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens) -/// or new-line characters. -pub const fn is_python_whitespace(c: char) -> bool { - matches!( - c, - ' ' | '\n' | '\t' | '\r' | - // Form-feed - '\x0C' - ) -} diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml index 15e3d215a4..bd5526bc43 100644 --- a/crates/ruff_python_formatter/Cargo.toml +++ b/crates/ruff_python_formatter/Cargo.toml @@ -7,7 +7,7 @@ rust-version = { workspace = true } [dependencies] ruff_formatter = { path = "../ruff_formatter" } -ruff_newlines = { path = "../ruff_newlines" } +ruff_python_whitespace = { path = "../ruff_python_whitespace" } ruff_python_ast = { path = "../ruff_python_ast" } ruff_text_size = { workspace = true } diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index 730544dac1..b5e99495a1 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -1,10 +1,10 @@ use crate::comments::visitor::{CommentPlacement, DecoratedComment}; use crate::comments::CommentTextPosition; use crate::trivia::{SimpleTokenizer, TokenKind}; -use ruff_newlines::StrExt; use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::source_code::Locator; use ruff_python_ast::whitespace; +use ruff_python_whitespace::UniversalNewlines; use ruff_text_size::{TextRange, TextSize}; use rustpython_parser::ast::Ranged; use std::cmp::Ordering; diff --git a/crates/ruff_python_formatter/src/comments/visitor.rs b/crates/ruff_python_formatter/src/comments/visitor.rs index e3d4cb9934..a6cb1e40cc 100644 --- a/crates/ruff_python_formatter/src/comments/visitor.rs +++ b/crates/ruff_python_formatter/src/comments/visitor.rs @@ -9,7 +9,7 @@ use ruff_python_ast::source_code::{CommentRanges, Locator}; // pre-order. #[allow(clippy::wildcard_imports)] use ruff_python_ast::visitor::preorder::*; -use ruff_python_ast::whitespace::is_python_whitespace; +use ruff_python_whitespace::is_python_whitespace; use ruff_text_size::TextRange; use std::iter::Peekable; diff --git a/crates/ruff_python_formatter/src/trivia.rs b/crates/ruff_python_formatter/src/trivia.rs index 40a767b07e..c1f5bcfdf6 100644 --- a/crates/ruff_python_formatter/src/trivia.rs +++ b/crates/ruff_python_formatter/src/trivia.rs @@ -1,7 +1,9 @@ -use ruff_python_ast::whitespace::is_python_whitespace; -use ruff_text_size::{TextLen, TextRange, TextSize}; use std::str::Chars; +use ruff_text_size::{TextLen, TextRange, TextSize}; + +use ruff_python_whitespace::is_python_whitespace; + /// Searches for the first non-trivia character in `range`. /// /// The search skips over any whitespace and comments. @@ -528,10 +530,11 @@ impl<'a> Cursor<'a> { #[cfg(test)] mod tests { - use crate::trivia::{lines_after, lines_before, SimpleTokenizer, Token}; use insta::assert_debug_snapshot; use ruff_text_size::{TextLen, TextRange, TextSize}; + use crate::trivia::{lines_after, lines_before, SimpleTokenizer, Token}; + struct TokenizationTestCase { source: &'static str, range: TextRange, diff --git a/crates/ruff_newlines/Cargo.toml b/crates/ruff_python_whitespace/Cargo.toml similarity index 86% rename from crates/ruff_newlines/Cargo.toml rename to crates/ruff_python_whitespace/Cargo.toml index a2dfe8959d..584418405b 100644 --- a/crates/ruff_newlines/Cargo.toml +++ b/crates/ruff_python_whitespace/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "ruff_newlines" +name = "ruff_python_whitespace" version = "0.0.0" publish = false edition = { workspace = true } diff --git a/crates/ruff_python_whitespace/src/lib.rs b/crates/ruff_python_whitespace/src/lib.rs new file mode 100644 index 0000000000..36d3ddee97 --- /dev/null +++ b/crates/ruff_python_whitespace/src/lib.rs @@ -0,0 +1,5 @@ +mod newlines; +mod whitespace; + +pub use newlines::*; +pub use whitespace::*; diff --git a/crates/ruff_newlines/src/lib.rs b/crates/ruff_python_whitespace/src/newlines.rs similarity index 99% rename from crates/ruff_newlines/src/lib.rs rename to crates/ruff_python_whitespace/src/newlines.rs index 919d4ac231..73043c5c0b 100644 --- a/crates/ruff_newlines/src/lib.rs +++ b/crates/ruff_python_whitespace/src/newlines.rs @@ -5,11 +5,11 @@ use memchr::{memchr2, memrchr2}; use ruff_text_size::{TextLen, TextRange, TextSize}; /// Extension trait for [`str`] that provides a [`UniversalNewlineIterator`]. -pub trait StrExt { +pub trait UniversalNewlines { fn universal_newlines(&self) -> UniversalNewlineIterator<'_>; } -impl StrExt for str { +impl UniversalNewlines for str { fn universal_newlines(&self) -> UniversalNewlineIterator<'_> { UniversalNewlineIterator::from(self) } @@ -22,7 +22,7 @@ impl StrExt for str { /// /// ```rust /// # use ruff_text_size::TextSize; -/// # use ruff_newlines::{Line, UniversalNewlineIterator}; +/// # use ruff_python_whitespace::{Line, UniversalNewlineIterator}; /// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop"); /// /// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14)))); diff --git a/crates/ruff_python_whitespace/src/whitespace.rs b/crates/ruff_python_whitespace/src/whitespace.rs new file mode 100644 index 0000000000..00139d3640 --- /dev/null +++ b/crates/ruff_python_whitespace/src/whitespace.rs @@ -0,0 +1,15 @@ +/// Returns `true` for [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens) +/// characters. +pub const fn is_python_whitespace(c: char) -> bool { + matches!( + c, + // Space, tab, or form-feed + ' ' | '\t' | '\x0C' + ) +} + +/// Extract the leading indentation from a line. +pub fn leading_indentation(line: &str) -> &str { + line.find(|char: char| !is_python_whitespace(char)) + .map_or(line, |index| &line[..index]) +} diff --git a/crates/ruff_textwrap/Cargo.toml b/crates/ruff_textwrap/Cargo.toml index a5217c16cc..864a259d33 100644 --- a/crates/ruff_textwrap/Cargo.toml +++ b/crates/ruff_textwrap/Cargo.toml @@ -6,5 +6,5 @@ edition = { workspace = true } rust-version = { workspace = true } [dependencies] -ruff_newlines = { path = "../ruff_newlines" } +ruff_python_whitespace = { path = "../ruff_python_whitespace" } ruff_text_size = { workspace = true } diff --git a/crates/ruff_textwrap/src/lib.rs b/crates/ruff_textwrap/src/lib.rs index bd803851df..f1e35dba97 100644 --- a/crates/ruff_textwrap/src/lib.rs +++ b/crates/ruff_textwrap/src/lib.rs @@ -4,7 +4,7 @@ use std::borrow::Cow; use std::cmp; -use ruff_newlines::StrExt; +use ruff_python_whitespace::UniversalNewlines; /// Indent each line by the given prefix. /// diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index fb4e5f09a8..677868be0c 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -1061,7 +1061,7 @@ dependencies = [ "ruff_cache", "ruff_diagnostics", "ruff_macros", - "ruff_newlines", + "ruff_python_whitespace", "ruff_python_ast", "ruff_python_semantic", "ruff_python_stdlib", @@ -1149,7 +1149,7 @@ dependencies = [ ] [[package]] -name = "ruff_newlines" +name = "ruff_python_whitespace" version = "0.0.0" dependencies = [ "memchr", @@ -1169,7 +1169,7 @@ dependencies = [ "num-bigint", "num-traits", "once_cell", - "ruff_newlines", + "ruff_python_whitespace", "ruff_text_size", "rustc-hash", "rustpython-ast", @@ -1190,7 +1190,7 @@ dependencies = [ "itertools", "once_cell", "ruff_formatter", - "ruff_newlines", + "ruff_python_whitespace", "ruff_python_ast", "ruff_text_size", "rustc-hash", @@ -1242,7 +1242,7 @@ dependencies = [ name = "ruff_textwrap" version = "0.0.0" dependencies = [ - "ruff_newlines", + "ruff_python_whitespace", "ruff_text_size", ]