Use unicode-width to determine line-length instead of character count (#3714)

This commit is contained in:
Micha Reiser 2023-03-24 22:17:05 +01:00 committed by GitHub
parent dc4d7619ee
commit 7af83460ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 59 additions and 34 deletions

1
Cargo.lock generated
View File

@ -2033,6 +2033,7 @@ dependencies = [
"textwrap", "textwrap",
"thiserror", "thiserror",
"toml", "toml",
"unicode-width",
] ]
[[package]] [[package]]

View File

@ -58,6 +58,7 @@ rustpython-parser = { workspace = true }
schemars = { workspace = true } schemars = { workspace = true }
semver = { version = "1.0.16" } semver = { version = "1.0.16" }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true }
shellexpand = { workspace = true } shellexpand = { workspace = true }
smallvec = { version = "1.10.0" } smallvec = { version = "1.10.0" }
strum = { workspace = true } strum = { workspace = true }
@ -65,7 +66,7 @@ strum_macros = { workspace = true }
textwrap = { workspace = true } textwrap = { workspace = true }
thiserror = { version = "1.0.38" } thiserror = { version = "1.0.38" }
toml = { workspace = true } toml = { workspace = true }
serde_json = { workspace = true } unicode-width = "0.1.10"
[dev-dependencies] [dev-dependencies]
insta = { workspace = true, features = ["yaml", "redactions"] } insta = { workspace = true, features = ["yaml", "redactions"] }

View File

@ -211,7 +211,7 @@ mod tests {
flags::Autofix::Enabled, flags::Autofix::Enabled,
) )
}; };
assert!(!check_with_max_line_length(6).is_empty()); assert_eq!(check_with_max_line_length(8), vec![]);
assert!(check_with_max_line_length(7).is_empty()); assert_eq!(check_with_max_line_length(8), vec![]);
} }
} }

View File

@ -1,6 +1,7 @@
use log::error; use log::error;
use rustc_hash::FxHashSet; use rustc_hash::FxHashSet;
use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprContext, ExprKind, Stmt, StmtKind}; use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprContext, ExprKind, Stmt, StmtKind};
use unicode_width::UnicodeWidthStr;
use ruff_diagnostics::{AutofixKind, Diagnostic, Fix, Violation}; use ruff_diagnostics::{AutofixKind, Diagnostic, Fix, Violation};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
@ -287,7 +288,7 @@ pub fn nested_if_statements(
if fix if fix
.content .content
.universal_newlines() .universal_newlines()
.all(|line| line.len() <= checker.settings.line_length) .all(|line| line.width() <= checker.settings.line_length)
{ {
diagnostic.amend(fix); diagnostic.amend(fix);
} }
@ -490,7 +491,7 @@ pub fn use_ternary_operator(checker: &mut Checker, stmt: &Stmt, parent: Option<&
let contents = unparse_stmt(&ternary, checker.stylist); let contents = unparse_stmt(&ternary, checker.stylist);
// Don't flag if the resulting expression would exceed the maximum line length. // Don't flag if the resulting expression would exceed the maximum line length.
if stmt.location.column() + contents.len() > checker.settings.line_length { if stmt.location.column() + contents.width() > checker.settings.line_length {
return; return;
} }
@ -839,7 +840,7 @@ pub fn use_dict_get_with_default(
); );
// Don't flag if the resulting expression would exceed the maximum line length. // Don't flag if the resulting expression would exceed the maximum line length.
if stmt.location.column() + contents.len() > checker.settings.line_length { if stmt.location.column() + contents.width() > checker.settings.line_length {
return; return;
} }

View File

@ -1,5 +1,6 @@
use log::error; use log::error;
use rustpython_parser::ast::{Located, Stmt, StmtKind, Withitem}; use rustpython_parser::ast::{Located, Stmt, StmtKind, Withitem};
use unicode_width::UnicodeWidthStr;
use ruff_diagnostics::Diagnostic; use ruff_diagnostics::Diagnostic;
use ruff_diagnostics::{AutofixKind, Violation}; use ruff_diagnostics::{AutofixKind, Violation};
@ -117,7 +118,7 @@ pub fn multiple_with_statements(
if fix if fix
.content .content
.universal_newlines() .universal_newlines()
.all(|line| line.len() <= checker.settings.line_length) .all(|line| line.width() <= checker.settings.line_length)
{ {
diagnostic.amend(fix); diagnostic.amend(fix);
} }

View File

@ -1,6 +1,7 @@
use rustpython_parser::ast::{ use rustpython_parser::ast::{
Cmpop, Comprehension, Constant, Expr, ExprContext, ExprKind, Location, Stmt, StmtKind, Unaryop, Cmpop, Comprehension, Constant, Expr, ExprContext, ExprKind, Location, Stmt, StmtKind, Unaryop,
}; };
use unicode_width::UnicodeWidthStr;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
@ -211,7 +212,7 @@ pub fn convert_for_loop_to_any_all(checker: &mut Checker, stmt: &Stmt, sibling:
); );
// Don't flag if the resulting expression would exceed the maximum line length. // Don't flag if the resulting expression would exceed the maximum line length.
if stmt.location.column() + contents.len() > checker.settings.line_length { if stmt.location.column() + contents.width() > checker.settings.line_length {
return; return;
} }
@ -288,7 +289,7 @@ pub fn convert_for_loop_to_any_all(checker: &mut Checker, stmt: &Stmt, sibling:
); );
// Don't flag if the resulting expression would exceed the maximum line length. // Don't flag if the resulting expression would exceed the maximum line length.
if stmt.location.column() + contents.len() > checker.settings.line_length { if stmt.location.column() + contents.width() > checker.settings.line_length {
return; return;
} }

View File

@ -1,4 +1,5 @@
use ruff_python_ast::source_code::Stylist; use ruff_python_ast::source_code::Stylist;
use unicode_width::UnicodeWidthStr;
use super::types::{AliasData, CommentSet, ImportFromData, Importable}; use super::types::{AliasData, CommentSet, ImportFromData, Importable};
@ -69,9 +70,9 @@ pub fn format_import_from(
|| aliases.len() == 1 || aliases.len() == 1
|| aliases.iter().all(|(alias, _)| alias.asname.is_none())) || aliases.iter().all(|(alias, _)| alias.asname.is_none()))
{ {
let (single_line, import_length) = let (single_line, import_width) =
format_single_line(import_from, comments, aliases, is_first, stylist); format_single_line(import_from, comments, aliases, is_first, stylist);
if import_length <= line_length || aliases.iter().any(|(alias, _)| alias.name == "*") { if import_width <= line_length || aliases.iter().any(|(alias, _)| alias.name == "*") {
return single_line; return single_line;
} }
} }
@ -90,7 +91,7 @@ fn format_single_line(
stylist: &Stylist, stylist: &Stylist,
) -> (String, usize) { ) -> (String, usize) {
let mut output = String::with_capacity(CAPACITY); let mut output = String::with_capacity(CAPACITY);
let mut line_length = 0; let mut line_width = 0;
if !is_first && !comments.atop.is_empty() { if !is_first && !comments.atop.is_empty() {
output.push_str(stylist.line_ending()); output.push_str(stylist.line_ending());
@ -104,28 +105,28 @@ fn format_single_line(
output.push_str("from "); output.push_str("from ");
output.push_str(&module_name); output.push_str(&module_name);
output.push_str(" import "); output.push_str(" import ");
line_length += 5 + module_name.len() + 8; line_width += 5 + module_name.width() + 8;
for (index, (AliasData { name, asname }, comments)) in aliases.iter().enumerate() { for (index, (AliasData { name, asname }, comments)) in aliases.iter().enumerate() {
if let Some(asname) = asname { if let Some(asname) = asname {
output.push_str(name); output.push_str(name);
output.push_str(" as "); output.push_str(" as ");
output.push_str(asname); output.push_str(asname);
line_length += name.len() + 4 + asname.len(); line_width += name.width() + 4 + asname.width();
} else { } else {
output.push_str(name); output.push_str(name);
line_length += name.len(); line_width += name.width();
} }
if index < aliases.len() - 1 { if index < aliases.len() - 1 {
output.push_str(", "); output.push_str(", ");
line_length += 2; line_width += 2;
} }
for comment in &comments.inline { for comment in &comments.inline {
output.push(' '); output.push(' ');
output.push(' '); output.push(' ');
output.push_str(comment); output.push_str(comment);
line_length += 2 + comment.len(); line_width += 2 + comment.width();
} }
} }
@ -133,12 +134,12 @@ fn format_single_line(
output.push(' '); output.push(' ');
output.push(' '); output.push(' ');
output.push_str(comment); output.push_str(comment);
line_length += 2 + comment.len(); line_width += 2 + comment.width();
} }
output.push_str(stylist.line_ending()); output.push_str(stylist.line_ending());
(output, line_length) (output, line_width)
} }
/// Format an import-from statement in multi-line format. /// Format an import-from statement in multi-line format.

View File

@ -25,12 +25,12 @@ static URL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^https?://\S+$").unwra
pub fn is_overlong( pub fn is_overlong(
line: &str, line: &str,
line_length: usize, line_width: usize,
limit: usize, limit: usize,
ignore_overlong_task_comments: bool, ignore_overlong_task_comments: bool,
task_tags: &[String], task_tags: &[String],
) -> bool { ) -> bool {
if line_length <= limit { if line_width <= limit {
return false; return false;
} }

View File

@ -2,6 +2,7 @@ use bitflags::bitflags;
use rustpython_parser::ast::Location; use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult; use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok; use rustpython_parser::Tok;
use unicode_width::UnicodeWidthStr;
use ruff_python_ast::source_code::Locator; use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range; use ruff_python_ast::types::Range;
@ -86,7 +87,7 @@ fn build_line<'a>(
// TODO(charlie): "Mute" strings. // TODO(charlie): "Mute" strings.
let s; let s;
let text = if let Tok::String { value, .. } = tok { let text = if let Tok::String { value, .. } = tok {
s = format!("\"{}\"", "x".repeat(value.len()).clone()); s = format!("\"{}\"", "x".repeat(value.width()).clone());
&s &s
} else { } else {
locator.slice(Range { locator.slice(Range {

View File

@ -1,4 +1,5 @@
use rustpython_parser::ast::Location; use rustpython_parser::ast::Location;
use unicode_width::UnicodeWidthStr;
use ruff_diagnostics::{Diagnostic, Violation}; use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
@ -34,8 +35,8 @@ pub struct DocLineTooLong(pub usize, pub usize);
impl Violation for DocLineTooLong { impl Violation for DocLineTooLong {
#[derive_message_formats] #[derive_message_formats]
fn message(&self) -> String { fn message(&self) -> String {
let DocLineTooLong(length, limit) = self; let DocLineTooLong(width, limit) = self;
format!("Doc line too long ({length} > {limit} characters)") format!("Doc line too long ({width} > {limit} characters)")
} }
} }
@ -45,19 +46,19 @@ pub fn doc_line_too_long(lineno: usize, line: &str, settings: &Settings) -> Opti
return None; return None;
}; };
let line_length = line.chars().count(); let line_width = line.width();
if is_overlong( if is_overlong(
line, line,
line_length, line_width,
limit, limit,
settings.pycodestyle.ignore_overlong_task_comments, settings.pycodestyle.ignore_overlong_task_comments,
&settings.task_tags, &settings.task_tags,
) { ) {
Some(Diagnostic::new( Some(Diagnostic::new(
DocLineTooLong(line_length, limit), DocLineTooLong(line_width, limit),
Range::new( Range::new(
Location::new(lineno + 1, limit), Location::new(lineno + 1, limit),
Location::new(lineno + 1, line_length), Location::new(lineno + 1, line.chars().count()),
), ),
)) ))
} else { } else {

View File

@ -1,4 +1,5 @@
use rustpython_parser::ast::Location; use rustpython_parser::ast::Location;
use unicode_width::UnicodeWidthStr;
use ruff_diagnostics::{Diagnostic, Violation}; use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation}; use ruff_macros::{derive_message_formats, violation};
@ -31,27 +32,27 @@ pub struct LineTooLong(pub usize, pub usize);
impl Violation for LineTooLong { impl Violation for LineTooLong {
#[derive_message_formats] #[derive_message_formats]
fn message(&self) -> String { fn message(&self) -> String {
let LineTooLong(length, limit) = self; let LineTooLong(width, limit) = self;
format!("Line too long ({length} > {limit} characters)") format!("Line too long ({width} > {limit} characters)")
} }
} }
/// E501 /// E501
pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option<Diagnostic> { pub fn line_too_long(lineno: usize, line: &str, settings: &Settings) -> Option<Diagnostic> {
let line_length = line.chars().count(); let line_width = line.width();
let limit = settings.line_length; let limit = settings.line_length;
if is_overlong( if is_overlong(
line, line,
line_length, line_width,
limit, limit,
settings.pycodestyle.ignore_overlong_task_comments, settings.pycodestyle.ignore_overlong_task_comments,
&settings.task_tags, &settings.task_tags,
) { ) {
Some(Diagnostic::new( Some(Diagnostic::new(
LineTooLong(line_length, limit), LineTooLong(line_width, limit),
Range::new( Range::new(
Location::new(lineno + 1, limit), Location::new(lineno + 1, limit),
Location::new(lineno + 1, line_length), Location::new(lineno + 1, line.chars().count()),
), ),
)) ))
} else { } else {

View File

@ -15,6 +15,19 @@ expression: diagnostics
column: 123 column: 123
fix: ~ fix: ~
parent: ~ parent: ~
- kind:
name: LineTooLong
body: Line too long (95 > 88 characters)
suggestion: ~
fixable: false
location:
row: 16
column: 88
end_location:
row: 16
column: 88
fix: ~
parent: ~
- kind: - kind:
name: LineTooLong name: LineTooLong
body: Line too long (127 > 88 characters) body: Line too long (127 > 88 characters)

View File

@ -32,3 +32,6 @@ mimalloc = "0.1.34"
[target.'cfg(all(not(target_os = "windows"), not(target_os = "openbsd"), any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64")))'.dev-dependencies] [target.'cfg(all(not(target_os = "windows"), not(target_os = "openbsd"), any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64")))'.dev-dependencies]
tikv-jemallocator = "0.5.0" tikv-jemallocator = "0.5.0"
[features]
logical_lines = [ "ruff/logical_lines" ]