Move `utf8-encoding-declaration` to token-based rules (#6110)

Closes #5979.
This commit is contained in:
Charlie Marsh 2023-07-26 18:42:37 -04:00 committed by GitHub
parent 8113615534
commit 64a186272f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 56 additions and 46 deletions

View File

@@ -0,0 +1,4 @@
print('Hello world')
#!/usr/bin/python
# -*- coding: utf-8 -*-

View File

@@ -13,7 +13,6 @@ use crate::rules::pycodestyle::rules::{
tab_indentation, trailing_whitespace,
};
use crate::rules::pylint;
use crate::rules::pyupgrade::rules::unnecessary_coding_comment;
use crate::settings::Settings;
pub(crate) fn check_physical_lines(
@@ -28,7 +27,6 @@ pub(crate) fn check_physical_lines(
let enforce_doc_line_too_long = settings.rules.enabled(Rule::DocLineTooLong);
let enforce_line_too_long = settings.rules.enabled(Rule::LineTooLong);
let enforce_no_newline_at_end_of_file = settings.rules.enabled(Rule::MissingNewlineAtEndOfFile);
let enforce_unnecessary_coding_comment = settings.rules.enabled(Rule::UTF8EncodingDeclaration);
let enforce_mixed_spaces_and_tabs = settings.rules.enabled(Rule::MixedSpacesAndTabs);
let enforce_bidirectional_unicode = settings.rules.enabled(Rule::BidirectionalUnicode);
let enforce_trailing_whitespace = settings.rules.enabled(Rule::TrailingWhitespace);
@@ -37,27 +35,9 @@ pub(crate) fn check_physical_lines(
let enforce_tab_indentation = settings.rules.enabled(Rule::TabIndentation);
let enforce_copyright_notice = settings.rules.enabled(Rule::MissingCopyrightNotice);
let fix_unnecessary_coding_comment = settings.rules.should_fix(Rule::UTF8EncodingDeclaration);
let mut commented_lines_iter = indexer.comment_ranges().iter().peekable();
let mut doc_lines_iter = doc_lines.iter().peekable();
for (index, line) in locator.contents().universal_newlines().enumerate() {
while commented_lines_iter
.next_if(|comment_range| line.range().contains_range(**comment_range))
.is_some()
{
if enforce_unnecessary_coding_comment {
if index < 2 {
if let Some(diagnostic) =
unnecessary_coding_comment(&line, fix_unnecessary_coding_comment)
{
diagnostics.push(diagnostic);
}
}
}
}
for line in locator.contents().universal_newlines() {
while doc_lines_iter
.next_if(|doc_line_start| line.range().contains_inclusive(**doc_line_start))
.is_some()

View File

@@ -69,6 +69,10 @@ pub(crate) fn check_tokens(
eradicate::rules::commented_out_code(&mut diagnostics, locator, indexer, settings);
}
if settings.rules.enabled(Rule::UTF8EncodingDeclaration) {
pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, indexer, settings);
}
if settings.rules.enabled(Rule::InvalidEscapeSequence) {
for (tok, range) in tokens.iter().flatten() {
if tok.is_string() {

View File

@@ -246,8 +246,7 @@ impl Rule {
| Rule::MissingNewlineAtEndOfFile
| Rule::MixedSpacesAndTabs
| Rule::TabIndentation
| Rule::TrailingWhitespace
| Rule::UTF8EncodingDeclaration => LintSource::PhysicalLines,
| Rule::TrailingWhitespace => LintSource::PhysicalLines,
Rule::AmbiguousUnicodeCharacterComment
| Rule::AmbiguousUnicodeCharacterDocstring
| Rule::AmbiguousUnicodeCharacterString
@@ -289,7 +288,8 @@ impl Rule {
| Rule::SingleLineImplicitStringConcatenation
| Rule::TrailingCommaOnBareTuple
| Rule::TypeCommentInStub
| Rule::UselessSemicolon => LintSource::Tokens,
| Rule::UselessSemicolon
| Rule::UTF8EncodingDeclaration => LintSource::Tokens,
Rule::IOError => LintSource::Io,
Rule::UnsortedImports | Rule::MissingRequiredImport => LintSource::Imports,
Rule::ImplicitNamespacePackage | Rule::InvalidModuleName => LintSource::Filesystem,

View File

@@ -68,6 +68,7 @@ mod tests {
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_2.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_3.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_4.py"))]
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_5.py"))]
#[test_case(Rule::UnicodeKindPrefix, Path::new("UP025.py"))]
#[test_case(Rule::UnnecessaryBuiltinImport, Path::new("UP029.py"))]
#[test_case(Rule::UnnecessaryClassParentheses, Path::new("UP039.py"))]

View File

@@ -3,7 +3,11 @@ use regex::Regex;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_source_file::Line;
use ruff_python_index::Indexer;
use ruff_source_file::Locator;
use crate::registry::AsRule;
use crate::settings::Settings;
/// ## What it does
/// Checks for unnecessary UTF-8 encoding declarations.
@@ -43,18 +47,31 @@ static CODING_COMMENT_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*utf-?8").unwrap());
/// UP009
pub(crate) fn unnecessary_coding_comment(line: &Line, autofix: bool) -> Option<Diagnostic> {
// PEP3120 makes utf-8 the default encoding.
if CODING_COMMENT_REGEX.is_match(line.as_str()) {
let mut diagnostic = Diagnostic::new(UTF8EncodingDeclaration, line.full_range());
if autofix {
pub(crate) fn unnecessary_coding_comment(
diagnostics: &mut Vec<Diagnostic>,
locator: &Locator,
indexer: &Indexer,
settings: &Settings,
) {
// The coding comment must be on one of the first two lines. Since each comment spans at least
// one line, we only need to check the first two comments at most.
for range in indexer.comment_ranges().iter().take(2) {
let line = locator.slice(*range);
if CODING_COMMENT_REGEX.is_match(line) {
#[allow(deprecated)]
let line = locator.compute_line_index(range.start());
if line.to_zero_indexed() > 1 {
continue;
}
let mut diagnostic = Diagnostic::new(UTF8EncodingDeclaration, *range);
if settings.rules.should_fix(diagnostic.kind.rule()) {
diagnostic.set_fix(Fix::automatic(Edit::deletion(
line.start(),
line.full_end(),
range.start(),
locator.full_line_end(range.end()),
)));
}
Some(diagnostic)
} else {
None
diagnostics.push(diagnostic);
}
}
}

View File

@@ -3,9 +3,9 @@ source: crates/ruff/src/rules/pyupgrade/mod.rs
---
UP009_0.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
1 | / # coding=utf8
2 | |
| |_^ UP009
1 | # coding=utf8
| ^^^^^^^^^^^^^ UP009
2 |
3 | print("Hello world")
|
= help: Remove unnecessary coding comment

View File

@@ -4,9 +4,9 @@ source: crates/ruff/src/rules/pyupgrade/mod.rs
UP009_1.py:2:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
1 | #!/usr/bin/python
2 | / # -*- coding: utf-8 -*-
3 | |
| |_^ UP009
2 | # -*- coding: utf-8 -*-
| ^^^^^^^^^^^^^^^^^^^^^^^ UP009
3 |
4 | print('Hello world')
|
= help: Remove unnecessary coding comment

View File

@@ -0,0 +1,4 @@
---
source: crates/ruff/src/rules/pyupgrade/mod.rs
---