Files
ruff/src/eradicate/detection.rs
Martin Fischer 8d56e412ef Add task-tags setting
Programmers often leave comments to themselves and others such as:

    # TODO: Use a faster algorithm?

The keywords used to prefix such comments are just a convention and vary
from project to project. Other common keywords include FIXME and HACK.

The keywords in use for the codebase are of interest to ruff because
ruff also lints comments. For example, the ERA lint detects
commented-out code but ignores comments starting with such a keyword.
Previously the ERA lint simply hardcoded the regular expression
TODO|FIXME|XXX to achieve that. This commit introduces a new `task-tags`
setting to make this configurable (and to allow other comment lints to
recognize the same set of keywords).

The term "task tags" was probably popularized by the Eclipse
IDE.[1] For Python there was the proposal PEP 350[2], which
referred to such keywords as "codetags". That proposal, however, was
rejected. We are choosing the term "task tags" over "code tags" because
the former is more descriptive: a task tag describes a task.

According to PEP 350, such keywords are also sometimes used for
non-tasks (e.g. NOBUG to describe a well-known problem that will never be
addressed due to design problems or domain limitations), but such keywords
are so rare that we are neglecting them here in favor of more
descriptive terminology. The vast majority of such keywords do
describe tasks, so naming the setting "task-tags" is apt.

[1]: https://www.eclipse.org/pdt/help/html/task_tags.htm
[2]: https://peps.python.org/pep-0350/

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
2023-01-04 23:54:50 -05:00

269 lines
9.8 KiB
Rust

/// See: [eradicate.py](https://github.com/myint/eradicate/blob/98f199940979c94447a461d50d27862b118b282d/eradicate.py)
use once_cell::sync::Lazy;
use regex::Regex;
/// Matches comment text that starts with a tool directive rather than code:
/// `pylint`, `pyright`, `noqa`, `nosec`, `type: ignore`, `fmt: on|off`, or an
/// `isort:` action (`on`, `off`, `skip`, `skip_file`, `split`,
/// `dont-add-imports`, optionally followed by `: [...]`).
///
/// The match is anchored at the start of the text and is case-insensitive.
/// `comment_contains_code` returns `false` for any comment that matches.
static ALLOWLIST_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r"^(?i)(?:pylint|pyright|noqa|nosec|type:\s*ignore|fmt:\s*(on|off)|isort:\s*(on|off|skip|skip_file|split|dont-add-imports(:\s*\[.*?])?))"
).unwrap()
});
/// Matches text made up solely of brackets (`()[]{}`) and whitespace, e.g. the
/// closing line `} )]` of a multiline expression. Used by `multiline_case`.
static BRACKET_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[()\[\]{}\s]+$").unwrap());
/// Substrings that hint at code. `comment_contains_code` rejects a comment
/// outright if it contains none of them, before running the more expensive
/// regex and parser checks.
static CODE_INDICATORS: &[&str] = &[
"(", ")", "[", "]", "{", "}", ":", "=", "%", "print", "return", "break", "continue", "import",
];
/// Patterns for clause headers that cannot stand alone as a statement:
/// `elif ...:`, `else:`, `try:`, `finally:` and `except ...:`. Each pattern
/// must match the whole text, allowing surrounding whitespace.
///
/// Note that the `elif` and `except` patterns require whitespace after the
/// keyword, so a bare `except:` is not matched by this list.
static CODE_KEYWORDS: Lazy<Vec<Regex>> = Lazy::new(|| {
vec![
Regex::new(r"^\s*elif\s+.*\s*:\s*$").unwrap(),
Regex::new(r"^\s*else\s*:\s*$").unwrap(),
Regex::new(r"^\s*try\s*:\s*$").unwrap(),
Regex::new(r"^\s*finally\s*:\s*$").unwrap(),
Regex::new(r"^\s*except\s+.*\s*:\s*$").unwrap(),
]
});
/// Matches a source-encoding declaration: the word `coding` followed by `:` or
/// `=`, optional spaces or tabs, and an encoding name made of letters, digits,
/// `-`, `_` or `.` (e.g. `coding=utf-8`, `encoding: utf8`). Because of the
/// leading `.*?`, `coding` may appear anywhere in the text.
static CODING_COMMENT_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)").unwrap());
/// Matches a `#` immediately followed by a digit anywhere in the text, as in
/// the issue reference `Issue #999`.
static HASH_NUMBER: Lazy<Regex> = Lazy::new(|| Regex::new(r"#\d").unwrap());
/// Matches an assignment whose line ends with an opening bracket (`(`, `[` or
/// `{`), i.e. the first line of a multiline call or literal. The left-hand
/// side is one or more comma-separated words, optionally wrapped in `(...)` or
/// `[...]`, which covers unpacking such as `a, b = foo(`.
static MULTILINE_ASSIGNMENT_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^\s*([(\[]\s*)?(\w+\s*,\s*)*\w+\s*([)\]]\s*)?=.*[(\[{]$").unwrap());
/// Matches a line from the middle of a dictionary literal: a quoted word key,
/// a colon, some value text, and a trailing `,` or `{` (e.g. `"key": value,`).
static PARTIAL_DICTIONARY_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"^\s*['"]\w+['"]\s*:.+[,{]\s*$"#).unwrap());
/// Matches a leading `print` or `return` keyword together with the whitespace
/// that follows it. `comment_contains_code` strips this prefix before the
/// dictionary and parser checks.
static PRINT_RETURN_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(print|return)\b\s*").unwrap());
/// Returns `true` if a comment contains Python code.
///
/// `line` is the raw comment line including its leading `#`; surrounding
/// whitespace is ignored. Text that does not start with `#` is never reported
/// as code, so a trailing comment after real code (`x = 1  # x = 1`) yields
/// `false`.
///
/// `task_tags` lists the keywords (e.g. `TODO`, `FIXME`) that mark a comment
/// as a note to the reader. A comment whose first word is exactly one of these
/// tags is never reported, whether the tag is followed by a space, a colon, or
/// a parenthesized annotation such as `TODO(charlie):`.
///
/// The remaining checks run from cheapest to most expensive: fixed exclusions
/// (issue references, tool directives, encoding declarations), a substring
/// pre-filter, regex heuristics for fragments of multiline code, and finally
/// an attempt to parse the text as a Python program.
pub fn comment_contains_code(line: &str, task_tags: &[String]) -> bool {
    let line = if let Some(line) = line.trim().strip_prefix('#') {
        line.trim()
    } else {
        return false;
    };

    // Ignore non-comment related hashes (e.g., "# Issue #999").
    if HASH_NUMBER.is_match(line) {
        return false;
    }

    // Ignore allowlisted comments (tool directives such as `noqa`).
    if ALLOWLIST_REGEX.is_match(line) {
        return false;
    }

    // Ignore comments that start with a task tag. The tag ends at the first
    // space, colon, or opening parenthesis, so that `TODO: ...`, `TODO ...`
    // and `TODO(author): ...` are all recognized as the tag `TODO`.
    if let Some(first) = line.split(&[' ', ':', '(']).next() {
        if task_tags.iter().any(|tag| tag == first) {
            return false;
        }
    }

    // Ignore source-encoding declarations (e.g., "# coding: utf-8").
    if CODING_COMMENT_REGEX.is_match(line) {
        return false;
    }

    // Check that this is possibly code.
    if CODE_INDICATORS.iter().all(|symbol| !line.contains(symbol)) {
        return false;
    }

    // Fragments of multiline statements would not parse on their own, so they
    // are detected heuristically.
    if multiline_case(line) {
        return true;
    }

    // Likewise for clause headers such as `else:` or `except Foo:`.
    if CODE_KEYWORDS.iter().any(|symbol| symbol.is_match(line)) {
        return true;
    }

    // Drop a leading `print` / `return` keyword before the remaining checks.
    let line = PRINT_RETURN_REGEX.replace_all(line, "");

    if PARTIAL_DICTIONARY_REGEX.is_match(&line) {
        return true;
    }

    // Finally, compile the source code.
    rustpython_parser::parser::parse_program(&line, "<filename>").is_ok()
}
/// Returns `true` if a line is probably part of some multiline code.
///
/// A line qualifies when it ends with a backslash (an explicit line
/// continuation), when it is an assignment that ends with an opening bracket,
/// or when it consists only of brackets and whitespace.
fn multiline_case(line: &str) -> bool {
    let has_continuation = line.ends_with('\\');

    // The checks short-circuit left to right, so the regexes only run when the
    // cheaper test before them has failed.
    has_continuation
        || MULTILINE_ASSIGNMENT_REGEX.is_match(line)
        || BRACKET_REGEX.is_match(line)
}
#[cfg(test)]
mod tests {
    use crate::eradicate::detection::comment_contains_code;

    #[test]
    fn comment_contains_code_basic() {
        assert!(comment_contains_code("# x = 1", &[]));
        assert!(comment_contains_code("#from foo import eradicate", &[]));
        assert!(comment_contains_code("#import eradicate", &[]));
        assert!(comment_contains_code(r#"#"key": value,"#, &[]));
        assert!(comment_contains_code(r#"#"key": "value","#, &[]));
        assert!(comment_contains_code(r#"#"key": 1 + 1,"#, &[]));
        assert!(comment_contains_code("#'key': 1 + 1,", &[]));
        assert!(comment_contains_code(r#"#"key": {"#, &[]));
        assert!(comment_contains_code("#}", &[]));
        assert!(comment_contains_code("#} )]", &[]));

        assert!(!comment_contains_code("#", &[]));
        assert!(!comment_contains_code("# This is a (real) comment.", &[]));
        assert!(!comment_contains_code("# 123", &[]));
        assert!(!comment_contains_code("# 123.1", &[]));
        assert!(!comment_contains_code("# 1, 2, 3", &[]));
        assert!(!comment_contains_code("x = 1  # x = 1", &[]));
        assert!(!comment_contains_code(
            "# pylint: disable=redefined-outer-name",
            &[]
        ));
        assert!(!comment_contains_code(
            "# Issue #999: This is not code",
            &[]
        ));

        // TODO(charlie): This should be `true` under aggressive mode.
        assert!(!comment_contains_code("#},", &[]));
    }

    #[test]
    fn comment_contains_code_with_print() {
        assert!(comment_contains_code("#print", &[]));
        assert!(comment_contains_code("#print(1)", &[]));
        assert!(comment_contains_code("#print 1", &[]));

        assert!(!comment_contains_code("#to print", &[]));
    }

    #[test]
    fn comment_contains_code_with_return() {
        assert!(comment_contains_code("#return x", &[]));

        // A keyword in the middle of a sentence does not make it code.
        assert!(!comment_contains_code("#to return", &[]));
    }

    #[test]
    fn comment_contains_code_with_multiline() {
        assert!(comment_contains_code("#else:", &[]));
        assert!(comment_contains_code("#  else  :  ", &[]));
        assert!(comment_contains_code(r#"# "foo %d" % \\"#, &[]));
        assert!(comment_contains_code("#elif True:", &[]));
        assert!(comment_contains_code("#x = foo(", &[]));
        assert!(comment_contains_code("#except Exception:", &[]));

        assert!(!comment_contains_code("# this is = to that :(", &[]));
        assert!(!comment_contains_code("#else", &[]));
        assert!(!comment_contains_code("#or else:", &[]));
        assert!(!comment_contains_code("#else True:", &[]));

        // Unpacking assignments
        assert!(comment_contains_code(
            "# user_content_type, _ = TimelineEvent.objects.using(db_alias).get_or_create(",
            &[]
        ));
        assert!(comment_contains_code(
            "# (user_content_type, _) = TimelineEvent.objects.using(db_alias).get_or_create(",
            &[]
        ));
        assert!(comment_contains_code(
            "# ( user_content_type , _ )= TimelineEvent.objects.using(db_alias).get_or_create(",
            &[]
        ));
        assert!(comment_contains_code(
            "#     app_label=\"core\", model=\"user\"",
            &[]
        ));
        assert!(comment_contains_code("# )", &[]));

        // TODO(charlie): This should be `true` under aggressive mode.
        assert!(!comment_contains_code("#def foo():", &[]));
    }

    #[test]
    fn comment_contains_code_with_sentences() {
        assert!(!comment_contains_code("#code is good", &[]));
    }

    #[test]
    fn comment_contains_code_with_encoding() {
        assert!(comment_contains_code("# codings=utf-8", &[]));

        assert!(!comment_contains_code("# coding=utf-8", &[]));
        assert!(!comment_contains_code("#coding= utf-8", &[]));
        assert!(!comment_contains_code("# coding: utf-8", &[]));
        assert!(!comment_contains_code("# encoding: utf8", &[]));
    }

    #[test]
    fn comment_contains_code_with_default_allowlist() {
        assert!(!comment_contains_code("# pylint: disable=A0123", &[]));
        assert!(!comment_contains_code("# pylint:disable=A0123", &[]));
        assert!(!comment_contains_code("# pylint: disable = A0123", &[]));
        assert!(!comment_contains_code("# pylint:disable = A0123", &[]));
        assert!(!comment_contains_code(
            "# pyright: reportErrorName=true",
            &[]
        ));
        assert!(!comment_contains_code("# noqa", &[]));
        assert!(!comment_contains_code("# NOQA", &[]));
        assert!(!comment_contains_code("# noqa: A123", &[]));
        assert!(!comment_contains_code("# noqa:A123", &[]));
        assert!(!comment_contains_code("# nosec", &[]));
        assert!(!comment_contains_code("# fmt: on", &[]));
        assert!(!comment_contains_code("# fmt: off", &[]));
        assert!(!comment_contains_code("# fmt:on", &[]));
        assert!(!comment_contains_code("# fmt:off", &[]));
        assert!(!comment_contains_code("# isort: on", &[]));
        assert!(!comment_contains_code("# isort:on", &[]));
        assert!(!comment_contains_code("# isort: off", &[]));
        assert!(!comment_contains_code("# isort:off", &[]));
        assert!(!comment_contains_code("# isort: skip", &[]));
        assert!(!comment_contains_code("# isort:skip", &[]));
        assert!(!comment_contains_code("# isort: skip_file", &[]));
        assert!(!comment_contains_code("# isort:skip_file", &[]));
        assert!(!comment_contains_code("# isort: split", &[]));
        assert!(!comment_contains_code("# isort:split", &[]));
        assert!(!comment_contains_code("# isort: dont-add-imports", &[]));
        assert!(!comment_contains_code("# isort:dont-add-imports", &[]));
        assert!(!comment_contains_code(
            "# isort: dont-add-imports: [\"import os\"]",
            &[]
        ));
        assert!(!comment_contains_code(
            "# isort:dont-add-imports: [\"import os\"]",
            &[]
        ));
        assert!(!comment_contains_code(
            "# isort: dont-add-imports:[\"import os\"]",
            &[]
        ));
        assert!(!comment_contains_code(
            "# isort:dont-add-imports:[\"import os\"]",
            &[]
        ));
        assert!(!comment_contains_code("# type: ignore", &[]));
        assert!(!comment_contains_code("# type:ignore", &[]));
        assert!(!comment_contains_code("# type: ignore[import]", &[]));
        assert!(!comment_contains_code("# type:ignore[import]", &[]));
        assert!(!comment_contains_code(
            "# TODO: Do that",
            &["TODO".to_string()]
        ));
        assert!(!comment_contains_code(
            "# FIXME: Fix that",
            &["FIXME".to_string()]
        ));
        assert!(!comment_contains_code(
            "# XXX: What ever",
            &["XXX".to_string()]
        ));
    }

    #[test]
    fn comment_contains_code_with_task_tags() {
        let task_tags = ["TODO".to_string(), "FIXME".to_string()];

        // The trailing backslash makes the text look like a continued line of
        // code, so these comments are only ignored because of their tag.
        assert!(comment_contains_code("# TODO: foo = bar \\", &[]));
        assert!(!comment_contains_code("# TODO: foo = bar \\", &task_tags));
        assert!(!comment_contains_code("# FIXME foo = bar \\", &task_tags));

        // A tag only counts as the first word of the comment.
        assert!(comment_contains_code("# foo = TODO \\", &task_tags));

        // Keywords that are not configured as tags get no special treatment.
        assert!(comment_contains_code("# HACK: foo = bar \\", &task_tags));
    }
}