Add messy fixable detection to reduce rule changes

This commit is contained in:
Zanie 2023-10-27 12:08:44 -05:00
parent c806b4a35a
commit 6025081825
5 changed files with 260 additions and 97 deletions

View File

@ -271,4 +271,4 @@ _New rules are added in [preview](https://docs.astral.sh/ruff/preview/)._
### Playground ### Playground
- Fix playground `Quick Fix` action ([#7824](https://github.com/astral-sh/ruff/pull/7824)) - Fix playground `Quick Fix` action ([#7824](https://github.com/astral-sh/ruff/pull/7824))

View File

@ -8,6 +8,7 @@ import re
import time import time
from asyncio import create_subprocess_exec from asyncio import create_subprocess_exec
from collections import Counter from collections import Counter
import dataclasses
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from subprocess import PIPE from subprocess import PIPE
@ -24,7 +25,7 @@ from ruff_ecosystem.types import (
) )
if TYPE_CHECKING: if TYPE_CHECKING:
from ruff_ecosystem.projects import ClonedRepository from ruff_ecosystem.projects import ClonedRepository, Project
# Matches lines that are summaries rather than diagnostics # Matches lines that are summaries rather than diagnostics
@ -35,16 +36,26 @@ CHECK_DIFF_LINE_RE = re.compile(
r"^(?P<pre>[+-]) (?P<inner>(?P<path>[^:]+):(?P<lnum>\d+):\d+:) (?P<post>.*)$", r"^(?P<pre>[+-]) (?P<inner>(?P<path>[^:]+):(?P<lnum>\d+):\d+:) (?P<post>.*)$",
) )
CHECK_DIAGNOSTIC_LINE_RE = re.compile(
r"^(?P<diff>[+-])? ?(?P<location>.*): (?P<code>[A-Z]{1,4}[0-9]{3,4})(?P<fixable> \[\*\])? (?P<message>.*)"
)
CHECK_VIOLATION_FIX_INDICATOR = " [*]"
def markdown_check_result(result: Result) -> str: def markdown_check_result(result: Result) -> str:
# Calculate the total number of rule changes # Calculate the total number of rule changes
all_rule_changes = RuleChanges() all_rule_changes = RuleChanges()
for _, comparison in result.completed: project_rule_changes: dict[Project, Comparison] = {}
all_rule_changes.update(RuleChanges.from_diff(comparison.diff)) for project, comparison in result.completed:
project_rule_changes[project] = changes = RuleChanges.from_diff(comparison.diff)
all_rule_changes.update(changes)
lines = [] lines = []
total_removed = all_rule_changes.total_removed() total_removed = all_rule_changes.total_removed_violations()
total_added = all_rule_changes.total_added() total_added = all_rule_changes.total_added_violations()
total_added_fixes = all_rule_changes.total_added_fixes()
total_removed_fixes = all_rule_changes.total_removed_fixes()
error_count = len(result.errored) error_count = len(result.errored)
if total_removed == 0 and total_added == 0 and error_count == 0: if total_removed == 0 and total_added == 0 and error_count == 0:
@ -52,7 +63,7 @@ def markdown_check_result(result: Result) -> str:
# Summarize the total changes # Summarize the total changes
s = "s" if error_count != 1 else "" s = "s" if error_count != 1 else ""
changes = f"(+{total_added}, -{total_removed}, {error_count} error{s})" changes = f"(+{total_added}, -{total_removed} violations; +{total_added_fixes}, -{total_removed_fixes} fixes; {error_count} error{s})"
lines.append(f"\u2139\ufe0f ecosystem check **detected linter changes**. {changes}") lines.append(f"\u2139\ufe0f ecosystem check **detected linter changes**. {changes}")
lines.append("") lines.append("")
@ -78,9 +89,17 @@ def markdown_check_result(result: Result) -> str:
diff_lines.append("</pre>") diff_lines.append("</pre>")
rule_changes = project_rule_changes[project]
title = (
f"+{rule_changes.added_violations}, "
f"-{rule_changes.removed_violations} violations; "
f"+{rule_changes.added_fixes} "
f"-{rule_changes.removed_fixes} fixes; "
)
lines.extend( lines.extend(
markdown_project_section( markdown_project_section(
title=f"+{diff.lines_added}, -{diff.lines_removed}", title=title,
content=diff_lines, content=diff_lines,
options=project.check_options, options=project.check_options,
project=project, project=project,
@ -100,18 +119,23 @@ def markdown_check_result(result: Result) -> str:
# Display a summary table of changed rules # Display a summary table of changed rules
if all_rule_changes: if all_rule_changes:
table_lines = [] table_lines = []
table_lines.append("| Rule | Changes | Additions | Removals |") table_lines.append("| code | total | + violation | - violation | + fix | - fix")
table_lines.append("| ---- | ------- | --------- | -------- |") table_lines.append("| ---- | ------- | --------- | -------- |")
for rule, total in sorted( for rule, total in sorted(
all_rule_changes.total_changes_by_rule(), all_rule_changes.total_changes_by_rule(),
key=lambda item: item[1], # Sort by the total changes key=lambda item: item[1], # Sort by the total changes
reverse=True, reverse=True,
): ):
additions, removals = ( added_violations, removed_violations, added_fixes, removed_fixes = (
all_rule_changes.added[rule], all_rule_changes.added_violations[rule],
all_rule_changes.removed[rule], all_rule_changes.removed_violations[rule],
all_rule_changes.added_fixes[rule],
all_rule_changes.removed_fixes[rule],
)
table_lines.append(
f"| {rule} | {total} | {added_violations} | {removed_violations} "
f"| {added_fixes} | {removed_fixes} |"
) )
table_lines.append(f"| {rule} | {total} | {additions} | {removals} |")
lines.extend( lines.extend(
markdown_details( markdown_details(
@ -130,11 +154,13 @@ class RuleChanges:
The number of additions and removals by rule code The number of additions and removals by rule code
""" """
added: Counter = field(default_factory=Counter) added_violations: Counter = field(default_factory=Counter)
removed: Counter = field(default_factory=Counter) removed_violations: Counter = field(default_factory=Counter)
added_fixes: Counter = field(default_factory=Counter)
removed_fixes: Counter = field(default_factory=Counter)
def rule_codes(self) -> set[str]: def rule_codes(self) -> set[str]:
return set(self.added.keys()).union(self.removed.keys()) return set(self.added_violations.keys()).union(self.removed_violations.keys())
def __add__(self, other: Self) -> Self: def __add__(self, other: Self) -> Self:
if not isinstance(other, type(self)): if not isinstance(other, type(self)):
@ -146,23 +172,33 @@ class RuleChanges:
return new return new
def update(self, other: Self) -> Self: def update(self, other: Self) -> Self:
self.added.update(other.added) self.added_violations.update(other.added_violations)
self.removed.update(other.removed) self.removed_violations.update(other.removed_violations)
self.added_fixes.update(other.added_fixes)
self.removed_fixes.update(other.removed_fixes)
return self return self
def total_added(self) -> int: def total_added_violations(self) -> int:
return sum(self.added.values()) return sum(self.added_violations.values())
def total_removed(self) -> int: def total_removed_violations(self) -> int:
return sum(self.removed.values()) return sum(self.removed_violations.values())
def total_added_fixes(self) -> int:
return sum(self.added_fixes.values())
def total_removed_fixes(self) -> int:
return sum(self.removed_fixes.values())
def total_changes_by_rule(self) -> Iterator[str, int]: def total_changes_by_rule(self) -> Iterator[str, int]:
""" """
Yields the sum of changes for each rule Yields the sum of changes for each rule
""" """
totals = Counter() totals = Counter()
totals.update(self.added) totals.update(self.added_violations)
totals.update(self.removed) totals.update(self.removed_violations)
totals.update(self.added_fixes)
totals.update(self.removed_fixes)
yield from totals.items() yield from totals.items()
@classmethod @classmethod
@ -172,37 +208,156 @@ class RuleChanges:
""" """
rule_changes = cls() rule_changes = cls()
for line in set(diff.added): parsed_lines: list[DiagnosticLine] = list(
code = parse_rule_code(line) filter(None, (parse_diagnostic_line(line) for line in set(diff)))
if code is not None: )
rule_changes.added[code] += 1 added: set[DiagnosticLine] = set()
removed: set[DiagnosticLine] = set()
fix_only: set[DiagnosticLine] = set()
for line in set(diff.removed): for line in parsed_lines:
code = parse_rule_code(line) if line.is_added:
if code is not None: added.add(line.without_diff())
rule_changes.removed[code] += 1 elif line.is_removed:
removed.add(line.without_diff())
for line in parsed_lines:
other_set = removed if line.is_added else added
if (
line.fix_available
and line.without_fix_available().without_diff() in other_set
):
if line.is_added:
rule_changes.added_fixes[line.rule_code] += 1
else:
rule_changes.removed_fixes[line.rule_code] += 1
fix_only.add(line)
elif (
not line.fix_available
and line.with_fix_available().without_diff() in other_set
):
if line.is_removed:
rule_changes.added_fixes[line.rule_code] += 1
else:
rule_changes.removed_fixes[line.rule_code] += 1
fix_only.add(line)
for line in parsed_lines:
if line in fix_only:
continue
if line.is_added:
rule_changes.added_violations[line.rule_code] += 1
elif line.is_removed:
rule_changes.removed_violations[line.rule_code] += 1
# only_fix_status_changed = set()
# for (check, other) in [(added, removed), (removed, added)]
# for line in check:
# if line.fix_available:
# line_with_toggled = line.without_fix_available().construct_line()
# else:
# line_with_toggled = line.with_fix_available().construct_line()
# if line_without_fix in removed:
# rule_changes.added_fixes[line.rule_code] += 1
# # It's important to track both version of the line or we would falsely
# # report the paired `line_without_fix` as added or removed
# only_fix_status_changed.add(line)
# only_fix_status_changed.add(line_with_toggled)
# else:
# line_without_fix = line.without_fix_available().construct_line()
# if line_without_fix in removed:
# for line in removed:
# line_without_fix = line.replace(CHECK_VIOLATION_FIX_INDICATOR, "")
# if line_without_fix in added:
# only_fix_status_changed.add(line)
# only_fix_status_changed.add(line_without_fix)
# for line in added:
# code = parse_rule_code(line)
# if code is None:
# continue
# if line not in only_fix_status_changed:
# rule_changes.added_violations[code] += 1
# else:
# # We cannot know how many fixes are added or removed with the above approach
# # the values will always be symmetrical so we just report how many have changed
# rule_changes.added_fixes[code] += 1
# for line in removed:
# code = parse_rule_code(line)
# if code is None:
# continue
# if line not in only_fix_status_changed:
# rule_changes.removed_violations[code] += 1
return rule_changes return rule_changes
def __bool__(self): def __bool__(self):
return bool(self.added or self.removed) return bool(self.added_violations or self.removed_violations)
def parse_rule_code(line: str) -> str | None: @dataclass(frozen=True)
class DiagnosticLine:
is_added: bool | None
is_removed: bool | None
fix_available: bool
rule_code: str
location: str
message: str
def construct_line(self) -> str:
"""
Construct the line from the components
"""
line = ""
if self.is_added:
line += "+ "
elif self.is_removd:
line += "- "
line += f"{self.location}: {self.rule_code} "
if self.fix_available:
line += "[*] "
line += self.message
def with_fix_available(self) -> DiagnosticLine:
return DiagnosticLine(**{**dataclasses.asdict(self), "fix_available": True})
def without_fix_available(self) -> DiagnosticLine:
return DiagnosticLine(**{**dataclasses.asdict(self), "fix_available": False})
def without_diff(self) -> DiagnosticLine:
return DiagnosticLine(
**{**dataclasses.asdict(self), "is_added": None, "is_removed": None}
)
def parse_diagnostic_line(line: str) -> DiagnosticLine | None:
""" """
Parse the rule code from a diagnostic line Parse the rule code from a diagnostic line
+ <rule>/<path>:<line>:<column>: <rule_code> <message>
""" """
matches = re.search(r": ([A-Z]{1,4}[0-9]{3,4})", line) match = CHECK_DIAGNOSTIC_LINE_RE.match(line)
if matches is None: if match is None:
# Handle case where there are no regex matches e.g. # Handle case where there are no regex match e.g.
# + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE" # noqa: E501, ERA001 # + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE" # noqa: E501, ERA001
# Which was found in local testing # Which was found in local testing
return None return None
return matches.group(1) match_items = match.groupdict()
return DiagnosticLine(
location=match_items["location"],
is_removed=match_items.get("diff") == "-",
is_added=match_items.get("diff") == "+",
fix_available=match_items.get("fixable") is not None,
rule_code=match_items["code"],
message=match_items["message"],
)
def deduplicate_and_sort_diff(diff: Diff) -> Diff: def deduplicate_and_sort_diff(diff: Diff) -> Diff:
@ -231,7 +386,7 @@ def limit_rule_lines(diff: Diff, max_per_rule: int | None = 100) -> list[str]:
reduced = [] reduced = []
for line in diff: for line in diff:
code = parse_rule_code(line) code = parse_diagnostic_line(line).rule_code
# Do not omit any unparsable lines # Do not omit any unparsable lines
if not code: if not code:
@ -291,7 +446,9 @@ async def compare_check(
baseline_task.result(), baseline_task.result(),
comparison_task.result(), comparison_task.result(),
) )
diff = Diff.from_pair(baseline_output, comparison_output)
# Drop unchanged lines from the diff since we don't need "context" for diagnostic lines
diff = Diff.from_pair(baseline_output, comparison_output).without_unchanged_lines()
return Comparison(diff=diff, repo=cloned_repo) return Comparison(diff=diff, repo=cloned_repo)

View File

@ -5,63 +5,63 @@ from ruff_ecosystem.projects import CheckOptions, Project, Repository
# TODO(zanieb): Consider exporting this as JSON and loading from there instead # TODO(zanieb): Consider exporting this as JSON and loading from there instead
DEFAULT_TARGETS = [ DEFAULT_TARGETS = [
Project(repo=Repository(owner="DisnakeDev", name="disnake", ref="master")), # Project(repo=Repository(owner="DisnakeDev", name="disnake", ref="master")),
Project(repo=Repository(owner="PostHog", name="HouseWatch", ref="main")), # Project(repo=Repository(owner="PostHog", name="HouseWatch", ref="main")),
Project(repo=Repository(owner="RasaHQ", name="rasa", ref="main")), # Project(repo=Repository(owner="RasaHQ", name="rasa", ref="main")),
Project(repo=Repository(owner="Snowflake-Labs", name="snowcli", ref="main")), # Project(repo=Repository(owner="Snowflake-Labs", name="snowcli", ref="main")),
Project(repo=Repository(owner="aiven", name="aiven-client", ref="main")), # Project(repo=Repository(owner="aiven", name="aiven-client", ref="main")),
Project(repo=Repository(owner="alteryx", name="featuretools", ref="main")), # Project(repo=Repository(owner="alteryx", name="featuretools", ref="main")),
Project( # Project(
repo=Repository(owner="apache", name="airflow", ref="main"), # repo=Repository(owner="apache", name="airflow", ref="main"),
check_options=CheckOptions(select="ALL"), # check_options=CheckOptions(select="ALL"),
), # ),
Project(repo=Repository(owner="aws", name="aws-sam-cli", ref="develop")), # Project(repo=Repository(owner="aws", name="aws-sam-cli", ref="develop")),
Project(repo=Repository(owner="bloomberg", name="pytest-memray", ref="main")), # Project(repo=Repository(owner="bloomberg", name="pytest-memray", ref="main")),
Project( Project(
repo=Repository(owner="bokeh", name="bokeh", ref="branch-3.3"), repo=Repository(owner="bokeh", name="bokeh", ref="branch-3.3"),
check_options=CheckOptions(select="ALL"), check_options=CheckOptions(select="ALL"),
), ),
Project(repo=Repository(owner="commaai", name="openpilot", ref="master")), # Project(repo=Repository(owner="commaai", name="openpilot", ref="master")),
Project(repo=Repository(owner="demisto", name="content", ref="master")), # Project(repo=Repository(owner="demisto", name="content", ref="master")),
Project(repo=Repository(owner="docker", name="docker-py", ref="main")), # Project(repo=Repository(owner="docker", name="docker-py", ref="main")),
Project(repo=Repository(owner="freedomofpress", name="securedrop", ref="develop")), # Project(repo=Repository(owner="freedomofpress", name="securedrop", ref="develop")),
Project(repo=Repository(owner="fronzbot", name="blinkpy", ref="dev")), # Project(repo=Repository(owner="fronzbot", name="blinkpy", ref="dev")),
Project(repo=Repository(owner="ibis-project", name="ibis", ref="master")), # Project(repo=Repository(owner="ibis-project", name="ibis", ref="master")),
Project(repo=Repository(owner="ing-bank", name="probatus", ref="main")), # Project(repo=Repository(owner="ing-bank", name="probatus", ref="main")),
Project(repo=Repository(owner="jrnl-org", name="jrnl", ref="develop")), # Project(repo=Repository(owner="jrnl-org", name="jrnl", ref="develop")),
Project(repo=Repository(owner="latchbio", name="latch", ref="main")), # Project(repo=Repository(owner="latchbio", name="latch", ref="main")),
Project(repo=Repository(owner="lnbits", name="lnbits", ref="main")), # Project(repo=Repository(owner="lnbits", name="lnbits", ref="main")),
Project(repo=Repository(owner="milvus-io", name="pymilvus", ref="master")), # Project(repo=Repository(owner="milvus-io", name="pymilvus", ref="master")),
Project(repo=Repository(owner="mlflow", name="mlflow", ref="master")), # Project(repo=Repository(owner="mlflow", name="mlflow", ref="master")),
Project(repo=Repository(owner="model-bakers", name="model_bakery", ref="main")), # Project(repo=Repository(owner="model-bakers", name="model_bakery", ref="main")),
Project(repo=Repository(owner="pandas-dev", name="pandas", ref="main")), # Project(repo=Repository(owner="pandas-dev", name="pandas", ref="main")),
Project(repo=Repository(owner="prefecthq", name="prefect", ref="main")), # Project(repo=Repository(owner="prefecthq", name="prefect", ref="main")),
Project(repo=Repository(owner="pypa", name="build", ref="main")), # Project(repo=Repository(owner="pypa", name="build", ref="main")),
Project(repo=Repository(owner="pypa", name="cibuildwheel", ref="main")), # Project(repo=Repository(owner="pypa", name="cibuildwheel", ref="main")),
Project(repo=Repository(owner="pypa", name="pip", ref="main")), # Project(repo=Repository(owner="pypa", name="pip", ref="main")),
Project(repo=Repository(owner="pypa", name="setuptools", ref="main")), # Project(repo=Repository(owner="pypa", name="setuptools", ref="main")),
Project(repo=Repository(owner="python", name="mypy", ref="master")), # Project(repo=Repository(owner="python", name="mypy", ref="master")),
Project( # Project(
repo=Repository( # repo=Repository(
owner="python", # owner="python",
name="typeshed", # name="typeshed",
ref="main", # ref="main",
), # ),
check_options=CheckOptions(select="PYI"), # check_options=CheckOptions(select="PYI"),
), # ),
Project(repo=Repository(owner="python-poetry", name="poetry", ref="master")), # Project(repo=Repository(owner="python-poetry", name="poetry", ref="master")),
Project(repo=Repository(owner="reflex-dev", name="reflex", ref="main")), # Project(repo=Repository(owner="reflex-dev", name="reflex", ref="main")),
Project(repo=Repository(owner="rotki", name="rotki", ref="develop")), # Project(repo=Repository(owner="rotki", name="rotki", ref="develop")),
Project(repo=Repository(owner="scikit-build", name="scikit-build", ref="main")), # Project(repo=Repository(owner="scikit-build", name="scikit-build", ref="main")),
Project( # Project(
repo=Repository(owner="scikit-build", name="scikit-build-core", ref="main") # repo=Repository(owner="scikit-build", name="scikit-build-core", ref="main")
), # ),
Project(repo=Repository(owner="sphinx-doc", name="sphinx", ref="master")), # Project(repo=Repository(owner="sphinx-doc", name="sphinx", ref="master")),
Project(repo=Repository(owner="spruceid", name="siwe-py", ref="main")), # Project(repo=Repository(owner="spruceid", name="siwe-py", ref="main")),
Project(repo=Repository(owner="tiangolo", name="fastapi", ref="master")), # Project(repo=Repository(owner="tiangolo", name="fastapi", ref="master")),
Project(repo=Repository(owner="yandex", name="ch-backup", ref="main")), # Project(repo=Repository(owner="yandex", name="ch-backup", ref="main")),
Project( # Project(
repo=Repository(owner="zulip", name="zulip", ref="main"), # repo=Repository(owner="zulip", name="zulip", ref="main"),
check_options=CheckOptions(select="ALL"), # check_options=CheckOptions(select="ALL"),
), # ),
] ]

View File

@ -15,6 +15,7 @@ from ruff_ecosystem.projects import (
from ruff_ecosystem.types import Comparison, Result, Serializable from ruff_ecosystem.types import Comparison, Result, Serializable
T = TypeVar("T") T = TypeVar("T")
GITHUB_MAX_COMMENT_LENGTH = 65536
class OutputFormat(Enum): class OutputFormat(Enum):

View File

@ -52,6 +52,11 @@ class Diff(Serializable):
""" """
return cls(difflib.ndiff(baseline, comparison)) return cls(difflib.ndiff(baseline, comparison))
def without_unchanged_lines(self) -> Diff:
return Diff(
line for line in self.lines if line.startswith("+") or line.startswith("-")
)
def jsonable(self) -> Any: def jsonable(self) -> Any:
return self.lines return self.lines