diff --git a/CHANGELOG.md b/CHANGELOG.md
index ee2a8cf4ce..408dfcc7a9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -271,4 +271,4 @@ _New rules are added in [preview](https://docs.astral.sh/ruff/preview/)._
 
 ### Playground
 
-- Fix playground `Quick Fix` action ([#7824](https://github.com/astral-sh/ruff/pull/7824))
+- Fix playground `Quick Fix` action ([#7824](https://github.com/astral-sh/ruff/pull/7824))
\ No newline at end of file
diff --git a/python/ruff-ecosystem/ruff_ecosystem/check.py b/python/ruff-ecosystem/ruff_ecosystem/check.py
index f95e7ea3ed..f399592df7 100644
--- a/python/ruff-ecosystem/ruff_ecosystem/check.py
+++ b/python/ruff-ecosystem/ruff_ecosystem/check.py
@@ -8,6 +8,7 @@ import re
 import time
 from asyncio import create_subprocess_exec
 from collections import Counter
+import dataclasses
 from dataclasses import dataclass, field
 from pathlib import Path
 from subprocess import PIPE
@@ -24,7 +25,7 @@ from ruff_ecosystem.types import (
 )
 
 if TYPE_CHECKING:
-    from ruff_ecosystem.projects import ClonedRepository
+    from ruff_ecosystem.projects import ClonedRepository, Project
 
 
 # Matches lines that are summaries rather than diagnostics
@@ -35,16 +36,26 @@ CHECK_DIFF_LINE_RE = re.compile(
     r"^(?P<pre>[+-]) (?P<inner>(?P<path>[^:]+):(?P<lnum>\d+):\d+:) (?P<post>.*)$",
 )
 
+CHECK_DIAGNOSTIC_LINE_RE = re.compile(
+    r"^(?P[+-])? ?(?P.*): (?P[A-Z]{1,4}[0-9]{3,4})(?P \[\*\])? (?P.*)"
+)
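+# CHECK_DIAGNOSTIC_LINE_RE is expected to match individual diagnostics in the diff,
+# e.g. the (hypothetical) line "+ foo/bar.py:10:5: F401 [*] `os` imported but unused",
+# where the leading "+"/"-" is the ndiff marker and " [*]" marks an available fix.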
+
+CHECK_VIOLATION_FIX_INDICATOR = " [*]"
+
 
 def markdown_check_result(result: Result) -> str:
     # Calculate the total number of rule changes
     all_rule_changes = RuleChanges()
-    for _, comparison in result.completed:
-        all_rule_changes.update(RuleChanges.from_diff(comparison.diff))
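+    # Also keep the per-project rule changes so each project section can report its own totals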
+    project_rule_changes: dict[Project, RuleChanges] = {}
+    for project, comparison in result.completed:
+        project_rule_changes[project] = changes = RuleChanges.from_diff(comparison.diff)
+        all_rule_changes.update(changes)
 
     lines = []
-    total_removed = all_rule_changes.total_removed()
-    total_added = all_rule_changes.total_added()
+    total_removed = all_rule_changes.total_removed_violations()
+    total_added = all_rule_changes.total_added_violations()
+    total_added_fixes = all_rule_changes.total_added_fixes()
+    total_removed_fixes = all_rule_changes.total_removed_fixes()
     error_count = len(result.errored)
 
     if total_removed == 0 and total_added == 0 and error_count == 0:
@@ -52,7 +63,7 @@ def markdown_check_result(result: Result) -> str:
 
     # Summarize the total changes
     s = "s" if error_count != 1 else ""
-    changes = f"(+{total_added}, -{total_removed}, {error_count} error{s})"
+    changes = f"(+{total_added}, -{total_removed} violations; +{total_added_fixes}, -{total_removed_fixes} fixes; {error_count} error{s})"
 
     lines.append(f"\u2139\ufe0f ecosystem check **detected linter changes**. {changes}")
     lines.append("")
@@ -78,9 +89,17 @@ def markdown_check_result(result: Result) -> str:
 
         diff_lines.append("
") + rule_changes = project_rule_changes[project] + title = ( + f"+{rule_changes.added_violations}, " + f"-{rule_changes.removed_violations} violations; " + f"+{rule_changes.added_fixes} " + f"-{rule_changes.removed_fixes} fixes; " + ) + lines.extend( markdown_project_section( - title=f"+{diff.lines_added}, -{diff.lines_removed}", + title=title, content=diff_lines, options=project.check_options, project=project, @@ -100,18 +119,23 @@ def markdown_check_result(result: Result) -> str: # Display a summary table of changed rules if all_rule_changes: table_lines = [] - table_lines.append("| Rule | Changes | Additions | Removals |") + table_lines.append("| code | total | + violation | - violation | + fix | - fix") table_lines.append("| ---- | ------- | --------- | -------- |") for rule, total in sorted( all_rule_changes.total_changes_by_rule(), key=lambda item: item[1], # Sort by the total changes reverse=True, ): - additions, removals = ( - all_rule_changes.added[rule], - all_rule_changes.removed[rule], + added_violations, removed_violations, added_fixes, removed_fixes = ( + all_rule_changes.added_violations[rule], + all_rule_changes.removed_violations[rule], + all_rule_changes.added_fixes[rule], + all_rule_changes.removed_fixes[rule], + ) + table_lines.append( + f"| {rule} | {total} | {added_violations} | {removed_violations} " + f"| {added_fixes} | {removed_fixes} |" ) - table_lines.append(f"| {rule} | {total} | {additions} | {removals} |") lines.extend( markdown_details( @@ -130,11 +154,13 @@ class RuleChanges: The number of additions and removals by rule code """ - added: Counter = field(default_factory=Counter) - removed: Counter = field(default_factory=Counter) + added_violations: Counter = field(default_factory=Counter) + removed_violations: Counter = field(default_factory=Counter) + added_fixes: Counter = field(default_factory=Counter) + removed_fixes: Counter = field(default_factory=Counter) def rule_codes(self) -> set[str]: - return set(self.added.keys()).union(self.removed.keys()) + return set(self.added_violations.keys()).union(self.removed_violations.keys()) def __add__(self, other: Self) -> Self: if not isinstance(other, type(self)): @@ -146,23 +172,33 @@ class RuleChanges: return new def update(self, other: Self) -> Self: - self.added.update(other.added) - self.removed.update(other.removed) + self.added_violations.update(other.added_violations) + self.removed_violations.update(other.removed_violations) + self.added_fixes.update(other.added_fixes) + self.removed_fixes.update(other.removed_fixes) return self - def total_added(self) -> int: - return sum(self.added.values()) + def total_added_violations(self) -> int: + return sum(self.added_violations.values()) - def total_removed(self) -> int: - return sum(self.removed.values()) + def total_removed_violations(self) -> int: + return sum(self.removed_violations.values()) + + def total_added_fixes(self) -> int: + return sum(self.added_fixes.values()) + + def total_removed_fixes(self) -> int: + return sum(self.removed_fixes.values()) def total_changes_by_rule(self) -> Iterator[str, int]: """ Yields the sum of changes for each rule """ totals = Counter() - totals.update(self.added) - totals.update(self.removed) + totals.update(self.added_violations) + totals.update(self.removed_violations) + totals.update(self.added_fixes) + totals.update(self.removed_fixes) yield from totals.items() @classmethod @@ -172,37 +208,156 @@ class RuleChanges: """ rule_changes = cls() - for line in set(diff.added): - code = parse_rule_code(line) - if code is 
-                rule_changes.added[code] += 1
+        parsed_lines: list[DiagnosticLine] = list(
+            filter(None, (parse_diagnostic_line(line) for line in set(diff)))
+        )
+        added: set[DiagnosticLine] = set()
+        removed: set[DiagnosticLine] = set()
+        fix_only: set[DiagnosticLine] = set()
 
-        for line in set(diff.removed):
-            code = parse_rule_code(line)
-            if code is not None:
-                rule_changes.removed[code] += 1
+        for line in parsed_lines:
+            if line.is_added:
+                added.add(line.without_diff())
+            elif line.is_removed:
+                removed.add(line.without_diff())
+
+        for line in parsed_lines:
+            other_set = removed if line.is_added else added
+            if (
+                line.fix_available
+                and line.without_fix_available().without_diff() in other_set
+            ):
+                if line.is_added:
+                    rule_changes.added_fixes[line.rule_code] += 1
+                else:
+                    rule_changes.removed_fixes[line.rule_code] += 1
+                fix_only.add(line)
+            elif (
+                not line.fix_available
+                and line.with_fix_available().without_diff() in other_set
+            ):
+                # The paired line with the fix indicator is counted above; only mark
+                # this line so it is neither double counted nor reported as a violation
+                fix_only.add(line)
+
+        for line in parsed_lines:
+            if line in fix_only:
+                continue
+
+            if line.is_added:
+                rule_changes.added_violations[line.rule_code] += 1
+            elif line.is_removed:
+                rule_changes.removed_violations[line.rule_code] += 1
+
+        # only_fix_status_changed = set()
+
+        # for (check, other) in [(added, removed), (removed, added)]
+        # for line in check:
+        #     if line.fix_available:
+        #         line_with_toggled = line.without_fix_available().construct_line()
+        #     else:
+        #         line_with_toggled = line.with_fix_available().construct_line()
+
+        #     if line_without_fix in removed:
+        #         rule_changes.added_fixes[line.rule_code] += 1
+
+        #     # It's important to track both version of the line or we would falsely
+        #     # report the paired `line_without_fix` as added or removed
+        #     only_fix_status_changed.add(line)
+        #     only_fix_status_changed.add(line_with_toggled)
+        # else:
+        #     line_without_fix = line.without_fix_available().construct_line()
+        #     if line_without_fix in removed:
+
+        # for line in removed:
+        #     line_without_fix = line.replace(CHECK_VIOLATION_FIX_INDICATOR, "")
+        #     if line_without_fix in added:
+        #         only_fix_status_changed.add(line)
+        #         only_fix_status_changed.add(line_without_fix)
+
+        # for line in added:
+        #     code = parse_rule_code(line)
+        #     if code is None:
+        #         continue
+        #     if line not in only_fix_status_changed:
+        #         rule_changes.added_violations[code] += 1
+        #     else:
+        #         # We cannot know how many fixes are added or removed with the above approach
+        #         # the values will always be symmetrical so we just report how many have changed
+        #         rule_changes.added_fixes[code] += 1
+
+        # for line in removed:
+        #     code = parse_rule_code(line)
+        #     if code is None:
+        #         continue
+        #     if line not in only_fix_status_changed:
+        #         rule_changes.removed_violations[code] += 1
 
         return rule_changes
 
     def __bool__(self):
-        return bool(self.added or self.removed)
+        return bool(self.added_violations or self.removed_violations)
 
 
-def parse_rule_code(line: str) -> str | None:
+@dataclass(frozen=True)
+class DiagnosticLine:
+    is_added: bool | None
+    is_removed: bool | None
+    fix_available: bool
+    rule_code: str
+    location: str
+    message: str
+
+    def construct_line(self) -> str:
+        """
+        Construct the line from the components
+        """
+        line = ""
+        if self.is_added:
+            line += "+ "
+        elif self.is_removed:
+            line += "- "
+        line += f"{self.location}: {self.rule_code} "
+        if self.fix_available:
+            line += "[*] "
+        line += self.message
+        return line
+
+    def with_fix_available(self) -> DiagnosticLine:
+        return DiagnosticLine(**{**dataclasses.asdict(self), "fix_available": True})
+
+    def without_fix_available(self) -> DiagnosticLine:
+        return DiagnosticLine(**{**dataclasses.asdict(self), "fix_available": False})
+
+    def without_diff(self) -> DiagnosticLine:
+        return DiagnosticLine(
+            **{**dataclasses.asdict(self), "is_added": None, "is_removed": None}
+        )
+
+
+def parse_diagnostic_line(line: str) -> DiagnosticLine | None:
     """
     Parse the rule code from a diagnostic line
 
-    <path>:<line>:<column>: <code> <message>
+    +/- <path>:<line>:<column>: <code> [*]? <message>
     """
-    matches = re.search(r": ([A-Z]{1,4}[0-9]{3,4})", line)
+    match = CHECK_DIAGNOSTIC_LINE_RE.match(line)
 
-    if matches is None:
-        # Handle case where there are no regex matches e.g.
+    if match is None:
+        # Handle case where there is no regex match e.g.
         # + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE"  # noqa: E501, ERA001
         # Which was found in local testing
         return None
 
-    return matches.group(1)
+    match_items = match.groupdict()
+
+    return DiagnosticLine(
+        location=match_items["location"],
+        is_removed=match_items.get("diff") == "-",
+        is_added=match_items.get("diff") == "+",
+        fix_available=match_items.get("fixable") is not None,
+        rule_code=match_items["code"],
+        message=match_items["message"],
+    )
 
 
 def deduplicate_and_sort_diff(diff: Diff) -> Diff:
@@ -231,7 +386,7 @@ def limit_rule_lines(diff: Diff, max_per_rule: int | None = 100) -> list[str]:
     reduced = []
 
     for line in diff:
-        code = parse_rule_code(line)
+        parsed = parse_diagnostic_line(line)
+        code = parsed.rule_code if parsed is not None else None
 
         # Do not omit any unparsable lines
         if not code:
@@ -291,7 +446,9 @@ async def compare_check(
         baseline_task.result(),
         comparison_task.result(),
     )
-    diff = Diff.from_pair(baseline_output, comparison_output)
+
+    # Drop unchanged lines from the diff since we don't need "context" for diagnostic lines
+    diff = Diff.from_pair(baseline_output, comparison_output).without_unchanged_lines()
 
     return Comparison(diff=diff, repo=cloned_repo)
diff --git a/python/ruff-ecosystem/ruff_ecosystem/defaults.py b/python/ruff-ecosystem/ruff_ecosystem/defaults.py
index 472860f5bb..7d11a06824 100644
--- a/python/ruff-ecosystem/ruff_ecosystem/defaults.py
+++ b/python/ruff-ecosystem/ruff_ecosystem/defaults.py
@@ -5,63 +5,63 @@ from ruff_ecosystem.projects import CheckOptions, Project, Repository
 
 # TODO(zanieb): Consider exporting this as JSON and loading from there instead
 DEFAULT_TARGETS = [
-    Project(repo=Repository(owner="DisnakeDev", name="disnake", ref="master")),
-    Project(repo=Repository(owner="PostHog", name="HouseWatch", ref="main")),
-    Project(repo=Repository(owner="RasaHQ", name="rasa", ref="main")),
-    Project(repo=Repository(owner="Snowflake-Labs", name="snowcli", ref="main")),
-    Project(repo=Repository(owner="aiven", name="aiven-client", ref="main")),
-    Project(repo=Repository(owner="alteryx", name="featuretools", ref="main")),
-    Project(
-        repo=Repository(owner="apache", name="airflow", ref="main"),
-        check_options=CheckOptions(select="ALL"),
-    ),
-    Project(repo=Repository(owner="aws", name="aws-sam-cli", ref="develop")),
-    Project(repo=Repository(owner="bloomberg", name="pytest-memray", ref="main")),
+    # Project(repo=Repository(owner="DisnakeDev", name="disnake", ref="master")),
+    # Project(repo=Repository(owner="PostHog", name="HouseWatch", ref="main")),
+    # Project(repo=Repository(owner="RasaHQ", name="rasa", ref="main")),
+    # Project(repo=Repository(owner="Snowflake-Labs", name="snowcli", ref="main")),
+    # Project(repo=Repository(owner="aiven", name="aiven-client", ref="main")),
+    # Project(repo=Repository(owner="alteryx", name="featuretools", ref="main")),
Project(repo=Repository(owner="alteryx", name="featuretools", ref="main")), + # Project( + # repo=Repository(owner="apache", name="airflow", ref="main"), + # check_options=CheckOptions(select="ALL"), + # ), + # Project(repo=Repository(owner="aws", name="aws-sam-cli", ref="develop")), + # Project(repo=Repository(owner="bloomberg", name="pytest-memray", ref="main")), Project( repo=Repository(owner="bokeh", name="bokeh", ref="branch-3.3"), check_options=CheckOptions(select="ALL"), ), - Project(repo=Repository(owner="commaai", name="openpilot", ref="master")), - Project(repo=Repository(owner="demisto", name="content", ref="master")), - Project(repo=Repository(owner="docker", name="docker-py", ref="main")), - Project(repo=Repository(owner="freedomofpress", name="securedrop", ref="develop")), - Project(repo=Repository(owner="fronzbot", name="blinkpy", ref="dev")), - Project(repo=Repository(owner="ibis-project", name="ibis", ref="master")), - Project(repo=Repository(owner="ing-bank", name="probatus", ref="main")), - Project(repo=Repository(owner="jrnl-org", name="jrnl", ref="develop")), - Project(repo=Repository(owner="latchbio", name="latch", ref="main")), - Project(repo=Repository(owner="lnbits", name="lnbits", ref="main")), - Project(repo=Repository(owner="milvus-io", name="pymilvus", ref="master")), - Project(repo=Repository(owner="mlflow", name="mlflow", ref="master")), - Project(repo=Repository(owner="model-bakers", name="model_bakery", ref="main")), - Project(repo=Repository(owner="pandas-dev", name="pandas", ref="main")), - Project(repo=Repository(owner="prefecthq", name="prefect", ref="main")), - Project(repo=Repository(owner="pypa", name="build", ref="main")), - Project(repo=Repository(owner="pypa", name="cibuildwheel", ref="main")), - Project(repo=Repository(owner="pypa", name="pip", ref="main")), - Project(repo=Repository(owner="pypa", name="setuptools", ref="main")), - Project(repo=Repository(owner="python", name="mypy", ref="master")), - Project( - repo=Repository( - owner="python", - name="typeshed", - ref="main", - ), - check_options=CheckOptions(select="PYI"), - ), - Project(repo=Repository(owner="python-poetry", name="poetry", ref="master")), - Project(repo=Repository(owner="reflex-dev", name="reflex", ref="main")), - Project(repo=Repository(owner="rotki", name="rotki", ref="develop")), - Project(repo=Repository(owner="scikit-build", name="scikit-build", ref="main")), - Project( - repo=Repository(owner="scikit-build", name="scikit-build-core", ref="main") - ), - Project(repo=Repository(owner="sphinx-doc", name="sphinx", ref="master")), - Project(repo=Repository(owner="spruceid", name="siwe-py", ref="main")), - Project(repo=Repository(owner="tiangolo", name="fastapi", ref="master")), - Project(repo=Repository(owner="yandex", name="ch-backup", ref="main")), - Project( - repo=Repository(owner="zulip", name="zulip", ref="main"), - check_options=CheckOptions(select="ALL"), - ), + # Project(repo=Repository(owner="commaai", name="openpilot", ref="master")), + # Project(repo=Repository(owner="demisto", name="content", ref="master")), + # Project(repo=Repository(owner="docker", name="docker-py", ref="main")), + # Project(repo=Repository(owner="freedomofpress", name="securedrop", ref="develop")), + # Project(repo=Repository(owner="fronzbot", name="blinkpy", ref="dev")), + # Project(repo=Repository(owner="ibis-project", name="ibis", ref="master")), + # Project(repo=Repository(owner="ing-bank", name="probatus", ref="main")), + # Project(repo=Repository(owner="jrnl-org", name="jrnl", 
ref="develop")), + # Project(repo=Repository(owner="latchbio", name="latch", ref="main")), + # Project(repo=Repository(owner="lnbits", name="lnbits", ref="main")), + # Project(repo=Repository(owner="milvus-io", name="pymilvus", ref="master")), + # Project(repo=Repository(owner="mlflow", name="mlflow", ref="master")), + # Project(repo=Repository(owner="model-bakers", name="model_bakery", ref="main")), + # Project(repo=Repository(owner="pandas-dev", name="pandas", ref="main")), + # Project(repo=Repository(owner="prefecthq", name="prefect", ref="main")), + # Project(repo=Repository(owner="pypa", name="build", ref="main")), + # Project(repo=Repository(owner="pypa", name="cibuildwheel", ref="main")), + # Project(repo=Repository(owner="pypa", name="pip", ref="main")), + # Project(repo=Repository(owner="pypa", name="setuptools", ref="main")), + # Project(repo=Repository(owner="python", name="mypy", ref="master")), + # Project( + # repo=Repository( + # owner="python", + # name="typeshed", + # ref="main", + # ), + # check_options=CheckOptions(select="PYI"), + # ), + # Project(repo=Repository(owner="python-poetry", name="poetry", ref="master")), + # Project(repo=Repository(owner="reflex-dev", name="reflex", ref="main")), + # Project(repo=Repository(owner="rotki", name="rotki", ref="develop")), + # Project(repo=Repository(owner="scikit-build", name="scikit-build", ref="main")), + # Project( + # repo=Repository(owner="scikit-build", name="scikit-build-core", ref="main") + # ), + # Project(repo=Repository(owner="sphinx-doc", name="sphinx", ref="master")), + # Project(repo=Repository(owner="spruceid", name="siwe-py", ref="main")), + # Project(repo=Repository(owner="tiangolo", name="fastapi", ref="master")), + # Project(repo=Repository(owner="yandex", name="ch-backup", ref="main")), + # Project( + # repo=Repository(owner="zulip", name="zulip", ref="main"), + # check_options=CheckOptions(select="ALL"), + # ), ] diff --git a/python/ruff-ecosystem/ruff_ecosystem/main.py b/python/ruff-ecosystem/ruff_ecosystem/main.py index f2875707ef..862907c1f1 100644 --- a/python/ruff-ecosystem/ruff_ecosystem/main.py +++ b/python/ruff-ecosystem/ruff_ecosystem/main.py @@ -15,6 +15,7 @@ from ruff_ecosystem.projects import ( from ruff_ecosystem.types import Comparison, Result, Serializable T = TypeVar("T") +GITHUB_MAX_COMMENT_LENGTH = 65536 class OutputFormat(Enum): diff --git a/python/ruff-ecosystem/ruff_ecosystem/types.py b/python/ruff-ecosystem/ruff_ecosystem/types.py index 60b9bcb3c6..665107bf21 100644 --- a/python/ruff-ecosystem/ruff_ecosystem/types.py +++ b/python/ruff-ecosystem/ruff_ecosystem/types.py @@ -52,6 +52,11 @@ class Diff(Serializable): """ return cls(difflib.ndiff(baseline, comparison)) + def without_unchanged_lines(self) -> Diff: + return Diff( + line for line in self.lines if line.startswith("+") or line.startswith("-") + ) + def jsonable(self) -> Any: return self.lines