ruff/python/ruff-ecosystem/ruff_ecosystem/check.py

351 lines
10 KiB
Python

from __future__ import annotations
import asyncio
import re
import time
from asyncio import create_subprocess_exec
from collections import Counter
from dataclasses import dataclass, field
from pathlib import Path
from subprocess import PIPE
from typing import TYPE_CHECKING, Iterator, Self, Sequence
from ruff_ecosystem import logger
from ruff_ecosystem.markdown import project_section
from ruff_ecosystem.types import (
Comparison,
Diff,
Result,
RuffError,
Serializable,
)
if TYPE_CHECKING:
from ruff_ecosystem.projects import ClonedRepository
# Matches lines that are summaries rather than diagnostics, i.e. lines that
# start with "Found <N> error" or that contain " fixable with ".
# NOTE: `|` binds more loosely than the anchors, so `^` applies only to the
# first alternative and `$` only to the second.
CHECK_SUMMARY_LINE_RE = re.compile(r"^(Found \d+ error.*)|(.* fixable with .*)$")
# Parses a diagnostic line (in a diff) to retrieve path and line number.
# Named groups:
#   pre   - the leading "+" or "-" diff marker
#   inner - the full "<path>:<line>:<column>:" location text
#   path  - the file path part of the location (everything before the first ":")
#   lnum  - the line number part of the location
#   post  - the remainder of the line after the location
CHECK_DIFF_LINE_RE = re.compile(
r"^(?P<pre>[+-]) (?P<inner>(?P<path>[^:]+):(?P<lnum>\d+):\d+:) (?P<post>.*)$",
)
async def compare_check(
    ruff_baseline_executable: Path,
    ruff_comparison_executable: Path,
    options: CheckOptions,
    cloned_repo: ClonedRepository,
) -> Comparison:
    """
    Run `ruff check` with both executables on a cloned repository and diff the output.

    Both runs are started inside a single task group with the same options and
    the same repository path. Once the group has finished, the baseline output
    and the comparison output are turned into a `Diff`, which is returned
    wrapped in a `Comparison` together with the repository.
    """
    # Baseline first, comparison second: the order matters for `Diff.new`
    executables = (ruff_baseline_executable, ruff_comparison_executable)

    async with asyncio.TaskGroup() as group:
        baseline_run, comparison_run = [
            group.create_task(
                ruff_check(
                    executable=executable.resolve(),
                    path=cloned_repo.path,
                    name=cloned_repo.fullname,
                    options=options,
                ),
            )
            for executable in executables
        ]

    # Leaving the `async with` block means both tasks have completed
    return Comparison(
        diff=Diff.new(baseline_run.result(), comparison_run.result()),
        repo=cloned_repo,
    )
def summarize_check_result(result: Result) -> str:
    """
    Render the outcome of an ecosystem `ruff check` comparison as Markdown text.

    The report consists of, in order:

    1. A headline with the total added / removed diagnostics and error count
       (or a single "no changes" line when all three are zero).
    2. One section per completed project with a non-empty diff, showing the
       de-duplicated, per-rule limited diff inside a `<pre>` block.
    3. One section per errored project.
    4. A table of changes per rule, ordered by total changes (descending).
    """
    # Fold the diffs of all completed projects into one per-rule tally
    all_rule_changes = RuleChanges()
    for _, comparison in result.completed:
        all_rule_changes.update(RuleChanges.from_diff(comparison.diff))

    total_added = all_rule_changes.total_added()
    total_removed = all_rule_changes.total_removed()
    error_count = len(result.errored)

    # Nothing changed and nothing failed: a one-line report is enough
    if not (total_removed or total_added or error_count):
        return "\u2705 ecosystem check detected no `ruff check` changes."

    # Headline with the overall totals
    s = "" if error_count == 1 else "s"
    changes = f"(+{total_added}, -{total_removed}, {error_count} error{s})"
    report = [
        f"\u2139\ufe0f ecosystem check **detected `ruff check` changes**. {changes}",
        "",
    ]

    # Per-project diffs, capped so a single project cannot flood the report
    line_cap = 200
    for project, comparison in result.completed:
        if not comparison.diff:
            # Projects without changes get no section
            continue

        diff = deduplicate_and_sort_diff(comparison.diff)
        capped = limit_rule_lines(diff, project.check_options.max_lines_per_rule)

        # `<pre>` gives code styling while still allowing links
        body = ["<pre>"]
        body.extend(
            add_permalink_to_diagnostic_line(comparison.repo, entry)
            for entry in capped[:line_cap]
        )
        omitted_lines = len(capped) - line_cap
        if omitted_lines > 0:
            body.append(f"... {omitted_lines} additional lines omitted")
        body.append("</pre>")

        section = project_section(
            title=f"+{diff.lines_added}, -{diff.lines_removed}",
            content="\n".join(body),
            options=project.check_options.markdown(),
            project=project,
        )
        report.extend(section)

    # Projects that failed to run get a section holding the error text
    for project, error in result.errored:
        section = project_section(
            title="error",
            content=str(error),
            options="",
            project=project,
        )
        report.extend(section)

    # Summary table of changed rules, largest total first
    if all_rule_changes:
        report += [
            f"Rules changed: {len(all_rule_changes.rule_codes())}",
            "",
            "| Rule | Changes | Additions | Removals |",
            "| ---- | ------- | --------- | -------- |",
        ]
        ranked = sorted(
            all_rule_changes.total_changes_by_rule(),
            key=lambda pair: pair[1],  # Order by the total changes
            reverse=True,
        )
        for rule, total in ranked:
            additions = all_rule_changes.added[rule]
            removals = all_rule_changes.removed[rule]
            report.append(f"| {rule} | {total} | {additions} | {removals} |")

    return "\n".join(report)
def add_permalink_to_diagnostic_line(repo: ClonedRepository, line: str) -> str:
    """
    Wrap the location part of a diagnostic diff line in an HTML link.

    Lines that do not match `CHECK_DIFF_LINE_RE` are returned unchanged.
    Otherwise the `<path>:<line>:<column>:` text becomes an `<a>` element whose
    target is `repo.url_for(path, line_number)`.
    """
    if (found := CHECK_DIFF_LINE_RE.match(line)) is None:
        return line

    pre, inner, post = found.group("pre", "inner", "post")
    url = repo.url_for(found["path"], int(found["lnum"]))
    return f"{pre} <a href='{url}'>{inner}</a> {post}"
async def ruff_check(
    *, executable: Path, path: Path, name: str, options: CheckOptions
) -> Sequence[str]:
    """
    Run the given ruff binary against the specified path.

    The binary is invoked as `check --no-cache --exit-zero [options...] .` with
    `path` as the working directory.

    Args:
        executable: The ruff binary to run.
        path: The directory to check; used as the subprocess working directory.
        name: A display name for the checked project, used in log messages only.
        options: The select / ignore / exclude / show-fixes settings to apply.

    Returns:
        The lines of standard output, excluding summary lines (those matching
        `CHECK_SUMMARY_LINE_RE`), so only diagnostic lines remain.

    Raises:
        RuffError: If the process exits with a non-zero code; the message is
            the decoded standard error output.
    """
    logger.debug(f"Checking {name} with {executable}")

    ruff_args = ["check", "--no-cache", "--exit-zero"]
    # Only pass the flags whose option value is non-empty
    for flag, value in (
        ("--select", options.select),
        ("--ignore", options.ignore),
        ("--exclude", options.exclude),
    ):
        if value:
            ruff_args.extend([flag, value])
    if options.show_fixes:
        ruff_args.extend(["--show-fixes", "--ecosystem-ci"])

    # Use a monotonic, high-resolution clock for the elapsed time: the wall
    # clock (`time.time()`) can jump if the system time is adjusted mid-run
    start = time.perf_counter()
    proc = await create_subprocess_exec(
        executable.absolute(),
        *ruff_args,
        ".",
        stdout=PIPE,
        stderr=PIPE,
        cwd=path,
    )
    result, err = await proc.communicate()
    elapsed = time.perf_counter() - start

    logger.debug(f"Finished checking {name} with {executable} in {elapsed:.2f}s")

    if proc.returncode != 0:
        raise RuffError(err.decode("utf8"))

    # Strip summary lines so the diff is only diagnostic lines
    return [
        line
        for line in result.decode("utf8").splitlines()
        if not CHECK_SUMMARY_LINE_RE.match(line)
    ]
@dataclass(frozen=True)
class CheckOptions(Serializable):
    """
    Immutable settings for a `ruff check` run
    """

    # Rule selection values; each one is only used when it is non-empty
    select: str = ""
    ignore: str = ""
    exclude: str = ""

    # Off by default because generating fixes is slow and verbose
    show_fixes: bool = False

    # Cap on the number of reported lines per rule (`None` disables the cap)
    max_lines_per_rule: int | None = 50

    def markdown(self) -> str:
        """Describe the select / ignore / exclude settings as one line of text."""
        select, ignore, exclude = self.select, self.ignore, self.exclude
        return f"select {select} ignore {ignore} exclude {exclude}"
@dataclass(frozen=True)
class RuleChanges:
"""
The number of additions and removals by rule code
"""
added: Counter = field(default_factory=Counter)
removed: Counter = field(default_factory=Counter)
def rule_codes(self) -> set[str]:
return set(self.added.keys()).union(self.removed.keys())
def __add__(self, other: Self) -> Self:
if not isinstance(other, type(self)):
return NotImplemented
new = RuleChanges()
new.update(self)
new.update(other)
return new
def update(self, other: Self) -> Self:
self.added.update(other.added)
self.removed.update(other.removed)
return self
def total_added(self) -> int:
return sum(self.added.values())
def total_removed(self) -> int:
return sum(self.removed.values())
def total_changes_by_rule(self) -> Iterator[str, int]:
"""
Yields the sum of changes for each rule
"""
totals = Counter()
totals.update(self.added)
totals.update(self.removed)
yield from totals.items()
@classmethod
def from_diff(cls: type[Self], diff: Diff) -> Self:
"""
Parse a diff from `ruff check` to determine the additions and removals for each rule
"""
rule_changes = cls()
for line in sorted(diff.added):
code = parse_rule_code(line)
if code is not None:
rule_changes.added[code] += 1
for line in sorted(diff.removed):
code = parse_rule_code(line)
if code is not None:
rule_changes.removed[code] += 1
return rule_changes
def __bool__(self):
return bool(self.added or self.removed)
def parse_rule_code(line: str) -> str | None:
    """
    Extract the rule code from a diagnostic line such as

        + <path>:<line>:<column>: <rule_code> <message>

    The code is the first occurrence of one to four uppercase letters followed
    by three or four digits that directly follows a colon and a space.
    Returns `None` when the line contains no such occurrence.
    """
    found = re.search(r": ([A-Z]{1,4}[0-9]{3,4})", line)
    # Diff lines are not guaranteed to be diagnostics (lines without any
    # "<colon><space><code>" segment were seen in local testing), so a miss is
    # reported as `None` instead of raising
    return found.group(1) if found is not None else None
def deduplicate_and_sort_diff(diff: Diff) -> Diff:
    """
    Build a new diff holding only the distinct changed lines, in sorted order.

    A line is kept when it starts with "+ " or "- "; every other line is
    dropped. The kept lines are ordered by their text after that two-character
    marker.
    """
    changed = {entry for entry in diff if entry.startswith(("+ ", "- "))}

    # Compare on the content only, ignoring the leading + or -
    ordered = sorted(changed, key=lambda entry: entry[2:])
    return Diff(ordered)
def limit_rule_lines(diff: Diff, max_per_rule: int | None = 100) -> list[str]:
    """
    Reduce the diff to include a maximum number of lines for each rule.

    Args:
        diff: The diff lines to reduce; only iterated, never modified.
        max_per_rule: The maximum number of lines to keep per rule code, or
            `None` to keep every line.

    Returns:
        A new list with the kept lines in their original order. Lines without
        a parsable rule code are always kept. When lines were dropped, one
        trailing "... N changes omitted for rule X" line is appended for each
        affected rule.
    """
    if max_per_rule is None:
        # No limit requested. Return a list (not the diff object itself) so the
        # result always matches the annotated type and can be sliced and
        # measured by callers regardless of the branch taken.
        return list(diff)

    counts: Counter[str] = Counter()
    reduced: list[str] = []

    for line in diff:
        code = parse_rule_code(line)
        if code:
            counts[code] += 1
            if counts[code] > max_per_rule:
                continue
        # Reached for lines under the limit and for unparsable lines, which
        # are never omitted
        reduced.append(line)

    # Add lines summarizing the omitted changes
    reduced.extend(
        f"... {count - max_per_rule} changes omitted for rule {code}"
        for code, count in counts.items()
        if count > max_per_rule
    )

    return reduced