ruff/python/ruff-ecosystem/ruff_ecosystem/main.py

236 lines
7.4 KiB
Python

from ruff_ecosystem.models import (
RuffCommand,
Target,
Diff,
ClonedRepository,
RuleChanges,
CheckComparison,
Result,
)
from pathlib import Path
from ruff_ecosystem import logger
import asyncio
from ruff_ecosystem.git import clone
from ruff_ecosystem.ruff import ruff_check, ruff_format
from ruff_ecosystem.emitters import Emitter
import difflib
from typing import TypeVar
import re
# Type variable used to annotate the `limited_parallelism` helper inside `main`.
T = TypeVar("T")
async def main(
    command: RuffCommand,
    ruff_baseline_executable: Path,
    ruff_comparison_executable: Path,
    targets: list[Target],
    cache: Path | None,
    emitter: Emitter,
    max_parallelism: int = 50,
    raise_on_failure: bool = False,
) -> None:
    """Compare two ruff executables across all targets and emit the result.

    Every target is processed by `clone_and_compare`, with at most
    `max_parallelism` targets in flight at once. The per-target outcomes are
    aggregated into a single `Result`, which is handed to
    `emitter.emit_result`; this function prints nothing itself.

    Args:
        command: The ruff command to run on each target.
        ruff_baseline_executable: Executable producing the baseline output.
        ruff_comparison_executable: Executable producing the output that is
            compared against the baseline.
        targets: The targets to check.
        cache: Directory passed to `clone_and_compare` for checkouts.
            NOTE(review): annotated as optional, but `clone_and_compare`
            calls `cache.joinpath(...)` unconditionally -- confirm callers
            never pass `None`.
        emitter: Receives the aggregated `Result`.
        max_parallelism: Upper bound on concurrently processed targets.
        raise_on_failure: When true, the first failing target raises out of
            this function; otherwise failures are collected per target and
            reported in `Result.errors`.
    """
    # Imported locally: only needed to annotate the throttling helper below.
    from collections.abc import Awaitable

    logger.debug("Using command %s", command.value)
    logger.debug("Using baseline executable at %s", ruff_baseline_executable)
    logger.debug("Using comparison executable at %s", ruff_comparison_executable)
    logger.debug("Using cache directory %s", cache)
    logger.debug("Checking %s targets", len(targets))

    semaphore = asyncio.Semaphore(max_parallelism)

    async def limited_parallelism(coroutine: Awaitable[T]) -> T:
        """Await `coroutine` while holding a slot of the shared semaphore."""
        async with semaphore:
            return await coroutine

    # With `return_exceptions=True` a failing target yields its exception
    # object in place of a comparison instead of aborting the whole gather.
    outcomes: list[Exception | CheckComparison] = await asyncio.gather(
        *[
            limited_parallelism(
                clone_and_compare(
                    command,
                    ruff_baseline_executable,
                    ruff_comparison_executable,
                    target,
                    cache,
                )
            )
            for target in targets
        ],
        return_exceptions=not raise_on_failure,
    )
    comparisons_by_target = dict(zip(targets, outcomes, strict=True))

    # Aggregate totals and split the outcomes into failures and real changes.
    total_removed = total_added = 0
    total_rule_changes = RuleChanges()
    errors: list[tuple[Target, Exception]] = []
    comparisons: list[tuple[Target, CheckComparison]] = []
    for target, outcome in comparisons_by_target.items():
        if isinstance(outcome, Exception):
            errors.append((target, outcome))
            continue

        total_removed += len(outcome.diff.removed)
        total_added += len(outcome.diff.added)
        total_rule_changes += outcome.rule_changes

        # Only targets whose diff is non-empty are reported individually.
        if outcome.diff:
            comparisons.append((target, outcome))

    result = Result(
        total_added=total_added,
        total_removed=total_removed,
        total_rule_changes=total_rule_changes,
        comparisons=comparisons,
        errors=errors,
    )
    emitter.emit_result(result)
async def clone_and_compare(
command: RuffCommand,
ruff_baseline_executable: Path,
ruff_comparison_executable: Path,
target: Target,
cache: Path,
) -> CheckComparison:
"""Check a specific repository against two versions of ruff."""
assert ":" not in target.repo.owner
assert ":" not in target.repo.name
match command:
case RuffCommand.check:
ruff_task, create_comparison, options = (
ruff_check,
create_check_comparison,
target.check_options,
)
case RuffCommand.format:
ruff_task, create_comparison, options = (
ruff_format,
create_format_comparison,
target.format_options,
)
case _:
raise ValueError(f"Unknowm target Ruff command {command}")
checkout_dir = cache.joinpath(f"{target.repo.owner}:{target.repo.name}")
async with clone(target.repo, checkout_dir) as cloned_repo:
try:
async with asyncio.TaskGroup() as tg:
baseline_task = tg.create_task(
ruff_task(
executable=ruff_baseline_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
),
)
comparison_task = tg.create_task(
ruff_task(
executable=ruff_comparison_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
),
)
except ExceptionGroup as e:
raise e.exceptions[0] from e
return create_comparison(
cloned_repo, baseline_task.result(), comparison_task.result()
)
def create_check_comparison(
    repo: ClonedRepository,
    baseline_output: str | list[str],
    comparison_output: str | list[str],
) -> CheckComparison:
    """Build the comparison of two `ruff check` outputs for one repository.

    Args:
        repo: The repository both outputs were produced for.
        baseline_output: Output of the baseline run, either as one string or
            already split into lines.
        comparison_output: Output of the comparison run, in either form.

    Returns:
        A `CheckComparison` holding the set of lines only present in the
        baseline (`removed`), the set only present in the comparison
        (`added`), and the per-rule changes derived from that diff.
    """
    # `difflib.ndiff` compares sequences of lines. Given raw strings it would
    # iterate them character by character and report single characters as
    # added or removed, so strings are split into lines first.
    if isinstance(baseline_output, str):
        baseline_output = baseline_output.splitlines()
    if isinstance(comparison_output, str):
        comparison_output = comparison_output.splitlines()

    removed, added = set(), set()
    for line in difflib.ndiff(baseline_output, comparison_output):
        # Each delta line carries a two-character marker; "? " hint lines and
        # unchanged lines are ignored.
        if line.startswith("- "):
            removed.add(line[2:])
        elif line.startswith("+ "):
            added.add(line[2:])

    diff = Diff(removed=removed, added=added)
    return CheckComparison(
        diff=diff, repo=repo, rule_changes=rule_changes_from_diff(diff)
    )
def rule_changes_from_diff(diff: Diff) -> RuleChanges:
"""
Parse a diff from `ruff check` to determine the additions and removals for each rule.
"""
rule_changes = RuleChanges()
# Count rule changes
for line in diff.lines():
# Find rule change for current line or construction
# + <rule>/<path>:<line>:<column>: <rule_code> <message>
matches = re.search(r": ([A-Z]{1,4}[0-9]{3,4})", line)
if matches is None:
# Handle case where there are no regex matches e.g.
# + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE" # noqa: E501, ERA001
# Which was found in local testing
continue
rule_code = matches.group(1)
# Get current additions and removals for this rule
current_changes = rule_changes[rule_code]
# Check if addition or removal depending on the first character
if line[0] == "+":
current_changes = (current_changes[0] + 1, current_changes[1])
elif line[0] == "-":
current_changes = (current_changes[0], current_changes[1] + 1)
rule_changes[rule_code] = current_changes
return rule_changes