Add formatter ecosystem checks

This commit is contained in:
Zanie
2023-10-24 08:57:45 -05:00
parent 4b79f57872
commit ab49eaae61
15 changed files with 1018 additions and 651 deletions

View File

@@ -8,3 +8,7 @@ version = "0.0.0"
[project.scripts]
ruff-ecosystem = "ruff_ecosystem.cli:entrypoint"
[tool.ruff]
extend-select = ["I"]

View File

@@ -2,7 +2,7 @@
Enables usage with `python -m ruff_ecosystem`
"""
from ruff_ecosystem.cli import entrypoint
import ruff_ecosystem.cli
if __name__ == "__main__":
entrypoint()
ruff_ecosystem.cli.entrypoint()

View File

@@ -0,0 +1,341 @@
from __future__ import annotations
import asyncio
import re
import time
from asyncio import create_subprocess_exec
from collections import Counter
from dataclasses import dataclass, field
from pathlib import Path
from subprocess import PIPE
from typing import TYPE_CHECKING, Any, Iterator, Self, Sequence
from ruff_ecosystem import logger
from ruff_ecosystem.types import (
Comparison,
Diff,
Result,
RuffError,
Serializable,
)
from ruff_ecosystem.markdown import project_section
if TYPE_CHECKING:
from ruff_ecosystem.projects import ClonedRepository, Project
# Matches lines that are summaries rather than diagnostics
CHECK_SUMMARY_LINE_RE = re.compile(r"^(Found \d+ error.*)|(.* fixable with .*)$")
# Parses a diagnostic line (in a diff) to retrieve path and line number
CHECK_DIFF_LINE_RE = re.compile(
r"^(?P<pre>[+-]) (?P<inner>(?P<path>[^:]+):(?P<lnum>\d+):\d+:) (?P<post>.*)$",
)
async def compare_check(
ruff_baseline_executable: Path,
ruff_comparison_executable: Path,
options: CheckOptions,
cloned_repo: ClonedRepository,
) -> Comparison:
async with asyncio.TaskGroup() as tg:
baseline_task = tg.create_task(
ruff_check(
executable=ruff_baseline_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
),
)
comparison_task = tg.create_task(
ruff_check(
executable=ruff_comparison_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
),
)
baseline_output, comparison_output = (
baseline_task.result(),
comparison_task.result(),
)
diff = Diff.new(baseline_output, comparison_output)
return Comparison(diff=diff, repo=cloned_repo)
def summarize_check_result(result: Result) -> str:
# Calculate the total number of rule changes
all_rule_changes = RuleChanges()
for _, comparison in result.completed:
all_rule_changes.update(RuleChanges.from_diff(comparison.diff))
lines = []
total_removed = all_rule_changes.total_removed()
total_added = all_rule_changes.total_added()
error_count = len(result.errored)
if total_removed == 0 and total_added == 0 and error_count == 0:
return "\u2705 ecosystem check detected no `ruff check` changes."
# Summarize the total changes
s = "s" if error_count != 1 else ""
changes = f"(+{total_added}, -{total_removed}, {error_count} error{s})"
lines.append(
f"\u2139\ufe0f ecosystem check **detected `ruff check` changes**. {changes}"
)
lines.append("")
# Then per-project changes
for project, comparison in result.completed:
if not comparison.diff:
continue # Skip empty diffs
diff = deduplicate_and_sort_diff(comparison.diff)
# Display the output diff
diff_lines = []
for line in limit_rule_lines(diff, project.check_options.max_lines_per_rule):
diff_lines.append(add_permalink_to_diagnostic_line(comparison.repo, line))
lines.extend(
project_section(
title=f"+{diff.lines_added}, -{diff.lines_removed}",
content="\n".join(diff_lines),
options=project.check_options.markdown(),
project=project,
)
)
for project, error in result.errored:
lines.extend(
project_section(
title="error",
content=str(error),
options="",
project=project,
)
)
# Display a summary table of changed rules
if all_rule_changes:
lines.append(f"Rules changed: {len(all_rule_changes.rule_codes())}")
lines.append("")
lines.append("| Rule | Changes | Additions | Removals |")
lines.append("| ---- | ------- | --------- | -------- |")
for rule, total in sorted(
all_rule_changes.total_changes_by_rule(),
key=lambda item: item[1], # Sort by the total changes
reverse=True,
):
additions, removals = (
all_rule_changes.added[rule],
all_rule_changes.removed[rule],
)
lines.append(f"| {rule} | {total} | {additions} | {removals} |")
return "\n".join(lines)
def add_permalink_to_diagnostic_line(repo: ClonedRepository, line: str) -> str:
match = CHECK_DIFF_LINE_RE.match(line)
if match is None:
return line
pre, inner, path, lnum, post = match.groups()
url = repo.url_for(path, int(lnum))
return f"{pre} <a href='{url}'>{inner}</a> {post}"
async def ruff_check(
*, executable: Path, path: Path, name: str, options: CheckOptions
) -> Sequence[str]:
"""Run the given ruff binary against the specified path."""
logger.debug(f"Checking {name} with {executable}")
ruff_args = ["check", "--no-cache", "--exit-zero"]
if options.select:
ruff_args.extend(["--select", options.select])
if options.ignore:
ruff_args.extend(["--ignore", options.ignore])
if options.exclude:
ruff_args.extend(["--exclude", options.exclude])
if options.show_fixes:
ruff_args.extend(["--show-fixes", "--ecosystem-ci"])
start = time.time()
proc = await create_subprocess_exec(
executable.absolute(),
*ruff_args,
".",
stdout=PIPE,
stderr=PIPE,
cwd=path,
)
result, err = await proc.communicate()
end = time.time()
logger.debug(f"Finished checking {name} with {executable} in {end - start:.2f}s")
if proc.returncode != 0:
raise RuffError(err.decode("utf8"))
# Strip summary lines so the diff is only diagnostic lines
lines = [
line
for line in result.decode("utf8").splitlines()
if not CHECK_SUMMARY_LINE_RE.match(line)
]
return lines
@dataclass(frozen=True)
class CheckOptions(Serializable):
"""
Ruff check options
"""
select: str = ""
ignore: str = ""
exclude: str = ""
# Generating fixes is slow and verbose
show_fixes: bool = False
# Limit the number of reported lines per rule
max_lines_per_rule: int | None = 50
def markdown(self) -> str:
return f"select {self.select} ignore {self.ignore} exclude {self.exclude}"
@dataclass(frozen=True)
class RuleChanges:
"""
The number of additions and removals by rule code
"""
added: Counter = field(default_factory=Counter)
removed: Counter = field(default_factory=Counter)
def rule_codes(self) -> set[str]:
return set(self.added.keys()).union(self.removed.keys())
def __add__(self, other: Self) -> Self:
if not isinstance(other, type(self)):
return NotImplemented
new = RuleChanges()
new.update(self)
new.update(other)
return new
def update(self, other: Self) -> Self:
self.added.update(other.added)
self.removed.update(other.removed)
return self
def total_added(self) -> int:
return sum(self.added.values())
def total_removed(self) -> int:
return sum(self.removed.values())
def total_changes_by_rule(self) -> Iterator[str, int]:
"""
Yields the sum of changes for each rule
"""
totals = Counter()
totals.update(self.added)
totals.update(self.removed)
yield from totals.items()
@classmethod
def from_diff(cls: type[Self], diff: Diff) -> Self:
"""
Parse a diff from `ruff check` to determine the additions and removals for each rule
"""
rule_changes = cls()
for line in sorted(diff.added):
code = parse_rule_code(line)
if code is not None:
rule_changes.added[code] += 1
for line in sorted(diff.removed):
code = parse_rule_code(line)
if code is not None:
rule_changes.removed[code] += 1
return rule_changes
def __bool__(self):
return bool(self.added or self.removed)
def parse_rule_code(line: str) -> str | None:
"""
Parse the rule code from a diagnostic line
+ <rule>/<path>:<line>:<column>: <rule_code> <message>
"""
matches = re.search(r": ([A-Z]{1,4}[0-9]{3,4})", line)
if matches is None:
# Handle case where there are no regex matches e.g.
# + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE" # noqa: E501, ERA001
# Which was found in local testing
return None
return matches.group(1)
def deduplicate_and_sort_diff(diff: Diff) -> Diff:
"""
Removes any duplicate lines and any unchanged lines from the diff.
"""
lines = set()
for line in diff:
if line.startswith("+ "):
lines.add(line)
elif line.startswith("- "):
lines.add(line)
# Sort without the leading + or -
return Diff(list(sorted(lines, key=lambda line: line[2:])))
def limit_rule_lines(diff: Diff, max_per_rule: int | None = 100) -> list[str]:
"""
Reduce the diff to include a maximum number of lines for each rule.
"""
if max_per_rule is None:
return diff
counts = Counter()
reduced = []
for line in diff:
code = parse_rule_code(line)
# Do not omit any unparsable lines
if not code:
reduced.append(line)
continue
counts[code] += 1
if counts[code] > max_per_rule:
continue
reduced.append(line)
# Add lines summarizing the omitted changes
for code, count in counts.items():
hidden_count = count - max_per_rule
if hidden_count > 0:
reduced.append(f"{hidden_count} changes omitted for rule {code}")
return reduced

View File

@@ -1,16 +1,18 @@
import argparse
import asyncio
import logging
import os
import shutil
import sys
import sysconfig
import tempfile
from pathlib import Path
from contextlib import nullcontext
from ruff_ecosystem.models import RuffCommand
from ruff_ecosystem.emitters import EmitterType
from ruff_ecosystem.defaults import DEFAULT_TARGETS
from ruff_ecosystem.main import main
from pathlib import Path
from signal import SIGINT, SIGTERM
import sys
from ruff_ecosystem.defaults import DEFAULT_TARGETS
from ruff_ecosystem.main import OutputFormat, main
from ruff_ecosystem.projects import RuffCommand
def excepthook(type, value, tb):
@@ -18,7 +20,8 @@ def excepthook(type, value, tb):
# we are in interactive mode or we don't have a tty so call the default
sys.__excepthook__(type, value, tb)
else:
import traceback, pdb
import pdb
import traceback
traceback.print_exception(type, value, tb)
print()
@@ -49,7 +52,7 @@ def entrypoint():
ruff_baseline_executable=args.ruff_baseline,
ruff_comparison_executable=args.ruff_comparison,
targets=DEFAULT_TARGETS,
emitter=EmitterType(args.output_format).to_emitter(),
format=OutputFormat(args.output_format),
cache=Path(cache),
raise_on_failure=args.pdb,
)
@@ -84,7 +87,7 @@ def parse_args() -> argparse.Namespace:
)
parser.add_argument(
"--output-format",
choices=[option.name for option in EmitterType],
choices=[option.name for option in OutputFormat],
default="json",
help="Location for caching cloned repositories",
)
@@ -114,3 +117,21 @@ def parse_args() -> argparse.Namespace:
)
return parser.parse_args()
def get_executable_path(name: str) -> str | None:
# Add suffix for Windows executables
name += ".exe" if sys.platform == "win32" else ""
path = os.path.join(sysconfig.get_path("scripts"), name)
# The executable in the current interpreter's scripts directory.
if os.path.exists(path):
return path
# The executable in the global environment.
environment_path = shutil.which("ruff")
if environment_path:
return environment_path
return None

View File

@@ -1,66 +1,64 @@
from .models import Repository, CheckOptions, Target
from ruff_ecosystem.projects import CheckOptions, Project, Repository
# TODO: Consider exporting this as JSON instead for consistent setup
# TODO: Consider exporting this as JSON
DEFAULT_TARGETS = [
# Target(repo=Repository(owner="DisnakeDev", name="disnake", branch="master")),
# Target(repo=Repository(owner="PostHog", name="HouseWatch", branch="main")),
# Target(repo=Repository(owner="RasaHQ", name="rasa", branch="main")),
# Target(repo=Repository(owner="Snowflake-Labs", name="snowcli", branch="main")),
# Target(repo=Repository(owner="aiven", name="aiven-client", branch="main")),
# Target(repo=Repository(owner="alteryx", name="featuretools", branch="main")),
# Target(
# repo=Repository(owner="apache", name="airflow", branch="main"),
# Project(repo=Repository(owner="DisnakeDev", name="disnake", ref="master")),
# Project(repo=Repository(owner="PostHog", name="HouseWatch", ref="main")),
# Project(repo=Repository(owner="RasaHQ", name="rasa", ref="main")),
# Project(repo=Repository(owner="Snowflake-Labs", name="snowcli", ref="main")),
# Project(repo=Repository(owner="aiven", name="aiven-client", ref="main")),
# Project(repo=Repository(owner="alteryx", name="featuretools", ref="main")),
# Project(
# repo=Repository(owner="apache", name="airflow", ref="main"),
# check_options=CheckOptions(select="ALL"),
# ),
# Target(repo=Repository(owner="aws", name="aws-sam-cli", branch="develop")),
# Target(repo=Repository(owner="bloomberg", name="pytest-memray", branch="main")),
# Target(
# repo=Repository(owner="bokeh", name="bokeh", branch="branch-3.3"),
# check_options=CheckOptions(select="ALL"),
# ),
# Target(repo=Repository(owner="commaai", name="openpilot", branch="master")),
# Target(repo=Repository(owner="demisto", name="content", branch="master")),
# Target(repo=Repository(owner="docker", name="docker-py", branch="main")),
# Target(
# repo=Repository(owner="freedomofpress", name="securedrop", branch="develop")
# ),
# Target(repo=Repository(owner="fronzbot", name="blinkpy", branch="dev")),
# Target(repo=Repository(owner="ibis-project", name="ibis", branch="master")),
# Target(repo=Repository(owner="ing-bank", name="probatus", branch="main")),
# Target(repo=Repository(owner="jrnl-org", name="jrnl", branch="develop")),
# Target(repo=Repository(owner="latchbio", name="latch", branch="main")),
# Target(repo=Repository(owner="lnbits", name="lnbits", branch="main")),
# Target(repo=Repository(owner="milvus-io", name="pymilvus", branch="master")),
# Target(repo=Repository(owner="mlflow", name="mlflow", branch="master")),
# Target(repo=Repository(owner="model-bakers", name="model_bakery", branch="main")),
# Target(repo=Repository(owner="pandas-dev", name="pandas", branch="main")),
# Target(repo=Repository(owner="prefecthq", name="prefect", branch="main")),
# Target(repo=Repository(owner="pypa", name="build", branch="main")),
# Target(repo=Repository(owner="pypa", name="cibuildwheel", branch="main")),
# Target(repo=Repository(owner="pypa", name="pip", branch="main")),
# Target(repo=Repository(owner="pypa", name="setuptools", branch="main")),
# Target(repo=Repository(owner="python", name="mypy", branch="master")),
# Target(
# Project(repo=Repository(owner="aws", name="aws-sam-cli", ref="develop")),
# Project(repo=Repository(owner="bloomberg", name="pytest-memray", ref="main")),
Project(
repo=Repository(owner="bokeh", name="bokeh", ref="branch-3.3"),
check_options=CheckOptions(select="ALL"),
),
# Project(repo=Repository(owner="commaai", name="openpilot", ref="master")),
# Project(repo=Repository(owner="demisto", name="content", ref="master")),
# Project(repo=Repository(owner="docker", name="docker-py", ref="main")),
# Project(repo=Repository(owner="freedomofpress", name="securedrop", ref="develop")),
# Project(repo=Repository(owner="fronzbot", name="blinkpy", ref="dev")),
# Project(repo=Repository(owner="ibis-project", name="ibis", ref="master")),
# Project(repo=Repository(owner="ing-bank", name="probatus", ref="main")),
# Project(repo=Repository(owner="jrnl-org", name="jrnl", ref="develop")),
# Project(repo=Repository(owner="latchbio", name="latch", ref="main")),
# Project(repo=Repository(owner="lnbits", name="lnbits", ref="main")),
# Project(repo=Repository(owner="milvus-io", name="pymilvus", ref="master")),
# Project(repo=Repository(owner="mlflow", name="mlflow", ref="master")),
# Project(repo=Repository(owner="model-bakers", name="model_bakery", ref="main")),
# Project(repo=Repository(owner="pandas-dev", name="pandas", ref="main")),
# Project(repo=Repository(owner="prefecthq", name="prefect", ref="main")),
# Project(repo=Repository(owner="pypa", name="build", ref="main")),
# Project(repo=Repository(owner="pypa", name="cibuildwheel", ref="main")),
# Project(repo=Repository(owner="pypa", name="pip", ref="main")),
# Project(repo=Repository(owner="pypa", name="setuptools", ref="main")),
# Project(repo=Repository(owner="python", name="mypy", ref="master")),
# Project(
# repo=Repository(
# owner="python",
# name="typeshed",
# branch="main",
# ref="main",
# ),
# check_options=CheckOptions(select="PYI"),
# ),
# Target(repo=Repository(owner="python-poetry", name="poetry", branch="master")),
# Target(repo=Repository(owner="reflex-dev", name="reflex", branch="main")),
# Target(repo=Repository(owner="rotki", name="rotki", branch="develop")),
# Target(repo=Repository(owner="scikit-build", name="scikit-build", branch="main")),
# Target(
# repo=Repository(owner="scikit-build", name="scikit-build-core", branch="main")
# Project(repo=Repository(owner="python-poetry", name="poetry", ref="master")),
# Project(repo=Repository(owner="reflex-dev", name="reflex", ref="main")),
# Project(repo=Repository(owner="rotki", name="rotki", ref="develop")),
# Project(repo=Repository(owner="scikit-build", name="scikit-build", ref="main")),
# Project(
# repo=Repository(owner="scikit-build", name="scikit-build-core", ref="main")
# ),
# Project(repo=Repository(owner="sphinx-doc", name="sphinx", ref="master")),
# Project(repo=Repository(owner="spruceid", name="siwe-py", ref="main")),
# Project(repo=Repository(owner="tiangolo", name="fastapi", ref="master")),
# Project(repo=Repository(owner="yandex", name="ch-backup", ref="main")),
# Project(
# repo=Repository(owner="zulip", name="zulip", ref="main"),
# check_options=CheckOptions(select="ALL"),
# ),
# Target(repo=Repository(owner="sphinx-doc", name="sphinx", branch="master")),
# Target(repo=Repository(owner="spruceid", name="siwe-py", branch="main")),
# Target(repo=Repository(owner="tiangolo", name="fastapi", branch="master")),
# Target(repo=Repository(owner="yandex", name="ch-backup", branch="main")),
Target(
repo=Repository(owner="zulip", name="zulip", branch="main"),
check_options=CheckOptions(select="ALL"),
),
]

View File

@@ -1,99 +0,0 @@
from enum import Enum
import abc
from ruff_ecosystem.models import Target, Diff, ClonedRepository, Result
from ruff_ecosystem.ruff import CHECK_DIFF_LINE_RE
import traceback
import json
from pathlib import Path
import dataclasses
class Emitter(abc.ABC):
@abc.abstractclassmethod
def emit_error(cls, target: Target, exc: Exception):
pass
@abc.abstractclassmethod
def emit_diff(cls, target: Target, diff: Diff, cloned_repo: ClonedRepository):
pass
@abc.abstractclassmethod
def emit_result(cls, result: Result):
pass
class DebugEmitter(Emitter):
def emit_error(cls, target: Target, exc: Exception):
print(f"Error in {target.repo.fullname}")
traceback.print_exception(exc)
def emit_diff(cls, target: Target, diff: Diff, cloned_repo: ClonedRepository):
pass
class JSONEmitter(Emitter):
class DataclassJSONEncoder(json.JSONEncoder):
def default(self, o):
if dataclasses.is_dataclass(o):
return dataclasses.asdict(o)
if isinstance(o, set):
return tuple(o)
if isinstance(o, Path):
return str(o)
return super().default(o)
def emit_error(cls, target: Target, exc: Exception):
pass
def emit_diff(cls, target: Target, diff: Diff, cloned_repo: ClonedRepository):
pass
def emit_result(cls, result: Result):
print(json.dumps(result, indent=4, cls=cls.DataclassJSONEncoder))
class MarkdownEmitter(Emitter):
def emit_error(cls, target: Target, exc: Exception):
cls._print(title="error", content=f"```\n{exc}\n```", target=target)
def emit_diff(cls, target: Target, diff: Diff, cloned_repo: ClonedRepository):
changes = f"+{len(diff.added)}, -{len(diff.removed)}"
content = ""
for line in list(diff):
match = CHECK_DIFF_LINE_RE.match(line)
if match is None:
content += line + "\n"
continue
pre, inner, path, lnum, post = match.groups()
url = cloned_repo.url_for(path, int(lnum))
content += f"{pre} <a href='{url}'>{inner}</a> {post}" + "\n"
cls._print(title=changes, content=f"<pre>\n{content}\n</pre>", target=target)
def _print(cls, title: str, content: str, target: Target):
print(f"<details><summary>{target.repo.fullname} ({title})</summary>")
print(target.repo.url, target.check_options.summary())
print("<p>")
print()
print(content)
print()
print("</p>")
print("</details>")
class EmitterType(Enum):
markdown = "markdown"
json = "json"
def to_emitter(self) -> Emitter:
match self:
case self.markdown:
return MarkdownEmitter()
case self.json:
return JSONEmitter()
case _:
raise ValueError("Unknown emitter type {self}")

View File

@@ -0,0 +1,238 @@
from __future__ import annotations
import time
from asyncio import create_subprocess_exec
from dataclasses import dataclass
from pathlib import Path
from subprocess import PIPE
from unidiff import PatchSet
from typing import TYPE_CHECKING, Self, Sequence
import re
from ruff_ecosystem import logger
from ruff_ecosystem.markdown import project_section
from ruff_ecosystem.types import Comparison, Diff, Result, RuffError
if TYPE_CHECKING:
from ruff_ecosystem.projects import ClonedRepository, Project
FORMAT_IGNORE_LINES = re.compile("^warning: `ruff format` is a work-in-progress.*")
def summarize_format_result(result: Result) -> str:
lines = []
total_lines_removed = total_lines_added = 0
total_files_modified = 0
error_count = len(result.errored)
patch_sets = []
for project, comparison in result.completed:
total_lines_added += comparison.diff.lines_added
total_lines_removed += comparison.diff.lines_removed
patch_set = PatchSet("\n".join(comparison.diff.lines))
patch_sets.append(patch_set)
total_files_modified += len(patch_set.modified_files)
if total_lines_removed == 0 and total_lines_added == 0 and error_count == 0:
return "\u2705 ecosystem check detected no format changes."
# Summarize the total changes
es = "s" if error_count != 1 else ""
fs = "s" if total_files_modified != 1 else ""
changes = f"(+{total_lines_added}, -{total_lines_removed} lines in {total_files_modified} file{fs}, {error_count} error{es})"
lines.append(f"\u2139\ufe0f ecosystem check **detected format changes**. {changes}")
lines.append("")
# Then per-project changes
for (project, comparison), patch_set in zip(result.completed, patch_sets):
if not comparison.diff:
continue # Skip empty diffs
files = len(patch_set.modified_files)
s = "s" if files != 1 else ""
title = f"+{comparison.diff.lines_added}, -{comparison.diff.lines_removed} lines in {files} file{s}"
lines.extend(
project_section(
title=title,
content=patch_set_with_permalinks(patch_set, comparison.repo),
options=project.format_options.markdown(),
project=project,
)
)
for project, error in result.errored:
lines.extend(
project_section(
title="error",
content=str(error),
options="",
project=project,
)
)
return "\n".join(lines)
async def ruff_format(
*,
executable: Path,
path: Path,
name: str,
options: FormatOptions,
diff: bool = False,
) -> Sequence[str]:
"""Run the given ruff binary against the specified path."""
logger.debug(f"Formatting {name} with {executable}")
ruff_args = ["format"]
if diff:
ruff_args.append("--diff")
start = time.time()
proc = await create_subprocess_exec(
executable.absolute(),
*ruff_args,
".",
stdout=PIPE,
stderr=PIPE,
cwd=path,
)
result, err = await proc.communicate()
end = time.time()
logger.debug(f"Finished formatting {name} with {executable} in {end - start:.2f}s")
if proc.returncode not in [0, 1]:
raise RuffError(err.decode("utf8"))
lines = result.decode("utf8").splitlines()
return lines
async def black_format(
*,
executable: Path,
path: Path,
name: str,
) -> Sequence[str]:
"""Run the given black binary against the specified path."""
logger.debug(f"Formatting {name} with {executable}")
black_args = []
start = time.time()
proc = await create_subprocess_exec(
executable.absolute(),
*black_args,
".",
stdout=PIPE,
stderr=PIPE,
cwd=path,
)
result, err = await proc.communicate()
end = time.time()
logger.debug(f"Finished formatting {name} with {executable} in {end - start:.2f}s")
if proc.returncode != 0:
raise RuffError(err.decode("utf8"))
lines = result.decode("utf8").splitlines()
return [line for line in lines if not FORMAT_IGNORE_LINES.match(line)]
async def compare_format(
ruff_baseline_executable: Path,
ruff_comparison_executable: Path,
options: FormatOptions,
cloned_repo: ClonedRepository,
):
# Run format without diff to get the baseline
await ruff_format(
executable=ruff_baseline_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
)
# Then get the diff from stdout
diff = await ruff_format(
executable=ruff_comparison_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
diff=True,
)
return create_format_comparison(cloned_repo, FormatDiff(lines=diff))
def create_format_comparison(repo: ClonedRepository, diff: str) -> FormatComparison:
return FormatComparison(diff=diff, repo=repo)
@dataclass(frozen=True)
class FormatOptions:
"""
Ruff format options
"""
pass
def markdown(self: Self) -> str:
return ""
@dataclass(frozen=True)
class FormatDiff(Diff):
"""A diff from ruff format."""
lines: list[str]
def __bool__(self: Self) -> bool:
"""Return true if this diff is non-empty."""
return bool(self.lines)
@property
def added(self) -> set[str]:
return set(line for line in self.lines if line.startswith("+"))
@property
def removed(self) -> set[str]:
return set(line for line in self.lines if line.startswith("-"))
@dataclass(frozen=True)
class FormatComparison(Comparison):
diff: FormatDiff
repo: ClonedRepository
@dataclass(frozen=True)
class FormatResult(Result):
comparisons: tuple[Project, FormatComparison]
def patch_set_with_permalinks(patch_set: PatchSet, repo: ClonedRepository) -> str:
lines = []
for file_patch in patch_set:
file_link = repo.url_for(file_patch.path)
lines.append(f"<a href='{file_link}'>{file_patch.path}</a>")
for hunk in file_patch:
hunk_link = repo.url_for(
file_patch.path,
hunk.source_start,
hunk.source_start + hunk.source_length,
)
hunk_lines = str(hunk).splitlines()
# Add a link before the hunk
link_title = file_patch.path + hunk_link.split(file_patch.path)[-1]
lines.append(f"<a href='{hunk_link}'>{link_title}</a>")
# Wrap the contents of the hunk in a diff code block
lines.append("```diff")
lines.extend(hunk_lines[1:])
lines.append("```")
return "\n".join(lines)

View File

@@ -1,72 +0,0 @@
from ruff_ecosystem.models import Repository, ClonedRepository
from contextlib import asynccontextmanager
from pathlib import Path
from typing import AsyncGenerator
from asyncio import create_subprocess_exec
from subprocess import PIPE
from ruff_ecosystem import logger
@asynccontextmanager
async def clone(
repo: Repository, checkout_dir: Path
) -> AsyncGenerator[ClonedRepository, None]:
"""Shallow clone this repository to a temporary directory."""
if checkout_dir.exists():
logger.debug(f"Reusing {repo.owner}:{repo.name}")
yield await _cloned_repository(repo, checkout_dir)
return
logger.debug(f"Cloning {repo.owner}:{repo.name} to {checkout_dir}")
command = [
"git",
"clone",
"--config",
"advice.detachedHead=false",
"--quiet",
"--depth",
"1",
"--no-tags",
]
if repo.branch:
command.extend(["--branch", repo.branch])
command.extend(
[
f"https://github.com/{repo.owner}/{repo.name}",
checkout_dir,
],
)
process = await create_subprocess_exec(*command, env={"GIT_TERMINAL_PROMPT": "0"})
status_code = await process.wait()
logger.debug(
f"Finished cloning {repo.fullname} with status {status_code}",
)
yield await _cloned_repository(repo, checkout_dir)
async def _cloned_repository(repo: Repository, checkout_dir: Path) -> ClonedRepository:
return ClonedRepository(
name=repo.name,
owner=repo.owner,
branch=repo.branch,
path=checkout_dir,
commit_hash=await _get_commit_hash(checkout_dir),
)
async def _get_commit_hash(checkout_dir: Path) -> str:
"""
Return the commit sha for the repository in the checkout directory.
"""
process = await create_subprocess_exec(
*["git", "rev-parse", "HEAD"],
cwd=checkout_dir,
stdout=PIPE,
)
stdout, _ = await process.communicate()
assert await process.wait() == 0, f"Failed to retrieve commit sha at {checkout_dir}"
return stdout.decode().strip()

View File

@@ -1,32 +1,34 @@
from ruff_ecosystem.models import (
RuffCommand,
Target,
Diff,
ClonedRepository,
RuleChanges,
CheckComparison,
Result,
)
from pathlib import Path
from ruff_ecosystem import logger
import asyncio
from ruff_ecosystem.git import clone
from ruff_ecosystem.ruff import ruff_check, ruff_format
from ruff_ecosystem.emitters import Emitter
import difflib
import dataclasses
import json
from enum import Enum
from pathlib import Path
from typing import TypeVar
import re
from ruff_ecosystem import logger
from ruff_ecosystem.check import compare_check, summarize_check_result
from ruff_ecosystem.format import compare_format, summarize_format_result
from ruff_ecosystem.projects import (
Project,
RuffCommand,
)
from ruff_ecosystem.types import Comparison, Result, Serializable
T = TypeVar("T")
class OutputFormat(Enum):
markdown = "markdown"
json = "json"
async def main(
command: RuffCommand,
ruff_baseline_executable: Path,
ruff_comparison_executable: Path,
targets: list[Target],
targets: list[Project],
cache: Path | None,
emitter: Emitter,
format: OutputFormat,
max_parallelism: int = 50,
raise_on_failure: bool = False,
) -> None:
@@ -42,7 +44,7 @@ async def main(
async with semaphore:
return await coroutine
comparisons: list[Exception | CheckComparison] = await asyncio.gather(
comparisons: list[Exception | Comparison] = await asyncio.gather(
*[
limited_parallelism(
clone_and_compare(
@@ -59,177 +61,82 @@ async def main(
)
comparisons_by_target = dict(zip(targets, comparisons, strict=True))
# Calculate totals
total_removed = total_added = errors = 0
total_rule_changes = RuleChanges()
for comparison in comparisons_by_target.values():
if isinstance(comparison, Exception):
errors += 1
else:
total_removed += len(comparison.diff.removed)
total_added += len(comparison.diff.added)
total_rule_changes += comparison.rule_changes
errors = []
comparisons = []
errors, successes = [], []
for target, comparison in comparisons_by_target.items():
if isinstance(comparison, Exception):
errors.append((target, comparison))
continue
if comparison.diff:
comparisons.append((target, comparison))
successes.append((target, comparison))
else:
continue
result = Result(completed=successes, errored=errors)
result = Result(
total_added=total_added,
total_removed=total_removed,
total_rule_changes=total_rule_changes,
comparisons=comparisons,
errors=errors,
)
match format:
case OutputFormat.json:
print(json.dumps(result, indent=4, cls=JSONEncoder))
case OutputFormat.markdown:
match command:
case RuffCommand.check:
print(summarize_check_result(result))
case RuffCommand.format:
print(summarize_format_result(result))
case _:
raise ValueError(f"Unknown target Ruff command {command}")
case _:
raise ValueError(f"Unknown output format {format}")
emitter.emit_result(result)
return
if total_removed == 0 and total_added == 0 and errors == 0:
print("\u2705 ecosystem check detected no changes.")
return
s = "s" if errors != 1 else ""
changes = f"(+{total_added}, -{total_removed}, {errors} error{s})"
print(f"\u2139\ufe0f ecosystem check **detected changes**. {changes}")
print()
for target, comparison in comparisons_by_target.items():
if isinstance(comparison, Exception):
emitter.emit_error(target, comparison)
continue
if comparison.diff:
emitter.emit_diff(target, comparison.diff, comparison.repo)
else:
continue
if len(total_rule_changes.rule_codes()) > 0:
print(f"Rules changed: {len(total_rule_changes.rule_codes())}")
print()
print("| Rule | Changes | Additions | Removals |")
print("| ---- | ------- | --------- | -------- |")
for rule, (additions, removals) in sorted(
total_rule_changes.items(),
key=lambda x: (x[1][0] + x[1][1]),
reverse=True,
):
print(f"| {rule} | {additions + removals} | {additions} | {removals} |")
return None
async def clone_and_compare(
command: RuffCommand,
ruff_baseline_executable: Path,
ruff_comparison_executable: Path,
target: Target,
target: Project,
cache: Path,
) -> CheckComparison:
) -> Comparison:
"""Check a specific repository against two versions of ruff."""
assert ":" not in target.repo.owner
assert ":" not in target.repo.name
match command:
case RuffCommand.check:
ruff_task, create_comparison, options = (
ruff_check,
create_check_comparison,
compare, options = (
compare_check,
target.check_options,
)
case RuffCommand.format:
ruff_task, create_comparison, options = (
ruff_format,
create_format_comparison,
compare, options = (
compare_format,
target.format_options,
)
case _:
raise ValueError(f"Unknowm target Ruff command {command}")
raise ValueError(f"Unknown target Ruff command {command}")
checkout_dir = cache.joinpath(f"{target.repo.owner}:{target.repo.name}")
async with clone(target.repo, checkout_dir) as cloned_repo:
try:
async with asyncio.TaskGroup() as tg:
baseline_task = tg.create_task(
ruff_task(
executable=ruff_baseline_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
),
)
comparison_task = tg.create_task(
ruff_task(
executable=ruff_comparison_executable.resolve(),
path=cloned_repo.path,
name=cloned_repo.fullname,
options=options,
),
)
except ExceptionGroup as e:
raise e.exceptions[0] from e
cloned_repo = await target.repo.clone(checkout_dir)
return create_comparison(
cloned_repo, baseline_task.result(), comparison_task.result()
)
try:
return await compare(
ruff_baseline_executable,
ruff_comparison_executable,
options,
cloned_repo,
)
except ExceptionGroup as e:
raise e.exceptions[0] from e
def create_check_comparison(
repo: ClonedRepository, baseline_output: str, comparison_output: str
) -> CheckComparison:
removed, added = set(), set()
for line in difflib.ndiff(baseline_output, comparison_output):
if line.startswith("- "):
removed.add(line[2:])
elif line.startswith("+ "):
added.add(line[2:])
diff = Diff(removed=removed, added=added)
return CheckComparison(
diff=diff, repo=repo, rule_changes=rule_changes_from_diff(diff)
)
def rule_changes_from_diff(diff: Diff) -> RuleChanges:
"""
Parse a diff from `ruff check` to determine the additions and removals for each rule.
"""
rule_changes = RuleChanges()
# Count rule changes
for line in diff.lines():
# Find rule change for current line or construction
# + <rule>/<path>:<line>:<column>: <rule_code> <message>
matches = re.search(r": ([A-Z]{1,4}[0-9]{3,4})", line)
if matches is None:
# Handle case where there are no regex matches e.g.
# + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE" # noqa: E501, ERA001
# Which was found in local testing
continue
rule_code = matches.group(1)
# Get current additions and removals for this rule
current_changes = rule_changes[rule_code]
# Check if addition or removal depending on the first character
if line[0] == "+":
current_changes = (current_changes[0] + 1, current_changes[1])
elif line[0] == "-":
current_changes = (current_changes[0], current_changes[1] + 1)
rule_changes[rule_code] = current_changes
return rule_changes
class JSONEncoder(json.JSONEncoder):
def default(self, o):
if isinstance(o, Serializable):
return o.jsonable()
if dataclasses.is_dataclass(o):
return dataclasses.asdict(o)
if isinstance(o, set):
return tuple(o)
if isinstance(o, Path):
return str(o)
if isinstance(o, Exception):
return str(o)
return super().default(o)

View File

@@ -0,0 +1,25 @@
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ruff_ecosystem.projects import Project
def project_section(
title: str, content: str, options: str, project: Project
) -> list[str]:
lines = []
lines.append(
f'<details><summary><a href="{project.repo.url}">{project.repo.fullname}</a> ({title})</summary>'
)
lines.append(options)
lines.append("<p>")
lines.append("")
lines.append(content)
lines.append("")
lines.append("</p>")
lines.append("</details>")
return lines

View File

@@ -1,160 +0,0 @@
from enum import Enum
from dataclasses import dataclass, field
from typing import Self, Iterator
import heapq
from pathlib import Path
class RuffCommand(Enum):
check = "check"
format = "format"
@dataclass(frozen=True)
class Repository:
"""
A remote GitHub repository
"""
owner: str
name: str
branch: str | None
@property
def fullname(self) -> str:
return f"{self.owner}/{self.name}"
@property
def url(self: Self) -> str:
return f"https://github.com/{self.owner}/{self.name}"
@dataclass(frozen=True)
class ClonedRepository(Repository):
"""
A cloned GitHub repository, which includes the hash of the cloned commit.
"""
commit_hash: str
path: Path
def url_for(self: Self, path: str, line_number: int | None = None) -> str:
"""
Return the remote GitHub URL for the given path in this repository.
"""
# Default to main branch
url = f"https://github.com/{self.owner}/{self.name}/blob/{self.commit_hash}/{path}"
if line_number:
url += f"#L{line_number}"
return url
@property
def url(self: Self) -> str:
return f"https://github.com/{self.owner}/{self.name}@{self.commit_hash}"
@dataclass(frozen=True)
class Diff:
"""A diff between two runs of ruff."""
removed: set[str]
added: set[str]
def __bool__(self: Self) -> bool:
"""Return true if this diff is non-empty."""
return bool(self.removed or self.added)
def lines(self: Self) -> Iterator[str]:
"""Iterate through the changed lines in diff format."""
for line in heapq.merge(sorted(self.removed), sorted(self.added)):
if line in self.removed:
yield f"- {line}"
else:
yield f"+ {line}"
@dataclass(frozen=True)
class RuleChanges:
changes: dict[str, tuple[int, int]] = field(default_factory=dict)
def rule_codes(self) -> list[str]:
return list(self.changes.keys())
def items(self) -> Iterator[tuple[str, tuple[int, int]]]:
return self.changes.items()
def __setitem__(self, key: str, value: tuple[int, int]) -> None:
self.changes[key] = value
def __getitem__(self, key: str) -> tuple[int, int]:
return self.changes.get(key, (0, 0))
def __add__(self, other: Self) -> Self:
if not isinstance(other, type(self)):
return NotImplemented
result = self.changes.copy()
for rule_code, (added, removed) in other.changes.items():
if rule_code in result:
result[rule_code] = (
result[rule_code][0] + added,
result[rule_code][1] + removed,
)
else:
result[rule_code] = (added, removed)
return RuleChanges(changes=result)
@dataclass(frozen=True)
class CheckComparison:
diff: Diff
repo: ClonedRepository
rule_changes: RuleChanges
@dataclass(frozen=True)
class CheckOptions:
"""
Ruff check options
"""
select: str = ""
ignore: str = ""
exclude: str = ""
# Generating fixes is slow and verbose
show_fixes: bool = False
def summary(self) -> str:
return f"select {self.select} ignore {self.ignore} exclude {self.exclude}"
@dataclass(frozen=True)
class FormatOptions:
"""
Ruff format options
"""
pass
@dataclass(frozen=True)
class Target:
"""
An ecosystem target
"""
repo: Repository
check_options: CheckOptions = field(default_factory=CheckOptions)
format_options: FormatOptions = field(default_factory=FormatOptions)
@dataclass(frozen=True)
class Result:
total_added: int
total_removed: int
total_rule_changes: RuleChanges
comparisons: tuple[Target, CheckComparison]
errors: tuple[Target, Exception]

View File

@@ -0,0 +1,168 @@
from __future__ import annotations
from asyncio import create_subprocess_exec
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from subprocess import PIPE
from typing import AsyncGenerator, Self
from ruff_ecosystem import logger
from ruff_ecosystem.check import CheckOptions
from ruff_ecosystem.format import FormatOptions
from ruff_ecosystem.types import Serializable
@dataclass(frozen=True)
class Project(Serializable):
"""
An ecosystem target
"""
repo: Repository
check_options: CheckOptions = field(default_factory=lambda: CheckOptions())
format_options: FormatOptions = field(default_factory=lambda: FormatOptions())
class RuffCommand(Enum):
check = "check"
format = "format"
class ProjectSetupError(Exception):
"""An error setting up a project."""
@dataclass(frozen=True)
class Repository(Serializable):
"""
A remote GitHub repository
"""
owner: str
name: str
ref: str | None
@property
def fullname(self) -> str:
return f"{self.owner}/{self.name}"
@property
def url(self: Self) -> str:
return f"https://github.com/{self.owner}/{self.name}"
async def clone(
self: Self, checkout_dir: Path
) -> AsyncGenerator[ClonedRepository, None]:
"""
Shallow clone this repository
"""
if checkout_dir.exists():
logger.debug(f"Reusing {self.owner}:{self.name}")
if self.ref:
logger.debug(f"Checking out ref {self.ref}")
process = await create_subprocess_exec(
*["git", "checkout", "-f", self.ref],
cwd=checkout_dir,
env={"GIT_TERMINAL_PROMPT": "0"},
stdout=PIPE,
stderr=PIPE,
)
if await process.wait() != 0:
_, stderr = await process.communicate()
raise ProjectSetupError(
f"Failed to checkout {self.ref}: {stderr.decode()}"
)
return ClonedRepository(
name=self.name,
owner=self.owner,
ref=self.ref,
path=checkout_dir,
commit_hash=await self._get_head_commit(checkout_dir),
)
logger.debug(f"Cloning {self.owner}:{self.name} to {checkout_dir}")
command = [
"git",
"clone",
"--config",
"advice.detachedHead=false",
"--quiet",
"--depth",
"1",
"--no-tags",
]
if self.ref:
command.extend(["--branch", self.ref])
command.extend(
[
f"https://github.com/{self.owner}/{self.name}",
checkout_dir,
],
)
process = await create_subprocess_exec(
*command, env={"GIT_TERMINAL_PROMPT": "0"}
)
status_code = await process.wait()
logger.debug(
f"Finished cloning {self.fullname} with status {status_code}",
)
return ClonedRepository(
name=self.name,
owner=self.owner,
ref=self.ref,
path=checkout_dir,
commit_hash=await self._get_head_commit(checkout_dir),
)
@staticmethod
async def _get_head_commit(checkout_dir: Path) -> str:
"""
Return the commit sha for the repository in the checkout directory.
"""
process = await create_subprocess_exec(
*["git", "rev-parse", "HEAD"],
cwd=checkout_dir,
stdout=PIPE,
)
stdout, _ = await process.communicate()
if await process.wait() != 0:
raise ProjectSetupError(f"Failed to retrieve commit sha at {checkout_dir}")
return stdout.decode().strip()
@dataclass(frozen=True)
class ClonedRepository(Repository, Serializable):
"""
A cloned GitHub repository, which includes the hash of the cloned commit.
"""
commit_hash: str
path: Path
def url_for(
self: Self,
path: str,
line_number: int | None = None,
end_line_number: int | None = None,
) -> str:
"""
Return the remote GitHub URL for the given path in this repository.
"""
url = f"https://github.com/{self.owner}/{self.name}/blob/{self.commit_hash}/{path}"
if line_number:
url += f"#L{line_number}"
if end_line_number:
url += f"-L{end_line_number}"
return url
@property
def url(self: Self) -> str:
return f"https://github.com/{self.owner}/{self.name}@{self.commit_hash}"

View File

@@ -1,90 +0,0 @@
from pathlib import Path
from ruff_ecosystem import logger
from ruff_ecosystem.models import CheckOptions, FormatOptions
import time
from asyncio import create_subprocess_exec
from subprocess import PIPE
from typing import Sequence
import re
CHECK_SUMMARY_LINE_RE = re.compile(
r"^(Found \d+ error.*)|(.*potentially fixable with.*)$"
)
CHECK_DIFF_LINE_RE = re.compile(
r"^(?P<pre>[+-]) (?P<inner>(?P<path>[^:]+):(?P<lnum>\d+):\d+:) (?P<post>.*)$",
)
class RuffError(Exception):
"""An error reported by ruff."""
async def ruff_check(
*, executable: Path, path: Path, name: str, options: CheckOptions
) -> Sequence[str]:
"""Run the given ruff binary against the specified path."""
logger.debug(f"Checking {name} with {executable}")
ruff_args = ["check", "--no-cache", "--exit-zero"]
if options.select:
ruff_args.extend(["--select", options.select])
if options.ignore:
ruff_args.extend(["--ignore", options.ignore])
if options.exclude:
ruff_args.extend(["--exclude", options.exclude])
if options.show_fixes:
ruff_args.extend(["--show-fixes", "--ecosystem-ci"])
start = time.time()
proc = await create_subprocess_exec(
executable.absolute(),
*ruff_args,
".",
stdout=PIPE,
stderr=PIPE,
cwd=path,
)
result, err = await proc.communicate()
end = time.time()
logger.debug(f"Finished checking {name} with {executable} in {end - start:.2f}")
if proc.returncode != 0:
raise RuffError(err.decode("utf8"))
lines = [
line
for line in result.decode("utf8").splitlines()
if not CHECK_SUMMARY_LINE_RE.match(line)
]
return sorted(lines)
async def ruff_format(
*, executable: Path, path: Path, name: str, options: FormatOptions
) -> Sequence[str]:
"""Run the given ruff binary against the specified path."""
logger.debug(f"Checking {name} with {executable}")
ruff_args = ["format", "--no-cache", "--exit-zero"]
start = time.time()
proc = await create_subprocess_exec(
executable.absolute(),
*ruff_args,
".",
stdout=PIPE,
stderr=PIPE,
cwd=path,
)
result, err = await proc.communicate()
end = time.time()
logger.debug(f"Finished formatting {name} with {executable} in {end - start:.2f}")
if proc.returncode != 0:
raise RuffError(err.decode("utf8"))
lines = result.decode("utf8").splitlines()
return lines

View File

@@ -0,0 +1,70 @@
from __future__ import annotations
import abc
import dataclasses
import difflib
from dataclasses import dataclass, is_dataclass
from typing import TYPE_CHECKING, Any, Iterable, Sequence, Generator
import heapq
if TYPE_CHECKING:
from ruff_ecosystem.projects import ClonedRepository, Project
class Serializable(abc.ABC):
"""
Allows serialization of content by casting to a JSON-compatible type.
"""
def jsonable(self) -> Any:
# Default implementation for dataclasses
if is_dataclass(self):
return dataclasses.asdict(self)
raise NotImplementedError()
class Diff(Serializable):
def __init__(self, lines: Iterable[str]) -> None:
self.lines = list(lines)
# Compute added and removed lines once
self.added = list(line[2:] for line in self.lines if line.startswith("+ "))
self.removed = list(line[2:] for line in self.lines if line.startswith("- "))
def __bool__(self) -> bool:
return bool(self.added or self.removed)
def __iter__(self) -> Generator[str, None, None]:
yield from self.lines
@property
def lines_added(self):
return len(self.added)
@property
def lines_removed(self):
return len(self.removed)
@classmethod
def new(cls, baseline: Sequence[str], comparison: Sequence[str]):
return cls(difflib.ndiff(baseline, comparison))
def jsonable(self) -> Any:
return self.lines
@dataclass(frozen=True)
class Result(Serializable):
errored: list[tuple[Project, Exception]]
completed: list[tuple[Project, Comparison]]
@dataclass(frozen=True)
class Comparison(Serializable):
diff: Diff
repo: ClonedRepository
class RuffError(Exception):
"""An error reported by ruff."""