diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 48fd480201..800d815cb5 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -175,7 +175,7 @@ jobs: - cargo-test - determine_changes # Only runs on pull requests, since that is the only we way we can find the base version for comparison. - if: github.event_name == 'pull_request' && needs.determine_changes.outputs.linter == 'true' + if: github.event_name == 'pull_request' steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v4 @@ -196,13 +196,29 @@ jobs: branch: ${{ github.event.pull_request.base.ref }} check_artifacts: true - - name: Run ecosystem check + - name: Install ruff-ecosystem + run: | + pip install ./python/ruff-ecosystem + + - name: Run `ruff check` ecosystem check + if: ${{ needs.determine_changes.outputs.linter == 'true' }} run: | # Make executable, since artifact download doesn't preserve this chmod +x ruff ${{ steps.ruff-target.outputs.download-path }}/ruff - scripts/check_ecosystem.py ruff ${{ steps.ruff-target.outputs.download-path }}/ruff | tee ecosystem-result - cat ecosystem-result > $GITHUB_STEP_SUMMARY + ruff-ecosystem check ruff ${{ steps.ruff-target.outputs.download-path }}/ruff --cache ./checkouts --output-format markdown | tee ecosystem-result + cat ecosystem-result >> $GITHUB_STEP_SUMMARY + + echo ${{ github.event.number }} > pr-number + + - name: Run `ruff format` ecosystem check + if: ${{ needs.determine_changes.outputs.linter == 'true' }} + run: | + # Make executable, since artifact download doesn't preserve this + chmod +x ruff ${{ steps.ruff-target.outputs.download-path }}/ruff + + ruff-ecosystem format ruff ${{ steps.ruff-target.outputs.download-path }}/ruff --cache ./checkouts --output-format markdown | tee ecosystem-result + cat ecosystem-result >> $GITHUB_STEP_SUMMARY echo ${{ github.event.number }} > pr-number diff --git a/python/ruff-ecosystem/pyproject.toml b/python/ruff-ecosystem/pyproject.toml index 919bbe4ab5..1183bcc8f8 100644 --- a/python/ruff-ecosystem/pyproject.toml +++ b/python/ruff-ecosystem/pyproject.toml @@ -8,3 +8,7 @@ version = "0.0.0" [project.scripts] ruff-ecosystem = "ruff_ecosystem.cli:entrypoint" + + +[tool.ruff] +extend-select = ["I"] \ No newline at end of file diff --git a/python/ruff-ecosystem/ruff_ecosystem/__main__.py b/python/ruff-ecosystem/ruff_ecosystem/__main__.py index 8eab9c1c75..bc1e7d2c79 100644 --- a/python/ruff-ecosystem/ruff_ecosystem/__main__.py +++ b/python/ruff-ecosystem/ruff_ecosystem/__main__.py @@ -2,7 +2,7 @@ Enables usage with `python -m ruff_ecosystem` """ -from ruff_ecosystem.cli import entrypoint +import ruff_ecosystem.cli if __name__ == "__main__": - entrypoint() + ruff_ecosystem.cli.entrypoint() diff --git a/python/ruff-ecosystem/ruff_ecosystem/check.py b/python/ruff-ecosystem/ruff_ecosystem/check.py new file mode 100644 index 0000000000..ad048c0e82 --- /dev/null +++ b/python/ruff-ecosystem/ruff_ecosystem/check.py @@ -0,0 +1,341 @@ +from __future__ import annotations + +import asyncio +import re +import time +from asyncio import create_subprocess_exec +from collections import Counter +from dataclasses import dataclass, field +from pathlib import Path +from subprocess import PIPE +from typing import TYPE_CHECKING, Any, Iterator, Self, Sequence + +from ruff_ecosystem import logger +from ruff_ecosystem.types import ( + Comparison, + Diff, + Result, + RuffError, + Serializable, +) +from ruff_ecosystem.markdown import project_section + +if TYPE_CHECKING: + from ruff_ecosystem.projects import 
ClonedRepository, Project + + +# Matches lines that are summaries rather than diagnostics +CHECK_SUMMARY_LINE_RE = re.compile(r"^(Found \d+ error.*)|(.* fixable with .*)$") + +# Parses a diagnostic line (in a diff) to retrieve path and line number +CHECK_DIFF_LINE_RE = re.compile( + r"^(?P
[+-]) (?P(?P [^:]+):(?P \d+):\d+:) (?P .*)$", +) + + +async def compare_check( + ruff_baseline_executable: Path, + ruff_comparison_executable: Path, + options: CheckOptions, + cloned_repo: ClonedRepository, +) -> Comparison: + async with asyncio.TaskGroup() as tg: + baseline_task = tg.create_task( + ruff_check( + executable=ruff_baseline_executable.resolve(), + path=cloned_repo.path, + name=cloned_repo.fullname, + options=options, + ), + ) + comparison_task = tg.create_task( + ruff_check( + executable=ruff_comparison_executable.resolve(), + path=cloned_repo.path, + name=cloned_repo.fullname, + options=options, + ), + ) + + baseline_output, comparison_output = ( + baseline_task.result(), + comparison_task.result(), + ) + diff = Diff.new(baseline_output, comparison_output) + + return Comparison(diff=diff, repo=cloned_repo) + + +def summarize_check_result(result: Result) -> str: + # Calculate the total number of rule changes + all_rule_changes = RuleChanges() + for _, comparison in result.completed: + all_rule_changes.update(RuleChanges.from_diff(comparison.diff)) + + lines = [] + total_removed = all_rule_changes.total_removed() + total_added = all_rule_changes.total_added() + error_count = len(result.errored) + + if total_removed == 0 and total_added == 0 and error_count == 0: + return "\u2705 ecosystem check detected no `ruff check` changes." + + # Summarize the total changes + s = "s" if error_count != 1 else "" + changes = f"(+{total_added}, -{total_removed}, {error_count} error{s})" + + lines.append( + f"\u2139\ufe0f ecosystem check **detected `ruff check` changes**. {changes}" + ) + lines.append("") + + # Then per-project changes + for project, comparison in result.completed: + if not comparison.diff: + continue # Skip empty diffs + + diff = deduplicate_and_sort_diff(comparison.diff) + + # Display the output diff + diff_lines = [] + for line in limit_rule_lines(diff, project.check_options.max_lines_per_rule): + diff_lines.append(add_permalink_to_diagnostic_line(comparison.repo, line)) + + lines.extend( + project_section( + title=f"+{diff.lines_added}, -{diff.lines_removed}", + content="\n".join(diff_lines), + options=project.check_options.markdown(), + project=project, + ) + ) + + for project, error in result.errored: + lines.extend( + project_section( + title="error", + content=str(error), + options="", + project=project, + ) + ) + + # Display a summary table of changed rules + if all_rule_changes: + lines.append(f"Rules changed: {len(all_rule_changes.rule_codes())}") + lines.append("") + lines.append("| Rule | Changes | Additions | Removals |") + lines.append("| ---- | ------- | --------- | -------- |") + for rule, total in sorted( + all_rule_changes.total_changes_by_rule(), + key=lambda item: item[1], # Sort by the total changes + reverse=True, + ): + additions, removals = ( + all_rule_changes.added[rule], + all_rule_changes.removed[rule], + ) + lines.append(f"| {rule} | {total} | {additions} | {removals} |") + + return "\n".join(lines) + + +def add_permalink_to_diagnostic_line(repo: ClonedRepository, line: str) -> str: + match = CHECK_DIFF_LINE_RE.match(line) + if match is None: + return line + + pre, inner, path, lnum, post = match.groups() + url = repo.url_for(path, int(lnum)) + return f"{pre} {inner} {post}" + + +async def ruff_check( + *, executable: Path, path: Path, name: str, options: CheckOptions +) -> Sequence[str]: + """Run the given ruff binary against the specified path.""" + logger.debug(f"Checking {name} with {executable}") + ruff_args = ["check", "--no-cache", 
"--exit-zero"] + if options.select: + ruff_args.extend(["--select", options.select]) + if options.ignore: + ruff_args.extend(["--ignore", options.ignore]) + if options.exclude: + ruff_args.extend(["--exclude", options.exclude]) + if options.show_fixes: + ruff_args.extend(["--show-fixes", "--ecosystem-ci"]) + + start = time.time() + proc = await create_subprocess_exec( + executable.absolute(), + *ruff_args, + ".", + stdout=PIPE, + stderr=PIPE, + cwd=path, + ) + result, err = await proc.communicate() + end = time.time() + + logger.debug(f"Finished checking {name} with {executable} in {end - start:.2f}s") + + if proc.returncode != 0: + raise RuffError(err.decode("utf8")) + + # Strip summary lines so the diff is only diagnostic lines + lines = [ + line + for line in result.decode("utf8").splitlines() + if not CHECK_SUMMARY_LINE_RE.match(line) + ] + + return lines + + +@dataclass(frozen=True) +class CheckOptions(Serializable): + """ + Ruff check options + """ + + select: str = "" + ignore: str = "" + exclude: str = "" + + # Generating fixes is slow and verbose + show_fixes: bool = False + + # Limit the number of reported lines per rule + max_lines_per_rule: int | None = 50 + + def markdown(self) -> str: + return f"select {self.select} ignore {self.ignore} exclude {self.exclude}" + + +@dataclass(frozen=True) +class RuleChanges: + """ + The number of additions and removals by rule code + """ + + added: Counter = field(default_factory=Counter) + removed: Counter = field(default_factory=Counter) + + def rule_codes(self) -> set[str]: + return set(self.added.keys()).union(self.removed.keys()) + + def __add__(self, other: Self) -> Self: + if not isinstance(other, type(self)): + return NotImplemented + + new = RuleChanges() + new.update(self) + new.update(other) + return new + + def update(self, other: Self) -> Self: + self.added.update(other.added) + self.removed.update(other.removed) + return self + + def total_added(self) -> int: + return sum(self.added.values()) + + def total_removed(self) -> int: + return sum(self.removed.values()) + + def total_changes_by_rule(self) -> Iterator[str, int]: + """ + Yields the sum of changes for each rule + """ + totals = Counter() + totals.update(self.added) + totals.update(self.removed) + yield from totals.items() + + @classmethod + def from_diff(cls: type[Self], diff: Diff) -> Self: + """ + Parse a diff from `ruff check` to determine the additions and removals for each rule + """ + rule_changes = cls() + + for line in sorted(diff.added): + code = parse_rule_code(line) + if code is not None: + rule_changes.added[code] += 1 + + for line in sorted(diff.removed): + code = parse_rule_code(line) + if code is not None: + rule_changes.removed[code] += 1 + + return rule_changes + + def __bool__(self): + return bool(self.added or self.removed) + + +def parse_rule_code(line: str) -> str | None: + """ + Parse the rule code from a diagnostic line + + + / : : : + """ + matches = re.search(r": ([A-Z]{1,4}[0-9]{3,4})", line) + + if matches is None: + # Handle case where there are no regex matches e.g. + # + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE" # noqa: E501, ERA001 + # Which was found in local testing + return None + + return matches.group(1) + + +def deduplicate_and_sort_diff(diff: Diff) -> Diff: + """ + Removes any duplicate lines and any unchanged lines from the diff. 
+ """ + lines = set() + for line in diff: + if line.startswith("+ "): + lines.add(line) + elif line.startswith("- "): + lines.add(line) + + # Sort without the leading + or - + return Diff(list(sorted(lines, key=lambda line: line[2:]))) + + +def limit_rule_lines(diff: Diff, max_per_rule: int | None = 100) -> list[str]: + """ + Reduce the diff to include a maximum number of lines for each rule. + """ + if max_per_rule is None: + return diff + + counts = Counter() + reduced = [] + + for line in diff: + code = parse_rule_code(line) + + # Do not omit any unparsable lines + if not code: + reduced.append(line) + continue + + counts[code] += 1 + if counts[code] > max_per_rule: + continue + + reduced.append(line) + + # Add lines summarizing the omitted changes + for code, count in counts.items(): + hidden_count = count - max_per_rule + if hidden_count > 0: + reduced.append(f"{hidden_count} changes omitted for rule {code}") + + return reduced diff --git a/python/ruff-ecosystem/ruff_ecosystem/cli.py b/python/ruff-ecosystem/ruff_ecosystem/cli.py index 9f576f58df..010bb09b00 100644 --- a/python/ruff-ecosystem/ruff_ecosystem/cli.py +++ b/python/ruff-ecosystem/ruff_ecosystem/cli.py @@ -1,16 +1,18 @@ import argparse import asyncio import logging +import os +import shutil +import sys +import sysconfig import tempfile -from pathlib import Path from contextlib import nullcontext -from ruff_ecosystem.models import RuffCommand -from ruff_ecosystem.emitters import EmitterType -from ruff_ecosystem.defaults import DEFAULT_TARGETS -from ruff_ecosystem.main import main +from pathlib import Path from signal import SIGINT, SIGTERM -import sys +from ruff_ecosystem.defaults import DEFAULT_TARGETS +from ruff_ecosystem.main import OutputFormat, main +from ruff_ecosystem.projects import RuffCommand def excepthook(type, value, tb): @@ -18,7 +20,8 @@ def excepthook(type, value, tb): # we are in interactive mode or we don't have a tty so call the default sys.__excepthook__(type, value, tb) else: - import traceback, pdb + import pdb + import traceback traceback.print_exception(type, value, tb) print() @@ -49,7 +52,7 @@ def entrypoint(): ruff_baseline_executable=args.ruff_baseline, ruff_comparison_executable=args.ruff_comparison, targets=DEFAULT_TARGETS, - emitter=EmitterType(args.output_format).to_emitter(), + format=OutputFormat(args.output_format), cache=Path(cache), raise_on_failure=args.pdb, ) @@ -84,7 +87,7 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument( "--output-format", - choices=[option.name for option in EmitterType], + choices=[option.name for option in OutputFormat], default="json", help="Location for caching cloned repositories", ) @@ -114,3 +117,21 @@ def parse_args() -> argparse.Namespace: ) return parser.parse_args() + + +def get_executable_path(name: str) -> str | None: + # Add suffix for Windows executables + name += ".exe" if sys.platform == "win32" else "" + + path = os.path.join(sysconfig.get_path("scripts"), name) + + # The executable in the current interpreter's scripts directory. + if os.path.exists(path): + return path + + # The executable in the global environment. 
+ environment_path = shutil.which("ruff") + if environment_path: + return environment_path + + return None diff --git a/python/ruff-ecosystem/ruff_ecosystem/defaults.py b/python/ruff-ecosystem/ruff_ecosystem/defaults.py index b203f5f93f..3c9c6bfc89 100644 --- a/python/ruff-ecosystem/ruff_ecosystem/defaults.py +++ b/python/ruff-ecosystem/ruff_ecosystem/defaults.py @@ -1,66 +1,64 @@ -from .models import Repository, CheckOptions, Target +from ruff_ecosystem.projects import CheckOptions, Project, Repository -# TODO: Consider exporting this as JSON instead for consistent setup +# TODO: Consider exporting this as JSON DEFAULT_TARGETS = [ - # Target(repo=Repository(owner="DisnakeDev", name="disnake", branch="master")), - # Target(repo=Repository(owner="PostHog", name="HouseWatch", branch="main")), - # Target(repo=Repository(owner="RasaHQ", name="rasa", branch="main")), - # Target(repo=Repository(owner="Snowflake-Labs", name="snowcli", branch="main")), - # Target(repo=Repository(owner="aiven", name="aiven-client", branch="main")), - # Target(repo=Repository(owner="alteryx", name="featuretools", branch="main")), - # Target( - # repo=Repository(owner="apache", name="airflow", branch="main"), + # Project(repo=Repository(owner="DisnakeDev", name="disnake", ref="master")), + # Project(repo=Repository(owner="PostHog", name="HouseWatch", ref="main")), + # Project(repo=Repository(owner="RasaHQ", name="rasa", ref="main")), + # Project(repo=Repository(owner="Snowflake-Labs", name="snowcli", ref="main")), + # Project(repo=Repository(owner="aiven", name="aiven-client", ref="main")), + # Project(repo=Repository(owner="alteryx", name="featuretools", ref="main")), + # Project( + # repo=Repository(owner="apache", name="airflow", ref="main"), # check_options=CheckOptions(select="ALL"), # ), - # Target(repo=Repository(owner="aws", name="aws-sam-cli", branch="develop")), - # Target(repo=Repository(owner="bloomberg", name="pytest-memray", branch="main")), - # Target( - # repo=Repository(owner="bokeh", name="bokeh", branch="branch-3.3"), - # check_options=CheckOptions(select="ALL"), - # ), - # Target(repo=Repository(owner="commaai", name="openpilot", branch="master")), - # Target(repo=Repository(owner="demisto", name="content", branch="master")), - # Target(repo=Repository(owner="docker", name="docker-py", branch="main")), - # Target( - # repo=Repository(owner="freedomofpress", name="securedrop", branch="develop") - # ), - # Target(repo=Repository(owner="fronzbot", name="blinkpy", branch="dev")), - # Target(repo=Repository(owner="ibis-project", name="ibis", branch="master")), - # Target(repo=Repository(owner="ing-bank", name="probatus", branch="main")), - # Target(repo=Repository(owner="jrnl-org", name="jrnl", branch="develop")), - # Target(repo=Repository(owner="latchbio", name="latch", branch="main")), - # Target(repo=Repository(owner="lnbits", name="lnbits", branch="main")), - # Target(repo=Repository(owner="milvus-io", name="pymilvus", branch="master")), - # Target(repo=Repository(owner="mlflow", name="mlflow", branch="master")), - # Target(repo=Repository(owner="model-bakers", name="model_bakery", branch="main")), - # Target(repo=Repository(owner="pandas-dev", name="pandas", branch="main")), - # Target(repo=Repository(owner="prefecthq", name="prefect", branch="main")), - # Target(repo=Repository(owner="pypa", name="build", branch="main")), - # Target(repo=Repository(owner="pypa", name="cibuildwheel", branch="main")), - # Target(repo=Repository(owner="pypa", name="pip", branch="main")), - # 
Target(repo=Repository(owner="pypa", name="setuptools", branch="main")), - # Target(repo=Repository(owner="python", name="mypy", branch="master")), - # Target( + # Project(repo=Repository(owner="aws", name="aws-sam-cli", ref="develop")), + # Project(repo=Repository(owner="bloomberg", name="pytest-memray", ref="main")), + Project( + repo=Repository(owner="bokeh", name="bokeh", ref="branch-3.3"), + check_options=CheckOptions(select="ALL"), + ), + # Project(repo=Repository(owner="commaai", name="openpilot", ref="master")), + # Project(repo=Repository(owner="demisto", name="content", ref="master")), + # Project(repo=Repository(owner="docker", name="docker-py", ref="main")), + # Project(repo=Repository(owner="freedomofpress", name="securedrop", ref="develop")), + # Project(repo=Repository(owner="fronzbot", name="blinkpy", ref="dev")), + # Project(repo=Repository(owner="ibis-project", name="ibis", ref="master")), + # Project(repo=Repository(owner="ing-bank", name="probatus", ref="main")), + # Project(repo=Repository(owner="jrnl-org", name="jrnl", ref="develop")), + # Project(repo=Repository(owner="latchbio", name="latch", ref="main")), + # Project(repo=Repository(owner="lnbits", name="lnbits", ref="main")), + # Project(repo=Repository(owner="milvus-io", name="pymilvus", ref="master")), + # Project(repo=Repository(owner="mlflow", name="mlflow", ref="master")), + # Project(repo=Repository(owner="model-bakers", name="model_bakery", ref="main")), + # Project(repo=Repository(owner="pandas-dev", name="pandas", ref="main")), + # Project(repo=Repository(owner="prefecthq", name="prefect", ref="main")), + # Project(repo=Repository(owner="pypa", name="build", ref="main")), + # Project(repo=Repository(owner="pypa", name="cibuildwheel", ref="main")), + # Project(repo=Repository(owner="pypa", name="pip", ref="main")), + # Project(repo=Repository(owner="pypa", name="setuptools", ref="main")), + # Project(repo=Repository(owner="python", name="mypy", ref="master")), + # Project( # repo=Repository( # owner="python", # name="typeshed", - # branch="main", + # ref="main", # ), # check_options=CheckOptions(select="PYI"), # ), - # Target(repo=Repository(owner="python-poetry", name="poetry", branch="master")), - # Target(repo=Repository(owner="reflex-dev", name="reflex", branch="main")), - # Target(repo=Repository(owner="rotki", name="rotki", branch="develop")), - # Target(repo=Repository(owner="scikit-build", name="scikit-build", branch="main")), - # Target( - # repo=Repository(owner="scikit-build", name="scikit-build-core", branch="main") + # Project(repo=Repository(owner="python-poetry", name="poetry", ref="master")), + # Project(repo=Repository(owner="reflex-dev", name="reflex", ref="main")), + # Project(repo=Repository(owner="rotki", name="rotki", ref="develop")), + # Project(repo=Repository(owner="scikit-build", name="scikit-build", ref="main")), + # Project( + # repo=Repository(owner="scikit-build", name="scikit-build-core", ref="main") + # ), + # Project(repo=Repository(owner="sphinx-doc", name="sphinx", ref="master")), + # Project(repo=Repository(owner="spruceid", name="siwe-py", ref="main")), + # Project(repo=Repository(owner="tiangolo", name="fastapi", ref="master")), + # Project(repo=Repository(owner="yandex", name="ch-backup", ref="main")), + # Project( + # repo=Repository(owner="zulip", name="zulip", ref="main"), + # check_options=CheckOptions(select="ALL"), # ), - # Target(repo=Repository(owner="sphinx-doc", name="sphinx", branch="master")), - # Target(repo=Repository(owner="spruceid", name="siwe-py", 
branch="main")), - # Target(repo=Repository(owner="tiangolo", name="fastapi", branch="master")), - # Target(repo=Repository(owner="yandex", name="ch-backup", branch="main")), - Target( - repo=Repository(owner="zulip", name="zulip", branch="main"), - check_options=CheckOptions(select="ALL"), - ), ] diff --git a/python/ruff-ecosystem/ruff_ecosystem/emitters.py b/python/ruff-ecosystem/ruff_ecosystem/emitters.py deleted file mode 100644 index a0c64df84f..0000000000 --- a/python/ruff-ecosystem/ruff_ecosystem/emitters.py +++ /dev/null @@ -1,99 +0,0 @@ -from enum import Enum -import abc -from ruff_ecosystem.models import Target, Diff, ClonedRepository, Result -from ruff_ecosystem.ruff import CHECK_DIFF_LINE_RE -import traceback -import json -from pathlib import Path -import dataclasses - - -class Emitter(abc.ABC): - @abc.abstractclassmethod - def emit_error(cls, target: Target, exc: Exception): - pass - - @abc.abstractclassmethod - def emit_diff(cls, target: Target, diff: Diff, cloned_repo: ClonedRepository): - pass - - @abc.abstractclassmethod - def emit_result(cls, result: Result): - pass - - -class DebugEmitter(Emitter): - def emit_error(cls, target: Target, exc: Exception): - print(f"Error in {target.repo.fullname}") - traceback.print_exception(exc) - - def emit_diff(cls, target: Target, diff: Diff, cloned_repo: ClonedRepository): - pass - - -class JSONEmitter(Emitter): - class DataclassJSONEncoder(json.JSONEncoder): - def default(self, o): - if dataclasses.is_dataclass(o): - return dataclasses.asdict(o) - if isinstance(o, set): - return tuple(o) - if isinstance(o, Path): - return str(o) - return super().default(o) - - def emit_error(cls, target: Target, exc: Exception): - pass - - def emit_diff(cls, target: Target, diff: Diff, cloned_repo: ClonedRepository): - pass - - def emit_result(cls, result: Result): - print(json.dumps(result, indent=4, cls=cls.DataclassJSONEncoder)) - - -class MarkdownEmitter(Emitter): - def emit_error(cls, target: Target, exc: Exception): - cls._print(title="error", content=f"```\n{exc}\n```", target=target) - - def emit_diff(cls, target: Target, diff: Diff, cloned_repo: ClonedRepository): - changes = f"+{len(diff.added)}, -{len(diff.removed)}" - - content = "" - for line in list(diff): - match = CHECK_DIFF_LINE_RE.match(line) - if match is None: - content += line + "\n" - continue - - pre, inner, path, lnum, post = match.groups() - url = cloned_repo.url_for(path, int(lnum)) - content += f"{pre} {inner} {post}" + "\n" - - cls._print(title=changes, content=f" \n{content}\n", target=target) - - def _print(cls, title: str, content: str, target: Target): - print(f"") - - -class EmitterType(Enum): - markdown = "markdown" - json = "json" - - def to_emitter(self) -> Emitter: - match self: - case self.markdown: - return MarkdownEmitter() - case self.json: - return JSONEmitter() - case _: - raise ValueError("Unknown emitter type {self}") diff --git a/python/ruff-ecosystem/ruff_ecosystem/format.py b/python/ruff-ecosystem/ruff_ecosystem/format.py new file mode 100644 index 0000000000..027a744ff1 --- /dev/null +++ b/python/ruff-ecosystem/ruff_ecosystem/format.py @@ -0,0 +1,238 @@ +from __future__ import annotations + +import time +from asyncio import create_subprocess_exec +from dataclasses import dataclass +from pathlib import Path +from subprocess import PIPE +from unidiff import PatchSet +from typing import TYPE_CHECKING, Self, Sequence +import re +from ruff_ecosystem import logger +from ruff_ecosystem.markdown import project_section +from ruff_ecosystem.types import 
Comparison, Diff, Result, RuffError + +if TYPE_CHECKING: + from ruff_ecosystem.projects import ClonedRepository, Project + + +FORMAT_IGNORE_LINES = re.compile("^warning: `ruff format` is a work-in-progress.*") + + +def summarize_format_result(result: Result) -> str: + lines = [] + total_lines_removed = total_lines_added = 0 + total_files_modified = 0 + error_count = len(result.errored) + patch_sets = [] + + for project, comparison in result.completed: + total_lines_added += comparison.diff.lines_added + total_lines_removed += comparison.diff.lines_removed + + patch_set = PatchSet("\n".join(comparison.diff.lines)) + patch_sets.append(patch_set) + total_files_modified += len(patch_set.modified_files) + + if total_lines_removed == 0 and total_lines_added == 0 and error_count == 0: + return "\u2705 ecosystem check detected no format changes." + + # Summarize the total changes + es = "s" if error_count != 1 else "" + fs = "s" if total_files_modified != 1 else "" + changes = f"(+{total_lines_added}, -{total_lines_removed} lines in {total_files_modified} file{fs}, {error_count} error{es})" + lines.append(f"\u2139\ufe0f ecosystem check **detected format changes**. {changes}") + lines.append("") + + # Then per-project changes + for (project, comparison), patch_set in zip(result.completed, patch_sets): + if not comparison.diff: + continue # Skip empty diffs + + files = len(patch_set.modified_files) + s = "s" if files != 1 else "" + title = f"+{comparison.diff.lines_added}, -{comparison.diff.lines_removed} lines in {files} file{s}" + + lines.extend( + project_section( + title=title, + content=patch_set_with_permalinks(patch_set, comparison.repo), + options=project.format_options.markdown(), + project=project, + ) + ) + + for project, error in result.errored: + lines.extend( + project_section( + title="error", + content=str(error), + options="", + project=project, + ) + ) + + return "\n".join(lines) + + +async def ruff_format( + *, + executable: Path, + path: Path, + name: str, + options: FormatOptions, + diff: bool = False, +) -> Sequence[str]: + """Run the given ruff binary against the specified path.""" + logger.debug(f"Formatting {name} with {executable}") + ruff_args = ["format"] + + if diff: + ruff_args.append("--diff") + + start = time.time() + proc = await create_subprocess_exec( + executable.absolute(), + *ruff_args, + ".", + stdout=PIPE, + stderr=PIPE, + cwd=path, + ) + result, err = await proc.communicate() + end = time.time() + + logger.debug(f"Finished formatting {name} with {executable} in {end - start:.2f}s") + + if proc.returncode not in [0, 1]: + raise RuffError(err.decode("utf8")) + + lines = result.decode("utf8").splitlines() + return lines + + +async def black_format( + *, + executable: Path, + path: Path, + name: str, +) -> Sequence[str]: + """Run the given black binary against the specified path.""" + logger.debug(f"Formatting {name} with {executable}") + black_args = [] + + start = time.time() + proc = await create_subprocess_exec( + executable.absolute(), + *black_args, + ".", + stdout=PIPE, + stderr=PIPE, + cwd=path, + ) + result, err = await proc.communicate() + end = time.time() + + logger.debug(f"Finished formatting {name} with {executable} in {end - start:.2f}s") + + if proc.returncode != 0: + raise RuffError(err.decode("utf8")) + + lines = result.decode("utf8").splitlines() + return [line for line in lines if not FORMAT_IGNORE_LINES.match(line)] + + +async def compare_format( + ruff_baseline_executable: Path, + ruff_comparison_executable: Path, + options: FormatOptions, + 
cloned_repo: ClonedRepository, +): + # Run format without diff to get the baseline + await ruff_format( + executable=ruff_baseline_executable.resolve(), + path=cloned_repo.path, + name=cloned_repo.fullname, + options=options, + ) + # Then get the diff from stdout + diff = await ruff_format( + executable=ruff_comparison_executable.resolve(), + path=cloned_repo.path, + name=cloned_repo.fullname, + options=options, + diff=True, + ) + + return create_format_comparison(cloned_repo, FormatDiff(lines=diff)) + + +def create_format_comparison(repo: ClonedRepository, diff: str) -> FormatComparison: + return FormatComparison(diff=diff, repo=repo) + + +@dataclass(frozen=True) +class FormatOptions: + """ + Ruff format options + """ + + pass + + def markdown(self: Self) -> str: + return "" + + +@dataclass(frozen=True) +class FormatDiff(Diff): + """A diff from ruff format.""" + + lines: list[str] + + def __bool__(self: Self) -> bool: + """Return true if this diff is non-empty.""" + return bool(self.lines) + + @property + def added(self) -> set[str]: + return set(line for line in self.lines if line.startswith("+")) + + @property + def removed(self) -> set[str]: + return set(line for line in self.lines if line.startswith("-")) + + +@dataclass(frozen=True) +class FormatComparison(Comparison): + diff: FormatDiff + repo: ClonedRepository + + +@dataclass(frozen=True) +class FormatResult(Result): + comparisons: tuple[Project, FormatComparison] + + +def patch_set_with_permalinks(patch_set: PatchSet, repo: ClonedRepository) -> str: + lines = [] + for file_patch in patch_set: + file_link = repo.url_for(file_patch.path) + lines.append(f"{file_patch.path}") + for hunk in file_patch: + hunk_link = repo.url_for( + file_patch.path, + hunk.source_start, + hunk.source_start + hunk.source_length, + ) + hunk_lines = str(hunk).splitlines() + + # Add a link before the hunk + link_title = file_patch.path + hunk_link.split(file_patch.path)[-1] + lines.append(f"{link_title}") + + # Wrap the contents of the hunk in a diff code block + lines.append("```diff") + lines.extend(hunk_lines[1:]) + lines.append("```") + + return "\n".join(lines) diff --git a/python/ruff-ecosystem/ruff_ecosystem/git.py b/python/ruff-ecosystem/ruff_ecosystem/git.py deleted file mode 100644 index 16cb6ca1c5..0000000000 --- a/python/ruff-ecosystem/ruff_ecosystem/git.py +++ /dev/null @@ -1,72 +0,0 @@ -from ruff_ecosystem.models import Repository, ClonedRepository -from contextlib import asynccontextmanager -from pathlib import Path -from typing import AsyncGenerator -from asyncio import create_subprocess_exec -from subprocess import PIPE -from ruff_ecosystem import logger - - -@asynccontextmanager -async def clone( - repo: Repository, checkout_dir: Path -) -> AsyncGenerator[ClonedRepository, None]: - """Shallow clone this repository to a temporary directory.""" - if checkout_dir.exists(): - logger.debug(f"Reusing {repo.owner}:{repo.name}") - yield await _cloned_repository(repo, checkout_dir) - return - - logger.debug(f"Cloning {repo.owner}:{repo.name} to {checkout_dir}") - command = [ - "git", - "clone", - "--config", - "advice.detachedHead=false", - "--quiet", - "--depth", - "1", - "--no-tags", - ] - if repo.branch: - command.extend(["--branch", repo.branch]) - - command.extend( - [ - f"https://github.com/{repo.owner}/{repo.name}", - checkout_dir, - ], - ) - - process = await create_subprocess_exec(*command, env={"GIT_TERMINAL_PROMPT": "0"}) - - status_code = await process.wait() - - logger.debug( - f"Finished cloning {repo.fullname} with status 
{status_code}", - ) - yield await _cloned_repository(repo, checkout_dir) - - -async def _cloned_repository(repo: Repository, checkout_dir: Path) -> ClonedRepository: - return ClonedRepository( - name=repo.name, - owner=repo.owner, - branch=repo.branch, - path=checkout_dir, - commit_hash=await _get_commit_hash(checkout_dir), - ) - - -async def _get_commit_hash(checkout_dir: Path) -> str: - """ - Return the commit sha for the repository in the checkout directory. - """ - process = await create_subprocess_exec( - *["git", "rev-parse", "HEAD"], - cwd=checkout_dir, - stdout=PIPE, - ) - stdout, _ = await process.communicate() - assert await process.wait() == 0, f"Failed to retrieve commit sha at {checkout_dir}" - return stdout.decode().strip() diff --git a/python/ruff-ecosystem/ruff_ecosystem/main.py b/python/ruff-ecosystem/ruff_ecosystem/main.py index bc08e32df1..e271957650 100644 --- a/python/ruff-ecosystem/ruff_ecosystem/main.py +++ b/python/ruff-ecosystem/ruff_ecosystem/main.py @@ -1,32 +1,34 @@ -from ruff_ecosystem.models import ( - RuffCommand, - Target, - Diff, - ClonedRepository, - RuleChanges, - CheckComparison, - Result, -) -from pathlib import Path -from ruff_ecosystem import logger import asyncio -from ruff_ecosystem.git import clone -from ruff_ecosystem.ruff import ruff_check, ruff_format -from ruff_ecosystem.emitters import Emitter -import difflib +import dataclasses +import json +from enum import Enum +from pathlib import Path from typing import TypeVar -import re + +from ruff_ecosystem import logger +from ruff_ecosystem.check import compare_check, summarize_check_result +from ruff_ecosystem.format import compare_format, summarize_format_result +from ruff_ecosystem.projects import ( + Project, + RuffCommand, +) +from ruff_ecosystem.types import Comparison, Result, Serializable T = TypeVar("T") +class OutputFormat(Enum): + markdown = "markdown" + json = "json" + + async def main( command: RuffCommand, ruff_baseline_executable: Path, ruff_comparison_executable: Path, - targets: list[Target], + targets: list[Project], cache: Path | None, - emitter: Emitter, + format: OutputFormat, max_parallelism: int = 50, raise_on_failure: bool = False, ) -> None: @@ -42,7 +44,7 @@ async def main( async with semaphore: return await coroutine - comparisons: list[Exception | CheckComparison] = await asyncio.gather( + comparisons: list[Exception | Comparison] = await asyncio.gather( *[ limited_parallelism( clone_and_compare( @@ -59,177 +61,82 @@ async def main( ) comparisons_by_target = dict(zip(targets, comparisons, strict=True)) - # Calculate totals - total_removed = total_added = errors = 0 - total_rule_changes = RuleChanges() - for comparison in comparisons_by_target.values(): - if isinstance(comparison, Exception): - errors += 1 - else: - total_removed += len(comparison.diff.removed) - total_added += len(comparison.diff.added) - total_rule_changes += comparison.rule_changes - - errors = [] - comparisons = [] + errors, successes = [], [] for target, comparison in comparisons_by_target.items(): if isinstance(comparison, Exception): errors.append((target, comparison)) continue - if comparison.diff: - comparisons.append((target, comparison)) + successes.append((target, comparison)) - else: - continue + result = Result(completed=successes, errored=errors) - result = Result( - total_added=total_added, - total_removed=total_removed, - total_rule_changes=total_rule_changes, - comparisons=comparisons, - errors=errors, - ) + match format: + case OutputFormat.json: + print(json.dumps(result, indent=4, 
cls=JSONEncoder)) + case OutputFormat.markdown: + match command: + case RuffCommand.check: + print(summarize_check_result(result)) + case RuffCommand.format: + print(summarize_format_result(result)) + case _: + raise ValueError(f"Unknown target Ruff command {command}") + case _: + raise ValueError(f"Unknown output format {format}") - emitter.emit_result(result) - return - - if total_removed == 0 and total_added == 0 and errors == 0: - print("\u2705 ecosystem check detected no changes.") - return - - s = "s" if errors != 1 else "" - changes = f"(+{total_added}, -{total_removed}, {errors} error{s})" - - print(f"\u2139\ufe0f ecosystem check **detected changes**. {changes}") - print() - - for target, comparison in comparisons_by_target.items(): - if isinstance(comparison, Exception): - emitter.emit_error(target, comparison) - continue - - if comparison.diff: - emitter.emit_diff(target, comparison.diff, comparison.repo) - - else: - continue - - if len(total_rule_changes.rule_codes()) > 0: - print(f"Rules changed: {len(total_rule_changes.rule_codes())}") - print() - print("| Rule | Changes | Additions | Removals |") - print("| ---- | ------- | --------- | -------- |") - for rule, (additions, removals) in sorted( - total_rule_changes.items(), - key=lambda x: (x[1][0] + x[1][1]), - reverse=True, - ): - print(f"| {rule} | {additions + removals} | {additions} | {removals} |") + return None async def clone_and_compare( command: RuffCommand, ruff_baseline_executable: Path, ruff_comparison_executable: Path, - target: Target, + target: Project, cache: Path, -) -> CheckComparison: +) -> Comparison: """Check a specific repository against two versions of ruff.""" assert ":" not in target.repo.owner assert ":" not in target.repo.name match command: case RuffCommand.check: - ruff_task, create_comparison, options = ( - ruff_check, - create_check_comparison, + compare, options = ( + compare_check, target.check_options, ) case RuffCommand.format: - ruff_task, create_comparison, options = ( - ruff_format, - create_format_comparison, + compare, options = ( + compare_format, target.format_options, ) case _: - raise ValueError(f"Unknowm target Ruff command {command}") + raise ValueError(f"Unknown target Ruff command {command}") checkout_dir = cache.joinpath(f"{target.repo.owner}:{target.repo.name}") - async with clone(target.repo, checkout_dir) as cloned_repo: - try: - async with asyncio.TaskGroup() as tg: - baseline_task = tg.create_task( - ruff_task( - executable=ruff_baseline_executable.resolve(), - path=cloned_repo.path, - name=cloned_repo.fullname, - options=options, - ), - ) - comparison_task = tg.create_task( - ruff_task( - executable=ruff_comparison_executable.resolve(), - path=cloned_repo.path, - name=cloned_repo.fullname, - options=options, - ), - ) - except ExceptionGroup as e: - raise e.exceptions[0] from e + cloned_repo = await target.repo.clone(checkout_dir) - return create_comparison( - cloned_repo, baseline_task.result(), comparison_task.result() - ) + try: + return await compare( + ruff_baseline_executable, + ruff_comparison_executable, + options, + cloned_repo, + ) + except ExceptionGroup as e: + raise e.exceptions[0] from e -def create_check_comparison( - repo: ClonedRepository, baseline_output: str, comparison_output: str -) -> CheckComparison: - removed, added = set(), set() - - for line in difflib.ndiff(baseline_output, comparison_output): - if line.startswith("- "): - removed.add(line[2:]) - elif line.startswith("+ "): - added.add(line[2:]) - - diff = Diff(removed=removed, added=added) - - 
return CheckComparison(
-        diff=diff, repo=repo, rule_changes=rule_changes_from_diff(diff)
-    )
-
-
-def rule_changes_from_diff(diff: Diff) -> RuleChanges:
-    """
-    Parse a diff from `ruff check` to determine the additions and removals for each rule.
-    """
-    rule_changes = RuleChanges()
-
-    # Count rule changes
-    for line in diff.lines():
-        # Find rule change for current line or construction
") - print("/ : : : - matches = re.search(r": ([A-Z]{1,4}[0-9]{3,4})", line) - - if matches is None: - # Handle case where there are no regex matches e.g. - # + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE" # noqa: E501, ERA001 - # Which was found in local testing - continue - - rule_code = matches.group(1) - - # Get current additions and removals for this rule - current_changes = rule_changes[rule_code] - - # Check if addition or removal depending on the first character - if line[0] == "+": - current_changes = (current_changes[0] + 1, current_changes[1]) - elif line[0] == "-": - current_changes = (current_changes[0], current_changes[1] + 1) - - rule_changes[rule_code] = current_changes - - return rule_changes +class JSONEncoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, Serializable): + return o.jsonable() + if dataclasses.is_dataclass(o): + return dataclasses.asdict(o) + if isinstance(o, set): + return tuple(o) + if isinstance(o, Path): + return str(o) + if isinstance(o, Exception): + return str(o) + return super().default(o) diff --git a/python/ruff-ecosystem/ruff_ecosystem/markdown.py b/python/ruff-ecosystem/ruff_ecosystem/markdown.py new file mode 100644 index 0000000000..f30ab64b75 --- /dev/null +++ b/python/ruff-ecosystem/ruff_ecosystem/markdown.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ruff_ecosystem.projects import Project + + +def project_section( + title: str, content: str, options: str, project: Project +) -> list[str]: + lines = [] + lines.append( + f' ") + return lines diff --git a/python/ruff-ecosystem/ruff_ecosystem/models.py b/python/ruff-ecosystem/ruff_ecosystem/models.py deleted file mode 100644 index be5c6cc60d..0000000000 --- a/python/ruff-ecosystem/ruff_ecosystem/models.py +++ /dev/null @@ -1,160 +0,0 @@ -from enum import Enum -from dataclasses import dataclass, field -from typing import Self, Iterator -import heapq -from pathlib import Path - - -class RuffCommand(Enum): - check = "check" - format = "format" - - -@dataclass(frozen=True) -class Repository: - """ - A remote GitHub repository - """ - - owner: str - name: str - branch: str | None - - @property - def fullname(self) -> str: - return f"{self.owner}/{self.name}" - - @property - def url(self: Self) -> str: - return f"https://github.com/{self.owner}/{self.name}" - - -@dataclass(frozen=True) -class ClonedRepository(Repository): - """ - A cloned GitHub repository, which includes the hash of the cloned commit. - """ - - commit_hash: str - path: Path - - def url_for(self: Self, path: str, line_number: int | None = None) -> str: - """ - Return the remote GitHub URL for the given path in this repository. 
- """ - # Default to main branch - url = f"https://github.com/{self.owner}/{self.name}/blob/{self.commit_hash}/{path}" - if line_number: - url += f"#L{line_number}" - return url - - @property - def url(self: Self) -> str: - return f"https://github.com/{self.owner}/{self.name}@{self.commit_hash}" - - -@dataclass(frozen=True) -class Diff: - """A diff between two runs of ruff.""" - - removed: set[str] - added: set[str] - - def __bool__(self: Self) -> bool: - """Return true if this diff is non-empty.""" - return bool(self.removed or self.added) - - def lines(self: Self) -> Iterator[str]: - """Iterate through the changed lines in diff format.""" - for line in heapq.merge(sorted(self.removed), sorted(self.added)): - if line in self.removed: - yield f"- {line}" - else: - yield f"+ {line}" - - -@dataclass(frozen=True) -class RuleChanges: - changes: dict[str, tuple[int, int]] = field(default_factory=dict) - - def rule_codes(self) -> list[str]: - return list(self.changes.keys()) - - def items(self) -> Iterator[tuple[str, tuple[int, int]]]: - return self.changes.items() - - def __setitem__(self, key: str, value: tuple[int, int]) -> None: - self.changes[key] = value - - def __getitem__(self, key: str) -> tuple[int, int]: - return self.changes.get(key, (0, 0)) - - def __add__(self, other: Self) -> Self: - if not isinstance(other, type(self)): - return NotImplemented - - result = self.changes.copy() - for rule_code, (added, removed) in other.changes.items(): - if rule_code in result: - result[rule_code] = ( - result[rule_code][0] + added, - result[rule_code][1] + removed, - ) - else: - result[rule_code] = (added, removed) - - return RuleChanges(changes=result) - - -@dataclass(frozen=True) -class CheckComparison: - diff: Diff - repo: ClonedRepository - rule_changes: RuleChanges - - -@dataclass(frozen=True) -class CheckOptions: - """ - Ruff check options - """ - - select: str = "" - ignore: str = "" - exclude: str = "" - - # Generating fixes is slow and verbose - show_fixes: bool = False - - def summary(self) -> str: - return f"select {self.select} ignore {self.ignore} exclude {self.exclude}" - - -@dataclass(frozen=True) -class FormatOptions: - """ - Ruff format options - """ - - pass - - -@dataclass(frozen=True) -class Target: - """ - An ecosystem target - """ - - repo: Repository - check_options: CheckOptions = field(default_factory=CheckOptions) - format_options: FormatOptions = field(default_factory=FormatOptions) - - -@dataclass(frozen=True) -class Result: - total_added: int - total_removed: int - total_rule_changes: RuleChanges - - comparisons: tuple[Target, CheckComparison] - errors: tuple[Target, Exception] diff --git a/python/ruff-ecosystem/ruff_ecosystem/projects.py b/python/ruff-ecosystem/ruff_ecosystem/projects.py new file mode 100644 index 0000000000..069bb7ae23 --- /dev/null +++ b/python/ruff-ecosystem/ruff_ecosystem/projects.py @@ -0,0 +1,168 @@ +from __future__ import annotations + +from asyncio import create_subprocess_exec +from dataclasses import dataclass, field +from enum import Enum +from pathlib import Path +from subprocess import PIPE +from typing import AsyncGenerator, Self + +from ruff_ecosystem import logger +from ruff_ecosystem.check import CheckOptions +from ruff_ecosystem.format import FormatOptions +from ruff_ecosystem.types import Serializable + + +@dataclass(frozen=True) +class Project(Serializable): + """ + An ecosystem target + """ + + repo: Repository + check_options: CheckOptions = field(default_factory=lambda: CheckOptions()) + format_options: FormatOptions = 
field(default_factory=lambda: FormatOptions()) + + +class RuffCommand(Enum): + check = "check" + format = "format" + + +class ProjectSetupError(Exception): + """An error setting up a project.""" + + +@dataclass(frozen=True) +class Repository(Serializable): + """ + A remote GitHub repository + """ + + owner: str + name: str + ref: str | None + + @property + def fullname(self) -> str: + return f"{self.owner}/{self.name}" + + @property + def url(self: Self) -> str: + return f"https://github.com/{self.owner}/{self.name}" + + async def clone( + self: Self, checkout_dir: Path + ) -> AsyncGenerator[ClonedRepository, None]: + """ + Shallow clone this repository + """ + if checkout_dir.exists(): + logger.debug(f"Reusing {self.owner}:{self.name}") + + if self.ref: + logger.debug(f"Checking out ref {self.ref}") + process = await create_subprocess_exec( + *["git", "checkout", "-f", self.ref], + cwd=checkout_dir, + env={"GIT_TERMINAL_PROMPT": "0"}, + stdout=PIPE, + stderr=PIPE, + ) + if await process.wait() != 0: + _, stderr = await process.communicate() + raise ProjectSetupError( + f"Failed to checkout {self.ref}: {stderr.decode()}" + ) + + return ClonedRepository( + name=self.name, + owner=self.owner, + ref=self.ref, + path=checkout_dir, + commit_hash=await self._get_head_commit(checkout_dir), + ) + + logger.debug(f"Cloning {self.owner}:{self.name} to {checkout_dir}") + command = [ + "git", + "clone", + "--config", + "advice.detachedHead=false", + "--quiet", + "--depth", + "1", + "--no-tags", + ] + if self.ref: + command.extend(["--branch", self.ref]) + + command.extend( + [ + f"https://github.com/{self.owner}/{self.name}", + checkout_dir, + ], + ) + + process = await create_subprocess_exec( + *command, env={"GIT_TERMINAL_PROMPT": "0"} + ) + + status_code = await process.wait() + + logger.debug( + f"Finished cloning {self.fullname} with status {status_code}", + ) + return ClonedRepository( + name=self.name, + owner=self.owner, + ref=self.ref, + path=checkout_dir, + commit_hash=await self._get_head_commit(checkout_dir), + ) + + @staticmethod + async def _get_head_commit(checkout_dir: Path) -> str: + """ + Return the commit sha for the repository in the checkout directory. + """ + process = await create_subprocess_exec( + *["git", "rev-parse", "HEAD"], + cwd=checkout_dir, + stdout=PIPE, + ) + stdout, _ = await process.communicate() + if await process.wait() != 0: + raise ProjectSetupError(f"Failed to retrieve commit sha at {checkout_dir}") + + return stdout.decode().strip() + + +@dataclass(frozen=True) +class ClonedRepository(Repository, Serializable): + """ + A cloned GitHub repository, which includes the hash of the cloned commit. + """ + + commit_hash: str + path: Path + + def url_for( + self: Self, + path: str, + line_number: int | None = None, + end_line_number: int | None = None, + ) -> str: + """ + Return the remote GitHub URL for the given path in this repository. 
+ """ + url = f"https://github.com/{self.owner}/{self.name}/blob/{self.commit_hash}/{path}" + if line_number: + url += f"#L{line_number}" + if end_line_number: + url += f"-L{end_line_number}" + return url + + @property + def url(self: Self) -> str: + return f"https://github.com/{self.owner}/{self.name}@{self.commit_hash}" diff --git a/python/ruff-ecosystem/ruff_ecosystem/ruff.py b/python/ruff-ecosystem/ruff_ecosystem/ruff.py deleted file mode 100644 index 74fd624bb4..0000000000 --- a/python/ruff-ecosystem/ruff_ecosystem/ruff.py +++ /dev/null @@ -1,90 +0,0 @@ -from pathlib import Path -from ruff_ecosystem import logger -from ruff_ecosystem.models import CheckOptions, FormatOptions -import time -from asyncio import create_subprocess_exec -from subprocess import PIPE -from typing import Sequence -import re - -CHECK_SUMMARY_LINE_RE = re.compile( - r"^(Found \d+ error.*)|(.*potentially fixable with.*)$" -) - - -CHECK_DIFF_LINE_RE = re.compile( - r"^(?P{project.repo.fullname} ({title})
</summary>'
+    )
+    lines.append(options)
+    lines.append("<p>")
+    lines.append("")
+
+    lines.append(content)
+
+    lines.append("")
+    lines.append("</p>
") + lines.append("[+-]) (?P(?P [^:]+):(?P \d+):\d+:) (?P .*)$", -) - - -class RuffError(Exception): - """An error reported by ruff.""" - - -async def ruff_check( - *, executable: Path, path: Path, name: str, options: CheckOptions -) -> Sequence[str]: - """Run the given ruff binary against the specified path.""" - logger.debug(f"Checking {name} with {executable}") - ruff_args = ["check", "--no-cache", "--exit-zero"] - if options.select: - ruff_args.extend(["--select", options.select]) - if options.ignore: - ruff_args.extend(["--ignore", options.ignore]) - if options.exclude: - ruff_args.extend(["--exclude", options.exclude]) - if options.show_fixes: - ruff_args.extend(["--show-fixes", "--ecosystem-ci"]) - - start = time.time() - proc = await create_subprocess_exec( - executable.absolute(), - *ruff_args, - ".", - stdout=PIPE, - stderr=PIPE, - cwd=path, - ) - result, err = await proc.communicate() - end = time.time() - - logger.debug(f"Finished checking {name} with {executable} in {end - start:.2f}") - - if proc.returncode != 0: - raise RuffError(err.decode("utf8")) - - lines = [ - line - for line in result.decode("utf8").splitlines() - if not CHECK_SUMMARY_LINE_RE.match(line) - ] - - return sorted(lines) - - -async def ruff_format( - *, executable: Path, path: Path, name: str, options: FormatOptions -) -> Sequence[str]: - """Run the given ruff binary against the specified path.""" - logger.debug(f"Checking {name} with {executable}") - ruff_args = ["format", "--no-cache", "--exit-zero"] - - start = time.time() - proc = await create_subprocess_exec( - executable.absolute(), - *ruff_args, - ".", - stdout=PIPE, - stderr=PIPE, - cwd=path, - ) - result, err = await proc.communicate() - end = time.time() - - logger.debug(f"Finished formatting {name} with {executable} in {end - start:.2f}") - - if proc.returncode != 0: - raise RuffError(err.decode("utf8")) - - lines = result.decode("utf8").splitlines() - return lines diff --git a/python/ruff-ecosystem/ruff_ecosystem/types.py b/python/ruff-ecosystem/ruff_ecosystem/types.py new file mode 100644 index 0000000000..ae8d3de217 --- /dev/null +++ b/python/ruff-ecosystem/ruff_ecosystem/types.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import abc +import dataclasses +import difflib +from dataclasses import dataclass, is_dataclass +from typing import TYPE_CHECKING, Any, Iterable, Sequence, Generator +import heapq + +if TYPE_CHECKING: + from ruff_ecosystem.projects import ClonedRepository, Project + + +class Serializable(abc.ABC): + """ + Allows serialization of content by casting to a JSON-compatible type. 
+ """ + + def jsonable(self) -> Any: + # Default implementation for dataclasses + if is_dataclass(self): + return dataclasses.asdict(self) + + raise NotImplementedError() + + +class Diff(Serializable): + def __init__(self, lines: Iterable[str]) -> None: + self.lines = list(lines) + + # Compute added and removed lines once + self.added = list(line[2:] for line in self.lines if line.startswith("+ ")) + self.removed = list(line[2:] for line in self.lines if line.startswith("- ")) + + def __bool__(self) -> bool: + return bool(self.added or self.removed) + + def __iter__(self) -> Generator[str, None, None]: + yield from self.lines + + @property + def lines_added(self): + return len(self.added) + + @property + def lines_removed(self): + return len(self.removed) + + @classmethod + def new(cls, baseline: Sequence[str], comparison: Sequence[str]): + return cls(difflib.ndiff(baseline, comparison)) + + def jsonable(self) -> Any: + return self.lines + + +@dataclass(frozen=True) +class Result(Serializable): + errored: list[tuple[Project, Exception]] + completed: list[tuple[Project, Comparison]] + + +@dataclass(frozen=True) +class Comparison(Serializable): + diff: Diff + repo: ClonedRepository + + +class RuffError(Exception): + """An error reported by ruff."""