mirror of https://github.com/astral-sh/ruff
Refactor ecosystem checks into module
This commit is contained in:
parent
88c0106421
commit
105fb1c682
|
|
@ -0,0 +1,45 @@
|
||||||
|
# ruff-ecosystem
|
||||||
|
|
||||||
|
Ruff ecosystem checks.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
From the Ruff project root, install with `pip`:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install -e ./python/ruff-ecosystem
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```
|
||||||
|
ruff-ecosystem <check | format> <baseline executable> <comparison executable>
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that executable paths must be either absolute or relative to the current working directory.
|
||||||
|
|
||||||
|
Run `ruff check` ecosystem checks comparing your debug build to your system Ruff:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
ruff-ecosystem check "$(which ruff)" "./target/debug/ruff"
|
||||||
|
```
|
||||||
|
|
||||||
|
Run `ruff format` ecosystem checks comparing your debug build to your system Ruff:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
ruff-ecosystem format "$(which ruff)" "./target/debug/ruff"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
When developing, it can be useful to set the `--pdb` flag to drop into a debugger on failure:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
ruff-ecosystem check "$(which ruff)" "./target/debug/ruff" --pdb
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also provide a path to cache checkouts to speed up repeated runs:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
ruff-ecosystem check "$(which ruff)" "./target/debug/ruff" --cache ./repos
|
||||||
|
```
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "ruff-ecosystem"
|
||||||
|
version = "0.0.0"
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
ruff-ecosystem = "ruff_ecosystem.cli:entrypoint"
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
import logging

# Shared package-level logger; the CLI configures the logging system via
# `logging.basicConfig` before any checks run.
logger = logging.getLogger("ruff-ecosystem")
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
"""
|
||||||
|
Enables usage with `python -m ruff_ecosystem`
|
||||||
|
"""
|
||||||
|
|
||||||
|
from ruff_ecosystem.cli import entrypoint
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
entrypoint()
|
||||||
|
|
@ -0,0 +1,116 @@
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from contextlib import nullcontext
|
||||||
|
from ruff_ecosystem.models import RuffCommand
|
||||||
|
from ruff_ecosystem.emitters import EmitterType
|
||||||
|
from ruff_ecosystem.defaults import DEFAULT_TARGETS
|
||||||
|
from ruff_ecosystem.main import main
|
||||||
|
from signal import SIGINT, SIGTERM
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def excepthook(exc_type, exc_value, exc_tb):
    """`sys.excepthook` replacement that drops into pdb on uncaught errors.

    Falls back to the default handler when running interactively or when
    stderr is not attached to a tty (e.g. CI), where a debugger prompt would
    hang the process. Parameters follow the `sys.excepthook` signature;
    renamed from `type`/`value`/`tb` to avoid shadowing the `type` builtin.
    """
    if hasattr(sys, "ps1") or not sys.stderr.isatty():
        # we are in interactive mode or we don't have a tty so call the default
        sys.__excepthook__(exc_type, exc_value, exc_tb)
    else:
        # Import lazily: only needed on the debugging path
        import pdb
        import traceback

        traceback.print_exception(exc_type, exc_value, exc_tb)
        print()
        pdb.post_mortem(exc_tb)
|
||||||
|
|
||||||
|
|
||||||
|
def entrypoint():
    """CLI entry point: parse arguments, configure logging, and run `main`."""
    args = parse_args()

    if args.pdb:
        # Drop into a post-mortem debugger on unhandled exceptions
        sys.excepthook = excepthook

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    # Use a temporary directory for caching if no cache is specified
    cache_context = (
        tempfile.TemporaryDirectory() if not args.cache else nullcontext(args.cache)
    )

    with cache_context as cache:
        # `asyncio.get_event_loop()` is deprecated when no loop is running;
        # create and register a fresh loop explicitly instead.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        main_task = loop.create_task(
            main(
                command=RuffCommand(args.ruff_command),
                ruff_baseline_executable=args.ruff_baseline,
                ruff_comparison_executable=args.ruff_comparison,
                targets=DEFAULT_TARGETS,
                emitter=EmitterType(args.output_format).to_emitter(),
                cache=Path(cache),
                raise_on_failure=args.pdb,
            )
        )
        # Cancel the main task on SIGINT/SIGTERM so cleanup still runs
        # https://stackoverflow.com/a/58840987/3549270
        # NOTE(review): `add_signal_handler` is POSIX-only — confirm Windows
        # support is not required.
        for signal in [SIGINT, SIGTERM]:
            loop.add_signal_handler(signal, main_task.cancel)
        try:
            loop.run_until_complete(main_task)
        finally:
            loop.close()
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse command line arguments for the `ruff-ecosystem` entry point."""
    parser = argparse.ArgumentParser(
        description="Check two versions of ruff against a corpus of open-source code.",
    )

    # TODO: Support non-default `--targets`
    # parser.add_argument(
    #     "--targets",
    #     type=Path,
    #     help=(
    #         "Optional JSON files to use over the default repositories. "
    #         "Supports both github_search_*.jsonl and known-github-tomls.jsonl."
    #     ),
    # )
    parser.add_argument(
        "--cache",
        type=Path,
        help="Location for caching cloned repositories",
    )
    parser.add_argument(
        "--output-format",
        choices=[option.name for option in EmitterType],
        default="json",
        help="The format in which to display check results",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable debug logging",
    )
    parser.add_argument(
        "--pdb",
        action="store_true",
        help="Enable debugging on failure",
    )
    parser.add_argument(
        "ruff_command",
        choices=[option.name for option in RuffCommand],
        help="The Ruff command to test",
    )
    parser.add_argument(
        "ruff_baseline",
        type=Path,
    )
    parser.add_argument(
        "ruff_comparison",
        type=Path,
    )

    return parser.parse_args()
|
||||||
|
|
@ -0,0 +1,66 @@
|
||||||
|
from .models import Repository, CheckOptions, Target
|
||||||
|
|
||||||
|
# TODO: Consider exporting this as JSON instead for consistent setup
# Default set of repositories to check. Entries that are commented out are
# excluded from runs.
DEFAULT_TARGETS = [
    # Target(repo=Repository(owner="DisnakeDev", name="disnake", branch="master")),
    # Target(repo=Repository(owner="PostHog", name="HouseWatch", branch="main")),
    # Target(repo=Repository(owner="RasaHQ", name="rasa", branch="main")),
    # Target(repo=Repository(owner="Snowflake-Labs", name="snowcli", branch="main")),
    # Target(repo=Repository(owner="aiven", name="aiven-client", branch="main")),
    # Target(repo=Repository(owner="alteryx", name="featuretools", branch="main")),
    # Target(
    #     repo=Repository(owner="apache", name="airflow", branch="main"),
    #     check_options=CheckOptions(select="ALL"),
    # ),
    # Target(repo=Repository(owner="aws", name="aws-sam-cli", branch="develop")),
    # Target(repo=Repository(owner="bloomberg", name="pytest-memray", branch="main")),
    # Target(
    #     repo=Repository(owner="bokeh", name="bokeh", branch="branch-3.3"),
    #     check_options=CheckOptions(select="ALL"),
    # ),
    # Target(repo=Repository(owner="commaai", name="openpilot", branch="master")),
    # Target(repo=Repository(owner="demisto", name="content", branch="master")),
    # Target(repo=Repository(owner="docker", name="docker-py", branch="main")),
    # Target(
    #     repo=Repository(owner="freedomofpress", name="securedrop", branch="develop")
    # ),
    # Target(repo=Repository(owner="fronzbot", name="blinkpy", branch="dev")),
    # Target(repo=Repository(owner="ibis-project", name="ibis", branch="master")),
    # Target(repo=Repository(owner="ing-bank", name="probatus", branch="main")),
    # Target(repo=Repository(owner="jrnl-org", name="jrnl", branch="develop")),
    # Target(repo=Repository(owner="latchbio", name="latch", branch="main")),
    # Target(repo=Repository(owner="lnbits", name="lnbits", branch="main")),
    # Target(repo=Repository(owner="milvus-io", name="pymilvus", branch="master")),
    # Target(repo=Repository(owner="mlflow", name="mlflow", branch="master")),
    # Target(repo=Repository(owner="model-bakers", name="model_bakery", branch="main")),
    # Target(repo=Repository(owner="pandas-dev", name="pandas", branch="main")),
    # Target(repo=Repository(owner="prefecthq", name="prefect", branch="main")),
    # Target(repo=Repository(owner="pypa", name="build", branch="main")),
    # Target(repo=Repository(owner="pypa", name="cibuildwheel", branch="main")),
    # Target(repo=Repository(owner="pypa", name="pip", branch="main")),
    # Target(repo=Repository(owner="pypa", name="setuptools", branch="main")),
    # Target(repo=Repository(owner="python", name="mypy", branch="master")),
    # Target(
    #     repo=Repository(
    #         owner="python",
    #         name="typeshed",
    #         branch="main",
    #     ),
    #     check_options=CheckOptions(select="PYI"),
    # ),
    # Target(repo=Repository(owner="python-poetry", name="poetry", branch="master")),
    # Target(repo=Repository(owner="reflex-dev", name="reflex", branch="main")),
    # Target(repo=Repository(owner="rotki", name="rotki", branch="develop")),
    # Target(repo=Repository(owner="scikit-build", name="scikit-build", branch="main")),
    # Target(
    #     repo=Repository(owner="scikit-build", name="scikit-build-core", branch="main")
    # ),
    # Target(repo=Repository(owner="sphinx-doc", name="sphinx", branch="master")),
    # Target(repo=Repository(owner="spruceid", name="siwe-py", branch="main")),
    # Target(repo=Repository(owner="tiangolo", name="fastapi", branch="master")),
    # Target(repo=Repository(owner="yandex", name="ch-backup", branch="main")),
    Target(
        repo=Repository(owner="zulip", name="zulip", branch="main"),
        check_options=CheckOptions(select="ALL"),
    ),
]
|
||||||
|
|
@ -0,0 +1,99 @@
|
||||||
|
from enum import Enum
|
||||||
|
import abc
|
||||||
|
from ruff_ecosystem.models import Target, Diff, ClonedRepository, Result
|
||||||
|
from ruff_ecosystem.ruff import CHECK_DIFF_LINE_RE
|
||||||
|
import traceback
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
import dataclasses
|
||||||
|
|
||||||
|
|
||||||
|
class Emitter(abc.ABC):
    """Interface for reporting ecosystem check output.

    Implementations decide how per-target errors, per-target diffs, and the
    final aggregated result are rendered (e.g. JSON, markdown).
    """

    # `abc.abstractclassmethod` is deprecated since Python 3.3, and these
    # methods are invoked on emitter instances; declare plain abstract methods.
    @abc.abstractmethod
    def emit_error(self, target: Target, exc: Exception):
        """Report an exception raised while checking `target`."""

    @abc.abstractmethod
    def emit_diff(self, target: Target, diff: Diff, cloned_repo: ClonedRepository):
        """Report a diff between the two ruff runs on `target`."""

    @abc.abstractmethod
    def emit_result(self, result: Result):
        """Report the final aggregated result of the run."""
|
||||||
|
|
||||||
|
|
||||||
|
class DebugEmitter(Emitter):
    """Emitter that prints errors with tracebacks and ignores everything else."""

    def emit_error(self, target: Target, exc: Exception):
        print(f"Error in {target.repo.fullname}")
        traceback.print_exception(exc)

    def emit_diff(self, target: Target, diff: Diff, cloned_repo: ClonedRepository):
        pass

    # Without this override the class remains abstract (`emit_result` is
    # declared abstract on `Emitter`) and cannot be instantiated.
    def emit_result(self, result: Result):
        pass
|
||||||
|
|
||||||
|
|
||||||
|
class JSONEmitter(Emitter):
    """Emitter that writes the final result as JSON to stdout."""

    class DataclassJSONEncoder(json.JSONEncoder):
        """JSON encoder that also handles dataclasses, sets, and paths."""

        def default(self, o):
            if dataclasses.is_dataclass(o):
                return dataclasses.asdict(o)
            elif isinstance(o, set):
                return tuple(o)
            elif isinstance(o, Path):
                return str(o)
            else:
                return super().default(o)

    def emit_error(self, target: Target, exc: Exception):
        # No-op: all JSON output is produced at once in `emit_result`
        pass

    def emit_diff(self, target: Target, diff: Diff, cloned_repo: ClonedRepository):
        # No-op: all JSON output is produced at once in `emit_result`
        pass

    def emit_result(self, result: Result):
        print(json.dumps(result, indent=4, cls=self.DataclassJSONEncoder))
|
||||||
|
|
||||||
|
|
||||||
|
class MarkdownEmitter(Emitter):
    """Emitter that renders errors and diffs as collapsible GitHub markdown."""

    def emit_error(self, target: Target, exc: Exception):
        self._print(title="error", content=f"```\n{exc}\n```", target=target)

    def emit_diff(self, target: Target, diff: Diff, cloned_repo: ClonedRepository):
        # Summarize the size of the change, e.g. "+3, -1"
        changes = f"+{len(diff.added)}, -{len(diff.removed)}"

        content = ""
        # `Diff` exposes its formatted lines via `lines()`; iterating the
        # object directly raised `TypeError` (no `__iter__` was defined).
        for line in diff.lines():
            match = CHECK_DIFF_LINE_RE.match(line)
            if match is None:
                content += line + "\n"
                continue

            # Turn the `<path>:<line>:<col>:` fragment into a GitHub link
            pre, inner, path, lnum, post = match.groups()
            url = cloned_repo.url_for(path, int(lnum))
            content += f"{pre} <a href='{url}'>{inner}</a> {post}" + "\n"

        self._print(title=changes, content=f"<pre>\n{content}\n</pre>", target=target)

    def emit_result(self, result: Result):
        # Required to make the class concrete: `emit_result` is abstract on
        # `Emitter`, so `EmitterType.to_emitter` could not instantiate this
        # class without an override.
        # NOTE(review): confirm whether an aggregate summary should be
        # rendered here.
        pass

    def _print(self, title: str, content: str, target: Target):
        """Print a collapsible `<details>` section for `target`."""
        print(f"<details><summary>{target.repo.fullname} ({title})</summary>")
        print(target.repo.url, target.check_options.summary())
        print("<p>")
        print()

        print(content)

        print()
        print("</p>")
        print("</details>")
|
||||||
|
|
||||||
|
|
||||||
|
class EmitterType(Enum):
|
||||||
|
markdown = "markdown"
|
||||||
|
json = "json"
|
||||||
|
|
||||||
|
def to_emitter(self) -> Emitter:
|
||||||
|
match self:
|
||||||
|
case self.markdown:
|
||||||
|
return MarkdownEmitter()
|
||||||
|
case self.json:
|
||||||
|
return JSONEmitter()
|
||||||
|
case _:
|
||||||
|
raise ValueError("Unknown emitter type {self}")
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
from ruff_ecosystem.models import Repository, ClonedRepository
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import AsyncGenerator
|
||||||
|
from asyncio import create_subprocess_exec
|
||||||
|
from subprocess import PIPE
|
||||||
|
from ruff_ecosystem import logger
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def clone(
    repo: Repository, checkout_dir: Path
) -> AsyncGenerator[ClonedRepository, None]:
    """Shallow clone this repository to a temporary directory."""
    # Reuse an existing checkout when present (e.g. when `--cache` is set)
    if checkout_dir.exists():
        logger.debug(f"Reusing {repo.owner}:{repo.name}")
        yield await _cloned_repository(repo, checkout_dir)
        return

    logger.debug(f"Cloning {repo.owner}:{repo.name} to {checkout_dir}")
    command = [
        "git",
        "clone",
        "--config",
        "advice.detachedHead=false",
        "--quiet",
        "--depth",
        "1",
        "--no-tags",
    ]
    if repo.branch:
        command.extend(["--branch", repo.branch])

    command.extend(
        [
            f"https://github.com/{repo.owner}/{repo.name}",
            checkout_dir,
        ],
    )

    # GIT_TERMINAL_PROMPT=0 disables interactive credential prompts so a
    # private/missing repository fails instead of hanging
    process = await create_subprocess_exec(*command, env={"GIT_TERMINAL_PROMPT": "0"})

    status_code = await process.wait()

    logger.debug(
        f"Finished cloning {repo.fullname} with status {status_code}",
    )
    # NOTE(review): a non-zero clone status is only logged here; the
    # subsequent `_cloned_repository` call will then fail on the missing
    # checkout — consider raising on failure instead.
    yield await _cloned_repository(repo, checkout_dir)
|
||||||
|
|
||||||
|
|
||||||
|
async def _cloned_repository(repo: Repository, checkout_dir: Path) -> ClonedRepository:
    """Build a `ClonedRepository` for `repo` checked out at `checkout_dir`."""
    commit_hash = await _get_commit_hash(checkout_dir)
    return ClonedRepository(
        owner=repo.owner,
        name=repo.name,
        branch=repo.branch,
        path=checkout_dir,
        commit_hash=commit_hash,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_commit_hash(checkout_dir: Path) -> str:
    """
    Return the commit sha for the repository in the checkout directory.

    Raises:
        RuntimeError: if `git rev-parse` exits with a non-zero status.
    """
    process = await create_subprocess_exec(
        *["git", "rev-parse", "HEAD"],
        cwd=checkout_dir,
        stdout=PIPE,
    )
    stdout, _ = await process.communicate()
    # `assert` is stripped when running under `python -O`; validate explicitly
    if await process.wait() != 0:
        raise RuntimeError(f"Failed to retrieve commit sha at {checkout_dir}")
    return stdout.decode().strip()
|
||||||
|
|
@ -0,0 +1,235 @@
|
||||||
|
from ruff_ecosystem.models import (
|
||||||
|
RuffCommand,
|
||||||
|
Target,
|
||||||
|
Diff,
|
||||||
|
ClonedRepository,
|
||||||
|
RuleChanges,
|
||||||
|
CheckComparison,
|
||||||
|
Result,
|
||||||
|
)
|
||||||
|
from pathlib import Path
|
||||||
|
from ruff_ecosystem import logger
|
||||||
|
import asyncio
|
||||||
|
from ruff_ecosystem.git import clone
|
||||||
|
from ruff_ecosystem.ruff import ruff_check, ruff_format
|
||||||
|
from ruff_ecosystem.emitters import Emitter
|
||||||
|
import difflib
|
||||||
|
from typing import TypeVar
|
||||||
|
import re
|
||||||
|
|
||||||
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
async def main(
    command: RuffCommand,
    ruff_baseline_executable: Path,
    ruff_comparison_executable: Path,
    targets: list[Target],
    cache: Path | None,
    emitter: Emitter,
    max_parallelism: int = 50,
    raise_on_failure: bool = False,
) -> None:
    """Run ecosystem comparisons for each target and emit an aggregate result.

    Args:
        command: which ruff subcommand to compare (`check` or `format`).
        ruff_baseline_executable: path to the baseline ruff binary.
        ruff_comparison_executable: path to the comparison ruff binary.
        targets: repositories to compare against.
        cache: directory used for cloned repositories.
        emitter: sink for the aggregated `Result`.
        max_parallelism: maximum number of concurrent comparisons.
        raise_on_failure: propagate the first failure instead of collecting
            exceptions into the result.
    """
    logger.debug("Using command %s", command.value)
    logger.debug("Using baseline executable at %s", ruff_baseline_executable)
    logger.debug("Using comparison executable at %s", ruff_comparison_executable)
    logger.debug("Using cache directory %s", cache)
    logger.debug("Checking %s targets", len(targets))

    # Bound concurrency so we do not clone and check every repo at once
    semaphore = asyncio.Semaphore(max_parallelism)

    async def limited_parallelism(coroutine: T) -> T:
        async with semaphore:
            return await coroutine

    comparisons: list[Exception | CheckComparison] = await asyncio.gather(
        *[
            limited_parallelism(
                clone_and_compare(
                    command,
                    ruff_baseline_executable,
                    ruff_comparison_executable,
                    target,
                    cache,
                )
            )
            for target in targets
        ],
        return_exceptions=not raise_on_failure,
    )
    comparisons_by_target = dict(zip(targets, comparisons, strict=True))

    # Calculate totals
    total_removed = total_added = 0
    total_rule_changes = RuleChanges()
    for comparison in comparisons_by_target.values():
        if not isinstance(comparison, Exception):
            total_removed += len(comparison.diff.removed)
            total_added += len(comparison.diff.added)
            total_rule_changes += comparison.rule_changes

    # Partition targets into failures and non-empty diffs.
    # (A large block of markdown-printing code used to follow the
    # `emit_result` call below but was unreachable after `return`; it has
    # been removed along with an `errors` integer tally that it shadowed.)
    errors = []
    changed = []
    for target, comparison in comparisons_by_target.items():
        if isinstance(comparison, Exception):
            errors.append((target, comparison))
        elif comparison.diff:
            changed.append((target, comparison))

    result = Result(
        total_added=total_added,
        total_removed=total_removed,
        total_rule_changes=total_rule_changes,
        comparisons=changed,
        errors=errors,
    )

    emitter.emit_result(result)
|
||||||
|
|
||||||
|
|
||||||
|
async def clone_and_compare(
|
||||||
|
command: RuffCommand,
|
||||||
|
ruff_baseline_executable: Path,
|
||||||
|
ruff_comparison_executable: Path,
|
||||||
|
target: Target,
|
||||||
|
cache: Path,
|
||||||
|
) -> CheckComparison:
|
||||||
|
"""Check a specific repository against two versions of ruff."""
|
||||||
|
assert ":" not in target.repo.owner
|
||||||
|
assert ":" not in target.repo.name
|
||||||
|
|
||||||
|
match command:
|
||||||
|
case RuffCommand.check:
|
||||||
|
ruff_task, create_comparison, options = (
|
||||||
|
ruff_check,
|
||||||
|
create_check_comparison,
|
||||||
|
target.check_options,
|
||||||
|
)
|
||||||
|
case RuffCommand.format:
|
||||||
|
ruff_task, create_comparison, options = (
|
||||||
|
ruff_format,
|
||||||
|
create_format_comparison,
|
||||||
|
target.format_options,
|
||||||
|
)
|
||||||
|
case _:
|
||||||
|
raise ValueError(f"Unknowm target Ruff command {command}")
|
||||||
|
|
||||||
|
checkout_dir = cache.joinpath(f"{target.repo.owner}:{target.repo.name}")
|
||||||
|
async with clone(target.repo, checkout_dir) as cloned_repo:
|
||||||
|
try:
|
||||||
|
async with asyncio.TaskGroup() as tg:
|
||||||
|
baseline_task = tg.create_task(
|
||||||
|
ruff_task(
|
||||||
|
executable=ruff_baseline_executable.resolve(),
|
||||||
|
path=cloned_repo.path,
|
||||||
|
name=cloned_repo.fullname,
|
||||||
|
options=options,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
comparison_task = tg.create_task(
|
||||||
|
ruff_task(
|
||||||
|
executable=ruff_comparison_executable.resolve(),
|
||||||
|
path=cloned_repo.path,
|
||||||
|
name=cloned_repo.fullname,
|
||||||
|
options=options,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
except ExceptionGroup as e:
|
||||||
|
raise e.exceptions[0] from e
|
||||||
|
|
||||||
|
return create_comparison(
|
||||||
|
cloned_repo, baseline_task.result(), comparison_task.result()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def create_check_comparison(
    repo: ClonedRepository, baseline_output: str, comparison_output: str
) -> CheckComparison:
    """Build a `CheckComparison` from the outputs of two `ruff check` runs."""
    removed: set[str] = set()
    added: set[str] = set()

    # `ndiff` prefixes lines unique to the baseline with "- " and lines
    # unique to the comparison with "+ "
    for line in difflib.ndiff(baseline_output, comparison_output):
        prefix, text = line[:2], line[2:]
        if prefix == "- ":
            removed.add(text)
        elif prefix == "+ ":
            added.add(text)

    diff = Diff(removed=removed, added=added)

    return CheckComparison(
        diff=diff, repo=repo, rule_changes=rule_changes_from_diff(diff)
    )
|
||||||
|
|
||||||
|
|
||||||
|
def rule_changes_from_diff(diff: Diff) -> RuleChanges:
    """
    Parse a diff from `ruff check` to determine the additions and removals for each rule.
    """
    rule_changes = RuleChanges()

    # Compile the pattern once instead of on every diff line. Lines look like:
    # + <path>:<line>:<column>: <rule_code> <message>
    rule_code_pattern = re.compile(r": ([A-Z]{1,4}[0-9]{3,4})")

    for line in diff.lines():
        matches = rule_code_pattern.search(line)

        if matches is None:
            # Handle case where there are no regex matches e.g.
            # + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE" # noqa: E501, ERA001
            # Which was found in local testing
            continue

        rule_code = matches.group(1)

        # Get current additions and removals for this rule
        additions, removals = rule_changes[rule_code]

        # The first character of a diff line marks addition (+) or removal (-)
        if line[0] == "+":
            additions += 1
        elif line[0] == "-":
            removals += 1

        rule_changes[rule_code] = (additions, removals)

    return rule_changes
|
||||||
|
|
@ -0,0 +1,160 @@
|
||||||
|
from enum import Enum
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Self, Iterator
|
||||||
|
import heapq
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class RuffCommand(Enum):
    """The ruff subcommands that can be compared by the ecosystem checks."""

    check = "check"
    format = "format"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class Repository:
|
||||||
|
"""
|
||||||
|
A remote GitHub repository
|
||||||
|
"""
|
||||||
|
|
||||||
|
owner: str
|
||||||
|
name: str
|
||||||
|
branch: str | None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def fullname(self) -> str:
|
||||||
|
return f"{self.owner}/{self.name}"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def url(self: Self) -> str:
|
||||||
|
return f"https://github.com/{self.owner}/{self.name}"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ClonedRepository(Repository):
    """
    A cloned GitHub repository, which includes the hash of the cloned commit.
    """

    commit_hash: str
    path: Path

    def url_for(self: Self, path: str, line_number: int | None = None) -> str:
        """
        Return the remote GitHub URL for the given path in this repository.
        """
        # Pin the link to the cloned commit rather than a branch name
        parts = [
            f"https://github.com/{self.owner}/{self.name}/blob/{self.commit_hash}/{path}"
        ]
        if line_number:
            parts.append(f"#L{line_number}")
        return "".join(parts)

    @property
    def url(self: Self) -> str:
        """The repository URL pinned to the cloned commit."""
        return f"https://github.com/{self.owner}/{self.name}@{self.commit_hash}"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class Diff:
|
||||||
|
"""A diff between two runs of ruff."""
|
||||||
|
|
||||||
|
removed: set[str]
|
||||||
|
added: set[str]
|
||||||
|
|
||||||
|
def __bool__(self: Self) -> bool:
|
||||||
|
"""Return true if this diff is non-empty."""
|
||||||
|
return bool(self.removed or self.added)
|
||||||
|
|
||||||
|
def lines(self: Self) -> Iterator[str]:
|
||||||
|
"""Iterate through the changed lines in diff format."""
|
||||||
|
for line in heapq.merge(sorted(self.removed), sorted(self.added)):
|
||||||
|
if line in self.removed:
|
||||||
|
yield f"- {line}"
|
||||||
|
else:
|
||||||
|
yield f"+ {line}"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class RuleChanges:
|
||||||
|
changes: dict[str, tuple[int, int]] = field(default_factory=dict)
|
||||||
|
|
||||||
|
def rule_codes(self) -> list[str]:
|
||||||
|
return list(self.changes.keys())
|
||||||
|
|
||||||
|
def items(self) -> Iterator[tuple[str, tuple[int, int]]]:
|
||||||
|
return self.changes.items()
|
||||||
|
|
||||||
|
def __setitem__(self, key: str, value: tuple[int, int]) -> None:
|
||||||
|
self.changes[key] = value
|
||||||
|
|
||||||
|
def __getitem__(self, key: str) -> tuple[int, int]:
|
||||||
|
return self.changes.get(key, (0, 0))
|
||||||
|
|
||||||
|
def __add__(self, other: Self) -> Self:
|
||||||
|
if not isinstance(other, type(self)):
|
||||||
|
return NotImplemented
|
||||||
|
|
||||||
|
result = self.changes.copy()
|
||||||
|
for rule_code, (added, removed) in other.changes.items():
|
||||||
|
if rule_code in result:
|
||||||
|
result[rule_code] = (
|
||||||
|
result[rule_code][0] + added,
|
||||||
|
result[rule_code][1] + removed,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
result[rule_code] = (added, removed)
|
||||||
|
|
||||||
|
return RuleChanges(changes=result)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class CheckComparison:
    """The outcome of comparing two `ruff check` runs on one repository."""

    # Lines added/removed between the baseline and comparison runs
    diff: Diff
    # The repository (pinned to a commit) the comparison ran against
    repo: ClonedRepository
    # Per-rule addition/removal counts parsed from the diff
    rule_changes: RuleChanges
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class CheckOptions:
    """
    Ruff check options
    """

    select: str = ""
    ignore: str = ""
    exclude: str = ""

    # Generating fixes is slow and verbose
    show_fixes: bool = False

    def summary(self) -> str:
        """Return a short human-readable description of the rule selection."""
        parts = ("select", self.select, "ignore", self.ignore, "exclude", self.exclude)
        return " ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class FormatOptions:
    """
    Ruff format options
    """

    # No options are defined yet; this class exists for parity with
    # `CheckOptions` in `Target`.
    pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Target:
    """
    An ecosystem target
    """

    # The repository to clone and check
    repo: Repository
    # Options passed to `ruff check` runs for this target
    check_options: CheckOptions = field(default_factory=CheckOptions)
    # Options passed to `ruff format` runs for this target
    format_options: FormatOptions = field(default_factory=FormatOptions)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Result:
    """The aggregated outcome of an ecosystem run across all targets."""

    total_added: int
    total_removed: int
    total_rule_changes: RuleChanges

    # `main` passes lists of (target, comparison) / (target, exception)
    # pairs; the annotations previously claimed a single tuple.
    comparisons: list[tuple[Target, CheckComparison]]
    errors: list[tuple[Target, Exception]]
|
||||||
|
|
@ -0,0 +1,90 @@
|
||||||
|
from pathlib import Path
|
||||||
|
from ruff_ecosystem import logger
|
||||||
|
from ruff_ecosystem.models import CheckOptions, FormatOptions
|
||||||
|
import time
|
||||||
|
from asyncio import create_subprocess_exec
|
||||||
|
from subprocess import PIPE
|
||||||
|
from typing import Sequence
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Matches the summary lines appended by `ruff check` (e.g. "Found 3 errors.");
# these are filtered out of the output before diffing.
CHECK_SUMMARY_LINE_RE = re.compile(
    r"^(Found \d+ error.*)|(.*potentially fixable with.*)$"
)


# Matches one diffed line of check output,
# "<+/-> <path>:<line>:<col>: <message>", with named capture groups used by
# the markdown emitter to build GitHub links.
CHECK_DIFF_LINE_RE = re.compile(
    r"^(?P<pre>[+-]) (?P<inner>(?P<path>[^:]+):(?P<lnum>\d+):\d+:) (?P<post>.*)$",
)
|
||||||
|
|
||||||
|
|
||||||
|
class RuffError(Exception):
    """An error reported by ruff.

    Raised when a ruff subprocess exits with a non-zero status; carries the
    decoded stderr output as its message.
    """
|
||||||
|
|
||||||
|
|
||||||
|
async def ruff_check(
    *, executable: Path, path: Path, name: str, options: CheckOptions
) -> Sequence[str]:
    """Run the given ruff binary against the specified path."""
    logger.debug(f"Checking {name} with {executable}")

    ruff_args = ["check", "--no-cache", "--exit-zero"]
    # Only forward rule-selection flags that were actually set
    for flag, value in (
        ("--select", options.select),
        ("--ignore", options.ignore),
        ("--exclude", options.exclude),
    ):
        if value:
            ruff_args += [flag, value]
    if options.show_fixes:
        ruff_args += ["--show-fixes", "--ecosystem-ci"]

    start = time.time()
    proc = await create_subprocess_exec(
        executable.absolute(),
        *ruff_args,
        ".",
        stdout=PIPE,
        stderr=PIPE,
        cwd=path,
    )
    result, err = await proc.communicate()
    end = time.time()

    logger.debug(f"Finished checking {name} with {executable} in {end - start:.2f}")

    if proc.returncode != 0:
        raise RuffError(err.decode("utf8"))

    # Drop the run-summary lines and sort so output is stable across runs
    return sorted(
        line
        for line in result.decode("utf8").splitlines()
        if not CHECK_SUMMARY_LINE_RE.match(line)
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def ruff_format(
    *, executable: Path, path: Path, name: str, options: FormatOptions
) -> Sequence[str]:
    """Run the given ruff binary against the specified path."""
    logger.debug(f"Checking {name} with {executable}")
    # NOTE(review): `--exit-zero` mirrors the check invocation — confirm the
    # `format` subcommand accepts this flag.
    ruff_args = ["format", "--no-cache", "--exit-zero"]

    start = time.time()
    proc = await create_subprocess_exec(
        executable.absolute(),
        *ruff_args,
        ".",
        stdout=PIPE,
        stderr=PIPE,
        cwd=path,
    )
    result, err = await proc.communicate()
    end = time.time()

    logger.debug(f"Finished formatting {name} with {executable} in {end - start:.2f}")

    if proc.returncode != 0:
        raise RuffError(err.decode("utf8"))

    return result.decode("utf8").splitlines()
|
||||||
Loading…
Reference in New Issue