diff --git a/scripts/knot_benchmark/README.md b/scripts/knot_benchmark/README.md
new file mode 100644
index 0000000000..622d6da747
--- /dev/null
+++ b/scripts/knot_benchmark/README.md
@@ -0,0 +1,21 @@
+## Getting started
+
+1. [Install `uv`](https://docs.astral.sh/uv/getting-started/installation/)
+
+   - Unix: `curl -LsSf https://astral.sh/uv/install.sh | sh`
+   - Windows: `powershell -c "irm https://astral.sh/uv/install.ps1 | iex"`
+
+1. Build red_knot: `cargo build --bin red_knot --release`
+1. `cd` into the benchmark directory: `cd scripts/knot_benchmark`
+1. Run benchmarks: `uv run benchmark`
+
+## Known limitations
+
+Red Knot only implements a tiny fraction of Mypy's and Pyright's functionality,
+so the benchmarks aren't in any way a fair comparison today. However,
+they'll become more meaningful as we build out more type checking features in Red Knot.
+
+### Windows support
+
+The script should work on Windows, but we haven't tested it yet.
+We make use of `shlex`, which has known limitations when used with non-POSIX shells.
diff --git a/scripts/knot_benchmark/pyproject.toml b/scripts/knot_benchmark/pyproject.toml
new file mode 100644
index 0000000000..e1c5191232
--- /dev/null
+++ b/scripts/knot_benchmark/pyproject.toml
@@ -0,0 +1,21 @@
+[project]
+name = "knot_benchmark"
+version = "0.0.1"
+description = "Package for running end-to-end Red Knot benchmarks"
+requires-python = ">=3.12"
+dependencies = ["mypy", "pyright"]
+
+[project.scripts]
+benchmark = "benchmark.run:main"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/benchmark"]
+
+[tool.ruff.lint]
+ignore = [
+    "E501", # We use ruff format
+]
diff --git a/scripts/knot_benchmark/src/benchmark/__init__.py b/scripts/knot_benchmark/src/benchmark/__init__.py
new file mode 100644
index 0000000000..3e2f61f737
--- /dev/null
+++ b/scripts/knot_benchmark/src/benchmark/__init__.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+import logging
+import shlex
+import subprocess
+import typing
+from pathlib import Path
+
+
+class Command(typing.NamedTuple):
+    name: str
+    """The name of the command to benchmark."""
+
+    command: list[str]
+    """The command to benchmark."""
+
+    prepare: str | None = None
+    """The command to run before each benchmark run."""
+
+
+class Hyperfine(typing.NamedTuple):
+    name: str
+    """The benchmark to run."""
+
+    commands: list[Command]
+    """The commands to benchmark."""
+
+    warmup: int
+    """The number of warmup runs to perform."""
+
+    min_runs: int
+    """The minimum number of runs to perform."""
+
+    verbose: bool
+    """Whether to print verbose output."""
+
+    json: bool
+    """Whether to export results to JSON."""
+
+    def run(self, *, cwd: Path | None = None) -> None:
+        """Run the benchmark using `hyperfine`."""
+        args = [
+            "hyperfine",
+            # Most repositories have some typing errors, so the type checkers exit non-zero;
+            # `-i` tells hyperfine to tolerate that (at the cost of hiding "real" failures).
+            "-i",
+        ]
+
+        # Export to JSON.
+        if self.json:
+            args.extend(["--export-json", f"{self.name}.json"])
+
+        # Preamble: benchmark-wide setup.
+        if self.verbose:
+            args.append("--show-output")
+
+        args.extend(["--warmup", str(self.warmup), "--min-runs", str(self.min_runs)])
+
+        # Add all command names.
+        for command in self.commands:
+            args.extend(["--command-name", command.name])
+
+        # Add all prepare statements.
+        for command in self.commands:
+            args.extend(["--prepare", command.prepare or ""])
+
+        # Add all commands.
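+        # `hyperfine` pairs repeated `--command-name` and `--prepare` options with the
+        # positional commands in order, so all three loops must iterate `self.commands`
+        # in the same order.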
+        for command in self.commands:
+            args.append(shlex.join(command.command))
+
+        logging.info(f"Running {args}")
+
+        subprocess.run(
+            args,
+            cwd=cwd,
+        )
diff --git a/scripts/knot_benchmark/src/benchmark/cases.py b/scripts/knot_benchmark/src/benchmark/cases.py
new file mode 100644
index 0000000000..38417cf30c
--- /dev/null
+++ b/scripts/knot_benchmark/src/benchmark/cases.py
@@ -0,0 +1,212 @@
+from __future__ import annotations
+
+import abc
+import enum
+import logging
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+from benchmark import Command
+from benchmark.projects import Project
+
+
+class Benchmark(enum.Enum):
+    """Enumeration of the benchmarks to run."""
+
+    COLD = "cold"
+    """Cold check of an entire project without a cache present."""
+
+    WARM = "warm"
+    """Re-checking the entire project without any changes."""
+
+
+def which_tool(name: str) -> Path:
+    tool = shutil.which(name)
+
+    assert (
+        tool is not None
+    ), f"Tool {name} not found. Run the script with `uv run