mirror of https://github.com/astral-sh/uv
commit 98628667f0 (parent 4432b2daf0)
@@ -15,7 +15,7 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("base", type=Path)
     parser.add_argument("branch", type=Path)
-    parser.add_argument("--project", action="store_true")
+    parser.add_argument("--mode", choices=["compile", "lock", "pyproject-toml"])
     parser.add_argument(
         "--markdown",
         action="store_true",
@@ -53,21 +53,21 @@ def main():
             # also `uv.lock` doesn't exist for failed resolutions
             continue

-        if args.project:
+        if args.mode == "compile":
+            resolution = package_dir.joinpath("stdout.txt").read_text()
+        else:
             resolution = package_dir.joinpath("uv.lock").read_text()
             if package_dir.joinpath("stdout.txt").read_text().strip():
                 raise RuntimeError(f"Stdout not empty (base): {package}")
-        else:
-            resolution = package_dir.joinpath("stdout.txt").read_text()
         stderr = package_dir.joinpath("stderr.txt").read_text()
         stderr = redact_time.sub(r"[TIME]", stderr)

-        if args.project:
+        if args.mode == "compile":
+            resolution_branch = package_branch.joinpath("stdout.txt").read_text()
+        else:
             resolution_branch = package_branch.joinpath("uv.lock").read_text()
             if package_branch.joinpath("stdout.txt").read_text().strip():
                 raise RuntimeError(f"Stdout not empty (branch): {package}")
-        else:
-            resolution_branch = package_branch.joinpath("stdout.txt").read_text()
         stderr_branch = package_branch.joinpath("stderr.txt").read_text()
         stderr_branch = redact_time.sub(r"[TIME]", stderr_branch)

@@ -77,19 +77,34 @@ def main():
             )

     if args.markdown:
-        print("# Ecosystem testing report")
         print(
-            f"Dataset: "
-            f"`{'uv pip compile' if not parameters['project'] else 'uv lock'}` with `--no-build` "
-            f"on each of the top 15k PyPI packages on Python {parameters['python']} "
-            "pinned to the latest package version. "
-            if parameters["latest"]
-            else ". "
-            "A handful of pathological cases were filtered out. "
-            "Only success resolutions can be compared.\n"
+            "## Ecosystem testing report "
+            f"({args.mode.replace('pyproject-toml', 'pyproject.toml')})"
         )
-        print(f"Successfully resolved packages: {successful}/{total}\n")
-        print(f"Different packages: {len(differences)}/{total}\n")
+        if args.mode == "pyproject-toml":
+            print(
+                " * Dataset: A set of top level `pyproject.toml` from GitHub projects popular in 2025. "
+                + "Only `pyproject.toml` files with a `[project]` section and static dependencies are included."
+            )
+        else:
+            print(
+                " * Dataset: The top 15k PyPI packages. A handful of pathological cases were filtered out."
+            )
+        print(
+            " * Command: "
+            + f"`{'uv pip compile' if args.mode == 'compile' else 'uv lock'}` with `--no-build` "
+            + f"on Python {parameters['python']} "
+            + (
+                "pinned to the latest package version. "
+                if parameters["latest"]
+                else ". "
+            )
+        )
+        print(
+            f" * Successfully resolved packages: {successful}/{total} ({successful / total:.0%}). "
+            + "Only success resolutions can be compared."
+        )
+        print(f" * Different packages: {len(differences)}/{successful}")

         for (
             package,
@@ -98,10 +113,10 @@ def main():
             stderr,
             stderr_branch,
         ) in differences:
-            if args.project:
-                context_window = 3
-            else:
+            if args.mode == "compile":
                 context_window = 999999
+            else:
+                context_window = 3
             print(f"\n<details>\n<summary>{package}</summary>\n")
             if resolution != resolution_branch:
                 print("```diff")
@@ -129,7 +144,7 @@ def main():
                     )
                 )
                 print("```")
-            print("</details>")
+            print("</details>\n")
     else:
         for (
             package,
@@ -159,9 +174,9 @@ def main():
                 )
             )
         print(
-            f"Successfully resolved packages: {successful}/{total} ({successful}/{total}:.0%)"
+            f"Successfully resolved packages: {successful}/{total} ({successful / total:.0%})"
         )
-        print(f"Different packages: {len(differences)}/{total}")
+        print(f"Different packages: {len(differences)}/{successful}")


 if __name__ == "__main__":
@@ -3,7 +3,8 @@
 # /// script
 # requires-python = ">=3.13"
 # dependencies = [
-#     "tqdm>=4,<5",
+#     "tomli-w>=1.2.0,<2.0.0",
+#     "tqdm>=4.67.1,<5.0.0",
 # ]
 # ///

@@ -16,11 +17,13 @@ import platform
 import shutil
 import subprocess
 import time
+import tomllib
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
 from threading import Thread

+import tomli_w
 from tqdm.auto import tqdm

 cwd = Path(__file__).parent
@@ -35,63 +38,36 @@ class Summary:


 def run_uv(
+    package: str,
+    specification: str,
     uv: Path,
-    project: bool,
+    mode: str,
     python: str,
     cache: Path,
     offline: bool,
-    package: str,
-    output_dir: Path,
-    version: str | None,
+    output: Path,
 ) -> Summary:
-    """Run a uv subprocess.
+    """Resolve in a uv subprocess.

     The logic captures the max RSS from the process and avoids deadlocks from full
     pipes.
     """
+    package_dir = output.joinpath(package)
+    package_dir.mkdir()
+    command = prepare_uv_command(
+        specification,
+        uv,
+        mode,
+        cache,
+        offline,
+        package_dir,
+        python,
+    )

     start = time.time()
-
-    requirement = f"{package}=={version}" if version else package
-    shared_args = [
-        "--no-build",
-        "--cache-dir",
-        cache,
-        "--color",
-        "never",
-    ]
-    if offline:
-        shared_args.append("--offline")
-    package_dir = output_dir.joinpath(package)
-    package_dir.mkdir(parents=True, exist_ok=True)
-    if project:
-        package_dir.joinpath("pyproject.toml").write_text(
-            f"""
-[project]
-name = "testing"
-version = "0.1.0"
-requires-python = ">={python}"
-dependencies = ["{requirement}"]
-"""
-        )
-        cmd = [uv, "lock", *shared_args]
-    else:
-        cmd = [
-            uv,
-            "pip",
-            "compile",
-            "-",
-            "-p",
-            python,
-            # The results are more reproducible if they are platform independent
-            "--universal",
-            "--no-header",
-            "--no-annotate",
-            *shared_args,
-        ]

     process = subprocess.Popen(
-        cmd,
+        command,
         cwd=package_dir,
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE,
@@ -99,7 +75,7 @@ def run_uv(
         text=True,
     )

-    stdout, stderr = communicate(process, requirement if not project else None)
+    stdout, stderr = communicate(process, specification if mode == "compile" else None)

     # At this point, the process is a zombie, so has called `exit()`, but we haven't reaped it with `wait4` yet.

@@ -122,6 +98,57 @@ def run_uv(
     return summary


+def prepare_uv_command(
+    specification: str,
+    uv: Path,
+    mode: str,
+    cache: Path,
+    offline: bool,
+    package_dir: Path,
+    python: str,
+) -> list[Path | str]:
+    shared_args = [
+        "--no-build",
+        "--cache-dir",
+        cache,
+        "--color",
+        "never",
+    ]
+    if offline:
+        shared_args.append("--offline")
+    if mode == "pyproject-toml":
+        package_dir.joinpath("pyproject.toml").write_text(specification)
+        command = [uv, "lock", *shared_args]
+    elif mode == "lock":
+        package_dir.joinpath("pyproject.toml").write_text(
+            f"""
+[project]
+name = "testing"
+version = "0.1.0"
+requires-python = ">={python}"
+dependencies = ["{specification}"]
+"""
+        )
+        command = [uv, "lock", *shared_args]
+    elif mode == "compile":
+        command = [
+            uv,
+            "pip",
+            "compile",
+            "-",
+            "-p",
+            python,
+            # The results are more reproducible if they are platform independent
+            "--universal",
+            "--no-header",
+            "--no-annotate",
+            *shared_args,
+        ]
+    else:
+        raise ValueError(f"Unknown mode: {mode}")
+    return command
+
+
 def communicate(process: subprocess.Popen, stdin: str | None) -> tuple[str, str]:
     """Like `Popen.communicate`, but without the `os.wait` call.

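The `run_uv` docstring above promises to capture the max RSS and to avoid deadlocks from full pipes, and `communicate` is described as `Popen.communicate` without the `os.wait` call. Below is a minimal standalone sketch of that pattern, not the script's exact code: the helper name `run_and_measure` is made up, `os.wait4` is Unix-only, and `ru_maxrss` is KiB on Linux but bytes on macOS.

import os
import subprocess
from threading import Thread


def run_and_measure(cmd: list[str]) -> tuple[str, str, int, int]:
    """Run cmd, drain its pipes in threads, then reap it with os.wait4."""
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    captured = {}

    def drain(name, pipe):
        # One reader per pipe keeps either pipe from filling up and blocking the child.
        captured[name] = pipe.read()

    threads = [
        Thread(target=drain, args=("stdout", process.stdout)),
        Thread(target=drain, args=("stderr", process.stderr)),
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # Both pipes hit EOF, so the child has exited and is now a zombie; wait4
    # reaps it and also returns the resource usage, including the peak RSS.
    _pid, status, rusage = os.wait4(process.pid, 0)
    # Record the exit status so Popen does not try to reap the child again.
    process.returncode = os.waitstatus_to_exitcode(status)
    return captured["stdout"], captured["stderr"], process.returncode, rusage.ru_maxrss

In the diff above, only `compile` mode pipes anything to the child's stdin (the requirement fed to `uv pip compile -`); the other modes pass `None`.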
@@ -157,12 +184,18 @@ def communicate(process: subprocess.Popen, stdin: str | None) -> tuple[str, str]
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "--project",
-        action="store_true",
-        help="Use `uv lock` instead of `uv pip compile`",
+        "--input", type=Path, default=cwd.joinpath("top-pypi-packages.json")
+    )
+    parser.add_argument(
+        "--mode",
+        choices=["compile", "lock", "pyproject-toml"],
+        default="compile",
+        help="`compile`: `uv pip compile`, "
+        "`lock`: `uv lock` from a single requirement"
+        "`pyproject-toml`: `uv lock` from a directory of `pyproject.toml` files",
     )
     parser.add_argument("--python", "-p", type=str, default="3.13")
-    parser.add_argument("--output-dir", type=Path, default="output")
+    parser.add_argument("--output", type=Path, default="output")
     parser.add_argument("--uv", type=Path, default=Path("uv"))
     parser.add_argument("--limit", type=int, default=None)
     parser.add_argument("--cache", type=Path, default=cwd.joinpath("cache"))
@@ -170,16 +203,62 @@ def main():
     parser.add_argument("--latest", action="store_true")
     args = parser.parse_args()

-    top_15k_pypi = json.loads(cwd.joinpath("top-pypi-packages.json").read_text())
-    top_15k_pypi = [pkg["project"] for pkg in top_15k_pypi["rows"]]
+    if args.mode == "pyproject-toml":
+        project_tomls = sorted((file.stem, file) for file in args.input.iterdir())
+        jobs = {}
+        no_project = 0
+        dynamic_dependencies = 0
+        for package, file in project_tomls:
+            if len(jobs) >= args.limit:
+                break
+            if file.suffix != ".toml":
+                continue
+            project_toml = file.read_text()
+            data = tomllib.loads(project_toml)
+            project = data.get("project")
+            if not project:
+                no_project += 1
+                continue
+            if dynamic := project.get("dynamic"):
+                if "dependencies" in dynamic:
+                    dynamic_dependencies += 1
+                    continue
+                if "version" in dynamic:
+                    dynamic.remove("version")
+                    # Usually there are no cycles back to the current project, so any version works
+                    project["version"] = "1.0.0"

-    if args.latest:
-        with cwd.joinpath("package_versions.csv").open() as f:
-            latest_versions = {
-                row["package_name"]: row["latest_version"] for row in csv.DictReader(f)
-            }
+            jobs[package] = tomli_w.dumps(data)
+
+        print(f"`pyproject.toml`s without `[project]`: {no_project}")
+        print(
+            f"`pyproject.toml`s with `dynamic = ['dependencies']`: {dynamic_dependencies}"
+        )
+        if args.latest:
+            raise ValueError("Latest versions are not supported in pyproject-toml mode")
     else:
-        latest_versions = None
+        project_names = json.loads(args.input.read_text())
+        project_names = sorted(pkg["project"] for pkg in project_names["rows"])
+
+        if args.latest:
+            with cwd.joinpath("package_versions.csv").open() as f:
+                latest_versions = {
+                    row["package_name"]: row["latest_version"]
+                    for row in csv.DictReader(f)
+                }
+        else:
+            latest_versions = None
+
+        jobs = {}
+        for package in project_names[: args.limit]:
+            if latest_versions:
+                if version := latest_versions.get(package):
+                    jobs[package] = f"{package}=={version}"
+                else:
+                    tqdm.write(f"Missing version: {package}")
+                    continue
+            else:
+                jobs[package] = package

     excluded_packages = [
         # 5000 releases, no solution
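The `pyproject-toml` branch above normalizes each fetched file before `uv lock` sees it: projects without a `[project]` table or with `dynamic = ["dependencies"]` are skipped, and a dynamic version is replaced by a fixed one, since, as the comment notes, any version works when nothing depends back on the project itself. A small sketch of that tomllib/tomli_w round-trip, using a made-up `pyproject.toml` (the `before` document and its contents are invented for illustration):

import tomllib

import tomli_w

# Hypothetical fetched pyproject.toml with a dynamic version.
before = """\
[project]
name = "example"
dynamic = ["version"]
dependencies = ["httpx>=0.28"]
"""

data = tomllib.loads(before)
project = data["project"]
dynamic = project.get("dynamic", [])
if "version" in dynamic:
    dynamic.remove("version")
    # Any concrete version is fine for resolution purposes.
    project["version"] = "1.0.0"

# Re-serialize the edited document; the result now sets version = "1.0.0"
# and no longer lists the version as dynamic.
print(tomli_w.dumps(data))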
@@ -188,53 +267,47 @@ def main():
         "tf-models-nightly",
         "mtmtrain",
         "llm-dialog-manager",
+        "python-must",
         # Slow and have no solution
         "edx-enterprise",
         "kcli",
         "emmet-api",
     ]
     for package in excluded_packages:
-        top_15k_pypi.remove(package)
+        jobs.pop(package, None)

-    if args.output_dir.exists():
-        shutil.rmtree(args.output_dir)
-    args.output_dir.mkdir(parents=True, exist_ok=True)
-    args.output_dir.joinpath(".gitignore").write_text("*")
+    if args.output.exists():
+        shutil.rmtree(args.output)
+    args.output.mkdir(parents=True)
+    args.output.joinpath(".gitignore").write_text("*")
     parameters = {
-        "project": args.project,
+        "mode": args.mode,
         "python": args.python,
         "latest": args.latest,
     }
-    args.output_dir.joinpath("parameters.json").write_text(json.dumps(parameters))
+    args.output.joinpath("parameters.json").write_text(json.dumps(parameters))

     success = 0
     all_results = []  # Track all results for analysis
-    max_package_len = max(len(package) for package in top_15k_pypi[: args.limit])
+    max_package_len = max(len(package) for package in jobs)

     with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as executor:
         tasks = []
         packages_pending = []
-        for package in top_15k_pypi[: args.limit]:
-            if latest_versions:
-                if version := latest_versions.get(package):
-                    pass
-                else:
-                    tqdm.write(f"Missing version: {package}")
-                    continue
-            else:
-                version = None
+        for package, specification in jobs.items():
             packages_pending.append(package)

             tasks.append(
                 executor.submit(
                     run_uv,
+                    package,
+                    specification,
                     args.uv,
-                    args.project,
+                    args.mode,
                     args.python,
                     args.cache,
                     args.offline,
-                    package,
-                    args.output_dir,
-                    version,
+                    args.output,
                 )
             )
         total = len(packages_pending)
@@ -0,0 +1,107 @@
+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#     "httpx>=0.28.1,<0.29.0",
+#     "tqdm>=4.67.1,<5.0.0",
+# ]
+# ///
+
+import argparse
+import asyncio
+import csv
+import shutil
+from dataclasses import dataclass
+from pathlib import Path
+
+import httpx
+from httpx import AsyncClient
+from tqdm.auto import tqdm
+
+
+@dataclass
+class Repository:
+    org: str
+    repo: str
+    ref: str
+
+
+async def fetch_pyproject(
+    client: AsyncClient, repository: Repository, output_dir: Path
+):
+    url = f"https://raw.githubusercontent.com/{repository.org}/{repository.repo}/{repository.ref}/pyproject.toml"
+    try:
+        response = await client.get(url)
+        response.raise_for_status()
+    except httpx.HTTPError as e:
+        # The bigquery data is sometimes missing the master -> main transition
+        url = f"https://raw.githubusercontent.com/{repository.org}/{repository.repo}/refs/heads/main/pyproject.toml"
+        try:
+            response = await client.get(url)
+            response.raise_for_status()
+        except httpx.HTTPError:
+            # Ignore the error from the main fallback if it didn't work
+            if hasattr(e, "response") and e.response.status_code == 404:
+                tqdm.write(
+                    f"Not found: https://github.com/{repository.org}/{repository.repo}"
+                )
+            else:
+                tqdm.write(
+                    f"Error for https://github.com/{repository.org}/{repository.repo}: {e}"
+                )
+            return None
+
+    output_dir.joinpath(f"{repository.repo}.toml").write_text(response.text)
+    return True
+
+
+async def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input", type=Path, default=Path("top500_2025_gh_stars.csv"))
+    parser.add_argument("--output", type=Path, default=Path("pyproject_toml"))
+    args = parser.parse_args()
+
+    with args.input.open() as f:
+        repositories = []
+        seen = set()
+        for row in csv.DictReader(f):
+            if row["repo_name"] in seen:
+                continue
+            seen.add(row["repo_name"])
+            repositories.append(
+                Repository(
+                    org=row["repo_name"].split("/")[0],
+                    repo=row["repo_name"].split("/")[1],
+                    ref=row["ref"],
+                )
+            )
+
+    if args.output.exists():
+        shutil.rmtree(args.output)
+    args.output.mkdir(parents=True)
+    args.output.joinpath(".gitignore").write_text("*")
+
+    semaphore = asyncio.Semaphore(50)
+
+    async def fetch_with_semaphore(
+        client: AsyncClient, repository: Repository, output_dir: Path
+    ):
+        async with semaphore:
+            return await fetch_pyproject(client, repository, output_dir)
+
+    async with httpx.AsyncClient() as client:
+        with tqdm(total=len(repositories)) as pbar:
+            tasks = [
+                fetch_with_semaphore(client, repository, args.output)
+                for repository in repositories
+            ]
+            results = []
+            for future in asyncio.as_completed(tasks):
+                results.append(await future)
+                pbar.update(1)
+
+    success = sum(1 for result in results if result is True)
+    print(f"Successes: {success}/{len(repositories)}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+set -ex
+
+script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+limit=50000
+
+uv run $script_dir/ecosystem_testing.py --uv $1 --mode compile --output $script_dir/base-compile --limit $limit
+uv run $script_dir/ecosystem_testing.py --uv $2 --mode compile --output $script_dir/branch-compile --limit $limit
+uv run $script_dir/ecosystem_testing.py --uv $1 --mode lock --output $script_dir/base-lock --limit $limit
+uv run $script_dir/ecosystem_testing.py --uv $2 --mode lock --output $script_dir/branch-lock --limit $limit
+uv run $script_dir/ecosystem_testing.py --uv $1 --mode pyproject-toml --input $script_dir/pyproject_toml --output $script_dir/base-pyproject-toml --limit $limit
+uv run $script_dir/ecosystem_testing.py --uv $2 --mode pyproject-toml --input $script_dir/pyproject_toml --output $script_dir/branch-pyproject-toml --limit $limit
+
+rm $script_dir/report.md
+uv run $script_dir/create_report.py $script_dir/base-compile $script_dir/branch-compile --mode compile --markdown >> $script_dir/report.md
+uv run $script_dir/create_report.py $script_dir/base-lock $script_dir/branch-lock --mode lock --markdown >> $script_dir/report.md
+uv run $script_dir/create_report.py $script_dir/base-pyproject-toml $script_dir/branch-pyproject-toml --mode pyproject-toml --markdown >> $script_dir/report.md
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,20 @@
+# BigQuery SQL for top5k_pyproject_toml_2025_gh_stars.csv
+# Run in https://console.cloud.google.com/bigquery
+SELECT
+    f.repo_name,
+    f.ref,
+    COUNT(e.id) AS stars
+FROM
+    `bigquery-public-data.github_repos.files` f
+JOIN
+    `githubarchive.month.2025*` e
+ON
+    f.repo_name = e.repo.name
+WHERE
+    f.path = 'pyproject.toml'
+    AND e.type = 'WatchEvent'
+GROUP BY
+    f.repo_name, f.ref
+ORDER BY
+    stars DESC
+LIMIT 5000;