mirror of https://github.com/astral-sh/uv
This commit is contained in: parent 4432b2daf0, commit 98628667f0
@@ -15,7 +15,7 @@ def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("base", type=Path)
    parser.add_argument("branch", type=Path)
    parser.add_argument("--project", action="store_true")
    parser.add_argument("--mode", choices=["compile", "lock", "pyproject-toml"])
    parser.add_argument(
        "--markdown",
        action="store_true",
@@ -53,21 +53,21 @@ def main():
            # also `uv.lock` doesn't exist for failed resolutions
            continue

        if args.project:
        if args.mode == "compile":
            resolution = package_dir.joinpath("stdout.txt").read_text()
        else:
            resolution = package_dir.joinpath("uv.lock").read_text()
            if package_dir.joinpath("stdout.txt").read_text().strip():
                raise RuntimeError(f"Stdout not empty (base): {package}")
        else:
            resolution = package_dir.joinpath("stdout.txt").read_text()
        stderr = package_dir.joinpath("stderr.txt").read_text()
        stderr = redact_time.sub(r"[TIME]", stderr)

        if args.project:
        if args.mode == "compile":
            resolution_branch = package_branch.joinpath("stdout.txt").read_text()
        else:
            resolution_branch = package_branch.joinpath("uv.lock").read_text()
            if package_branch.joinpath("stdout.txt").read_text().strip():
                raise RuntimeError(f"Stdout not empty (branch): {package}")
        else:
            resolution_branch = package_branch.joinpath("stdout.txt").read_text()
        stderr_branch = package_branch.joinpath("stderr.txt").read_text()
        stderr_branch = redact_time.sub(r"[TIME]", stderr_branch)
@@ -77,19 +77,34 @@ def main():
    )

    if args.markdown:
        print("# Ecosystem testing report")
        print(
            f"Dataset: "
            f"`{'uv pip compile' if not parameters['project'] else 'uv lock'}` with `--no-build` "
            f"on each of the top 15k PyPI packages on Python {parameters['python']} "
            "pinned to the latest package version. "
            if parameters["latest"]
            else ". "
            "A handful of pathological cases were filtered out. "
            "Only success resolutions can be compared.\n"
            "## Ecosystem testing report "
            f"({args.mode.replace('pyproject-toml', 'pyproject.toml')})"
        )
        print(f"Successfully resolved packages: {successful}/{total}\n")
        print(f"Different packages: {len(differences)}/{total}\n")
        if args.mode == "pyproject-toml":
            print(
                " * Dataset: A set of top level `pyproject.toml` from GitHub projects popular in 2025. "
                + "Only `pyproject.toml` files with a `[project]` section and static dependencies are included."
            )
        else:
            print(
                " * Dataset: The top 15k PyPI packages. A handful of pathological cases were filtered out."
            )
        print(
            " * Command: "
            + f"`{'uv pip compile' if args.mode == 'compile' else 'uv lock'}` with `--no-build` "
            + f"on Python {parameters['python']} "
            + (
                "pinned to the latest package version. "
                if parameters["latest"]
                else ". "
            )
        )
        print(
            f" * Successfully resolved packages: {successful}/{total} ({successful / total:.0%}). "
            + "Only success resolutions can be compared."
        )
        print(f" * Different packages: {len(differences)}/{successful}")

        for (
            package,
@@ -98,10 +113,10 @@ def main():
            stderr,
            stderr_branch,
        ) in differences:
            if args.project:
                context_window = 3
            else:
            if args.mode == "compile":
                context_window = 999999
            else:
                context_window = 3
            print(f"\n<details>\n<summary>{package}</summary>\n")
            if resolution != resolution_branch:
                print("```diff")
@@ -129,7 +144,7 @@ def main():
                )
            )
            print("```")
            print("</details>")
            print("</details>\n")
    else:
        for (
            package,
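The call that produces the diff body sits mostly outside these hunks; the hunks only choose `context_window` and wrap the result in a collapsible `<details>` block fenced as ```` ```diff ````. A minimal sketch of that pattern using `difflib` — the function name and parameters here are invented for illustration, not taken from the script:

```python
import difflib


def details_block(
    package: str, resolution: str, resolution_branch: str, context_window: int
) -> str:
    """Render one collapsible markdown section wrapping a unified diff (sketch)."""
    fence = "`" * 3  # build the fence to avoid literal backticks in this example
    diff = difflib.unified_diff(
        resolution.splitlines(keepends=True),
        resolution_branch.splitlines(keepends=True),
        fromfile="base",
        tofile="branch",
        n=context_window,
    )
    body = "".join(diff).rstrip("\n")
    return (
        f"<details>\n<summary>{package}</summary>\n\n"
        f"{fence}diff\n{body}\n{fence}\n</details>\n"
    )
```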
@@ -159,9 +174,9 @@ def main():
            )
        )
        print(
            f"Successfully resolved packages: {successful}/{total} ({successful}/{total}:.0%)"
            f"Successfully resolved packages: {successful}/{total} ({successful / total:.0%})"
        )
        print(f"Different packages: {len(differences)}/{total}")
        print(f"Different packages: {len(differences)}/{successful}")


if __name__ == "__main__":
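The hunk above fixes an f-string bug in the plain-text summary: in the old line the `:.0%` format spec sits outside the replacement field, so it is printed literally; the new line divides first and applies the spec inside the braces. A quick standalone check:

```python
successful, total = 7, 10
# Old form: ":.0%" is outside the braces, so it is literal text in the output.
print(f"({successful}/{total}:.0%)")   # -> (7/10:.0%)
# New form: format the ratio with a percent spec inside the replacement field.
print(f"({successful / total:.0%})")   # -> (70%)
```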
@@ -3,7 +3,8 @@
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "tqdm>=4,<5",
#     "tomli-w>=1.2.0,<2.0.0",
#     "tqdm>=4.67.1,<5.0.0",
# ]
# ///
@@ -16,11 +17,13 @@ import platform
import shutil
import subprocess
import time
import tomllib
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from threading import Thread

import tomli_w
from tqdm.auto import tqdm

cwd = Path(__file__).parent
@@ -35,63 +38,36 @@ class Summary:


def run_uv(
    package: str,
    specification: str,
    uv: Path,
    project: bool,
    mode: str,
    python: str,
    cache: Path,
    offline: bool,
    package: str,
    output_dir: Path,
    version: str | None,
    output: Path,
) -> Summary:
    """Run a uv subprocess.
    """Resolve in a uv subprocess.

    The logic captures the max RSS from the process and avoids deadlocks from full
    pipes.
    """
    package_dir = output.joinpath(package)
    package_dir.mkdir()
    command = prepare_uv_command(
        specification,
        uv,
        mode,
        cache,
        offline,
        package_dir,
        python,
    )

    start = time.time()

    requirement = f"{package}=={version}" if version else package
    shared_args = [
        "--no-build",
        "--cache-dir",
        cache,
        "--color",
        "never",
    ]
    if offline:
        shared_args.append("--offline")
    package_dir = output_dir.joinpath(package)
    package_dir.mkdir(parents=True, exist_ok=True)
    if project:
        package_dir.joinpath("pyproject.toml").write_text(
            f"""
[project]
name = "testing"
version = "0.1.0"
requires-python = ">={python}"
dependencies = ["{requirement}"]
"""
        )
        cmd = [uv, "lock", *shared_args]
    else:
        cmd = [
            uv,
            "pip",
            "compile",
            "-",
            "-p",
            python,
            # The results are more reproducible if they are platform independent
            "--universal",
            "--no-header",
            "--no-annotate",
            *shared_args,
        ]

    process = subprocess.Popen(
        cmd,
        command,
        cwd=package_dir,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
@@ -99,7 +75,7 @@ def run_uv(
        text=True,
    )

    stdout, stderr = communicate(process, requirement if not project else None)
    stdout, stderr = communicate(process, specification if mode == "compile" else None)

    # At this point, the process is a zombie, so has called `exit()`, but we haven't reaped it with `wait4` yet.
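The comment above alludes to reaping the child with `wait4` so that its resource usage (notably max RSS) can be read alongside the exit status. The reaping code itself is outside this hunk; a minimal standalone sketch of that mechanism, with a placeholder command:

```python
import os
import subprocess

# Placeholder child process; any command works for the illustration.
process = subprocess.Popen(["uv", "--version"], stdout=subprocess.DEVNULL)
# Reap with wait4 instead of Popen.wait() so we also receive rusage.
_pid, status, rusage = os.wait4(process.pid, 0)
exit_code = os.waitstatus_to_exitcode(status)
max_rss = rusage.ru_maxrss  # peak resident set size: KiB on Linux, bytes on macOS
print(exit_code, max_rss)
```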
@@ -122,6 +98,57 @@ def run_uv(
    return summary


def prepare_uv_command(
    specification: str,
    uv: Path,
    mode: str,
    cache: Path,
    offline: bool,
    package_dir: Path,
    python: str,
) -> list[Path | str]:
    shared_args = [
        "--no-build",
        "--cache-dir",
        cache,
        "--color",
        "never",
    ]
    if offline:
        shared_args.append("--offline")
    if mode == "pyproject-toml":
        package_dir.joinpath("pyproject.toml").write_text(specification)
        command = [uv, "lock", *shared_args]
    elif mode == "lock":
        package_dir.joinpath("pyproject.toml").write_text(
            f"""
[project]
name = "testing"
version = "0.1.0"
requires-python = ">={python}"
dependencies = ["{specification}"]
"""
        )
        command = [uv, "lock", *shared_args]
    elif mode == "compile":
        command = [
            uv,
            "pip",
            "compile",
            "-",
            "-p",
            python,
            # The results are more reproducible if they are platform independent
            "--universal",
            "--no-header",
            "--no-annotate",
            *shared_args,
        ]
    else:
        raise ValueError(f"Unknown mode: {mode}")
    return command


def communicate(process: subprocess.Popen, stdin: str | None) -> tuple[str, str]:
    """Like `Popen.communicate`, but without the `os.wait` call.
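The body of `communicate` is truncated in this hunk. Its docstring describes the pattern: drain stdout and stderr on separate threads so a full pipe cannot deadlock the child, while leaving the final reaping to the caller (which uses `wait4`). A rough sketch of that shape, not the script's exact implementation:

```python
import subprocess
from threading import Thread


def communicate_no_wait(process: subprocess.Popen, stdin: str | None) -> tuple[str, str]:
    """Drain the pipes without calling `process.wait()` (sketch only)."""
    captured: dict[str, str] = {}

    def drain(name: str, stream) -> None:
        # Reading to EOF on a thread keeps the OS pipe buffer from filling up.
        captured[name] = stream.read()

    readers = [
        Thread(target=drain, args=("stdout", process.stdout)),
        Thread(target=drain, args=("stderr", process.stderr)),
    ]
    for reader in readers:
        reader.start()
    if stdin is not None:
        process.stdin.write(stdin)
    process.stdin.close()
    for reader in readers:
        reader.join()
    return captured["stdout"], captured["stderr"]
```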
@@ -157,12 +184,18 @@ def communicate(process: subprocess.Popen, stdin: str | None) -> tuple[str, str]:
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--project",
        action="store_true",
        help="Use `uv lock` instead of `uv pip compile`",
        "--input", type=Path, default=cwd.joinpath("top-pypi-packages.json")
    )
    parser.add_argument(
        "--mode",
        choices=["compile", "lock", "pyproject-toml"],
        default="compile",
        help="`compile`: `uv pip compile`, "
        "`lock`: `uv lock` from a single requirement, "
        "`pyproject-toml`: `uv lock` from a directory of `pyproject.toml` files",
    )
    parser.add_argument("--python", "-p", type=str, default="3.13")
    parser.add_argument("--output-dir", type=Path, default="output")
    parser.add_argument("--output", type=Path, default="output")
    parser.add_argument("--uv", type=Path, default=Path("uv"))
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument("--cache", type=Path, default=cwd.joinpath("cache"))
@@ -170,16 +203,62 @@ def main():
    parser.add_argument("--latest", action="store_true")
    args = parser.parse_args()

    top_15k_pypi = json.loads(cwd.joinpath("top-pypi-packages.json").read_text())
    top_15k_pypi = [pkg["project"] for pkg in top_15k_pypi["rows"]]
    if args.mode == "pyproject-toml":
        project_tomls = sorted((file.stem, file) for file in args.input.iterdir())
        jobs = {}
        no_project = 0
        dynamic_dependencies = 0
        for package, file in project_tomls:
            if len(jobs) >= args.limit:
                break
            if file.suffix != ".toml":
                continue
            project_toml = file.read_text()
            data = tomllib.loads(project_toml)
            project = data.get("project")
            if not project:
                no_project += 1
                continue
            if dynamic := project.get("dynamic"):
                if "dependencies" in dynamic:
                    dynamic_dependencies += 1
                    continue
                if "version" in dynamic:
                    dynamic.remove("version")
                    # Usually there are no cycles back to the current project, so any version works
                    project["version"] = "1.0.0"

    if args.latest:
        with cwd.joinpath("package_versions.csv").open() as f:
            latest_versions = {
                row["package_name"]: row["latest_version"] for row in csv.DictReader(f)
            }
            jobs[package] = tomli_w.dumps(data)

        print(f"`pyproject.toml`s without `[project]`: {no_project}")
        print(
            f"`pyproject.toml`s with `dynamic = ['dependencies']`: {dynamic_dependencies}"
        )
        if args.latest:
            raise ValueError("Latest versions are not supported in pyproject-toml mode")
        else:
            latest_versions = None
        project_names = json.loads(args.input.read_text())
        project_names = sorted(pkg["project"] for pkg in project_names["rows"])

        if args.latest:
            with cwd.joinpath("package_versions.csv").open() as f:
                latest_versions = {
                    row["package_name"]: row["latest_version"]
                    for row in csv.DictReader(f)
                }
        else:
            latest_versions = None

        jobs = {}
        for package in project_names[: args.limit]:
            if latest_versions:
                if version := latest_versions.get(package):
                    jobs[package] = f"{package}=={version}"
                else:
                    tqdm.write(f"Missing version: {package}")
                    continue
            else:
                jobs[package] = package

    excluded_packages = [
        # 5000 releases, no solution
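The `pyproject-toml` branch above filters each file with `tomllib`, pins a dynamic version if needed, and re-serializes the result with `tomli_w` as the job specification. A self-contained illustration of that transformation on a made-up file:

```python
import tomllib

import tomli_w

# Made-up input: dynamic version, static dependencies.
raw = """
[project]
name = "example"
dynamic = ["version"]
dependencies = ["httpx>=0.28"]
"""

data = tomllib.loads(raw)
project = data["project"]
dynamic = project.get("dynamic", [])
if "dependencies" in dynamic:
    raise SystemExit("skipped: dependencies are dynamic")
if "version" in dynamic:
    # Any concrete version works as long as nothing depends back on this project.
    dynamic.remove("version")
    project["version"] = "1.0.0"
print(tomli_w.dumps(data))
```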
@@ -188,53 +267,47 @@ def main():
        "tf-models-nightly",
        "mtmtrain",
        "llm-dialog-manager",
        "python-must",
        # Slow and have no solution
        "edx-enterprise",
        "kcli",
        "emmet-api",
    ]
    for package in excluded_packages:
        top_15k_pypi.remove(package)
        jobs.pop(package, None)

    if args.output_dir.exists():
        shutil.rmtree(args.output_dir)
    args.output_dir.mkdir(parents=True, exist_ok=True)
    args.output_dir.joinpath(".gitignore").write_text("*")
    if args.output.exists():
        shutil.rmtree(args.output)
    args.output.mkdir(parents=True)
    args.output.joinpath(".gitignore").write_text("*")
    parameters = {
        "project": args.project,
        "mode": args.mode,
        "python": args.python,
        "latest": args.latest,
    }
    args.output_dir.joinpath("parameters.json").write_text(json.dumps(parameters))
    args.output.joinpath("parameters.json").write_text(json.dumps(parameters))

    success = 0
    all_results = []  # Track all results for analysis
    max_package_len = max(len(package) for package in top_15k_pypi[: args.limit])
    max_package_len = max(len(package) for package in jobs)

    with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as executor:
        tasks = []
        packages_pending = []
        for package in top_15k_pypi[: args.limit]:
            if latest_versions:
                if version := latest_versions.get(package):
                    pass
                else:
                    tqdm.write(f"Missing version: {package}")
                    continue
            else:
                version = None
        for package, specification in jobs.items():
            packages_pending.append(package)

            tasks.append(
                executor.submit(
                    run_uv,
                    package,
                    specification,
                    args.uv,
                    args.project,
                    args.mode,
                    args.python,
                    args.cache,
                    args.offline,
                    package,
                    args.output_dir,
                    version,
                    args.output,
                )
            )
        total = len(packages_pending)
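The loop above only submits the jobs; the code that consumes the futures is in a part of the diff not shown here. For orientation, the generic shape of that pattern (submit to a `ThreadPoolExecutor`, drain with `as_completed` under a tqdm bar) looks roughly like the following sketch, with a toy `work` function standing in for `run_uv`:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

from tqdm.auto import tqdm


def work(n: int) -> int:
    # Stand-in for run_uv; returns a trivial result.
    return n * n


with ThreadPoolExecutor(max_workers=8) as executor:
    futures = [executor.submit(work, n) for n in range(100)]
    results = []
    with tqdm(total=len(futures)) as pbar:
        for future in as_completed(futures):
            results.append(future.result())  # re-raises any worker exception
            pbar.update(1)
```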
@@ -0,0 +1,107 @@
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "httpx>=0.28.1,<0.29.0",
#     "tqdm>=4.67.1,<5.0.0",
# ]
# ///

import argparse
import asyncio
import csv
import shutil
from dataclasses import dataclass
from pathlib import Path

import httpx
from httpx import AsyncClient
from tqdm.auto import tqdm


@dataclass
class Repository:
    org: str
    repo: str
    ref: str


async def fetch_pyproject(
    client: AsyncClient, repository: Repository, output_dir: Path
):
    url = f"https://raw.githubusercontent.com/{repository.org}/{repository.repo}/{repository.ref}/pyproject.toml"
    try:
        response = await client.get(url)
        response.raise_for_status()
    except httpx.HTTPError as e:
        # The bigquery data is sometimes missing the master -> main transition
        url = f"https://raw.githubusercontent.com/{repository.org}/{repository.repo}/refs/heads/main/pyproject.toml"
        try:
            response = await client.get(url)
            response.raise_for_status()
        except httpx.HTTPError:
            # Ignore the error from the main fallback if it didn't work
            if hasattr(e, "response") and e.response.status_code == 404:
                tqdm.write(
                    f"Not found: https://github.com/{repository.org}/{repository.repo}"
                )
            else:
                tqdm.write(
                    f"Error for https://github.com/{repository.org}/{repository.repo}: {e}"
                )
            return None

    output_dir.joinpath(f"{repository.repo}.toml").write_text(response.text)
    return True


async def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=Path, default=Path("top500_2025_gh_stars.csv"))
    parser.add_argument("--output", type=Path, default=Path("pyproject_toml"))
    args = parser.parse_args()

    with args.input.open() as f:
        repositories = []
        seen = set()
        for row in csv.DictReader(f):
            if row["repo_name"] in seen:
                continue
            seen.add(row["repo_name"])
            repositories.append(
                Repository(
                    org=row["repo_name"].split("/")[0],
                    repo=row["repo_name"].split("/")[1],
                    ref=row["ref"],
                )
            )

    if args.output.exists():
        shutil.rmtree(args.output)
    args.output.mkdir(parents=True)
    args.output.joinpath(".gitignore").write_text("*")

    semaphore = asyncio.Semaphore(50)

    async def fetch_with_semaphore(
        client: AsyncClient, repository: Repository, output_dir: Path
    ):
        async with semaphore:
            return await fetch_pyproject(client, repository, output_dir)

    async with httpx.AsyncClient() as client:
        with tqdm(total=len(repositories)) as pbar:
            tasks = [
                fetch_with_semaphore(client, repository, args.output)
                for repository in repositories
            ]
            results = []
            for future in asyncio.as_completed(tasks):
                results.append(await future)
                pbar.update(1)

    success = sum(1 for result in results if result is True)
    print(f"Successes: {success}/{len(repositories)}")


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,18 @@
#!/bin/bash

set -ex

script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
limit=50000

uv run $script_dir/ecosystem_testing.py --uv $1 --mode compile --output $script_dir/base-compile --limit $limit
uv run $script_dir/ecosystem_testing.py --uv $2 --mode compile --output $script_dir/branch-compile --limit $limit
uv run $script_dir/ecosystem_testing.py --uv $1 --mode lock --output $script_dir/base-lock --limit $limit
uv run $script_dir/ecosystem_testing.py --uv $2 --mode lock --output $script_dir/branch-lock --limit $limit
uv run $script_dir/ecosystem_testing.py --uv $1 --mode pyproject-toml --input $script_dir/pyproject_toml --output $script_dir/base-pyproject-toml --limit $limit
uv run $script_dir/ecosystem_testing.py --uv $2 --mode pyproject-toml --input $script_dir/pyproject_toml --output $script_dir/branch-pyproject-toml --limit $limit

rm $script_dir/report.md
uv run $script_dir/create_report.py $script_dir/base-compile $script_dir/branch-compile --mode compile --markdown >> $script_dir/report.md
uv run $script_dir/create_report.py $script_dir/base-lock $script_dir/branch-lock --mode lock --markdown >> $script_dir/report.md
uv run $script_dir/create_report.py $script_dir/base-pyproject-toml $script_dir/branch-pyproject-toml --mode pyproject-toml --markdown >> $script_dir/report.md
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,20 @@
# BigQuery SQL for top5k_pyproject_toml_2025_gh_stars.csv
# Run in https://console.cloud.google.com/bigquery
SELECT
  f.repo_name,
  f.ref,
  COUNT(e.id) AS stars
FROM
  `bigquery-public-data.github_repos.files` f
JOIN
  `githubarchive.month.2025*` e
ON
  f.repo_name = e.repo.name
WHERE
  f.path = 'pyproject.toml'
  AND e.type = 'WatchEvent'
GROUP BY
  f.repo_name, f.ref
ORDER BY
  stars DESC
LIMIT 5000;