uv/scripts/ecosystem-testing/get_latest_versions.py

67 lines
1.9 KiB
Python

#!/usr/bin/env python3
# NB: LLM code ahead
# /// script
# requires-python = ">=3.13"
# dependencies = ["httpx", "orjson", "tqdm"]
# ///
import asyncio
import csv
from pathlib import Path
import httpx
import orjson
from tqdm.asyncio import tqdm
async def get_latest_version(
    client: httpx.AsyncClient, package_name: str
) -> tuple[str, str | None]:
    """Look up the version PyPI reports for ``package_name``.

    Requests ``https://pypi.org/pypi/<package_name>/json`` through ``client``
    and reads ``info.version`` from the JSON payload.

    Returns:
        A ``(package_name, version)`` pair. ``version`` is ``None`` when the
        response status is not 200 or when any exception is raised while
        requesting or decoding the payload.
    """
    latest: str | None = None
    try:
        reply = await client.get(f"https://pypi.org/pypi/{package_name}/json")
        if reply.status_code == 200:
            latest = orjson.loads(reply.content)["info"]["version"]
    except Exception:
        # Best-effort lookup: any failure is reported as "no version" so a
        # single bad package never aborts the whole run.
        latest = None
    return package_name, latest
async def main() -> None:
    """Fetch a version for every listed package and write them to a CSV file.

    Package names are taken from the ``project`` column of
    ``scripts/ecosystem-testing/top-pypi-packages.csv``. Lookups run
    concurrently, with a semaphore allowing at most 50 calls to
    ``get_latest_version`` at a time. The results are written to
    ``package_versions.csv`` with one row per input package, in input order,
    and a success summary is printed at the end.
    """
    source_csv = Path("scripts/ecosystem-testing/top-pypi-packages.csv")

    # Collect the package names from the input sheet.
    with source_csv.open() as handle:
        package_names: list[str] = [
            record["project"] for record in csv.DictReader(handle)
        ]
    print(f"Processing {len(package_names)} packages...")

    # Look up every package concurrently; the semaphore caps how many
    # lookups are in flight at once.
    limiter = asyncio.Semaphore(50)
    versions: dict[str, str | None] = {}
    async with httpx.AsyncClient() as client:

        async def bounded_lookup(pkg: str) -> tuple[str, str | None]:
            async with limiter:
                return await get_latest_version(client, pkg)

        pending = [bounded_lookup(pkg) for pkg in package_names]
        progress = tqdm(asyncio.as_completed(pending), total=len(package_names))
        for finished in progress:
            pkg_name, pkg_version = await finished
            versions[pkg_name] = pkg_version

    # Emit a header followed by one row per input package.
    with open("package_versions.csv", "w", newline="") as out:
        sheet = csv.writer(out)
        sheet.writerow(["package_name", "latest_version"])
        sheet.writerows([pkg, versions.get(pkg, "")] for pkg in package_names)

    # A lookup counts as successful when it produced a non-empty version.
    success_count = sum(bool(found) for found in versions.values())
    print(f"Completed: {success_count}/{len(package_names)} successful")
# Only start the event loop when this file is executed as a script, so that
# importing the module does not trigger network requests or file writes.
if __name__ == "__main__":
    asyncio.run(main())