mirror of https://github.com/astral-sh/uv
This commit is contained in:
parent
835753242b
commit
88681c97b5
|
|
@ -33,10 +33,56 @@ class Summary:
|
|||
time: float
|
||||
|
||||
|
||||
def communicate(process: subprocess.Popen, stdin: str) -> tuple[str, str]:
|
||||
"""We have `Popen.communicate` at home.
|
||||
def run_uv(
    cmd: list[str], package: str, output_dir: Path, version: str | None
) -> Summary:
    """Run a uv subprocess.

    The logic captures the max RSS from the process and avoids deadlocks from full
    pipes.

    Args:
        cmd: The full command line to spawn.
        package: The package name; written to the child's stdin and used as the
            name of the per-package directory below `output_dir`.
        output_dir: Directory under which `<package>/stdout.txt`,
            `<package>/stderr.txt` and `<package>/summary.json` are written.
        version: If truthy, the requirement sent on stdin is pinned as
            `<package>==<version>`; otherwise only the package name is sent.

    Returns:
        A `Summary` with the package name, the exit code of the process, its max
        RSS (0 on non-POSIX platforms, where rusage is unavailable) and the
        elapsed wall-clock time in seconds.
    """
    start = time.time()

    process = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    stdin = f"{package}=={version}" if version else package

    stdout, stderr = communicate(process, stdin)

    # At this point, the process is a zombie, so has called `exit()`, but we haven't
    # reaped it with `wait4` yet.

    # rusage is only available on unix
    if os.name == "posix":
        # Wait for process and get resource usage. `wait4` returns the raw wait
        # status (exit code and signal packed together), not the exit code itself,
        # so decode it to match the convention of `Popen.wait()`.
        _pid, status, rusage = os.wait4(process.pid, 0)
        exit_code = os.waitstatus_to_exitcode(status)
        # We reaped the child behind `Popen`'s back; record the result so it
        # doesn't consider the process still running when it is garbage-collected.
        process.returncode = exit_code
        max_rss = rusage.ru_maxrss
    else:
        exit_code = process.wait()
        max_rss = 0

    package_dir = output_dir.joinpath(package)
    package_dir.mkdir(parents=True, exist_ok=True)
    package_dir.joinpath("stdout.txt").write_text(stdout)
    package_dir.joinpath("stderr.txt").write_text(stderr)
    summary = Summary(
        package=package, exit_code=exit_code, max_rss=max_rss, time=time.time() - start
    )
    package_dir.joinpath("summary.json").write_text(json.dumps(summary.__dict__))
    return summary
|
||||
|
||||
|
||||
def communicate(process: subprocess.Popen, stdin: str) -> tuple[str, str]:
|
||||
"""Like `Popen.communicate`, but without the `os.wait` call.
|
||||
|
||||
Start threads to drain the pipes to avoid blocking on full pipes, but don't use
|
||||
libc's `wait` so we can use `os.wait4` later.
|
||||
"""
|
||||
process.stdin.write(stdin)
|
||||
|
|
@ -64,46 +110,6 @@ def communicate(process: subprocess.Popen, stdin: str) -> tuple[str, str]:
|
|||
return stdout[0], stderr[0]
|
||||
|
||||
|
||||
def run_uv(
    cmd: list[str], package: str, output_dir: Path, version: str | None
) -> Summary:
    """Run a uv subprocess.

    The logic captures the max RSS from the process and avoids deadlocks from full
    pipes.

    Args:
        cmd: The full command line to spawn.
        package: The package name; written to the child's stdin and used as the
            name of the per-package directory below `output_dir`.
        output_dir: Directory under which `<package>/stdout.txt`,
            `<package>/stderr.txt` and `<package>/summary.json` are written.
        version: If truthy, the requirement sent on stdin is pinned as
            `<package>==<version>`; otherwise only the package name is sent.

    Returns:
        A `Summary` with the package name, the exit code of the process, its max
        RSS (0 where `os.wait4` is unavailable) and the elapsed wall-clock time
        in seconds.
    """
    start = time.time()

    process = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    stdin = f"{package}=={version}" if version else package

    stdout, stderr = communicate(process, stdin)

    # At this point, the process is a zombie, so has called `exit()`, but we haven't
    # reaped it with `wait4` yet.

    if os.name == "posix":
        # Wait for process and get resource usage. `wait4` yields the raw wait
        # status rather than the exit code, so decode it before storing it.
        _pid, status, rusage = os.wait4(process.pid, 0)
        exit_code = os.waitstatus_to_exitcode(status)
        # The child was reaped outside of `Popen`; record the result on it.
        process.returncode = exit_code
        max_rss = rusage.ru_maxrss
    else:
        # `os.wait4` (and rusage) only exist on unix; fall back to a plain wait.
        exit_code = process.wait()
        max_rss = 0

    package_dir = output_dir.joinpath(package)
    package_dir.mkdir(parents=True, exist_ok=True)
    package_dir.joinpath("stdout.txt").write_text(stdout)
    package_dir.joinpath("stderr.txt").write_text(stderr)
    summary = Summary(
        package=package, exit_code=exit_code, max_rss=max_rss, time=time.time() - start
    )
    package_dir.joinpath("summary.json").write_text(json.dumps(summary.__dict__))
    return summary
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--python", "-p", type=str, default="3.13")
|
||||
|
|
@ -119,27 +125,27 @@ def main():
|
|||
top_15k_pypi = [pkg["project"] for pkg in top_15k_pypi["rows"]]
|
||||
|
||||
if args.latest:
|
||||
latest_versions = cwd.joinpath("package_versions.csv").read_text()
|
||||
latest_versions = {
|
||||
row["package_name"]: row["latest_version"]
|
||||
for row in csv.DictReader(latest_versions.splitlines())
|
||||
}
|
||||
with cwd.joinpath("package_versions.csv").open() as f:
|
||||
latest_versions = {
|
||||
row["package_name"]: row["latest_version"] for row in csv.DictReader(f)
|
||||
}
|
||||
else:
|
||||
latest_versions = None
|
||||
|
||||
# 5000 releases, no solution
|
||||
top_15k_pypi.remove("nucliadb")
|
||||
# Remove slow packages
|
||||
for slow in [
|
||||
excluded_packages = [
|
||||
# 5000 releases, no solution
|
||||
"nucliadb",
|
||||
# These packages have many non-small versions
|
||||
"tf-models-nightly",
|
||||
"mtmtrain",
|
||||
"llm-dialog-manager",
|
||||
"edx-enterprise", # Doesn't solve
|
||||
# Slow and have no solution
|
||||
"edx-enterprise",
|
||||
"kcli",
|
||||
"emmet-api",
|
||||
]:
|
||||
top_15k_pypi.remove(slow)
|
||||
]
|
||||
for package in excluded_packages:
|
||||
top_15k_pypi.remove(package)
|
||||
|
||||
output_dir = cwd.joinpath(args.output_dir)
|
||||
if output_dir.exists():
|
||||
|
|
@ -183,8 +189,8 @@ def main():
|
|||
version = None
|
||||
packages_pending.append(package)
|
||||
tasks.append(executor.submit(run_uv, cmd, package, output_dir, version))
|
||||
|
||||
total = len(packages_pending)
|
||||
|
||||
with tqdm(total=total) as progress_bar:
|
||||
for result in concurrent.futures.as_completed(tasks):
|
||||
summary = result.result()
|
||||
|
|
@ -199,15 +205,9 @@ def main():
|
|||
if summary.exit_code == 0:
|
||||
success += 1
|
||||
|
||||
print(f"Success: {success}/{total}")
|
||||
print(f"Success: {success}/{total} ({success / total:.0%})")
|
||||
|
||||
successes = [summary for summary in all_results if summary.exit_code == 0]
|
||||
print("\n# top 5 max RSS for successes")
|
||||
largest_rss = sorted(successes, key=lambda x: x.max_rss, reverse=True)[:5]
|
||||
for summary in largest_rss:
|
||||
print(
|
||||
f"{summary.package}: {summary.max_rss / 1024:.1f} MB (exit code: {summary.exit_code})"
|
||||
)
|
||||
|
||||
print("\n# top 5 slowest resolutions for successes")
|
||||
slowest = sorted(successes, key=lambda x: x.time, reverse=True)[:5]
|
||||
|
|
@ -216,6 +216,15 @@ def main():
|
|||
f"{summary.package}: {summary.time:.2f}s (exit code: {summary.exit_code})"
|
||||
)
|
||||
|
||||
if os.name == "posix":
|
||||
print("\n# top 5 max RSS for successes")
|
||||
largest_rss = sorted(successes, key=lambda x: x.max_rss, reverse=True)[:5]
|
||||
for summary in largest_rss:
|
||||
# `ru_maxrss` is in KB on Linux only (macOS reports bytes), so this MB conversion is only accurate on Linux
|
||||
print(
|
||||
f"{summary.package}: {summary.max_rss / 1024:.1f} MB (exit code: {summary.exit_code})"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
Loading…
Reference in New Issue