mirror of https://github.com/astral-sh/uv
This commit is contained in:
parent
835753242b
commit
88681c97b5
|
|
@ -33,10 +33,56 @@ class Summary:
|
||||||
time: float
|
time: float
|
||||||
|
|
||||||
|
|
||||||
def communicate(process: subprocess.Popen, stdin: str) -> tuple[str, str]:
|
def run_uv(
|
||||||
"""We have `Popen.communicate` at home.
|
cmd: list[str], package: str, output_dir: Path, version: str | None
|
||||||
|
) -> Summary:
|
||||||
|
"""Run a uv subprocess.
|
||||||
|
|
||||||
Start threads to drain the pipes to avoid deadlocks on full pipes, but don't use
|
The logic captures the max RSS from the process and avoids deadlocks from full
|
||||||
|
pipes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
|
||||||
|
process = subprocess.Popen(
|
||||||
|
cmd,
|
||||||
|
stdin=subprocess.PIPE,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
text=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
stdin = f"{package}=={version}" if version else package
|
||||||
|
|
||||||
|
stdout, stderr = communicate(process, stdin)
|
||||||
|
|
||||||
|
# At this point, the process is a zombie, so has called `exit()`, but we haven't reaped it with `wait4` yet.
|
||||||
|
|
||||||
|
# rusage is only available on unix
|
||||||
|
if os.name == "posix":
|
||||||
|
# Wait for process and get resource usage
|
||||||
|
_pid, exit_code, rusage = os.wait4(process.pid, 0)
|
||||||
|
else:
|
||||||
|
exit_code = process.wait()
|
||||||
|
rusage = None
|
||||||
|
|
||||||
|
max_rss = rusage.ru_maxrss if rusage else 0
|
||||||
|
|
||||||
|
package_dir = output_dir.joinpath(package)
|
||||||
|
package_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
package_dir.joinpath("stdout.txt").write_text(stdout)
|
||||||
|
package_dir.joinpath("stderr.txt").write_text(stderr)
|
||||||
|
summary = Summary(
|
||||||
|
package=package, exit_code=exit_code, max_rss=max_rss, time=time.time() - start
|
||||||
|
)
|
||||||
|
package_dir.joinpath("summary.json").write_text(json.dumps(summary.__dict__))
|
||||||
|
return summary
|
||||||
|
|
||||||
|
|
||||||
|
def communicate(process: subprocess.Popen, stdin: str) -> tuple[str, str]:
|
||||||
|
"""Like `Popen.communicate`, but without the `os.wait` call.
|
||||||
|
|
||||||
|
Start threads to drain the pipes to avoid blocking on full pipes, but don't use
|
||||||
libc's `wait` so we can use `os.wait4` later.
|
libc's `wait` so we can use `os.wait4` later.
|
||||||
"""
|
"""
|
||||||
process.stdin.write(stdin)
|
process.stdin.write(stdin)
|
||||||
|
|
@ -64,46 +110,6 @@ def communicate(process: subprocess.Popen, stdin: str) -> tuple[str, str]:
|
||||||
return stdout[0], stderr[0]
|
return stdout[0], stderr[0]
|
||||||
|
|
||||||
|
|
||||||
def run_uv(
|
|
||||||
cmd: list[str], package: str, output_dir: Path, version: str | None
|
|
||||||
) -> Summary:
|
|
||||||
"""Run a uv subprocess.
|
|
||||||
|
|
||||||
The logic captures the max RSS from the process and avoids deadlocks from full
|
|
||||||
pipes."""
|
|
||||||
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
process = subprocess.Popen(
|
|
||||||
cmd,
|
|
||||||
stdin=subprocess.PIPE,
|
|
||||||
stdout=subprocess.PIPE,
|
|
||||||
stderr=subprocess.PIPE,
|
|
||||||
text=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
stdin = f"{package}=={version}" if version else package
|
|
||||||
|
|
||||||
stdout, stderr = communicate(process, stdin)
|
|
||||||
|
|
||||||
# At this point, the process is a zombie, so has called `exit()`, but we haven't reaped it with `wait4` yet.
|
|
||||||
|
|
||||||
# Wait for process and get resource usage
|
|
||||||
_pid, exit_code, rusage = os.wait4(process.pid, 0)
|
|
||||||
|
|
||||||
max_rss = rusage.ru_maxrss
|
|
||||||
|
|
||||||
package_dir = output_dir.joinpath(package)
|
|
||||||
package_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
package_dir.joinpath("stdout.txt").write_text(stdout)
|
|
||||||
package_dir.joinpath("stderr.txt").write_text(stderr)
|
|
||||||
summary = Summary(
|
|
||||||
package=package, exit_code=exit_code, max_rss=max_rss, time=time.time() - start
|
|
||||||
)
|
|
||||||
package_dir.joinpath("summary.json").write_text(json.dumps(summary.__dict__))
|
|
||||||
return summary
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--python", "-p", type=str, default="3.13")
|
parser.add_argument("--python", "-p", type=str, default="3.13")
|
||||||
|
|
@ -119,27 +125,27 @@ def main():
|
||||||
top_15k_pypi = [pkg["project"] for pkg in top_15k_pypi["rows"]]
|
top_15k_pypi = [pkg["project"] for pkg in top_15k_pypi["rows"]]
|
||||||
|
|
||||||
if args.latest:
|
if args.latest:
|
||||||
latest_versions = cwd.joinpath("package_versions.csv").read_text()
|
with cwd.joinpath("package_versions.csv").open() as f:
|
||||||
latest_versions = {
|
latest_versions = {
|
||||||
row["package_name"]: row["latest_version"]
|
row["package_name"]: row["latest_version"] for row in csv.DictReader(f)
|
||||||
for row in csv.DictReader(latest_versions.splitlines())
|
}
|
||||||
}
|
|
||||||
else:
|
else:
|
||||||
latest_versions = None
|
latest_versions = None
|
||||||
|
|
||||||
# 5000 releases, no solution
|
excluded_packages = [
|
||||||
top_15k_pypi.remove("nucliadb")
|
# 5000 releases, no solution
|
||||||
# Remove slow packages
|
"nucliadb",
|
||||||
for slow in [
|
|
||||||
# These packages have many non-small versions
|
# These packages have many non-small versions
|
||||||
"tf-models-nightly",
|
"tf-models-nightly",
|
||||||
"mtmtrain",
|
"mtmtrain",
|
||||||
"llm-dialog-manager",
|
"llm-dialog-manager",
|
||||||
"edx-enterprise", # Doesn't solve
|
# Slow and have no solution
|
||||||
|
"edx-enterprise",
|
||||||
"kcli",
|
"kcli",
|
||||||
"emmet-api",
|
"emmet-api",
|
||||||
]:
|
]
|
||||||
top_15k_pypi.remove(slow)
|
for package in excluded_packages:
|
||||||
|
top_15k_pypi.remove(package)
|
||||||
|
|
||||||
output_dir = cwd.joinpath(args.output_dir)
|
output_dir = cwd.joinpath(args.output_dir)
|
||||||
if output_dir.exists():
|
if output_dir.exists():
|
||||||
|
|
@ -183,8 +189,8 @@ def main():
|
||||||
version = None
|
version = None
|
||||||
packages_pending.append(package)
|
packages_pending.append(package)
|
||||||
tasks.append(executor.submit(run_uv, cmd, package, output_dir, version))
|
tasks.append(executor.submit(run_uv, cmd, package, output_dir, version))
|
||||||
|
|
||||||
total = len(packages_pending)
|
total = len(packages_pending)
|
||||||
|
|
||||||
with tqdm(total=total) as progress_bar:
|
with tqdm(total=total) as progress_bar:
|
||||||
for result in concurrent.futures.as_completed(tasks):
|
for result in concurrent.futures.as_completed(tasks):
|
||||||
summary = result.result()
|
summary = result.result()
|
||||||
|
|
@ -199,15 +205,9 @@ def main():
|
||||||
if summary.exit_code == 0:
|
if summary.exit_code == 0:
|
||||||
success += 1
|
success += 1
|
||||||
|
|
||||||
print(f"Success: {success}/{total}")
|
print(f"Success: {success}/{total} ({success / total:.0%})")
|
||||||
|
|
||||||
successes = [summary for summary in all_results if summary.exit_code == 0]
|
successes = [summary for summary in all_results if summary.exit_code == 0]
|
||||||
print("\n# top 5 max RSS for successes")
|
|
||||||
largest_rss = sorted(successes, key=lambda x: x.max_rss, reverse=True)[:5]
|
|
||||||
for summary in largest_rss:
|
|
||||||
print(
|
|
||||||
f"{summary.package}: {summary.max_rss / 1024:.1f} MB (exit code: {summary.exit_code})"
|
|
||||||
)
|
|
||||||
|
|
||||||
print("\n# top 5 slowest resolutions for successes")
|
print("\n# top 5 slowest resolutions for successes")
|
||||||
slowest = sorted(successes, key=lambda x: x.time, reverse=True)[:5]
|
slowest = sorted(successes, key=lambda x: x.time, reverse=True)[:5]
|
||||||
|
|
@ -216,6 +216,15 @@ def main():
|
||||||
f"{summary.package}: {summary.time:.2f}s (exit code: {summary.exit_code})"
|
f"{summary.package}: {summary.time:.2f}s (exit code: {summary.exit_code})"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if os.name == "posix":
|
||||||
|
print("\n# top 5 max RSS for successes")
|
||||||
|
largest_rss = sorted(successes, key=lambda x: x.max_rss, reverse=True)[:5]
|
||||||
|
for summary in largest_rss:
|
||||||
|
# Only linux, max RSS is in KB
|
||||||
|
print(
|
||||||
|
f"{summary.package}: {summary.max_rss / 1024:.1f} MB (exit code: {summary.exit_code})"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue