This commit is contained in:
konstin 2025-09-21 20:29:53 +02:00
parent a962d41c1e
commit 66801d01f1
1 changed files with 45 additions and 54 deletions

View File

@ -10,11 +10,14 @@
import argparse import argparse
import concurrent import concurrent
import csv import csv
import functools
import json import json
import multiprocessing
import os import os
import shutil import shutil
import signal import signal
import subprocess import subprocess
import sys
import time import time
from collections import deque from collections import deque
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
@ -26,22 +29,6 @@ from tqdm.auto import tqdm
cwd = Path(__file__).parent cwd = Path(__file__).parent
# Global flag to track ctrl+c
should_stop = False
def signal_handler(signum, frame):
"""Handle Ctrl+C gracefully."""
global should_stop
print("\nReceived interrupt signal. Stopping gracefully...")
should_stop = True
# Send SIGTERM to all processes in our process group
try:
os.killpg(0, signal.SIGTERM)
except ProcessLookupError:
pass
@dataclass @dataclass
class Summary: class Summary:
@ -54,6 +41,11 @@ class Summary:
def run_uv( def run_uv(
cmd: list[str], package: str, output_dir: Path, version: str | None cmd: list[str], package: str, output_dir: Path, version: str | None
) -> Summary: ) -> Summary:
"""Run a uv subprocess.
The logic captures the max RSS from the process and avoids deadlocks from full
pipes."""
start = time.time() start = time.time()
process = subprocess.Popen( process = subprocess.Popen(
@ -92,7 +84,6 @@ def run_uv(
# Wait for process and get resource usage # Wait for process and get resource usage
_pid, exit_code, rusage = os.wait4(process.pid, 0) _pid, exit_code, rusage = os.wait4(process.pid, 0)
# Wait for threads to finish reading
stdout_thread.join() stdout_thread.join()
stderr_thread.join() stderr_thread.join()
@ -108,18 +99,21 @@ def run_uv(
summary = Summary( summary = Summary(
package=package, exit_code=exit_code, max_rss=max_rss, time=time.time() - start package=package, exit_code=exit_code, max_rss=max_rss, time=time.time() - start
) )
# if max_rss > 1000 * 1024:
# print(f"{package} exit code:{exit_code}, {max_rss / 1024:.0f} MB")
package_dir.joinpath("summary.json").write_text(json.dumps(summary.__dict__)) package_dir.joinpath("summary.json").write_text(json.dumps(summary.__dict__))
return summary return summary
def main(): def signal_handler(executor: ThreadPoolExecutor, signum, frame):
# Register signal handler for Ctrl+C """Handle Ctrl+C gracefully."""
signal.signal(signal.SIGINT, signal_handler) print(f"Stopping for SIGINT (signal {signum})")
executor.shutdown(wait=False, cancel_futures=True)
print("Stopped.")
sys.exit(1)
def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--python", type=str, default="3.13") parser.add_argument("--python", "-p", type=str, default="3.13")
parser.add_argument("--output-dir", type=Path, default="output") parser.add_argument("--output-dir", type=Path, default="output")
parser.add_argument("--uv", type=Path, default=Path("uv")) parser.add_argument("--uv", type=Path, default=Path("uv"))
parser.add_argument("--limit", type=int, default=None) parser.add_argument("--limit", type=int, default=None)
@ -164,9 +158,10 @@ def main():
args.uv, args.uv,
"pip", "pip",
"compile", "compile",
"-",
"-p", "-p",
args.python, args.python,
"-", "--universal",
"--no-build", "--no-build",
"--cache-dir", "--cache-dir",
args.cache, args.cache,
@ -179,59 +174,55 @@ def main():
cmd.append("--offline") cmd.append("--offline")
success = 0 success = 0
all_results = [] # Track all results for analysis all_results = [] # Track all results for analysis
interrupted = False
max_package_len = max(len(package) for package in top_15k_pypi[: args.limit]) max_package_len = max(len(package) for package in top_15k_pypi[: args.limit])
try: with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as executor:
with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as executor: # Shutdown executor on ctrl+c
tasks = [] previous_sigint_handler = signal.signal(
packages_pending = [] signal.SIGINT, functools.partial(signal_handler, executor)
for package in top_15k_pypi[: args.limit]: )
if latest_versions:
if version := latest_versions.get(package): tasks = []
pass packages_pending = []
else: for package in top_15k_pypi[: args.limit]:
tqdm.write(f"Missing version: {package}") if latest_versions:
continue if version := latest_versions.get(package):
pass
else: else:
version = None tqdm.write(f"Missing version: {package}")
tasks.append(executor.submit(run_uv, cmd, package, output_dir, version)) continue
packages_pending.append(package) else:
version = None
progress_bar = tqdm(total=len(packages_pending)) packages_pending.append(package)
tasks.append(executor.submit(run_uv, cmd, package, output_dir, version))
total = len(packages_pending)
with tqdm(total=total) as progress_bar:
for result in concurrent.futures.as_completed(tasks): for result in concurrent.futures.as_completed(tasks):
summary = result.result() summary = result.result()
all_results.append(summary) # Collect all results all_results.append(summary)
progress_bar.update(1) progress_bar.update(1)
packages_pending.remove(summary.package) packages_pending.remove(summary.package)
if len(packages_pending) > 0: if packages_pending:
progress_bar.set_postfix_str( progress_bar.set_postfix_str(
f"{packages_pending[0]:>{max_package_len}}" f"{packages_pending[0]:>{max_package_len}}"
) )
if summary.exit_code == 0: if summary.exit_code == 0:
success += 1 success += 1
progress_bar.close() signal.signal(signal.SIGINT, previous_sigint_handler)
except KeyboardInterrupt: print(f"Success: {success}/{total}")
print("\nInterrupted. Cleaning up...")
interrupted = True
if interrupted or should_stop:
print(f"Interrupted. Success: {success}/{len(all_results)} (completed tasks)")
else:
print(f"Success: {success}/{len(top_15k_pypi[: args.limit])}")
successes = [summary for summary in all_results if summary.exit_code == 0] successes = [summary for summary in all_results if summary.exit_code == 0]
print("\ntop 5 max RSS") print("\n# top 5 max RSS for successes")
largest_rss = sorted(successes, key=lambda x: x.max_rss, reverse=True)[:5] largest_rss = sorted(successes, key=lambda x: x.max_rss, reverse=True)[:5]
for summary in largest_rss: for summary in largest_rss:
print( print(
f"{summary.package}: {summary.max_rss / 1024:.1f} MB (exit code: {summary.exit_code})" f"{summary.package}: {summary.max_rss / 1024:.1f} MB (exit code: {summary.exit_code})"
) )
print("\ntop 5 slowest resolutions") print("\n# top 5 slowest resolutions for successes")
slowest = sorted(successes, key=lambda x: x.time, reverse=True)[:5] slowest = sorted(successes, key=lambda x: x.time, reverse=True)[:5]
for summary in slowest: for summary in slowest:
print( print(