From 88681c97b50a27b3675a6aa66615b1a20a5f1c91 Mon Sep 17 00:00:00 2001 From: konstin Date: Sun, 21 Sep 2025 22:18:58 +0200 Subject: [PATCH] . --- .../ecosystem-testing/ecosystem_testing.py | 135 ++++++++++-------- 1 file changed, 72 insertions(+), 63 deletions(-) diff --git a/scripts/ecosystem-testing/ecosystem_testing.py b/scripts/ecosystem-testing/ecosystem_testing.py index 622d129ef..0058f4790 100644 --- a/scripts/ecosystem-testing/ecosystem_testing.py +++ b/scripts/ecosystem-testing/ecosystem_testing.py @@ -33,10 +33,56 @@ class Summary: time: float -def communicate(process: subprocess.Popen, stdin: str) -> tuple[str, str]: - """We have `Popen.communicate` at home. +def run_uv( + cmd: list[str], package: str, output_dir: Path, version: str | None +) -> Summary: + """Run a uv subprocess. - Start threads to drain the pipes to avoid deadlocks on full pipes, but don't use + The logic captures the max RSS from the process and avoids deadlocks from full + pipes. + """ + + start = time.time() + + process = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + stdin = f"{package}=={version}" if version else package + + stdout, stderr = communicate(process, stdin) + + # At this point, the process is a zombie, so has called `exit()`, but we haven't reaped it with `wait4` yet. + + # rusage is only available on unix + if os.name == "posix": + # Wait for process and get resource usage + _pid, exit_code, rusage = os.wait4(process.pid, 0) + else: + exit_code = process.wait() + rusage = None + + max_rss = rusage.ru_maxrss if rusage else 0 + + package_dir = output_dir.joinpath(package) + package_dir.mkdir(parents=True, exist_ok=True) + package_dir.joinpath("stdout.txt").write_text(stdout) + package_dir.joinpath("stderr.txt").write_text(stderr) + summary = Summary( + package=package, exit_code=exit_code, max_rss=max_rss, time=time.time() - start + ) + package_dir.joinpath("summary.json").write_text(json.dumps(summary.__dict__)) + return summary + + +def communicate(process: subprocess.Popen, stdin: str) -> tuple[str, str]: + """Like `Popen.communicate`, but without the `os.wait` call. + + Start threads to drain the pipes to avoid blocking on full pipes, but don't use libc's `wait` so we can use `os.wait4` later. """ process.stdin.write(stdin) @@ -64,46 +110,6 @@ def communicate(process: subprocess.Popen, stdin: str) -> tuple[str, str]: return stdout[0], stderr[0] -def run_uv( - cmd: list[str], package: str, output_dir: Path, version: str | None -) -> Summary: - """Run a uv subprocess. - - The logic captures the max RSS from the process and avoids deadlocks from full - pipes.""" - - start = time.time() - - process = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - - stdin = f"{package}=={version}" if version else package - - stdout, stderr = communicate(process, stdin) - - # At this point, the process is a zombie, so has called `exit()`, but we haven't reaped it with `wait4` yet. - - # Wait for process and get resource usage - _pid, exit_code, rusage = os.wait4(process.pid, 0) - - max_rss = rusage.ru_maxrss - - package_dir = output_dir.joinpath(package) - package_dir.mkdir(parents=True, exist_ok=True) - package_dir.joinpath("stdout.txt").write_text(stdout) - package_dir.joinpath("stderr.txt").write_text(stderr) - summary = Summary( - package=package, exit_code=exit_code, max_rss=max_rss, time=time.time() - start - ) - package_dir.joinpath("summary.json").write_text(json.dumps(summary.__dict__)) - return summary - - def main(): parser = argparse.ArgumentParser() parser.add_argument("--python", "-p", type=str, default="3.13") @@ -119,27 +125,27 @@ def main(): top_15k_pypi = [pkg["project"] for pkg in top_15k_pypi["rows"]] if args.latest: - latest_versions = cwd.joinpath("package_versions.csv").read_text() - latest_versions = { - row["package_name"]: row["latest_version"] - for row in csv.DictReader(latest_versions.splitlines()) - } + with cwd.joinpath("package_versions.csv").open() as f: + latest_versions = { + row["package_name"]: row["latest_version"] for row in csv.DictReader(f) + } else: latest_versions = None - # 5000 releases, no solution - top_15k_pypi.remove("nucliadb") - # Remove slow packages - for slow in [ + excluded_packages = [ + # 5000 releases, no solution + "nucliadb", # These packages have many non-small versions "tf-models-nightly", "mtmtrain", "llm-dialog-manager", - "edx-enterprise", # Doesn't solve + # Slow and have no solution + "edx-enterprise", "kcli", "emmet-api", - ]: - top_15k_pypi.remove(slow) + ] + for package in excluded_packages: + top_15k_pypi.remove(package) output_dir = cwd.joinpath(args.output_dir) if output_dir.exists(): @@ -183,8 +189,8 @@ def main(): version = None packages_pending.append(package) tasks.append(executor.submit(run_uv, cmd, package, output_dir, version)) - total = len(packages_pending) + with tqdm(total=total) as progress_bar: for result in concurrent.futures.as_completed(tasks): summary = result.result() @@ -199,15 +205,9 @@ def main(): if summary.exit_code == 0: success += 1 - print(f"Success: {success}/{total}") + print(f"Success: {success}/{total} ({success / total:.0%})") successes = [summary for summary in all_results if summary.exit_code == 0] - print("\n# top 5 max RSS for successes") - largest_rss = sorted(successes, key=lambda x: x.max_rss, reverse=True)[:5] - for summary in largest_rss: - print( - f"{summary.package}: {summary.max_rss / 1024:.1f} MB (exit code: {summary.exit_code})" - ) print("\n# top 5 slowest resolutions for successes") slowest = sorted(successes, key=lambda x: x.time, reverse=True)[:5] @@ -216,6 +216,15 @@ def main(): f"{summary.package}: {summary.time:.2f}s (exit code: {summary.exit_code})" ) + if os.name == "posix": + print("\n# top 5 max RSS for successes") + largest_rss = sorted(successes, key=lambda x: x.max_rss, reverse=True)[:5] + for summary in largest_rss: + # Only linux, max RSS is in KB + print( + f"{summary.package}: {summary.max_rss / 1024:.1f} MB (exit code: {summary.exit_code})" + ) + if __name__ == "__main__": main()