mirror of https://github.com/astral-sh/uv
top-packages generation script
Signed-off-by: Mikayla Thompson <mrt@mikayla.codes>
parent 2642acc80b
commit 559d494884

crates/uv/src/commands/tool/top_packages.txt
@@ -0,0 +1,167 @@
ruff
agent-starter-pack
pre-commit
marimo
args
cookiecutter
pyright
ngrok
pytest
hatch
llm
markitdown
mcp-server-fetch
ty
copier
command
mcp-server-git
mcpo
uvx
black
streamdown
autoflake
run
nox
files-to-prompt
mypy
tox
yt-dlp
split_markdown4gpt
mcp-server-time
basic-memory
synth-ai
gac
twine
are
codetoprompt
open-webui
browser-use
pip
wpull
build
with
blender-remote
https
migrate-to-uv
bandit
mcp-atlassian
mkdocs
playwright
everything
maturin
cruft
mcp-feedback-enhanced
smithery
aignostics
databricks
jupyterlab
tool
echo
elevenlabs-mcp
ipython
package
pycowsay
support
completions
jupyter
mini-swe-agent
openhands-ai
pyenvsearch
rogue-ai
uv
uv-dynamic-versioning
ansible-core
class
gcovr
handles
mcp
mcp-proxy
mcp-scan
mcp-server-sqlite
streamlit
blender-mcp
create-mcp-server
eyelet
mcp-server-qdrant
mcp-wiki
on
path
pytest-watch
setup
textual-demo
voice-mode
automagik-tools
bump-my-version
kedro
schemathesis
huggingface_hub
integration
jupyter-core
jupytext
llm-discovery
mcp-sse-shim
openhands
reachy-mini
runs
usage
automagik-hive
business-use-core
cookieplone
cowsay
create-dagster
distribution
eval
healthyselfjournal
httpie
markitdown-mcp
speaches-cli
strip-tags
autocompletion
chroma
cmake
compatibility
detect-secrets
dist
evalgate
iowarp-mcps
isort
juv
lean-lsp-mcp
llamactl
nearc
ngiab_data_preprocess
nvitop
prek
requests
runner
samstacks
semgrep-mcp
specifyplus
sqlite-utils
the
uv-publish
wandb
agentprobe
audible-cli
cakemail-api-docs-mcp
datasette
fonttools
git-cliff
goldenverba
honcho
internetarchive
mcp-google-sheets
modal
modern
pdm
poe
pyinstaller
pyrefly
radon
should
sniffly
specify
superclaude
tools
workspace-mcp
yt-mpv

scripts/uvx_usage_on_gh/fetch_uvx_usage.py
@@ -0,0 +1,753 @@
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "httpx"
# ]
# ///

"""
Use the GitHub Code Search API to find instances of `uvx <package>` in:
- README files (*.md)
- Shell scripts (*.sh, *.bash, *.zsh)

Requirements:
- A GitHub Personal Access Token (PAT) with `public_repo` scope
- Set the GITHUB_TOKEN environment variable or pass --token

Usage:
    python scripts/uvx_usage_on_gh/fetch_uvx_usage.py --output crates/uv/src/commands/tool/top_packages.txt
"""
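
# For example: a README line such as `uvx ruff check .` is surfaced by the Markdown
# search below, extract_package_name() pulls "ruff" out of the matched fragment, the
# name is confirmed to exist on PyPI, and only then does it count toward the
# generated top_packages.txt.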

import argparse
import asyncio
import logging
import os
import re
import sys
import time
from collections import Counter
from pathlib import Path
from typing import Any, NamedTuple, Optional

import httpx

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# GitHub API configuration
GITHUB_API_BASE = "https://api.github.com"
CODE_SEARCH_ENDPOINT = f"{GITHUB_API_BASE}/search/code"

# Rate limiting configuration
RATE_LIMIT_DELAY = 6.1  # seconds between requests (slightly more than 60/10)

# GitHub Code Search API limits
GITHUB_CODE_SEARCH_MAX_RESULTS = 1000  # Hard limit: only first 1000 results accessible
GITHUB_CODE_SEARCH_MAX_PAGE = 10  # Page 10 = results 901-1000, page 11+ returns 422

# Retry configuration
MAX_RETRIES = 5
INITIAL_RETRY_DELAY = 10  # seconds
MAX_RETRY_DELAY = 300  # 5 minutes max delay

# PyPI check concurrency
PYPI_CONCURRENT_CHECKS = 20  # Number of concurrent PyPI checks

# PyPI API endpoint
PYPI_JSON_API_TEMPLATE = "https://pypi.org/pypi/{package}/json"


class RateLimitInfo(NamedTuple):
    remaining: int | None
    reset_time: int | None


class GitHubSearchResponse(NamedTuple):
    items: list[dict[str, Any]]
    total_count: int
    rate_limit: RateLimitInfo


# Regex patterns for extracting package names
PACKAGE_PATTERN_FROM = re.compile(
    r"\buvx\s+(?:--\w+(?:\s+\S+)?\s+)*--from\s+([a-z0-9](?:[a-z0-9._-]*[a-z0-9])?)(?:@\S+)?",
    re.IGNORECASE,
)
PACKAGE_PATTERN_NORMAL = re.compile(
    r"\buvx\s+(?:--\w+(?:\s+\S+)?\s+)+([a-z0-9](?:[a-z0-9._-]*[a-z0-9])?)(?:@\S+)?",
    re.IGNORECASE,
)
PACKAGE_PATTERN_SIMPLE = re.compile(
    r"\buvx\s+([a-z0-9](?:[a-z0-9._-]*[a-z0-9])?)(?:@\S+)?",
    re.IGNORECASE,
)
URL_PATTERN = re.compile(
    r"\buvx\s+(?:--\w+(?:\s+\S+)?\s+)*--from\s+(git\+[a-z]+://|git://|https?://)",
    re.IGNORECASE,
)
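
# Illustrative inputs and the name each pattern should capture (group 1):
#   "uvx ruff check ."                -> PACKAGE_PATTERN_SIMPLE captures "ruff"
#   "uvx black@latest"                -> PACKAGE_PATTERN_SIMPLE captures "black"
#   "uvx --python 3.12 textual-demo"  -> PACKAGE_PATTERN_NORMAL captures "textual-demo"
#   "uvx --from httpie http"          -> PACKAGE_PATTERN_FROM captures "httpie"
#   "uvx --from git+https://..."      -> URL_PATTERN matches, so the match is discarded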


def extract_package_name(match_text: str) -> Optional[str]:
    """
    Extract package name from a match.

    Handles patterns like:
    - uvx ruff
    - uvx --from httpie http (extracts "httpie")
    - uvx --python 3.12 textual-demo
    - uvx black@latest
    - uvx pytest --version
    - uvx streamlit run streamlit_app/dashboard.py

    Skips patterns like:
    - uvx --from git+https://... (URLs are not package names)
    - uvx --from http://... (URLs are not package names)
    """
    # Skip URLs after --from
    if URL_PATTERN.search(match_text):
        return None

    # Try patterns in order: --from, flags, simple
    match = (
        PACKAGE_PATTERN_FROM.search(match_text)
        or PACKAGE_PATTERN_NORMAL.search(match_text)
        or PACKAGE_PATTERN_SIMPLE.search(match_text)
    )

    if not match:
        return None

    package = match.group(1).lower()

    # Remove version specifiers (e.g., @latest, @1.0.0)
    if "@" in package:
        package = package.split("@")[0]

    # Validation checks
    if package.startswith("--") or "/" in package or "\\" in package or len(package) < 2:
        return None

    return package


def _calculate_retry_delay(
    status_code: int,
    retry_count: int,
    response_headers: httpx.Headers,
) -> int:
    """Calculate delay for retry based on status code and headers."""
    if status_code in (403, 429):
        # Try Retry-After header first
        retry_after = response_headers.get("Retry-After")
        if retry_after:
            try:
                return int(retry_after) + 2  # Add 2 second buffer
            except ValueError:
                pass

        # Fall back to X-RateLimit-Reset
        reset_time_str = response_headers.get("X-RateLimit-Reset")
        if reset_time_str:
            try:
                reset_time = int(reset_time_str)
                current_time = int(time.time())
                return max(reset_time - current_time + 2, 10)  # At least 10 seconds
            except ValueError:
                pass

    # Default: exponential backoff
    return min(INITIAL_RETRY_DELAY * (2**retry_count), MAX_RETRY_DELAY)
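
# With INITIAL_RETRY_DELAY = 10 and MAX_RETRY_DELAY = 300, the exponential-backoff
# fallback works out to 10, 20, 40, 80, 160 seconds for retry_count 0 through 4
# (MAX_RETRIES is 5), and would cap at 300 seconds if further retries were allowed.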


def search_github_code(
    query: str,
    token: str,
    page: int,
    per_page: int = 100,
    retry_count: int = 0,
) -> GitHubSearchResponse:
    """Execute one Code Search request, retrying on rate limits and transient errors."""
    headers = {
        "Accept": "application/vnd.github.text-match+json",
        "Authorization": f"Bearer {token}",
    }

    params = {
        "q": query,
        "page": page,
        "per_page": min(per_page, 100),
    }

    logger.info(f"Searching GitHub: {query} (page {page}, attempt {retry_count + 1})")

    try:
        response = httpx.get(
            CODE_SEARCH_ENDPOINT,
            headers=headers,
            params=params,
            timeout=30.0,
        )
        response.raise_for_status()

        # Extract rate limit info
        remaining_str = response.headers.get("X-RateLimit-Remaining")
        reset_time_str = response.headers.get("X-RateLimit-Reset")
        rate_limit = RateLimitInfo(
            remaining=int(remaining_str) if remaining_str else None,
            reset_time=int(reset_time_str) if reset_time_str else None,
        )

        logger.debug(
            f"Rate limit remaining: {rate_limit.remaining}, reset at: {rate_limit.reset_time}"
        )

        data = response.json()
        total_count = data.get("total_count", 0)
        logger.info(f"Count of total results: {total_count}")

        return GitHubSearchResponse(
            items=data.get("items", []),
            total_count=total_count,
            rate_limit=rate_limit,
        )

    except httpx.HTTPStatusError as e:
        status_code = e.response.status_code

        # 422 on page 11+ is likely the hard 1000 result limit
        if status_code == 422 and page > GITHUB_CODE_SEARCH_MAX_PAGE:
            logger.info(
                f"422 error on page {page} - likely hit GitHub's 1000 result limit. "
                f"Code Search API only returns first {GITHUB_CODE_SEARCH_MAX_RESULTS} results."
            )
            raise ValueError(
                f"Reached GitHub Code Search API limit (page {page} > {GITHUB_CODE_SEARCH_MAX_PAGE})"
            ) from e

        # Retryable errors
        if status_code in (403, 422, 429) and retry_count < MAX_RETRIES:
            delay = _calculate_retry_delay(status_code, retry_count, e.response.headers)

            if status_code == 403:
                logger.warning(
                    f"Rate limit exceeded (403). Retrying in {delay}s "
                    f"(attempt {retry_count + 1}/{MAX_RETRIES})"
                )
            elif status_code == 429:
                logger.warning(
                    f"Rate limit exceeded (429). Retrying in {delay}s "
                    f"(attempt {retry_count + 1}/{MAX_RETRIES})"
                )
            elif status_code == 422:
                logger.warning(
                    f"Validation error (422) - may be transient. Retrying in {delay}s "
                    f"(attempt {retry_count + 1}/{MAX_RETRIES})"
                )

            time.sleep(delay)
            return search_github_code(query, token, page, per_page, retry_count + 1)

        # Non-retryable or max retries reached
        if status_code == 403:
            logger.error(
                "Rate limit exceeded or authentication failed after retries. "
                "Check your token and wait before retrying."
            )
        elif status_code == 422:
            logger.error(f"Invalid query after retries: {query}")
        else:
            logger.error(f"HTTP error {status_code} after retries")
        # Re-raise so the caller sees the failure instead of an implicit None return
        raise

    except httpx.RequestError as e:
        # Network errors are retryable
        if retry_count < MAX_RETRIES:
            delay = min(INITIAL_RETRY_DELAY * (2**retry_count), MAX_RETRY_DELAY)
            logger.warning(
                f"Request failed: {e}. Retrying in {delay}s "
                f"(attempt {retry_count + 1}/{MAX_RETRIES})"
            )
            time.sleep(delay)
            return search_github_code(query, token, page, per_page, retry_count + 1)

        logger.error(f"Request failed after retries: {e}")
        raise


async def wait_for_rate_limit(rate_limit: RateLimitInfo) -> None:
    """
    Wait if we're approaching rate limit or need to wait until reset.

    Args:
        rate_limit: Rate limit information from previous request
    """
    if rate_limit.remaining is None or rate_limit.reset_time is None:
        await asyncio.sleep(RATE_LIMIT_DELAY)
        return

    # If running low on requests, wait until reset
    if rate_limit.remaining <= 2:
        wait_time = rate_limit.reset_time - int(time.time()) + 2  # Add 2 second buffer
        if wait_time > 0:
            logger.info(
                f"Rate limit low ({rate_limit.remaining} remaining). "
                f"Waiting {wait_time}s until reset at {rate_limit.reset_time}"
            )
            await asyncio.sleep(wait_time)
        else:
            await asyncio.sleep(RATE_LIMIT_DELAY)
    else:
        await asyncio.sleep(RATE_LIMIT_DELAY)
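
# For example: with 50 requests remaining this just sleeps RATE_LIMIT_DELAY (6.1 s);
# with 2 or fewer remaining and a reset 30 seconds away, it sleeps roughly 32 seconds
# (time to reset plus the 2-second buffer) before the next Code Search request.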


def build_size_query(base_query: str, start_bytes: int, end_bytes: Optional[int]) -> str:
    """Build a GitHub Code Search query with size filter."""
    if end_bytes is None:
        return f"{base_query} size:>={start_bytes}"
    return f"{base_query} size:{start_bytes}..{end_bytes}"
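
# For example:
#   build_size_query("uvx AND language:Markdown in:file", 0, 1025)
#     -> "uvx AND language:Markdown in:file size:0..1025"
#   build_size_query("uvx AND language:Shell in:file", 32000, None)
#     -> "uvx AND language:Shell in:file size:>=32000"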


async def check_pypi_package_exists(
    package: str,
    cache: dict[str, bool],
    client: httpx.AsyncClient,
) -> tuple[str, bool]:
    """
    Check if a single package exists on PyPI.

    Args:
        package: Package name to check
        cache: Dictionary to cache results (modified in-place)
        client: httpx async client instance

    Returns:
        Tuple of (package_name, exists)
    """
    # Check cache first
    if package in cache:
        return (package, cache[package])

    url = PYPI_JSON_API_TEMPLATE.format(package=package)

    try:
        response = await client.get(url, timeout=10.0, follow_redirects=True)
        exists = response.status_code == 200
        cache[package] = exists

        if exists:
            logger.debug(f"✓ {package} exists on PyPI")
        else:
            logger.debug(f"✗ {package} not found on PyPI")

        return (package, exists)
    except httpx.RequestError as e:
        logger.debug(f"Error checking {package} on PyPI: {e}")
        cache[package] = False
        return (package, False)


async def check_packages_batch(
    packages: list[str],
    cache: dict[str, bool],
    semaphore: asyncio.Semaphore,
) -> dict[str, bool]:
    """
    Check a batch of packages against PyPI concurrently.

    Args:
        packages: List of package names to check
        cache: Dictionary to cache results (modified in-place)
        semaphore: Semaphore to limit concurrent requests

    Returns:
        Dictionary mapping package names to their existence status
    """
    async def check_one(package: str) -> tuple[str, bool]:
        async with semaphore:
            async with httpx.AsyncClient() as client:
                return await check_pypi_package_exists(package, cache, client)

    tasks = [check_one(pkg) for pkg in packages]
    results = await asyncio.gather(*tasks, return_exceptions=False)
    return dict(results)
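
# With PYPI_CONCURRENT_CHECKS = 20, a batch of, say, 100 newly seen names makes at
# most 20 simultaneous requests to https://pypi.org/pypi/<name>/json; results go into
# the shared cache, so each name is only checked once per run.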


def extract_packages_from_items(items: list[dict[str, Any]]) -> Counter:
    page_packages = Counter()

    for item in items:
        # Extract from text_matches (code snippets)
        text_matches = item.get("text_matches", [])
        for match in text_matches:
            fragment = match.get("fragment", "")
            package = extract_package_name(fragment)
            if package:
                page_packages[package] += 1
                logger.debug(f"Found package: {package}")

        # Also check file path/name
        path = item.get("path", "")
        if "uvx" in path.lower():
            package = extract_package_name(path)
            if package:
                page_packages[package] += 1

    return page_packages


async def search_uvx_usage(
    token: str, max_pages: int = 10
) -> tuple[Counter[str], dict[str, bool]]:
    """
    Search for uvx usage across GitHub and extract package names.

    Processes packages incrementally and checks PyPI concurrently.

    Args:
        token: GitHub Personal Access Token
        max_pages: Maximum number of pages to fetch per query (default: 10)

    Returns:
        Tuple of (Counter of valid package names with counts, updated PyPI cache)
    """
    pypi_cache: dict[str, bool] = {}
    valid_package_counts: Counter[str] = Counter()
    all_package_counts: Counter[str] = Counter()
    unknown_packages_queue: list[str] = []

    semaphore = asyncio.Semaphore(PYPI_CONCURRENT_CHECKS)
    current_rate_limit = RateLimitInfo(None, None)

    # Size buckets to work around GitHub's 1000 result limit
    # It would be way smarter to do this dynamically (query a given size range and do a
    # binary/proportional split on the number of results) but I already got this far
    # so I'm not going to change it for now.
    markdown_size_buckets = [
        (0, 1025),
        (1025, 1250),
        (1250, 1500),
        (1500, 1750),
        (1750, 2000),
        (2000, 2500),
        (2500, 3500),
        (3500, 4500),
        (4500, 5500),
        (5500, 6250),
        (6250, 7000),
        (7000, 7750),
        (7750, 8500),
        (8500, 9250),
        (9250, 10000),
        (10000, 10750),
        (10750, 11750),
        (11750, 13000),
        (13000, 14000),
        (14000, 15250),
        (15250, 16250),
        (16250, 17500),
        (17500, 18750),
        (18750, 20000),
        (20000, 22000),
        (22000, 24000),
        (24000, 26000),
        (26000, 28000),
        (28000, 30000),
        (30000, 33000),
        (33000, 36000),
        (36000, 39000),
        (39000, 42000),
        (42000, 45000),
        (45000, 50000),
        (50000, 60000),
        (60000, 70000),
        (70000, 80000),
        (80000, 100000),
        (100000, 120000),
        (120000, 140000),
        (140000, 160000),
        (160000, 180000),
        (180000, 200000),
        (200000, 250000),
        (250000, 300000),
        (300000, None),
    ]

    shell_size_buckets = [
        (0, 2800),
        (2800, 6000),
        (6000, 15000),
        (15000, 32000),
        (32000, None),
    ]

    queries = [
        build_size_query("uvx AND language:Markdown in:file", start, end)
        for start, end in markdown_size_buckets
    ]
    queries.extend(
        build_size_query("uvx AND language:Shell in:file", start, end)
        for start, end in shell_size_buckets
    )
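
    # The 47 Markdown buckets and 5 Shell buckets above expand to 52 size-scoped
    # queries; each query can surface at most GITHUB_CODE_SEARCH_MAX_RESULTS (1000)
    # files, so one run inspects on the order of 50,000 files at most.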

    async def process_unknown_packages() -> None:
        """Process queued unknown packages against PyPI."""
        if not unknown_packages_queue:
            return

        packages_to_check = list(set(unknown_packages_queue))
        unknown_packages_queue.clear()

        logger.info(f"Checking {len(packages_to_check)} unknown packages against PyPI...")
        results = await check_packages_batch(packages_to_check, pypi_cache, semaphore)

        # Update valid package counts based on results
        for package, exists in results.items():
            if exists:
                count = all_package_counts.get(package, 0)
                if count > 0:
                    valid_package_counts[package] = count
                    logger.debug(f"Added {package} to valid packages ({count} occurrences)")
                else:
                    logger.warning(f"Package {package} validated but has no count")

    for query_idx, query in enumerate(queries):
        page = 1
        effective_max_pages = min(max_pages, GITHUB_CODE_SEARCH_MAX_PAGE)

        # Wait before starting a new query (except the first one)
        if query_idx > 0:
            logger.debug("Waiting before starting new query...")
            await wait_for_rate_limit(current_rate_limit)
            await process_unknown_packages()

        while page <= effective_max_pages:
            try:
                # Rate limiting: wait between page requests (except for the first page)
                if page > 1:
                    logger.debug("Waiting before next page...")
                    await wait_for_rate_limit(current_rate_limit)
                    await process_unknown_packages()

                response = search_github_code(query, token, page=page)

                # Update rate limit state from response
                current_rate_limit = response.rate_limit

                items = response.items
                if not items:
                    logger.info(f"No more results for query: {query}")
                    break

                logger.info(f"Found {len(items)} results on page {page}")

                # Extract package names from this page
                page_packages = extract_packages_from_items(items)

                # Process packages from this page
                for package, count in page_packages.items():
                    all_package_counts[package] += count

                    # Check cache first
                    if package in pypi_cache:
                        if pypi_cache[package]:
                            valid_package_counts[package] = all_package_counts[package]
                            logger.debug(
                                f"Known valid: {package} (total: {all_package_counts[package]})"
                            )
                    else:
                        unknown_packages_queue.append(package)

                # Process unknown packages while we have time before next GitHub request
                if unknown_packages_queue:
                    await process_unknown_packages()

                # Check if there are more pages
                effective_total = min(
                    response.total_count, GITHUB_CODE_SEARCH_MAX_RESULTS
                )

                if len(items) < 100 or page * 100 >= effective_total:
                    logger.info(
                        f"Reached end of results for query: {query} "
                        f"(page {page}, total: {response.total_count})"
                    )
                    break

                page += 1

            except ValueError as e:
                # This is raised when we hit the 1000 result limit
                logger.info(f"Hit GitHub Code Search API limit: {e}")
                break
            except Exception as e:
                logger.error(f"Error processing page {page} of query '{query}': {e}")
                break

        # Process any remaining unknown packages after each query
        await process_unknown_packages()

    # Final processing of any remaining unknown packages
    await process_unknown_packages()

    logger.info(
        f"Found {len(valid_package_counts)} valid PyPI packages "
        f"out of {len(all_package_counts)} total"
    )

    return valid_package_counts, pypi_cache


def write_top_packages(
    package_counts: Counter[str],
    output_path: Path,
    debug_output_path: Path,
    min_count: int = 2,
) -> None:
    """
    Write top packages to files, sorted by frequency.

    Packages are written in buckets by threshold (100+, 25+, 10+, 5+, min_count+).

    Args:
        package_counts: Counter of package names and counts
        output_path: Path to output file (main packages list)
        debug_output_path: Path to debug output file (with counts)
        min_count: Minimum occurrence count to include (default: 2)
    """
    thresholds = [min_count, 5, 10, 25, 100]

    # Filter packages into buckets by threshold
    buckets = []
    for i, threshold in enumerate(thresholds):
        next_threshold = thresholds[i + 1] if i + 1 < len(thresholds) else float("inf")
        bucket_packages = {
            pkg: count
            for pkg, count in package_counts.items()
            if threshold <= count < next_threshold
        }
        buckets.append({"threshold": threshold, "packages": bucket_packages})

    with open(output_path, "w") as f, open(debug_output_path, "w") as f_debug:
        for bucket in reversed(buckets):
            threshold = bucket["threshold"]
            packages = bucket["packages"]
            logger.info(
                f"Greater than or equal to {threshold} mentions: {len(packages)} packages"
            )

            # Sort by count descending, then alphabetically
            sorted_packages = sorted(
                packages.items(), key=lambda x: (-x[1], x[0])
            )

            for package, count in sorted_packages:
                f.write(f"{package}\n")
                f_debug.write(f"{package}: {count}\n")

    logger.info(f"Successfully wrote top packages to {output_path}")
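
# For illustration: with hypothetical counts {"ruff": 150, "mypy": 30, "cowsay": 3} and
# min_count=2, buckets are written highest threshold first, so the output order is
# ruff (100+ bucket), then mypy (25+ bucket), then cowsay (2+ bucket), and the debug
# file records "ruff: 150", "mypy: 30", "cowsay: 3" alongside.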


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Fetch popular packages from GitHub by searching for uvx usage"
    )
    parser.add_argument(
        "--token",
        type=str,
        help="GitHub Personal Access Token (or set GITHUB_TOKEN env var)",
        default=os.getenv("GITHUB_TOKEN"),
    )
    parser.add_argument(
        "--output",
        type=Path,
        help="Output file path (default: top_packages.txt)",
        default=None,
    )
    parser.add_argument(
        "--debug-output",
        type=Path,
        help="Debug output file path (default: top_packages_debug.txt)",
        default=None,
    )
    parser.add_argument(
        "--max-pages",
        type=int,
        default=10,
        help="Maximum pages to fetch per query (default: 10)",
    )
    parser.add_argument(
        "--min-count",
        type=int,
        default=2,
        help="Minimum occurrence count to include (default: 2)",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Enable verbose logging",
    )

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    if not args.token:
        logger.error(
            "GitHub token is required. Set GITHUB_TOKEN environment variable "
            "or pass --token. Create a token at: https://github.com/settings/tokens"
        )
        sys.exit(1)

    # Set default output paths
    if args.output is None or args.debug_output is None:
        script_dir = Path(__file__).parent
        project_root = script_dir.parent.parent
        if args.output is None:
            args.output = (
                project_root
                / "crates"
                / "uv"
                / "src"
                / "commands"
                / "tool"
                / "top_packages.txt"
            )
        if args.debug_output is None:
            args.debug_output = (
                project_root
                / "crates"
                / "uv"
                / "src"
                / "commands"
                / "tool"
                / "top_packages_debug.txt"
            )

    logger.info("Starting GitHub search for uvx usage...")
    logger.info(f"Output will be written to: {args.output}")
    logger.info(f"Debug output will be written to: {args.debug_output}")

    valid_packages, pypi_cache = asyncio.run(
        search_uvx_usage(args.token, max_pages=args.max_pages)
    )

    if not valid_packages:
        logger.warning("No valid PyPI packages found.")
        sys.exit(1)

    logger.info(f"Found {len(valid_packages)} valid PyPI packages")
    logger.info(f"Top 10 valid packages: {valid_packages.most_common(10)}")
    logger.info(f"PyPI cache contains {len(pypi_cache)} entries")

    write_top_packages(
        valid_packages, args.output, args.debug_output, min_count=args.min_count
    )


if __name__ == "__main__":
    main()