diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 326146e4450..45636b64c5c 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -40,7 +40,7 @@ RUN echo "${USERNAME} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/devcontaineruser
     chmod 0440 /etc/sudoers.d/devcontaineruser
 
 # Toolchain installation with SHA256 verification
-# Run "python3 toolchain.py generate" to update toolchain_config.env
+# Run "python3 toolchain.py" to update toolchain_config.env
 ARG TARGETPLATFORM
 COPY .devcontainer/toolchain_config.env /tmp/toolchain_config.env
 RUN set -e; \
@@ -69,6 +69,34 @@ RUN echo "Extracting toolchain..."; \
     rm /tmp/toolchain.tar.gz; \
     chown -R ${USERNAME} /opt/mongodbtoolchain;
 
+# Evergreen CLI installation with SHA256 verification
+# Run "python3 evergreen_cli.py" to update evergreen_cli_config.env
+COPY .devcontainer/evergreen_cli_config.env /tmp/evergreen_cli_config.env
+RUN set -e; \
+    . /tmp/evergreen_cli_config.env; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+        EVERGREEN_CLI_URL="$EVERGREEN_CLI_ARM64_URL"; \
+        EVERGREEN_CLI_SHA256="$EVERGREEN_CLI_ARM64_SHA256"; \
+        ARCH="arm64"; \
+    elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
+        EVERGREEN_CLI_URL="$EVERGREEN_CLI_AMD64_URL"; \
+        EVERGREEN_CLI_SHA256="$EVERGREEN_CLI_AMD64_SHA256"; \
+        ARCH="amd64"; \
+    else \
+        echo "Unsupported platform for Evergreen CLI: $TARGETPLATFORM"; \
+        exit 1; \
+    fi; \
+    echo "Installing Evergreen CLI for: $ARCH"; \
+    echo "URL: $EVERGREEN_CLI_URL"; \
+    echo "Expected SHA256: $EVERGREEN_CLI_SHA256"; \
+    curl -fSL "$EVERGREEN_CLI_URL" -o /tmp/evergreen; \
+    echo "Verifying checksum..."; \
+    echo "$EVERGREEN_CLI_SHA256  /tmp/evergreen" | sha256sum -c -; \
+    echo "Installing to /usr/local/bin/evergreen..."; \
+    mv /tmp/evergreen /usr/local/bin/evergreen; \
+    chmod +x /usr/local/bin/evergreen; \
+    echo "Evergreen CLI installation complete"
+
 USER $USERNAME
 ENV USER=${USERNAME}
 RUN /opt/mongodbtoolchain/revisions/*/scripts/install.sh; echo "Toolchain installation complete"
diff --git a/.devcontainer/evergreen_cli.py b/.devcontainer/evergreen_cli.py
new file mode 100755
index 00000000000..a6d33c1aacf
--- /dev/null
+++ b/.devcontainer/evergreen_cli.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+"""
+Evergreen CLI configuration generator for DevContainers.
+
+Generates evergreen_cli_config.env with URLs and SHA256 checksums for both ARM64 and AMD64.
+"""
+
+import os
+import sys
+import tempfile
+from datetime import datetime
+
+# Add script directory to path for importing local modules
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from s3_artifact_utils import calculate_sha256, download_file
+
+# Default Evergreen CLI version
+DEFAULT_EVERGREEN_VERSION = "4abdb95261a646cdd4b979e8717f261d830e6a0b"
+
+# S3 bucket and prefix
+BUCKET = "evg-bucket-evergreen"
+PREFIX = "evergreen/clients"
+
+
+def fetch_evergreen_cli_info(arch: str, version: str) -> dict:
+    """Fetch Evergreen CLI info for a specific architecture."""
+    print(f"\n🔍 Fetching {arch} Evergreen CLI...", file=sys.stderr)
+
+    arch_path = f"linux_{arch}"
+    url = f"https://{BUCKET}.s3.amazonaws.com/{PREFIX}/evergreen_{version}/{arch_path}/evergreen"
+    print(f"URL: {url}", file=sys.stderr)
+
+    # Download to temp location to calculate checksum
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".bin") as tmp:
+        tmp_path = tmp.name
+
+    try:
+        download_file(url, tmp_path)
+        sha256 = calculate_sha256(tmp_path)
+        print(f"SHA256: {sha256}", file=sys.stderr)
+    finally:
+        os.unlink(tmp_path)
+
+    return {
+        "url": url,
+        "sha256": sha256,
+        "arch_path": arch_path,
+        "version": version,
+    }
+
+
+def main():
+    """Generate Evergreen CLI configuration file."""
+    version = DEFAULT_EVERGREEN_VERSION
+    print(f"Using version: {version}", file=sys.stderr)
+
+    # Fetch both architectures
+    arm64_info = fetch_evergreen_cli_info("arm64", version)
+    amd64_info = fetch_evergreen_cli_info("amd64", version)
+
+    # Determine output path
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    output_file = os.path.join(script_dir, "evergreen_cli_config.env")
+
+    # Write config file
+    with open(output_file, "w") as f:
+        f.write("# Generated by evergreen_cli.py\n")
+        f.write("# DO NOT EDIT MANUALLY - run: python3 evergreen_cli.py\n")
+        f.write("#\n")
+        f.write(f"# Generated: {datetime.now().isoformat()}\n")
+        f.write(f"# Version: {version}\n")
+        f.write("\n")
+        f.write("# ARM64 Evergreen CLI\n")
+        f.write(f'EVERGREEN_CLI_ARM64_URL="{arm64_info["url"]}"\n')
+        f.write(f'EVERGREEN_CLI_ARM64_SHA256="{arm64_info["sha256"]}"\n')
+        f.write(f'EVERGREEN_CLI_ARM64_ARCH="{arm64_info["arch_path"]}"\n')
+        f.write(f'EVERGREEN_CLI_ARM64_VERSION="{arm64_info["version"]}"\n')
+        f.write("\n")
+        f.write("# AMD64 Evergreen CLI\n")
+        f.write(f'EVERGREEN_CLI_AMD64_URL="{amd64_info["url"]}"\n')
+        f.write(f'EVERGREEN_CLI_AMD64_SHA256="{amd64_info["sha256"]}"\n')
+        f.write(f'EVERGREEN_CLI_AMD64_ARCH="{amd64_info["arch_path"]}"\n')
+        f.write(f'EVERGREEN_CLI_AMD64_VERSION="{amd64_info["version"]}"\n')
+
+    print(f"\n✅ Configuration written to: {output_file}", file=sys.stderr)
+    print("\nContents:", file=sys.stderr)
+    with open(output_file) as f:
+        print(f.read(), file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.devcontainer/evergreen_cli_config.env b/.devcontainer/evergreen_cli_config.env
new file mode 100644
index 00000000000..81731b02fd8
--- /dev/null
+++ b/.devcontainer/evergreen_cli_config.env
@@ -0,0 +1,17 @@
+# Generated by evergreen_cli.py
+# DO NOT EDIT MANUALLY - run: python3 evergreen_cli.py
+#
+# Generated: 2025-10-25T22:17:59.783295
+# Version: 4abdb95261a646cdd4b979e8717f261d830e6a0b
+
+# ARM64 Evergreen CLI
+EVERGREEN_CLI_ARM64_URL="https://evg-bucket-evergreen.s3.amazonaws.com/evergreen/clients/evergreen_4abdb95261a646cdd4b979e8717f261d830e6a0b/linux_arm64/evergreen"
+EVERGREEN_CLI_ARM64_SHA256="383d4ea4a428012b510da02821c61071ddea11b1176ef2de70795d65c004f954"
+EVERGREEN_CLI_ARM64_ARCH="linux_arm64"
+EVERGREEN_CLI_ARM64_VERSION="4abdb95261a646cdd4b979e8717f261d830e6a0b"
+
+# AMD64 Evergreen CLI
+EVERGREEN_CLI_AMD64_URL="https://evg-bucket-evergreen.s3.amazonaws.com/evergreen/clients/evergreen_4abdb95261a646cdd4b979e8717f261d830e6a0b/linux_amd64/evergreen"
+EVERGREEN_CLI_AMD64_SHA256="7a9f2d55d19f4fa09b209c1771d2f1adb7b44cccaffa76936f745db48a1f3fd3"
+EVERGREEN_CLI_AMD64_ARCH="linux_amd64"
+EVERGREEN_CLI_AMD64_VERSION="4abdb95261a646cdd4b979e8717f261d830e6a0b"
diff --git a/.devcontainer/s3_artifact_utils.py b/.devcontainer/s3_artifact_utils.py
new file mode 100644
index 00000000000..fb5a09bd63f
--- /dev/null
+++ b/.devcontainer/s3_artifact_utils.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+"""
+Shared S3 artifact utilities for DevContainer setup scripts.
+
+This module provides common functionality for downloading and verifying
+artifacts from S3, used by toolchain.py and evergreen_cli.py.
+"""
+
+import hashlib
+import sys
+import xml.etree.ElementTree as ET
+from datetime import datetime
+from urllib import request
+from urllib.error import HTTPError, URLError
+
+
+def list_s3_objects(bucket: str, prefix: str, path_style: bool = False) -> list[dict]:
+    """Query S3 REST API for objects matching prefix.
+
+    Args:
+        bucket: S3 bucket name
+        prefix: Prefix to filter objects
+        path_style: If True, use path-style URL (s3.amazonaws.com/bucket).
+                   If False, use virtual-hosted style (bucket.s3.amazonaws.com).
+                   Path-style is required for buckets with dots in the name.
+    """
+    try:
+        if path_style:
+            url = f"https://s3.amazonaws.com/{bucket}?list-type=2&prefix={prefix}"
+        else:
+            url = f"https://{bucket}.s3.amazonaws.com?list-type=2&prefix={prefix}"
+        print(f"Querying S3: {url}", file=sys.stderr)
+
+        with request.urlopen(url) as response:
+            xml_data = response.read()
+
+        root = ET.fromstring(xml_data)
+        ns = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"}
+
+        objects = []
+        for content in root.findall("s3:Contents", ns):
+            key_elem = content.find("s3:Key", ns)
+            modified_elem = content.find("s3:LastModified", ns)
+
+            if key_elem is not None and modified_elem is not None:
+                objects.append(
+                    {
+                        "Key": key_elem.text,
+                        "LastModified": datetime.fromisoformat(
+                            modified_elem.text.replace("Z", "+00:00")
+                        ),
+                    }
+                )
+
+        return objects
+
+    except (HTTPError, URLError, ET.ParseError) as e:
+        print(f"Error querying S3: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+def download_file(url: str, output_path: str) -> None:
+    """Download file from URL."""
+    print(f"Downloading {url}...", file=sys.stderr)
+    try:
+        request.urlretrieve(url, output_path)
+        print(f"Saved to {output_path}", file=sys.stderr)
+    except (HTTPError, URLError) as e:
+        print(f"Download failed: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+def calculate_sha256(file_path: str) -> str:
+    """Calculate SHA256 checksum of file."""
+    sha256_hash = hashlib.sha256()
+    with open(file_path, "rb") as f:
+        for chunk in iter(lambda: f.read(8192), b""):
+            sha256_hash.update(chunk)
+    return sha256_hash.hexdigest()
diff --git a/.devcontainer/toolchain.py b/.devcontainer/toolchain.py
index cf5b48f19eb..5fef2167f23 100755
--- a/.devcontainer/toolchain.py
+++ b/.devcontainer/toolchain.py
@@ -1,114 +1,56 @@
 #!/usr/bin/env python3
 """
-MongoDB Toolchain Management for DevContainers
+MongoDB Toolchain configuration generator for DevContainers.
 
-This script handles fetching, downloading, and configuring MongoDB toolchains from S3.
-Supports both dynamic (latest) and static (locked with SHA256) approaches.
+Generates toolchain_config.env with URLs and SHA256 checksums for both ARM64 and AMD64.
 """
 
-import argparse
-import hashlib
 import os
 import sys
-import xml.etree.ElementTree as ET
+import tempfile
 from datetime import datetime
-from typing import Optional
-from urllib import request
-from urllib.error import HTTPError, URLError
+
+# Add script directory to path for importing local modules
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from s3_artifact_utils import calculate_sha256, download_file, list_s3_objects
+
+# S3 bucket and prefix
+BUCKET = "boxes.10gen.com"
+PREFIX = "build/toolchain/mongodbtoolchain-ubuntu2404"
 
 
-def list_s3_objects(bucket: str, prefix: str) -> list[dict]:
-    """Query S3 REST API for objects matching prefix."""
-    try:
-        url = f"https://s3.amazonaws.com/{bucket}?list-type=2&prefix={prefix}"
-        print(f"Querying S3: {url}", file=sys.stderr)
+def fetch_toolchain_info(arch: str) -> dict:
+    """Fetch toolchain info for a specific architecture."""
+    print(f"\n🔍 Fetching {arch} toolchain...", file=sys.stderr)
 
-        with request.urlopen(url) as response:
-            xml_data = response.read()
+    # Use path-style URL because bucket name contains dots (boxes.10gen.com)
+    # amd64 toolchains don't have an architecture suffix, so we need to exclude arm64
+    if arch == "amd64":
+        arch_prefix = PREFIX
+        exclude_pattern = "-arm64-"
+    else:
+        arch_prefix = f"{PREFIX}-{arch}"
+        exclude_pattern = None
 
-        root = ET.fromstring(xml_data)
-        ns = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"}
-
-        objects = []
-        for content in root.findall("s3:Contents", ns):
-            key_elem = content.find("s3:Key", ns)
-            modified_elem = content.find("s3:LastModified", ns)
-
-            if key_elem is not None and modified_elem is not None:
-                objects.append(
-                    {
-                        "Key": key_elem.text,
-                        "LastModified": datetime.fromisoformat(
-                            modified_elem.text.replace("Z", "+00:00")
-                        ),
-                    }
-                )
-
-        return objects
-
-    except (HTTPError, URLError, ET.ParseError) as e:
-        print(f"Error querying S3: {e}", file=sys.stderr)
-        sys.exit(1)
-
-
-def find_latest(
-    bucket: str, prefix: str, exclude_pattern: Optional[str] = None
-) -> tuple[str, str, datetime]:
-    """Find most recently modified artifact."""
-    objects = list_s3_objects(bucket, prefix)
+    objects = list_s3_objects(BUCKET, arch_prefix, path_style=True)
 
     # Filter out excluded patterns
     if exclude_pattern:
         objects = [obj for obj in objects if exclude_pattern not in obj["Key"]]
 
     if not objects:
-        print(f"No artifacts found with prefix: {prefix}", file=sys.stderr)
+        print(f"No artifacts found with prefix: {arch_prefix}", file=sys.stderr)
         sys.exit(1)
 
     latest = max(objects, key=lambda x: x["LastModified"])
-    url = f"https://s3.amazonaws.com/{bucket}/{latest['Key']}"
+    url = f"https://s3.amazonaws.com/{BUCKET}/{latest['Key']}"
+    key = latest["Key"]
+    last_modified = latest["LastModified"]
 
-    print(f"Latest: {latest['Key']}", file=sys.stderr)
-    print(f"Modified: {latest['LastModified']}", file=sys.stderr)
-
-    return url, latest["Key"], latest["LastModified"]
-
-
-def download_file(url: str, output_path: str) -> None:
-    """Download file from URL."""
-    print(f"Downloading {url}...", file=sys.stderr)
-    try:
-        request.urlretrieve(url, output_path)
-        print(f"Saved to {output_path}", file=sys.stderr)
-    except (HTTPError, URLError) as e:
-        print(f"Download failed: {e}", file=sys.stderr)
-        sys.exit(1)
-
-
-def calculate_sha256(file_path: str) -> str:
-    """Calculate SHA256 checksum of file."""
-    sha256_hash = hashlib.sha256()
-    with open(file_path, "rb") as f:
-        for chunk in iter(lambda: f.read(8192), b""):
-            sha256_hash.update(chunk)
-    return sha256_hash.hexdigest()
-
-
-def fetch_toolchain_info(bucket: str, prefix: str, arch: str) -> dict:
-    """Fetch toolchain info for a specific architecture."""
-    print(f"\n🔍 Fetching {arch} toolchain...", file=sys.stderr)
-    # amd64 toolchains don't have an architecture suffix, so we need to exclude arm64
-    if arch == "amd64":
-        arch_prefix = prefix
-        exclude_pattern = "-arm64-"
-    else:
-        arch_prefix = f"{prefix}-{arch}"
-        exclude_pattern = None
-    url, key, last_modified = find_latest(bucket, arch_prefix, exclude_pattern)
+    print(f"Latest: {key}", file=sys.stderr)
+    print(f"Modified: {last_modified}", file=sys.stderr)
 
     # Download to temp location to calculate checksum
-    import tempfile
-
     with tempfile.NamedTemporaryFile(delete=False, suffix=".tar.gz") as tmp:
         tmp_path = tmp.name
 
@@ -130,16 +72,20 @@ def fetch_toolchain_info(bucket: str, prefix: str, arch: str) -> dict:
     }
 
 
-def generate_config(bucket: str, prefix: str, output_file: str) -> None:
-    """Generate locked toolchain configuration with SHA256 for both arm64 and amd64."""
+def main():
+    """Generate toolchain configuration file."""
     # Fetch both architectures
-    arm64_info = fetch_toolchain_info(bucket, prefix, "arm64")
-    amd64_info = fetch_toolchain_info(bucket, prefix, "amd64")
+    arm64_info = fetch_toolchain_info("arm64")
+    amd64_info = fetch_toolchain_info("amd64")
 
-    # Write config file with both architectures
+    # Determine output path
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    output_file = os.path.join(script_dir, "toolchain_config.env")
+
+    # Write config file
     with open(output_file, "w") as f:
         f.write("# Generated by toolchain.py\n")
-        f.write("# DO NOT EDIT MANUALLY - run: python3 toolchain.py generate\n")
+        f.write("# DO NOT EDIT MANUALLY - run: python3 toolchain.py\n")
         f.write("#\n")
         f.write(f"# Generated: {datetime.now().isoformat()}\n")
         f.write("\n")
@@ -160,53 +106,10 @@ def generate_config(bucket: str, prefix: str, output_file: str) -> None:
         f.write(f'TOOLCHAIN_AMD64_LAST_MODIFIED="{amd64_info["last_modified"].isoformat()}"\n')
 
     print(f"\n✅ Configuration written to: {output_file}", file=sys.stderr)
-    print(f"\nContents:", file=sys.stderr)
+    print("\nContents:", file=sys.stderr)
     with open(output_file) as f:
         print(f.read(), file=sys.stderr)
 
 
-def main():
-    parser = argparse.ArgumentParser(description="MongoDB Toolchain Management")
-    subparsers = parser.add_subparsers(dest="command", help="Command")
-
-    # find-latest: Just print the latest URL
-    find_parser = subparsers.add_parser("find-latest", help="Find latest toolchain URL")
-    find_parser.add_argument("--bucket", default="boxes.10gen.com")
-    find_parser.add_argument("--prefix", default="build/toolchain/mongodbtoolchain-ubuntu24")
-
-    # generate: Generate locked config
-    gen_parser = subparsers.add_parser("generate", help="Generate locked config with SHA256")
-    gen_parser.add_argument("--bucket", default="boxes.10gen.com")
-    gen_parser.add_argument("--prefix", default="build/toolchain/mongodbtoolchain-ubuntu24")
-    gen_parser.add_argument("--output", default="toolchain_config.env")
-
-    # download: Download to file
-    dl_parser = subparsers.add_parser("download", help="Download toolchain")
-    dl_parser.add_argument("--bucket", default="boxes.10gen.com")
-    dl_parser.add_argument("--prefix", default="build/toolchain/mongodbtoolchain-ubuntu24")
-    dl_parser.add_argument("--output", required=True, help="Output file path")
-
-    args = parser.parse_args()
-
-    if not args.command:
-        parser.print_help()
-        sys.exit(1)
-
-    if args.command == "find-latest":
-        url, _, _ = find_latest(args.bucket, args.prefix)
-        print(url)
-
-    elif args.command == "generate":
-        script_dir = os.path.dirname(os.path.abspath(__file__))
-        output_path = (
-            args.output if os.path.isabs(args.output) else os.path.join(script_dir, args.output)
-        )
-        generate_config(args.bucket, args.prefix, output_path)
-
-    elif args.command == "download":
-        url, _, _ = find_latest(args.bucket, args.prefix)
-        download_file(url, args.output)
-
-
 if __name__ == "__main__":
     main()
diff --git a/evergreen/devcontainer_test.sh b/evergreen/devcontainer_test.sh
index fa126d2ec9c..aa6ba06eeab 100755
--- a/evergreen/devcontainer_test.sh
+++ b/evergreen/devcontainer_test.sh
@@ -115,6 +115,10 @@ echo ""
 echo "Checking Git..."
 devcontainer_run git --version
 
+echo ""
+echo "Checking Evergreen CLI..."
+devcontainer_run evergreen --version
+
 echo ""
 echo "Checking clangd configuration..."
 if ! devcontainer_run test -f compile_commands.json; then