From f767ff9d383a30aa5f3ff694da9a83bbe005a46b Mon Sep 17 00:00:00 2001 From: Daniel Moody Date: Fri, 18 Jul 2025 13:45:40 -0500 Subject: [PATCH] SERVER-107613 add sha verification to dist_test (#38613) Co-authored-by: Zack Winter <3457246+zackwintermdb@users.noreply.github.com> GitOrigin-RevId: 5afdf036fb2e1009d1255e507af97b4e99dc3f5e --- .github/CODEOWNERS | 3 + buildscripts/s3_binary/BUILD.bazel | 7 + buildscripts/s3_binary/OWNERS.yml | 5 + buildscripts/s3_binary/download.py | 132 ++++++++++++++++-- buildscripts/s3_binary/sha256sum.py | 36 +++++ etc/evergreen_yml_components/definitions.yml | 25 ++++ .../tasks/compile_tasks.yml | 19 +++ .../tasks/compile_tasks_shared.yml | 37 +++++ evergreen/BUILD.bazel | 9 ++ evergreen/jstestshell_sha_check.py | 26 ++++ 10 files changed, 285 insertions(+), 14 deletions(-) create mode 100644 buildscripts/s3_binary/OWNERS.yml create mode 100644 buildscripts/s3_binary/sha256sum.py create mode 100644 evergreen/jstestshell_sha_check.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1db787a33a7..6419115d80e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -213,6 +213,9 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot # The following patterns are parsed from ./buildscripts/resmokelib/testing/testcases/OWNERS.yml /buildscripts/resmokelib/testing/testcases/**/query_tester_server_test.py @10gen/query-optimization @svc-auto-approve-bot +# The following patterns are parsed from ./buildscripts/s3_binary/OWNERS.yml +/buildscripts/s3_binary/ @10gen/devprod-build @svc-auto-approve-bot + # The following patterns are parsed from ./buildscripts/smoke_tests/OWNERS.yml /buildscripts/smoke_tests/**/server_programmability.yml @10gen/server-programmability @svc-auto-approve-bot /buildscripts/smoke_tests/**/catalog_and_routing.yml @10gen/server-catalog-and-routing @svc-auto-approve-bot diff --git a/buildscripts/s3_binary/BUILD.bazel b/buildscripts/s3_binary/BUILD.bazel index de6afa308b4..bfce16dbe12 100644 --- a/buildscripts/s3_binary/BUILD.bazel +++ b/buildscripts/s3_binary/BUILD.bazel @@ -24,5 +24,12 @@ py_library( visibility = ["//visibility:public"], deps = [ "hashes", + "//buildscripts/resmokelib", ], ) + +py_library( + name = "sha256sum", + srcs = ["sha256sum.py"], + visibility = ["//visibility:public"], +) diff --git a/buildscripts/s3_binary/OWNERS.yml b/buildscripts/s3_binary/OWNERS.yml new file mode 100644 index 00000000000..3569b0a551d --- /dev/null +++ b/buildscripts/s3_binary/OWNERS.yml @@ -0,0 +1,5 @@ +version: 2.0.0 +filters: + - "*": + approvers: + - 10gen/devprod-build diff --git a/buildscripts/s3_binary/download.py b/buildscripts/s3_binary/download.py index b0f9603e87a..d980f086278 100644 --- a/buildscripts/s3_binary/download.py +++ b/buildscripts/s3_binary/download.py @@ -1,15 +1,52 @@ #!/usr/bin/env python3 +import argparse import hashlib import os -import shutil +import sys import tempfile import time -import urllib.request +import traceback +sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from buildscripts.resmokelib.setup_multiversion.download import ( + download_from_s3_with_boto, + download_from_s3_with_requests, +) from buildscripts.s3_binary.hashes import S3_SHA256_HASHES +def read_sha_file(filename): + with open(filename) as f: + content = f.read() + return content.strip().split()[0] + +def _fetch_remote_sha256_hash(s3_path: str): + downloaded = False + result = None + tempfile_name = None + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + tempfile_name = temp_file.name + try: + download_from_s3_with_boto(s3_path + ".sha256", temp_file.name) + downloaded = True + except Exception: + try: + download_from_s3_with_requests(s3_path + ".sha256", temp_file.name) + downloaded = True + except Exception: + pass + + if downloaded: + result = read_sha_file(tempfile_name) + + if tempfile_name and os.path.exists(tempfile_name): + os.unlink(tempfile_name) + + return result + + def _sha256_file(filename: str) -> str: sha256_hash = hashlib.sha256() with open(filename, "rb") as f: @@ -24,14 +61,49 @@ def _verify_s3_hash(s3_path: str, local_path: str, expected_hash: str) -> None: raise ValueError( f"Hash mismatch for {s3_path}, expected {expected_hash} but got {hash_string}" ) + print(f"File is valid: {local_path} (sha256: {expected_hash})") +def validate_file(s3_path, output_path, remote_sha_allowed): + hexdigest = S3_SHA256_HASHES.get(s3_path) + if hexdigest: + print(f"Validating against hard coded sha256: {hexdigest}") + _verify_s3_hash(s3_path, output_path, hexdigest) + return True + + if not remote_sha_allowed: + raise ValueError(f"No SHA256 hash available for {s3_path}") -def _download_path_with_retry(*args, **kwargs): + if os.path.exists(output_path + ".sha256"): + hexdigest = read_sha_file(output_path + ".sha256") + print(f"Validating against sh256 file {hexdigest}\n{output_path}.sha256") + else: + hexdigest = _fetch_remote_sha256_hash(s3_path) + if hexdigest: + print(f"Validating against remote sha256 {hexdigest}\n({s3_path}.sha256)") + else: + print(f"Failed to download remote sha256 at {s3_path}.sha256)") + + if hexdigest: + _verify_s3_hash(s3_path, output_path, hexdigest) + return True + else: + raise ValueError(f"No SHA256 hash available for {s3_path}") + + +def _download_and_verify(s3_path, output_path, remote_sha_allowed): for i in range(5): try: - return urllib.request.urlretrieve(*args, **kwargs) - except Exception as e: - print(f"Download failed: {e}") + print(f"Downloading {s3_path}...") + try: + download_from_s3_with_boto(s3_path, output_path) + except Exception: + download_from_s3_with_requests(s3_path, output_path) + + validate_file(s3_path, output_path, remote_sha_allowed) + + except Exception: + print("Download failed:") + traceback.print_exc() if i == 4: raise print("Retrying download...") @@ -42,14 +114,46 @@ def _download_path_with_retry(*args, **kwargs): def download_s3_binary( s3_path: str, local_path: str = None, -) -> None: + remote_sha_allowed=False, +) -> bool: if local_path is None: local_path = s3_path.split("/")[-1] + + if os.path.exists(local_path): + try: + print(f"{local_path} exists, validating...") + validate_file(s3_path, local_path, remote_sha_allowed) + print(f"File is already valid: {local_path}") + return True + except Exception: + print("File is invalid, redownloading...") + tempfile_name = None - with tempfile.NamedTemporaryFile(delete=False) as temp_file: - tempfile_name = temp_file.name - _download_path_with_retry(s3_path, temp_file.name) - _verify_s3_hash(s3_path, temp_file.name, S3_SHA256_HASHES[s3_path]) - shutil.copy(temp_file.name, local_path) - if tempfile_name and os.path.exists(tempfile_name): - os.unlink(tempfile_name) + try: + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + tempfile_name = temp_file.name + _download_and_verify(s3_path, tempfile_name, remote_sha_allowed) + os.replace(tempfile_name, local_path) + print(f"Downloaded and verified {s3_path} -> {local_path}") + return True + except Exception as e: + print(f"Download failed for {s3_path}: {e}") + traceback.print_exc() + return False + finally: + if tempfile_name and os.path.exists(tempfile_name): + os.unlink(tempfile_name) + + +if __name__ == "__main__": + + + parser = argparse.ArgumentParser(description="Download and verify S3 binary.") + parser.add_argument("s3_path", help="S3 URL to download from") + parser.add_argument("local_path", nargs="?", help="Optional output file path") + parser.add_argument("--remote-sha", action="store_true", help="Allow remote .sha256 lookup") + + args = parser.parse_args() + + if not download_s3_binary(args.s3_path, args.local_path, args.remote_sha): + sys.exit(1) diff --git a/buildscripts/s3_binary/sha256sum.py b/buildscripts/s3_binary/sha256sum.py new file mode 100644 index 00000000000..fbfdebcd46d --- /dev/null +++ b/buildscripts/s3_binary/sha256sum.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +import hashlib +import os +import sys + + +def compute_sha256(file_path: str) -> str: + sha256 = hashlib.sha256() + with open(file_path, "rb") as f: + for block in iter(lambda: f.read(4096), b""): + sha256.update(block) + return sha256.hexdigest() + +def write_sha256_file(file_path: str, hash_value: str): + sha256_path = file_path + ".sha256" + file_name = os.path.basename(file_path) + with open(sha256_path, "w") as f: + f.write(f"{hash_value} {file_name}\n") + print(f"Wrote SHA-256 to {sha256_path}") + +def main(): + if len(sys.argv) != 2: + print("Usage: sha256sum.py ") + sys.exit(1) + + file_path = sys.argv[1] + if not os.path.isfile(file_path): + print(f"Error: '{file_path}' is not a valid file.") + sys.exit(1) + + hash_value = compute_sha256(file_path) + write_sha256_file(file_path, hash_value) + +if __name__ == "__main__": + main() diff --git a/etc/evergreen_yml_components/definitions.yml b/etc/evergreen_yml_components/definitions.yml index 46051bc289c..c789f4b7e55 100644 --- a/etc/evergreen_yml_components/definitions.yml +++ b/etc/evergreen_yml_components/definitions.yml @@ -287,6 +287,18 @@ functions: bucket: mciuploads local_file: src/mongo-binaries.tgz + "verify binaries sha": &verify_binaries_sha + command: subprocess.exec + params: + binary: bash + add_expansions_to_env: true + args: + - "src/evergreen/run_python_script.sh" + - "buildscripts/s3_binary/download.py" + - "https://mciuploads.s3.amazonaws.com/${mongo_binaries}" + - "mongo-binaries.tgz" + - "--remote-sha" + "fetch jstestshell": &fetch_jstestshell command: s3.get display_name: "fetch jstestshell" @@ -298,6 +310,17 @@ functions: local_file: src/mongodb-jstestshell.tgz optional: true + "verify jstestshell sha": &verify_jstestshell_sha + command: subprocess.exec + params: + binary: bash + add_expansions_to_env: true + args: + - "src/evergreen/run_python_script.sh" + - "evergreen/jstestshell_sha_check.py" + - "https://mciuploads.s3.amazonaws.com/${mongo_jstestshell}" + - "mongodb-jstestshell.tgz" + "write mongo binary URL to downstream_expansions.yml": &echo_mongo_binary_url command: subprocess.exec params: @@ -761,7 +784,9 @@ functions: - *fetch_venv - *adjust_venv - *fetch_binaries + - *verify_binaries_sha - *fetch_jstestshell + - *verify_jstestshell_sha - *extract_binaries - *extract_jstestshell - *get_buildnumber diff --git a/etc/evergreen_yml_components/tasks/compile_tasks.yml b/etc/evergreen_yml_components/tasks/compile_tasks.yml index 181973ef671..e8ea7e4b2ab 100644 --- a/etc/evergreen_yml_components/tasks/compile_tasks.yml +++ b/etc/evergreen_yml_components/tasks/compile_tasks.yml @@ -623,6 +623,15 @@ tasks: --linkstatic=True --dbg=True --opt=on + - func: "f_expansions_write" + - command: subprocess.exec + params: + binary: bash + add_expansions_to_env: true + args: + - "src/evergreen/run_python_script.sh" + - "buildscripts/s3_binary/sha256sum.py" + - "bazel-bin/mongo-stripped.${ext|tgz}" - command: s3.put params: aws_key: ${aws_key} @@ -633,6 +642,16 @@ tasks: permissions: public-read content_type: ${content_type|application/gzip} display_name: Jstestshell + - command: s3.put + params: + aws_key: ${aws_key} + aws_secret: ${aws_secret} + local_file: src/bazel-bin/mongo-stripped.${ext|tgz}.sha256 + remote_file: ${mongo_jstestshell}.sha256 + bucket: mciuploads + permissions: public-read + content_type: text/plain + display_name: Jstestshell SHA256 - name: archive_jstestshell_debug tags: ["assigned_to_jira_team_devprod_build", "auxiliary"] diff --git a/etc/evergreen_yml_components/tasks/compile_tasks_shared.yml b/etc/evergreen_yml_components/tasks/compile_tasks_shared.yml index 2a8ba2860dd..c4ae71d0385 100644 --- a/etc/evergreen_yml_components/tasks/compile_tasks_shared.yml +++ b/etc/evergreen_yml_components/tasks/compile_tasks_shared.yml @@ -139,6 +139,14 @@ tasks: - "bazel-bin/dist-test-stripped.${ext|tgz}" - func: "BOLT" + - command: subprocess.exec + params: + binary: bash + add_expansions_to_env: true + args: + - "src/evergreen/run_python_script.sh" + - "buildscripts/s3_binary/sha256sum.py" + - "bazel-bin/dist-test-stripped.${ext|tgz}" - command: s3.put params: optional: true @@ -151,6 +159,17 @@ tasks: content_type: application/gzip # Sys-perf relies on this display name, please reach out before changing it. display_name: Binaries + - command: s3.put + params: + optional: true + aws_key: ${aws_key} + aws_secret: ${aws_secret} + local_file: src/bazel-bin/dist-test-stripped.${ext|tgz}.sha256 + remote_file: ${mongo_binaries}.sha256 + bucket: mciuploads + permissions: public-read + content_type: text/plain + display_name: Binaries SHA256 - func: "f_expansions_write" - func: "gen feature flags" @@ -433,6 +452,14 @@ tasks: permissions: public-read content_type: application/tar display_name: Dist Debugsymbols + - command: subprocess.exec + params: + binary: bash + add_expansions_to_env: true + args: + - "src/evergreen/run_python_script.sh" + - "buildscripts/s3_binary/sha256sum.py" + - "bazel-bin/mongo-stripped.${ext|tgz}" - command: s3.put params: aws_key: ${aws_key} @@ -443,6 +470,16 @@ tasks: permissions: public-read content_type: ${content_type|application/gzip} display_name: Jstestshell + - command: s3.put + params: + aws_key: ${aws_key} + aws_secret: ${aws_secret} + local_file: src/bazel-bin/mongo-stripped.${ext|tgz}.sha256 + remote_file: ${mongo_jstestshell}.sha256 + bucket: mciuploads + permissions: public-read + content_type: text/plain + display_name: Jstestshell SHA256 - command: s3.put params: aws_key: ${aws_key} diff --git a/evergreen/BUILD.bazel b/evergreen/BUILD.bazel index b8bff72ee06..18fc751e289 100644 --- a/evergreen/BUILD.bazel +++ b/evergreen/BUILD.bazel @@ -563,6 +563,15 @@ py_binary( visibility = ["//visibility:public"], ) +py_library( + name = "jstestshell_sha_check", + srcs = ["jstestshell_sha_check.py"], + visibility = ["//visibility:public"], + deps = [ + "//buildscripts/s3_binary:download", + ], +) + # TODO(SERVER-105817): The following library is autogenerated, please split these out into individual python targets py_library( name = "all_python_files", diff --git a/evergreen/jstestshell_sha_check.py b/evergreen/jstestshell_sha_check.py new file mode 100644 index 00000000000..247c0ad87b8 --- /dev/null +++ b/evergreen/jstestshell_sha_check.py @@ -0,0 +1,26 @@ +import argparse +import sys + +import requests + +from buildscripts.s3_binary.download import download_s3_binary + + +def url_exists(url, timeout=5): + try: + response = requests.head(url, allow_redirects=True, timeout=timeout) + return response.status_code == 200 + except requests.RequestException: + return False + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Download and verify S3 binary.") + parser.add_argument("s3_path", help="S3 URL to download from") + parser.add_argument("local_path", nargs="?", help="Optional output file path") + + args = parser.parse_args() + + if url_exists(args.s3_path): + if not download_s3_binary(args.s3_path, args.local_path, True): + sys.exit(1) \ No newline at end of file