SERVER-107613 add sha verification to dist_test (#38613)

Co-authored-by: Zack Winter <3457246+zackwintermdb@users.noreply.github.com>
GitOrigin-RevId: 5afdf036fb2e1009d1255e507af97b4e99dc3f5e
This commit is contained in:
Daniel Moody 2025-07-18 13:45:40 -05:00 committed by MongoDB Bot
parent fe5382fb8b
commit f767ff9d38
10 changed files with 285 additions and 14 deletions

3
.github/CODEOWNERS vendored
View File

@ -213,6 +213,9 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/resmokelib/testing/testcases/OWNERS.yml # The following patterns are parsed from ./buildscripts/resmokelib/testing/testcases/OWNERS.yml
/buildscripts/resmokelib/testing/testcases/**/query_tester_server_test.py @10gen/query-optimization @svc-auto-approve-bot /buildscripts/resmokelib/testing/testcases/**/query_tester_server_test.py @10gen/query-optimization @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/s3_binary/OWNERS.yml
/buildscripts/s3_binary/ @10gen/devprod-build @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/smoke_tests/OWNERS.yml # The following patterns are parsed from ./buildscripts/smoke_tests/OWNERS.yml
/buildscripts/smoke_tests/**/server_programmability.yml @10gen/server-programmability @svc-auto-approve-bot /buildscripts/smoke_tests/**/server_programmability.yml @10gen/server-programmability @svc-auto-approve-bot
/buildscripts/smoke_tests/**/catalog_and_routing.yml @10gen/server-catalog-and-routing @svc-auto-approve-bot /buildscripts/smoke_tests/**/catalog_and_routing.yml @10gen/server-catalog-and-routing @svc-auto-approve-bot

View File

@ -24,5 +24,12 @@ py_library(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
"hashes", "hashes",
"//buildscripts/resmokelib",
], ],
) )
py_library(
name = "sha256sum",
srcs = ["sha256sum.py"],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,5 @@
version: 2.0.0
filters:
- "*":
approvers:
- 10gen/devprod-build

View File

@ -1,15 +1,52 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import argparse
import hashlib import hashlib
import os import os
import shutil import sys
import tempfile import tempfile
import time import time
import urllib.request import traceback
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from buildscripts.resmokelib.setup_multiversion.download import (
download_from_s3_with_boto,
download_from_s3_with_requests,
)
from buildscripts.s3_binary.hashes import S3_SHA256_HASHES from buildscripts.s3_binary.hashes import S3_SHA256_HASHES
def read_sha_file(filename):
    """Return the hex digest stored in a ``sha256sum``-style file.

    Only the first whitespace-separated token of the file is returned; any
    trailing file name is ignored.

    Args:
        filename: Path of the ``.sha256`` file to read.

    Returns:
        The first token of the file, as a string.

    Raises:
        ValueError: If the file is empty or contains only whitespace.
    """
    with open(filename) as f:
        tokens = f.read().split()
    if not tokens:
        # Report the offending file instead of a bare "list index out of range".
        raise ValueError(f"No SHA256 hash found in {filename}")
    return tokens[0]
def _fetch_remote_sha256_hash(s3_path: str):
    """Download ``<s3_path>.sha256`` and return the digest it contains.

    The download is attempted with ``download_from_s3_with_boto`` first and
    falls back to ``download_from_s3_with_requests`` if that raises.

    Args:
        s3_path: URL of the artifact; ``.sha256`` is appended to it.

    Returns:
        The digest read from the downloaded file, or ``None`` when both
        download attempts fail (best effort: the caller decides what to do).
    """
    sha_url = s3_path + ".sha256"

    # Only the path is needed; both downloaders are handed the file name.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        tempfile_name = temp_file.name

    try:
        try:
            download_from_s3_with_boto(sha_url, tempfile_name)
        except Exception:
            try:
                download_from_s3_with_requests(sha_url, tempfile_name)
            except Exception as e:
                # Still best effort, but say why instead of failing silently.
                print(f"Could not fetch {sha_url}: {e}")
                return None
        return read_sha_file(tempfile_name)
    finally:
        # Always remove the temp file, including when parsing the hash raises.
        if os.path.exists(tempfile_name):
            os.unlink(tempfile_name)
def _sha256_file(filename: str) -> str: def _sha256_file(filename: str) -> str:
sha256_hash = hashlib.sha256() sha256_hash = hashlib.sha256()
with open(filename, "rb") as f: with open(filename, "rb") as f:
@ -24,14 +61,49 @@ def _verify_s3_hash(s3_path: str, local_path: str, expected_hash: str) -> None:
raise ValueError( raise ValueError(
f"Hash mismatch for {s3_path}, expected {expected_hash} but got {hash_string}" f"Hash mismatch for {s3_path}, expected {expected_hash} but got {hash_string}"
) )
print(f"File is valid: {local_path} (sha256: {expected_hash})")
def validate_file(s3_path, output_path, remote_sha_allowed):
    """Check the file at ``output_path`` against the SHA256 known for ``s3_path``.

    The expected digest is looked up in this order:
      1. the hard coded ``S3_SHA256_HASHES`` table;
      2. (only if ``remote_sha_allowed``) a local ``<output_path>.sha256`` file;
      3. (only if ``remote_sha_allowed``) the remote ``<s3_path>.sha256`` file.

    Args:
        s3_path: URL the file was (or would be) downloaded from.
        output_path: Local file to validate.
        remote_sha_allowed: Whether digests that are not hard coded may be used.

    Returns:
        True when the file matches the expected digest.

    Raises:
        ValueError: If no digest could be found for ``s3_path``, or (raised by
            ``_verify_s3_hash``) if the file does not match it.
    """
    hexdigest = S3_SHA256_HASHES.get(s3_path)
    if hexdigest:
        print(f"Validating against hard coded sha256: {hexdigest}")
        _verify_s3_hash(s3_path, output_path, hexdigest)
        return True

    if not remote_sha_allowed:
        raise ValueError(f"No SHA256 hash available for {s3_path}")

    sha_file = output_path + ".sha256"
    if os.path.exists(sha_file):
        hexdigest = read_sha_file(sha_file)
        print(f"Validating against sha256 file {hexdigest}\n({sha_file})")
    else:
        hexdigest = _fetch_remote_sha256_hash(s3_path)
        if hexdigest:
            print(f"Validating against remote sha256 {hexdigest}\n({s3_path}.sha256)")
        else:
            print(f"Failed to download remote sha256 at {s3_path}.sha256")

    if not hexdigest:
        raise ValueError(f"No SHA256 hash available for {s3_path}")

    _verify_s3_hash(s3_path, output_path, hexdigest)
    return True
def _download_and_verify(s3_path, output_path, remote_sha_allowed):
for i in range(5): for i in range(5):
try: try:
return urllib.request.urlretrieve(*args, **kwargs) print(f"Downloading {s3_path}...")
except Exception as e: try:
print(f"Download failed: {e}") download_from_s3_with_boto(s3_path, output_path)
except Exception:
download_from_s3_with_requests(s3_path, output_path)
validate_file(s3_path, output_path, remote_sha_allowed)
except Exception:
print("Download failed:")
traceback.print_exc()
if i == 4: if i == 4:
raise raise
print("Retrying download...") print("Retrying download...")
@ -42,14 +114,46 @@ def _download_path_with_retry(*args, **kwargs):
def download_s3_binary(
    s3_path: str,
    local_path: str = None,
    remote_sha_allowed=False,
) -> bool:
    """Download ``s3_path`` to ``local_path`` and verify its SHA256.

    If ``local_path`` already exists and validates, nothing is downloaded.
    Otherwise the file is downloaded to a temporary file, verified, and only
    then moved over ``local_path``.

    Args:
        s3_path: URL to download from.
        local_path: Destination path; defaults to the last path component of
            ``s3_path`` in the current directory.
        remote_sha_allowed: Passed through to validation; allows digests that
            are not hard coded in ``S3_SHA256_HASHES``.

    Returns:
        True if a valid file is present at ``local_path`` afterwards, False if
        the download or verification failed (the error is printed).
    """
    if local_path is None:
        local_path = s3_path.split("/")[-1]

    if os.path.exists(local_path):
        try:
            print(f"{local_path} exists, validating...")
            validate_file(s3_path, local_path, remote_sha_allowed)
            print(f"File is already valid: {local_path}")
            return True
        except Exception:
            print("File is invalid, redownloading...")

    tempfile_name = None
    try:
        # Stage the download next to the destination: os.replace() can fail
        # when source and destination are on different filesystems, which
        # the default temp directory may be.
        target_dir = os.path.dirname(os.path.abspath(local_path))
        with tempfile.NamedTemporaryFile(delete=False, dir=target_dir) as temp_file:
            tempfile_name = temp_file.name

        _download_and_verify(s3_path, tempfile_name, remote_sha_allowed)
        os.replace(tempfile_name, local_path)

        print(f"Downloaded and verified {s3_path} -> {local_path}")
        return True
    except Exception as e:
        print(f"Download failed for {s3_path}: {e}")
        traceback.print_exc()
        return False
    finally:
        # After a successful replace the temp name no longer exists.
        if tempfile_name and os.path.exists(tempfile_name):
            os.unlink(tempfile_name)
if __name__ == "__main__":
    # Command line: download.py <s3_path> [local_path] [--remote-sha]
    cli = argparse.ArgumentParser(description="Download and verify S3 binary.")
    cli.add_argument("s3_path", help="S3 URL to download from")
    cli.add_argument("local_path", nargs="?", help="Optional output file path")
    cli.add_argument("--remote-sha", action="store_true", help="Allow remote .sha256 lookup")
    options = cli.parse_args()

    succeeded = download_s3_binary(options.s3_path, options.local_path, options.remote_sha)
    if not succeeded:
        # Non-zero exit status when the download or verification failed.
        sys.exit(1)

View File

@ -0,0 +1,36 @@
#!/usr/bin/env python3
import hashlib
import os
import sys
def compute_sha256(file_path: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``file_path``."""
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        # Read in 4096-byte chunks so the whole file is never held in memory.
        while True:
            chunk = stream.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
def write_sha256_file(file_path: str, hash_value: str):
    """Write ``hash_value`` and the file's base name to ``<file_path>.sha256``.

    The output is a single line holding the hash followed by the base name of
    ``file_path``. The path of the written file is printed.
    """
    target = file_path + ".sha256"
    line = f"{hash_value} {os.path.basename(file_path)}\n"
    with open(target, "w") as out:
        out.write(line)
    print(f"Wrote SHA-256 to {target}")
def main():
    """Write a ``.sha256`` file next to the single file named on the command line.

    Exits with status 1 (after printing a message) when the argument count is
    wrong or the argument is not an existing regular file.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: sha256sum.py <file>")
        sys.exit(1)

    (file_path,) = cli_args
    if not os.path.isfile(file_path):
        print(f"Error: '{file_path}' is not a valid file.")
        sys.exit(1)

    write_sha256_file(file_path, compute_sha256(file_path))


if __name__ == "__main__":
    main()

View File

@ -287,6 +287,18 @@ functions:
bucket: mciuploads bucket: mciuploads
local_file: src/mongo-binaries.tgz local_file: src/mongo-binaries.tgz
"verify binaries sha": &verify_binaries_sha
command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "buildscripts/s3_binary/download.py"
- "https://mciuploads.s3.amazonaws.com/${mongo_binaries}"
- "mongo-binaries.tgz"
- "--remote-sha"
"fetch jstestshell": &fetch_jstestshell "fetch jstestshell": &fetch_jstestshell
command: s3.get command: s3.get
display_name: "fetch jstestshell" display_name: "fetch jstestshell"
@ -298,6 +310,17 @@ functions:
local_file: src/mongodb-jstestshell.tgz local_file: src/mongodb-jstestshell.tgz
optional: true optional: true
"verify jstestshell sha": &verify_jstestshell_sha
command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "evergreen/jstestshell_sha_check.py"
- "https://mciuploads.s3.amazonaws.com/${mongo_jstestshell}"
- "mongodb-jstestshell.tgz"
"write mongo binary URL to downstream_expansions.yml": &echo_mongo_binary_url "write mongo binary URL to downstream_expansions.yml": &echo_mongo_binary_url
command: subprocess.exec command: subprocess.exec
params: params:
@ -761,7 +784,9 @@ functions:
- *fetch_venv - *fetch_venv
- *adjust_venv - *adjust_venv
- *fetch_binaries - *fetch_binaries
- *verify_binaries_sha
- *fetch_jstestshell - *fetch_jstestshell
- *verify_jstestshell_sha
- *extract_binaries - *extract_binaries
- *extract_jstestshell - *extract_jstestshell
- *get_buildnumber - *get_buildnumber

View File

@ -623,6 +623,15 @@ tasks:
--linkstatic=True --linkstatic=True
--dbg=True --dbg=True
--opt=on --opt=on
- func: "f_expansions_write"
- command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "buildscripts/s3_binary/sha256sum.py"
- "bazel-bin/mongo-stripped.${ext|tgz}"
- command: s3.put - command: s3.put
params: params:
aws_key: ${aws_key} aws_key: ${aws_key}
@ -633,6 +642,16 @@ tasks:
permissions: public-read permissions: public-read
content_type: ${content_type|application/gzip} content_type: ${content_type|application/gzip}
display_name: Jstestshell display_name: Jstestshell
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: src/bazel-bin/mongo-stripped.${ext|tgz}.sha256
remote_file: ${mongo_jstestshell}.sha256
bucket: mciuploads
permissions: public-read
content_type: text/plain
display_name: Jstestshell SHA256
- name: archive_jstestshell_debug - name: archive_jstestshell_debug
tags: ["assigned_to_jira_team_devprod_build", "auxiliary"] tags: ["assigned_to_jira_team_devprod_build", "auxiliary"]

View File

@ -139,6 +139,14 @@ tasks:
- "bazel-bin/dist-test-stripped.${ext|tgz}" - "bazel-bin/dist-test-stripped.${ext|tgz}"
- func: "BOLT" - func: "BOLT"
- command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "buildscripts/s3_binary/sha256sum.py"
- "bazel-bin/dist-test-stripped.${ext|tgz}"
- command: s3.put - command: s3.put
params: params:
optional: true optional: true
@ -151,6 +159,17 @@ tasks:
content_type: application/gzip content_type: application/gzip
# Sys-perf relies on this display name, please reach out before changing it. # Sys-perf relies on this display name, please reach out before changing it.
display_name: Binaries display_name: Binaries
- command: s3.put
params:
optional: true
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: src/bazel-bin/dist-test-stripped.${ext|tgz}.sha256
remote_file: ${mongo_binaries}.sha256
bucket: mciuploads
permissions: public-read
content_type: text/plain
display_name: Binaries SHA256
- func: "f_expansions_write" - func: "f_expansions_write"
- func: "gen feature flags" - func: "gen feature flags"
@ -433,6 +452,14 @@ tasks:
permissions: public-read permissions: public-read
content_type: application/tar content_type: application/tar
display_name: Dist Debugsymbols display_name: Dist Debugsymbols
- command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "buildscripts/s3_binary/sha256sum.py"
- "bazel-bin/mongo-stripped.${ext|tgz}"
- command: s3.put - command: s3.put
params: params:
aws_key: ${aws_key} aws_key: ${aws_key}
@ -443,6 +470,16 @@ tasks:
permissions: public-read permissions: public-read
content_type: ${content_type|application/gzip} content_type: ${content_type|application/gzip}
display_name: Jstestshell display_name: Jstestshell
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: src/bazel-bin/mongo-stripped.${ext|tgz}.sha256
remote_file: ${mongo_jstestshell}.sha256
bucket: mciuploads
permissions: public-read
content_type: text/plain
display_name: Jstestshell SHA256
- command: s3.put - command: s3.put
params: params:
aws_key: ${aws_key} aws_key: ${aws_key}

View File

@ -563,6 +563,15 @@ py_binary(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
) )
py_library(
name = "jstestshell_sha_check",
srcs = ["jstestshell_sha_check.py"],
visibility = ["//visibility:public"],
deps = [
"//buildscripts/s3_binary:download",
],
)
# TODO(SERVER-105817): The following library is autogenerated, please split these out into individual python targets # TODO(SERVER-105817): The following library is autogenerated, please split these out into individual python targets
py_library( py_library(
name = "all_python_files", name = "all_python_files",

View File

@ -0,0 +1,26 @@
import argparse
import sys
import requests
from buildscripts.s3_binary.download import download_s3_binary
def url_exists(url, timeout=5):
    """Return True if a HEAD request to ``url`` (following redirects) answers 200.

    Any ``requests.RequestException`` raised by the request is treated as
    "does not exist" and yields False.
    """
    try:
        status = requests.head(url, allow_redirects=True, timeout=timeout).status_code
    except requests.RequestException:
        return False
    return status == 200
if __name__ == "__main__":
    # Command line: jstestshell_sha_check.py <s3_path> [local_path]
    cli = argparse.ArgumentParser(description="Download and verify S3 binary.")
    cli.add_argument("s3_path", help="S3 URL to download from")
    cli.add_argument("local_path", nargs="?", help="Optional output file path")
    options = cli.parse_args()

    # When the URL does not exist nothing is checked and the script exits 0;
    # presumably because the artifact is optional - confirm with the caller.
    if url_exists(options.s3_path) and not download_s3_binary(
        options.s3_path, options.local_path, True
    ):
        sys.exit(1)