SERVER-107836 fix s3_binary downloader deps (#38807)

GitOrigin-RevId: 155043e7c1f0d8a09280a0fcb01c1c915fa34776
Daniel Moody 2025-07-21 11:00:13 -05:00 committed by MongoDB Bot
parent 0999fa0d5f
commit 278dc570d7
9 changed files with 96 additions and 72 deletions

View File

@@ -105,7 +105,7 @@ def install_modules(bazel):
    with open(lockfile_hash_file, "w") as f:
        f.write(current_hash)
    deps = ["retry", "gitpython", "requests", "timeout-decorator"]
    deps = ["retry", "gitpython", "requests", "timeout-decorator", "boto3"]
    deps_installed = []
    deps_needed = search_for_modules(
        deps, deps_installed, lockfile_changed=old_hash != current_hash
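Note: the hunk above only adds "boto3" to the dependency list; search_for_modules and the actual install step are defined elsewhere in this script. A minimal sketch of the lockfile-hash gating pattern it relies on, with hypothetical helper names (lockfile_hash, ensure_modules):

import hashlib
import subprocess
import sys


def lockfile_hash(path: str) -> str:
    # Digest of the lockfile; installation is skipped when it has not changed.
    with open(path, "rb") as f:
        return hashlib.sha256(f.read()).hexdigest()


def ensure_modules(deps: list[str], lockfile: str, hash_file: str) -> None:
    current = lockfile_hash(lockfile)
    try:
        with open(hash_file) as f:
            old = f.read().strip()
    except FileNotFoundError:
        old = ""
    if old != current:
        # Reinstall the pinned modules whenever the lockfile changed.
        subprocess.check_call([sys.executable, "-m", "pip", "install", *deps])
        with open(hash_file, "w") as f:
            f.write(current)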

View File

@@ -10,11 +10,13 @@ import tarfile
import zipfile
from urllib.parse import parse_qs, urlparse
import requests
import structlog
from buildscripts.resmokelib.utils import archival
from buildscripts.resmokelib.utils.filesystem import build_hygienic_bin_path, mkdtemp_in_build_dir
from buildscripts.util.download_utils import (
    download_from_s3_with_boto,
    download_from_s3_with_requests,
)
S3_BUCKET = "mciuploads"
@@ -35,35 +37,6 @@ def is_s3_presigned_url(url: str) -> bool:
    return "X-Amz-Signature" in qs


def extract_s3_bucket_key(url: str) -> tuple[str, str]:
    """
    Extracts the S3 bucket name and object key from an HTTP(s) S3 URL.

    Supports both:
    - https://bucket.s3.amazonaws.com/key/
    - https://bucket.s3.<region>.amazonaws.com/key/

    Returns:
        (bucket, key)
    """
    parsed = urlparse(url)
    # Hostname labels, e.g. ["bucket","s3","us-east-1","amazonaws","com"]
    bucket = parsed.hostname.split(".")[0]
    key = parsed.path.lstrip("/")
    return bucket, key


def download_from_s3_with_requests(url, output_file):
    with requests.get(url, stream=True) as reader:
        with open(output_file, "wb") as file_handle:
            shutil.copyfileobj(reader.raw, file_handle)


def download_from_s3_with_boto(url, output_file):
    bucket_name, object_key = extract_s3_bucket_key(url)
    s3_client = archival.Archival._get_s3_client()
    s3_client.download_file(bucket_name, object_key, output_file)


def download_from_s3(url):
    """Download file from S3 bucket by a given URL."""

View File

@@ -36,5 +36,6 @@ py_library(
        dependency(
            "botocore",
            group = "aws",
        ),
        "//buildscripts/util:download_utils",
    ],
)

View File

@@ -12,6 +12,7 @@ import threading
import time
from buildscripts.resmokelib import config
from buildscripts.util.download_utils import get_s3_client
_IS_WINDOWS = sys.platform in ("win32", "cygwin")
@@ -127,7 +128,7 @@ class Archival(object):
        self._archive_file_worker.setDaemon(True)
        self._archive_file_worker.start()
        if not s3_client:
            self.s3_client = self._get_s3_client()
            self.s3_client = get_s3_client()
        else:
            self.s3_client = s3_client
@@ -141,37 +142,6 @@ class Archival(object):
        self._upload_worker.setDaemon(True)
        self._upload_worker.start()

    @staticmethod
    def _get_s3_client():
        # Since boto3 is a 3rd party module, we import locally.
        import boto3
        import botocore.session

        botocore.session.Session()
        if sys.platform in ("win32", "cygwin"):
            # These overridden values can be found here:
            # https://github.com/boto/botocore/blob/13468bc9d8923eccd0816ce2dd9cd8de5a6f6e0e/botocore/configprovider.py#L49C7-L49C7
            # This is due to the backwards-breaking change Python introduced in https://bugs.python.org/issue36264
            botocore_session = botocore.session.Session(
                session_vars={
                    "config_file": (
                        None,
                        "AWS_CONFIG_FILE",
                        os.path.join(os.environ["HOME"], ".aws", "config"),
                        None,
                    ),
                    "credentials_file": (
                        None,
                        "AWS_SHARED_CREDENTIALS_FILE",
                        os.path.join(os.environ["HOME"], ".aws", "credentials"),
                        None,
                    ),
                }
            )
            boto3.setup_default_session(botocore_session=botocore_session)
        return boto3.client("s3")

    def archive_files_to_s3(self, display_name, input_files, s3_bucket, s3_path):
        """Archive 'input_files' to 's3_bucket' and 's3_path'.

View File

@@ -24,7 +24,7 @@ py_library(
    visibility = ["//visibility:public"],
    deps = [
        "hashes",
        "//buildscripts/resmokelib",
        "//buildscripts/util:download_utils",
    ],
)

View File

@@ -9,12 +9,11 @@ import time
import traceback
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from buildscripts.resmokelib.setup_multiversion.download import (
from buildscripts.s3_binary.hashes import S3_SHA256_HASHES
from buildscripts.util.download_utils import (
    download_from_s3_with_boto,
    download_from_s3_with_requests,
)
from buildscripts.s3_binary.hashes import S3_SHA256_HASHES
def read_sha_file(filename):
@@ -121,9 +120,8 @@ def download_s3_binary(
    if os.path.exists(local_path):
        try:
            print(f"{local_path} exists, validating...")
            print(f"Downloaded file {local_path} already exists, validating...")
            validate_file(s3_path, local_path, remote_sha_allowed)
            print(f"File is already valid: {local_path}")
            return True
        except Exception:
            print("File is invalid, redownloading...")
@@ -147,7 +145,6 @@ def download_s3_binary(
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Download and verify S3 binary.")
    parser.add_argument("s3_path", help="S3 URL to download from")
    parser.add_argument("local_path", nargs="?", help="Optional output file path")

View File

@@ -8,6 +8,7 @@ import tempfile
import unittest
from buildscripts.resmokelib.utils import archival
from buildscripts.util.download_utils import get_s3_client
_BUCKET = "mongodatafiles"
@@ -48,7 +49,7 @@ class ArchivalTestCase(unittest.TestCase):
        if mock_client:
            cls.s3_client = MockS3Client(cls.logger)
        else:
            cls.s3_client = archival.Archival._get_s3_client()
            cls.s3_client = get_s3_client()
        cls.archive = cls.create_archival()

    @classmethod
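MockS3Client is defined elsewhere in this test module; the point of the branch above is that unit runs can avoid real S3 entirely. An illustrative stand-in (not the repo's implementation) showing the minimal surface such a fake needs:

class FakeS3Client:
    """Records uploads instead of talking to S3, so tests need no credentials."""

    def __init__(self, logger=None):
        self.logger = logger
        self.uploads = []

    def upload_file(self, filename, bucket, key, *args, **kwargs):
        self.uploads.append((filename, bucket, key))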

View File

@@ -39,3 +39,21 @@ py_library(
        ),
    ],
)

py_library(
    name = "download_utils",
    srcs = [
        "download_utils.py",
    ],
    visibility = ["//visibility:public"],
    deps = [
        dependency(
            "boto3",
            group = "aws",
        ),
        dependency(
            "requests",
            group = "core",
        ),
    ],
)

View File

@@ -0,0 +1,64 @@
import os
import shutil
import sys
from urllib.parse import urlparse

import boto3
import botocore.session
import requests


def get_s3_client():
    botocore.session.Session()
    if sys.platform in ("win32", "cygwin"):
        # These overridden values can be found here:
        # https://github.com/boto/botocore/blob/13468bc9d8923eccd0816ce2dd9cd8de5a6f6e0e/botocore/configprovider.py#L49C7-L49C7
        # This is due to the backwards-breaking change Python introduced in https://bugs.python.org/issue36264
        botocore_session = botocore.session.Session(
            session_vars={
                "config_file": (
                    None,
                    "AWS_CONFIG_FILE",
                    os.path.join(os.environ["HOME"], ".aws", "config"),
                    None,
                ),
                "credentials_file": (
                    None,
                    "AWS_SHARED_CREDENTIALS_FILE",
                    os.path.join(os.environ["HOME"], ".aws", "credentials"),
                    None,
                ),
            }
        )
        boto3.setup_default_session(botocore_session=botocore_session)
    return boto3.client("s3")


def extract_s3_bucket_key(url: str) -> tuple[str, str]:
    """
    Extracts the S3 bucket name and object key from an HTTP(s) S3 URL.

    Supports both:
    - https://bucket.s3.amazonaws.com/key/
    - https://bucket.s3.<region>.amazonaws.com/key/

    Returns:
        (bucket, key)
    """
    parsed = urlparse(url)
    # Hostname labels, e.g. ["bucket","s3","us-east-1","amazonaws","com"]
    bucket = parsed.hostname.split(".")[0]
    key = parsed.path.lstrip("/")
    return bucket, key


def download_from_s3_with_requests(url, output_file):
    with requests.get(url, stream=True) as reader:
        with open(output_file, "wb") as file_handle:
            shutil.copyfileobj(reader.raw, file_handle)


def download_from_s3_with_boto(url, output_file):
    bucket_name, object_key = extract_s3_bucket_key(url)
    s3_client = get_s3_client()
    s3_client.download_file(bucket_name, object_key, output_file)
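A short usage sketch for the new module (not part of the commit); the key path is a placeholder, and the presigned-URL test mirrors is_s3_presigned_url from download.py rather than anything defined in this file:

from urllib.parse import parse_qs, urlparse

url = "https://mciuploads.s3.amazonaws.com/some/prefix/binaries.tgz"
assert extract_s3_bucket_key(url) == ("mciuploads", "some/prefix/binaries.tgz")

regional = "https://mciuploads.s3.us-east-1.amazonaws.com/some/prefix/binaries.tgz"
assert extract_s3_bucket_key(regional) == ("mciuploads", "some/prefix/binaries.tgz")

if "X-Amz-Signature" in parse_qs(urlparse(url).query):
    download_from_s3_with_requests(url, "binaries.tgz")  # presigned: plain HTTPS GET
else:
    download_from_s3_with_boto(url, "binaries.tgz")  # needs AWS credentials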