diff --git a/bazel/wrapper_hook/install_modules.py b/bazel/wrapper_hook/install_modules.py
index 6d2c8ca2c50..d2a3d1b02e7 100644
--- a/bazel/wrapper_hook/install_modules.py
+++ b/bazel/wrapper_hook/install_modules.py
@@ -105,7 +105,7 @@ def install_modules(bazel):
         with open(lockfile_hash_file, "w") as f:
             f.write(current_hash)
 
-    deps = ["retry", "gitpython", "requests", "timeout-decorator"]
+    deps = ["retry", "gitpython", "requests", "timeout-decorator", "boto3"]
     deps_installed = []
     deps_needed = search_for_modules(
         deps, deps_installed, lockfile_changed=old_hash != current_hash
diff --git a/buildscripts/resmokelib/setup_multiversion/download.py b/buildscripts/resmokelib/setup_multiversion/download.py
index 8d7009b81e8..d93b6270fde 100644
--- a/buildscripts/resmokelib/setup_multiversion/download.py
+++ b/buildscripts/resmokelib/setup_multiversion/download.py
@@ -10,11 +10,13 @@ import tarfile
 import zipfile
 from urllib.parse import parse_qs, urlparse
 
-import requests
 import structlog
 
-from buildscripts.resmokelib.utils import archival
 from buildscripts.resmokelib.utils.filesystem import build_hygienic_bin_path, mkdtemp_in_build_dir
+from buildscripts.util.download_utils import (
+    download_from_s3_with_boto,
+    download_from_s3_with_requests,
+)
 
 S3_BUCKET = "mciuploads"
@@ -35,35 +37,6 @@ def is_s3_presigned_url(url: str) -> bool:
     return "X-Amz-Signature" in qs
 
 
-def extract_s3_bucket_key(url: str) -> tuple[str, str]:
-    """
-    Extracts the S3 bucket name and object key from an HTTP(s) S3 URL.
-
-    Supports both:
-    - https://bucket.s3.amazonaws.com/key/…
-    - https://bucket.s3.<region>.amazonaws.com/key/…
-
-    Returns:
-        (bucket, key)
-    """
-    parsed = urlparse(url)
-    # Hostname labels, e.g. ["bucket","s3","us-east-1","amazonaws","com"]
-    bucket = parsed.hostname.split(".")[0]
-    key = parsed.path.lstrip("/")
-    return bucket, key
-
-
-def download_from_s3_with_requests(url, output_file):
-    with requests.get(url, stream=True) as reader:
-        with open(output_file, "wb") as file_handle:
-            shutil.copyfileobj(reader.raw, file_handle)
-
-
-def download_from_s3_with_boto(url, output_file):
-    bucket_name, object_key = extract_s3_bucket_key(url)
-    s3_client = archival.Archival._get_s3_client()
-    s3_client.download_file(bucket_name, object_key, output_file)
-
-
 def download_from_s3(url):
     """Download file from S3 bucket by a given URL."""
diff --git a/buildscripts/resmokelib/utils/BUILD.bazel b/buildscripts/resmokelib/utils/BUILD.bazel
index cf6ea37b874..1c1bf5f579e 100644
--- a/buildscripts/resmokelib/utils/BUILD.bazel
+++ b/buildscripts/resmokelib/utils/BUILD.bazel
@@ -36,5 +36,6 @@ py_library(
             "botocore",
             group = "aws",
         ),
+        "//buildscripts/util:download_utils",
     ],
 )
diff --git a/buildscripts/resmokelib/utils/archival.py b/buildscripts/resmokelib/utils/archival.py
index 7a0e630c1b3..3d50bf55e47 100644
--- a/buildscripts/resmokelib/utils/archival.py
+++ b/buildscripts/resmokelib/utils/archival.py
@@ -12,6 +12,7 @@ import threading
 import time
 
 from buildscripts.resmokelib import config
+from buildscripts.util.download_utils import get_s3_client
 
 _IS_WINDOWS = sys.platform in ("win32", "cygwin")
@@ -127,7 +128,7 @@ class Archival(object):
         self._archive_file_worker.setDaemon(True)
         self._archive_file_worker.start()
         if not s3_client:
-            self.s3_client = self._get_s3_client()
+            self.s3_client = get_s3_client()
         else:
             self.s3_client = s3_client
@@ -141,37 +142,6 @@ class Archival(object):
         self._upload_worker.setDaemon(True)
         self._upload_worker.start()
 
-    @staticmethod
-    def _get_s3_client():
-        # Since boto3 is a 3rd party module, we import locally.
-        import boto3
-        import botocore.session
-
-        botocore.session.Session()
-
-        if sys.platform in ("win32", "cygwin"):
-            # These overriden values can be found here
-            # https://github.com/boto/botocore/blob/13468bc9d8923eccd0816ce2dd9cd8de5a6f6e0e/botocore/configprovider.py#L49C7-L49C7
-            # This is due to the backwards breaking changed python introduced https://bugs.python.org/issue36264
-            botocore_session = botocore.session.Session(
-                session_vars={
-                    "config_file": (
-                        None,
-                        "AWS_CONFIG_FILE",
-                        os.path.join(os.environ["HOME"], ".aws", "config"),
-                        None,
-                    ),
-                    "credentials_file": (
-                        None,
-                        "AWS_SHARED_CREDENTIALS_FILE",
-                        os.path.join(os.environ["HOME"], ".aws", "credentials"),
-                        None,
-                    ),
-                }
-            )
-            boto3.setup_default_session(botocore_session=botocore_session)
-        return boto3.client("s3")
-
     def archive_files_to_s3(self, display_name, input_files, s3_bucket, s3_path):
         """Archive 'input_files' to 's3_bucket' and 's3_path'.
diff --git a/buildscripts/s3_binary/BUILD.bazel b/buildscripts/s3_binary/BUILD.bazel
index bfce16dbe12..5f6b682a9de 100644
--- a/buildscripts/s3_binary/BUILD.bazel
+++ b/buildscripts/s3_binary/BUILD.bazel
@@ -24,7 +24,7 @@ py_library(
     visibility = ["//visibility:public"],
     deps = [
         "hashes",
-        "//buildscripts/resmokelib",
+        "//buildscripts/util:download_utils",
     ],
 )
diff --git a/buildscripts/s3_binary/download.py b/buildscripts/s3_binary/download.py
index d980f086278..0163e50655b 100644
--- a/buildscripts/s3_binary/download.py
+++ b/buildscripts/s3_binary/download.py
@@ -9,12 +9,11 @@ import time
 import traceback
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-
-from buildscripts.resmokelib.setup_multiversion.download import (
+from buildscripts.s3_binary.hashes import S3_SHA256_HASHES
+from buildscripts.util.download_utils import (
     download_from_s3_with_boto,
     download_from_s3_with_requests,
 )
-from buildscripts.s3_binary.hashes import S3_SHA256_HASHES
 
 
 def read_sha_file(filename):
@@ -121,9 +120,8 @@ def download_s3_binary(
 
     if os.path.exists(local_path):
         try:
-            print(f"{local_path} exists, validating...")
+            print(f"Downloaded file {local_path} already exists, validating...")
             validate_file(s3_path, local_path, remote_sha_allowed)
-            print(f"File is already valid: {local_path}")
             return True
         except Exception:
             print("File is invalid, redownloading...")
@@ -147,7 +145,6 @@
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Download and verify S3 binary.")
     parser.add_argument("s3_path", help="S3 URL to download from")
     parser.add_argument("local_path", nargs="?", help="Optional output file path")
diff --git a/buildscripts/tests/resmokelib/utils/test_archival.py b/buildscripts/tests/resmokelib/utils/test_archival.py
index 0d0b8fd4eeb..8d6c85e29fb 100644
--- a/buildscripts/tests/resmokelib/utils/test_archival.py
+++ b/buildscripts/tests/resmokelib/utils/test_archival.py
@@ -8,6 +8,7 @@ import tempfile
 import unittest
 
 from buildscripts.resmokelib.utils import archival
+from buildscripts.util.download_utils import get_s3_client
 
 _BUCKET = "mongodatafiles"
@@ -48,7 +49,7 @@ class ArchivalTestCase(unittest.TestCase):
         if mock_client:
             cls.s3_client = MockS3Client(cls.logger)
         else:
-            cls.s3_client = archival.Archival._get_s3_client()
+            cls.s3_client = get_s3_client()
         cls.archive = cls.create_archival()
 
     @classmethod
diff --git a/buildscripts/util/BUILD.bazel b/buildscripts/util/BUILD.bazel
index c714de7a35f..a14f2f843d4 100644
--- a/buildscripts/util/BUILD.bazel
+++ b/buildscripts/util/BUILD.bazel
@@ -39,3 +39,21 @@ py_library(
         ),
     ],
 )
+
+py_library(
+    name = "download_utils",
+    srcs = [
+        "download_utils.py",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        dependency(
+            "boto3",
+            group = "aws",
+        ),
+        dependency(
+            "requests",
+            group = "core",
+        ),
+    ],
+)
diff --git a/buildscripts/util/download_utils.py b/buildscripts/util/download_utils.py
new file mode 100644
index 00000000000..59e50309b43
--- /dev/null
+++ b/buildscripts/util/download_utils.py
@@ -0,0 +1,64 @@
+import os
+import shutil
+import sys
+from urllib.parse import urlparse
+
+import boto3
+import botocore.session
+import requests
+
+
+def get_s3_client():
+    botocore.session.Session()
+
+    if sys.platform in ("win32", "cygwin"):
+        # These overridden values can be found here
+        # https://github.com/boto/botocore/blob/13468bc9d8923eccd0816ce2dd9cd8de5a6f6e0e/botocore/configprovider.py#L49C7-L49C7
+        # This is needed due to the backwards-breaking change Python introduced in https://bugs.python.org/issue36264
+        botocore_session = botocore.session.Session(
+            session_vars={
+                "config_file": (
+                    None,
+                    "AWS_CONFIG_FILE",
+                    os.path.join(os.environ["HOME"], ".aws", "config"),
+                    None,
+                ),
+                "credentials_file": (
+                    None,
+                    "AWS_SHARED_CREDENTIALS_FILE",
+                    os.path.join(os.environ["HOME"], ".aws", "credentials"),
+                    None,
+                ),
+            }
+        )
+        boto3.setup_default_session(botocore_session=botocore_session)
+    return boto3.client("s3")
+
+def extract_s3_bucket_key(url: str) -> tuple[str, str]:
+    """
+    Extracts the S3 bucket name and object key from an HTTP(S) S3 URL.
+
+    Supports both:
+    - https://bucket.s3.amazonaws.com/key/…
+    - https://bucket.s3.<region>.amazonaws.com/key/…
+
+    Returns:
+        (bucket, key)
+    """
+    parsed = urlparse(url)
+    # Hostname labels, e.g. ["bucket","s3","us-east-1","amazonaws","com"]
+    bucket = parsed.hostname.split(".")[0]
+    key = parsed.path.lstrip("/")
+    return bucket, key
+
+
+def download_from_s3_with_requests(url, output_file):
+    with requests.get(url, stream=True) as reader:
+        with open(output_file, "wb") as file_handle:
+            shutil.copyfileobj(reader.raw, file_handle)
+
+
+def download_from_s3_with_boto(url, output_file):
+    bucket_name, object_key = extract_s3_bucket_key(url)
+    s3_client = get_s3_client()
+    s3_client.download_file(bucket_name, object_key, output_file)
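
Reviewer note: a minimal usage sketch of the relocated helpers, not part of the patch itself. The import path and function names come from the new buildscripts/util/download_utils.py above; the URL and output file name are hypothetical examples, and the snippet assumes the repository root is on the Python path (as buildscripts/s3_binary/download.py arranges via sys.path.append).

    # Hypothetical example; URL and output path are made up for illustration.
    from buildscripts.util.download_utils import (
        download_from_s3_with_boto,
        download_from_s3_with_requests,
    )

    url = "https://mciuploads.s3.amazonaws.com/some/prefix/binary.tgz"  # hypothetical object

    # A pre-signed URL carries its own authentication, so a plain HTTP GET is enough
    # (callers such as setup_multiversion check this with is_s3_presigned_url).
    download_from_s3_with_requests(url, "binary.tgz")

    # Otherwise go through boto3, which resolves AWS credentials from the environment.
    download_from_s3_with_boto(url, "binary.tgz")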