mirror of https://github.com/mongodb/mongo
SERVER-107836 fix s3_binary downloader deps (#38807)
GitOrigin-RevId: 155043e7c1f0d8a09280a0fcb01c1c915fa34776
This commit is contained in:
parent
0999fa0d5f
commit
278dc570d7
|
|
@ -105,7 +105,7 @@ def install_modules(bazel):
|
|||
with open(lockfile_hash_file, "w") as f:
|
||||
f.write(current_hash)
|
||||
|
||||
deps = ["retry", "gitpython", "requests", "timeout-decorator"]
|
||||
deps = ["retry", "gitpython", "requests", "timeout-decorator", "boto3"]
|
||||
deps_installed = []
|
||||
deps_needed = search_for_modules(
|
||||
deps, deps_installed, lockfile_changed=old_hash != current_hash
|
||||
|
|
|
|||
|
|
@ -10,11 +10,13 @@ import tarfile
|
|||
import zipfile
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import requests
|
||||
import structlog
|
||||
|
||||
from buildscripts.resmokelib.utils import archival
|
||||
from buildscripts.resmokelib.utils.filesystem import build_hygienic_bin_path, mkdtemp_in_build_dir
|
||||
from buildscripts.util.download_utils import (
|
||||
download_from_s3_with_boto,
|
||||
download_from_s3_with_requests,
|
||||
)
|
||||
|
||||
S3_BUCKET = "mciuploads"
|
||||
|
||||
|
|
@ -35,35 +37,6 @@ def is_s3_presigned_url(url: str) -> bool:
|
|||
return "X-Amz-Signature" in qs
|
||||
|
||||
|
||||
def extract_s3_bucket_key(url: str) -> tuple[str, str]:
    """Split a virtual-hosted-style HTTP(S) S3 URL into its bucket and key.

    Handles both hostname flavors:
      - https://bucket.s3.amazonaws.com/key/...
      - https://bucket.s3.<region>.amazonaws.com/key/...

    Returns:
        A (bucket, key) tuple.
    """
    parsed_url = urlparse(url)
    # The bucket name is the leading hostname label, e.g. "bucket" in
    # bucket.s3.us-east-1.amazonaws.com.
    bucket_name = parsed_url.hostname.split(".", 1)[0]
    object_key = parsed_url.path.lstrip("/")
    return bucket_name, object_key
|
||||
|
||||
|
||||
def download_from_s3_with_requests(url, output_file):
    """Download `url` to `output_file` over plain HTTP(S), streaming in chunks.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
            Previously the error document was silently written to disk.
    """
    with requests.get(url, stream=True) as response:
        # Fail loudly on HTTP errors instead of saving the error page.
        response.raise_for_status()
        with open(output_file, "wb") as file_handle:
            # iter_content (unlike response.raw) applies any Content-Encoding
            # decoding negotiated with the server, so the bytes on disk match
            # the logical object content.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file_handle.write(chunk)
|
||||
|
||||
|
||||
def download_from_s3_with_boto(url, output_file):
    """Download the S3 object referenced by `url` into `output_file` via boto3.

    The bucket and key are parsed out of the URL, and the transfer uses an
    authenticated S3 client (credentials resolved by botocore).
    """
    bucket, key = extract_s3_bucket_key(url)
    client = archival.Archival._get_s3_client()
    client.download_file(bucket, key, output_file)
|
||||
|
||||
|
||||
def download_from_s3(url):
|
||||
"""Download file from S3 bucket by a given URL."""
|
||||
|
|
|
|||
|
|
@ -36,5 +36,6 @@ py_library(
|
|||
"botocore",
|
||||
group = "aws",
|
||||
),
|
||||
"//buildscripts/util:download_utils",
|
||||
],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import threading
|
|||
import time
|
||||
|
||||
from buildscripts.resmokelib import config
|
||||
from buildscripts.util.download_utils import get_s3_client
|
||||
|
||||
_IS_WINDOWS = sys.platform in ("win32", "cygwin")
|
||||
|
||||
|
|
@ -127,7 +128,7 @@ class Archival(object):
|
|||
self._archive_file_worker.setDaemon(True)
|
||||
self._archive_file_worker.start()
|
||||
if not s3_client:
|
||||
self.s3_client = self._get_s3_client()
|
||||
self.s3_client = get_s3_client()
|
||||
else:
|
||||
self.s3_client = s3_client
|
||||
|
||||
|
|
@ -141,37 +142,6 @@ class Archival(object):
|
|||
self._upload_worker.setDaemon(True)
|
||||
self._upload_worker.start()
|
||||
|
||||
@staticmethod
|
||||
def _get_s3_client():
|
||||
# Since boto3 is a 3rd party module, we import locally.
|
||||
import boto3
|
||||
import botocore.session
|
||||
|
||||
botocore.session.Session()
|
||||
|
||||
if sys.platform in ("win32", "cygwin"):
|
||||
# These overriden values can be found here
|
||||
# https://github.com/boto/botocore/blob/13468bc9d8923eccd0816ce2dd9cd8de5a6f6e0e/botocore/configprovider.py#L49C7-L49C7
|
||||
# This is due to the backwards breaking changed python introduced https://bugs.python.org/issue36264
|
||||
botocore_session = botocore.session.Session(
|
||||
session_vars={
|
||||
"config_file": (
|
||||
None,
|
||||
"AWS_CONFIG_FILE",
|
||||
os.path.join(os.environ["HOME"], ".aws", "config"),
|
||||
None,
|
||||
),
|
||||
"credentials_file": (
|
||||
None,
|
||||
"AWS_SHARED_CREDENTIALS_FILE",
|
||||
os.path.join(os.environ["HOME"], ".aws", "credentials"),
|
||||
None,
|
||||
),
|
||||
}
|
||||
)
|
||||
boto3.setup_default_session(botocore_session=botocore_session)
|
||||
return boto3.client("s3")
|
||||
|
||||
def archive_files_to_s3(self, display_name, input_files, s3_bucket, s3_path):
|
||||
"""Archive 'input_files' to 's3_bucket' and 's3_path'.
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ py_library(
|
|||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"hashes",
|
||||
"//buildscripts/resmokelib",
|
||||
"//buildscripts/util:download_utils",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -9,12 +9,11 @@ import time
|
|||
import traceback
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from buildscripts.resmokelib.setup_multiversion.download import (
|
||||
from buildscripts.s3_binary.hashes import S3_SHA256_HASHES
|
||||
from buildscripts.util.download_utils import (
|
||||
download_from_s3_with_boto,
|
||||
download_from_s3_with_requests,
|
||||
)
|
||||
from buildscripts.s3_binary.hashes import S3_SHA256_HASHES
|
||||
|
||||
|
||||
def read_sha_file(filename):
|
||||
|
|
@ -121,9 +120,8 @@ def download_s3_binary(
|
|||
|
||||
if os.path.exists(local_path):
|
||||
try:
|
||||
print(f"{local_path} exists, validating...")
|
||||
print(f"Downloaded file {local_path} already exists, validating...")
|
||||
validate_file(s3_path, local_path, remote_sha_allowed)
|
||||
print(f"File is already valid: {local_path}")
|
||||
return True
|
||||
except Exception:
|
||||
print("File is invalid, redownloading...")
|
||||
|
|
@ -147,7 +145,6 @@ def download_s3_binary(
|
|||
|
||||
if __name__ == "__main__":
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description="Download and verify S3 binary.")
|
||||
parser.add_argument("s3_path", help="S3 URL to download from")
|
||||
parser.add_argument("local_path", nargs="?", help="Optional output file path")
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import tempfile
|
|||
import unittest
|
||||
|
||||
from buildscripts.resmokelib.utils import archival
|
||||
from buildscripts.util.download_utils import get_s3_client
|
||||
|
||||
_BUCKET = "mongodatafiles"
|
||||
|
||||
|
|
@ -48,7 +49,7 @@ class ArchivalTestCase(unittest.TestCase):
|
|||
if mock_client:
|
||||
cls.s3_client = MockS3Client(cls.logger)
|
||||
else:
|
||||
cls.s3_client = archival.Archival._get_s3_client()
|
||||
cls.s3_client = get_s3_client()
|
||||
cls.archive = cls.create_archival()
|
||||
|
||||
@classmethod
|
||||
|
|
|
|||
|
|
@ -39,3 +39,21 @@ py_library(
|
|||
),
|
||||
],
|
||||
)
|
||||
|
||||
py_library(
|
||||
name = "download_utils",
|
||||
srcs = [
|
||||
"download_utils.py",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
dependency(
|
||||
"boto3",
|
||||
group = "aws",
|
||||
),
|
||||
dependency(
|
||||
"requests",
|
||||
group = "core",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,64 @@
|
|||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import boto3
|
||||
import botocore.session
|
||||
import requests
|
||||
|
||||
|
||||
def get_s3_client():
    """Create and return a boto3 S3 client.

    On Windows, botocore's default config/credentials file locations are
    overridden to resolve against the HOME environment variable. The
    overridden defaults can be found here:
    https://github.com/boto/botocore/blob/13468bc9d8923eccd0816ce2dd9cd8de5a6f6e0e/botocore/configprovider.py#L49C7-L49C7
    This works around the backwards-breaking change Python introduced in
    https://bugs.python.org/issue36264

    Returns:
        A boto3 S3 client.
    """
    # NOTE: the original code created a bare botocore.session.Session() here
    # and discarded the result; that dead call has been removed.
    if sys.platform in ("win32", "cygwin"):
        # Each session_vars entry is:
        # (instance value, environment variable, default, converter).
        botocore_session = botocore.session.Session(
            session_vars={
                "config_file": (
                    None,
                    "AWS_CONFIG_FILE",
                    os.path.join(os.environ["HOME"], ".aws", "config"),
                    None,
                ),
                "credentials_file": (
                    None,
                    "AWS_SHARED_CREDENTIALS_FILE",
                    os.path.join(os.environ["HOME"], ".aws", "credentials"),
                    None,
                ),
            }
        )
        boto3.setup_default_session(botocore_session=botocore_session)
    return boto3.client("s3")
|
||||
|
||||
def extract_s3_bucket_key(url: str) -> tuple[str, str]:
    """Derive (bucket, key) from a virtual-hosted-style HTTP(S) S3 URL.

    Both hostname forms are supported:
      - https://bucket.s3.amazonaws.com/key/...
      - https://bucket.s3.<region>.amazonaws.com/key/...

    Returns:
        A (bucket, key) tuple.
    """
    parts = urlparse(url)
    # First hostname label is the bucket, e.g. "bucket" in
    # bucket.s3.us-east-1.amazonaws.com.
    bucket_name, _, _ = parts.hostname.partition(".")
    object_key = parts.path.lstrip("/")
    return bucket_name, object_key
|
||||
|
||||
|
||||
def download_from_s3_with_requests(url, output_file):
    """Download `url` to `output_file` over plain HTTP(S), streaming in chunks.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
            Previously the error document was silently written to disk.
    """
    with requests.get(url, stream=True) as response:
        # Fail loudly on HTTP errors instead of saving the error page.
        response.raise_for_status()
        with open(output_file, "wb") as file_handle:
            # iter_content (unlike response.raw) applies any Content-Encoding
            # decoding negotiated with the server, so the bytes on disk match
            # the logical object content.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file_handle.write(chunk)
|
||||
|
||||
|
||||
def download_from_s3_with_boto(url, output_file):
    """Download the S3 object referenced by `url` into `output_file` via boto3.

    The bucket and key are parsed out of the URL, and the transfer uses an
    authenticated S3 client (credentials resolved by botocore).
    """
    bucket, key = extract_s3_bucket_key(url)
    get_s3_client().download_file(bucket, key, output_file)
|
||||
Loading…
Reference in New Issue