mirror of https://github.com/mongodb/mongo
SERVER-107836 fix s3_binary downloader deps (#38807)
GitOrigin-RevId: 155043e7c1f0d8a09280a0fcb01c1c915fa34776
This commit is contained in:
parent
0999fa0d5f
commit
278dc570d7
|
|
@ -105,7 +105,7 @@ def install_modules(bazel):
|
||||||
with open(lockfile_hash_file, "w") as f:
|
with open(lockfile_hash_file, "w") as f:
|
||||||
f.write(current_hash)
|
f.write(current_hash)
|
||||||
|
|
||||||
deps = ["retry", "gitpython", "requests", "timeout-decorator"]
|
deps = ["retry", "gitpython", "requests", "timeout-decorator", "boto3"]
|
||||||
deps_installed = []
|
deps_installed = []
|
||||||
deps_needed = search_for_modules(
|
deps_needed = search_for_modules(
|
||||||
deps, deps_installed, lockfile_changed=old_hash != current_hash
|
deps, deps_installed, lockfile_changed=old_hash != current_hash
|
||||||
|
|
|
||||||
|
|
@ -10,11 +10,13 @@ import tarfile
|
||||||
import zipfile
|
import zipfile
|
||||||
from urllib.parse import parse_qs, urlparse
|
from urllib.parse import parse_qs, urlparse
|
||||||
|
|
||||||
import requests
|
|
||||||
import structlog
|
import structlog
|
||||||
|
|
||||||
from buildscripts.resmokelib.utils import archival
|
|
||||||
from buildscripts.resmokelib.utils.filesystem import build_hygienic_bin_path, mkdtemp_in_build_dir
|
from buildscripts.resmokelib.utils.filesystem import build_hygienic_bin_path, mkdtemp_in_build_dir
|
||||||
|
from buildscripts.util.download_utils import (
|
||||||
|
download_from_s3_with_boto,
|
||||||
|
download_from_s3_with_requests,
|
||||||
|
)
|
||||||
|
|
||||||
S3_BUCKET = "mciuploads"
|
S3_BUCKET = "mciuploads"
|
||||||
|
|
||||||
|
|
@ -35,35 +37,6 @@ def is_s3_presigned_url(url: str) -> bool:
|
||||||
return "X-Amz-Signature" in qs
|
return "X-Amz-Signature" in qs
|
||||||
|
|
||||||
|
|
||||||
def extract_s3_bucket_key(url: str) -> tuple[str, str]:
|
|
||||||
"""
|
|
||||||
Extracts the S3 bucket name and object key from an HTTP(s) S3 URL.
|
|
||||||
|
|
||||||
Supports both:
|
|
||||||
- https://bucket.s3.amazonaws.com/key/…
|
|
||||||
- https://bucket.s3.<region>.amazonaws.com/key/…
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(bucket, key)
|
|
||||||
"""
|
|
||||||
parsed = urlparse(url)
|
|
||||||
# Hostname labels, e.g. ["bucket","s3","us-east-1","amazonaws","com"]
|
|
||||||
bucket = parsed.hostname.split(".")[0]
|
|
||||||
key = parsed.path.lstrip("/")
|
|
||||||
return bucket, key
|
|
||||||
|
|
||||||
|
|
||||||
def download_from_s3_with_requests(url, output_file):
|
|
||||||
with requests.get(url, stream=True) as reader:
|
|
||||||
with open(output_file, "wb") as file_handle:
|
|
||||||
shutil.copyfileobj(reader.raw, file_handle)
|
|
||||||
|
|
||||||
|
|
||||||
def download_from_s3_with_boto(url, output_file):
|
|
||||||
bucket_name, object_key = extract_s3_bucket_key(url)
|
|
||||||
s3_client = archival.Archival._get_s3_client()
|
|
||||||
s3_client.download_file(bucket_name, object_key, output_file)
|
|
||||||
|
|
||||||
|
|
||||||
def download_from_s3(url):
|
def download_from_s3(url):
|
||||||
"""Download file from S3 bucket by a given URL."""
|
"""Download file from S3 bucket by a given URL."""
|
||||||
|
|
|
||||||
|
|
@ -36,5 +36,6 @@ py_library(
|
||||||
"botocore",
|
"botocore",
|
||||||
group = "aws",
|
group = "aws",
|
||||||
),
|
),
|
||||||
|
"//buildscripts/util:download_utils",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from buildscripts.resmokelib import config
|
from buildscripts.resmokelib import config
|
||||||
|
from buildscripts.util.download_utils import get_s3_client
|
||||||
|
|
||||||
_IS_WINDOWS = sys.platform in ("win32", "cygwin")
|
_IS_WINDOWS = sys.platform in ("win32", "cygwin")
|
||||||
|
|
||||||
|
|
@ -127,7 +128,7 @@ class Archival(object):
|
||||||
self._archive_file_worker.setDaemon(True)
|
self._archive_file_worker.setDaemon(True)
|
||||||
self._archive_file_worker.start()
|
self._archive_file_worker.start()
|
||||||
if not s3_client:
|
if not s3_client:
|
||||||
self.s3_client = self._get_s3_client()
|
self.s3_client = get_s3_client()
|
||||||
else:
|
else:
|
||||||
self.s3_client = s3_client
|
self.s3_client = s3_client
|
||||||
|
|
||||||
|
|
@ -141,37 +142,6 @@ class Archival(object):
|
||||||
self._upload_worker.setDaemon(True)
|
self._upload_worker.setDaemon(True)
|
||||||
self._upload_worker.start()
|
self._upload_worker.start()
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _get_s3_client():
|
|
||||||
# Since boto3 is a 3rd party module, we import locally.
|
|
||||||
import boto3
|
|
||||||
import botocore.session
|
|
||||||
|
|
||||||
botocore.session.Session()
|
|
||||||
|
|
||||||
if sys.platform in ("win32", "cygwin"):
|
|
||||||
# These overriden values can be found here
|
|
||||||
# https://github.com/boto/botocore/blob/13468bc9d8923eccd0816ce2dd9cd8de5a6f6e0e/botocore/configprovider.py#L49C7-L49C7
|
|
||||||
# This is due to the backwards breaking changed python introduced https://bugs.python.org/issue36264
|
|
||||||
botocore_session = botocore.session.Session(
|
|
||||||
session_vars={
|
|
||||||
"config_file": (
|
|
||||||
None,
|
|
||||||
"AWS_CONFIG_FILE",
|
|
||||||
os.path.join(os.environ["HOME"], ".aws", "config"),
|
|
||||||
None,
|
|
||||||
),
|
|
||||||
"credentials_file": (
|
|
||||||
None,
|
|
||||||
"AWS_SHARED_CREDENTIALS_FILE",
|
|
||||||
os.path.join(os.environ["HOME"], ".aws", "credentials"),
|
|
||||||
None,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
boto3.setup_default_session(botocore_session=botocore_session)
|
|
||||||
return boto3.client("s3")
|
|
||||||
|
|
||||||
def archive_files_to_s3(self, display_name, input_files, s3_bucket, s3_path):
|
def archive_files_to_s3(self, display_name, input_files, s3_bucket, s3_path):
|
||||||
"""Archive 'input_files' to 's3_bucket' and 's3_path'.
|
"""Archive 'input_files' to 's3_bucket' and 's3_path'.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ py_library(
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
deps = [
|
deps = [
|
||||||
"hashes",
|
"hashes",
|
||||||
"//buildscripts/resmokelib",
|
"//buildscripts/util:download_utils",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,12 +9,11 @@ import time
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
from buildscripts.s3_binary.hashes import S3_SHA256_HASHES
|
||||||
from buildscripts.resmokelib.setup_multiversion.download import (
|
from buildscripts.util.download_utils import (
|
||||||
download_from_s3_with_boto,
|
download_from_s3_with_boto,
|
||||||
download_from_s3_with_requests,
|
download_from_s3_with_requests,
|
||||||
)
|
)
|
||||||
from buildscripts.s3_binary.hashes import S3_SHA256_HASHES
|
|
||||||
|
|
||||||
|
|
||||||
def read_sha_file(filename):
|
def read_sha_file(filename):
|
||||||
|
|
@ -121,9 +120,8 @@ def download_s3_binary(
|
||||||
|
|
||||||
if os.path.exists(local_path):
|
if os.path.exists(local_path):
|
||||||
try:
|
try:
|
||||||
print(f"{local_path} exists, validating...")
|
print(f"Downloaded file {local_path} already exists, validating...")
|
||||||
validate_file(s3_path, local_path, remote_sha_allowed)
|
validate_file(s3_path, local_path, remote_sha_allowed)
|
||||||
print(f"File is already valid: {local_path}")
|
|
||||||
return True
|
return True
|
||||||
except Exception:
|
except Exception:
|
||||||
print("File is invalid, redownloading...")
|
print("File is invalid, redownloading...")
|
||||||
|
|
@ -147,7 +145,6 @@ def download_s3_binary(
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Download and verify S3 binary.")
|
parser = argparse.ArgumentParser(description="Download and verify S3 binary.")
|
||||||
parser.add_argument("s3_path", help="S3 URL to download from")
|
parser.add_argument("s3_path", help="S3 URL to download from")
|
||||||
parser.add_argument("local_path", nargs="?", help="Optional output file path")
|
parser.add_argument("local_path", nargs="?", help="Optional output file path")
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from buildscripts.resmokelib.utils import archival
|
from buildscripts.resmokelib.utils import archival
|
||||||
|
from buildscripts.util.download_utils import get_s3_client
|
||||||
|
|
||||||
_BUCKET = "mongodatafiles"
|
_BUCKET = "mongodatafiles"
|
||||||
|
|
||||||
|
|
@ -48,7 +49,7 @@ class ArchivalTestCase(unittest.TestCase):
|
||||||
if mock_client:
|
if mock_client:
|
||||||
cls.s3_client = MockS3Client(cls.logger)
|
cls.s3_client = MockS3Client(cls.logger)
|
||||||
else:
|
else:
|
||||||
cls.s3_client = archival.Archival._get_s3_client()
|
cls.s3_client = get_s3_client()
|
||||||
cls.archive = cls.create_archival()
|
cls.archive = cls.create_archival()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
||||||
|
|
@ -39,3 +39,21 @@ py_library(
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
py_library(
|
||||||
|
name = "download_utils",
|
||||||
|
srcs = [
|
||||||
|
"download_utils.py",
|
||||||
|
],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
deps = [
|
||||||
|
dependency(
|
||||||
|
"boto3",
|
||||||
|
group = "aws",
|
||||||
|
),
|
||||||
|
dependency(
|
||||||
|
"requests",
|
||||||
|
group = "core",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import boto3
|
||||||
|
import botocore.session
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def get_s3_client():
    """Create a boto3 S3 client, pinning the AWS config file locations on Windows.

    On ``win32`` and ``cygwin`` the default boto3 session is replaced by one
    whose ``config_file`` and ``credentials_file`` defaults point into
    ``<home>/.aws``, where ``<home>`` is the ``HOME`` environment variable
    (falling back to ``os.path.expanduser("~")`` when ``HOME`` is unset or
    empty). On every other platform the default session is left untouched.

    Returns:
        The client object returned by ``boto3.client("s3")``.
    """
    if sys.platform in ("win32", "cygwin"):
        # These overridden values can be found here
        # https://github.com/boto/botocore/blob/13468bc9d8923eccd0816ce2dd9cd8de5a6f6e0e/botocore/configprovider.py#L49C7-L49C7
        # This is due to the backwards-incompatible change Python introduced in
        # https://bugs.python.org/issue36264
        #
        # Prefer HOME, but do not crash with KeyError when it is not set.
        home_dir = os.environ.get("HOME") or os.path.expanduser("~")
        aws_dir = os.path.join(home_dir, ".aws")

        # NOTE(review): each tuple presumably follows botocore's
        # (config-file key, env var, default, conversion) layout from the
        # configprovider module linked above -- confirm when upgrading botocore.
        botocore_session = botocore.session.Session(
            session_vars={
                "config_file": (
                    None,
                    "AWS_CONFIG_FILE",
                    os.path.join(aws_dir, "config"),
                    None,
                ),
                "credentials_file": (
                    None,
                    "AWS_SHARED_CREDENTIALS_FILE",
                    os.path.join(aws_dir, "credentials"),
                    None,
                ),
            }
        )
        boto3.setup_default_session(botocore_session=botocore_session)
    return boto3.client("s3")
|
||||||
|
|
||||||
|
def extract_s3_bucket_key(url: str) -> tuple[str, str]:
    """
    Extracts the S3 bucket name and object key from an HTTP(s) S3 URL.

    Supports virtual-hosted-style URLs, where the bucket is the first
    hostname label:
    - https://bucket.s3.amazonaws.com/key/...
    - https://bucket.s3.<region>.amazonaws.com/key/...

    and path-style URLs, where the bucket is the first path segment:
    - https://s3.amazonaws.com/bucket/key/...
    - https://s3.<region>.amazonaws.com/bucket/key/...

    The query string (e.g. presigned-URL parameters) is ignored and the key
    is percent-decoded, so ``my%20file`` is returned as ``my file``.

    Returns:
        (bucket, key)

    Raises:
        ValueError: if the URL has no hostname, or is path-style but has no
            bucket segment.
    """
    # Imported locally so this function does not depend on the module-level
    # import list being extended.
    from urllib.parse import unquote

    parsed = urlparse(url)
    hostname = parsed.hostname
    if not hostname:
        raise ValueError(f"Cannot extract an S3 bucket from a URL without a host: {url!r}")

    # Hostname labels, e.g. ["bucket","s3","us-east-1","amazonaws","com"]
    labels = hostname.split(".")
    path = parsed.path.lstrip("/")

    # "s3" is too short to be a bucket name, so a leading "s3" label (or the
    # legacy "s3-<region>.amazonaws.com" endpoint) means the host is the S3
    # endpoint itself and the bucket lives in the path.
    is_path_style = labels[0] == "s3" or (
        labels[0].startswith("s3-") and len(labels) > 1 and labels[1] == "amazonaws"
    )
    if is_path_style:
        bucket, _, path = path.partition("/")
        if not bucket:
            raise ValueError(f"Path-style S3 URL is missing a bucket name: {url!r}")
    else:
        bucket = labels[0]

    return bucket, unquote(path)
|
||||||
|
|
||||||
|
|
||||||
|
def download_from_s3_with_requests(url, output_file):
    """Stream the response body of an HTTP GET on `url` into `output_file`.

    Args:
        url: URL to fetch with ``requests.get``.
        output_file: Path of the local file to create or overwrite.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status. The
            check runs before `output_file` is opened, so an error response
            never overwrites an existing file.
    """
    with requests.get(url, stream=True) as reader:
        # Without this check an S3 error document (e.g. 403 AccessDenied XML)
        # would be written to disk as if it were the requested artifact.
        reader.raise_for_status()
        with open(output_file, "wb") as file_handle:
            # Copy the undecoded response stream straight to disk in chunks.
            shutil.copyfileobj(reader.raw, file_handle)
|
||||||
|
|
||||||
|
|
||||||
|
def download_from_s3_with_boto(url, output_file):
    """Download the S3 object addressed by `url` into `output_file` via boto3.

    The bucket and key are derived from `url` with `extract_s3_bucket_key`,
    and the transfer uses the client returned by `get_s3_client`.
    """
    bucket, key = extract_s3_bucket_key(url)
    get_s3_client().download_file(bucket, key, output_file)
|
||||||
Loading…
Reference in New Issue