mongo/buildscripts/resmokelib/setup_multiversion/download.py

197 lines
6.3 KiB
Python

"""Helper functions to download."""
import contextlib
import errno
import glob
import os
import platform
import shutil
import tarfile
import zipfile
from urllib.parse import parse_qs, urlparse
import structlog
from buildscripts.resmokelib.utils.filesystem import build_hygienic_bin_path, mkdtemp_in_build_dir
from buildscripts.util.download_utils import (
download_from_s3_with_boto,
download_from_s3_with_requests,
)
# Name of an S3 bucket. Not referenced by the functions visible in this part of
# the file -- presumably the bucket the download URLs point at; confirm with callers.
S3_BUCKET = "mciuploads"
# Module-level structlog logger named after this module; used by every helper below.
LOGGER = structlog.getLogger(__name__)
class DownloadError(Exception):
    """Error raised by the download and extraction helpers in download.py."""
def is_s3_presigned_url(url: str) -> bool:
    """
    Tell whether `url` looks like an AWS S3 presigned URL (SigV4).

    The check is purely syntactic: the URL's query string must carry an
    `X-Amz-Signature` parameter (exact case) with a non-empty value. Blank
    values do not count because `parse_qs` drops them by default.
    """
    query_string = urlparse(url).query
    query_params = parse_qs(query_string)
    return "X-Amz-Signature" in query_params
def download_from_s3(url):
    """Download a file from an S3 bucket by a given URL.

    The file is saved into a fresh directory obtained from `mkdtemp_in_build_dir()`,
    under the last segment of the URL path.

    Args:
        url: URL of the object to download; may be an S3 presigned URL.

    Returns:
        Path of the downloaded file on the local filesystem.

    Raises:
        DownloadError: if `url` is empty or its path has no file name component.
    """
    if not url:
        raise DownloadError("Download URL not found")

    LOGGER.info("Downloading.", url=url)

    # Take the name from the URL *path* only. Splitting the raw URL on "/" first
    # would return a piece of the query string whenever a query value contains an
    # unescaped "/" (for example the credential scope of a presigned URL).
    basename = urlparse(url).path.rsplit("/", 1)[-1]
    if not basename:
        raise DownloadError("Download URL has no file name component")
    filename = os.path.join(mkdtemp_in_build_dir(), basename)

    arch = platform.uname().machine.lower()
    if is_s3_presigned_url(url) or arch.startswith(("s390", "ppc")):
        # S3 presigned URL can't be downloaded with boto3 library;
        # S390 and PPC architectures do not have adequate credentials;
        # thus we fall back to the standard requests library.
        download_from_s3_with_requests(url, filename)
    else:
        # Prefer boto3 library when possible.
        # boto3 library is much faster because it uses multipart download.
        download_from_s3_with_boto(url, filename)

    return filename
def _rsync_move_dir(source_dir, dest_dir):
    """
    Merge-move a directory tree.

    Every file below `source_dir` is moved to the same relative location below
    `dest_dir`. Missing destination directories are created, and a file already
    present at the destination is replaced unless it is the very same file as
    the source. Only files are moved; the source directories stay in place.
    This is similar to the behavior of `rsync` but different to `mv`.
    """
    for walk_root, _subdirs, filenames in os.walk(source_dir):
        # Map the directory being visited onto its counterpart under dest_dir.
        target_root = walk_root.replace(source_dir, dest_dir, 1)
        if not os.path.exists(target_root):
            os.makedirs(target_root)

        for name in filenames:
            origin = os.path.join(walk_root, name)
            target = os.path.join(target_root, name)

            target_present = os.path.exists(target)
            if target_present and os.path.samefile(origin, target):
                # Source and destination are the same file: nothing to move.
                continue
            if target_present:
                os.remove(target)

            shutil.move(origin, target_root)
def extract_archive(archive_file, install_dir):
    """Uncompress an archive into `install_dir` and return `install_dir`.

    The archive is unpacked into a temporary directory first and its contents are
    then merged into `install_dir` with `_rsync_move_dir`. The temporary directory
    is removed afterwards, including when extraction or the move fails.

    Args:
        archive_file: path to a `.zip` or `.tgz` archive.
        install_dir: directory to install into; created if it does not exist.

    Returns:
        `install_dir`.

    Raises:
        DownloadError: if the archive has an extension other than `.zip` or `.tgz`.
    """
    LOGGER.info("Extracting archive data.", archive=archive_file, install_dir=install_dir)

    archive_name = os.path.basename(archive_file)
    _, file_suffix = os.path.splitext(archive_name)
    # Reject unsupported archives before a temporary directory is allocated for them.
    if file_suffix not in (".zip", ".tgz"):
        raise DownloadError(f"Unsupported file extension {file_suffix}")

    # Keep a handle on the temporary root: the directory that gets installed may be
    # a sub-directory of it, but the whole root has to be removed at the end.
    extract_root = mkdtemp_in_build_dir()
    try:
        if file_suffix == ".zip":
            # Support .zip downloads, used for Windows binaries.
            with zipfile.ZipFile(archive_file) as zip_handle:
                zip_handle.extractall(extract_root)
        else:
            # Support .tgz downloads, used for Linux binaries.
            # NOTE(review): members are extracted without an extraction filter, so the
            # archive is assumed to come from a trusted source -- confirm.
            with contextlib.closing(tarfile.open(archive_file, "r:gz")) as tar_handle:
                tar_handle.extractall(path=extract_root)

        # Pre-hygienic tarballs have a unique top-level dir when untarred. We ignore
        # that dir to ensure the untarred dir structure is uniform. symbols and artifacts
        # are rarely used on pre-hygienic versions so we ignore them for simplicity.
        bin_archive_root = glob.glob(os.path.join(extract_root, "mongodb-*", "bin"))
        payload_dir = bin_archive_root[0] if bin_archive_root else extract_root

        os.makedirs(install_dir, exist_ok=True)
        _rsync_move_dir(payload_dir, install_dir)
    finally:
        shutil.rmtree(extract_root)

    LOGGER.info("Extract archive completed.", installed_dir=install_dir)
    return install_dir
def mkdir_p(path):
    """Python equivalent of `mkdir -p`.

    Create `path` together with any missing parent directories. A directory that
    already exists is not an error.

    Raises:
        OSError: if the directory cannot be created, for example because `path`
            exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
def _symlink_ms(source, symlink_name):
    """Create the symbolic link `symlink_name` pointing at `source` on Windows."""
    import ctypes

    csl = ctypes.windll.kernel32.CreateSymbolicLinkW
    csl.argtypes = (ctypes.c_wchar_p, ctypes.c_wchar_p, ctypes.c_uint32)
    csl.restype = ctypes.c_ubyte
    # Flag value 1 is passed when the target is a directory, 0 otherwise.
    flags = 1 if os.path.isdir(source) else 0
    if csl(symlink_name, source.replace("/", "\\"), flags) == 0:
        raise ctypes.WinError()


def symlink_version(suffix, installed_dir, link_dir=None):
    """
    Symlink the binaries in the 'installed_dir' to the 'link_dir'.

    The binaries are taken from the hygienic bin path below `installed_dir` when
    that directory exists, otherwise from `installed_dir` itself. Files ending in
    `.dll` are skipped. When `suffix` is non-empty each link is named
    `<name>-<suffix><extension>`; otherwise it keeps the executable's own name.
    A link that already exists is left untouched.

    If `link_dir` is None, link to the physical executable's directory (`bin_dir`);
    otherwise `link_dir` is created when missing.

    Returns the directory that holds the links.
    Raises OSError if a link cannot be created for a reason other than it already existing.
    """
    hygienic_bin_dir = build_hygienic_bin_path(parent=installed_dir)
    bin_dir = hygienic_bin_dir if os.path.isdir(hygienic_bin_dir) else installed_dir

    if link_dir is None:
        link_dir = bin_dir
    else:
        mkdir_p(link_dir)

    # Pick the link primitive once instead of once per executable. On Windows the
    # Win32 API is called directly rather than os.symlink, as before.
    link_method = _symlink_ms if os.name == "nt" else os.symlink

    for executable in os.listdir(bin_dir):
        if executable.endswith(".dll"):
            LOGGER.debug("Skipping linking DLL", file=executable)
            continue

        if suffix:
            executable_name, executable_extension = os.path.splitext(executable)
            link_name = f"{executable_name}-{suffix}{executable_extension}"
        else:
            link_name = executable

        executable_path = os.path.join(bin_dir, executable)
        executable_link = os.path.join(link_dir, link_name)
        try:
            link_method(executable_path, executable_link)
        except OSError as exc:
            # An existing link is fine; anything else is a real failure.
            if exc.errno != errno.EEXIST:
                raise
        else:
            LOGGER.debug(
                "Symlink created.", executable=executable_path, executable_link=executable_link
            )

    LOGGER.info("Symlinks for all executables are created in the directory.", link_dir=link_dir)
    return link_dir