# Mirror of https://github.com/mongodb/mongo
"""Script to generate & upload 'buildId -> debug symbols URL' mappings to symbolizer service."""
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import pathlib
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import typing
|
|
|
|
import requests
|
|
|
|
# register parent directory in sys.path, so 'buildscripts' is detected no matter where the script is called from
|
|
sys.path.append(str(pathlib.Path(os.path.join(os.getcwd(), __file__)).parent.parent))
|
|
|
|
# pylint: disable=wrong-import-position
|
|
from buildscripts.util.oauth import get_client_cred_oauth_credentials, Configs
|
|
from buildscripts.resmokelib.setup_multiversion.setup_multiversion import SetupMultiversion, download
|
|
from buildscripts.build_system_options import PathOptions
|
|
|
|
|
|
class LinuxBuildIDExtractor:
    """Parse readelf command output & extract Build ID."""

    # Executable used when no explicit path is supplied; resolved via PATH.
    default_executable_path = "readelf"

    def __init__(self, executable_path: str = None):
        """
        Initialize instance.

        :param executable_path: path to the readelf executable; defaults to 'readelf' on PATH
        """

        self.executable_path = executable_path or self.default_executable_path

    def callreadelf(self, binary_path: str) -> str:
        """
        Call readelf command for given binary & return string output.

        :param binary_path: full path of the ELF binary to inspect
        :return: decoded stdout of `readelf -n <binary_path>`
        """

        # subprocess.run with captured stdout avoids the pipe-buffer deadlock that
        # Popen + wait() + stdout.read() can hit when readelf produces a lot of output:
        # wait() blocks on process exit while the process blocks on a full pipe.
        completed = subprocess.run([self.executable_path, "-n", binary_path], close_fds=True,
                                   stdin=subprocess.PIPE, stdout=subprocess.PIPE, check=False)
        return completed.stdout.decode()

    @staticmethod
    def extractbuildid(out: str) -> typing.Optional[str]:
        """
        Parse readelf output and extract Build ID from it.

        :param out: textual output of `readelf -n`
        :return: the Build ID string, or None when no 'Build ID' line is present
        :raises ValueError: if more than one 'Build ID' line is found
        """

        build_id = None
        for line in out.splitlines():
            line = line.strip()
            if line.startswith('Build ID'):
                if build_id is not None:
                    raise ValueError("Found multiple Build ID values.")
                build_id = line.split(': ')[1]
        return build_id

    def run(self, binary_path: str) -> typing.Tuple[str, str]:
        """
        Perform all necessary actions to get Build ID.

        :param binary_path: full path of the ELF binary to inspect
        :return: tuple of (build id or None, raw readelf output)
        """

        readelfout = self.callreadelf(binary_path)
        buildid = self.extractbuildid(readelfout)

        return buildid, readelfout
|
|
|
|
|
|
class DownloadOptions(object):
    """A class to collect download option configurations."""

    def __init__(self, download_binaries=False, download_symbols=False, download_artifacts=False,
                 download_python_venv=False):
        """
        Initialize instance.

        :param download_binaries: whether binaries should be downloaded
        :param download_symbols: whether debug symbols should be downloaded
        :param download_artifacts: whether build artifacts should be downloaded
        :param download_python_venv: whether the python virtualenv should be downloaded
        """

        # Store each flag under an attribute of the same name.
        options = {
            "download_binaries": download_binaries,
            "download_symbols": download_symbols,
            "download_artifacts": download_artifacts,
            "download_python_venv": download_python_venv,
        }
        for attr_name, flag in options.items():
            setattr(self, attr_name, flag)
|
|
|
|
|
|
class Mapper:
    """A class to do basically all of the work."""

    # pylint: disable=too-many-instance-attributes
    # This amount of attributes are necessary.

    default_web_service_base_url: str = "https://symbolizer-service.server-tig.prod.corp.mongodb.com"
    default_cache_dir = os.path.join(os.getcwd(), 'build', 'symbols_cache')
    selected_binaries = ('mongos.debug', 'mongod.debug', 'mongo.debug')
    default_client_credentials_scope = "servertig-symbolizer-fullaccess"
    default_client_credentials_user_name = "client-user"
    default_creds_file_path = os.path.join(os.getcwd(), '.symbolizer_credentials.json')

    def __init__(self, version: str, client_id: str, client_secret: str, variant: str,
                 cache_dir: str = None, web_service_base_url: str = None,
                 logger: logging.Logger = None):
        """
        Initialize instance.

        :param version: version string
        :param client_id: OAuth client id for the symbolizer web service
        :param client_secret: OAuth client secret paired with client_id
        :param variant: build variant string
        :param cache_dir: full path to cache directory as a string
        :param web_service_base_url: URL of symbolizer web service
        :param logger: logger to use; when omitted a default 'symbolizer' logger is created
        """
        self.version = version
        self.variant = variant
        self.cache_dir = cache_dir or self.default_cache_dir
        self.web_service_base_url = web_service_base_url or self.default_web_service_base_url

        if not logger:
            logging.basicConfig()
            logger = logging.getLogger('symbolizer')
            logger.setLevel(logging.INFO)
        self.logger = logger

        self.http_client = requests.Session()

        self.multiversion_setup = SetupMultiversion(
            DownloadOptions(download_symbols=True, download_binaries=True), variant=self.variant,
            ignore_failed_push=True)
        self.debug_symbols_url = None
        self.url = None
        self.configs = Configs(
            client_credentials_scope=self.default_client_credentials_scope,
            client_credentials_user_name=self.default_client_credentials_user_name)
        self.client_id = client_id
        self.client_secret = client_secret
        self.path_options = PathOptions()

        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)

        self.authenticate()
        self.setup_urls()

    def authenticate(self):
        """Login & get credentials for further requests to web service."""

        # try to read previously cached credentials from file
        if os.path.exists(self.default_creds_file_path):
            try:
                with open(self.default_creds_file_path) as cfile:
                    data = json.loads(cfile.read())
            except (OSError, ValueError):
                # unreadable or corrupt cache file: ignore it and re-authenticate below
                data = {}
            access_token, expire_time = data.get("access_token"), data.get("expire_time")
            # guard against missing/None fields: the original `time.time() < expire_time`
            # comparison raised TypeError when 'expire_time' was absent from the file
            if access_token and expire_time and time.time() < expire_time:
                # credentials haven't expired yet
                self.http_client.headers.update({"Authorization": f"Bearer {access_token}"})
                return

        credentials = get_client_cred_oauth_credentials(self.client_id, self.client_secret,
                                                        configs=self.configs)
        self.http_client.headers.update({"Authorization": f"Bearer {credentials.access_token}"})

        # write credentials to local file for further usage
        with open(self.default_creds_file_path, "w") as cfile:
            cfile.write(
                json.dumps({
                    "access_token": credentials.access_token,
                    "expire_time": time.time() + credentials.expires_in
                }))

    def __enter__(self):
        """Return instance when used as a context manager."""

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Do cleaning process when used as a context manager."""

        self.cleanup()

    def cleanup(self):
        """Remove temporary files & folders."""

        if os.path.exists(self.cache_dir):
            shutil.rmtree(self.cache_dir)

    @staticmethod
    def url_to_filename(url: str) -> str:
        """
        Convert URL to local filename.

        :param url: download URL
        :return: full name for local file
        """
        return url.split('/')[-1]

    def setup_urls(self):
        """Set up URLs using multiversion."""

        urlinfo = self.multiversion_setup.get_urls(self.version, self.variant)

        download_symbols_url = urlinfo.urls.get("mongo-debugsymbols.tgz", None)
        binaries_url = urlinfo.urls.get("Binaries", "")

        # some build variants ship debug symbols as a zip instead of a tgz
        if not download_symbols_url:
            download_symbols_url = urlinfo.urls.get("mongo-debugsymbols.zip", None)

        if not download_symbols_url:
            self.logger.error("Couldn't find URL for debug symbols. Version: %s, URLs dict: %s",
                              self.version, urlinfo.urls)
            raise ValueError(f"Debug symbols URL not found. URLs dict: {urlinfo.urls}")

        self.debug_symbols_url = download_symbols_url
        self.url = binaries_url

    def unpack(self, path: str) -> str:
        """
        Use to untar/unzip files.

        :param path: full path of file
        :return: full path of directory of unpacked file
        """
        foldername = path.split('/')[-1]
        # strip the archive extension; setup_urls() may return either a .tgz or a .zip,
        # and the original code only handled .tgz
        for extension in ('.tgz', '.zip'):
            if foldername.endswith(extension):
                foldername = foldername[:-len(extension)]
                break
        out_dir = os.path.join(self.cache_dir, foldername)

        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        download.extract_archive(path, out_dir)

        # extracted everything, we don't need the original archive anymore and it should be deleted
        if os.path.exists(path):
            os.remove(path)

        return out_dir

    @staticmethod
    def download(url: str) -> str:
        """
        Use to download file from URL.

        :param url: URL of file to download
        :return: full path of downloaded file in local filesystem
        """

        tarball_full_path = download.download_from_s3(url)
        return tarball_full_path

    def generate_build_id_mapping(self) -> typing.Generator[typing.Dict[str, str], None, None]:
        """
        Extract build id from binaries and creates new dict using them.

        :return: mapped data as dict
        """

        readelf_extractor = LinuxBuildIDExtractor()

        debug_symbols_path = self.download(self.debug_symbols_url)
        debug_symbols_unpacked_path = self.unpack(debug_symbols_path)

        binaries_path = self.download(self.url)
        binaries_unpacked_path = self.unpack(binaries_path)

        # we need to analyze two directories: main binary folder inside debug-symbols and
        # shared libraries folder inside binaries.
        # main binary folder holds main binaries, like mongos, mongod, mongo ...
        # shared libraries folder holds shared libraries, tons of them.
        # some build variants do not contain shared libraries.

        debug_symbols_unpacked_path = os.path.join(debug_symbols_unpacked_path, 'dist-test')
        binaries_unpacked_path = os.path.join(binaries_unpacked_path, 'dist-test')

        self.logger.info("INSIDE unpacked debug-symbols/dist-test: %s",
                         os.listdir(debug_symbols_unpacked_path))
        self.logger.info("INSIDE unpacked binaries/dist-test: %s",
                         os.listdir(binaries_unpacked_path))

        # start with main binary folder
        for binary in self.selected_binaries:
            full_bin_path = os.path.join(debug_symbols_unpacked_path,
                                         self.path_options.main_binary_folder_name, binary)

            if not os.path.exists(full_bin_path):
                self.logger.error("Could not find binary at %s", full_bin_path)
                return

            build_id, readelf_out = readelf_extractor.run(full_bin_path)

            if not build_id:
                self.logger.error("Build ID couldn't be extracted. \nReadELF output %s",
                                  readelf_out)
                return

            yield {
                'url': self.url, 'debug_symbols_url': self.debug_symbols_url, 'build_id': build_id,
                'file_name': binary, 'version': self.version
            }

        # move to shared libraries folder.
        # it contains all shared library binary files,
        # we run readelf on each of them.
        lib_folder_path = os.path.join(binaries_unpacked_path,
                                       self.path_options.shared_library_folder_name)

        if not os.path.exists(lib_folder_path):
            # sometimes we don't get lib folder, which means there is no shared libraries for current build variant.
            self.logger.info("'%s' folder does not exist.",
                             self.path_options.shared_library_folder_name)
            sofiles = []
        else:
            sofiles = os.listdir(lib_folder_path)
            self.logger.info("'%s' folder: %s", self.path_options.shared_library_folder_name,
                             sofiles)

        for sofile in sofiles:
            sofile_path = os.path.join(lib_folder_path, sofile)

            if not os.path.exists(sofile_path):
                self.logger.error("Could not find binary at %s", sofile_path)
                return

            build_id, readelf_out = readelf_extractor.run(sofile_path)

            if not build_id:
                self.logger.error("Build ID couldn't be extracted. \nReadELF out %s", readelf_out)
                return

            yield {
                'url': self.url,
                'debug_symbols_url': self.debug_symbols_url,
                'build_id': build_id,
                'file_name': sofile,
                'version': self.version,
            }

    def run(self):
        """Run all necessary processes."""

        # generate_build_id_mapping() returns a generator, which is always truthy,
        # so `if not mappings:` (the original check) could never detect emptiness.
        # Count yielded mappings while iterating instead.
        mappings_count = 0
        for mapping in self.generate_build_id_mapping():
            mappings_count += 1
            response = self.http_client.post('/'.join((self.web_service_base_url, 'add')),
                                             json=mapping)
            if response.status_code != 200:
                self.logger.error(
                    "Could not store mapping, web service returned status code %s from URL %s. "
                    "Response: %s", response.status_code, response.url, response.text)

        if not mappings_count:
            self.logger.error("Could not generate mapping")
|
|
|
|
|
|
def make_argument_parser(parser=None, **kwargs):
    """
    Make and return an argparse.

    :param parser: an existing parser to extend; a new one is created when omitted
    :param kwargs: forwarded to argparse.ArgumentParser when a new parser is built
    :return: the configured parser
    """

    if parser is None:
        parser = argparse.ArgumentParser(**kwargs)

    # Plain string options with no defaults.
    for flag in ('--version', '--client-id', '--client-secret', '--variant'):
        parser.add_argument(flag)
    parser.add_argument('--web-service-base-url', default="")
    return parser
|
|
|
|
|
|
def main(options):
    """
    Execute mapper here. Main entry point.

    :param options: parsed command-line namespace with version, variant,
        client_id, client_secret and web_service_base_url attributes
    """

    mapper = Mapper(
        version=options.version,
        variant=options.variant,
        client_id=options.client_id,
        client_secret=options.client_secret,
        web_service_base_url=options.web_service_base_url,
    )

    # As a context manager, the mapper cleans up its files/folders automatically
    # when the block exits; otherwise mapper.cleanup() must be called manually.
    with mapper:
        mapper.run()
|
|
|
|
|
|
if __name__ == '__main__':
    # Parse CLI options (using the module docstring as help text) and run.
    main(make_argument_parser(description=__doc__).parse_args())
|