"""Script to generate & upload 'buildId -> debug symbols URL' mappings to symbolizer service."""
import argparse
import json
import logging
import os
import pathlib
import shutil
import subprocess
import sys
import time
import typing
import requests
# register parent directory in sys.path, so 'buildscripts' is detected no matter where the script is called from
sys.path.append(str(pathlib.Path(os.path.join(os.getcwd(), __file__)).parent.parent))
# pylint: disable=wrong-import-position
from buildscripts.util.oauth import get_client_cred_oauth_credentials, Configs
from buildscripts.resmokelib.setup_multiversion.setup_multiversion import SetupMultiversion, download
from buildscripts.build_system_options import PathOptions


class LinuxBuildIDExtractor:
    """Parse readelf command output & extract Build ID."""
default_executable_path = "readelf"
def __init__(self, executable_path: str = None):
"""Initialize instance."""
self.executable_path = executable_path or self.default_executable_path
def callreadelf(self, binary_path: str) -> str:
"""Call readelf command for given binary & return string output."""
args = [self.executable_path, "-n", binary_path]
process = subprocess.Popen(args=args, close_fds=True, stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
process.wait()
return process.stdout.read().decode()
@staticmethod
def extractbuildid(out: str) -> typing.Optional[str]:
"""Parse readelf output and extract Build ID from it."""
build_id = None
for line in out.splitlines():
line = line.strip()
if line.startswith('Build ID'):
if build_id is not None:
raise ValueError("Found multiple Build ID values.")
build_id = line.split(': ')[1]
return build_id
def run(self, binary_path: str) -> typing.Tuple[str, str]:
"""Perform all necessary actions to get Build ID."""
readelfout = self.callreadelf(binary_path)
buildid = self.extractbuildid(readelfout)
return buildid, readelfout


class DownloadOptions(object):
"""A class to collect download option configurations."""
def __init__(self, download_binaries=False, download_symbols=False, download_artifacts=False,
download_python_venv=False):
"""Initialize instance."""
self.download_binaries = download_binaries
self.download_symbols = download_symbols
self.download_artifacts = download_artifacts
self.download_python_venv = download_python_venv


class Mapper:
    """A class that does essentially all of the work."""

    # pylint: disable=too-many-instance-attributes
    # This number of attributes is necessary.
default_web_service_base_url: str = "https://symbolizer-service.server-tig.prod.corp.mongodb.com"
default_cache_dir = os.path.join(os.getcwd(), 'build', 'symbols_cache')
selected_binaries = ('mongos.debug', 'mongod.debug', 'mongo.debug')
default_client_credentials_scope = "servertig-symbolizer-fullaccess"
default_client_credentials_user_name = "client-user"
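    # local cache of the OAuth token written by authenticate(); avoids re-authenticating on every run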
default_creds_file_path = os.path.join(os.getcwd(), '.symbolizer_credentials.json')
def __init__(self, version: str, client_id: str, client_secret: str, variant: str,
cache_dir: str = None, web_service_base_url: str = None,
logger: logging.Logger = None):
"""
Initialize instance.
:param version: version string
:param variant: build variant string
:param cache_dir: full path to cache directory as a string
:param web_service_base_url: URL of symbolizer web service
"""
self.version = version
self.variant = variant
self.cache_dir = cache_dir or self.default_cache_dir
self.web_service_base_url = web_service_base_url or self.default_web_service_base_url
if not logger:
logging.basicConfig()
logger = logging.getLogger('symbolizer')
logger.setLevel(logging.INFO)
self.logger = logger
self.http_client = requests.Session()
self.multiversion_setup = SetupMultiversion(
DownloadOptions(download_symbols=True, download_binaries=True), variant=self.variant,
ignore_failed_push=True)
self.debug_symbols_url = None
self.url = None
self.configs = Configs(
client_credentials_scope=self.default_client_credentials_scope,
client_credentials_user_name=self.default_client_credentials_user_name)
self.client_id = client_id
self.client_secret = client_secret
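        # PathOptions supplies the folder names (main binary folder, shared library folder)
        # used when walking the unpacked archives in generate_build_id_mapping()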
self.path_options = PathOptions()
if not os.path.exists(self.cache_dir):
os.makedirs(self.cache_dir)
self.authenticate()
self.setup_urls()
def authenticate(self):
"""Login & get credentials for further requests to web service."""
# try to read from file
if os.path.exists(self.default_creds_file_path):
with open(self.default_creds_file_path) as cfile:
data = json.loads(cfile.read())
access_token, expire_time = data.get("access_token"), data.get("expire_time")
if time.time() < expire_time:
                    # credentials haven't expired yet
self.http_client.headers.update({"Authorization": f"Bearer {access_token}"})
return
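
        # no valid cached token: request a new one via the OAuth client credentials flow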
credentials = get_client_cred_oauth_credentials(self.client_id, self.client_secret,
configs=self.configs)
self.http_client.headers.update({"Authorization": f"Bearer {credentials.access_token}"})
        # write credentials to local file for further use
with open(self.default_creds_file_path, "w") as cfile:
cfile.write(
json.dumps({
"access_token": credentials.access_token,
"expire_time": time.time() + credentials.expires_in
}))
def __enter__(self):
"""Return instance when used as a context manager."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Do cleaning process when used as a context manager."""
self.cleanup()
def cleanup(self):
"""Remove temporary files & folders."""
if os.path.exists(self.cache_dir):
shutil.rmtree(self.cache_dir)
@staticmethod
def url_to_filename(url: str) -> str:
"""
Convert URL to local filename.
:param url: download URL
:return: full name for local file
"""
return url.split('/')[-1]
def setup_urls(self):
"""Set up URLs using multiversion."""
urlinfo = self.multiversion_setup.get_urls(self.version, self.variant)
download_symbols_url = urlinfo.urls.get("mongo-debugsymbols.tgz", None)
binaries_url = urlinfo.urls.get("Binaries", "")
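
        # prefer the .tgz debug symbols package; fall back to the .zip name if no .tgz entry exists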
if not download_symbols_url:
download_symbols_url = urlinfo.urls.get("mongo-debugsymbols.zip", None)
if not download_symbols_url:
self.logger.error("Couldn't find URL for debug symbols. Version: %s, URLs dict: %s",
self.version, urlinfo.urls)
raise ValueError(f"Debug symbols URL not found. URLs dict: {urlinfo.urls}")
self.debug_symbols_url = download_symbols_url
self.url = binaries_url
def unpack(self, path: str) -> str:
"""
        Untar/unzip the given archive file.
:param path: full path of file
:return: full path of directory of unpacked file
"""
foldername = path.replace('.tgz', '', 1).split('/')[-1]
out_dir = os.path.join(self.cache_dir, foldername)
if not os.path.exists(out_dir):
os.makedirs(out_dir)
download.extract_archive(path, out_dir)
        # everything has been extracted; the original archive is no longer needed, so delete it
if os.path.exists(path):
os.remove(path)
return out_dir
@staticmethod
def download(url: str) -> str:
"""
        Download a file from the given URL.
:param url: URL of file to download
:return: full path of downloaded file in local filesystem
"""
tarball_full_path = download.download_from_s3(url)
return tarball_full_path
def generate_build_id_mapping(self) -> typing.Generator[typing.Dict[str, str], None, None]:
"""
        Extract the build ID from each binary and yield a mapping dict for it.

        :return: generator of mapping dicts
"""
readelf_extractor = LinuxBuildIDExtractor()
debug_symbols_path = self.download(self.debug_symbols_url)
debug_symbols_unpacked_path = self.unpack(debug_symbols_path)
binaries_path = self.download(self.url)
binaries_unpacked_path = self.unpack(binaries_path)
# we need to analyze two directories: main binary folder inside debug-symbols and
# shared libraries folder inside binaries.
# main binary folder holds main binaries, like mongos, mongod, mongo ...
# shared libraries folder holds shared libraries, tons of them.
# some build variants do not contain shared libraries.
debug_symbols_unpacked_path = os.path.join(debug_symbols_unpacked_path, 'dist-test')
binaries_unpacked_path = os.path.join(binaries_unpacked_path, 'dist-test')
self.logger.info("INSIDE unpacked debug-symbols/dist-test: %s",
os.listdir(debug_symbols_unpacked_path))
self.logger.info("INSIDE unpacked binaries/dist-test: %s",
os.listdir(binaries_unpacked_path))
# start with main binary folder
for binary in self.selected_binaries:
full_bin_path = os.path.join(debug_symbols_unpacked_path,
self.path_options.main_binary_folder_name, binary)
if not os.path.exists(full_bin_path):
self.logger.error("Could not find binary at %s", full_bin_path)
return
build_id, readelf_out = readelf_extractor.run(full_bin_path)
if not build_id:
self.logger.error("Build ID couldn't be extracted. \nReadELF output %s",
readelf_out)
return
yield {
'url': self.url, 'debug_symbols_url': self.debug_symbols_url, 'build_id': build_id,
'file_name': binary, 'version': self.version
}
# move to shared libraries folder.
# it contains all shared library binary files,
# we run readelf on each of them.
lib_folder_path = os.path.join(binaries_unpacked_path,
self.path_options.shared_library_folder_name)
if not os.path.exists(lib_folder_path):
            # sometimes we don't get a lib folder, which means there are no shared libraries for the current build variant
self.logger.info("'%s' folder does not exist.",
self.path_options.shared_library_folder_name)
sofiles = []
else:
sofiles = os.listdir(lib_folder_path)
self.logger.info("'%s' folder: %s", self.path_options.shared_library_folder_name,
sofiles)
for sofile in sofiles:
sofile_path = os.path.join(lib_folder_path, sofile)
if not os.path.exists(sofile_path):
self.logger.error("Could not find binary at %s", sofile_path)
return
build_id, readelf_out = readelf_extractor.run(sofile_path)
if not build_id:
self.logger.error("Build ID couldn't be extracted. \nReadELF out %s", readelf_out)
return
yield {
'url': self.url,
'debug_symbols_url': self.debug_symbols_url,
'build_id': build_id,
'file_name': sofile,
'version': self.version,
}
def run(self):
"""Run all necessary processes."""
        mappings_count = 0

        # generate_build_id_mapping() returns a generator, so mappings are produced and
        # uploaded one at a time instead of being collected in memory first
        for mapping in self.generate_build_id_mapping():
            mappings_count += 1
            response = self.http_client.post('/'.join((self.web_service_base_url, 'add')),
                                             json=mapping)
            if response.status_code != 200:
                self.logger.error(
                    "Could not store mapping, web service returned status code %s from URL %s. "
                    "Response: %s", response.status_code, response.url, response.text)

        if not mappings_count:
            self.logger.error("Could not generate mapping")


def make_argument_parser(parser=None, **kwargs):
    """Make and return an argparse.ArgumentParser."""
if parser is None:
parser = argparse.ArgumentParser(**kwargs)
parser.add_argument('--version')
parser.add_argument('--client-id')
parser.add_argument('--client-secret')
parser.add_argument('--variant')
parser.add_argument('--web-service-base-url', default="")
return parser


def main(options):
"""Execute mapper here. Main entry point."""
mapper = Mapper(version=options.version, variant=options.variant, client_id=options.client_id,
client_secret=options.client_secret,
web_service_base_url=options.web_service_base_url)
# when used as a context manager, mapper instance automatically cleans files/folders after finishing its job.
# in other cases, mapper.cleanup() method should be called manually.
with mapper:
mapper.run()


if __name__ == '__main__':
mapper_options = make_argument_parser(description=__doc__).parse_args()
main(mapper_options)