SERVER-58695 improved symbolizer

This commit is contained in:
Robert Guo 2021-07-20 09:07:31 -04:00 committed by Evergreen Agent
parent 84e1317cca
commit a3705542f5
10 changed files with 639 additions and 248 deletions

View File

@ -31,7 +31,7 @@ variable-rgx=[a-z_][a-z0-9_]{1,50}$
# R0801 - duplicate-code - See PM-1380
# E0611 - no-name-in-module
disable=bad-continuation,fixme,import-error,line-too-long,no-member,locally-disabled,no-else-return,redefined-variable-type,too-few-public-methods,unused-import,useless-object-inheritance,deprecated-module,unnecessary-pass,duplicate-code,no-else-raise,deprecated-method,exec-used,no-name-in-module,raise-missing-from, unnecessary-comprehension,super-with-arguments,consider-using-sys-exit,import-outside-toplevel,no-else-continue,no-else-break
disable=bad-continuation,fixme,import-error,line-too-long,no-member,locally-disabled,no-else-return,redefined-variable-type,too-few-public-methods,unused-import,useless-object-inheritance,deprecated-module,unnecessary-pass,duplicate-code,no-else-raise,deprecated-method,exec-used,no-name-in-module,raise-missing-from, unnecessary-comprehension,super-with-arguments,consider-using-sys-exit,import-outside-toplevel,no-else-continue,no-else-break,too-many-arguments,too-many-locals
[IMPORTS]
known-third-party=boto3,botocore,psutil,yaml,xmlrunner

View File

@ -61,21 +61,17 @@ def parse_input(trace_doc, dbg_path_resolver):
def symbolize_frames(trace_doc, dbg_path_resolver, symbolizer_path, dsym_hint, input_format,
**_kwargs):
**kwargs):
"""Return a list of symbolized stack frames from a trace_doc in MongoDB stack dump format."""
# Keep frames in kwargs to avoid changing the function signature.
frames = kwargs.get("frames")
if frames is None:
frames = preprocess_frames(dbg_path_resolver, trace_doc, input_format)
if not symbolizer_path:
symbolizer_path = os.environ.get("MONGOSYMB_SYMBOLIZER_PATH", "llvm-symbolizer")
if input_format == "classic":
frames = parse_input(trace_doc, dbg_path_resolver)
elif input_format == "thin":
frames = trace_doc["backtrace"]
for frame in frames:
frame["path"] = dbg_path_resolver.get_dbg_file(frame)
else:
raise ValueError('Unknown input format "{}"'.format(input_format))
symbolizer_args = [symbolizer_path]
for dh in dsym_hint:
symbolizer_args.append("-dsym-hint={}".format(dh))
@ -123,16 +119,33 @@ def symbolize_frames(trace_doc, dbg_path_resolver, symbolizer_path, dsym_hint, i
return frames
def preprocess_frames(dbg_path_resolver, trace_doc, input_format):
"""Process the paths in frame objects."""
if input_format == "classic":
frames = parse_input(trace_doc, dbg_path_resolver)
elif input_format == "thin":
frames = trace_doc["backtrace"]
for frame in frames:
frame["path"] = dbg_path_resolver.get_dbg_file(frame)
else:
raise ValueError('Unknown input format "{}"'.format(input_format))
return frames
class PathDbgFileResolver(object):
"""PathDbgFileResolver class."""
def __init__(self, bin_path_guess):
"""Initialize PathDbgFileResolver."""
self._bin_path_guess = os.path.realpath(bin_path_guess)
self.mci_build_dir = None
def get_dbg_file(self, soinfo):
"""Return dbg file name."""
path = soinfo.get("path", "")
# TODO: make identifying mongo shared library directory more robust
if self.mci_build_dir is None and path.startswith("/data/mci/"):
self.mci_build_dir = path.split("/src/", maxsplit=1)[0]
return path if path else self._bin_path_guess
@ -143,6 +156,7 @@ class S3BuildidDbgFileResolver(object):
"""Initialize S3BuildidDbgFileResolver."""
self._cache_dir = cache_dir
self._s3_bucket = s3_bucket
self.mci_build_dir = None
def get_dbg_file(self, soinfo):
"""Return dbg file name."""
@ -182,32 +196,41 @@ def classic_output(frames, outfile, **kwargs): # pylint: disable=unused-argumen
outfile.write(" {path:s}!!!\n".format(**symbinfo))
def make_argument_parser(**kwargs):
def make_argument_parser(parser=None, **kwargs):
"""Make and return an argparse."""
parser = argparse.ArgumentParser(**kwargs)
if parser is None:
parser = argparse.ArgumentParser(**kwargs)
parser.add_argument('--dsym-hint', default=[], action='append')
parser.add_argument('--symbolizer-path', default='')
parser.add_argument('--input-format', choices=['classic', 'thin'], default='classic')
parser.add_argument('--output-format', choices=['classic', 'json'], default='classic',
help='"json" shows some extra information')
parser.add_argument('--debug-file-resolver', choices=['path', 's3'], default='path')
parser.add_argument('--src-dir-to-move', action="store", type=str, default=None,
help="Specify a src dir to move to /data/mci/{original_buildid}/src")
s3_group = parser.add_argument_group(
"s3 options", description='Options used with \'--debug-file-resolver s3\'')
s3_group.add_argument('--s3-cache-dir')
s3_group.add_argument('--s3-bucket')
parser.add_argument('path_to_executable')
# Look for symbols in the cwd by default.
parser.add_argument('path_to_executable', nargs="?")
return parser
def main():
def main(options):
"""Execute Main program."""
options = make_argument_parser(description=__doc__).parse_args()
# Skip over everything before the first '{' since it is likely to be log line prefixes.
# Additionally, using raw_decode() to ignore extra data after the closing '}' to allow maximal
# sloppiness in copy-pasting input.
trace_doc = sys.stdin.read()
if not trace_doc or not trace_doc.strip():
print("Please provide the backtrace through stdin for symbolization;"
"e.g. `your/symbolization/command < /file/with/stacktrace`")
trace_doc = trace_doc[trace_doc.find('{'):]
trace_doc = json.JSONDecoder().raw_decode(trace_doc)[0]
@ -242,10 +265,23 @@ def main():
elif options.debug_file_resolver == 's3':
resolver = S3BuildidDbgFileResolver(options.s3_cache_dir, options.s3_bucket)
frames = symbolize_frames(trace_doc, resolver, **vars(options))
frames = preprocess_frames(resolver, trace_doc, options.input_format)
if options.src_dir_to_move and resolver.mci_build_dir is not None:
try:
os.makedirs(resolver.mci_build_dir)
os.symlink(
os.path.join(os.getcwd(), options.src_dir_to_move),
os.path.join(resolver.mci_build_dir, 'src'))
except FileExistsError:
pass
frames = symbolize_frames(frames=frames, trace_doc=trace_doc, dbg_path_resolver=resolver,
**vars(options))
output_fn(frames, sys.stdout, indent=2)
if __name__ == '__main__':
main()
symbolizer_options = make_argument_parser(description=__doc__).parse_args()
main(symbolizer_options)
sys.exit(0)
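To make the refactor above concrete, here is a minimal sketch (not part of this diff) of the new two-step flow: frames are resolved once with preprocess_frames(), the caller may set up source symlinks, and the resolved frames are then passed back into symbolize_frames() through the frames keyword. The trace document, frame fields, and binary path below are hypothetical, and the final step requires llvm-symbolizer to be installed.

from buildscripts import mongosymb

# Hypothetical "thin"-format trace document; real frames carry whatever fields
# the MongoDB stack-dump machinery emits.
trace_doc = {"backtrace": [{"b": "0x400000", "o": "0x1A2B", "path": "/usr/bin/mongod"}]}
resolver = mongosymb.PathDbgFileResolver("/usr/bin/mongod")

# Step 1: resolve debug-file paths only.
frames = mongosymb.preprocess_frames(resolver, trace_doc, input_format="thin")

# (main() creates the /data/mci/{original_buildid}/src symlink between these two steps.)

# Step 2: symbolize the already-resolved frames.
symbolized = mongosymb.symbolize_frames(frames=frames, trace_doc=trace_doc,
                                        dbg_path_resolver=resolver, symbolizer_path="",
                                        dsym_hint=[], input_format="thin")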

View File

@ -8,6 +8,7 @@ from buildscripts.resmokelib.hang_analyzer import HangAnalyzerPlugin
from buildscripts.resmokelib.powercycle import PowercyclePlugin
from buildscripts.resmokelib.run import RunPlugin
from buildscripts.resmokelib.setup_multiversion import SetupMultiversionPlugin
from buildscripts.resmokelib.symbolizer import SymbolizerPlugin
from buildscripts.resmokelib.undodb import UndoDbPlugin
_PLUGINS = [
@ -16,6 +17,7 @@ _PLUGINS = [
UndoDbPlugin(),
SetupMultiversionPlugin(),
PowercyclePlugin(),
SymbolizerPlugin(),
]

View File

@ -26,7 +26,7 @@ def download_from_s3(url):
"""Download file from S3 bucket by a given URL."""
if not url:
raise DownloadError("Download URL not found.")
raise DownloadError("Download URL not found")
LOGGER.info("Downloading.", url=url)
filename = os.path.join(tempfile.gettempdir(), url.split('/')[-1].split('?')[0])
@ -91,11 +91,8 @@ def extract_archive(archive_file, install_dir):
try:
os.makedirs(install_dir)
except OSError as exc:
if exc.errno == errno.EEXIST and os.path.isdir(install_dir):
pass
else:
raise
except FileExistsError:
pass
_rsync_move_dir(temp_dir, install_dir)
shutil.rmtree(temp_dir)
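For comparison only, the same behavior can be written with the exist_ok flag (the directory name below is a placeholder); the explicit FileExistsError handler above keeps the change minimal.

import os

# Equivalent to the try/except FileExistsError pattern above.
os.makedirs("some_install_dir", exist_ok=True)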

View File

@ -1,150 +0,0 @@
"""Helper functions to interact with evergreen."""
import os
import structlog
from requests import HTTPError
from evergreen import RetryingEvergreenApi
EVERGREEN_HOST = "https://evergreen.mongodb.com"
EVERGREEN_CONFIG_LOCATIONS = (
# Common for machines in Evergreen
os.path.join(os.getcwd(), ".evergreen.yml"),
# Common for local machines
os.path.expanduser(os.path.join("~", ".evergreen.yml")),
)
GENERIC_EDITION = "base"
GENERIC_PLATFORM = "linux_x86_64"
GENERIC_ARCHITECTURE = "x86_64"
LOGGER = structlog.getLogger(__name__)
class EvergreenConnError(Exception):
"""Errors in evergreen_conn.py."""
pass
def get_evergreen_api(evergreen_config):
"""Return evergreen API."""
config_to_pass = evergreen_config
if not config_to_pass:
# Pickup the first config file found in common locations.
for file in EVERGREEN_CONFIG_LOCATIONS:
if os.path.isfile(file):
config_to_pass = file
break
try:
evg_api = RetryingEvergreenApi.get_api(config_file=config_to_pass)
except Exception as ex:
LOGGER.error("Most likely something is wrong with evergreen config file.",
config_file=config_to_pass)
raise ex
else:
return evg_api
def get_buildvariant_name(config, edition, platform, architecture, major_minor_version):
"""Return Evergreen buildvariant name."""
buildvariant_name = ""
evergreen_buildvariants = config.evergreen_buildvariants
for buildvariant in evergreen_buildvariants:
if (buildvariant.edition == edition and buildvariant.platform == platform
and buildvariant.architecture == architecture):
versions = buildvariant.versions
if major_minor_version in versions:
buildvariant_name = buildvariant.name
break
elif not versions:
buildvariant_name = buildvariant.name
return buildvariant_name
def get_generic_buildvariant_name(config, major_minor_version):
"""Return Evergreen buildvariant name for generic platform."""
LOGGER.info("Falling back to generic architecture.", edition=GENERIC_EDITION,
platform=GENERIC_PLATFORM, architecture=GENERIC_ARCHITECTURE)
generic_buildvariant_name = get_buildvariant_name(
config=config, edition=GENERIC_EDITION, platform=GENERIC_PLATFORM,
architecture=GENERIC_ARCHITECTURE, major_minor_version=major_minor_version)
if not generic_buildvariant_name:
raise EvergreenConnError("Generic architecture buildvariant not found.")
return generic_buildvariant_name
def get_evergreen_project_and_version(config, evg_api, commit_hash):
"""Return evergreen project and version by commit hash."""
for evg_project in config.evergreen_projects:
try:
version_id = evg_project.replace("-", "_") + "_" + commit_hash
evg_version = evg_api.version_by_id(version_id)
except HTTPError:
continue
else:
LOGGER.debug("Found evergreen version.",
evergreen_version=f"{EVERGREEN_HOST}/version/{evg_version.version_id}")
return evg_project, evg_version
raise EvergreenConnError(f"Evergreen version for commit hash {commit_hash} not found.")
def get_evergreen_versions(evg_api, evg_project):
"""Return the list of evergreen versions by evergreen project name."""
return evg_api.versions_by_project(evg_project)
def get_compile_artifact_urls(evg_api, evg_version, buildvariant_name):
"""Return compile urls from buildvariant in Evergreen version."""
compile_artifact_urls = {}
try:
build_id = evg_version.build_variants_map[buildvariant_name]
except KeyError:
raise EvergreenConnError(f"Buildvariant {buildvariant_name} not found.")
else:
evg_build = evg_api.build_by_id(build_id)
LOGGER.debug("Found evergreen build.", evergreen_build=f"{EVERGREEN_HOST}/build/{build_id}")
evg_tasks = evg_build.get_tasks()
compile_task = None
push_task = None
for evg_task in evg_tasks:
# Only set the compile task if there isn't one already, otherwise
# newer tasks like "archive_dist_test_debug" take precedence.
if evg_task.display_name in ("compile", "archive_dist_test") and compile_task is None:
compile_task = evg_task
elif evg_task.display_name == "push":
push_task = evg_task
if compile_task and push_task:
break
if compile_task and push_task and compile_task.status == push_task.status == "success":
LOGGER.info("Found successful evergreen tasks.",
compile_task=f"{EVERGREEN_HOST}/task/{compile_task.task_id}",
push_task=f"{EVERGREEN_HOST}/task/{push_task.task_id}")
evg_artifacts = compile_task.artifacts
for artifact in evg_artifacts:
compile_artifact_urls[artifact.name] = artifact.url
# Tack on the project id for generating a friendly decompressed name for the artifacts.
compile_artifact_urls["project_id"] = compile_task.project_id
elif compile_task and push_task:
LOGGER.warning("Found evergreen tasks, but they are not both successful.",
compile_task=f"{EVERGREEN_HOST}/task/{compile_task.task_id}",
push_task=f"{EVERGREEN_HOST}/task/{push_task.task_id}")
else:
LOGGER.error("There are no `compile` and/or 'push' tasks in the evergreen build.",
evergreen_build=f"{EVERGREEN_HOST}/build/{build_id}")
return compile_artifact_urls

View File

@ -15,7 +15,8 @@ import structlog
import yaml
from buildscripts.resmokelib.plugin import PluginInterface, Subcommand
from buildscripts.resmokelib.setup_multiversion import config, download, evergreen_conn, github_conn
from buildscripts.resmokelib.setup_multiversion import config, download, github_conn
from buildscripts.resmokelib.utils import evergreen_conn
SUBCOMMAND = "setup-multiversion"
@ -43,27 +44,30 @@ class SetupMultiversion(Subcommand):
"""Main class for the setup multiversion subcommand."""
# pylint: disable=too-many-instance-attributes
def __init__(self, options):
def __init__(self, download_options, install_dir="", link_dir="", platform=None, edition=None,
architecture=None, use_latest=None, versions=None, evergreen_config=None,
github_oauth_token=None, debug=None, ignore_failed_push=False):
"""Initialize."""
setup_logging(options.debug)
setup_logging(debug)
cwd = os.getcwd()
self.install_dir = os.path.join(cwd, options.install_dir)
self.link_dir = os.path.join(cwd, options.link_dir)
self.install_dir = os.path.join(cwd, install_dir)
self.link_dir = os.path.join(cwd, link_dir)
self.edition = options.edition.lower() if options.edition else None
self.platform = options.platform.lower() if options.platform else None
self.architecture = options.architecture.lower() if options.architecture else None
self.use_latest = options.use_latest
self.versions = options.versions
self.edition = edition.lower() if edition else None
self.platform = platform.lower() if platform else None
self.architecture = architecture.lower() if architecture else None
self.use_latest = use_latest
self.versions = versions
self.ignore_failed_push = ignore_failed_push
self.download_binaries = options.download_binaries
self.download_symbols = options.download_symbols
self.download_artifacts = options.download_artifacts
self.download_binaries = download_options.download_binaries
self.download_symbols = download_options.download_symbols
self.download_artifacts = download_options.download_artifacts
self.evg_api = evergreen_conn.get_evergreen_api(options.evergreen_config)
self.evg_api = evergreen_conn.get_evergreen_api(evergreen_config)
# In Evergreen, the GitHub OAuth token is stored as `token ******`, so we strip the leading part
self.github_oauth_token = options.github_oauth_token.replace(
"token ", "") if options.github_oauth_token else None
self.github_oauth_token = github_oauth_token.replace("token ",
"") if github_oauth_token else None
with open(config.SETUP_MULTIVERSION_CONFIG) as file_handle:
raw_yaml = yaml.safe_load(file_handle)
self.config = config.SetupMultiversionConfig(raw_yaml)
@ -99,21 +103,11 @@ class SetupMultiversion(Subcommand):
"version.")
urls = self.get_urls(version)
artifacts_url = urls.get("Artifacts", "") if self.download_artifacts else None
binaries_url = urls.get("Binaries", "") if self.download_binaries else None
download_symbols_url = None
if self.download_symbols:
download_symbols_url = urls.get(" mongo-debugsymbols.tgz", "")
if not download_symbols_url:
download_symbols_url = urls.get(" mongo-debugsymbols.zip", "")
bin_suffix = self._get_bin_suffix(version, urls["project_id"])
# Give each version a unique install dir
install_dir = os.path.join(self.install_dir, version)
self.setup_mongodb(artifacts_url, binaries_url, download_symbols_url, install_dir,
bin_suffix, self.link_dir)
self.download_and_extract_from_urls(urls, bin_suffix, install_dir)
except (github_conn.GithubConnError, evergreen_conn.EvergreenConnError,
download.DownloadError) as ex:
@ -124,6 +118,20 @@ class SetupMultiversion(Subcommand):
LOGGER.info("Setup version completed.", version=version)
LOGGER.info("-" * 50)
def download_and_extract_from_urls(self, urls, bin_suffix, install_dir):
"""Download and extract values indicated in `urls`."""
artifacts_url = urls.get("Artifacts", "") if self.download_artifacts else None
binaries_url = urls.get("Binaries", "") if self.download_binaries else None
download_symbols_url = None
if self.download_symbols:
download_symbols_url = urls.get(" mongo-debugsymbols.tgz", None)
if not download_symbols_url:
download_symbols_url = urls.get(" mongo-debugsymbols.zip", None)
self.setup_mongodb(artifacts_url, binaries_url, download_symbols_url, install_dir,
bin_suffix, self.link_dir)
def get_latest_urls(self, version):
"""Return latest urls."""
urls = {}
@ -149,34 +157,48 @@ class SetupMultiversion(Subcommand):
if buildvariant_name not in evg_version.build_variants_map:
buildvariant_name = self.fallback_to_generic_buildvariant(major_minor_version)
curr_urls = evergreen_conn.get_compile_artifact_urls(self.evg_api, evg_version,
buildvariant_name)
curr_urls = evergreen_conn.get_compile_artifact_urls(
self.evg_api, evg_version, buildvariant_name,
ignore_failed_push=self.ignore_failed_push)
if "Binaries" in curr_urls:
urls = curr_urls
break
return urls
def get_urls(self, version):
"""Return urls."""
git_tag, commit_hash = github_conn.get_git_tag_and_commit(self.github_oauth_token, version)
LOGGER.info("Found git attributes.", git_tag=git_tag, commit_hash=commit_hash)
def get_urls(self, binary_version=None, evergreen_version=None, buildvariant_name=None):
"""Return multiversion urls for a given binary version or (Evergreen version + variant)."""
if (binary_version and evergreen_version) or not (binary_version or evergreen_version):
raise ValueError("Must specify exactly one of `version` and `evergreen_version`")
if binary_version:
git_tag, commit_hash = github_conn.get_git_tag_and_commit(self.github_oauth_token,
binary_version)
LOGGER.info("Found git attributes.", git_tag=git_tag, commit_hash=commit_hash)
evg_project, evg_version = evergreen_conn.get_evergreen_project_and_version(
self.config, self.evg_api, commit_hash)
else:
evg_project, evg_version = evergreen_conn.get_evergreen_project(
self.config, self.evg_api, evergreen_version)
evg_project, evg_version = evergreen_conn.get_evergreen_project_and_version(
self.config, self.evg_api, commit_hash)
LOGGER.debug("Found evergreen project.", evergreen_project=evg_project)
try:
major_minor_version = re.findall(r"\d+\.\d+", evg_project)[-1]
except IndexError:
major_minor_version = "master"
buildvariant_name = self.get_buildvariant_name(major_minor_version)
if not buildvariant_name:
buildvariant_name = self.get_buildvariant_name(major_minor_version)
LOGGER.debug("Found buildvariant.", buildvariant_name=buildvariant_name)
if buildvariant_name not in evg_version.build_variants_map:
buildvariant_name = self.fallback_to_generic_buildvariant(major_minor_version)
urls = evergreen_conn.get_compile_artifact_urls(self.evg_api, evg_version,
buildvariant_name)
buildvariant_name,
ignore_failed_push=self.ignore_failed_push)
return urls
@ -203,8 +225,6 @@ class SetupMultiversion(Subcommand):
try_download(url)
if binaries_url is not None:
if not link_dir:
raise ValueError("link_dir must be specified if downloading binaries")
download.symlink_version(bin_suffix, install_dir, link_dir)
def get_buildvariant_name(self, major_minor_version):
@ -227,21 +247,36 @@ class SetupMultiversion(Subcommand):
major_minor_version=major_minor_version)
class _DownloadOptions(object):
def __init__(self, db, ds, da):
self.download_binaries = db
self.download_symbols = ds
self.download_artifacts = da
class SetupMultiversionPlugin(PluginInterface):
"""Integration point for setup-multiversion-mongodb."""
def parse(self, subcommand, parser, parsed_args, **kwargs):
"""Parse command-line options."""
if subcommand != SUBCOMMAND:
return None
if subcommand == SUBCOMMAND:
return SetupMultiversion(parsed_args)
# Shorthand for brevity.
args = parsed_args
return None
download_options = _DownloadOptions(db=args.download_binaries, ds=args.download_symbols,
da=args.download_artifacts)
def add_subcommand(self, subparsers):
"""Create and add the parser for the subcommand."""
parser = subparsers.add_parser(SUBCOMMAND, help=__doc__)
return SetupMultiversion(install_dir=args.install_dir, link_dir=args.link_dir,
platform=args.platform, edition=args.edition,
architecture=args.architecture, use_latest=args.use_latest,
versions=args.versions, download_options=download_options,
evergreen_config=args.evergreen_config,
github_oauth_token=args.github_oauth_token, debug=args.debug)
@classmethod
def _add_args_to_parser(cls, parser):
parser.add_argument("-i", "--installDir", dest="install_dir", required=True,
help="Directory to install the download archive. [REQUIRED]")
parser.add_argument(
@ -292,3 +327,8 @@ class SetupMultiversionPlugin(PluginInterface):
"https://developer.github.com/v3/#rate-limiting")
parser.add_argument("-d", "--debug", dest="debug", action="store_true", default=False,
help="Set DEBUG logging level.")
def add_subcommand(self, subparsers):
"""Create and add the parser for the subcommand."""
parser = subparsers.add_parser(SUBCOMMAND, help=__doc__)
self._add_args_to_parser(parser)
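Because the constructor now takes explicit keyword arguments instead of a parsed-args namespace, other tooling (such as the new symbolize subcommand) can build a SetupMultiversion instance directly. A minimal sketch with placeholder values; constructing it still requires a repo checkout with the multiversion config and an Evergreen config file:

from buildscripts.resmokelib.setup_multiversion.setup_multiversion import (
    SetupMultiversion, _DownloadOptions)

# Download binaries and debug symbols, skip test artifacts.
download_options = _DownloadOptions(db=True, ds=True, da=False)

# install_dir, link_dir and versions are placeholders; with evergreen_config=None
# the API picks up .evergreen.yml from the usual locations.
setup = SetupMultiversion(download_options=download_options, install_dir="install",
                          link_dir="link", versions=["4.4.1"], ignore_failed_push=True)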

View File

@ -0,0 +1,235 @@
"""Wrapper around mongosym to download everything required."""
import argparse
import logging
import os
import shutil
import subprocess
import sys
import structlog
from buildscripts import mongosymb
from buildscripts.resmokelib.plugin import PluginInterface, Subcommand
from buildscripts.resmokelib.setup_multiversion.setup_multiversion import SetupMultiversion, _DownloadOptions
from buildscripts.resmokelib.utils import evergreen_conn
LOGGER = structlog.getLogger(__name__)
_HELP = """
Symbolize a backtrace JSON file given an Evergreen Task ID.
"""
_MESSAGE = """TODO"""
_COMMAND = "symbolize"
DEFAULT_SYMBOLIZER_LOCATION = "/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
def setup_logging(debug=False):
"""Enable logging."""
log_level = logging.DEBUG if debug else logging.INFO
logging.basicConfig(
format="[%(asctime)s - %(name)s - %(levelname)s] %(message)s",
level=log_level,
stream=sys.stdout,
)
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("github").setLevel(logging.WARNING)
structlog.configure(logger_factory=structlog.stdlib.LoggerFactory())
class Symbolizer(Subcommand):
"""Interact with Symbolizer."""
def __init__(self, task_id, execution_num, bin_name, mongosym_fwd_args):
"""Constructor."""
self.execution_num = execution_num
self.bin_name = bin_name
self.mongosym_args = mongosym_fwd_args
self.evg_api: evergreen_conn.RetryingEvergreenApi = evergreen_conn.get_evergreen_api()
self.multiversion_setup = self._get_multiversion_setup()
self.task_info = self.evg_api.task_by_id(task_id)
self.dest_dir = None # Populated later.
@staticmethod
def _get_multiversion_setup():
# Add the args we care about.
download_options = _DownloadOptions(db=True, ds=True, da=False)
return SetupMultiversion(download_options=download_options, ignore_failed_push=True)
def _get_compile_artifacts(self):
version_id = self.task_info.version_id
buildvariant_name = self.task_info.build_variant
urls = self.multiversion_setup.get_urls(binary_version=None, evergreen_version=version_id,
buildvariant_name=buildvariant_name)
self.multiversion_setup.download_and_extract_from_urls(urls, bin_suffix=None,
install_dir=self.dest_dir)
def _patch_diff_by_id(self):
version_id = self.task_info.version_id
module_diffs = evergreen_conn.get_patch_module_diffs(self.evg_api, version_id)
# Not a patch build.
if not module_diffs:
return
for module_name, diff in module_diffs.items():
# TODO enterprise
if "mongodb-mongo-" in module_name:
with open(os.path.join(self.dest_dir, "patch.diff"), 'w') as git_diff_file:
git_diff_file.write(diff)
subprocess.run(["git", "apply", "patch.diff"], cwd=self.dest_dir, check=True)
def _get_source(self):
revision = self.task_info.revision
source_url = f"https://github.com/mongodb/mongo/archive/{revision}.zip"
# TODO: enterprise.
try:
# Get source for community. No need for entire repo to use `git apply [patch]`.
src_parent_dir = os.path.dirname(self.dest_dir)
try:
os.makedirs(src_parent_dir)
except FileExistsError:
pass
subprocess.run(["curl", "-L", "-o", "source.zip", source_url], cwd=src_parent_dir,
check=True)
subprocess.run(["unzip", "-q", "source.zip"], cwd=src_parent_dir, check=True)
subprocess.run(["rm", "source.zip"], cwd=src_parent_dir, check=True)
# Move the single extracted source directory into `self.dest_dir`.
src_dir = os.listdir(src_parent_dir)
if len(src_dir) != 1:
raise ValueError(
f"expected exactly 1 directory containing source file, got {src_dir}")
src_dir = src_dir[0]
os.rename(os.path.join(src_parent_dir, src_dir), self.dest_dir)
except subprocess.CalledProcessError as err:
LOGGER.error(err.stdout)
LOGGER.error(err.stderr)
raise
def _setup_symbols(self):
try:
self.dest_dir = os.path.join("build", "multiversion", self.task_info.build_id)
if os.path.isdir(self.dest_dir):
LOGGER.info(
"directory for build already exists, skipping fetching source and symbols")
return
LOGGER.info("Getting source from GitHub...")
self._get_source()
LOGGER.info("Downloading debug symbols and binaries, this may take a few minutes...")
self._get_compile_artifacts()
LOGGER.info("Applying patch diff (if any)...")
self._patch_diff_by_id()
except: # pylint: disable=bare-except
if self.dest_dir is not None:
LOGGER.warning("Removing downloaded directory due to error",
directory=self.dest_dir)
shutil.rmtree(self.dest_dir)
raise
def _parse_mongosymb_args(self):
symbolizer_path = self.mongosym_args.symbolizer_path
if symbolizer_path:
raise ValueError("Must use the default symbolizer from the toolchain,"
f"not {symbolizer_path}")
self.mongosym_args.symbolizer_path = DEFAULT_SYMBOLIZER_LOCATION
sym_search_path = self.mongosym_args.path_to_executable
if sym_search_path:
raise ValueError(f"Must not specify path_to_executable, the original path that "
f"generated the symbols will be used: {sym_search_path}")
# TODO: support non-hygienic builds.
self.mongosym_args.path_to_executable = os.path.join(self.dest_dir, "dist-test", "bin",
self.bin_name)
self.mongosym_args.src_dir_to_move = self.dest_dir
def execute(self) -> None:
"""
Set up debug symbols and source, then invoke mongosymb on the backtrace from stdin.
:return: None
"""
self._setup_symbols()
self._parse_mongosymb_args()
LOGGER.info("Invoking mongosymb...")
mongosymb.main(self.mongosym_args)
class SymbolizerPlugin(PluginInterface):
"""Symbolizer for MongoDB stacktraces."""
def add_subcommand(self, subparsers):
"""
Add 'symbolize' subcommand.
:param subparsers: argparse parser to add to
:return: None
"""
parser = subparsers.add_parser(_COMMAND, help=_HELP)
parser.add_argument(
"--task-id", '-t', action="store", type=str, required=True,
help="Fetch corresponding binaries and symbols given an Evergreen task ID")
# TODO: support multiple Evergreen executions.
parser.add_argument("--execution", "-e", action="store", type=int, default=0,
help=argparse.SUPPRESS)
parser.add_argument(
"--binary-name", "-b", action="store", type=str, default="mongod",
help="Base name of the binary that generated the stacktrace; e.g. `mongod` or `mongos`")
parser.add_argument("--debug", "-d", dest="debug", action="store_true", default=False,
help="Set DEBUG logging level.")
group = parser.add_argument_group(
"Verbatim mongosymb.py options for advanced usages",
description="Compatibility not guaranteed, use at your own risk")
mongosymb.make_argument_parser(group)
def parse(self, subcommand, parser, parsed_args, **kwargs):
"""
Return Symbolizer if command is one we recognize.
:param subcommand: equivalent to parsed_args.command
:param parser: parser used
:param parsed_args: output of parsing
:param kwargs: additional args
:return: None or a Subcommand
"""
if subcommand != _COMMAND:
return None
setup_logging(parsed_args.debug)
task_id = parsed_args.task_id
binary_name = parsed_args.binary_name
if not task_id:
raise ValueError(
"A valid Evergreen Task ID is required. You can get it by double clicking the"
" Evergreen URL after `/task/` on any task page")
if not binary_name:
raise ValueError("A binary base name is required. This is usually `mongod` or `mongos`")
# Check that the MongoDB toolchain symbolizer is available (effectively a Linux-only check).
if not os.path.isfile(DEFAULT_SYMBOLIZER_LOCATION):
raise ValueError("llvm-symbolizer in MongoDB toolchain not found. Please run this on a "
"virtual workstation or install the toolchain manually")
if not os.access("/data/mci", os.W_OK):
raise ValueError("Please ensure you have write access to /data/mci. "
"E.g. with `sudo mkdir -p /data/mci; sudo chown $USER /data/mci`")
return Symbolizer(task_id, parsed_args.execution, binary_name, parsed_args)
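For reference, a minimal sketch (not part of this diff) of how the plugin can be driven outside of resmoke; resmoke builds an equivalent parser itself, and the task ID below is a placeholder. Running it needs the MongoDB toolchain at the default location, Evergreen credentials, write access to /data/mci, and a backtrace JSON on stdin.

import argparse

from buildscripts.resmokelib.symbolizer import SymbolizerPlugin

plugin = SymbolizerPlugin()
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command")
plugin.add_subcommand(subparsers)

parsed = parser.parse_args(["symbolize", "--task-id", "placeholder_task_id"])
subcommand = plugin.parse(parsed.command, parser, parsed)
if subcommand is not None:
    subcommand.execute()  # fetches symbols/source, then calls mongosymb.main()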

View File

@ -0,0 +1,233 @@
"""Helper functions to interact with evergreen."""
import os
import requests
import structlog
from requests import HTTPError
from evergreen import RetryingEvergreenApi, Patch
EVERGREEN_HOST = "https://evergreen.mongodb.com"
EVERGREEN_CONFIG_LOCATIONS = (
# Common for machines in Evergreen
os.path.join(os.getcwd(), ".evergreen.yml"),
# Common for local machines
os.path.expanduser(os.path.join("~", ".evergreen.yml")),
)
GENERIC_EDITION = "base"
GENERIC_PLATFORM = "linux_x86_64"
GENERIC_ARCHITECTURE = "x86_64"
LOGGER = structlog.getLogger(__name__)
class EvergreenConnError(Exception):
"""Errors in evergreen_conn.py."""
pass
def get_evergreen_api(evergreen_config=None):
"""Return evergreen API."""
config_to_pass = evergreen_config
if not config_to_pass:
# Pick up the first config file found in common locations.
for file in EVERGREEN_CONFIG_LOCATIONS:
if os.path.isfile(file):
config_to_pass = file
break
try:
evg_api = RetryingEvergreenApi.get_api(config_file=config_to_pass)
except Exception as ex:
LOGGER.error("Most likely something is wrong with evergreen config file.",
config_file=config_to_pass)
raise ex
else:
return evg_api
def get_buildvariant_name(config, edition, platform, architecture, major_minor_version):
"""Return Evergreen buildvariant name."""
buildvariant_name = ""
evergreen_buildvariants = config.evergreen_buildvariants
for buildvariant in evergreen_buildvariants:
if (buildvariant.edition == edition and buildvariant.platform == platform
and buildvariant.architecture == architecture):
versions = buildvariant.versions
if major_minor_version in versions:
buildvariant_name = buildvariant.name
break
elif not versions:
buildvariant_name = buildvariant.name
return buildvariant_name
# pylint: disable=protected-access
def get_patch_module_diffs(evg_api, version_id):
"""Get the raw git diffs for all modules."""
evg_url = evg_api._create_url(f"/patches/{version_id}")
try:
res = evg_api._call_api(evg_url)
except requests.exceptions.HTTPError as err:
err_res = err.response
if err_res.status_code == 400:
LOGGER.debug("Not a patch build task, skipping applying patch",
version_id_of_task=version_id)
return None
else:
raise
patch = Patch(res.json(), evg_api)
res = {}
for module_code_change in patch.module_code_changes:
git_diff_link = module_code_change.raw_link
raw = evg_api._call_api(git_diff_link)
diff = raw.text
res[module_code_change.branch_name] = diff
return res
def get_generic_buildvariant_name(config, major_minor_version):
"""Return Evergreen buildvariant name for generic platform."""
LOGGER.info("Falling back to generic architecture.", edition=GENERIC_EDITION,
platform=GENERIC_PLATFORM, architecture=GENERIC_ARCHITECTURE)
generic_buildvariant_name = get_buildvariant_name(
config=config, edition=GENERIC_EDITION, platform=GENERIC_PLATFORM,
architecture=GENERIC_ARCHITECTURE, major_minor_version=major_minor_version)
if not generic_buildvariant_name:
raise EvergreenConnError("Generic architecture buildvariant not found.")
return generic_buildvariant_name
def get_evergreen_project_and_version(config, evg_api, commit_hash):
"""Return evergreen project and version by commit hash."""
for evg_project in config.evergreen_projects:
try:
version_id = evg_project.replace("-", "_") + "_" + commit_hash
evg_version = evg_api.version_by_id(version_id)
except HTTPError:
continue
else:
LOGGER.debug("Found evergreen version.",
evergreen_version=f"{EVERGREEN_HOST}/version/{evg_version.version_id}")
return evg_project, evg_version
raise EvergreenConnError(f"Evergreen version for commit hash {commit_hash} not found.")
def get_evergreen_project(config, evg_api, evergreen_version_id):
"""Return evergreen project for a given Evergreen version."""
for evg_project in config.evergreen_projects:
try:
evg_version = evg_api.version_by_id(evergreen_version_id)
except HTTPError:
continue
else:
LOGGER.debug("Found evergreen version.",
evergreen_version=f"{EVERGREEN_HOST}/version/{evg_version.version_id}")
return evg_project, evg_version
raise EvergreenConnError(f"Evergreen version {evergreen_version_id} not found.")
def get_evergreen_versions(evg_api, evg_project):
"""Return the list of evergreen versions by evergreen project name."""
return evg_api.versions_by_project(evg_project)
def get_compile_artifact_urls(evg_api, evg_version, buildvariant_name, ignore_failed_push=False):
"""Return compile urls from buildvariant in Evergreen version."""
try:
build_id = evg_version.build_variants_map[buildvariant_name]
except KeyError:
raise EvergreenConnError(f"Buildvariant {buildvariant_name} not found.")
evg_build = evg_api.build_by_id(build_id)
LOGGER.debug("Found evergreen build.", evergreen_build=f"{EVERGREEN_HOST}/build/{build_id}")
evg_tasks = evg_build.get_tasks()
tasks_wrapper = _filter_successful_tasks(evg_tasks)
# If requested, ignore the push task entirely; otherwise require that a push task exists.
if ignore_failed_push:
tasks_wrapper.push_task = None
elif tasks_wrapper.push_task is None:
return {}
return _get_multiversion_urls(tasks_wrapper)
def _get_multiversion_urls(tasks_wrapper):
compile_artifact_urls = {}
binary = tasks_wrapper.binary_task
push = tasks_wrapper.push_task
symbols = tasks_wrapper.symbols_task
required_tasks = [binary, push] if push is not None else [binary]
if all(task and task.status == "success" for task in required_tasks):
LOGGER.info("Required evergreen task(s) were successful.",
required_tasks=f"{required_tasks}",
task_id=f"{EVERGREEN_HOST}/task/{required_tasks[0].task_id}")
evg_artifacts = binary.artifacts
for artifact in evg_artifacts:
compile_artifact_urls[artifact.name] = artifact.url
if symbols and symbols.status == "success":
for artifact in symbols.artifacts:
compile_artifact_urls[artifact.name] = artifact.url
elif symbols and symbols.task_id:
LOGGER.warning("debug symbol archive was unsuccessful",
archive_symbols_task=f"{EVERGREEN_HOST}/task/{symbols.task_id}")
# Tack on the project id for generating a friendly decompressed name for the artifacts.
compile_artifact_urls["project_id"] = binary.project_id
elif all(task for task in required_tasks):
LOGGER.warning("Required Evergreen task(s) were not successful.",
required_tasks=f"{required_tasks}",
task_id=f"{EVERGREEN_HOST}/task/{required_tasks[0].task_id}")
else:
LOGGER.error("There are no `compile` and/or 'push' tasks in the evergreen build")
return compile_artifact_urls
class _MultiversionTasks(object):
"""Tasks relevant for multiversion setup."""
def __init__(self, symbols, binary, push):
"""Init function."""
self.symbols_task = symbols
self.binary_task = binary
self.push_task = push
def _filter_successful_tasks(evg_tasks) -> _MultiversionTasks:
compile_task = None
archive_symbols_task = None
push_task = None
for evg_task in evg_tasks:
# Only set the compile task if there isn't one already, otherwise
# newer tasks like "archive_dist_test_debug" take precedence.
if evg_task.display_name in ("compile", "archive_dist_test") and compile_task is None:
compile_task = evg_task
elif evg_task.display_name == "push":
push_task = evg_task
elif evg_task.display_name == "archive_dist_test_debug":
archive_symbols_task = evg_task
if compile_task and push_task and archive_symbols_task:
break
return _MultiversionTasks(symbols=archive_symbols_task, binary=compile_task, push=push_task)
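To make the task-selection rules above concrete, a short sketch (not from this commit) using stand-in task objects; EvgTask mimics only the fields used below. Note that task status is checked later, in _get_multiversion_urls, not here.

from collections import namedtuple

from buildscripts.resmokelib.utils.evergreen_conn import _filter_successful_tasks

# Stand-in for evergreen.task.Task with just the fields used below.
EvgTask = namedtuple("EvgTask", ["display_name", "status", "task_id"])

tasks = [
    EvgTask("compile", "success", "t_compile"),
    EvgTask("archive_dist_test", "success", "t_archive"),  # ignored: binary task already set
    EvgTask("archive_dist_test_debug", "success", "t_symbols"),
    EvgTask("push", "failed", "t_push"),
]

wrapper = _filter_successful_tasks(tasks)
assert wrapper.binary_task.task_id == "t_compile"
assert wrapper.symbols_task.task_id == "t_symbols"
assert wrapper.push_task.task_id == "t_push"  # present even though it failed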

View File

@ -5,9 +5,9 @@ from argparse import Namespace
from mock import patch
from buildscripts.resmokelib.setup_multiversion import evergreen_conn
from buildscripts.resmokelib.utils import evergreen_conn
from buildscripts.resmokelib.setup_multiversion.config import SetupMultiversionConfig
from buildscripts.resmokelib.setup_multiversion.setup_multiversion import SetupMultiversion
from buildscripts.resmokelib.setup_multiversion.setup_multiversion import SetupMultiversion, _DownloadOptions
class TestSetupMultiversionBase(unittest.TestCase):
@ -36,6 +36,9 @@ class TestSetupMultiversionBase(unittest.TestCase):
},
]
}
download_options = _DownloadOptions(db=True, ds=False, da=False)
options = Namespace(
install_dir="install",
link_dir="link",
@ -44,17 +47,15 @@ class TestSetupMultiversionBase(unittest.TestCase):
architecture=architecture,
use_latest=False,
versions=["4.2.1"],
download_symbols=False,
download_binaries=True,
download_artifacts=False,
evergreen_config=None,
github_oauth_token=None,
download_options=download_options,
debug=False,
)
with patch("buildscripts.resmokelib.setup_multiversion.config.SetupMultiversionConfig"
) as mock_config:
mock_config.return_value = SetupMultiversionConfig(raw_yaml_config)
self.setup_multiversion = SetupMultiversion(options)
self.setup_multiversion = SetupMultiversion(**vars(options))
class TestSetupMultiversionGetLatestUrls(TestSetupMultiversionBase):
@ -66,7 +67,7 @@ class TestSetupMultiversionGetLatestUrls(TestSetupMultiversionBase):
@patch("evergreen.version.Version")
@patch("evergreen.api.EvergreenApi.versions_by_project")
@patch("buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_compile_artifact_urls")
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_compile_artifact_urls")
def test_no_compile_artifacts(self, mock_get_compile_artifact_urls, mock_versions_by_project,
mock_version):
mock_version.build_variants_map = {self.buildvariant_name: "build_id"}
@ -78,7 +79,7 @@ class TestSetupMultiversionGetLatestUrls(TestSetupMultiversionBase):
@patch("evergreen.version.Version")
@patch("evergreen.api.EvergreenApi.versions_by_project")
@patch("buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_compile_artifact_urls")
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_compile_artifact_urls")
def test_urls_found_on_last_version(self, mock_get_compile_artifact_urls,
mock_versions_by_project, mock_version):
expected_urls = {
@ -96,7 +97,7 @@ class TestSetupMultiversionGetLatestUrls(TestSetupMultiversionBase):
@patch("evergreen.version.Version")
@patch("evergreen.version.Version")
@patch("evergreen.api.EvergreenApi.versions_by_project")
@patch("buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_compile_artifact_urls")
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_compile_artifact_urls")
def test_urls_found_on_not_last_version(self, mock_get_compile_artifact_urls,
mock_versions_by_project, mock_version,
mock_expected_version):
@ -110,18 +111,21 @@ class TestSetupMultiversionGetLatestUrls(TestSetupMultiversionBase):
evg_versions = [mock_version for _ in range(3)]
evg_versions.append(mock_expected_version)
mock_versions_by_project.return_value = evg_versions
mock_get_compile_artifact_urls.side_effect = lambda evg_api, evg_version, buildvariant_name: {
(self.setup_multiversion.evg_api, mock_version, self.buildvariant_name): {},
(self.setup_multiversion.evg_api, mock_expected_version, self.buildvariant_name):
print(self.setup_multiversion.evg_api)
print(self.buildvariant_name)
print(mock_version)
mock_get_compile_artifact_urls.side_effect = lambda evg_api, evg_version, buildvariant_name, ignore_failed_push: {
(self.setup_multiversion.evg_api, mock_version, self.buildvariant_name, False): {},
(self.setup_multiversion.evg_api, mock_expected_version, self.buildvariant_name, False):
expected_urls,
}[evg_api, evg_version, buildvariant_name]
}[evg_api, evg_version, buildvariant_name, ignore_failed_push]
urls = self.setup_multiversion.get_latest_urls("4.4")
self.assertEqual(urls, expected_urls)
@patch("evergreen.version.Version")
@patch("evergreen.api.EvergreenApi.versions_by_project")
@patch("buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_compile_artifact_urls")
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_compile_artifact_urls")
def test_fallback_to_generic_buildvariant(self, mock_get_compile_artifact_urls,
mock_versions_by_project, mock_version):
expected_urls = {
@ -139,11 +143,9 @@ class TestSetupMultiversionGetLatestUrls(TestSetupMultiversionBase):
class TestSetupMultiversionGetUrls(TestSetupMultiversionBase):
@patch("evergreen.version.Version")
@patch(
"buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_evergreen_project_and_version"
)
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_evergreen_project_and_version")
@patch("buildscripts.resmokelib.setup_multiversion.github_conn.get_git_tag_and_commit")
@patch("buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_compile_artifact_urls")
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_compile_artifact_urls")
def test_urls_found(self, mock_get_compile_artifact_urls, mock_get_git_tag_and_commit,
mock_get_evergreen_project_and_version, mock_version):
expected_urls = {
@ -160,11 +162,9 @@ class TestSetupMultiversionGetUrls(TestSetupMultiversionBase):
self.assertEqual(urls, expected_urls)
@patch("evergreen.version.Version")
@patch(
"buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_evergreen_project_and_version"
)
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_evergreen_project_and_version")
@patch("buildscripts.resmokelib.setup_multiversion.github_conn.get_git_tag_and_commit")
@patch("buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_compile_artifact_urls")
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_compile_artifact_urls")
def test_urls_not_found(self, mock_get_compile_artifact_urls, mock_get_git_tag_and_commit,
mock_get_evergreen_project_and_version, mock_version):
mock_get_git_tag_and_commit.return_value = ("git_tag", "commit_hash")
@ -176,11 +176,9 @@ class TestSetupMultiversionGetUrls(TestSetupMultiversionBase):
self.assertEqual(urls, {})
@patch("evergreen.version.Version")
@patch(
"buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_evergreen_project_and_version"
)
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_evergreen_project_and_version")
@patch("buildscripts.resmokelib.setup_multiversion.github_conn.get_git_tag_and_commit")
@patch("buildscripts.resmokelib.setup_multiversion.evergreen_conn.get_compile_artifact_urls")
@patch("buildscripts.resmokelib.utils.evergreen_conn.get_compile_artifact_urls")
def test_fallback_to_generic_buildvariant(self, mock_get_compile_artifact_urls,
mock_get_git_tag_and_commit,
mock_get_evergreen_project_and_version, mock_version):

View File

@ -1,4 +1,4 @@
"""Unit tests for buildscripts/resmokelib/setup_multiversion/evergreen_conn.py."""
"""Unit tests for buildscripts/resmokelib/utils/evergreen_conn.py."""
# pylint: disable=missing-docstring,too-many-arguments
import unittest
@ -6,7 +6,7 @@ from mock import patch
from requests import HTTPError
from evergreen import RetryingEvergreenApi
from buildscripts.resmokelib.setup_multiversion import evergreen_conn
from buildscripts.resmokelib.utils import evergreen_conn
from buildscripts.resmokelib.setup_multiversion.config import SetupMultiversionConfig