SERVER-67060 add build metrics evergreen tasks and cedar reporting

Daniel Moody 2022-09-02 23:21:52 +00:00 committed by Evergreen Agent
parent 4defabfabd
commit 026f76cae1
8 changed files with 511 additions and 4 deletions

View File

@@ -0,0 +1,145 @@
#!/usr/bin/env python3
import os
import sys
from shrub.v2 import ShrubProject, Task, BuildVariant, FunctionCall, TaskGroup
from shrub.v2.command import BuiltInCommand
def main():
tasks = {
'windows_tasks': [],
'linux_tasks': [],
'macos_tasks': [],
}
def create_build_metric_task_steps(task_build_flags, task_targets):
evg_flags = f"--debug=time,count,memory VARIANT_DIR=metrics BUILD_METRICS_EVG_TASK_ID={os.environ['task_id']} BUILD_METRICS_EVG_BUILD_VARIANT={os.environ['build_variant']}"
cache_flags = "--cache=all --cache-dir=$PWD/scons-cache --cache-signature-mode=validate"
        scons_task_steps = [
            f"{evg_flags} --build-metrics=build_metrics.json",
            f"{evg_flags} {cache_flags} --cache-populate --build-metrics=populate_cache.json",
            f"{evg_flags} --clean",
            f"{evg_flags} {cache_flags} --build-metrics=pull_cache.json",
        ]
task_steps = [
FunctionCall(
"scons compile", {
"task_compile_flags": f"{task_build_flags} {step_flags}",
"targets": task_targets,
"compiling_for_test": "true",
}) for step_flags in scons_task_steps
]
task_steps.append(FunctionCall("print top N metrics"))
return task_steps
#############################
if sys.platform == 'win32':
targets = "install-all-meta"
build_flags = ""
tasks['windows_tasks'].append(
Task("build_metrics_msvc", create_build_metric_task_steps(build_flags, targets)))
##############################
elif sys.platform == 'darwin':
for link_model in ['dynamic', 'static']:
targets = "install-all-meta" + " generate-libdeps-graph" if link_model == 'dynamic' else ""
build_flags = f"--link-model={link_model} --force-macos-dynamic-link"
tasks['macos_tasks'].append(
Task(f"build_metrics_xcode_{link_model}",
create_build_metric_task_steps(build_flags, targets)))
##############################
else:
for toolchain in ['v3', 'v4']:
            # we may want to add clang to the mix here, so leave this as an easy drop-in
for compiler in ['gcc']:
for link_model in ['dynamic', 'static']:
targets = "install-all-meta" + " generate-libdeps-graph" if link_model == 'dynamic' else ""
build_flags = f"BUILD_METRICS_BLOATY=/opt/mongodbtoolchain/v4/bin/bloaty --variables-files=etc/scons/mongodbtoolchain_{toolchain}_{compiler}.vars --link-model={link_model}"
tasks['linux_tasks'].append(
Task(f"build_metrics_{toolchain}_{compiler}_{link_model}",
create_build_metric_task_steps(build_flags, targets)))
def create_task_group(platform, tasks):
task_group = TaskGroup(
name=f'build_metrics_{platform}_task_group_gen',
tasks=tasks,
max_hosts=len(tasks),
setup_group=[
BuiltInCommand("manifest.load", {}),
FunctionCall("git get project and add git tag"),
FunctionCall("set task expansion macros"),
FunctionCall("f_expansions_write"),
FunctionCall("kill processes"),
FunctionCall("cleanup environment"),
FunctionCall("set up venv"),
FunctionCall("upload pip requirements"),
FunctionCall("get all modified patch files"),
FunctionCall("f_expansions_write"),
FunctionCall("configure evergreen api credentials"),
FunctionCall("get buildnumber"),
FunctionCall("f_expansions_write"),
FunctionCall("generate compile expansions"),
FunctionCall("f_expansions_write"),
],
setup_task=[
FunctionCall("f_expansions_write"),
FunctionCall("apply compile expansions"),
FunctionCall("set task expansion macros"),
FunctionCall("f_expansions_write"),
],
teardown_group=[
FunctionCall("f_expansions_write"),
FunctionCall("cleanup environment"),
],
teardown_task=[
FunctionCall("f_expansions_write"),
FunctionCall("attach scons logs"),
FunctionCall("attach build metrics"),
FunctionCall("kill processes"),
FunctionCall("save disk statistics"),
FunctionCall("save system resource information"),
FunctionCall("remove files",
{'files': ' '.join(['src/build', 'src/scons-cache', '*.tgz'])}),
],
setup_group_can_fail_task=True,
)
return task_group
if sys.platform == 'win32':
variant = BuildVariant(
name="enterprise-windows-build-metrics",
activate=True,
)
variant.add_task_group(
create_task_group('windows', tasks['windows_tasks']), ['windows-vsCurrent-large'])
elif sys.platform == 'darwin':
variant = BuildVariant(
name="macos-enterprise-build-metrics",
activate=True,
)
variant.add_task_group(create_task_group('macos', tasks['macos_tasks']), ['macos-1100'])
else:
variant = BuildVariant(
name="enterprise-rhel-80-64-bit-build-metrics",
activate=True,
)
variant.add_task_group(create_task_group('linux', tasks['linux_tasks']), ['rhel80-xlarge'])
project = ShrubProject({variant})
with open('build_metrics_task_gen.json', 'w') as fout:
fout.write(project.json())
if __name__ == "__main__":
main()
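
The generator reads the Evergreen task_id and build_variant expansions from the environment and writes build_metrics_task_gen.json for generate.tasks. A minimal sketch of running it locally, assuming a MongoDB checkout with the shrub package installed; the stubbed values are placeholders, not real Evergreen IDs:

# Hypothetical local invocation of buildscripts/evergreen_gen_build_metrics_tasks.py.
# task_id and build_variant are normally provided by Evergreen; stub them here.
import os
import subprocess

os.environ.setdefault("task_id", "local_test_task_id")  # placeholder
os.environ.setdefault("build_variant", "enterprise-rhel-80-64-bit-build-metrics")
subprocess.run(
    ["python3", "buildscripts/evergreen_gen_build_metrics_tasks.py"],
    check=True,
)
# On success the script writes build_metrics_task_gen.json in the current directory.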

View File

@@ -1962,6 +1962,47 @@ buildvariants:
- name: generate_buildid_to_debug_symbols_mapping
- name: enterprise-rhel-80-64-bit-build-metrics
display_name: "~ Build Metrics Enterprise RHEL 8.0 "
cron: "0 4 * * *" # From the ${project_required_suggested_cron} parameter
stepback: false
modules:
- enterprise
run_on:
- rhel80-build
expansions:
compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo)
repo_edition: enterprise
large_distro_name: rhel80-large
tasks:
- name: build_metrics_gen_TG
- name: macos-enterprise-build-metrics
display_name: "~ Build Metrics Enterprise MacOS"
cron: "0 4 * * *" # From the ${project_nightly_cron} parameter.
stepback: false
modules:
- enterprise
run_on:
- macos-1100
expansions:
compile_env: DEVELOPER_DIR=/Applications/Xcode13.app
compile_flags: --ssl -j$(sysctl -n hw.logicalcpu) --libc++ --variables-files=etc/scons/xcode_macosx.vars
tasks:
- name: build_metrics_gen_TG
- name: enterprise-windows-build-metrics
display_name: "~ Build Metrics Enterprise Windows"
cron: "0 4 * * *" # From the ${project_nightly_cron} parameter.
stepback: false
run_on:
- windows-vsCurrent-small
expansions:
compile_flags: CPPPATH="c:/sasl/include" LIBPATH="c:/sasl/lib" -j$(( $(grep -c ^processor /proc/cpuinfo) / 2 )) --win-version-min=win10
python: '/cygdrive/c/python/python37/python.exe'
tasks:
- name: build_metrics_gen_TG
# This variant is to intentionally test uncommon features nightly
#- <<: *enterprise-rhel-70-64-bit-template
# name: enterprise-rhel-70-64-bit-kitchen-sink

View File

@@ -2041,6 +2041,87 @@ functions:
params:
file_location: ${report_file|src/report.json}
"print top N metrics":
- command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "site_scons/site_tools/build_metrics/top_n_metrics.py"
- "--input=build_metrics.json"
- "--output=top_15_metrics.txt"
- "--num=15"
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: src/top_15_metrics.txt
remote_file: ${project}/${build_variant}/${revision}/${build_id}-${task_name}-${execution}-top_15_metrics.txt
bucket: mciuploads
permissions: public-read
content_type: text/plain
display_name: Top 15 Metrics
"attach build metrics":
- command: archive.targz_pack
params:
target: build-metrics.tgz
source_dir: src
include:
- "./build_metrics.json"
- "./populate_cache.json"
- "./pull_cache.json"
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: build-metrics.tgz
remote_file: ${project}/${build_variant}/${revision}/${build_id}-${task_name}-${execution}-build-metrics.tgz
bucket: mciuploads
permissions: public-read
content_type: application/gzip
display_name: Metrics JSON
- command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "evergreen/build_metric_cedar_report.py"
- "--build-metrics=build_metrics.json"
- "--cache-push-metrics=populate_cache.json"
- "--cache-pull-metrics=pull_cache.json"
- command: archive.targz_pack
params:
target: build_metrics_cedar_report.tgz
source_dir: src
include:
- "./build_metrics_cedar_report.json"
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: build_metrics_cedar_report.tgz
remote_file: ${project}/${build_variant}/${revision}/${build_id}-${task_name}-${execution}-build_metrics_cedar_report.tgz
bucket: mciuploads
permissions: public-read
content_type: application/gzip
display_name: Cedar Report JSON
- command: perf.send
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
bucket: mciuploads
prefix: ${task_id}_${execution}
file: src/build_metrics_cedar_report.json
"attach artifacts":
command: attach.artifacts
params:
@@ -2548,6 +2629,43 @@ tasks:
targets: install-unittests install-unittests-debug
compiling_for_test: true
- name: build_metrics_tasks_gen
depends_on:
- name: version_expansions_gen
variant: generate-tasks-for-version
commands:
- command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "buildscripts/evergreen_gen_build_metrics_tasks.py"
- command: archive.targz_pack
params:
target: build_metrics_task_gen.tgz
source_dir: "src"
include:
- "build_metrics_task_gen.json"
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: build_metrics_task_gen.tgz
remote_file: ${project}/${build_variant}/${revision}/build-metrics/${task_name}-${build_id}.tgz
bucket: mciuploads
permissions: public-read
content_type: application/gzip
display_name: Build Metrics Task Gen JSON
- command: generate.tasks
params:
files:
- src/build_metrics_task_gen.json
## A copy of the compile_unittests task for the recorded unittest taskgroup ##
- <<: *compile_unittests
name: compile_unittests_for_recorded_unittest
@@ -7719,6 +7837,11 @@ task_groups:
- run_dbtest
- archive_dbtest
- <<: *compile_task_group_template
name: build_metrics_gen_TG
tasks:
- build_metrics_tasks_gen
- name: clang_tidy_TG
setup_group_can_fail_task: true
setup_group:
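
The build_metrics_gen_TG group above runs build_metrics_tasks_gen, whose JSON output feeds the generate.tasks command. A quick local sanity check of that output, assuming it follows the generate.tasks schema with top-level "tasks" and "buildvariants" lists:

# Sketch: list the generated task and variant names from build_metrics_task_gen.json.
# The top-level keys are an assumption based on Evergreen's generate.tasks format.
import json

with open("build_metrics_task_gen.json") as fh:
    config = json.load(fh)

print("tasks:", [t["name"] for t in config.get("tasks", [])])
print("variants:", [v["name"] for v in config.get("buildvariants", [])])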

View File

@@ -2,3 +2,4 @@ psutil
jsonschema
memory_profiler
puremagic
tabulate

View File

@@ -0,0 +1,111 @@
import json
import sys
import argparse
parser = argparse.ArgumentParser(description='Generate a cedar report from build metrics json files.')
parser.add_argument('--build-metrics', metavar='FILE', type=str, default='build_metrics.json',
help='Path to build metrics input json.')
parser.add_argument('--cache-pull-metrics', metavar='FILE', type=str, default='pull_cache.json',
help='Path to build metrics for cache pull input json.')
parser.add_argument('--cache-push-metrics', metavar='FILE', type=str, default='populate_cache.json',
help='Path to build metrics for cache push input json.')
args = parser.parse_args()
clean_build_metrics_json = args.build_metrics
populate_cache_metrics_json = args.cache_push_metrics
pull_cache_metrics_json = args.cache_pull_metrics
cedar_report = []
def single_metric_test(test_name, metric_name, value):
return {
"info": {
"test_name": test_name,
},
"metrics": [
{
"name": metric_name,
"value": round(value, 2)
},
]
}
with open(clean_build_metrics_json) as f:
aggregated_build_tasks = {}
build_metrics = json.load(f)
for task in build_metrics['build_tasks']:
outputs_key = ' '.join(task['outputs'])
if outputs_key in aggregated_build_tasks:
aggregated_build_tasks[outputs_key]['mem_usage'] += task['mem_usage']
aggregated_build_tasks[outputs_key]['time'] += (task['end_time'] - task['start_time'])
else:
aggregated_build_tasks[outputs_key] = {
'mem_usage': task['mem_usage'],
'time': task['end_time'] - task['start_time'],
}
for output_files in aggregated_build_tasks:
cedar_report.append({
"info": {
"test_name": output_files,
},
"metrics": [
{
"name": "seconds",
"value": round(aggregated_build_tasks[output_files]['time'] / (10.0**9.0), 2)
},
{
"name": "MBs",
"value": round(aggregated_build_tasks[output_files]['mem_usage'] / 1024.0 / 1024.0, 2)
},
]
})
cedar_report.append(single_metric_test("SCons memory usage", "MBs", build_metrics['scons_metrics']['memory']['post_build'] / 1024.0 / 1024.0))
cedar_report.append(single_metric_test("System Memory Peak", "MBs", build_metrics['system_memory']['max'] / 1024.0 / 1024.0))
cedar_report.append(single_metric_test("Total Build time", "seconds", build_metrics['scons_metrics']['time']['total']))
cedar_report.append(single_metric_test("Total Build output size", "MBs", build_metrics['artifact_metrics']['total_artifact_size'] / 1024.0 / 1024.0))
mongod_metrics = None
for artifact in build_metrics['artifact_metrics']['artifacts']:
if artifact['name'] == 'build/metrics/mongo/db/mongod':
mongod_metrics = artifact
break
if mongod_metrics and mongod_metrics.get('bin_metrics'):
cedar_report.append(single_metric_test("Mongod debug info size", "MBs", mongod_metrics['bin_metrics']['debug']['filesize'] / 1024.0 / 1024.0))
with open(populate_cache_metrics_json) as f:
build_metrics = json.load(f)
cedar_report.append({
"info": {
"test_name": "cache_push_time",
},
"metrics": [
{
"name": "seconds",
"value": build_metrics["cache_metrics"]['push_time'] / (10.0**9.0)
},
]
})
with open(pull_cache_metrics_json) as f:
build_metrics = json.load(f)
cedar_report.append({
"info": {
"test_name": "cache_pull_time",
},
"metrics": [
{
"name": "seconds",
"value": build_metrics["cache_metrics"]['pull_time'] / (10.0**9.0)
},
]
})
with open("build_metrics_cedar_report.json", "w") as fh:
json.dump(cedar_report, fh)
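
A small sketch for inspecting the report this script writes, relying only on the entry shape defined above (an "info.test_name" plus a list of "metrics"):

# Print a summary of build_metrics_cedar_report.json produced by the script above.
import json

with open("build_metrics_cedar_report.json") as fh:
    report = json.load(fh)

for entry in report:
    name = entry["info"]["test_name"]
    metrics = ", ".join(f"{m['name']}={m['value']}" for m in entry["metrics"])
    print(f"{name}: {metrics}")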

View File

@@ -171,7 +171,7 @@ class CollectArtifacts(BuildMetricsCollector):
self._artifacts += list(map(lambda x: os.path.join(root, x), files))
def finalize(self):
-        self.walk(self._env.Dir(self._env.subst(self._build_dir)).abspath)
+        self.walk(self._env.Dir(self._env.subst(self._build_dir)).path)
for artifact in self._artifacts:
artifact_dict = self._identify_artifact(artifact)
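
Switching from abspath to path records artifact names relative to the build root, which is what the cedar report script expects when it looks up 'build/metrics/mongo/db/mongod'. A rough illustration of the difference using plain os.path rather than SCons nodes:

# Illustrative only: absolute vs. root-relative artifact naming (plain os.path,
# not the SCons node API used above).
import os

build_root = os.getcwd()  # stand-in for the directory holding the SConstruct
artifact = os.path.join(build_root, "build", "metrics", "mongo", "db", "mongod")
print(artifact)                               # absolute, machine specific
print(os.path.relpath(artifact, build_root))  # build/metrics/mongo/db/mongod on POSIX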

View File

@@ -1,7 +1,8 @@
import functools
import subprocess
import time
import psutil
import platform
import memory_profiler
import SCons
import sys
@@ -179,8 +180,14 @@
task_metrics['end_time'] = time.time_ns()
task_metrics['cpu_time'] = int(cpu_usage * (10.0**9.0))
-        task_metrics['mem_usage'] = int(mem_usage)
+        # apparently macos big sur (11) changed some of the api for getting memory,
+        # so the memory comes up a bit larger than expected:
+        # https://github.com/giampaolo/psutil/issues/1908
+        if sys.platform == "darwin" and platform.mac_ver()[0] and int(
+                platform.mac_ver()[0].split('.')[0]) > 10:
+            task_metrics['mem_usage'] = int(mem_usage / 1024.0)
+        else:
+            task_metrics['mem_usage'] = int(mem_usage)
self.build_tasks_metrics.append(task_metrics)
task_metrics['array_index'] = self.build_tasks_metrics.index(task_metrics)
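
The /1024 correction above only applies on macOS 11 or newer; here is the same version check in isolation (the helper name is illustrative, not part of the tool):

# Return True when psutil's reported peak should be divided by 1024, per
# https://github.com/giampaolo/psutil/issues/1908 (macOS Big Sur and newer).
import platform
import sys

def needs_big_sur_memory_fix():
    if sys.platform != "darwin":
        return False
    ver = platform.mac_ver()[0]  # e.g. '12.6', empty string if unavailable
    return bool(ver) and int(ver.split('.')[0]) > 10

print(needs_big_sur_memory_fix())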

View File

@@ -0,0 +1,79 @@
import json
import sys
import datetime
import argparse
import logging
from tabulate import tabulate
parser = argparse.ArgumentParser(description='Print top n metrics from build metrics json files.')
parser.add_argument('--input', metavar='FILE', type=str, default='build-metrics.json',
help='Path to build metrics input json.')
parser.add_argument('--output', metavar='FILE', type=str, default="top_n_metrics.txt",
help='Path to output text file.')
parser.add_argument('--num', metavar='N', type=int, default=10,
help='Positive integer which represent the top N metrics to report on.')
args = parser.parse_args()
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.FileHandler(args.output))
log_format = logging.Formatter("%(message)s")
for handler in logger.handlers:
handler.setFormatter(log_format)
with open(args.input) as f:
metrics = json.load(f)
logger.info(f"Time of report: {datetime.datetime.now()}")
logger.info(f"Task ID: {metrics['evg_id']}")
logger.info(f"Distro: {metrics['variant']}")
logger.info(
f"Peak Memory Used:\n{round(metrics['system_memory']['max'] / 1024.0 / 1024.0, 2)} MBs")
logger.info(f"SCons Command:\n{metrics['scons_command']}")
build_tasks_sort = metrics['build_tasks'].copy()
build_tasks_sort.sort(reverse=True, key=lambda x: x['mem_usage'])
logger.info(f"\nTop {args.num} Memory tasks:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append([i, val['mem_usage'] / 1024.0 / 1024.0, val['outputs'][0]])
logger.info(tabulate(table_data, headers=['Num', 'MBs', 'Output'], floatfmt=".2f"))
build_tasks_sort = metrics['build_tasks'].copy()
build_tasks_sort.sort(reverse=True, key=lambda x: x['end_time'] - x['start_time'])
logger.info(f"\nTop {args.num} duration tasks:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append([i, (val['end_time'] - val['start_time']) / 10.0**9, val['outputs'][0]])
logger.info(tabulate(table_data, headers=['Num', 'Secs', 'Output'], floatfmt=".2f"))
build_tasks_sort = metrics['artifact_metrics']['artifacts'].copy()
build_tasks_sort.sort(reverse=True, key=lambda x: x['size'])
logger.info(f"\nTop {args.num} sized artifacts:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append([i, val['size'] / 1024.0 / 1024.0, val['name']])
logger.info(tabulate(table_data, headers=['Num', 'MBs', 'Output'], floatfmt=".2f"))
build_tasks_sort = [
metric for metric in metrics['artifact_metrics']['artifacts']
if metric.get('bin_metrics') and metric['bin_metrics'].get('text')
]
build_tasks_sort.sort(reverse=True, key=lambda x: x['bin_metrics']['text']['vmsize'])
logger.info(f"\nTop {args.num} Text sections:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append([i, val['bin_metrics']['text']['vmsize'] / 1024.0 / 1024.0, val['name']])
logger.info(tabulate(table_data, headers=['Num', 'MBs', 'Output'], floatfmt=".2f"))
build_tasks_sort = [
metric for metric in metrics['artifact_metrics']['artifacts']
if metric.get('bin_metrics') and metric['bin_metrics'].get('debug')
]
build_tasks_sort.sort(reverse=True, key=lambda x: x['bin_metrics']['debug']['filesize'])
logger.info(f"\nTop {args.num} Debug sections:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append(
[i, val['bin_metrics']['debug']['filesize'] / 1024.0 / 1024.0, val['name']])
logger.info(tabulate(table_data, headers=['Num', 'MBs', 'Output'], floatfmt=".2f"))
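
For reference, the "print top N metrics" Evergreen function invokes this script as shown below; the same invocation works locally from the repository root once a metrics build has produced build_metrics.json (a sketch, not part of the commit):

# Mirror of the Evergreen "print top N metrics" invocation for local use.
import subprocess

subprocess.run(
    [
        "python3",
        "site_scons/site_tools/build_metrics/top_n_metrics.py",
        "--input=build_metrics.json",
        "--output=top_15_metrics.txt",
        "--num=15",
    ],
    check=True,
)
# The report lands in top_15_metrics.txt, which Evergreen uploads as "Top 15 Metrics".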