SERVER-67060 add build metrics evergreen tasks and cedar reporting

Daniel Moody 2022-09-02 23:21:52 +00:00 committed by Evergreen Agent
parent 4defabfabd
commit 026f76cae1
8 changed files with 511 additions and 4 deletions

View File

@@ -0,0 +1,145 @@
#!/usr/bin/env python3
import os
import sys
from shrub.v2 import ShrubProject, Task, BuildVariant, FunctionCall, TaskGroup
from shrub.v2.command import BuiltInCommand
def main():
tasks = {
'windows_tasks': [],
'linux_tasks': [],
'macos_tasks': [],
}
def create_build_metric_task_steps(task_build_flags, task_targets):
evg_flags = f"--debug=time,count,memory VARIANT_DIR=metrics BUILD_METRICS_EVG_TASK_ID={os.environ['task_id']} BUILD_METRICS_EVG_BUILD_VARIANT={os.environ['build_variant']}"
cache_flags = "--cache=all --cache-dir=$PWD/scons-cache --cache-signature-mode=validate"
        scons_task_steps = [
            f"{evg_flags} --build-metrics=build_metrics.json",
            f"{evg_flags} {cache_flags} --cache-populate --build-metrics=populate_cache.json",
            f"{evg_flags} --clean",
            f"{evg_flags} {cache_flags} --build-metrics=pull_cache.json",
        ]
task_steps = [
FunctionCall(
"scons compile", {
"task_compile_flags": f"{task_build_flags} {step_flags}",
"targets": task_targets,
"compiling_for_test": "true",
}) for step_flags in scons_task_steps
]
task_steps.append(FunctionCall("print top N metrics"))
return task_steps
#############################
if sys.platform == 'win32':
targets = "install-all-meta"
build_flags = ""
tasks['windows_tasks'].append(
Task("build_metrics_msvc", create_build_metric_task_steps(build_flags, targets)))
##############################
elif sys.platform == 'darwin':
for link_model in ['dynamic', 'static']:
targets = "install-all-meta" + " generate-libdeps-graph" if link_model == 'dynamic' else ""
build_flags = f"--link-model={link_model} --force-macos-dynamic-link"
tasks['macos_tasks'].append(
Task(f"build_metrics_xcode_{link_model}",
create_build_metric_task_steps(build_flags, targets)))
##############################
else:
for toolchain in ['v3', 'v4']:
            # we may want to add clang to the mix here, so leave this as an easy drop-in
for compiler in ['gcc']:
for link_model in ['dynamic', 'static']:
targets = "install-all-meta" + " generate-libdeps-graph" if link_model == 'dynamic' else ""
build_flags = f"BUILD_METRICS_BLOATY=/opt/mongodbtoolchain/v4/bin/bloaty --variables-files=etc/scons/mongodbtoolchain_{toolchain}_{compiler}.vars --link-model={link_model}"
tasks['linux_tasks'].append(
Task(f"build_metrics_{toolchain}_{compiler}_{link_model}",
create_build_metric_task_steps(build_flags, targets)))
def create_task_group(platform, tasks):
task_group = TaskGroup(
name=f'build_metrics_{platform}_task_group_gen',
tasks=tasks,
max_hosts=len(tasks),
setup_group=[
BuiltInCommand("manifest.load", {}),
FunctionCall("git get project and add git tag"),
FunctionCall("set task expansion macros"),
FunctionCall("f_expansions_write"),
FunctionCall("kill processes"),
FunctionCall("cleanup environment"),
FunctionCall("set up venv"),
FunctionCall("upload pip requirements"),
FunctionCall("get all modified patch files"),
FunctionCall("f_expansions_write"),
FunctionCall("configure evergreen api credentials"),
FunctionCall("get buildnumber"),
FunctionCall("f_expansions_write"),
FunctionCall("generate compile expansions"),
FunctionCall("f_expansions_write"),
],
setup_task=[
FunctionCall("f_expansions_write"),
FunctionCall("apply compile expansions"),
FunctionCall("set task expansion macros"),
FunctionCall("f_expansions_write"),
],
teardown_group=[
FunctionCall("f_expansions_write"),
FunctionCall("cleanup environment"),
],
teardown_task=[
FunctionCall("f_expansions_write"),
FunctionCall("attach scons logs"),
FunctionCall("attach build metrics"),
FunctionCall("kill processes"),
FunctionCall("save disk statistics"),
FunctionCall("save system resource information"),
FunctionCall("remove files",
{'files': ' '.join(['src/build', 'src/scons-cache', '*.tgz'])}),
],
setup_group_can_fail_task=True,
)
return task_group
if sys.platform == 'win32':
variant = BuildVariant(
name="enterprise-windows-build-metrics",
activate=True,
)
variant.add_task_group(
create_task_group('windows', tasks['windows_tasks']), ['windows-vsCurrent-large'])
elif sys.platform == 'darwin':
variant = BuildVariant(
name="macos-enterprise-build-metrics",
activate=True,
)
variant.add_task_group(create_task_group('macos', tasks['macos_tasks']), ['macos-1100'])
else:
variant = BuildVariant(
name="enterprise-rhel-80-64-bit-build-metrics",
activate=True,
)
variant.add_task_group(create_task_group('linux', tasks['linux_tasks']), ['rhel80-xlarge'])
project = ShrubProject({variant})
with open('build_metrics_task_gen.json', 'w') as fout:
fout.write(project.json())
if __name__ == "__main__":
main()
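
The generator reads the Evergreen task_id and build_variant expansions from the environment and writes build_metrics_task_gen.json for generate.tasks. A minimal sketch of running it locally, assuming a MongoDB checkout with the shrub package installed; the stubbed values are placeholders, not real Evergreen IDs:

# Hypothetical local invocation of buildscripts/evergreen_gen_build_metrics_tasks.py.
# task_id and build_variant are normally provided by Evergreen; stub them here.
import os
import subprocess

os.environ.setdefault("task_id", "local_test_task_id")  # placeholder
os.environ.setdefault("build_variant", "enterprise-rhel-80-64-bit-build-metrics")
subprocess.run(
    ["python3", "buildscripts/evergreen_gen_build_metrics_tasks.py"],
    check=True,
)
# On success the script writes build_metrics_task_gen.json in the current directory.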

View File

@@ -1962,6 +1962,47 @@ buildvariants:
- name: generate_buildid_to_debug_symbols_mapping
- name: enterprise-rhel-80-64-bit-build-metrics
display_name: "~ Build Metrics Enterprise RHEL 8.0 "
cron: "0 4 * * *" # From the ${project_required_suggested_cron} parameter
stepback: false
modules:
- enterprise
run_on:
- rhel80-build
expansions:
compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo)
repo_edition: enterprise
large_distro_name: rhel80-large
tasks:
- name: build_metrics_gen_TG
- name: macos-enterprise-build-metrics
display_name: "~ Build Metrics Enterprise MacOS"
cron: "0 4 * * *" # From the ${project_nightly_cron} parameter.
stepback: false
modules:
- enterprise
run_on:
- macos-1100
expansions:
compile_env: DEVELOPER_DIR=/Applications/Xcode13.app
compile_flags: --ssl -j$(sysctl -n hw.logicalcpu) --libc++ --variables-files=etc/scons/xcode_macosx.vars
tasks:
- name: build_metrics_gen_TG
- name: enterprise-windows-build-metrics
display_name: "~ Build Metrics Enterprise Windows"
cron: "0 4 * * *" # From the ${project_nightly_cron} parameter.
stepback: false
run_on:
- windows-vsCurrent-small
expansions:
compile_flags: CPPPATH="c:/sasl/include" LIBPATH="c:/sasl/lib" -j$(( $(grep -c ^processor /proc/cpuinfo) / 2 )) --win-version-min=win10
python: '/cygdrive/c/python/python37/python.exe'
tasks:
- name: build_metrics_gen_TG
# This variant is to intentionally test uncommon features nightly
#- <<: *enterprise-rhel-70-64-bit-template
# name: enterprise-rhel-70-64-bit-kitchen-sink

View File

@@ -2041,6 +2041,87 @@ functions:
params:
file_location: ${report_file|src/report.json}
"print top N metrics":
- command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "site_scons/site_tools/build_metrics/top_n_metrics.py"
- "--input=build_metrics.json"
- "--output=top_15_metrics.txt"
- "--num=15"
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: src/top_15_metrics.txt
remote_file: ${project}/${build_variant}/${revision}/${build_id}-${task_name}-${execution}-top_15_metrics.txt
bucket: mciuploads
permissions: public-read
content_type: text/plain
display_name: Top 15 Metrics
"attach build metrics":
- command: archive.targz_pack
params:
target: build-metrics.tgz
source_dir: src
include:
- "./build_metrics.json"
- "./populate_cache.json"
- "./pull_cache.json"
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: build-metrics.tgz
remote_file: ${project}/${build_variant}/${revision}/${build_id}-${task_name}-${execution}-build-metrics.tgz
bucket: mciuploads
permissions: public-read
content_type: application/gzip
display_name: Metrics JSON
- command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "evergreen/build_metric_cedar_report.py"
- "--build-metrics=build_metrics.json"
- "--cache-push-metrics=populate_cache.json"
- "--cache-pull-metrics=pull_cache.json"
- command: archive.targz_pack
params:
target: build_metrics_cedar_report.tgz
source_dir: src
include:
- "./build_metrics_cedar_report.json"
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: build_metrics_cedar_report.tgz
remote_file: ${project}/${build_variant}/${revision}/${build_id}-${task_name}-${execution}-build_metrics_cedar_report.tgz
bucket: mciuploads
permissions: public-read
content_type: application/gzip
display_name: Cedar Report JSON
- command: perf.send
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
bucket: mciuploads
prefix: ${task_id}_${execution}
file: src/build_metrics_cedar_report.json
"attach artifacts":
command: attach.artifacts
params:
@@ -2548,6 +2629,43 @@ tasks:
targets: install-unittests install-unittests-debug
compiling_for_test: true
- name: build_metrics_tasks_gen
depends_on:
- name: version_expansions_gen
variant: generate-tasks-for-version
commands:
- command: subprocess.exec
params:
binary: bash
add_expansions_to_env: true
args:
- "src/evergreen/run_python_script.sh"
- "buildscripts/evergreen_gen_build_metrics_tasks.py"
- command: archive.targz_pack
params:
target: build_metrics_task_gen.tgz
source_dir: "src"
include:
- "build_metrics_task_gen.json"
- command: s3.put
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
local_file: build_metrics_task_gen.tgz
remote_file: ${project}/${build_variant}/${revision}/build-metrics/${task_name}-${build_id}.tgz
bucket: mciuploads
permissions: public-read
content_type: application/gzip
display_name: Build Metrics Task Gen JSON
- command: generate.tasks
params:
files:
- src/build_metrics_task_gen.json
## A copy of the compile_unittests task for the recorded unittest taskgroup ##
- <<: *compile_unittests
name: compile_unittests_for_recorded_unittest
@@ -7719,6 +7837,11 @@ task_groups:
- run_dbtest
- archive_dbtest
- <<: *compile_task_group_template
name: build_metrics_gen_TG
tasks:
- build_metrics_tasks_gen
- name: clang_tidy_TG
setup_group_can_fail_task: true
setup_group:
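
The build_metrics_gen_TG group above runs build_metrics_tasks_gen, whose JSON output feeds the generate.tasks command. A quick local sanity check of that output, assuming it follows the generate.tasks schema with top-level "tasks" and "buildvariants" lists:

# Sketch: list the generated task and variant names from build_metrics_task_gen.json.
# The top-level keys are an assumption based on Evergreen's generate.tasks format.
import json

with open("build_metrics_task_gen.json") as fh:
    config = json.load(fh)

print("tasks:", [t["name"] for t in config.get("tasks", [])])
print("variants:", [v["name"] for v in config.get("buildvariants", [])])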

View File

@@ -2,3 +2,4 @@ psutil
jsonschema
memory_profiler
puremagic
tabulate

View File

@@ -0,0 +1,111 @@
import json
import sys
import argparse
parser = argparse.ArgumentParser(description='Generate a cedar report from build metrics json files.')
parser.add_argument('--build-metrics', metavar='FILE', type=str, default='build_metrics.json',
help='Path to build metrics input json.')
parser.add_argument('--cache-pull-metrics', metavar='FILE', type=str, default='pull_cache.json',
help='Path to build metrics for cache pull input json.')
parser.add_argument('--cache-push-metrics', metavar='FILE', type=str, default='populate_cache.json',
help='Path to build metrics for cache push input json.')
args = parser.parse_args()
clean_build_metrics_json = args.build_metrics
populate_cache_metrics_json = args.cache_push_metrics
pull_cache_metrics_json = args.cache_pull_metrics
cedar_report = []
def single_metric_test(test_name, metric_name, value):
return {
"info": {
"test_name": test_name,
},
"metrics": [
{
"name": metric_name,
"value": round(value, 2)
},
]
}
with open(clean_build_metrics_json) as f:
aggregated_build_tasks = {}
build_metrics = json.load(f)
for task in build_metrics['build_tasks']:
outputs_key = ' '.join(task['outputs'])
if outputs_key in aggregated_build_tasks:
aggregated_build_tasks[outputs_key]['mem_usage'] += task['mem_usage']
aggregated_build_tasks[outputs_key]['time'] += (task['end_time'] - task['start_time'])
else:
aggregated_build_tasks[outputs_key] = {
'mem_usage': task['mem_usage'],
'time': task['end_time'] - task['start_time'],
}
for output_files in aggregated_build_tasks:
cedar_report.append({
"info": {
"test_name": output_files,
},
"metrics": [
{
"name": "seconds",
"value": round(aggregated_build_tasks[output_files]['time'] / (10.0**9.0), 2)
},
{
"name": "MBs",
"value": round(aggregated_build_tasks[output_files]['mem_usage'] / 1024.0 / 1024.0, 2)
},
]
})
cedar_report.append(single_metric_test("SCons memory usage", "MBs", build_metrics['scons_metrics']['memory']['post_build'] / 1024.0 / 1024.0))
cedar_report.append(single_metric_test("System Memory Peak", "MBs", build_metrics['system_memory']['max'] / 1024.0 / 1024.0))
cedar_report.append(single_metric_test("Total Build time", "seconds", build_metrics['scons_metrics']['time']['total']))
cedar_report.append(single_metric_test("Total Build output size", "MBs", build_metrics['artifact_metrics']['total_artifact_size'] / 1024.0 / 1024.0))
mongod_metrics = None
for artifact in build_metrics['artifact_metrics']['artifacts']:
if artifact['name'] == 'build/metrics/mongo/db/mongod':
mongod_metrics = artifact
break
if mongod_metrics and mongod_metrics.get('bin_metrics'):
cedar_report.append(single_metric_test("Mongod debug info size", "MBs", mongod_metrics['bin_metrics']['debug']['filesize'] / 1024.0 / 1024.0))
with open(populate_cache_metrics_json) as f:
build_metrics = json.load(f)
cedar_report.append({
"info": {
"test_name": "cache_push_time",
},
"metrics": [
{
"name": "seconds",
"value": build_metrics["cache_metrics"]['push_time'] / (10.0**9.0)
},
]
})
with open(pull_cache_metrics_json) as f:
build_metrics = json.load(f)
cedar_report.append({
"info": {
"test_name": "cache_pull_time",
},
"metrics": [
{
"name": "seconds",
"value": build_metrics["cache_metrics"]['pull_time'] / (10.0**9.0)
},
]
})
with open("build_metrics_cedar_report.json", "w") as fh:
json.dump(cedar_report, fh)
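
A small sketch for inspecting the report this script writes, relying only on the entry shape defined above (an "info.test_name" plus a list of "metrics"):

# Print a summary of build_metrics_cedar_report.json produced by the script above.
import json

with open("build_metrics_cedar_report.json") as fh:
    report = json.load(fh)

for entry in report:
    name = entry["info"]["test_name"]
    metrics = ", ".join(f"{m['name']}={m['value']}" for m in entry["metrics"])
    print(f"{name}: {metrics}")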

View File

@@ -171,7 +171,7 @@ class CollectArtifacts(BuildMetricsCollector):
self._artifacts += list(map(lambda x: os.path.join(root, x), files))
def finalize(self):
-        self.walk(self._env.Dir(self._env.subst(self._build_dir)).abspath)
+        self.walk(self._env.Dir(self._env.subst(self._build_dir)).path)
for artifact in self._artifacts:
artifact_dict = self._identify_artifact(artifact)
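
Switching from abspath to path records artifact names relative to the build root, which is what the cedar report script expects when it looks up 'build/metrics/mongo/db/mongod'. A rough illustration of the difference using plain os.path rather than SCons nodes:

# Illustrative only: absolute vs. root-relative artifact naming (plain os.path,
# not the SCons node API used above).
import os

build_root = os.getcwd()  # stand-in for the directory holding the SConstruct
artifact = os.path.join(build_root, "build", "metrics", "mongo", "db", "mongod")
print(artifact)                               # absolute, machine specific
print(os.path.relpath(artifact, build_root))  # build/metrics/mongo/db/mongod on POSIX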

View File

@@ -1,7 +1,8 @@
import functools
import subprocess
import time
import psutil
import platform
import memory_profiler
import SCons
import sys
@@ -179,8 +180,14 @@
task_metrics['end_time'] = time.time_ns()
task_metrics['cpu_time'] = int(cpu_usage * (10.0**9.0))
-        task_metrics['mem_usage'] = int(mem_usage)
+        # apparently macos big sur (11) changed some of the api for getting memory,
+        # so the memory comes up a bit larger than expected:
+        # https://github.com/giampaolo/psutil/issues/1908
+        if sys.platform == "darwin" and platform.mac_ver()[0] and int(
+                platform.mac_ver()[0].split('.')[0]) > 10:
+            task_metrics['mem_usage'] = int(mem_usage / 1024.0)
+        else:
+            task_metrics['mem_usage'] = int(mem_usage)
self.build_tasks_metrics.append(task_metrics)
task_metrics['array_index'] = self.build_tasks_metrics.index(task_metrics)
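
The /1024 correction above only applies on macOS 11 or newer; here is the same version check in isolation (the helper name is illustrative, not part of the tool):

# Return True when psutil's reported peak should be divided by 1024, per
# https://github.com/giampaolo/psutil/issues/1908 (macOS Big Sur and newer).
import platform
import sys

def needs_big_sur_memory_fix():
    if sys.platform != "darwin":
        return False
    ver = platform.mac_ver()[0]  # e.g. '12.6', empty string if unavailable
    return bool(ver) and int(ver.split('.')[0]) > 10

print(needs_big_sur_memory_fix())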

View File

@@ -0,0 +1,79 @@
import json
import sys
import datetime
import argparse
import logging
from tabulate import tabulate
parser = argparse.ArgumentParser(description='Print top n metrics from build metrics json files.')
parser.add_argument('--input', metavar='FILE', type=str, default='build-metrics.json',
help='Path to build metrics input json.')
parser.add_argument('--output', metavar='FILE', type=str, default="top_n_metrics.txt",
help='Path to output text file.')
parser.add_argument('--num', metavar='N', type=int, default=10,
help='Positive integer which represent the top N metrics to report on.')
args = parser.parse_args()
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.FileHandler(args.output))
log_format = logging.Formatter("%(message)s")
for handler in logger.handlers:
handler.setFormatter(log_format)
with open(args.input) as f:
metrics = json.load(f)
logger.info(f"Time of report: {datetime.datetime.now()}")
logger.info(f"Task ID: {metrics['evg_id']}")
logger.info(f"Distro: {metrics['variant']}")
logger.info(
f"Peak Memory Used:\n{round(metrics['system_memory']['max'] / 1024.0 / 1024.0, 2)} MBs")
logger.info(f"SCons Command:\n{metrics['scons_command']}")
build_tasks_sort = metrics['build_tasks'].copy()
build_tasks_sort.sort(reverse=True, key=lambda x: x['mem_usage'])
logger.info(f"\nTop {args.num} Memory tasks:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append([i, val['mem_usage'] / 1024.0 / 1024.0, val['outputs'][0]])
logger.info(tabulate(table_data, headers=['Num', 'MBs', 'Output'], floatfmt=".2f"))
build_tasks_sort = metrics['build_tasks'].copy()
build_tasks_sort.sort(reverse=True, key=lambda x: x['end_time'] - x['start_time'])
logger.info(f"\nTop {args.num} duration tasks:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append([i, (val['end_time'] - val['start_time']) / 10.0**9, val['outputs'][0]])
logger.info(tabulate(table_data, headers=['Num', 'Secs', 'Output'], floatfmt=".2f"))
build_tasks_sort = metrics['artifact_metrics']['artifacts'].copy()
build_tasks_sort.sort(reverse=True, key=lambda x: x['size'])
logger.info(f"\nTop {args.num} sized artifacts:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append([i, val['size'] / 1024.0 / 1024.0, val['name']])
logger.info(tabulate(table_data, headers=['Num', 'MBs', 'Output'], floatfmt=".2f"))
build_tasks_sort = [
metric for metric in metrics['artifact_metrics']['artifacts']
if metric.get('bin_metrics') and metric['bin_metrics'].get('text')
]
build_tasks_sort.sort(reverse=True, key=lambda x: x['bin_metrics']['text']['vmsize'])
logger.info(f"\nTop {args.num} Text sections:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append([i, val['bin_metrics']['text']['vmsize'] / 1024.0 / 1024.0, val['name']])
logger.info(tabulate(table_data, headers=['Num', 'MBs', 'Output'], floatfmt=".2f"))
build_tasks_sort = [
metric for metric in metrics['artifact_metrics']['artifacts']
if metric.get('bin_metrics') and metric['bin_metrics'].get('debug')
]
build_tasks_sort.sort(reverse=True, key=lambda x: x['bin_metrics']['debug']['filesize'])
logger.info(f"\nTop {args.num} Debug sections:")
table_data = []
for i, val in enumerate(build_tasks_sort[:args.num], start=1):
table_data.append(
[i, val['bin_metrics']['debug']['filesize'] / 1024.0 / 1024.0, val['name']])
logger.info(tabulate(table_data, headers=['Num', 'MBs', 'Output'], floatfmt=".2f"))
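
For reference, the "print top N metrics" Evergreen function invokes this script as shown below; the same invocation works locally from the repository root once a metrics build has produced build_metrics.json (a sketch, not part of the commit):

# Mirror of the Evergreen "print top N metrics" invocation for local use.
import subprocess

subprocess.run(
    [
        "python3",
        "site_scons/site_tools/build_metrics/top_n_metrics.py",
        "--input=build_metrics.json",
        "--output=top_15_metrics.txt",
        "--num=15",
    ],
    check=True,
)
# The report lands in top_15_metrics.txt, which Evergreen uploads as "Top 15 Metrics".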