From c0271d9cda5f601bfb0ef89480f23e3545c5cbaf Mon Sep 17 00:00:00 2001 From: Daniel Moody Date: Wed, 5 Nov 2025 09:22:07 -0600 Subject: [PATCH] SERVER-113270 add symbol checking tool to commit queue (#43418) GitOrigin-RevId: 374fbf56d65b29b88e687ebf4b1d1333412a2529 --- .bazelrc | 6 + .github/CODEOWNERS | 1 + bazel/mongo_src_rules.bzl | 3 +- bazel/symbol_checker/BUILD.bazel | 6 + bazel/symbol_checker/symbol_checker.bzl | 161 ++++++++++++++++++ bazel/symbol_checker/symbol_checker.py | 84 +++++++++ bazel/symbol_checker/symbol_extractor.py | 67 ++++++++ .../configuration.yml | 1 + etc/evergreen_yml_components/definitions.yml | 28 +++ .../tasks/compile_tasks.yml | 21 +++ evergreen/OWNERS.yml | 3 + evergreen/bazel_run.sh | 2 +- evergreen/generate_symbol_check_report.py | 80 +++++++++ 13 files changed, 461 insertions(+), 2 deletions(-) create mode 100644 bazel/symbol_checker/BUILD.bazel create mode 100644 bazel/symbol_checker/symbol_checker.bzl create mode 100644 bazel/symbol_checker/symbol_checker.py create mode 100644 bazel/symbol_checker/symbol_extractor.py create mode 100644 evergreen/generate_symbol_check_report.py diff --git a/.bazelrc b/.bazelrc index 004f1bb2c38..af0442d2548 100644 --- a/.bazelrc +++ b/.bazelrc @@ -520,6 +520,12 @@ common:mod-scanner --output_groups=report common:mod-scanner --aspects //modules_poc:mod_scanner.bzl%mod_scanner_aspect common:mod-scanner --remote_download_regex=.*\.mod_scanner_decls.json$ +--config=symbol-checker +common:symbol-checker --aspects //bazel/symbol_checker:symbol_checker.bzl%symbol_checker_aspect +common:symbol-checker --output_groups=symbol_checker +common:symbol-checker --skip_archive=False +common:symbol-checker --keep_going + # if you don't have access to the remote execution cluster above, use the local config # by passing "--config=local" on the bazel command line --config=local diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e214d4c8c1e..3ffcd9d540c 100644 --- a/.github/CODEOWNERS +++ 
b/.github/CODEOWNERS @@ -386,6 +386,7 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot /evergreen/streams* @10gen/streams-engine @svc-auto-approve-bot /evergreen/generate_sast_report.sh @10gen/devprod-release-infrastructure @svc-auto-approve-bot /evergreen/write_sast_report_env_file.sh @10gen/devprod-release-infrastructure @svc-auto-approve-bot +/evergreen/generate_symbol_check_report.py @10gen/devprod-build @svc-auto-approve-bot # The following patterns are parsed from ./evergreen/functions/security_reporting_scripts/OWNERS.yml /evergreen/functions/security_reporting_scripts/ @10gen/platsec-server @svc-auto-approve-bot diff --git a/bazel/mongo_src_rules.bzl b/bazel/mongo_src_rules.bzl index 9c6385742ca..e904629b93b 100644 --- a/bazel/mongo_src_rules.bzl +++ b/bazel/mongo_src_rules.bzl @@ -600,6 +600,7 @@ def mongo_cc_library( }) else: undefined_ref_flag = [] + tags = tags + ["skip_symbol_check"] create_header_dep( name = name + HEADER_DEP_SUFFIX, @@ -652,7 +653,7 @@ def mongo_cc_library( copts = copts, cxxopts = cxxopts, data = data, - tags = tags + ["mongo_library"], + tags = tags + ["mongo_library", "check_symbol_target"], linkopts = linkopts, linkstatic = True, local_defines = MONGO_GLOBAL_DEFINES + local_defines, diff --git a/bazel/symbol_checker/BUILD.bazel b/bazel/symbol_checker/BUILD.bazel new file mode 100644 index 00000000000..1846cf3d333 --- /dev/null +++ b/bazel/symbol_checker/BUILD.bazel @@ -0,0 +1,6 @@ +package(default_visibility = ["//visibility:public"]) + +exports_files([ + "symbol_extractor.py", + "symbol_checker.py", +]) diff --git a/bazel/symbol_checker/symbol_checker.bzl b/bazel/symbol_checker/symbol_checker.bzl new file mode 100644 index 00000000000..e66972bf8e5 --- /dev/null +++ b/bazel/symbol_checker/symbol_checker.bzl @@ -0,0 +1,161 @@ +load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain") + +SymbolInfo = provider( + fields = { + "symbol_file": "depset of files containing symbol info", + }, +) + +def 
_collect_dep_symbol_files_from_deps(ctx):
+    """Returns a depset of the symbol files already produced for the target's deps."""
+    if not hasattr(ctx.rule.attr, "deps"):
+        return depset()
+    return depset(transitive = [
+        dep[SymbolInfo].symbol_file
+        for dep in ctx.rule.attr.deps
+        if SymbolInfo in dep
+    ])
+
+def _collect_cc_objects(cc_info):
+    """Returns the object files (non-PIC and PIC) of every library in cc_info."""
+    objs = []
+    for linker_input in cc_info.linking_context.linker_inputs.to_list():
+        for lib in linker_input.libraries:
+            if lib.objects:
+                objs += lib.objects
+            if lib.pic_objects:
+                objs += lib.pic_objects
+    return objs
+
+def _has_skip_tag(ctx):
+    """Returns True if the target is tagged "skip_symbol_check"."""
+    return "skip_symbol_check" in getattr(ctx.rule.attr, "tags", [])
+
+def _has_check_tag(ctx):
+    """Returns True if the target is tagged "check_symbol_target"; only those are checked."""
+    return "check_symbol_target" in getattr(ctx.rule.attr, "tags", [])
+
+def _pick_cc_info(target, ctx):
+    """Returns the target's own CcInfo, or None if it has none (ctx is unused)."""
+    if CcInfo in target:
+        return target[CcInfo]
+    return None
+
+def _forward_only(dep_sym_files):
+    """Returns providers that pass symbol files along without checking anything."""
+    return [
+        SymbolInfo(symbol_file = dep_sym_files),
+        OutputGroupInfo(symbol_checker = depset()),
+    ]
+
+def symbol_checker_aspect_impl(target, ctx):
+    """Extracts the target's symbols and checks its undefined ones against its deps.
+
+    Targets without the check tag, without CcInfo or without objects only forward
+    the symbol files of their deps so that downstream checks still see them.
+    """
+    if hasattr(ctx.rule.attr, "binary_with_debug"):
+        # Wrapper around another target: expose that target's symbol files as our own.
+        cc_tgt = ctx.rule.attr.binary_with_debug
+        return _forward_only(depset(transitive = [cc_tgt[SymbolInfo].symbol_file]))
+
+    dep_sym_files = _collect_dep_symbol_files_from_deps(ctx)
+
+    # Gate heavy work on the presence of the tag
+    if not _has_check_tag(ctx):
+        return _forward_only(dep_sym_files)
+
+    cc_info = _pick_cc_info(target, ctx)
+    if cc_info == None:
+        # Not a C++ target (or no usable CcInfo): just forward
+        return _forward_only(dep_sym_files)
+
+    # Both actions run their script with the Python toolchain's interpreter.
+    python = ctx.toolchains["@bazel_tools//tools/python:toolchain_type"].py3_runtime
+    cc_toolchain = find_cpp_toolchain(ctx)
+    nm_bin = cc_toolchain.nm_executable
+
+    objs = _collect_cc_objects(cc_info)
+    if not objs:
+        return _forward_only(dep_sym_files)
+
+    # --- extract ---
+    out = ctx.actions.declare_file(target.label.name + "_symbols.sym")
+    extract_args = ctx.actions.args()
+    extract_args.add(ctx.attr._extractor.files.to_list()[0])
+    extract_args.add("--out", out)
+    extract_args.add("--nm", nm_bin)
+
+    # Pass the File objects themselves; Args turns them into paths at execution time.
+    extract_args.add_all(objs, before_each = "--obj")
+
+    extract_inputs = depset(transitive = [
+        ctx.attr._extractor.files,
+        python.files,
+        cc_toolchain.all_files,
+        depset(objs),
+    ])
+
+    ctx.actions.run(
+        executable = python.interpreter.path,
+        outputs = [out],
+        inputs = extract_inputs,
+        arguments = [extract_args],
+        mnemonic = "SymbolExtractor",
+    )
+
+    # --- check ---
+    check = ctx.actions.declare_file(target.label.name + "_checked")
+    check_args = ctx.actions.args()
+    check_args.add(ctx.attr._checker.files.to_list()[0])
+    check_args.add("--sym", out)
+    check_args.add("--out", check)
+    check_args.add("--label", str(target.label))
+
+    if _has_skip_tag(ctx):
+        check_args.add("--skip")
+
+    # Hand the depset to Args instead of flattening it with to_list() during
+    # analysis; flattening it in every target is quadratic over a chain of deps.
+    check_args.add_all(dep_sym_files, before_each = "--dep")
+
+    check_inputs = depset(transitive = [
+        ctx.attr._checker.files,
+        python.files,
+        depset([out]),
+        dep_sym_files,
+    ])
+
+    ctx.actions.run(
+        executable = python.interpreter.path,
+        outputs = [check],
+        inputs = check_inputs,
+        arguments = [check_args],
+        mnemonic = "SymbolChecker",
+    )
+
+    return [
+        SymbolInfo(
+            symbol_file = depset(direct = [out], transitive = [dep_sym_files]),
+        ),
+        OutputGroupInfo(symbol_checker = depset([check])),
+    ]
+
+symbol_checker_aspect = aspect(
+    implementation = symbol_checker_aspect_impl,
+    attrs = {
+        "_extractor": attr.label(
+            default = "//bazel/symbol_checker:symbol_extractor.py",
+            allow_single_file = True,
+        ),
+        "_checker": attr.label(
+            default = "//bazel/symbol_checker:symbol_checker.py",
+            allow_single_file = True,
+
),
+    },
+    toolchains = [
+        "@bazel_tools//tools/python:toolchain_type",
+        "@bazel_tools//tools/cpp:toolchain_type",
+    ],
+    attr_aspects = ["deps", "binary_with_debug"],
+)
diff --git a/bazel/symbol_checker/symbol_checker.py b/bazel/symbol_checker/symbol_checker.py
new file mode 100644
index 00000000000..441ce76be5b
--- /dev/null
+++ b/bazel/symbol_checker/symbol_checker.py
@@ -0,0 +1,87 @@
+"""Check that every undefined mongo:: symbol of a target is defined somewhere.
+
+Reads the symbol file that symbol_extractor.py produced for the target (--sym)
+and for each dependency (--dep), writes a JSON verdict to --out, and exits 1
+when an undefined symbol is defined neither by the target nor by a dependency.
+"""
+
+import sys
+import argparse
+import json
+import os
+
+
+def display_name_for(label):
+    """Return the label as shown to users: no "_with_debug" suffix, no "@@" prefix."""
+    if label.endswith("_with_debug"):
+        label = label[: -len("_with_debug")]
+    if label.startswith("@@"):
+        label = label[2:]
+    return label
+
+
+def load_symbols(path):
+    """Return the parsed JSON symbol file at `path`."""
+    with open(path) as f:
+        return json.load(f)
+
+
+def find_missing(current, dep_symbols):
+    """Return the undefined symbols of `current` that nothing defines, in order."""
+    # The target's own definitions count: all objects of the library were scanned together.
+    defined = set(current["defined"])
+    for dep_sym in dep_symbols:
+        defined.update(dep_sym["defined"])
+    return [u for u in current["undefined"] if u not in defined]
+
+
+def write_result(out_path, status, target, sym_file, missing, reason=None):
+    """Write the machine-readable verdict as JSON to `out_path`."""
+    payload = {
+        "status": status,
+        "target": target,
+        "sym_file": sym_file,
+        "missing": missing,
+    }
+    if reason is not None:
+        payload["reason"] = reason
+    with open(out_path, "w") as f:
+        json.dump(payload, f, indent=2, sort_keys=True)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--sym", required=True)
+    parser.add_argument("--dep", action="append")
+    parser.add_argument("--out", required=True)
+    parser.add_argument("--skip", action="store_true")
+    parser.add_argument("--label", required=True)
+    args = parser.parse_args()
+
+    # Every verdict, including "skipped", names the target the same way.
+    display_name = display_name_for(args.label)
+
+    if args.skip:
+        write_result(args.out, "skipped", display_name, args.sym, [], reason="skip tag present")
+        return 0
+
+    current = load_symbols(args.sym)
+    missing = find_missing(current, (load_symbols(p) for p in args.dep or []))
+    if not missing:
+        write_result(args.out, "ok", display_name, args.sym, [])
+        return 0
+
+    # human-readable to stderr
+    sys.stderr.write(f"Symbol check failed for: {display_name}\n")
+    sys.stderr.write("  undefined but not found in self or deps:\n")
+    for u in missing:
+        sys.stderr.write(f"    - {u}\n")
+    sys.stderr.write(
+        f"Please check to see if {display_name} is missing any deps that would include the symbols above\n"
+    )
+    write_result(args.out, "failed", display_name, args.sym, missing)
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/bazel/symbol_checker/symbol_extractor.py b/bazel/symbol_checker/symbol_extractor.py
new file mode 100644
index 00000000000..e125844eb99
--- /dev/null
+++ b/bazel/symbol_checker/symbol_extractor.py
@@ -0,0 +1,73 @@
+"""Extract the mongo:: symbols that a set of object files define and reference.
+
+Runs nm on every --obj and writes {"defined": [...], "undefined": [...]} as JSON
+to --out, each list sorted and restricted to names starting with "mongo::".
+"""
+
+import subprocess
+import argparse
+import json
+import sys
+
+# nm type letters that are accepted as external definitions.
+DEFINED_TYPES = ("T", "D", "B", "R", "S", "V", "W")
+
+
+def nm_lines(nm, flags, objfile):
+    """Yield the stripped, non-empty output lines of `nm --demangle <flags> objfile`.
+
+    A failing nm is reported on stderr and yields nothing, so a single object
+    that nm cannot read does not abort the extraction.
+    """
+    proc = subprocess.run(
+        [nm, "--demangle", *flags, objfile],
+        capture_output=True,
+        text=True,
+    )
+    if proc.returncode != 0:
+        print(f"nm failed on {objfile}: {proc.stderr}", file=sys.stderr)
+        return
+    for line in proc.stdout.splitlines():
+        line = line.strip()
+        if line:
+            yield line
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--obj", action="append", required=True)
+parser.add_argument("--nm", required=True)
+parser.add_argument("--out", required=True)
+args = parser.parse_args()
+
+defined = set()
+undefined = set()
+
+for objfile in args.obj:
+    for line in nm_lines(args.nm, ["--defined-only", "-g"], objfile):
+        # typical: "0000000000000000 T mongo::foo()"
+        parts = line.split(None, 2)
+        if len(parts) < 3:
+            continue
+        _addr, symtype, name = parts
+        if symtype in DEFINED_TYPES and name.startswith("mongo::"):
+            defined.add(name)
+
+for objfile in args.obj:
+    for line in nm_lines(args.nm, ["--undefined-only"], objfile):
+        # typical: "U mongo::bar()"; weak references are listed as "w" or "v"
+        # and may legitimately stay unresolved, so only "U" has to be satisfied.
+        parts = line.split(None, 1)
+        if len(parts) != 2 or parts[0] != "U":
+            continue
+        _u, name =
parts + if name.startswith("mongo::"): + undefined.add(name) + +with open(args.out, "w") as f: + json.dump( + { + "defined": sorted(defined), + "undefined": sorted(undefined), + }, + f, + ) diff --git a/etc/evergreen_yml_components/configuration.yml b/etc/evergreen_yml_components/configuration.yml index 57459df85cb..87fc95aabc1 100644 --- a/etc/evergreen_yml_components/configuration.yml +++ b/etc/evergreen_yml_components/configuration.yml @@ -107,6 +107,7 @@ post: - func: "debug full disk" - func: "upload npm logs" - func: "generate clang-tidy report" + - func: "generate symbol-check report" - func: "attach local resmoke invocation" - func: "attach bazel invocation" - func: "create bazel test report" diff --git a/etc/evergreen_yml_components/definitions.yml b/etc/evergreen_yml_components/definitions.yml index afd0ba346c2..a17adc8dff2 100644 --- a/etc/evergreen_yml_components/definitions.yml +++ b/etc/evergreen_yml_components/definitions.yml @@ -1851,6 +1851,34 @@ functions: - *f_expansions_write - *generate_clang_tidy_report_sh + "generate symbol-check report sh": &generate_symbol_check_report_sh + command: subprocess.exec + display_name: "generate symbol-check report" + type: test + params: + binary: bash + args: + - "src/evergreen/run_python_script.sh" + - "evergreen/generate_symbol_check_report.py" + + "enable run_for_symbol_check expansions update": + &enable_run_for_symbol_check_expansions_update + command: expansions.update + display_name: "enable run_for_symbol_check expansion" + params: + updates: + - key: run_for_symbol_check + value: "true" + + "enable run_for_symbol_check expansions": + - *f_expansions_write + - *enable_run_for_symbol_check_expansions_update + - *f_expansions_write + + "generate symbol-check report": + - *f_expansions_write + - *generate_symbol_check_report_sh + "bazel run sh": &bazel_run_sh command: subprocess.exec display_name: "bazel run sh" diff --git a/etc/evergreen_yml_components/tasks/compile_tasks.yml
b/etc/evergreen_yml_components/tasks/compile_tasks.yml index 53297481aa3..3ca1a1dc096 100644 --- a/etc/evergreen_yml_components/tasks/compile_tasks.yml +++ b/etc/evergreen_yml_components/tasks/compile_tasks.yml @@ -344,6 +344,27 @@ tasks: bazel_args: --config=clang-tidy --mongo_toolchain_version=${clang_tidy_toolchain|v5} --keep_going - func: "generate clang-tidy report" + - name: run_symbol_checker + tags: + [ + "assigned_to_jira_team_devprod_build", + "development_critical_single_variant", + "requires_large_host", + ] + exec_timeout_secs: 1800 # 30 mins + depends_on: + - name: version_expansions_gen + variant: generate-tasks-for-version + - name: archive_dist_test + commands: + - func: "do bazel setup" + - func: "enable run_for_symbol_check expansions" + - func: "bazel compile" + vars: + targets: //src/... + bazel_args: --config=evg --config=symbol-checker + - func: "generate symbol-check report" + - name: run_bazel_compiledb tags: [ diff --git a/evergreen/OWNERS.yml b/evergreen/OWNERS.yml index 4256187b71b..0186a9deb4d 100644 --- a/evergreen/OWNERS.yml +++ b/evergreen/OWNERS.yml @@ -48,3 +48,6 @@ filters: - "write_sast_report_env_file.sh": approvers: - 10gen/devprod-release-infrastructure + - "generate_symbol_check_report.py": + approvers: + - 10gen/devprod-build diff --git a/evergreen/bazel_run.sh b/evergreen/bazel_run.sh index 41976b63cb8..7b73be2e952 100644 --- a/evergreen/bazel_run.sh +++ b/evergreen/bazel_run.sh @@ -45,7 +45,7 @@ echo "bazel run --verbose_failures ${LOCAL_ARG} ${INVOCATION_WITH_REDACTION}" >b # capture exit code set +o errexit export RETRY_ON_FAIL=0 -bazel_evergreen_shutils::retry_bazel_cmd 5 "$BAZEL_BINARY" \ +bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \ run --verbose_failures ${LOCAL_ARG} ${target} ${args} 2>&1 | tee -a bazel_output.log RET=${PIPESTATUS[0]} : "${RET:=1}" diff --git a/evergreen/generate_symbol_check_report.py b/evergreen/generate_symbol_check_report.py new file mode 100644 index 00000000000..1367026ad92 --- 
/dev/null
+++ b/evergreen/generate_symbol_check_report.py
@@ -0,0 +1,99 @@
+"""Publish the symbol checker's per-target results as an Evergreen report.
+
+The symbol-checker aspect leaves one JSON "*_checked" file per checked target
+under bazel-bin. Failed results become one report entry each; when there are
+none, a single passing entry is written.
+"""
+
+import json
+import os
+import sys
+
+from buildscripts.simple_report import make_report, put_report, try_combine_reports
+from buildscripts.util.read_config import read_config_file
+
+REPRO_CONFIG = "--config=symbol-checker"
+
+
+def load_check_result(path):
+    """Return the JSON object stored at `path`, or None if there is none.
+
+    "_checked" is only a file name suffix, so an unrelated or truncated file
+    is skipped with a warning instead of aborting the whole report.
+    """
+    try:
+        with open(path) as f:
+            data = json.load(f)
+    except (OSError, ValueError) as err:
+        print(f"Skipping unreadable symbol check result {path}: {err}", file=sys.stderr)
+        return None
+    if not isinstance(data, dict):
+        print(f"Skipping unexpected symbol check result {path}", file=sys.stderr)
+        return None
+    return data
+
+
+def describe_failure(data, checked_path):
+    """Return the (report entry name, report text) pair for one failed result."""
+    target = data.get("target")
+    missing = data.get("missing") or []
+    # Never leave the reproduction hint without something to build.
+    repro_target = target or data.get("sym_file") or checked_path
+    lines = [
+        f"Symbol check failed for: {target}",
+        "Missing symbols:",
+        *(f"  - {m}" for m in missing),
+        f"Please check to see if {target} is missing any deps that would include the symbols above",
+        "",
+        "To reproduce:",
+        f"  bazel build {REPRO_CONFIG} {repro_target}",
+    ]
+    # There is no real source path to attach the failure to, so the entry is
+    # named after the bazel target instead.
+    return f"symbol_check:{target or checked_path}", "\n".join(lines)
+
+
+def collect_failures(bin_dir):
+    """Return the report entries of all failed "*_checked" files under `bin_dir`."""
+    failures = []
+    for root, dirs, files in os.walk(bin_dir):
+        dirs.sort()  # walk in a stable order so entries do not reshuffle between runs
+        for name in sorted(files):
+            if not name.endswith("_checked"):
+                continue
+            checked_path = os.path.join(root, name)
+            data = load_check_result(checked_path)
+            if data is not None and data.get("status") == "failed":
+                failures.append(describe_failure(data, checked_path))
+    return failures
+
+
+def main():
+    # 1. only report for tasks that set the run_for_symbol_check expansion
+    expansions = read_config_file("../expansions.yml")
+    if not expansions.get("run_for_symbol_check", None):
+        return 0
+
+    # 2. walk bazel-bin for *_checked files emitted by the aspect
+    failures = collect_failures("bazel-bin")
+
+    # 3. write a helper invocation file
+    with open("bazel-invocation.txt", "w") as f:
+        f.write(f"bazel build {REPRO_CONFIG} //src/...")
+
+    # 4. emit reports
+    if not failures:
+        report = make_report("symbol-check", "all symbol checks passed", 0)
+        try_combine_reports(report)
+        put_report(report)
+        return 0
+
+    for filename, content in failures:
+        report = make_report(filename, content, 1)
+        try_combine_reports(report)
+        put_report(report)
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())