SERVER-113270 add symbol checking tool to commit queue (#43418)

GitOrigin-RevId: 374fbf56d65b29b88e687ebf4b1d1333412a2529
This commit is contained in:
Daniel Moody 2025-11-05 09:22:07 -06:00 committed by MongoDB Bot
parent 24f23cd9de
commit c0271d9cda
13 changed files with 461 additions and 2 deletions

View File

@ -520,6 +520,12 @@ common:mod-scanner --output_groups=report
common:mod-scanner --aspects //modules_poc:mod_scanner.bzl%mod_scanner_aspect
common:mod-scanner --remote_download_regex=.*\.mod_scanner_decls.json$
--config=symbol-checker
common:symbol-checker --aspects //bazel/symbol_checker:symbol_checker.bzl%symbol_checker_aspect
common:symbol-checker --output_groups=symbol_checker
common:symbol-checker --skip_archive=False
common:symbol-checker --keep_going
# if you don't have access to the remote execution cluster above, use the local config
# by passing "--config=local" on the bazel command line
--config=local

1
.github/CODEOWNERS vendored
View File

@ -386,6 +386,7 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
/evergreen/streams* @10gen/streams-engine @svc-auto-approve-bot
/evergreen/generate_sast_report.sh @10gen/devprod-release-infrastructure @svc-auto-approve-bot
/evergreen/write_sast_report_env_file.sh @10gen/devprod-release-infrastructure @svc-auto-approve-bot
/evergreen/generate_symbol_check_report.py @10gen/devprod-build @svc-auto-approve-bot
# The following patterns are parsed from ./evergreen/functions/security_reporting_scripts/OWNERS.yml
/evergreen/functions/security_reporting_scripts/ @10gen/platsec-server @svc-auto-approve-bot

View File

@ -600,6 +600,7 @@ def mongo_cc_library(
})
else:
undefined_ref_flag = []
tags = tags + ["skip_symbol_check"]
create_header_dep(
name = name + HEADER_DEP_SUFFIX,
@ -652,7 +653,7 @@ def mongo_cc_library(
copts = copts,
cxxopts = cxxopts,
data = data,
tags = tags + ["mongo_library"],
tags = tags + ["mongo_library", "check_symbol_target"],
linkopts = linkopts,
linkstatic = True,
local_defines = MONGO_GLOBAL_DEFINES + local_defines,

View File

@ -0,0 +1,6 @@
# Makes the symbol checker helper scripts addressable as labels; the aspect in
# symbol_checker.bzl uses them as the defaults of its private tool attributes.
package(default_visibility = ["//visibility:public"])
exports_files([
"symbol_extractor.py",
"symbol_checker.py",
])

View File

@ -0,0 +1,161 @@
load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain")
# Provider returned by symbol_checker_aspect for every visited target. It
# carries the symbol files produced for the target itself (if any) together
# with the ones forwarded from its dependencies.
SymbolInfo = provider(
    fields = {
        "symbol_file": "depset of files containing symbol info",
    },
)
def _collect_dep_symbol_files_from_deps(ctx):
    """Merges the symbol files published by the rule's `deps`.

    Args:
        ctx: aspect context; only `ctx.rule.attr.deps` is read.

    Returns:
        A depset of symbol files. It is empty when the rule has no `deps`
        attribute. Deps that do not provide SymbolInfo are ignored.
    """
    if hasattr(ctx.rule.attr, "deps"):
        per_dep = []
        for dep in ctx.rule.attr.deps:
            if SymbolInfo in dep:
                per_dep.append(dep[SymbolInfo].symbol_file)
        return depset(transitive = per_dep)
    return depset()
def _collect_cc_objects(cc_info):
    """Lists the object files referenced by a CcInfo's linker inputs.

    Args:
        cc_info: the CcInfo whose `linking_context.linker_inputs` is scanned.

    Returns:
        A list holding, for each library in order, its `objects` followed by
        its `pic_objects`. Unset or empty groups are skipped.
    """

    # NOTE(review): every entry of `linker_inputs` is visited, which looks like
    # it also pulls in libraries of transitive deps -- confirm this is intended.
    collected = []
    for linker_input in cc_info.linking_context.linker_inputs.to_list():
        for library in linker_input.libraries:
            for group in (library.objects, library.pic_objects):
                if group:
                    collected.extend(group)
    return collected
def _has_skip_tag(ctx):
    """Returns True when the rule's tags contain "skip_symbol_check"."""
    rule_tags = getattr(ctx.rule.attr, "tags", [])
    return "skip_symbol_check" in rule_tags
def _has_check_tag(ctx):
    """Returns True when the rule's tags contain "check_symbol_target".

    Extraction and checking are only performed for targets carrying this tag.
    """
    rule_tags = getattr(ctx.rule.attr, "tags", [])
    return "check_symbol_target" in rule_tags
def _pick_cc_info(target, ctx):
    """Returns the target's own CcInfo, or None when it does not provide one.

    Args:
        target: the target the aspect is being applied to.
        ctx: aspect context; accepted but not used.
    """
    if CcInfo not in target:
        return None
    return target[CcInfo]
def _forward_only(symbol_files):
    """Builds the providers for a target that emits no check output of its own."""
    return [
        SymbolInfo(symbol_file = symbol_files),
        OutputGroupInfo(symbol_checker = depset()),
    ]

def symbol_checker_aspect_impl(target, ctx):
    """Extracts symbols from a tagged C++ target and checks them against its deps.

    Every visited target forwards the symbol files of its dependencies through
    SymbolInfo. Targets tagged "check_symbol_target" that provide CcInfo with
    at least one object file additionally get two actions:

      * SymbolExtractor: runs the extractor script with the toolchain's nm over
        the object files and writes `<name>_symbols.sym`.
      * SymbolChecker: runs the checker script on that file plus the symbol
        files of the deps and writes `<name>_checked`.

    Args:
        target: the target the aspect is applied to.
        ctx: the aspect context.

    Returns:
        A list with a SymbolInfo and an OutputGroupInfo whose `symbol_checker`
        group holds the `_checked` file (empty when nothing was checked).
    """

    # Rules exposing `binary_with_debug` do no work themselves; they only
    # republish what the wrapped target produced so downstream checks see it.
    if hasattr(ctx.rule.attr, "binary_with_debug"):
        cc_tgt = ctx.rule.attr.binary_with_debug

        # Guard against an unset attribute or a target the aspect did not
        # decorate instead of failing analysis on the provider lookup.
        if cc_tgt == None or SymbolInfo not in cc_tgt:
            return _forward_only(depset())
        return _forward_only(depset(transitive = [cc_tgt[SymbolInfo].symbol_file]))

    dep_sym_files = _collect_dep_symbol_files_from_deps(ctx)

    # Gate the heavy work on the presence of the tag.
    if not _has_check_tag(ctx):
        return _forward_only(dep_sym_files)

    cc_info = _pick_cc_info(target, ctx)
    if cc_info == None:
        # Not a C++ target (or no usable CcInfo): just forward.
        return _forward_only(dep_sym_files)

    python = ctx.toolchains["@bazel_tools//tools/python:toolchain_type"].py3_runtime
    cc_toolchain = find_cpp_toolchain(ctx)
    nm_bin = cc_toolchain.nm_executable

    objs = _collect_cc_objects(cc_info)
    if not objs:
        return _forward_only(dep_sym_files)

    # --- extract ---
    out = ctx.actions.declare_file(target.label.name + "_symbols.sym")
    extract_args = ctx.actions.args()
    extract_args.add(ctx.attr._extractor.files.to_list()[0])
    extract_args.add("--out", out)
    extract_args.add("--nm", nm_bin)

    # Hand the File objects to Args so the paths are rendered at execution
    # time rather than materialised as strings during analysis.
    extract_args.add_all(objs, before_each = "--obj")
    extract_inputs = depset(transitive = [
        ctx.attr._extractor.files,
        python.files,
        cc_toolchain.all_files,
        depset(objs),
    ])
    ctx.actions.run(
        executable = python.interpreter.path,
        outputs = [out],
        inputs = extract_inputs,
        arguments = [extract_args],
        mnemonic = "SymbolExtractor",
    )

    # --- check ---
    check = ctx.actions.declare_file(target.label.name + "_checked")
    check_args = ctx.actions.args()
    check_args.add(ctx.attr._checker.files.to_list()[0])
    check_args.add("--sym", out)
    check_args.add("--out", check)
    check_args.add("--label", str(target.label))
    if _has_skip_tag(ctx):
        check_args.add("--skip")

    # Pass the depset itself: Args expands it lazily, so the transitive set is
    # not flattened once per target while the build graph is being analysed.
    check_args.add_all(dep_sym_files, before_each = "--dep")
    check_inputs = depset(transitive = [
        ctx.attr._checker.files,
        python.files,
        depset([out]),
        dep_sym_files,
    ])
    ctx.actions.run(
        executable = python.interpreter.path,
        outputs = [check],
        inputs = check_inputs,
        arguments = [check_args],
        mnemonic = "SymbolChecker",
    )

    return [
        SymbolInfo(
            symbol_file = depset(direct = [out], transitive = [dep_sym_files]),
        ),
        OutputGroupInfo(symbol_checker = depset([check])),
    ]
# Aspect declaration. It propagates along `deps` and `binary_with_debug`, and
# needs the Python toolchain (to run the helper scripts) and the C++ toolchain
# (to locate nm). The two private attributes point at the helper scripts.
symbol_checker_aspect = aspect(
    implementation = symbol_checker_aspect_impl,
    attr_aspects = ["deps", "binary_with_debug"],
    toolchains = [
        "@bazel_tools//tools/python:toolchain_type",
        "@bazel_tools//tools/cpp:toolchain_type",
    ],
    attrs = {
        "_checker": attr.label(
            allow_single_file = True,
            default = "//bazel/symbol_checker:symbol_checker.py",
        ),
        "_extractor": attr.label(
            allow_single_file = True,
            default = "//bazel/symbol_checker:symbol_extractor.py",
        ),
    },
)

View File

@ -0,0 +1,84 @@
import sys
import argparse
import json
import os
def parse_args(argv=None):
    """Parse the checker command line (``sys.argv`` when *argv* is None)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--sym", required=True)
    parser.add_argument("--dep", action="append")
    parser.add_argument("--out", required=True)
    parser.add_argument("--skip", action="store_true")
    parser.add_argument("--label", required=True)
    return parser.parse_args(argv)


def normalize_label(label):
    """Return *label* without a trailing ``_with_debug`` and a leading ``@@``."""
    if label.endswith("_with_debug"):
        label = label[: -len("_with_debug")]
    if label.startswith("@@"):
        label = label[2:]
    return label


def find_missing(undefined, defined):
    """Return the entries of *undefined* that are absent from *defined*.

    The order of *undefined* is preserved.
    """
    known = set(defined)
    return [name for name in undefined if name not in known]


def build_payload(status, target, sym_file, missing, reason=None):
    """Return the machine-readable result written to the ``--out`` file."""
    payload = {
        "status": status,
        "target": target,
        "sym_file": sym_file,
        "missing": missing,
    }
    if reason is not None:
        payload["reason"] = reason
    return payload


def load_symbols(path):
    """Load a symbol file: JSON with ``defined`` and ``undefined`` lists."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def write_json(path, obj):
    """Write *obj* to *path* as indented, key-sorted JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(obj, f, indent=2, sort_keys=True)


def run_check(args):
    """Run the symbol check described by *args* and return the exit code.

    Returns 0 when the target is skipped or every undefined symbol is defined
    by the target itself or one of its deps, 1 otherwise. The result is always
    written to ``args.out``.
    """
    # Every payload reports the same normalized name, including skipped ones.
    display_name = normalize_label(args.label)

    if args.skip:
        write_json(
            args.out,
            build_payload("skipped", display_name, args.sym, [], reason="skip tag present"),
        )
        return 0

    current = load_symbols(args.sym)
    # we include self because we scanned all objs that belong to this library/archive
    defined = set(current["defined"])
    for dep_path in args.dep or []:
        defined.update(load_symbols(dep_path)["defined"])

    missing = find_missing(current["undefined"], defined)
    if not missing:
        write_json(args.out, build_payload("ok", display_name, args.sym, []))
        return 0

    # human-readable to stderr
    sys.stderr.write(f"Symbol check failed for: {display_name}\n")
    sys.stderr.write(" undefined but not found in self or deps:\n")
    for name in missing:
        sys.stderr.write(f" - {name}\n")
    sys.stderr.write(
        f"Please check to see if {display_name} is missing any deps that would include the symbols above\n"
    )
    # machine-readable to file
    write_json(args.out, build_payload("failed", display_name, args.sym, missing))
    return 1


def main(argv=None):
    """Script entry point; returns the process exit code."""
    return run_check(parse_args(argv))


if __name__ == "__main__":
    sys.exit(main())

View File

@ -0,0 +1,67 @@
import subprocess
import argparse
import json
import sys
# nm type letters accepted as an external definition.
DEFINED_SYMBOL_TYPES = frozenset("TDBRSVW")
# Only symbols whose demangled name starts with this prefix are recorded.
SYMBOL_PREFIX = "mongo::"


def parse_args(argv=None):
    """Parse the extractor command line (``sys.argv`` when *argv* is None)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--obj", action="append", required=True)
    parser.add_argument("--nm", required=True)
    parser.add_argument("--out", required=True)
    return parser.parse_args(argv)


def run_nm(nm, flags, objfile):
    """Run ``nm`` with *flags* on *objfile* and return its stdout.

    A failing nm is reported on stderr and yields None so the caller can keep
    processing the remaining object files.
    """
    proc = subprocess.run(
        [nm, *flags, objfile],
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        print(f"nm failed on {objfile}: {proc.stderr}", file=sys.stderr)
        return None
    return proc.stdout


def parse_defined(nm_output):
    """Return the ``mongo::`` symbols defined according to *nm_output*.

    Expects lines such as ``0000000000000000 T mongo::foo()``; anything with
    fewer than three fields or an unlisted type letter is ignored.
    """
    names = set()
    for line in nm_output.splitlines():
        parts = line.strip().split(None, 2)
        if len(parts) < 3:
            continue
        _addr, symtype, name = parts
        if symtype in DEFINED_SYMBOL_TYPES and name.startswith(SYMBOL_PREFIX):
            names.add(name)
    return names


def parse_undefined(nm_output):
    """Return the ``mongo::`` symbols that *nm_output* lists as required.

    Expects lines such as ``U mongo::bar()``. ``--undefined-only`` also prints
    weak undefined references (types ``w``/``v``); the linker accepts those
    being unresolved, so only plain ``U`` entries are treated as required.
    """
    names = set()
    for line in nm_output.splitlines():
        parts = line.strip().split(None, 1)
        if len(parts) != 2:
            continue
        symtype, name = parts
        if symtype == "U" and name.startswith(SYMBOL_PREFIX):
            names.add(name)
    return names


def collect(nm, flags, objfiles, parse):
    """Union ``parse(nm output)`` over *objfiles*, skipping files nm rejects."""
    names = set()
    for objfile in objfiles:
        output = run_nm(nm, flags, objfile)
        if output is not None:
            names |= parse(output)
    return names


def main(argv=None):
    """Extract defined/undefined symbols and write them to ``--out`` as JSON."""
    args = parse_args(argv)
    defined = collect(args.nm, ["--demangle", "--defined-only", "-g"], args.obj, parse_defined)
    undefined = collect(args.nm, ["--demangle", "--undefined-only"], args.obj, parse_undefined)
    with open(args.out, "w", encoding="utf-8") as f:
        json.dump(
            {
                "defined": sorted(defined),
                "undefined": sorted(undefined),
            },
            f,
        )
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@ -107,6 +107,7 @@ post:
- func: "debug full disk"
- func: "upload npm logs"
- func: "generate clang-tidy report"
- func: "generate symbol-check report"
- func: "attach local resmoke invocation"
- func: "attach bazel invocation"
- func: "create bazel test report"

View File

@ -1851,6 +1851,34 @@ functions:
- *f_expansions_write
- *generate_clang_tidy_report_sh
"generate symbol-check report sh": &generate_symbol_check_report_sh
command: subprocess.exec
display_name: "generate symbol-check report"
type: test
params:
binary: bash
args:
- "src/evergreen/run_python_script.sh"
- "evergreen/generate_symbol_check_report.py"
"enable run_for_symbol_check expansions update":
&enable_run_for_symbol_check_expansions_update
command: expansions.update
display_name: "enable run_for_symbol_check expansions"
params:
updates:
- key: run_for_symbol_check
value: "true"
"enable run_for_symbol_check expansions":
- *f_expansions_write
- *enable_run_for_symbol_check_expansions_update
- *f_expansions_write
"generate symbol-check report":
- *f_expansions_write
- *generate_symbol_check_report_sh
"bazel run sh": &bazel_run_sh
command: subprocess.exec
display_name: "bazel run sh"

View File

@ -344,6 +344,27 @@ tasks:
bazel_args: --config=clang-tidy --mongo_toolchain_version=${clang_tidy_toolchain|v5} --keep_going
- func: "generate clang-tidy report"
- name: run_symbol_checker
tags:
[
"assigned_to_jira_team_devprod_build",
"development_critical_single_variant",
"requires_large_host",
]
exec_timeout_secs: 1800 # 30 mins
depends_on:
- name: version_expansions_gen
variant: generate-tasks-for-version
- name: archive_dist_test
commands:
- func: "do bazel setup"
- func: "enable run_for_symbol_check expansions"
- func: "bazel compile"
vars:
targets: //src/...
bazel_args: --config=evg --config=symbol-checker
- func: "generate symbol-check report"
- name: run_bazel_compiledb
tags:
[

View File

@ -48,3 +48,6 @@ filters:
- "write_sast_report_env_file.sh":
approvers:
- 10gen/devprod-release-infrastructure
- "generate_symbol_check_report.py":
approvers:
- 10gen/devprod-build

View File

@ -45,7 +45,7 @@ echo "bazel run --verbose_failures ${LOCAL_ARG} ${INVOCATION_WITH_REDACTION}" >b
# capture exit code
set +o errexit
export RETRY_ON_FAIL=0
bazel_evergreen_shutils::retry_bazel_cmd 5 "$BAZEL_BINARY" \
bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \
run --verbose_failures ${LOCAL_ARG} ${target} ${args} 2>&1 | tee -a bazel_output.log
RET=${PIPESTATUS[0]}
: "${RET:=1}"

View File

@ -0,0 +1,80 @@
import json
import os
import sys
from buildscripts.simple_report import make_report, put_report, try_combine_reports
from buildscripts.util.read_config import read_config_file
def load_check_result(checked_path):
    """Return the JSON object stored in *checked_path*, or None.

    Files that cannot be read, are not valid JSON, or do not hold a JSON
    object are reported on stderr and skipped, so one stray ``*_checked`` file
    cannot abort the whole report.
    """
    try:
        with open(checked_path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as err:
        print(f"skipping unreadable symbol-check result {checked_path}: {err}", file=sys.stderr)
        return None
    if not isinstance(data, dict):
        print(f"skipping unexpected symbol-check result {checked_path}", file=sys.stderr)
        return None
    return data


def build_failure(data, checked_path):
    """Return ``(synthetic_file, content)`` describing one failed check.

    *data* is the parsed ``_checked`` payload; *checked_path* is used as a
    fallback name when the payload has no target.
    """
    target = data.get("target")
    sym_file = data.get("sym_file")
    missing = data.get("missing") or []
    repro_target = target or sym_file or checked_path
    lines = [
        f"Symbol check failed for: {target}",
        "Missing symbols:",
        *(f" - {m}" for m in missing),
        f"Please check to see if {target} is missing any deps that would include the symbols above",
        "",
        "To reproduce:",
        f" bazel build --config=symbol-checker {repro_target}",
    ]
    # for symbol check we don't have a real src path like clang-tidy,
    # so use a synthetic "file" name that encodes the bazel target
    synthetic_file = f"symbol_check:{target or checked_path}"
    return synthetic_file, "\n".join(lines)


def collect_failures(bin_dir):
    """Walk *bin_dir* for ``*_checked`` files and return the failed ones.

    Returns a list of ``(synthetic_file, content)`` tuples in a stable,
    path-sorted order.
    """
    failures = []
    for root, dirs, files in os.walk(bin_dir):
        dirs.sort()  # make the traversal order deterministic
        for name in sorted(files):
            if not name.endswith("_checked"):
                continue
            checked_path = os.path.join(root, name)
            data = load_check_result(checked_path)
            if data is None or data.get("status") != "failed":
                continue
            failures.append(build_failure(data, checked_path))
    return failures


def main():
    """Publish the symbol-check report; return the process exit code."""
    # 1. detect if we should run symbol-check reporting
    expansions = read_config_file("../expansions.yml")
    if not expansions.get("run_for_symbol_check", None):
        return 0

    # 2. walk bazel-bin for *_checked files emitted by the aspect
    failures = collect_failures("bazel-bin")

    # 3. write a helper invocation file
    with open("bazel-invocation.txt", "w") as f:
        f.write("bazel build --config=symbol-checker //src/...")

    # 4. emit reports
    if failures:
        for filename, content in failures:
            report = make_report(filename, content, 1)
            try_combine_reports(report)
            put_report(report)
        return 1

    report = make_report("symbol-check", "all symbol checks passed", 0)
    try_combine_reports(report)
    put_report(report)
    return 0


if __name__ == "__main__":
    sys.exit(main())