mirror of https://github.com/mongodb/mongo
SERVER-113270 add symbol checking tool to commit queue (#43418)
GitOrigin-RevId: 374fbf56d65b29b88e687ebf4b1d1333412a2529
This commit is contained in:
parent
24f23cd9de
commit
c0271d9cda
6
.bazelrc
6
.bazelrc
|
|
@ -520,6 +520,12 @@ common:mod-scanner --output_groups=report
|
|||
common:mod-scanner --aspects //modules_poc:mod_scanner.bzl%mod_scanner_aspect
|
||||
common:mod-scanner --remote_download_regex=.*\.mod_scanner_decls.json$
|
||||
|
||||
--config=symbol-checker
|
||||
common:symbol-checker --aspects //bazel/symbol_checker:symbol_checker.bzl%symbol_checker_aspect
|
||||
common:symbol-checker --output_groups=symbol_checker
|
||||
common:symbol-checker --skip_archive=False
|
||||
common:symbol-checker --keep_going
|
||||
|
||||
# if you don't have access to the remote execution cluster above, use the local config
|
||||
# by passing "--config=local" on the bazel command line
|
||||
--config=local
|
||||
|
|
|
|||
|
|
@ -386,6 +386,7 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
|
|||
/evergreen/streams* @10gen/streams-engine @svc-auto-approve-bot
|
||||
/evergreen/generate_sast_report.sh @10gen/devprod-release-infrastructure @svc-auto-approve-bot
|
||||
/evergreen/write_sast_report_env_file.sh @10gen/devprod-release-infrastructure @svc-auto-approve-bot
|
||||
/evergreen/generate_symbol_check_report.py @10gen/devprod-build @svc-auto-approve-bot
|
||||
|
||||
# The following patterns are parsed from ./evergreen/functions/security_reporting_scripts/OWNERS.yml
|
||||
/evergreen/functions/security_reporting_scripts/ @10gen/platsec-server @svc-auto-approve-bot
|
||||
|
|
|
|||
|
|
@ -600,6 +600,7 @@ def mongo_cc_library(
|
|||
})
|
||||
else:
|
||||
undefined_ref_flag = []
|
||||
tags = tags + ["skip_symbol_check"]
|
||||
|
||||
create_header_dep(
|
||||
name = name + HEADER_DEP_SUFFIX,
|
||||
|
|
@ -652,7 +653,7 @@ def mongo_cc_library(
|
|||
copts = copts,
|
||||
cxxopts = cxxopts,
|
||||
data = data,
|
||||
tags = tags + ["mongo_library"],
|
||||
tags = tags + ["mongo_library", "check_symbol_target"],
|
||||
linkopts = linkopts,
|
||||
linkstatic = True,
|
||||
local_defines = MONGO_GLOBAL_DEFINES + local_defines,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,6 @@
|
|||
# The two scripts below are referenced by label from the symbol_checker aspect's
# private `_extractor` / `_checker` attributes, so they must be exported.
package(default_visibility = ["//visibility:public"])

exports_files(
    srcs = [
        "symbol_checker.py",
        "symbol_extractor.py",
    ],
)
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain")
|
||||
|
||||
# Provider returned by the symbol checker aspect; it carries the symbol files
# of a target together with those forwarded from its dependencies.
SymbolInfo = provider(
    fields = dict(
        symbol_file = "depset of files containing symbol info",
    ),
)
|
||||
|
||||
def _collect_dep_symbol_files_from_deps(ctx):
    """Merges the symbol-file depsets exposed by the rule's `deps`.

    Args:
        ctx: aspect context; `ctx.rule.attr.deps` is read when it exists.

    Returns:
        A depset combining `SymbolInfo.symbol_file` of every dep that provides
        SymbolInfo, or an empty depset when the rule has no `deps` attribute.
    """
    rule_attrs = ctx.rule.attr
    if not hasattr(rule_attrs, "deps"):
        return depset()

    per_dep = []
    for dep in rule_attrs.deps:
        # Deps without SymbolInfo contribute nothing.
        if SymbolInfo in dep:
            per_dep.append(dep[SymbolInfo].symbol_file)
    return depset(transitive = per_dep)
|
||||
|
||||
def _collect_cc_objects(cc_info):
    """Gathers the object files listed in a CcInfo's linker inputs.

    Args:
        cc_info: CcInfo whose `linking_context.linker_inputs` is walked.

    Returns:
        A list with, for each library in each linker input, its `objects`
        followed by its `pic_objects` (either may be absent/empty).
    """
    collected = []
    for linker_input in cc_info.linking_context.linker_inputs.to_list():
        for library in linker_input.libraries:
            for group in (library.objects, library.pic_objects):
                if group:
                    collected.extend(group)
    return collected
|
||||
|
||||
def _has_skip_tag(ctx):
    """Returns True when the visited rule carries the `skip_symbol_check` tag."""
    rule_tags = getattr(ctx.rule.attr, "tags", [])
    return "skip_symbol_check" in rule_tags
|
||||
|
||||
def _has_check_tag(ctx):
    """Returns True when the visited rule carries the `check_symbol_target` tag.

    Extraction and checking are only performed for rules with this tag.
    """
    rule_tags = getattr(ctx.rule.attr, "tags", [])
    return "check_symbol_target" in rule_tags
|
||||
|
||||
def _pick_cc_info(target, ctx):
    """Returns the target's own CcInfo, or None when it does not provide one.

    Args:
        target: the target the aspect is visiting.
        ctx: aspect context (not consulted by this function).
    """
    return target[CcInfo] if CcInfo in target else None
|
||||
|
||||
def _forward_only(dep_sym_files):
    """Providers for a target that only forwards symbol files and emits no check output."""
    return [
        SymbolInfo(symbol_file = dep_sym_files),
        OutputGroupInfo(symbol_checker = depset()),
    ]

def symbol_checker_aspect_impl(target, ctx):
    """Extracts symbols from a tagged C++ target and checks them against its deps.

    Targets that are not tagged `check_symbol_target`, have no CcInfo, or have
    no object files only forward the symbol files collected from their deps.
    For the remaining targets two actions are registered:
      * SymbolExtractor: runs the extractor script with the toolchain's nm over
        the target's object files and writes `<name>_symbols.sym`.
      * SymbolChecker: runs the checker script on that file plus the symbol
        files of the deps and writes `<name>_checked`.

    Args:
        target: the target being visited.
        ctx: the aspect context.

    Returns:
        A list with a SymbolInfo (own symbol file plus forwarded dep files) and
        an OutputGroupInfo whose `symbol_checker` group holds the check output
        (empty when nothing was checked).
    """

    # Wrapper rules exposing `binary_with_debug` just forward what the wrapped
    # target produced.
    if hasattr(ctx.rule.attr, "binary_with_debug"):
        cc_tgt = ctx.rule.attr.binary_with_debug

        # Guard the lookup so an unset attribute or a target without SymbolInfo
        # forwards nothing instead of failing analysis.
        if cc_tgt and SymbolInfo in cc_tgt:
            return _forward_only(cc_tgt[SymbolInfo].symbol_file)
        return _forward_only(depset())

    # Always forward deps' symbol files so downstream checks see them.
    dep_sym_files = _collect_dep_symbol_files_from_deps(ctx)

    # Gate heavy work on the presence of the tag.
    if not _has_check_tag(ctx):
        return _forward_only(dep_sym_files)

    cc_info = _pick_cc_info(target, ctx)
    if cc_info == None:
        # Not a C++ target (or no usable CcInfo): just forward.
        return _forward_only(dep_sym_files)

    python = ctx.toolchains["@bazel_tools//tools/python:toolchain_type"].py3_runtime
    cc_toolchain = find_cpp_toolchain(ctx)
    nm_bin = cc_toolchain.nm_executable

    objs = _collect_cc_objects(cc_info)
    if not objs:
        return _forward_only(dep_sym_files)

    # --- extract ---
    out = ctx.actions.declare_file(target.label.name + "_symbols.sym")
    extract_args = ctx.actions.args()
    extract_args.add(ctx.file._extractor)
    extract_args.add("--out", out)
    extract_args.add("--nm", nm_bin)

    # Hand File objects to Args so path strings are only rendered at execution time.
    extract_args.add_all(objs, before_each = "--obj")

    extract_inputs = depset(transitive = [
        ctx.attr._extractor.files,
        python.files,
        cc_toolchain.all_files,
        depset(objs),
    ])

    ctx.actions.run(
        executable = python.interpreter.path,
        outputs = [out],
        inputs = extract_inputs,
        arguments = [extract_args],
        mnemonic = "SymbolExtractor",
    )

    # --- check ---
    check = ctx.actions.declare_file(target.label.name + "_checked")
    check_args = ctx.actions.args()
    check_args.add(ctx.file._checker)
    check_args.add("--sym", out)
    check_args.add("--out", check)
    check_args.add("--label", str(target.label))

    # The checker still runs for skipped targets; `--skip` makes it record a
    # "skipped" result instead of comparing symbols.
    if _has_skip_tag(ctx):
        check_args.add("--skip")

    # Pass the depset itself: Args expands it lazily, which avoids flattening
    # the transitive set with to_list() during analysis of every checked target.
    check_args.add_all(dep_sym_files, before_each = "--dep")

    check_inputs = depset(transitive = [
        ctx.attr._checker.files,
        python.files,
        depset([out]),
        dep_sym_files,
    ])

    ctx.actions.run(
        executable = python.interpreter.path,
        outputs = [check],
        inputs = check_inputs,
        arguments = [check_args],
        mnemonic = "SymbolChecker",
    )

    return [
        SymbolInfo(
            symbol_file = depset(direct = [out], transitive = [dep_sym_files]),
        ),
        OutputGroupInfo(symbol_checker = depset([check])),
    ]
|
||||
|
||||
# Aspect declaration: propagates along `deps` and `binary_with_debug`, and
# needs the Python toolchain (to run the scripts) and the C++ toolchain (for nm).
symbol_checker_aspect = aspect(
    implementation = symbol_checker_aspect_impl,
    attr_aspects = ["deps", "binary_with_debug"],
    toolchains = [
        "@bazel_tools//tools/python:toolchain_type",
        "@bazel_tools//tools/cpp:toolchain_type",
    ],
    attrs = {
        # Script that extracts defined/undefined symbols from object files.
        "_extractor": attr.label(
            allow_single_file = True,
            default = "//bazel/symbol_checker:symbol_extractor.py",
        ),
        # Script that compares a target's undefined symbols with its deps.
        "_checker": attr.label(
            allow_single_file = True,
            default = "//bazel/symbol_checker:symbol_checker.py",
        ),
    },
)
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
import sys
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
|
||||
def parse_args(argv=None):
    """Parse the checker command line (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--sym", required=True)
    parser.add_argument("--dep", action="append")
    parser.add_argument("--out", required=True)
    parser.add_argument("--skip", action="store_true")
    parser.add_argument("--label", required=True)
    return parser.parse_args(argv)


def write_json(obj, path):
    """Write *obj* as indented, key-sorted JSON to *path*."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(obj, f, indent=2, sort_keys=True)


def load_symbols(path):
    """Load a symbol file (JSON with ``defined`` and ``undefined`` lists)."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def display_name_for(label):
    """Return *label* without a trailing ``_with_debug`` and a leading ``@@``."""
    name = label
    if name.endswith("_with_debug"):
        name = name[: -len("_with_debug")]
    if name.startswith("@@"):
        name = name[2:]
    return name


def main(argv=None):
    """Check that every undefined symbol of a target is defined by it or its deps.

    Writes a JSON result (``status``, ``target``, ``sym_file``, ``missing``) to
    ``--out`` and, on failure, a human-readable summary to stderr.

    Returns:
        0 when the check passes or ``--skip`` was given, 1 when symbols are missing.
    """
    args = parse_args(argv)
    # Normalise once so skipped, ok and failed results all name the target the same way.
    display_name = display_name_for(args.label)

    if args.skip:
        write_json(
            {
                "status": "skipped",
                "target": display_name,
                "sym_file": args.sym,
                "missing": [],
                "reason": "skip tag present",
            },
            args.out,
        )
        return 0

    current = load_symbols(args.sym)

    # We include self because we scanned all objs that belong to this library/archive.
    defined = set(current["defined"])
    for dep_path in args.dep or []:
        defined.update(load_symbols(dep_path)["defined"])

    missing = [u for u in current["undefined"] if u not in defined]

    if missing:
        # Human-readable to stderr.
        sys.stderr.write(f"Symbol check failed for: {display_name}\n")
        sys.stderr.write(" undefined but not found in self or deps:\n")
        for u in missing:
            sys.stderr.write(f" - {u}\n")
        sys.stderr.write(
            f"Please check to see if {display_name} is missing any deps that would include the symbols above\n"
        )

    # Machine-readable to file.
    write_json(
        {
            "status": "failed" if missing else "ok",
            "target": display_name,
            "sym_file": args.sym,
            "missing": missing,
        },
        args.out,
    )
    return 1 if missing else 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
import subprocess
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
|
||||
# nm type letters that count as a definition provided by an object file.
DEFINED_SYMBOL_TYPES = frozenset(("T", "D", "B", "R", "S", "V", "W"))
# Only symbols whose demangled name starts with this prefix are recorded.
SYMBOL_PREFIX = "mongo::"


def parse_defined(nm_output):
    """Return the tracked symbols defined in ``nm --defined-only`` output.

    Each useful line looks like ``0000000000000000 T mongo::foo()``; lines with
    fewer than three fields, other type letters or other prefixes are ignored.
    """
    found = set()
    for raw in nm_output.splitlines():
        parts = raw.strip().split(None, 2)
        if len(parts) < 3:
            continue
        _addr, symtype, name = parts
        if symtype in DEFINED_SYMBOL_TYPES and name.startswith(SYMBOL_PREFIX):
            found.add(name)
    return found


def parse_undefined(nm_output):
    """Return the tracked symbols listed in ``nm --undefined-only`` output.

    Each useful line looks like ``U mongo::bar()`` (no address column); lines
    that do not split into exactly a type and a name are ignored.
    """
    found = set()
    for raw in nm_output.splitlines():
        parts = raw.strip().split(None, 1)
        if len(parts) != 2:
            continue
        _symtype, name = parts
        if name.startswith(SYMBOL_PREFIX):
            found.add(name)
    return found


def run_nm(nm, flags, objfile):
    """Run *nm* with *flags* on *objfile*; return stdout, or None if nm failed.

    A failure is reported on stderr and otherwise tolerated, so one unreadable
    object does not abort extraction for the remaining ones.
    """
    proc = subprocess.run(
        [nm, "--demangle", *flags, objfile],
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        print(f"nm failed on {objfile}: {proc.stderr}", file=sys.stderr)
        return None
    return proc.stdout


def extract_symbols(nm, objfiles):
    """Return ``(defined, undefined)`` symbol sets collected over *objfiles*."""
    defined = set()
    undefined = set()
    for objfile in objfiles:
        defined_out = run_nm(nm, ["--defined-only", "-g"], objfile)
        if defined_out is not None:
            defined |= parse_defined(defined_out)
        undefined_out = run_nm(nm, ["--undefined-only"], objfile)
        if undefined_out is not None:
            undefined |= parse_undefined(undefined_out)
    return defined, undefined


def main(argv=None):
    """Write the sorted defined/undefined symbols of ``--obj`` files to ``--out`` as JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--obj", action="append", required=True)
    parser.add_argument("--nm", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args(argv)

    defined, undefined = extract_symbols(args.nm, args.obj)

    with open(args.out, "w") as f:
        json.dump(
            {
                "defined": sorted(defined),
                "undefined": sorted(undefined),
            },
            f,
        )
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
|
|
@ -107,6 +107,7 @@ post:
|
|||
- func: "debug full disk"
|
||||
- func: "upload npm logs"
|
||||
- func: "generate clang-tidy report"
|
||||
- func: "generate symbol-check report"
|
||||
- func: "attach local resmoke invocation"
|
||||
- func: "attach bazel invocation"
|
||||
- func: "create bazel test report"
|
||||
|
|
|
|||
|
|
@ -1851,6 +1851,34 @@ functions:
|
|||
- *f_expansions_write
|
||||
- *generate_clang_tidy_report_sh
|
||||
|
||||
"generate symbol-check report sh": &generate_symbol_check_report_sh
|
||||
command: subprocess.exec
|
||||
display_name: "generate symbol-check report"
|
||||
type: test
|
||||
params:
|
||||
binary: bash
|
||||
args:
|
||||
- "src/evergreen/run_python_script.sh"
|
||||
- "evergreen/generate_symbol_check_report.py"
|
||||
|
||||
"enable run_for_symbol_check expansions update":
|
||||
&enable_run_for_symbol_check_expansions_update
|
||||
command: expansions.update
|
||||
display_name: "enable run_for_symbol_check expansion"
|
||||
params:
|
||||
updates:
|
||||
- key: run_for_symbol_check
|
||||
value: "true"
|
||||
|
||||
"enable run_for_symbol_check expansions":
|
||||
- *f_expansions_write
|
||||
- *enable_run_for_symbol_check_expansions_update
|
||||
- *f_expansions_write
|
||||
|
||||
"generate symbol-check report":
|
||||
- *f_expansions_write
|
||||
- *generate_symbol_check_report_sh
|
||||
|
||||
"bazel run sh": &bazel_run_sh
|
||||
command: subprocess.exec
|
||||
display_name: "bazel run sh"
|
||||
|
|
|
|||
|
|
@ -344,6 +344,27 @@ tasks:
|
|||
bazel_args: --config=clang-tidy --mongo_toolchain_version=${clang_tidy_toolchain|v5} --keep_going
|
||||
- func: "generate clang-tidy report"
|
||||
|
||||
- name: run_symbol_checker
|
||||
tags:
|
||||
[
|
||||
"assigned_to_jira_team_devprod_build",
|
||||
"development_critical_single_variant",
|
||||
"requires_large_host",
|
||||
]
|
||||
exec_timeout_secs: 1800 # 30 mins
|
||||
depends_on:
|
||||
- name: version_expansions_gen
|
||||
variant: generate-tasks-for-version
|
||||
- name: archive_dist_test
|
||||
commands:
|
||||
- func: "do bazel setup"
|
||||
- func: "enable run_for_symbol_check expansions"
|
||||
- func: "bazel compile"
|
||||
vars:
|
||||
targets: //src/...
|
||||
bazel_args: --config=evg --config=symbol-checker
|
||||
- func: "generate symbol-check report"
|
||||
|
||||
- name: run_bazel_compiledb
|
||||
tags:
|
||||
[
|
||||
|
|
|
|||
|
|
@ -48,3 +48,6 @@ filters:
|
|||
- "write_sast_report_env_file.sh":
|
||||
approvers:
|
||||
- 10gen/devprod-release-infrastructure
|
||||
- "generate_symbol_check_report.py":
|
||||
approvers:
|
||||
- 10gen/devprod-build
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ echo "bazel run --verbose_failures ${LOCAL_ARG} ${INVOCATION_WITH_REDACTION}" >b
|
|||
# capture exit code
|
||||
set +o errexit
|
||||
export RETRY_ON_FAIL=0
|
||||
bazel_evergreen_shutils::retry_bazel_cmd 5 "$BAZEL_BINARY" \
|
||||
bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \
|
||||
run --verbose_failures ${LOCAL_ARG} ${target} ${args} 2>&1 | tee -a bazel_output.log
|
||||
RET=${PIPESTATUS[0]}
|
||||
: "${RET:=1}"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,80 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
from buildscripts.simple_report import make_report, put_report, try_combine_reports
|
||||
from buildscripts.util.read_config import read_config_file
|
||||
|
||||
def format_failure(target, sym_file, missing, checked_path):
    """Build the ``(report_name, report_text)`` pair for one failed symbol check.

    Args:
        target: target name recorded in the result file (may be None).
        sym_file: symbol file recorded in the result file (may be None).
        missing: iterable of missing symbol names.
        checked_path: path of the result file, used as a last-resort identifier.
    """
    lines = [
        f"Symbol check failed for: {target}",
        "Missing symbols:",
    ]
    lines.extend(f" - {m}" for m in missing)
    lines.append(
        f"Please check to see if {target} is missing any deps that would include the symbols above"
    )

    # Reproduction hint using the symbol-checker bazel config.
    repro_target = target or sym_file or checked_path
    lines.append("")
    lines.append("To reproduce:")
    lines.append(f" bazel build --config=symbol-checker {repro_target}")

    # For symbol check we don't have a real src path like clang-tidy,
    # so use a synthetic "file" name that encodes the bazel target.
    synthetic_file = f"symbol_check:{target or checked_path}"
    return synthetic_file, "\n".join(lines)


def collect_failures(bazel_bin="bazel-bin"):
    """Walk *bazel_bin* for ``*_checked`` result files and return the failed ones.

    Files that cannot be read or are not a JSON object are reported on stderr
    and skipped, so an unrelated file that happens to end in ``_checked`` does
    not abort the report. Directories and files are visited in sorted order so
    the result is deterministic.

    Returns:
        A list of ``(synthetic_file, content)`` tuples, one per failed check.
    """
    failures = []
    for root, dirs, files in os.walk(bazel_bin):
        dirs.sort()
        for name in sorted(files):
            if not name.endswith("_checked"):
                continue

            checked_path = os.path.join(root, name)
            try:
                with open(checked_path) as f:
                    data = json.load(f)
            except (OSError, ValueError) as err:
                # ValueError covers both invalid JSON and undecodable bytes.
                print(f"skipping unreadable result {checked_path}: {err}", file=sys.stderr)
                continue
            if not isinstance(data, dict):
                print(f"skipping non-object result {checked_path}", file=sys.stderr)
                continue

            if data.get("status") != "failed":
                continue

            failures.append(
                format_failure(
                    data.get("target"),
                    data.get("sym_file"),
                    data.get("missing") or [],
                    checked_path,
                )
            )
    return failures


def main():
    """Publish an Evergreen report for the symbol checker results.

    Returns:
        0 when reporting is disabled or every check passed, 1 when at least one
        check failed.
    """
    # 1. detect if we should run symbol-check reporting
    expansions = read_config_file("../expansions.yml")
    if not expansions.get("run_for_symbol_check", None):
        return 0

    # 2. walk bazel-bin for *_checked files emitted by the aspect
    failures = collect_failures("bazel-bin")

    # 3. write a helper invocation file
    with open("bazel-invocation.txt", "w") as f:
        f.write("bazel build --config=symbol-checker //src/...")

    # 4. emit reports
    if failures:
        for filename, content in failures:
            report = make_report(filename, content, 1)
            try_combine_reports(report)
            put_report(report)
        return 1

    report = make_report("symbol-check", "all symbol checks passed", 0)
    try_combine_reports(report)
    put_report(report)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
Loading…
Reference in New Issue