SERVER-102654 Save core dumps as undeclared test output (#40094)

GitOrigin-RevId: ba47e626d2c03546af8b76ed7fcfab9891b29b29
This commit is contained in:
Sean Lyons 2025-08-15 10:48:25 -04:00 committed by MongoDB Bot
parent 2aa7e19c46
commit 37a3ed4860
3 changed files with 47 additions and 8 deletions

View File

@ -1,12 +1,17 @@
import os
import pathlib
import signal
import sys
from functools import cache
import psutil
REPO_ROOT = pathlib.Path(__file__).parent.parent.parent
sys.path.append(str(REPO_ROOT))
from buildscripts.bazel_local_resources import acquire_local_resource
from buildscripts.resmokelib import cli
from buildscripts.resmokelib.hang_analyzer.process import signal_python
from buildscripts.resmokelib.logging.loggers import new_resmoke_logger
@cache
@ -43,6 +48,36 @@ def add_evergreen_build_info(args):
add_volatile_arg(args, "--versionId=", "version_id")
add_volatile_arg(args, "--requester=", "requester")
class ResmokeShimContext:
    """Prepare the working directory and SIGTERM handling around a resmoke run.

    On entry, a SIGTERM handler is installed and every entry of
    ``$TEST_SRCDIR/_main`` is symlinked into the current working directory.
    On exit, the symlinks created on entry are removed and the SIGTERM handler
    that was active before entry is restored.
    """

    def __init__(self):
        # Paths of the symlinks created by __enter__; removed again on exit.
        self.links = []
        # Handler that was active before __enter__ replaced it.
        self._previous_sigterm_handler = None
        self._handler_installed = False

    def __enter__(self):
        """Install the SIGTERM handler and create the source symlinks.

        Returns:
            This context object.

        Raises:
            RuntimeError: if the TEST_SRCDIR environment variable is not set.
            OSError: if the source directory cannot be listed or a symlink
                cannot be created; links created so far are removed first.
        """
        test_srcdir = os.environ.get("TEST_SRCDIR")
        if test_srcdir is None:
            # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
            raise RuntimeError("TEST_SRCDIR must be set to locate the resmoke source directories")

        # Bazel will send SIGTERM on a test timeout. If all processes haven't terminated
        # after -local_termination_grace_seconds (default 15s), Bazel will SIGKILL them instead.
        self._previous_sigterm_handler = signal.signal(signal.SIGTERM, self._handle_interrupt)
        self._handler_installed = True

        # Symlink source directories because resmoke uses relative paths profusely.
        base_dir = os.path.join(test_srcdir, "_main")
        working_dir = os.getcwd()
        try:
            # The with-statement closes the scandir iterator deterministically.
            with os.scandir(base_dir) as entries:
                for entry in entries:
                    link = os.path.join(working_dir, entry.name)
                    os.symlink(entry.path, link)
                    # Record only links that were actually created.
                    self.links.append(link)
        except OSError:
            # __exit__ is not called when __enter__ raises, so undo partial work here.
            self._cleanup()
            raise
        return self

    def __exit__(self, exception_type, exception_value, exception_traceback):
        """Remove the symlinks and restore the previous SIGTERM handler.

        Returns None, so an exception raised inside the ``with`` body propagates.
        """
        self._cleanup()

    def _cleanup(self):
        """Undo everything __enter__ did; safe to call more than once."""
        for link in self.links:
            try:
                os.unlink(link)
            except FileNotFoundError:
                # The link is already gone, which is the desired end state. Raising here
                # would skip the remaining links and mask an exception from the with body.
                pass
        self.links.clear()

        if self._handler_installed:
            self._handler_installed = False
            # signal.signal() returns None when the previous handler was not installed
            # from Python; None cannot be passed back to signal.signal().
            if self._previous_sigterm_handler is not None:
                signal.signal(signal.SIGTERM, self._previous_sigterm_handler)

    def _handle_interrupt(self, signum, frame):
        """SIGTERM handler: ask this Python process to dump its stack traces.

        Args:
            signum: number of the received signal (unused).
            frame: stack frame that was interrupted (unused).
        """
        # Attempt a clean shutdown, producing python stacktraces and generating core dumps for
        # any still running process. It is likely that most programs will have terminated before
        # core dumps can be produced, since Bazel sends SIGTERM to all processes, not just this one.
        # TODO: SERVER-109274
        pid = os.getpid()
        # Process.name is a method; call it so the process name string, not the bound
        # method object, is handed to signal_python.
        process_name = psutil.Process(pid).name()
        signal_python(new_resmoke_logger(), process_name, pid)
if __name__ == "__main__":
sys.argv[0] = (
@ -66,6 +101,7 @@ if __name__ == "__main__":
resmoke_args.append(f"--dbpathPrefix={os.path.join(undeclared_output_dir,'data')}")
resmoke_args.append(f"--taskWorkDir={undeclared_output_dir}")
resmoke_args.append(f"--reportFile={os.path.join(undeclared_output_dir,'report.json')}")
os.chdir(undeclared_output_dir)
if os.environ.get("TEST_SHARD_INDEX") and os.environ.get("TEST_TOTAL_SHARDS"):
shard_count = os.environ.get("TEST_TOTAL_SHARDS")
@ -90,7 +126,7 @@ if __name__ == "__main__":
):
resmoke_args.append("--historicTestRuntimes=bazel/resmoke/test_runtimes.json")
with ResmokeShimContext() as ctx:
cli.main(resmoke_args)
lock.release()

View File

@ -2587,8 +2587,8 @@ functions:
- "src/evergreen/run_python_script.sh"
- "buildscripts/fast_archive.py"
- "-f=mongo-coredumps.json"
- "-p=./**.core"
- "-p=./**.mdmp" # Windows: minidumps
- "-p=./*.core"
- "-p=./*.mdmp" # Windows: minidumps
- "-n=Core Dump"
"archive mongo coredumps": &archive_mongo_coredumps

View File

@ -1,10 +1,13 @@
cd src
# Find all core files and move to src
core_files=$(/usr/bin/find -H .. \( -name "*.core" -o -name "*.mdmp" \) 2>/dev/null)
# Find all core files and symlink them to src
# -H makes find follow symbolic links only when they are given as command-line arguments, so
# bazel-testlogs is listed explicitly. This ensures we look in bazel-testlogs, but don't follow
# other soft-links during traversal and end up with multiple copies of the same core dump from
# bazel-testlogs, bazel-out, etc.
core_files=$(/usr/bin/find -H .. bazel-testlogs \( -name "*.core" -o -name "*.mdmp" \) 2>/dev/null)
# NOTE(review): $core_files and $core_file are expanded unquoted below, so a path containing
# whitespace would be split into several words - confirm such paths cannot occur.
for core_file in $core_files; do
base_name=$(echo $core_file | sed "s/.*\///")
# Move file if it does not already exist
# Symlink file if it does not already exist
if [ ! -f $base_name ]; then
mv $core_file .
ln -sf $core_file $base_name
fi
done