#!/bin/bash
cd $HOME # workaround EVG-12829
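# Detect the host platform; the script branches between a Windows (Cygwin) setup path and a
# POSIX one below.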
unameOut=$(uname -s)
case "${unameOut}" in
    Linux*)  machine=Linux;;
    Darwin*) machine=Mac;;
    CYGWIN*) machine=Cygwin;;
    *)       machine="UNKNOWN:${unameOut}"
esac
TOOLCHAIN_ROOT=/opt/mongodbtoolchain/v4
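# Debugging tools (gdb, objcopy, eu-readelf, pip3) are taken from the MongoDB toolchain rather
# than the system packages.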
archive_fail() {
echo "Error: archive [${1}] not found." >&2
}
if [[ "${machine}" = "Cygwin" ]]; then
    out_dir="/cygdrive/c/setup_script_output.txt"
    desktop_dir="/cygdrive/c/Users/Administrator/Desktop"
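    # Capture all output of the braced group below into ${out_dir}; a copy is placed on the
    # Administrator's Desktop at the end of this branch.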
    {
        date
        env
        echo "----------------------"

        echo -e "\n=> Setting _NT_SOURCE_PATH environment variable for debuggers to pick up source files."
        SRC_DIR_HASH=$(find /cygdrive/z/data/mci -name 'source-*' | head -n 1 | xargs -I% basename %)
        SRC_DIR="Z:\data\mci\\${SRC_DIR_HASH}\src"
        echo "Source Path: [${SRC_DIR}]"
        set -x;
        setx _NT_SOURCE_PATH "${SRC_DIR}"
        { set +x; } 2>/dev/null

        echo -e "\n=> Setting _NT_SYMBOL_PATH environment variable for debuggers to pick up the symbols."
        DBG_SYMBOLS_HASH=$(find /cygdrive/z/data/mci -name 'artifacts-*dist_test_debug' | head -n 1 | xargs -I% basename %)
        DBG_SYMBOLS_WINPATH="\"Z:\data\mci\\${DBG_SYMBOLS_HASH}\extracted_symbols\dist-test\bin;srv*;\""
        DBG_ARCHIVE_PARENT=$(readlink -f /cygdrive/z/data/mci/artifacts-*dist_test_debug)
        DBG_ARCHIVE=$(readlink -f ${DBG_ARCHIVE_PARENT}/debugsymbols-*.zip)
        DBG_ARCHIVE_TARGET_PARENT="${DBG_ARCHIVE_PARENT}/extracted_symbols"
        DBG_ARCHIVE_TARGET="${DBG_ARCHIVE_TARGET_PARENT}/dist-test/bin"
        echo "Symbols Dir: [${DBG_ARCHIVE_TARGET}]"

        echo -e "\n=> Copying .natvis file to Visual Studio's expected directories (System for 2017/2019 and User)."
        set -x;
        cp "/cygdrive/z/data/mci/${SRC_DIR_HASH}/buildscripts/win/mongodb.natvis" /cygdrive/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/Professional/Common7/Packages/Debugger/Visualizers
        cp "/cygdrive/z/data/mci/${SRC_DIR_HASH}/buildscripts/win/mongodb.natvis" /cygdrive/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Professional/Common7/Packages/Debugger/Visualizers
        mkdir -p "${USERPROFILE}/Documents/Visual Studio 2022/Visualizers"
        cp "/cygdrive/z/data/mci/${SRC_DIR_HASH}/buildscripts/win/mongodb.natvis" "${USERPROFILE}/Documents/Visual Studio 2022/Visualizers"
        { set +x; } 2>/dev/null

        echo -e "\n=> Extracting Symbol files."
        set -x;
        mkdir -p ${DBG_ARCHIVE_TARGET_PARENT}
        unzip -n ${DBG_ARCHIVE} -d ${DBG_ARCHIVE_TARGET_PARENT}
        setx _NT_SYMBOL_PATH "${DBG_SYMBOLS_WINPATH}"
        { set +x; } 2>/dev/null
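        # Core dumps arrive either bundled in a single mongo-coredumps .tgz or as individually
        # gzipped .mdmp files; both forms are handled below.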
echo -e "\n=> Extracting Core Dumps to Desktop."
set -x
# logic for core dumps uploaded together in a .tar.gz file
COREDUMP_ARCHIVE=$(readlink -f /cygdrive/z/data/mci/artifacts-*/mongo-coredumps*.tgz 2>/dev/null)
if [[ -f "${COREDUMP_ARCHIVE}" ]]; then
COREDUMP_ARCHIVE_PARENT=$(dirname ${COREDUMP_ARCHIVE})
COREDUMP_ARCHIVE_TARGET="${COREDUMP_ARCHIVE_PARENT}/extracted_dump"
set -x;
mkdir -p ${COREDUMP_ARCHIVE_TARGET}
tar -xzvf ${COREDUMP_ARCHIVE} -C ${COREDUMP_ARCHIVE_TARGET}
cp -r ${COREDUMP_ARCHIVE_TARGET}/* ${desktop_dir}
else
archive_fail "core dump"
fi
# logic for core dumps individually uploaded and gzipped
filenames=$(find /cygdrive/z/data/mci/artifacts-* -name 'dump_*.mdmp.gz' -o -name 'mongod.*.mdmp.gz')
for filename in ${filenames[@]};
do
COREDUMP_ARCHIVE=$(readlink -f ${filename} 2>/dev/null)
COREDUMP_ARCHIVE_BASENAME=$(basename ${COREDUMP_ARCHIVE})
COREDUMP_ARCHIVE_PARENT=$(dirname ${COREDUMP_ARCHIVE})
COREDUMP_ARCHIVE_TARGET="${COREDUMP_ARCHIVE_PARENT}/extracted_dump"
mkdir -p ${COREDUMP_ARCHIVE_TARGET}
mv ${COREDUMP_ARCHIVE} ${COREDUMP_ARCHIVE_TARGET}
gunzip ${COREDUMP_ARCHIVE_TARGET}/${COREDUMP_ARCHIVE_BASENAME}
DECOMPRESSED_NAME=$(basename ${COREDUMP_ARCHIVE_BASENAME} .gz)
cp ${COREDUMP_ARCHIVE_TARGET}/${DECOMPRESSED_NAME} ${desktop_dir}
done
echo -e "\n=> Extracting Binaries and symbols to Desktop."
BIN_ARCHIVE=$(ls /cygdrive/z/data/mci/artifacts-*archive_dist_test*/mongo-*.zip 2>/dev/null)
if [[ -f $BIN_ARCHIVE ]]; then
BIN_ARCHIVE_PARENT=$(dirname ${BIN_ARCHIVE})
BIN_ARCHIVE_TARGET=${BIN_ARCHIVE_PARENT}/extracted_binaries
unzip -n ${BIN_ARCHIVE} -d ${BIN_ARCHIVE_TARGET}
cp ${BIN_ARCHIVE_TARGET}/dist-test/bin/mongo{,d,s}.pdb ${desktop_dir}
cp ${BIN_ARCHIVE_TARGET}/dist-test/bin/mongo{,d,s}.exe ${desktop_dir}
else
archive_fail "bin"
fi
if [[ -z $(ls ${COREDUMP_ARCHIVE_TARGET}/dump* 2>/dev/null) ]]; then
echo "No core dumps found."
fi
{ set +x; } 2>/dev/null
echo "Copied to Desktop."
} &> ${out_dir}
cp ${out_dir} ${desktop_dir}
else
    # Communicate to users that logged in before the script started that nothing is ready yet.
    wall "The setup_spawnhost_coredump script has just started setting up the debugging environment."

    # Write this file, which gets cat'ed on login, to tell users logging in that this setup
    # script is still running.
    echo '+-----------------------------------------------------------------------------------+' >  ~/.setup_spawnhost_coredump_progress
    echo "| The setup script is still setting up data files for inspection on a [${machine}] host. |" >> ~/.setup_spawnhost_coredump_progress
    echo '+-----------------------------------------------------------------------------------+' >> ~/.setup_spawnhost_coredump_progress
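    # Append a login banner to ~/.profile: alias gdb to the toolchain build, drop the user into
    # ~/debug, and list the debuggable binaries and core dumps that were extracted.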
    cat >> ~/.profile <<EOF
cat ~/.setup_spawnhost_coredump_progress
# Coredumps generated by a toolchain-built mongodb can be problematic when examined with the
# system gdb.
alias gdb='${TOOLCHAIN_ROOT}/bin/gdb'
# Put the user into the debug directory; this is where gdb is expected to be invoked from.
cd debug
echo "Debuggable binaries:"
ls -l mongo* 2>/dev/null | grep -v debug$
ls -l bin/ 2>/dev/null
( ls mongo* &>/dev/null || ls bin/ &>/dev/null ) || echo " [none]"

for item in "mongo" "mongod" "mongos"; do
    echo "\${item} core dumps:"
    ls -l dump_\${item}.* 2>/dev/null || echo " [none]"
done

echo "Core dumps from unknown processes (crashed processes typically found here):"
ls -l dump_* 2>/dev/null | grep -v mongo || echo " [none]"

echo
echo "To examine a core dump, type 'gdb ./<binary> ./<core file>'"
cat ~/.setup_spawnhost_coredump_progress
EOF
    echo 'if [ -f ~/.profile ]; then
. ~/.profile
fi' >> .bash_profile
    # Make a directory on the larger EBS volume and soft-link it under the home directory. The
    # smaller home volume can run short of space, particularly with coredumps from sharded
    # timeouts.
    mkdir -p /data/debug
    ln -s /data/debug .
    cd debug
    # As the name suggests, this provides pretty-printers, primarily for boost::optional<T>.
    git clone git@github.com:mongodb-forks/Boost-Pretty-Printer.git --branch mongodb-stable &
    # Discover and unarchive necessary files and source code. This will put mongo binaries and
    # their partner .debug files in the same `debug/bin` directory. The `bin` directory will later
    # be symbolically linked into the top-level (`debug`) directory. Shared library files and their
    # debug symbols will be dumped into a `debug/lib` directory for tidiness. The mongo
    # `<reporoot>/src/` directory is soft-linked as `debug/src`. The .gdbinit file assumes gdb is
    # being run from the `debug` directory.
    BIN_ARCHIVE=$(ls /data/mci/artifacts-*archive_dist_test*/mongo-*.tgz 2>/dev/null)
    if [[ -n $BIN_ARCHIVE ]]; then
        # Have the shell expand braces before passing the wildcards to tar.
        bin_files_pattern=(\*/bin/mongo{d,s,,bridge})
        tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE "${bin_files_pattern[@]}"
        tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE '*/lib/*' &
    else
        archive_fail "bin"
    fi
    DBG_ARCHIVE=$(ls /data/mci/artifacts-*archive_dist_test_debug/debugsymbols-*.tgz 2>/dev/null)
    if [[ -n $DBG_ARCHIVE ]]; then
        # Support discovering split-dwarf files. Specifically, capture both <file>.debug and
        # <file>.dwp files.
        # Have the shell expand braces before passing the wildcards to tar.
        dbg_files_pattern=(\*/bin/mongo{d,s,,bridge}.{debug,dwp})
        tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE "${dbg_files_pattern[@]}" &
        tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE '*/lib/*' &
    else
        archive_fail "debug"
    fi
    UNITTEST_ARCHIVE=$(ls /data/mci/artifacts-*run_unittests*/mongo-unittests-*.tgz 2>/dev/null)
    if [[ -n $UNITTEST_ARCHIVE ]]; then
        tar --wildcards --strip-components=0 -xzf $UNITTEST_ARCHIVE 'bin/*' &
        tar --wildcards -xzf $UNITTEST_ARCHIVE 'lib/*' &
    else
        archive_fail "unittest"
    fi

    BENCHMARK_ARCHIVE=$(ls /data/mci/artifacts-*compile_upload_benchmarks/mongodb_mongo_*.tgz 2>/dev/null)
    if [[ -n $BENCHMARK_ARCHIVE ]]; then
        tar --wildcards --strip-components=2 -xzf $BENCHMARK_ARCHIVE '*/bin/*' &
    else
        archive_fail "benchmark"
    fi
    SRC_DIR=$(ls -d /data/mci/source-* 2>/dev/null)
    if [[ -n $SRC_DIR ]]; then
        ln -s ${SRC_DIR}/.gdbinit .
        ln -s ${SRC_DIR}/src src
        ln -s ${SRC_DIR}/buildscripts buildscripts
        # Install pymongo to get the bson library for the pretty-printers.
        ${TOOLCHAIN_ROOT}/bin/pip3 install -r ${SRC_DIR}/etc/pip/dev-requirements.txt &
    else
        archive_fail "src"
    fi
    set -x
    # Logic for core dumps uploaded together in a .tar.gz file.
    COREDUMP_ARCHIVE=$(ls /data/mci/artifacts-*/mongo-coredumps-*.tgz 2>/dev/null)
    if [[ -n $COREDUMP_ARCHIVE ]]; then
        tar -xzf $COREDUMP_ARCHIVE &
    fi

    # Logic for core dumps individually uploaded and gzipped.
    COREDUMP_ARCHIVE_TARGET="./"
    for filename in /data/mci/artifacts-*/dump_*.core.gz; do
        COREDUMP_ARCHIVE=$(readlink -f ${filename} 2>/dev/null)
        COREDUMP_ARCHIVE_BASENAME=$(basename ${COREDUMP_ARCHIVE})
        mv ${COREDUMP_ARCHIVE} ${COREDUMP_ARCHIVE_TARGET}
        gunzip ${COREDUMP_ARCHIVE_TARGET}/${COREDUMP_ARCHIVE_BASENAME}
    done

    if [[ -z $(ls ${COREDUMP_ARCHIVE_TARGET}/dump* 2>/dev/null) ]]; then
        archive_fail "coredump"
    fi
    set +x
echo "Waiting for background processes to complete."
wait
# Symbolic linking all of the executable files is sufficient for `gdb ./mongod ./dump_mongod.core`
# to succeed. This inadvertantly also links in the ".debug" files which is unnecessary, but
# harmless. gdb expects the .debug files to live adjacent to the physical binary.
find bin -type f -perm -o=x -exec ln -s {} . \;
    # This script checks a binary for its DWARF version and generates an index if the binary does
    # not already have one. eu-readelf is used in place of readelf as it is much faster.
    cat > add_index.sh <<EOF
#!/bin/bash
set -o pipefail
target_dir="\$(dirname \${1})"
dwarf_version="\$($TOOLCHAIN_ROOT/bin/eu-readelf --debug-dump=info \$1 | grep --line-buffered -E '^\s+Version:' | head -1 | awk -F, '{print(\$1)}' | awk '{print(\$2)}')"
if [[ \$dwarf_version == 5 ]]; then
    $TOOLCHAIN_ROOT/bin/gdb --batch-silent --quiet --nx --eval-command "save gdb-index -dwarf-5 \$target_dir" \$1
    if [ -f \${1}.debug_names ]; then
        $TOOLCHAIN_ROOT/bin/objcopy --dump-section .debug_str=\${1}.debug_str.new \$1
        cat \${1}.debug_str >>\${1}.debug_str.new
        $TOOLCHAIN_ROOT/bin/objcopy --add-section .debug_names=\${1}.debug_names --set-section-flags .debug_names=readonly --update-section .debug_str=\${1}.debug_str.new \${1} \${1}
        rm -f \${1}.debug_names \${1}.debug_str.new \${1}.debug_str
    fi
elif [[ \$dwarf_version == 4 ]]; then
    $TOOLCHAIN_ROOT/bin/gdb --batch-silent --quiet --nx --eval-command "save gdb-index \$target_dir" \$1
    if [ -f \${1}.gdb-index ]; then
        $TOOLCHAIN_ROOT/bin/objcopy --add-section .gdb_index=\${1}.gdb-index --set-section-flags .gdb_index=readonly \${1} \${1}
        rm -f \${1}.gdb-index
    fi
else
    echo "Can't determine DWARF version for \$1"
fi
EOF
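    # The generated .gdb_index/.debug_names section lets gdb skip a full DWARF scan on startup,
    # which noticeably speeds up loading mongod's large debug info.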
    # After creating the index in a separate debug file, the debuglink CRC is no longer valid;
    # this simply recreates the debuglink and therefore updates the CRC to match.
    cat > recalc_debuglink.sh <<EOF
#!/bin/bash
set -o pipefail
debuglink="\$($TOOLCHAIN_ROOT/bin/eu-readelf -S \$1 | grep '.gnu_debuglink')"
if [ ! -z "\$debuglink" ]; then
    $TOOLCHAIN_ROOT/bin/objcopy --remove-section ".gnu_debuglink" \$1
    $TOOLCHAIN_ROOT/bin/objcopy --add-gnu-debuglink "$(basename \$1).debug" \$1
fi
EOF
    # This script creates a symlink in the toolchain lib/debug directory, named in the build-id
    # format. This allows gdb to load the separate debug file and skip CRC checking.
    cat > create_build_id_links.sh <<EOF
#!/bin/bash
set -o pipefail
build_id="\$($TOOLCHAIN_ROOT/bin/eu-readelf -n \$1 | grep 'Build ID:' | awk -F: '{print \$2}' | sed 's/ *//')"
gdb_debug_dir="\$(readlink $TOOLCHAIN_ROOT/bin/gdb)"
gdb_debug_dir="\$(dirname \$gdb_debug_dir)"
gdb_debug_dir="\$(dirname \$gdb_debug_dir)/lib/debug/.build-id/\${build_id:0:2}"
gdb_debug_file="\${build_id:2}.debug"
mkdir -p \$gdb_debug_dir
ln -s \$PWD/\$1 \$gdb_debug_dir/\$gdb_debug_file
EOF
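    # Indexing and debuglink recalculation are independent per file, so fan them out across all
    # CPUs with xargs below.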
    chmod +x ./add_index.sh
    chmod +x ./recalc_debuglink.sh
    chmod +x ./create_build_id_links.sh

    cpus=$(getconf _NPROCESSORS_ONLN)
    find bin lib -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./add_index.sh
    find bin lib -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./recalc_debuglink.sh
    # Construct symlinks based off the build-id so GDB can skip the CRC check normally performed
    # during .gnu_debuglink loading.
    find bin lib -name "*.debug" -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./create_build_id_links.sh
    # Boost-Pretty-Printer supports auto-detection of the boost version but relies on the
    # system-installed version of boost. To avoid this behavior we explicitly specify
    # boost_version. Moreover, the most recent version of boost that Boost-Pretty-Printer verifies
    # it supports is 1.73.0. While 1.73.0 is older than the version in the src/third_party/boost/
    # directory, the pretty-printer in Boost-Pretty-Printer will display boost::optional values
    # correctly.
    cat >> ~/.gdbinit <<EOF
set auto-load safe-path /
set solib-search-path ./lib/
set pagination off
set print object on
set print static-members off
set print pretty on

python
import sys
sys.path.insert(0, './Boost-Pretty-Printer')
import boost
boost.register_printers(boost_version=(1, 73, 0))
end

dir $HOME/debug
EOF
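    # With this .gdbinit in place, running `gdb ./mongod ./dump_mongod.core` from ~/debug picks up
    # the pretty-printers, the extracted shared libraries, and the linked source tree automatically.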
    # Empty out the progress file that warns logging-in users that the setup script is still
    # running.
    echo "" > ~/.setup_spawnhost_coredump_progress

    # Alert currently logged-in users that this setup script has completed. Logging back in will
    # ensure any paths/environment variables are set as intended.
    wall "The setup_spawnhost_coredump script has completed, please relogin to ensure the right environment variables are set."
fi
# Send a Slack notification as the very last thing the setup_spawnhost_coredump script does.
# This way a Server engineer can safely ignore the Evergreen host they spawned until its paths
# and environment variables are configured as intended for their first connection.
if [[ "${machine}" = "Cygwin" ]]; then
# The setup_spawnhost_coredump script runs as the mci-exec user on Windows hosts. However,
# Server engineers log in as the Administrator user.
ssh_user="Administrator"
# The Evergreen binary only expects a Windows path. The rest of Cygwin is flexible about it
# being a Cygwin path or a Windows path so we do the conversion here.
evg_credentials_pathname=$(cygpath -w ~Administrator/.evergreen.yml)
evg_binary_pathname=~Administrator/cli_bin/evergreen
else
ssh_user=$(whoami)
evg_credentials_pathname=~/.evergreen.yml
evg_binary_pathname=evergreen
fi
slack_user=$(awk '{if ($1 == "user:") print $2}' "$evg_credentials_pathname")
# Refer to https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
# for more information on the AWS instance metadata endpoints.
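# IMDSv2 requires a session token; fetch one, then use it to look up this host's public hostname.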
aws_metadata_svc="http://169.254.169.254"
aws_token=$(curl -s -X PUT "$aws_metadata_svc/latest/api/token" -H 'X-aws-ec2-metadata-token-ttl-seconds: 60')
ssh_host=$(curl -s -H "X-aws-ec2-metadata-token: $aws_token" "$aws_metadata_svc/latest/meta-data/public-hostname")
if [[ "${machine}" = "Cygwin" ]]; then
slack_message=$(printf "The setup_spawnhost_coredump script has finished setting things up. \
Please use Windows Remote Desktop with\n\
1. PC name: $ssh_host\n\
2. User account: $ssh_user\n\
3. The RDP password configured under the edit dialog at https://spruce.mongodb.com/spawn/host\n\
to log in.")
else
slack_message="The setup_spawnhost_coredump script has finished setting things up. Please run "'```'"ssh $ssh_user@$ssh_host"'```'" to log in."
fi
# The Evergreen spawn host is expected to be provisioned with the user's .evergreen.yml
# credentials, but in case something unexpected happens we don't want the
# setup_spawnhost_coredump script itself to error.
if [[ -n "${slack_user}" ]]; then
    "$evg_binary_pathname" --config "$evg_credentials_pathname" notify slack -t "@$slack_user" -m "$slack_message"
fi