#!/bin/bash
cd $HOME # workaround EVG-12829
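# Detect the host platform; the script branches between a Windows (Cygwin) setup path and a
# POSIX one below.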
unameOut=$(uname -s)
case "${unameOut}" in
    Linux*)  machine=Linux;;
    Darwin*) machine=Mac;;
    CYGWIN*) machine=Cygwin;;
    *)       machine="UNKNOWN:${unameOut}"
esac
TOOLCHAIN_ROOT=/opt/mongodbtoolchain/v4
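# Debugging tools (gdb, objcopy, eu-readelf, pip3) are taken from the MongoDB toolchain rather
# than the system packages.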
archive_fail() {
echo "Error: archive [${1}] not found." >&2
}
if [[ "${machine}" = "Cygwin" ]]; then
    out_dir="/cygdrive/c/setup_script_output.txt"
    desktop_dir="/cygdrive/c/Users/Administrator/Desktop"
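    # Capture all output of the braced group below into ${out_dir}; a copy is placed on the
    # Administrator's Desktop at the end of this branch.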
    {
        date
        env
        echo "----------------------"

        echo -e "\n=> Setting _NT_SOURCE_PATH environment variable for debuggers to pick up source files."
        SRC_DIR_HASH=$(find /cygdrive/z/data/mci -name 'source-*' | head -n 1 | xargs -I% basename %)
        SRC_DIR="Z:\data\mci\\${SRC_DIR_HASH}\src"
        echo "Source Path: [${SRC_DIR}]"
        set -x;
        setx _NT_SOURCE_PATH "${SRC_DIR}"
        { set +x; } 2>/dev/null

        echo -e "\n=> Setting _NT_SYMBOL_PATH environment variable for debuggers to pick up the symbols."
        DBG_SYMBOLS_HASH=$(find /cygdrive/z/data/mci -name 'artifacts-*dist_test_debug' | head -n 1 | xargs -I% basename %)
        DBG_SYMBOLS_WINPATH="\"Z:\data\mci\\${DBG_SYMBOLS_HASH}\extracted_symbols\dist-test\bin;srv*;\""
        DBG_ARCHIVE_PARENT=$(readlink -f /cygdrive/z/data/mci/artifacts-*dist_test_debug)
        DBG_ARCHIVE=$(readlink -f ${DBG_ARCHIVE_PARENT}/debugsymbols-*.zip)
        DBG_ARCHIVE_TARGET_PARENT="${DBG_ARCHIVE_PARENT}/extracted_symbols"
        DBG_ARCHIVE_TARGET="${DBG_ARCHIVE_TARGET_PARENT}/dist-test/bin"
        echo "Symbols Dir: [${DBG_ARCHIVE_TARGET}]"

        echo -e "\n=> Copying .natvis file to Visual Studio's expected directories (System for 2017/2019 and User)."
        set -x;
        cp "/cygdrive/z/data/mci/${SRC_DIR_HASH}/buildscripts/win/mongodb.natvis" /cygdrive/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/Professional/Common7/Packages/Debugger/Visualizers
        cp "/cygdrive/z/data/mci/${SRC_DIR_HASH}/buildscripts/win/mongodb.natvis" /cygdrive/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Professional/Common7/Packages/Debugger/Visualizers
        mkdir -p "${USERPROFILE}/Documents/Visual Studio 2022/Visualizers"
        cp "/cygdrive/z/data/mci/${SRC_DIR_HASH}/buildscripts/win/mongodb.natvis" "${USERPROFILE}/Documents/Visual Studio 2022/Visualizers"
        { set +x; } 2>/dev/null

        echo -e "\n=> Extracting Symbol files."
        set -x;
        mkdir -p ${DBG_ARCHIVE_TARGET_PARENT}
        unzip -n ${DBG_ARCHIVE} -d ${DBG_ARCHIVE_TARGET_PARENT}
        setx _NT_SYMBOL_PATH "${DBG_SYMBOLS_WINPATH}"
        { set +x; } 2>/dev/null
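        # Core dumps arrive either bundled in a single mongo-coredumps .tgz or as individually
        # gzipped .mdmp files; both forms are handled below.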
echo -e "\n=> Extracting Core Dumps to Desktop."
set -x
# logic for core dumps uploaded together in a .tar.gz file
COREDUMP_ARCHIVE=$(readlink -f /cygdrive/z/data/mci/artifacts-*/mongo-coredumps*.tgz 2>/dev/null)
if [[ -f "${COREDUMP_ARCHIVE}" ]]; then
COREDUMP_ARCHIVE_PARENT=$(dirname ${COREDUMP_ARCHIVE})
COREDUMP_ARCHIVE_TARGET="${COREDUMP_ARCHIVE_PARENT}/extracted_dump"
set -x;
mkdir -p ${COREDUMP_ARCHIVE_TARGET}
tar -xzvf ${COREDUMP_ARCHIVE} -C ${COREDUMP_ARCHIVE_TARGET}
cp -r ${COREDUMP_ARCHIVE_TARGET}/* ${desktop_dir}
else
archive_fail "core dump"
fi
# logic for core dumps individually uploaded and gzipped
filenames=$(find /cygdrive/z/data/mci/artifacts-* -name 'dump_*.mdmp.gz' -o -name 'mongod.*.mdmp.gz')
for filename in ${filenames[@]};
do
COREDUMP_ARCHIVE=$(readlink -f ${filename} 2>/dev/null)
COREDUMP_ARCHIVE_BASENAME=$(basename ${COREDUMP_ARCHIVE})
COREDUMP_ARCHIVE_PARENT=$(dirname ${COREDUMP_ARCHIVE})
COREDUMP_ARCHIVE_TARGET="${COREDUMP_ARCHIVE_PARENT}/extracted_dump"
mkdir -p ${COREDUMP_ARCHIVE_TARGET}
mv ${COREDUMP_ARCHIVE} ${COREDUMP_ARCHIVE_TARGET}
gunzip ${COREDUMP_ARCHIVE_TARGET}/${COREDUMP_ARCHIVE_BASENAME}
DECOMPRESSED_NAME=$(basename ${COREDUMP_ARCHIVE_BASENAME} .gz)
cp ${COREDUMP_ARCHIVE_TARGET}/${DECOMPRESSED_NAME} ${desktop_dir}
done
echo -e "\n=> Extracting Binaries and symbols to Desktop."
BIN_ARCHIVE=$(ls /cygdrive/z/data/mci/artifacts-*archive_dist_test*/mongo-*.zip 2>/dev/null)
if [[ -f $BIN_ARCHIVE ]]; then
BIN_ARCHIVE_PARENT=$(dirname ${BIN_ARCHIVE})
BIN_ARCHIVE_TARGET=${BIN_ARCHIVE_PARENT}/extracted_binaries
unzip -n ${BIN_ARCHIVE} -d ${BIN_ARCHIVE_TARGET}
cp ${BIN_ARCHIVE_TARGET}/dist-test/bin/mongo{,d,s}.pdb ${desktop_dir}
cp ${BIN_ARCHIVE_TARGET}/dist-test/bin/mongo{,d,s}.exe ${desktop_dir}
else
archive_fail "bin"
fi
if [[ -z $(ls ${COREDUMP_ARCHIVE_TARGET}/dump* 2>/dev/null) ]]; then
echo "No core dumps found."
fi
{ set +x; } 2>/dev/null
echo "Copied to Desktop."
} &> ${out_dir}
cp ${out_dir} ${desktop_dir}
else
    # Communicate to users that logged in before the script started that nothing is ready yet.
    wall "The setup_spawnhost_coredump script has just started setting up the debugging environment."

    # Write this file, which gets cat'ed on login, to tell users logging in that this setup
    # script is still running.
    echo '+-----------------------------------------------------------------------------------+' >  ~/.setup_spawnhost_coredump_progress
    echo "| The setup script is still setting up data files for inspection on a [${machine}] host. |" >> ~/.setup_spawnhost_coredump_progress
    echo '+-----------------------------------------------------------------------------------+' >> ~/.setup_spawnhost_coredump_progress
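    # Append a login banner to ~/.profile: alias gdb to the toolchain build, drop the user into
    # ~/debug, and list the debuggable binaries and core dumps that were extracted.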
    cat >> ~/.profile <<EOF
cat ~/.setup_spawnhost_coredump_progress
# Coredumps generated by a toolchain-built mongodb can be problematic when examined with the
# system gdb.
alias gdb='${TOOLCHAIN_ROOT}/bin/gdb'
# Put the user into the debug directory; this is where gdb is expected to be invoked from.
cd debug
echo "Debuggable binaries:"
ls -l mongo* 2>/dev/null | grep -v debug$
ls -l bin/ 2>/dev/null
( ls mongo* &>/dev/null || ls bin/ &>/dev/null ) || echo " [none]"

for item in "mongo" "mongod" "mongos"; do
    echo "\${item} core dumps:"
    ls -l dump_\${item}.* 2>/dev/null || echo " [none]"
done

echo "Core dumps from unknown processes (crashed processes typically found here):"
ls -l dump_* 2>/dev/null | grep -v mongo || echo " [none]"

echo
echo "To examine a core dump, type 'gdb ./<binary> ./<core file>'"
cat ~/.setup_spawnhost_coredump_progress
EOF
    echo 'if [ -f ~/.profile ]; then
. ~/.profile
fi' >> .bash_profile
    # Make a directory on the larger EBS volume and soft-link it under the home directory. The
    # smaller home volume can run short of space, particularly with coredumps from sharded
    # timeouts.
    mkdir -p /data/debug
    ln -s /data/debug .
    cd debug
    # As the name suggests, this provides pretty-printers, primarily for boost::optional<T>.
    git clone git@github.com:mongodb-forks/Boost-Pretty-Printer.git --branch mongodb-stable &
    # Discover and unarchive necessary files and source code. This will put mongo binaries and
    # their partner .debug files in the same `debug/bin` directory. The `bin` directory will later
    # be symbolically linked into the top-level (`debug`) directory. Shared library files and their
    # debug symbols will be dumped into a `debug/lib` directory for tidiness. The mongo
    # `<reporoot>/src/` directory is soft-linked as `debug/src`. The .gdbinit file assumes gdb is
    # being run from the `debug` directory.
    BIN_ARCHIVE=$(ls /data/mci/artifacts-*archive_dist_test*/mongo-*.tgz 2>/dev/null)
    if [[ -n $BIN_ARCHIVE ]]; then
        # Have the shell expand braces before passing the wildcards to tar.
        bin_files_pattern=(\*/bin/mongo{d,s,,bridge})
        tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE "${bin_files_pattern[@]}"
        tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE '*/lib/*' &
    else
        archive_fail "bin"
    fi
    DBG_ARCHIVE=$(ls /data/mci/artifacts-*archive_dist_test_debug/debugsymbols-*.tgz 2>/dev/null)
    if [[ -n $DBG_ARCHIVE ]]; then
        # Support discovering split-dwarf files. Specifically, capture both <file>.debug and
        # <file>.dwp files.
        # Have the shell expand braces before passing the wildcards to tar.
        dbg_files_pattern=(\*/bin/mongo{d,s,,bridge}.{debug,dwp})
        tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE "${dbg_files_pattern[@]}" &
        tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE '*/lib/*' &
    else
        archive_fail "debug"
    fi
    UNITTEST_ARCHIVE=$(ls /data/mci/artifacts-*run_unittests*/mongo-unittests-*.tgz 2>/dev/null)
    if [[ -n $UNITTEST_ARCHIVE ]]; then
        tar --wildcards --strip-components=0 -xzf $UNITTEST_ARCHIVE 'bin/*' &
        tar --wildcards -xzf $UNITTEST_ARCHIVE 'lib/*' &
    else
        archive_fail "unittest"
    fi

    BENCHMARK_ARCHIVE=$(ls /data/mci/artifacts-*compile_upload_benchmarks/mongodb_mongo_*.tgz 2>/dev/null)
    if [[ -n $BENCHMARK_ARCHIVE ]]; then
        tar --wildcards --strip-components=2 -xzf $BENCHMARK_ARCHIVE '*/bin/*' &
    else
        archive_fail "benchmark"
    fi
    SRC_DIR=$(ls -d /data/mci/source-* 2>/dev/null)
    if [[ -n $SRC_DIR ]]; then
        ln -s ${SRC_DIR}/.gdbinit .
        ln -s ${SRC_DIR}/src src
        ln -s ${SRC_DIR}/buildscripts buildscripts
        # Install pymongo to get the bson library for the pretty-printers.
        ${TOOLCHAIN_ROOT}/bin/pip3 install -r ${SRC_DIR}/etc/pip/dev-requirements.txt &
    else
        archive_fail "src"
    fi
    set -x
    # Logic for core dumps uploaded together in a .tar.gz file.
    COREDUMP_ARCHIVE=$(ls /data/mci/artifacts-*/mongo-coredumps-*.tgz 2>/dev/null)
    if [[ -n $COREDUMP_ARCHIVE ]]; then
        tar -xzf $COREDUMP_ARCHIVE &
    fi

    # Logic for core dumps individually uploaded and gzipped.
    COREDUMP_ARCHIVE_TARGET="./"
    for filename in /data/mci/artifacts-*/dump_*.core.gz; do
        COREDUMP_ARCHIVE=$(readlink -f ${filename} 2>/dev/null)
        COREDUMP_ARCHIVE_BASENAME=$(basename ${COREDUMP_ARCHIVE})
        mv ${COREDUMP_ARCHIVE} ${COREDUMP_ARCHIVE_TARGET}
        gunzip ${COREDUMP_ARCHIVE_TARGET}/${COREDUMP_ARCHIVE_BASENAME}
    done

    if [[ -z $(ls ${COREDUMP_ARCHIVE_TARGET}/dump* 2>/dev/null) ]]; then
        archive_fail "coredump"
    fi
    set +x
echo "Waiting for background processes to complete."
wait
# Symbolic linking all of the executable files is sufficient for `gdb ./mongod ./dump_mongod.core`
# to succeed. This inadvertantly also links in the ".debug" files which is unnecessary, but
# harmless. gdb expects the .debug files to live adjacent to the physical binary.
find bin -type f -perm -o=x -exec ln -s {} . \;
    # This script checks a binary for its DWARF version and generates an index if the binary does
    # not already have one. eu-readelf is used in place of readelf as it is much faster.
    cat > add_index.sh <<EOF
#!/bin/bash
set -o pipefail
target_dir="\$(dirname \${1})"
dwarf_version="\$($TOOLCHAIN_ROOT/bin/eu-readelf --debug-dump=info \$1 | grep --line-buffered -E '^\s+Version:' | head -1 | awk -F, '{print(\$1)}' | awk '{print(\$2)}')"
if [[ \$dwarf_version == 5 ]]; then
    $TOOLCHAIN_ROOT/bin/gdb --batch-silent --quiet --nx --eval-command "save gdb-index -dwarf-5 \$target_dir" \$1
    if [ -f \${1}.debug_names ]; then
        $TOOLCHAIN_ROOT/bin/objcopy --dump-section .debug_str=\${1}.debug_str.new \$1
        cat \${1}.debug_str >>\${1}.debug_str.new
        $TOOLCHAIN_ROOT/bin/objcopy --add-section .debug_names=\${1}.debug_names --set-section-flags .debug_names=readonly --update-section .debug_str=\${1}.debug_str.new \${1} \${1}
        rm -f \${1}.debug_names \${1}.debug_str.new \${1}.debug_str
    fi
elif [[ \$dwarf_version == 4 ]]; then
    $TOOLCHAIN_ROOT/bin/gdb --batch-silent --quiet --nx --eval-command "save gdb-index \$target_dir" \$1
    if [ -f \${1}.gdb-index ]; then
        $TOOLCHAIN_ROOT/bin/objcopy --add-section .gdb_index=\${1}.gdb-index --set-section-flags .gdb_index=readonly \${1} \${1}
        rm -f \${1}.gdb-index
    fi
else
    echo "Can't determine DWARF version for \$1"
fi
EOF
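    # The generated .gdb_index/.debug_names section lets gdb skip a full DWARF scan on startup,
    # which noticeably speeds up loading mongod's large debug info.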
    # After creating the index in a separate debug file, the debuglink CRC is no longer valid;
    # this simply recreates the debuglink and therefore updates the CRC to match.
    cat > recalc_debuglink.sh <<EOF
#!/bin/bash
set -o pipefail
debuglink="\$($TOOLCHAIN_ROOT/bin/eu-readelf -S \$1 | grep '.gnu_debuglink')"
if [ ! -z "\$debuglink" ]; then
    $TOOLCHAIN_ROOT/bin/objcopy --remove-section ".gnu_debuglink" \$1
    $TOOLCHAIN_ROOT/bin/objcopy --add-gnu-debuglink "$(basename \$1).debug" \$1
fi
EOF
    # This script creates a symlink in the toolchain lib/debug directory, named in the build-id
    # format. This allows gdb to load the separate debug file and skip CRC checking.
    cat > create_build_id_links.sh <<EOF
#!/bin/bash
set -o pipefail
build_id="\$($TOOLCHAIN_ROOT/bin/eu-readelf -n \$1 | grep 'Build ID:' | awk -F: '{print \$2}' | sed 's/ *//')"
gdb_debug_dir="\$(readlink $TOOLCHAIN_ROOT/bin/gdb)"
gdb_debug_dir="\$(dirname \$gdb_debug_dir)"
gdb_debug_dir="\$(dirname \$gdb_debug_dir)/lib/debug/.build-id/\${build_id:0:2}"
gdb_debug_file="\${build_id:2}.debug"
mkdir -p \$gdb_debug_dir
ln -s \$PWD/\$1 \$gdb_debug_dir/\$gdb_debug_file
EOF
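    # Indexing and debuglink recalculation are independent per file, so fan them out across all
    # CPUs with xargs below.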
    chmod +x ./add_index.sh
    chmod +x ./recalc_debuglink.sh
    chmod +x ./create_build_id_links.sh

    cpus=$(getconf _NPROCESSORS_ONLN)
    find bin lib -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./add_index.sh
    find bin lib -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./recalc_debuglink.sh
    # Construct symlinks based off the build-id so GDB can skip the CRC check normally performed
    # during .gnu_debuglink loading.
    find bin lib -name "*.debug" -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./create_build_id_links.sh
    # Boost-Pretty-Printer supports auto-detection of the boost version but relies on the
    # system-installed version of boost. To avoid this behavior we explicitly specify
    # boost_version. Moreover, the most recent version of boost that Boost-Pretty-Printer verifies
    # it supports is 1.73.0. While 1.73.0 is older than the version in the src/third_party/boost/
    # directory, the pretty-printer in Boost-Pretty-Printer will display boost::optional values
    # correctly.
    cat >> ~/.gdbinit <<EOF
set auto-load safe-path /
set solib-search-path ./lib/
set pagination off
set print object on
set print static-members off
set print pretty on

python
import sys
sys.path.insert(0, './Boost-Pretty-Printer')
import boost
boost.register_printers(boost_version=(1, 73, 0))
end

dir $HOME/debug
EOF
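    # With this .gdbinit in place, running `gdb ./mongod ./dump_mongod.core` from ~/debug picks up
    # the pretty-printers, the extracted shared libraries, and the linked source tree automatically.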
    # Empty out the progress file that warns logging-in users that the setup script is still
    # running.
    echo "" > ~/.setup_spawnhost_coredump_progress

    # Alert currently logged-in users that this setup script has completed. Logging back in will
    # ensure any paths/environment variables are set as intended.
    wall "The setup_spawnhost_coredump script has completed, please relogin to ensure the right environment variables are set."
fi
# Send a Slack notification as the very last thing the setup_spawnhost_coredump script does.
# This way a Server engineer can safely ignore the Evergreen host they spawned until its paths
# and environment variables are configured as intended for their first connection.
if [[ "${machine}" = "Cygwin" ]]; then
# The setup_spawnhost_coredump script runs as the mci-exec user on Windows hosts. However,
# Server engineers log in as the Administrator user.
ssh_user="Administrator"
# The Evergreen binary only expects a Windows path. The rest of Cygwin is flexible about it
# being a Cygwin path or a Windows path so we do the conversion here.
evg_credentials_pathname=$(cygpath -w ~Administrator/.evergreen.yml)
evg_binary_pathname=~Administrator/cli_bin/evergreen
else
ssh_user=$(whoami)
evg_credentials_pathname=~/.evergreen.yml
evg_binary_pathname=evergreen
fi
slack_user=$(awk '{if ($1 == "user:") print $2}' "$evg_credentials_pathname")
# Refer to https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
# for more information on the AWS instance metadata endpoints.
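# IMDSv2 requires a session token; fetch one, then use it to look up this host's public hostname.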
aws_metadata_svc="http://169.254.169.254"
aws_token=$(curl -s -X PUT "$aws_metadata_svc/latest/api/token" -H 'X-aws-ec2-metadata-token-ttl-seconds: 60')
ssh_host=$(curl -s -H "X-aws-ec2-metadata-token: $aws_token" "$aws_metadata_svc/latest/meta-data/public-hostname")
if [[ "${machine}" = "Cygwin" ]]; then
slack_message=$(printf "The setup_spawnhost_coredump script has finished setting things up. \
Please use Windows Remote Desktop with\n\
1. PC name: $ssh_host\n\
2. User account: $ssh_user\n\
3. The RDP password configured under the edit dialog at https://spruce.mongodb.com/spawn/host\n\
to log in.")
else
slack_message="The setup_spawnhost_coredump script has finished setting things up. Please run "'```'"ssh $ssh_user@$ssh_host"'```'" to log in."
fi
# The Evergreen spawn host is expected to be provisioned with the user's .evergreen.yml
# credentials, but in case something unexpected happens we don't want the
# setup_spawnhost_coredump script itself to error.
if [[ -n "${slack_user}" ]]; then
    "$evg_binary_pathname" --config "$evg_credentials_pathname" notify slack -t "@$slack_user" -m "$slack_message"
fi