diff --git a/buildscripts/BUILD.bazel b/buildscripts/BUILD.bazel index cdff302c115..557eb1fba45 100644 --- a/buildscripts/BUILD.bazel +++ b/buildscripts/BUILD.bazel @@ -232,11 +232,10 @@ py_library( ) py_binary( - name = "gather_failed_unittests", - srcs = ["gather_failed_unittests.py"], + name = "gather_failed_tests", + srcs = ["gather_failed_tests.py"], visibility = ["//visibility:public"], deps = [ - ":parse_test_xml", "//buildscripts/util", dependency( "typer", diff --git a/buildscripts/create_rbe_sysroot.py b/buildscripts/create_rbe_sysroot.py index ca68494b4e3..9ea281c6774 100644 --- a/buildscripts/create_rbe_sysroot.py +++ b/buildscripts/create_rbe_sysroot.py @@ -30,7 +30,7 @@ def main(): os.makedirs("./rbe_sysroot", exist_ok=True) - subprocess.run([container_cli, "cp", f"{cid}:/", "./rbe_sysroot/"], check=True) + subprocess.run(["sudo", container_cli, "cp", f"{cid}:/", "./rbe_sysroot/"], check=True) user = getpass.getuser() subprocess.run(["sudo", "chown", "-R", f"{user}:{user}", "./rbe_sysroot"], check=True) diff --git a/buildscripts/gather_failed_tests.py b/buildscripts/gather_failed_tests.py new file mode 100644 index 00000000000..79e27c9a913 --- /dev/null +++ b/buildscripts/gather_failed_tests.py @@ -0,0 +1,123 @@ +import json +import os +import shutil +import subprocess +from pathlib import Path +from typing import List + +import typer + + +def process_bep(bep_path): + failed_tests = [] + successful_tests = [] + with open(bep_path, "rt") as f: + # Each line in the BEP JSON file is a separate JSON object representing an event + for line in f: + event = json.loads(line) + if "testSummary" in event.get("id", {}): + target_label = event["id"]["testSummary"]["label"] + if "testSummary" not in event: + continue + overall_status = event["testSummary"]["overallStatus"] + if overall_status != "PASSED": + failed_tests += [target_label] + else: + successful_tests += [target_label] + return failed_tests, successful_tests + + +def _relink_binaries_with_symbols(failed_test_labels: List[str]): + print("Rebuilding tests with --remote_download_outputs=toplevel...") + bazel_build_flags = "" + if os.path.isfile(".bazel_build_flags"): + with open(".bazel_build_flags", "r", encoding="utf-8") as f: + bazel_build_flags = f.read().strip() + + bazel_build_flags += " --remote_download_outputs=toplevel" + + relink_command = [ + arg for arg in ["bazel", "build", *bazel_build_flags.split(" "), *failed_test_labels] if arg + ] + + print(f"Running command: {' '.join(relink_command)}") + subprocess.run( + relink_command, + check=True, + ) + + repro_test_command = " ".join(["test" if arg == "build" else arg for arg in relink_command]) + with open(".failed_unittest_repro.txt", "w", encoding="utf-8") as f: + f.write(repro_test_command) + print(f"Repro command written to .failed_unittest_repro.txt: {repro_test_command}") + + +def _copy_bins_to_upload(upload_bin_dir: str, upload_lib_dir: str): + libs = [] + bins = [] + dsyms = [] + bazel_bin_dir = Path("./bazel-bin/src") + for dirpath, _, filenames in os.walk(bazel_bin_dir): + if dirpath.endswith(".dSYM"): + dsyms.append(Path(dirpath)) + for f in filenames: + file = Path(f) + if file.stem.endswith(("_with_debug", "_ci_wrapper")): + continue + if file.suffix in [".so", ".so.debug", ".dylib"]: + libs.append(Path(os.path.join(dirpath, file))) + elif file.suffix in [".debug", ".dwp", ".pdb", ".exe", ""]: + bins.append(Path(os.path.join(dirpath, file))) + + for binary_file in bins: + new_binary_file = upload_bin_dir / binary_file.name + if not os.path.exists(new_binary_file): + try: + shutil.copy(binary_file, new_binary_file) + except FileNotFoundError: + continue # It is likely a broken symlink. + + for lib_file in libs: + new_lib_file = upload_lib_dir / lib_file.name + if not os.path.exists(new_lib_file): + try: + shutil.copy(lib_file, new_lib_file) + except FileNotFoundError: + continue # It is likely a broken symlink. + + for dsym_dir in dsyms: + print(f"Copying dsym {dsym_dir} to {upload_bin_dir}") + try: + shutil.copytree(dsym_dir, upload_bin_dir / dsym_dir.name, dirs_exist_ok=True) + except FileNotFoundError: + continue # It is likely a broken symlink. + + +def main(build_events: str = "build_events.json"): + """Gather binaries and debug symbols of failed tests based off of a Build Event Protocol (BEP) json file.""" + + os.chdir(os.environ.get("BUILD_WORKSPACE_DIRECTORY", ".")) + + upload_bin_dir = Path("dist-tests/bin") + upload_lib_dir = Path("dist-tests/lib") + upload_bin_dir.mkdir(parents=True, exist_ok=True) + upload_lib_dir.mkdir(parents=True, exist_ok=True) + + failed_tests, successful_tests = process_bep(build_events) + if len(failed_tests) == 0 and len(successful_tests) == 0: + print("Test results not found, aborting. Please check above for any build errors.") + exit(1) + + if not failed_tests: + print("No failed tests found.") + exit(0) + + print(f"Found {len(failed_tests)} failed tests. Gathering binaries and debug symbols.") + _relink_binaries_with_symbols(failed_tests) + + print("Copying binaries and debug symbols to upload directories.") + _copy_bins_to_upload(upload_bin_dir, upload_lib_dir) + + +if __name__ == "__main__": + typer.run(main) diff --git a/buildscripts/gather_failed_unittests.py b/buildscripts/gather_failed_unittests.py deleted file mode 100644 index 74254da05a0..00000000000 --- a/buildscripts/gather_failed_unittests.py +++ /dev/null @@ -1,148 +0,0 @@ -import os -import shutil -import subprocess -from glob import glob -from pathlib import Path -from typing import List - -import typer - -from buildscripts.parse_test_xml import parse_test_xml - - -def _collect_test_results(testlog_dir: str) -> List[str]: - failed_tests = [] - successful_tests = [] - for test_xml in glob(f"{testlog_dir}/**/test.xml", recursive=True): - testsuite = parse_test_xml(test_xml).find("testsuite") - testcase = testsuite.find("testcase") - test_file = testcase.attrib["name"] - - if testcase.find("error") is not None: - failed_tests += [test_file] - else: - successful_tests += [test_file] - return failed_tests, successful_tests - - -def _relink_binaries_with_symbols(failed_tests: List[str]): - print("Rebuilding unit tests with --remote_download_outputs=toplevel...") - bazel_build_flags = "" - if os.path.isfile(".bazel_build_flags"): - with open(".bazel_build_flags", "r", encoding="utf-8") as f: - bazel_build_flags = f.read().strip() - - bazel_build_flags += " --remote_download_outputs=toplevel" - - # Remap //src/mongo/testabc to //src/mongo:testabc - failed_test_labels = [":".join(test.rsplit("/", 1)) for test in failed_tests] - - relink_command = [ - arg for arg in ["bazel", "build", *bazel_build_flags.split(" "), *failed_test_labels] if arg - ] - - print(f"Running command: {' '.join(relink_command)}") - subprocess.run( - relink_command, - check=True, - ) - - repro_test_command = " ".join(["test" if arg == "build" else arg for arg in relink_command]) - with open(".failed_unittest_repro.txt", "w", encoding="utf-8") as f: - f.write(repro_test_command) - print(f"Repro command written to .failed_unittest_repro.txt: {repro_test_command}") - - -def _copy_bins_to_upload(failed_tests: List[str], upload_bin_dir: str, upload_lib_dir: str) -> bool: - success = True - bazel_bin_dir = Path("./bazel-bin/src") - # Search both in the top level remote exec shellscript wrapper output directory, and in the - # binary output directory. - failed_tests += [ - failed_test.replace("_ci_wrapper", "") - for failed_test in failed_tests - if "_ci_wrapper" in failed_test - ] - for failed_test in failed_tests: - full_binary_path = bazel_bin_dir / failed_test - binary_name = failed_test.split(os.sep)[-1] - bin_to_upload = [] - for pattern in [ - "*.core", - "*.mdmp", - f"{binary_name}.debug", - f"{binary_name}.pdb", - f"{binary_name}.exe", - f"{binary_name}", - ]: - bin_to_upload.extend(bazel_bin_dir.rglob(pattern)) - - # core dumps may be in the root directory - bin_to_upload.extend(Path(".").rglob("*.core")) - bin_to_upload.extend(Path(".").rglob("*.mdmp")) - - if not bin_to_upload: - print(f"Cannot locate the files to upload for ({failed_test})") - success = False - continue - - for binary_file in bin_to_upload: - new_binary_file = upload_bin_dir / binary_file.name - if not os.path.exists(new_binary_file): - print(f"Copying {binary_file} to {new_binary_file}") - shutil.copy(binary_file, new_binary_file) - - dsym_dir = full_binary_path.with_suffix(".dSYM") - if dsym_dir.is_dir(): - print(f"Copying dsym {dsym_dir} to {upload_bin_dir}") - shutil.copytree(dsym_dir, upload_bin_dir / dsym_dir.name, dirs_exist_ok=True) - - # Copy debug symbols for dynamic builds - lib_to_upload = [] - for pattern in [ - "*.so", - "*.so.debug", - "*.dylib", - ]: - lib_to_upload.extend(bazel_bin_dir.rglob(pattern)) - - for lib_file in lib_to_upload: - new_lib_file = upload_lib_dir / lib_file.name - if not os.path.exists(new_lib_file): - print(f"Copying {lib_file} to {new_lib_file}") - shutil.copy(lib_file, new_lib_file) - print("All binaries and debug symbols copied successfully.") - return success - - -def main(testlog_dir: str = "bazel-testlogs"): - """Gather unit test binaries and debug symbols of failed unit tests based off of bazel test logs.""" - - os.chdir(os.environ.get("BUILD_WORKSPACE_DIRECTORY", ".")) - - upload_bin_dir = Path("dist-unittests/bin") - upload_lib_dir = Path("dist-unittests/lib") - upload_bin_dir.mkdir(parents=True, exist_ok=True) - upload_lib_dir.mkdir(parents=True, exist_ok=True) - - failed_tests, successful_tests = _collect_test_results(testlog_dir) - if len(failed_tests) == 0 and len(successful_tests) == 0: - print("Test results not found, aborting. Please check above for any build errors.") - exit(1) - - if not failed_tests: - print("No failed tests found.") - exit(0) - - print(f"Found {len(failed_tests)} failed tests. Gathering binaries and debug symbols.") - _relink_binaries_with_symbols(failed_tests) - - print("Copying binaries and debug symbols to upload directories.") - if not _copy_bins_to_upload(failed_tests, upload_bin_dir, upload_lib_dir): - print("Fatal error occurred during processing.") - # TODO: add slack notification - exit(1) - - -if __name__ == "__main__": - typer.run(main) diff --git a/buildscripts/setup_spawnhost_coredump b/buildscripts/setup_spawnhost_coredump index d232af63760..004d56dca85 100755 --- a/buildscripts/setup_spawnhost_coredump +++ b/buildscripts/setup_spawnhost_coredump @@ -211,6 +211,7 @@ fi' >> .bash_profile mkdir -p /data/debug ln -s /data/debug . cd debug + mkdir -p bin lib # As the name suggests, pretty printers. Primarily for boost::optional git clone https://github.com/mongodb-forks/Boost-Pretty-Printer.git --branch mongodb-stable & @@ -241,13 +242,13 @@ fi' >> .bash_profile else archive_fail "debug" fi - - UNITTEST_ARCHIVE=$(ls /data/mci/artifacts-*unit_test*/mongo-unittests-*.tgz 2>/dev/null) - if [[ -n $UNITTEST_ARCHIVE ]]; then - tar --wildcards --strip-components=0 -xzf $UNITTEST_ARCHIVE 'bin/*' & - tar --wildcards -xzf $UNITTEST_ARCHIVE 'lib/*' & + + TEST_ARCHIVE=$(ls /data/mci/artifacts-*/mongo-*tests-*.tgz 2>/dev/null) + if [[ -n $TEST_ARCHIVE ]]; then + tar --wildcards --strip-components=0 -xzf $TEST_ARCHIVE 'bin/*' & + tar --wildcards -xzf $TEST_ARCHIVE 'lib/*' & else - archive_fail "unit_test" + archive_fail "tests" fi BENCHMARK_ARCHIVE=$(ls /data/mci/artifacts-*compile_upload_benchmarks/benchmarks-*.tgz 2>/dev/null) diff --git a/etc/evergreen_yml_components/configuration.yml b/etc/evergreen_yml_components/configuration.yml index 4c891d97b0e..75d6a8b98ce 100644 --- a/etc/evergreen_yml_components/configuration.yml +++ b/etc/evergreen_yml_components/configuration.yml @@ -137,7 +137,7 @@ post: - func: "save mongo coredumps" - func: "generate hang analyzer tasks" - func: "attach bazel invocation text" - - func: "save failed unittests" + - func: "save failed tests" - func: "save hang analyzer debugger files" - func: "save disk statistics" - func: "save system resource information" @@ -150,9 +150,9 @@ post: src/gcov-intermediate-files.tgz src/*.core src/*.mdmp src/*.core.gz src/*.mdmp.gz mongo-coredumps.json - src/dist-unittests/bin/* - src/dist-unittests/lib/* - mongo-unittests.tgz + src/dist-tests/bin/* + src/dist-tests/lib/* + mongo-tests.tgz src/debugger*.* src/mongo-hanganalyzer.tgz diskstats.tgz diff --git a/etc/evergreen_yml_components/definitions.yml b/etc/evergreen_yml_components/definitions.yml index 6bd83735738..6af5b78245f 100644 --- a/etc/evergreen_yml_components/definitions.yml +++ b/etc/evergreen_yml_components/definitions.yml @@ -2964,26 +2964,26 @@ functions: args: - "./src/evergreen/failed_unittests_gather.sh" - "tar failed unittests": &tar_failed_unittests + "tar failed tests": &tar_failed_tests command: archive.targz_pack params: - target: "mongo-unittests.tgz" - source_dir: "src/dist-unittests" + target: "mongo-tests.tgz" + source_dir: "src/dist-tests" include: - "**" - "archive failed unittests": &archive_failed_unittests + "archive failed tests": &archive_failed_tests command: s3.put params: aws_key: ${aws_key} aws_secret: ${aws_secret} - local_file: mongo-unittests.tgz - remote_file: ${project}/${build_variant}/${revision}/unittests/mongo-unittests-${build_id}-${task_name}-${execution}.tgz + local_file: mongo-tests.tgz + remote_file: ${project}/${build_variant}/${revision}/tests/mongo-tests-${build_id}-${task_name}-${execution}.tgz bucket: mciuploads permissions: ${mciuploads_binary_permissions|public-read} visibility: ${mciuploads_binary_visibility|public} content_type: application/gzip - display_name: Unit tests - Execution ${execution} + display_name: Test binaries and libraries - Execution ${execution} optional: true "upload failed unittest repro": &upload_failed_unittest_repro @@ -3030,11 +3030,11 @@ functions: - *upload_bazel_build_invocation - *upload_engflow_link - "save failed unittests": + "save failed tests": - *f_expansions_write - *gather_failed_unittests_resmoke - - *tar_failed_unittests - - *archive_failed_unittests + - *tar_failed_tests + - *archive_failed_tests - *upload_failed_unittest_repro ### Process & archive artifacts from hung processes ### diff --git a/etc/evergreen_yml_components/tasks/compile_tasks.yml b/etc/evergreen_yml_components/tasks/compile_tasks.yml index f1b5e3630aa..45db1e7581c 100644 --- a/etc/evergreen_yml_components/tasks/compile_tasks.yml +++ b/etc/evergreen_yml_components/tasks/compile_tasks.yml @@ -52,7 +52,7 @@ variables: - func: "save mongo coredumps" - func: "generate hang analyzer tasks" - func: "attach bazel invocation text" - - func: "save failed unittests" + - func: "save failed tests" - func: "save bazel headers" - func: "save bazel jvm dump" - func: "save bazel exec logs" @@ -68,9 +68,9 @@ variables: src/gcov-intermediate-files.tgz src/*.core src/*.mdmp src/*.core.gz src/*.mdmp.gz mongo-coredumps.json - src/dist-unittests/bin/* - src/dist-unittests/lib/* - mongo-unittests.tgz + src/dist-tests/bin/* + src/dist-tests/lib/* + mongo-tests.tgz src/debugger*.* src/mongo-hanganalyzer.tgz diskstats.tgz diff --git a/etc/evergreen_yml_components/tasks/compile_tasks_shared.yml b/etc/evergreen_yml_components/tasks/compile_tasks_shared.yml index b22945d9bcc..5a4aba8a725 100644 --- a/etc/evergreen_yml_components/tasks/compile_tasks_shared.yml +++ b/etc/evergreen_yml_components/tasks/compile_tasks_shared.yml @@ -26,7 +26,7 @@ variables: - func: "save mongo coredumps" - func: "generate hang analyzer tasks" - func: "attach bazel invocation text" - - func: "save failed unittests" + - func: "save failed tests" - func: "save bazel headers" - func: "save bazel jvm dump" - func: "save bazel exec logs" @@ -42,9 +42,9 @@ variables: src/gcov-intermediate-files.tgz src/*.core src/*.mdmp src/*.core.gz src/*.mdmp.gz mongo-coredumps.json - src/dist-unittests/bin/* - src/dist-unittests/lib/* - mongo-unittests.tgz + src/dist-tests/bin/* + src/dist-tests/lib/* + mongo-tests.tgz src/debugger*.* src/mongo-hanganalyzer.tgz diskstats.tgz diff --git a/etc/evergreen_yml_components/tasks/resmoke/non_server_teams/tasks.yml b/etc/evergreen_yml_components/tasks/resmoke/non_server_teams/tasks.yml index 44dfda162c6..5b2fbb96ce0 100644 --- a/etc/evergreen_yml_components/tasks/resmoke/non_server_teams/tasks.yml +++ b/etc/evergreen_yml_components/tasks/resmoke/non_server_teams/tasks.yml @@ -514,12 +514,12 @@ tasks: task_compile_flags: >- --verbose_failures --simple_build_id=True - --features=strip_debug - --separate_debug=False --define=MONGO_VERSION=${version} --config=evg --config=opt_profiled --jobs=1600 + --features=strip_debug + --separate_debug=False task_groups: - name: resmoke_tests_TG @@ -544,6 +544,7 @@ task_groups: - func: "attach report" - func: "upload mongodatafiles" - func: "attach multiversion download links" + - func: "save failed tests" - func: "kill processes" - func: "save mongo coredumps" - func: "generate hang analyzer tasks" diff --git a/etc/evergreen_yml_components/tasks/resmoke/server_divisions/clusters_and_integrations/tasks.yml b/etc/evergreen_yml_components/tasks/resmoke/server_divisions/clusters_and_integrations/tasks.yml index d20a562d220..3ee8e73b338 100644 --- a/etc/evergreen_yml_components/tasks/resmoke/server_divisions/clusters_and_integrations/tasks.yml +++ b/etc/evergreen_yml_components/tasks/resmoke/server_divisions/clusters_and_integrations/tasks.yml @@ -145,7 +145,7 @@ variables: - func: "save mongo coredumps" - func: "generate hang analyzer tasks" - func: "attach bazel invocation text" - - func: "save failed unittests" + - func: "save failed tests" - func: "save hang analyzer debugger files" - func: "save disk statistics" - func: "save system resource information" @@ -158,9 +158,9 @@ variables: src/gcov-intermediate-files.tgz src/*.core src/*.mdmp src/*.core.gz src/*.mdmp.gz mongo-coredumps.json - src/dist-unittests/bin/* - src/dist-unittests/lib/* - mongo-unittests.tgz + src/dist-tests/bin/* + src/dist-tests/lib/* + mongo-tests.tgz src/debugger*.* src/mongo-hanganalyzer.tgz diskstats.tgz diff --git a/evergreen/bazel_test.sh b/evergreen/bazel_test.sh index 96ee2efd6b4..cfeb692faa3 100644 --- a/evergreen/bazel_test.sh +++ b/evergreen/bazel_test.sh @@ -50,7 +50,7 @@ RET=$? if [[ "$RET" == "0" ]]; then export RETRY_ON_FAIL=0 bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \ - test ${ALL_FLAGS} ${targets} + test ${ALL_FLAGS} --build_event_json_file=build_events.json ${targets} RET=$? if [[ "$RET" -eq 124 ]]; then @@ -68,7 +68,7 @@ set -o errexit # Strip out anything that isn't a --config flag that could interfere with the run command. if [[ "$RET" != "0" ]]; then CONFIG_FLAGS="$(bazel_evergreen_shutils::extract_config_flags "${ALL_FLAGS}")" - eval ${BAZEL_BINARY} run ${CONFIG_FLAGS} //buildscripts:gather_failed_unittests || true + eval ${BAZEL_BINARY} run ${CONFIG_FLAGS} //buildscripts:gather_failed_tests || true fi : "${RET:=1}" diff --git a/evergreen/failed_unittests_gather.sh b/evergreen/failed_unittests_gather.sh index 2551d5ca0fb..f836e9cd9e3 100644 --- a/evergreen/failed_unittests_gather.sh +++ b/evergreen/failed_unittests_gather.sh @@ -13,7 +13,7 @@ if [ "${task_name}" != "run_dbtest" ] && exit 0 fi -unittest_bin_dir=dist-unittests/bin +unittest_bin_dir=dist-tests/bin mkdir -p $unittest_bin_dir || true # Find all core files @@ -89,5 +89,5 @@ done <<<"${core_files}" # Copy debug symbols for dynamic builds lib_dir=bazel-bin/install/lib if [ -d "$lib_dir" ] && [ -n "$core_files" ]; then - cp -r "$lib_dir" dist-unittests + cp -r "$lib_dir" dist-tests fi diff --git a/evergreen/resmoke_tests_execute_bazel.sh b/evergreen/resmoke_tests_execute_bazel.sh index ba65847a15b..29418197392 100644 --- a/evergreen/resmoke_tests_execute_bazel.sh +++ b/evergreen/resmoke_tests_execute_bazel.sh @@ -79,7 +79,7 @@ RET=$? if [[ "$RET" == "0" ]]; then export RETRY_ON_FAIL=0 bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \ - test ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} ${targets} + test ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} --build_event_json_file=build_events.json ${targets} RET=$? if [[ "$RET" -eq 124 ]]; then @@ -167,7 +167,19 @@ else echo "version: 0" >>${workdir}/build/TestLogs/log_spec.yaml # Combine reports from potentially multiple tests/shards. - find bazel-testlogs/ -name report*.json | xargs $python buildscripts/combine_reports.py --no-report-exit --add-bazel-target-info -o report.json + find bazel-testlogs/ -name report*.json | xargs $python buildscripts/combine_reports.py --no-report-exit --add-bazel-target-info -o report.json || true + + if [[ "$RET" != "0" ]]; then + # This is a hacky way to save build time for the initial build during the `bazel test` above. They + # are stripped binaries there. We should rebuild them with debug symbols and separate debug. + # The relinked binaries should still be hash identical when stripped with strip. + sed -i -e 's/--config=remote_test//g' -e 's/--separate_debug=False/--separate_debug=True/g' -e 's/--features=strip_debug//g' .bazel_build_flags + + # The --config flag needs to stay consistent for the `bazel run` to avoid evicting the previous results. + # Strip out anything that isn't a --config flag that could interfere with the run command. + CONFIG_FLAGS="$(bazel_evergreen_shutils::extract_config_flags "${ALL_FLAGS}")" + eval ${BAZEL_BINARY} run ${CONFIG_FLAGS} //buildscripts:gather_failed_tests || true + fi fi exit $RET