From 12c0c32f127beab2acb4f04f003a7cdd518524f0 Mon Sep 17 00:00:00 2001 From: Steve McClure Date: Wed, 24 Sep 2025 15:56:13 -0400 Subject: [PATCH] SERVER-111373 parse test.xml more robustly with better diagnostics (#41734) GitOrigin-RevId: a3cb25fc5dedbc89e817cc4a431d9a6941465a56 --- buildscripts/BUILD.bazel | 14 +++++++++++++ buildscripts/create_bazel_test_report.py | 5 +++-- buildscripts/gather_failed_unittests.py | 5 +++-- buildscripts/parse_test_xml.py | 25 ++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 buildscripts/parse_test_xml.py diff --git a/buildscripts/BUILD.bazel b/buildscripts/BUILD.bazel index f510714dd73..53ca7953ba8 100644 --- a/buildscripts/BUILD.bazel +++ b/buildscripts/BUILD.bazel @@ -217,11 +217,25 @@ sh_binary( srcs = ["mount_drives.sh"], ) +py_library( + name = "parse_test_xml", + srcs = [ + "parse_test_xml.py", + ], + deps = [ + dependency( + "pyyaml", + group = "core", + ), + ], +) + py_binary( name = "gather_failed_unittests", srcs = ["gather_failed_unittests.py"], visibility = ["//visibility:public"], deps = [ + ":parse_test_xml", "//buildscripts/util", dependency( "typer", diff --git a/buildscripts/create_bazel_test_report.py b/buildscripts/create_bazel_test_report.py index 54b078de529..19299e7d2e9 100644 --- a/buildscripts/create_bazel_test_report.py +++ b/buildscripts/create_bazel_test_report.py @@ -1,6 +1,5 @@ import json import time -import xml.etree.ElementTree as ET from glob import glob from typing import List @@ -8,6 +7,8 @@ import typer from typing_extensions import TypedDict from util.expansions import get_expansion +from buildscripts.parse_test_xml import parse_test_xml + class Result(TypedDict): """A single test result""" @@ -36,7 +37,7 @@ def main(testlog_dir: str): report = Report({"results": []}) for test_xml in glob(f"{testlog_dir}/**/test.xml", recursive=True): - testsuite = ET.parse(test_xml).getroot().find("testsuite") + testsuite = parse_test_xml(test_xml).find("testsuite") testcase = testsuite.find("testcase") # Replace part of the name added by the remote test wrapper script diff --git a/buildscripts/gather_failed_unittests.py b/buildscripts/gather_failed_unittests.py index db25c3a4adb..120d93576a3 100644 --- a/buildscripts/gather_failed_unittests.py +++ b/buildscripts/gather_failed_unittests.py @@ -1,19 +1,20 @@ import os import shutil import subprocess -import xml.etree.ElementTree as ET from glob import glob from pathlib import Path from typing import List import typer +from buildscripts.parse_test_xml import parse_test_xml + def _collect_test_results(testlog_dir: str) -> List[str]: failed_tests = [] successful_tests = [] for test_xml in glob(f"{testlog_dir}/**/test.xml", recursive=True): - testsuite = ET.parse(test_xml).getroot().find("testsuite") + testsuite = parse_test_xml(test_xml).find("testsuite") testcase = testsuite.find("testcase") test_file = testcase.attrib["name"] diff --git a/buildscripts/parse_test_xml.py b/buildscripts/parse_test_xml.py new file mode 100644 index 00000000000..1243ee27491 --- /dev/null +++ b/buildscripts/parse_test_xml.py @@ -0,0 +1,25 @@ +import os +import xml.etree.ElementTree as ET + + +def parse_test_xml(xml_file: str) -> ET.ElementTree: + """Parse the test.xml file and return the ElementTree object. Throws with diagnostics if unparsable.""" + + # Check if file exists and has content + if not os.path.exists(xml_file): + raise Exception(f"Failed to parse {xml_file}: file does not exist") + + with open(xml_file, "r", encoding="utf-8") as f: + content = f.read().strip() + + if not content: + raise Exception(f"Failed to parse {xml_file}: file is empty") + + try: + root = ET.fromstring(content) + return ET.ElementTree(root) + + except (ET.ParseError, UnicodeDecodeError) as e: + print(f"Failed to parse {xml_file}: {e}") + print(content) + raise