mongo/buildscripts/jepsen_report.py

227 lines
7.2 KiB
Python

"""Generate Evergreen reports from the Jepsen list-append workload."""
import json
import re
import sys
import os
from datetime import datetime, timezone
from typing import List, Optional, Tuple
from typing_extensions import TypedDict
import click
from buildscripts.simple_report import Result, Report
class ParserOutput(TypedDict):
"""Result of parsing jepsen log file. Each List[str] is a list of test names."""
success: List[str]
unknown: List[str]
crashed: List[str]
failed: List[str]
start: int
end: int
elapsed: int
_JEPSEN_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
_JEPSEN_MILLI_RE = re.compile("([0-9]+){(.*)}")
_JEPSEN_TIME_RE = re.compile("[0-9]{4}-[0-8]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]+{.*}")
def _time_parse(time: str):
split = time.split(",")
date = datetime.strptime(split[0], _JEPSEN_TIME_FORMAT)
match = _JEPSEN_MILLI_RE.match(split[1])
microseconds = 0
if match:
microseconds = int(match[1]) * 1000
return date.replace(microsecond=microseconds, tzinfo=timezone.utc)
def _calc_time_from_log(log: str) -> Tuple[int, int, int]:
if not log:
return (0, 0, 0)
start_time = None
end_time = None
for line in log.splitlines():
if _JEPSEN_TIME_RE.match(line):
if start_time is None:
start_time = _time_parse(line)
else:
end_time = _time_parse(line)
if start_time is None or end_time is None:
return (0, 0, 0)
elapsed_time = int(end_time.timestamp() - start_time.timestamp())
return (int(start_time.timestamp()), int(end_time.timestamp()), elapsed_time)
SUCCESS_RE = re.compile("([0-9]+) successes")
CRASH_RE = re.compile("([0-9]+) crashed")
UNKNOWN_RE = re.compile("([0-9]+) unknown")
FAIL_RE = re.compile("([0-9]+) failures")
def parse(text: List[str]) -> ParserOutput:
"""Given a List of strings representing jepsen log file split by newlines, return the ParserOutput struct."""
successful_tests: List[str] = []
indeterminate_tests: List[str] = []
crashed_tests: List[str] = []
failed_tests: List[str] = []
target = None
table_matches = 0
for line in text:
if "# Successful tests" in line:
target = successful_tests
continue
elif "# Indeterminate tests" in line:
target = indeterminate_tests
continue
elif "# Crashed tests" in line:
target = crashed_tests
continue
elif "# Failed tests" in line:
target = failed_tests
continue
# at this point we're parsing this table:
# 29 successes
# 0 unknown
# 1 crashed
# 0 failures
s_match = SUCCESS_RE.match(line)
if s_match:
target = None
assert int(s_match[1]) == len(successful_tests)
table_matches += 1
u_match = UNKNOWN_RE.match(line)
if u_match:
target = None
assert int(u_match[1]) == len(indeterminate_tests)
table_matches += 1
c_match = CRASH_RE.match(line)
if c_match:
target = None
assert int(c_match[1]) == len(crashed_tests)
table_matches += 1
f_match = FAIL_RE.match(line)
if f_match:
target = None
assert int(f_match[1]) == len(failed_tests)
table_matches += 1
if target is not None and line.strip():
target.append(line)
assert table_matches == 4, f"Failed to parse summary table. Expected 4, found {table_matches}"
return ParserOutput({
'success': successful_tests,
'unknown': indeterminate_tests,
'crashed': crashed_tests,
'failed': failed_tests,
})
def _try_find_log_file(store: Optional[str], test_name) -> str:
if store is None:
return ""
try:
with open(os.path.join(store, test_name, "jepsen.log")) as fh:
return fh.read()
except Exception: # pylint: disable=broad-except
return ""
def report(out: ParserOutput, start_time: int, end_time: int, elapsed: int,
store: Optional[str]) -> Report:
"""Given ParserOutput, return report.json as a dict."""
results = []
failures = 0
for test_name in out['success']:
log_raw = _try_find_log_file(store, test_name)
start_time, end_time, elapsed_time = _calc_time_from_log(log_raw)
results.append(
Result(status='pass', exit_code=0, test_file=test_name, start=start_time, end=end_time,
elapsed=elapsed_time, log_raw=log_raw))
for test_name in out['failed']:
log_raw = _try_find_log_file(store, test_name)
start_time, end_time, elapsed_time = _calc_time_from_log(log_raw)
failures += 1
results.append(
Result(status='fail', exit_code=1, test_file=test_name, start=start_time, end=end_time,
elapsed=elapsed_time, log_raw=log_raw))
for test_name in out['crashed']:
log_raw = "Log files are unavailable for crashed tests because Jepsen does not save them separately. You may be able to find the exception and stack trace in the task log"
failures += 1
results.append(
Result(status='fail', exit_code=1, test_file=test_name, start=start_time, end=end_time,
elapsed=elapsed, log_raw=log_raw))
for test_name in out['unknown']:
log_raw = _try_find_log_file(store, test_name)
start_time, end_time, elapsed_time = _calc_time_from_log(log_raw)
failures += 1
results.append(
Result(status='fail', exit_code=1, test_file=test_name, start=start_time, end=end_time,
elapsed=elapsed_time, log_raw=log_raw))
return Report({
"failures": failures,
"results": results,
})
def _get_log_lines(filename: str) -> List[str]:
with open(filename) as fh:
return fh.read().splitlines()
def _put_report(report_: Report) -> None:
with open("report.json", "w") as fh:
json.dump(report_, fh)
@click.command()
@click.option("--start_time", type=int, required=True)
@click.option("--end_time", type=int, required=True)
@click.option("--elapsed", type=int, required=True)
@click.option("--emit_status_files", type=bool, is_flag=True, default=False,
help="If true, emit status files for marking Evergreen tasks as system fails")
@click.option("--store", type=str, default=None,
help="Path to folder containing jepsen 'store' directory")
@click.argument("filename", type=str)
def main(filename: str, start_time: str, end_time: str, elapsed: str, emit_status_files: bool,
store: Optional[str]):
"""Generate Evergreen reports from the Jepsen list-append workload."""
out = parse(_get_log_lines(filename))
_put_report(report(out, start_time, end_time, elapsed, store))
exit_code = 255
if out['crashed']:
exit_code = 2
if emit_status_files:
with open("jepsen_system_fail.txt", "w") as fh:
fh.write(str(exit_code))
else:
if out['unknown'] or out['failed']:
exit_code = 1
else:
exit_code = 0
sys.exit(exit_code)
if __name__ == "__main__":
main() # pylint: disable=no-value-for-parameter