SERVER-63827: Determine task timeout at the start of task execution

This commit is contained in:
David Bradford 2022-03-02 15:37:40 -05:00 committed by Evergreen Agent
parent 1e72f6d25e
commit 0a079df588
17 changed files with 1143 additions and 113 deletions

View File

@ -3,14 +3,35 @@
import argparse
import math
import os
import sys
from datetime import timedelta
from typing import Optional
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional
import inject
import structlog
import yaml
from pydantic import BaseModel
from evergreen import EvergreenApi, RetryingEvergreenApi
from buildscripts.ciconfig.evergreen import (EvergreenProjectConfig, parse_evergreen_file)
from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService, TimeoutSettings)
from buildscripts.util.cmdutils import enable_logging
from buildscripts.util.taskname import determine_task_base_name
# Module-level structured logger.
LOGGER = structlog.get_logger(__name__)
# Default value of --timeout-overrides: the file timeout overrides are read from.
DEFAULT_TIMEOUT_OVERRIDES = "etc/evergreen_timeouts.yml"
# Default value of --evg-project-config.
DEFAULT_EVERGREEN_CONFIG = "etc/evergreen.yml"
# Default value of --evg-api-config; "~" is expanded before the path is used.
DEFAULT_EVERGREEN_AUTH_CONFIG = "~/.evergreen.yml"
# Evergreen alias value for which COMMIT_QUEUE_TIMEOUT is applied.
COMMIT_QUEUE_ALIAS = "__commit_queue"
# Task name that receives UNITTESTS_TIMEOUT when no exec override is configured for it.
UNITTEST_TASK = "run_unittests"
# Suites for which no historic-based timeout is calculated.
IGNORED_SUITES = {
"integration_tests_replset", "integration_tests_replset_ssl_auth", "integration_tests_sharded",
"integration_tests_standalone", "integration_tests_standalone_audit", "mongos_test",
"server_selection_json_test"
}
# Length of the start_date..end_date window handed to TimeoutSettings; the window ends "now".
HISTORY_LOOKBACK = timedelta(weeks=2)
# Exec timeout applied when the evergreen alias equals COMMIT_QUEUE_ALIAS.
COMMIT_QUEUE_TIMEOUT = timedelta(minutes=40)
# Exec timeout applied on build variants whose name ends with "-required".
DEFAULT_REQUIRED_BUILD_TIMEOUT = timedelta(hours=1, minutes=20)
@ -19,45 +40,92 @@ DEFAULT_NON_REQUIRED_BUILD_TIMEOUT = timedelta(hours=2)
# which is 5 mins 47 secs, excluding outliers below
UNITTESTS_TIMEOUT = timedelta(minutes=12)
SPECIFIC_TASK_OVERRIDES = {
"linux-64-debug": {"auth": timedelta(minutes=60)},
"enterprise-windows-all-feature-flags-suggested": {
"replica_sets_jscore_passthrough": timedelta(hours=3),
"replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
},
"enterprise-windows-required": {
"replica_sets_jscore_passthrough": timedelta(hours=3),
"replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
},
"enterprise-windows-inmem": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
"enterprise-windows": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
"windows-debug-suggested": {
"replica_sets_initsync_jscore_passthrough": timedelta(hours=2, minutes=30),
"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
"replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
},
"windows": {
"replica_sets": timedelta(hours=3),
"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
},
"ubuntu1804-debug-suggested": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
"enterprise-rhel-80-64-bit-coverage": {
"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
},
"macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), },
"enterprise-macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), },
# unittests outliers
# repeated execution runs a suite 10 times
"linux-64-repeated-execution": {UNITTEST_TASK: 10 * UNITTESTS_TIMEOUT},
# some of the a/ub/t san variants need a little extra time
"enterprise-ubuntu2004-debug-tsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
"ubuntu1804-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
"ubuntu1804-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
"ubuntu1804-debug-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
"ubuntu1804-debug-aubsan-lite": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
"ubuntu1804-debug-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
}
class TimeoutOverride(BaseModel):
    """
    Timeout override for a single task.

    * task: Name of task to override.
    * exec_timeout: Exec timeout to apply, expressed in minutes (optional).
    * idle_timeout: Idle timeout to apply, expressed in minutes (optional).
    """

    task: str
    exec_timeout: Optional[int] = None
    idle_timeout: Optional[int] = None

    def get_exec_timeout(self) -> Optional[timedelta]:
        """Return the exec timeout as a timedelta, or None when none was specified."""
        minutes = self.exec_timeout
        return None if minutes is None else timedelta(minutes=minutes)

    def get_idle_timeout(self) -> Optional[timedelta]:
        """Return the idle timeout as a timedelta, or None when none was specified."""
        minutes = self.idle_timeout
        return None if minutes is None else timedelta(minutes=minutes)
class TimeoutOverrides(BaseModel):
    """Collection of timeout overrides, keyed by build variant name."""

    overrides: Dict[str, List[TimeoutOverride]]

    @classmethod
    def from_yaml_file(cls, file_path: Path) -> "TimeoutOverrides":
        """
        Read the timeout overrides from the given YAML file.

        :param file_path: Location of the YAML file to read.
        :return: Overrides built from the mapping stored in the file.
        """
        # Decode explicitly as UTF-8 so parsing does not depend on the platform's locale encoding.
        with open(file_path, encoding="utf-8") as file_handler:
            return cls(**yaml.safe_load(file_handler))

    def _lookup_override(self, build_variant: str, task_name: str) -> Optional[TimeoutOverride]:
        """
        Check if the given task on the given build variant has an override defined.

        :param build_variant: Build Variant to check.
        :param task_name: Task name to check.
        :return: Timeout override if found, otherwise None.
        :raises ValueError: If more than one override matches the task on the build variant.
        """
        matches = [
            candidate for candidate in self.overrides.get(build_variant, [])
            if candidate.task == task_name
        ]
        if not matches:
            return None

        if len(matches) > 1:
            # Ambiguous configuration: log every conflicting entry before failing.
            LOGGER.error("Found multiple overrides for the same task",
                         build_variant=build_variant, task=task_name,
                         overrides=[override.dict() for override in matches])
            raise ValueError(f"Found multiple overrides for '{task_name}' on '{build_variant}'")

        return matches[0]

    def lookup_exec_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
        """
        Look up the exec timeout override of the given build variant/task.

        :param build_variant: Build Variant to check.
        :param task_name: Task name to check.
        :return: Exec timeout override if found, otherwise None.
        """
        override = self._lookup_override(build_variant, task_name)
        if override is None:
            return None
        return override.get_exec_timeout()

    def lookup_idle_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
        """
        Look up the idle timeout override of the given build variant/task.

        :param build_variant: Build Variant to check.
        :param task_name: Task name to check.
        :return: Idle timeout override if found, otherwise None.
        """
        override = self._lookup_override(build_variant, task_name)
        if override is None:
            return None
        return override.get_idle_timeout()
def _is_required_build_variant(build_variant: str) -> bool:
@ -70,19 +138,47 @@ def _is_required_build_variant(build_variant: str) -> bool:
return build_variant.endswith("-required")
def _has_override(variant: str, task_name: str) -> bool:
def output_timeout(exec_timeout: timedelta, idle_timeout: Optional[timedelta],
output_file: Optional[str]) -> None:
"""
Determine if the given task has a timeout override.
Output timeout configuration to the specified location.
:param variant: Build Variant task is running on.
:param task_name: Task to check.
:return: True if override exists for task.
:param exec_timeout: Exec timeout to output.
:param idle_timeout: Idle timeout to output.
:param output_file: Location of output file to write.
"""
return variant in SPECIFIC_TASK_OVERRIDES and task_name in SPECIFIC_TASK_OVERRIDES[variant]
output = {
"exec_timeout_secs": math.ceil(exec_timeout.total_seconds()),
}
if idle_timeout is not None:
output["timeout_secs"] = math.ceil(idle_timeout.total_seconds())
if output_file:
with open(output_file, "w") as outfile:
yaml.dump(output, stream=outfile, default_flow_style=False)
yaml.dump(output, stream=sys.stdout, default_flow_style=False)
def determine_timeout(task_name: str, variant: str, idle_timeout: Optional[timedelta] = None,
exec_timeout: Optional[timedelta] = None, evg_alias: str = '') -> timedelta:
class TaskTimeoutOrchestrator:
"""An orchestrator for determining task timeouts."""
@inject.autoparams()
def __init__(self, timeout_service: TimeoutService, timeout_overrides: TimeoutOverrides,
             evg_project_config: EvergreenProjectConfig) -> None:
    """
    Create the orchestrator from its collaborators.

    :param timeout_service: Service used to obtain timeout estimates.
    :param timeout_overrides: Exec/idle timeout overrides for specific tasks.
    :param evg_project_config: Evergreen project configuration used to look up build variants.
    """
    self.evg_project_config = evg_project_config
    self.timeout_overrides = timeout_overrides
    self.timeout_service = timeout_service
def determine_exec_timeout(
self, task_name: str, variant: str, idle_timeout: Optional[timedelta] = None,
exec_timeout: Optional[timedelta] = None, evg_alias: str = "") -> timedelta:
"""
Determine what exec timeout should be used.
@ -95,44 +191,128 @@ def determine_timeout(task_name: str, variant: str, idle_timeout: Optional[timed
"""
determined_timeout = DEFAULT_NON_REQUIRED_BUILD_TIMEOUT
override = self.timeout_overrides.lookup_exec_override(variant, task_name)
if exec_timeout and exec_timeout.total_seconds() != 0:
LOGGER.info("Using timeout from cmd line",
exec_timeout_secs=exec_timeout.total_seconds())
determined_timeout = exec_timeout
elif task_name == UNITTEST_TASK and not _has_override(variant, task_name):
elif task_name == UNITTEST_TASK and override is None:
LOGGER.info("Overriding unittest timeout",
exec_timeout_secs=UNITTESTS_TIMEOUT.total_seconds())
determined_timeout = UNITTESTS_TIMEOUT
elif evg_alias == COMMIT_QUEUE_ALIAS:
LOGGER.info("Overriding commit-queue timeout",
exec_timeout_secs=COMMIT_QUEUE_TIMEOUT.total_seconds())
determined_timeout = COMMIT_QUEUE_TIMEOUT
elif _has_override(variant, task_name):
determined_timeout = SPECIFIC_TASK_OVERRIDES[variant][task_name]
elif override is not None:
LOGGER.info("Overriding configured timeout", exec_timeout_secs=override.total_seconds())
determined_timeout = override
elif _is_required_build_variant(variant):
LOGGER.info("Overriding required-builder timeout",
exec_timeout_secs=DEFAULT_REQUIRED_BUILD_TIMEOUT.total_seconds())
determined_timeout = DEFAULT_REQUIRED_BUILD_TIMEOUT
# The timeout needs to be at least as large as the idle timeout.
if idle_timeout and determined_timeout.total_seconds() < idle_timeout.total_seconds():
LOGGER.info("Making exec timeout as large as idle timeout",
exec_timeout_secs=idle_timeout.total_seconds())
return idle_timeout
return determined_timeout
def output_timeout(task_timeout: timedelta, output_file: Optional[str]) -> None:
def determine_idle_timeout(self, task_name: str, variant: str,
idle_timeout: Optional[timedelta] = None) -> Optional[timedelta]:
"""
Output timeout configuration to the specified location.
Determine what idle timeout should be used.
:param task_timeout: Timeout to output.
:param output_file: Location of output file to write.
:param task_name: Name of task being run.
:param variant: Name of build variant being run.
:param idle_timeout: Override to use for idle_timeout.
:return: Idle timeout to use for running task.
"""
output = {
"exec_timeout_secs": math.ceil(task_timeout.total_seconds()),
}
determined_timeout = None
override = self.timeout_overrides.lookup_idle_override(variant, task_name)
if output_file:
with open(output_file, "w") as outfile:
yaml.dump(output, stream=outfile, default_flow_style=False)
if idle_timeout and idle_timeout.total_seconds() != 0:
LOGGER.info("Using timeout from cmd line",
idle_timeout_secs=idle_timeout.total_seconds())
determined_timeout = idle_timeout
yaml.dump(output, stream=sys.stdout, default_flow_style=False)
elif override is not None:
LOGGER.info("Overriding configured timeout", idle_timeout_secs=override.total_seconds())
determined_timeout = override
return determined_timeout
def determine_historic_timeout(self, task: str, variant: str, suite_name: str,
                               exec_timeout_factor: Optional[float]) -> Optional[timedelta]:
    """
    Derive an exec timeout from the timeout service's estimate.

    :param task: Name of task to query.
    :param variant: Name of build variant to query.
    :param suite_name: Name of test suite being run.
    :param exec_timeout_factor: Scaling factor to use when determining timeout.
    :return: Timeout based on the estimate, or None when the suite is ignored or no usable
        estimate is available.
    """
    if suite_name in IGNORED_SUITES:
        return None

    params = TimeoutParams(
        evg_project="mongodb-mongo-master",
        build_variant=variant,
        task_name=task,
        suite_name=suite_name,
        is_asan=self.is_build_variant_asan(variant),
    )
    estimate = self.timeout_service.get_timeout_estimate(params)
    if not estimate or not estimate.is_specified():
        return None

    timeout_secs = estimate.calculate_task_timeout(repeat_factor=1,
                                                   scaling_factor=exec_timeout_factor)
    if timeout_secs is None:
        return None

    LOGGER.info("Using historic based timeout", exec_timeout_secs=timeout_secs)
    return timedelta(seconds=timeout_secs)
def is_build_variant_asan(self, build_variant: str) -> bool:
    """
    Report whether the named build variant is an ASAN build variant.

    :param build_variant: Name of build variant to check.
    :return: Result of the variant's own ASAN check, as found in the evergreen project config.
    """
    return self.evg_project_config.get_variant(build_variant).is_asan_build()
def determine_timeouts(self, cli_idle_timeout: Optional[timedelta],
                       cli_exec_timeout: Optional[timedelta], outfile: Optional[str], task: str,
                       variant: str, evg_alias: str, suite_name: str,
                       exec_timeout_factor: Optional[float]) -> None:
    """
    Work out the idle and exec timeouts for the given task and output them.

    A historic-based exec timeout, when one is available, takes the place of the otherwise
    determined exec timeout.

    :param cli_idle_timeout: Idle timeout specified by the CLI.
    :param cli_exec_timeout: Exec timeout specified by the CLI.
    :param outfile: File to write timeout expansions to.
    :param task: Name of task being run.
    :param variant: Build variant task is being run on.
    :param evg_alias: Evergreen alias that triggered task.
    :param suite_name: Name of evergreen suite being run.
    :param exec_timeout_factor: Scaling factor to use when determining timeout.
    """
    idle = self.determine_idle_timeout(task, variant, cli_idle_timeout)
    configured_exec = self.determine_exec_timeout(task, variant, idle, cli_exec_timeout,
                                                  evg_alias)
    historic_exec = self.determine_historic_timeout(task, variant, suite_name,
                                                    exec_timeout_factor)

    output_timeout(historic_exec or configured_exec, idle, outfile)
def main():
@ -140,23 +320,56 @@ def main():
parser = argparse.ArgumentParser(description=main.__doc__)
parser.add_argument("--task-name", dest="task", required=True, help="Task being executed.")
parser.add_argument("--suite-name", dest="suite_name", required=True,
help="Resmoke suite being run against.")
parser.add_argument("--build-variant", dest="variant", required=True,
help="Build variant task is being executed on.")
parser.add_argument("--evg-alias", dest="evg_alias", required=True,
help="Evergreen alias used to trigger build.")
parser.add_argument("--timeout", dest="timeout", type=int, help="Timeout to use (in sec).")
parser.add_argument("--exec-timeout", dest="exec_timeout", type=int,
help="Exec timeout ot use (in sec).")
help="Exec timeout to use (in sec).")
parser.add_argument("--exec-timeout-factor", dest="exec_timeout_factor", type=float,
help="Exec timeout factor to use (in sec).")
parser.add_argument("--out-file", dest="outfile", help="File to write configuration to.")
parser.add_argument("--timeout-overrides", dest="timeout_overrides_file",
default=DEFAULT_TIMEOUT_OVERRIDES,
help="File containing timeout overrides to use.")
parser.add_argument("--evg-api-config", dest="evg_api_config",
default=DEFAULT_EVERGREEN_AUTH_CONFIG, help="Evergreen API config file.")
parser.add_argument("--evg-project-config", dest="evg_project_config",
default=DEFAULT_EVERGREEN_CONFIG, help="Evergreen project config file.")
options = parser.parse_args()
end_date = datetime.now()
start_date = end_date - HISTORY_LOOKBACK
timeout_override = timedelta(seconds=options.timeout) if options.timeout else None
exec_timeout_override = timedelta(
seconds=options.exec_timeout) if options.exec_timeout else None
task_timeout = determine_timeout(options.task, options.variant, timeout_override,
exec_timeout_override, options.evg_alias)
output_timeout(task_timeout, options.outfile)
task_name = determine_task_base_name(options.task, options.variant)
timeout_overrides = TimeoutOverrides.from_yaml_file(
os.path.expanduser(options.timeout_overrides_file))
enable_logging(verbose=False)
def dependencies(binder: inject.Binder) -> None:
binder.bind(
EvergreenApi,
RetryingEvergreenApi.get_api(config_file=os.path.expanduser(options.evg_api_config)))
binder.bind(TimeoutSettings, TimeoutSettings(start_date=start_date, end_date=end_date))
binder.bind(TimeoutOverrides, timeout_overrides)
binder.bind(EvergreenProjectConfig,
parse_evergreen_file(os.path.expanduser(options.evg_project_config)))
inject.configure(dependencies)
task_timeout_orchestrator = inject.instance(TaskTimeoutOrchestrator)
task_timeout_orchestrator.determine_timeouts(
timeout_override, exec_timeout_override, options.outfile, task_name, options.variant,
options.evg_alias, options.suite_name, options.exec_timeout_factor)
if __name__ == "__main__":

View File

@ -13,7 +13,7 @@ from evergreen import EvergreenApi
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
from buildscripts.task_generation.suite_split_strategies import SplitStrategy, FallbackStrategy
from buildscripts.task_generation.timeout import TimeoutEstimate
from buildscripts.timeouts.timeout import TimeoutEstimate
from buildscripts.util import taskname
from buildscripts.util.teststats import HistoricTaskData, TestRuntime, normalize_test_name

View File

@ -14,7 +14,7 @@ from buildscripts.task_generation.task_types.gentask_options import GenTaskOptio
from buildscripts.task_generation.task_types.models.resmoke_task_model import ResmokeTask
from buildscripts.task_generation.task_types.multiversion_decorator import MultiversionGenTaskDecorator, \
MultiversionDecoratorParams
from buildscripts.task_generation.timeout import TimeoutEstimate
from buildscripts.timeouts.timeout import TimeoutEstimate
LOGGER = structlog.getLogger(__name__)

View File

@ -1,46 +1,213 @@
"""Unit tests for the evergreen_task_timeout script."""
from datetime import timedelta
import unittest
from datetime import timedelta
from unittest.mock import MagicMock
import buildscripts.evergreen_task_timeout as under_test
from buildscripts.ciconfig.evergreen import EvergreenProjectConfig
from buildscripts.timeouts.timeout_service import TimeoutService
# pylint: disable=missing-docstring,no-self-use
# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access
class DetermineTimeoutTest(unittest.TestCase):
class TestTimeoutOverride(unittest.TestCase):
    """Tests for the minute-to-timedelta accessors of TimeoutOverride."""

    def test_exec_timeout_should_be_settable(self):
        override = under_test.TimeoutOverride(task="my task", exec_timeout=42)

        exec_timeout = override.get_exec_timeout()

        self.assertIsNotNone(exec_timeout)
        self.assertEqual(42 * 60, exec_timeout.total_seconds())

    def test_exec_timeout_should_default_to_none(self):
        override = under_test.TimeoutOverride(task="my task")

        self.assertIsNone(override.get_exec_timeout())

    def test_idle_timeout_should_be_settable(self):
        override = under_test.TimeoutOverride(task="my task", idle_timeout=42)

        idle_timeout = override.get_idle_timeout()

        self.assertIsNotNone(idle_timeout)
        self.assertEqual(42 * 60, idle_timeout.total_seconds())

    def test_idle_timeout_should_default_to_none(self):
        override = under_test.TimeoutOverride(task="my task")

        self.assertIsNone(override.get_idle_timeout())
class TestTimeoutOverrides(unittest.TestCase):
    """Tests for looking up exec and idle overrides in TimeoutOverrides."""

    def test_looking_up_a_non_existing_override_should_return_none(self):
        timeout_overrides = under_test.TimeoutOverrides(overrides={})

        self.assertIsNone(timeout_overrides.lookup_exec_override("bv", "task"))
        self.assertIsNone(timeout_overrides.lookup_idle_override("bv", "task"))

    def test_looking_up_a_duplicate_override_should_raise_error(self):
        timeout_overrides = under_test.TimeoutOverrides(
            overrides={
                "bv": [{
                    "task": "task_name",
                    "exec_timeout": 42,
                    "idle_timeout": 10,
                }, {
                    "task": "task_name",
                    "exec_timeout": 314,
                    "idle_timeout": 20,
                }]
            })

        # The lookup itself must raise; nothing is returned, so there is no value to assert on.
        with self.assertRaises(ValueError):
            timeout_overrides.lookup_exec_override("bv", "task_name")

        with self.assertRaises(ValueError):
            timeout_overrides.lookup_idle_override("bv", "task_name")

    def test_looking_up_an_exec_override_should_work(self):
        timeout_overrides = under_test.TimeoutOverrides(
            overrides={
                "bv": [
                    {
                        "task": "another_task",
                        "exec_timeout": 314,
                        "idle_timeout": 20,
                    },
                    {
                        "task": "task_name",
                        "exec_timeout": 42,
                    },
                ]
            })

        self.assertEqual(42 * 60,
                         timeout_overrides.lookup_exec_override("bv", "task_name").total_seconds())

    def test_looking_up_an_idle_override_should_work(self):
        timeout_overrides = under_test.TimeoutOverrides(
            overrides={
                "bv": [
                    {
                        "task": "another_task",
                        "exec_timeout": 314,
                        "idle_timeout": 20,
                    },
                    {
                        "task": "task_name",
                        "idle_timeout": 10,
                    },
                ]
            })

        self.assertEqual(10 * 60,
                         timeout_overrides.lookup_idle_override("bv", "task_name").total_seconds())
class TestDetermineExecTimeout(unittest.TestCase):
def test_timeout_used_if_specified(self):
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
orchestrator = under_test.TaskTimeoutOrchestrator(
timeout_service=MagicMock(spec_set=TimeoutService),
timeout_overrides=mock_timeout_overrides,
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
timeout = timedelta(seconds=42)
self.assertEqual(
under_test.determine_timeout("task_name", "variant", None, timeout), timeout)
orchestrator.determine_exec_timeout("task_name", "variant", None, timeout), timeout)
def test_default_is_returned_with_no_timeout(self):
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
orchestrator = under_test.TaskTimeoutOrchestrator(
timeout_service=MagicMock(spec_set=TimeoutService),
timeout_overrides=mock_timeout_overrides,
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
self.assertEqual(
under_test.determine_timeout("task_name", "variant"),
orchestrator.determine_exec_timeout("task_name", "variant"),
under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT)
def test_default_is_returned_with_timeout_at_zero(self):
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
orchestrator = under_test.TaskTimeoutOrchestrator(
timeout_service=MagicMock(spec_set=TimeoutService),
timeout_overrides=mock_timeout_overrides,
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
self.assertEqual(
under_test.determine_timeout("task_name", "variant", timedelta(seconds=0)),
orchestrator.determine_exec_timeout("task_name", "variant", timedelta(seconds=0)),
under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT)
def test_default_required_returned_on_required_variants(self):
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
orchestrator = under_test.TaskTimeoutOrchestrator(
timeout_service=MagicMock(spec_set=TimeoutService),
timeout_overrides=mock_timeout_overrides,
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
self.assertEqual(
under_test.determine_timeout("task_name", "variant-required"),
orchestrator.determine_exec_timeout("task_name", "variant-required"),
under_test.DEFAULT_REQUIRED_BUILD_TIMEOUT)
def test_task_specific_timeout(self):
mock_timeout_overrides = under_test.TimeoutOverrides(
overrides={"linux-64-debug": [{"task": "auth", "exec_timeout": 60}]})
orchestrator = under_test.TaskTimeoutOrchestrator(
timeout_service=MagicMock(spec_set=TimeoutService),
timeout_overrides=mock_timeout_overrides,
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
self.assertEqual(
under_test.determine_timeout("auth", "linux-64-debug"), timedelta(minutes=60))
orchestrator.determine_exec_timeout("auth", "linux-64-debug"), timedelta(minutes=60))
def test_commit_queue_items_use_commit_queue_timeout(self):
timeout = under_test.determine_timeout("auth", "variant",
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
orchestrator = under_test.TaskTimeoutOrchestrator(
timeout_service=MagicMock(spec_set=TimeoutService),
timeout_overrides=mock_timeout_overrides,
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
timeout = orchestrator.determine_exec_timeout("auth", "variant",
evg_alias=under_test.COMMIT_QUEUE_ALIAS)
self.assertEqual(timeout, under_test.COMMIT_QUEUE_TIMEOUT)
def test_use_idle_timeout_if_greater_than_exec_timeout(self):
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
orchestrator = under_test.TaskTimeoutOrchestrator(
timeout_service=MagicMock(spec_set=TimeoutService),
timeout_overrides=mock_timeout_overrides,
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
idle_timeout = timedelta(hours=2)
exec_timeout = timedelta(minutes=10)
timeout = under_test.determine_timeout("task_name", "variant", idle_timeout=idle_timeout,
exec_timeout=exec_timeout)
timeout = orchestrator.determine_exec_timeout(
"task_name", "variant", idle_timeout=idle_timeout, exec_timeout=exec_timeout)
self.assertEqual(timeout, idle_timeout)
class TestDetermineIdleTimeout(unittest.TestCase):
    """Tests for TaskTimeoutOrchestrator.determine_idle_timeout."""

    @staticmethod
    def _orchestrator_with(overrides):
        # Only the overrides vary between these tests; the other collaborators are plain mocks.
        timeout_overrides = under_test.TimeoutOverrides(overrides=overrides)
        return under_test.TaskTimeoutOrchestrator(
            timeout_service=MagicMock(spec_set=TimeoutService),
            timeout_overrides=timeout_overrides,
            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))

    def test_timeout_used_if_specified(self):
        orchestrator = self._orchestrator_with({})
        cli_timeout = timedelta(seconds=42)

        result = orchestrator.determine_idle_timeout("task_name", "variant", cli_timeout)

        self.assertEqual(result, cli_timeout)

    def test_default_is_returned_with_no_timeout(self):
        orchestrator = self._orchestrator_with({})

        self.assertIsNone(orchestrator.determine_idle_timeout("task_name", "variant"))

    def test_task_specific_timeout(self):
        orchestrator = self._orchestrator_with(
            {"linux-64-debug": [{"task": "auth", "idle_timeout": 60}]})

        result = orchestrator.determine_idle_timeout("auth", "linux-64-debug")

        self.assertEqual(result, timedelta(minutes=60))

View File

@ -0,0 +1 @@
"""Empty."""

View File

@ -1,7 +1,7 @@
"""Unit tests for timeout.py."""
import unittest
from buildscripts.task_generation import timeout as under_test
from buildscripts.timeouts import timeout as under_test
# pylint: disable=missing-docstring,invalid-name,unused-argument,no-self-use,protected-access,no-value-for-parameter

View File

@ -0,0 +1,258 @@
"""Unit tests for timeout_service.py."""
import random
import unittest
from datetime import datetime, timedelta
from unittest.mock import MagicMock
from requests.exceptions import HTTPError
from evergreen import EvergreenApi
import buildscripts.timeouts.timeout_service as under_test
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
from buildscripts.util.teststats import HistoricTaskData
# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access
def build_mock_service(evg_api=None, resmoke_proxy=None):
    """
    Build a TimeoutService for tests, with a settings window of the two weeks ending now.

    Any collaborator that is not supplied (or is falsy) is replaced by a spec'd MagicMock.
    """
    now = datetime.now()
    settings = under_test.TimeoutSettings(
        end_date=now,
        start_date=now - timedelta(weeks=2),
    )

    if not evg_api:
        evg_api = MagicMock(spec_set=EvergreenApi)
    if not resmoke_proxy:
        resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)

    return under_test.TimeoutService(evg_api=evg_api, resmoke_proxy=resmoke_proxy,
                                     timeout_settings=settings)
def tst_stat_mock(file, duration, pass_count):
    """Build a mock stats entry exposing test_file, avg_duration_pass and num_pass."""
    attributes = {
        "test_file": file,
        "avg_duration_pass": duration,
        "num_pass": pass_count,
    }
    return MagicMock(**attributes)
class TestGetTimeoutEstimate(unittest.TestCase):
    """Tests for TimeoutService.get_timeout_estimate."""

    @staticmethod
    def _timeout_params():
        # Every test in this class queries with the same parameters.
        return under_test.TimeoutParams(
            evg_project="my project",
            build_variant="bv",
            task_name="my task",
            suite_name="my suite",
            is_asan=False,
        )

    def test_no_stats_should_return_default_timeout(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        evg_api.test_stats_by_project.return_value = []
        service = build_mock_service(evg_api=evg_api)

        estimate = service.get_timeout_estimate(self._timeout_params())

        self.assertFalse(estimate.is_specified())

    def test_a_test_with_missing_history_should_cause_a_default_timeout(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        stat_entries = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
        evg_api.test_stats_by_project.return_value = stat_entries
        resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
        resmoke_proxy.list_tests.return_value = ["test_with_no_stats.js"]
        service = build_mock_service(evg_api=evg_api, resmoke_proxy=resmoke_proxy)

        estimate = service.get_timeout_estimate(self._timeout_params())

        self.assertFalse(estimate.is_specified())

    def test_a_test_with_zero_runtime_history_should_cause_a_default_timeout(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        stat_entries = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
        stat_entries.append(tst_stat_mock("zero.js", 0.0, 1))
        evg_api.test_stats_by_project.return_value = stat_entries
        resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
        resmoke_proxy.list_tests.return_value = [entry.test_file for entry in stat_entries]
        service = build_mock_service(evg_api=evg_api, resmoke_proxy=resmoke_proxy)

        estimate = service.get_timeout_estimate(self._timeout_params())

        self.assertFalse(estimate.is_specified())

    def test_all_tests_with_runtime_history_should_use_custom_timeout(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        n_tests = 30
        test_runtime = 600
        stat_entries = [tst_stat_mock(f"test_{i}.js", test_runtime, 1) for i in range(n_tests)]
        evg_api.test_stats_by_project.return_value = stat_entries
        resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
        resmoke_proxy.list_tests.return_value = [entry.test_file for entry in stat_entries]
        service = build_mock_service(evg_api=evg_api, resmoke_proxy=resmoke_proxy)

        estimate = service.get_timeout_estimate(self._timeout_params())

        self.assertTrue(estimate.is_specified())
        self.assertEqual(1860, estimate.calculate_test_timeout(1))
        self.assertEqual(54180, estimate.calculate_task_timeout(1))
class TestGetTaskHookOverhead(unittest.TestCase):
    """Tests for TimeoutService.get_task_hook_overhead."""

    def test_no_stats_should_return_zero(self):
        service = build_mock_service()

        result = service.get_task_hook_overhead("suite", is_asan=False, test_count=30,
                                                historic_stats=None)

        self.assertEqual(0.0, result)

    def test_stats_with_no_clean_every_n_should_return_zero(self):
        service = build_mock_service()
        stat_entries = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
        history = HistoricTaskData.from_stats_list(stat_entries)

        result = service.get_task_hook_overhead("suite", is_asan=False, test_count=30,
                                                historic_stats=history)

        self.assertEqual(0.0, result)

    def test_stats_with_clean_every_n_should_return_overhead(self):
        n_tests = 30
        hook_runtime = 25
        service = build_mock_service()
        test_entries = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(n_tests)]
        hook_entries = [
            tst_stat_mock(f"test_{i}:{under_test.CLEAN_EVERY_N_HOOK}", hook_runtime, 1)
            for i in range(10)
        ]
        stat_entries = test_entries + hook_entries
        # Shuffle so the result cannot depend on the order the stats arrive in.
        random.shuffle(stat_entries)
        history = HistoricTaskData.from_stats_list(stat_entries)

        result = service.get_task_hook_overhead("suite", is_asan=True, test_count=n_tests,
                                                historic_stats=history)

        self.assertEqual(hook_runtime * n_tests, result)
class TestLookupHistoricStats(unittest.TestCase):
    """Tests for TimeoutService.lookup_historic_stats."""

    @staticmethod
    def _timeout_params():
        # Every test in this class queries with the same parameters.
        return under_test.TimeoutParams(
            evg_project="my project",
            build_variant="bv",
            task_name="my task",
            suite_name="my suite",
            is_asan=False,
        )

    def test_no_stats_from_evergreen_should_return_none(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        evg_api.test_stats_by_project.return_value = []
        service = build_mock_service(evg_api=evg_api)

        found = service.lookup_historic_stats(self._timeout_params())

        self.assertIsNone(found)

    def test_errors_from_evergreen_should_return_none(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        evg_api.test_stats_by_project.side_effect = HTTPError("failed to connect")
        service = build_mock_service(evg_api=evg_api)

        found = service.lookup_historic_stats(self._timeout_params())

        self.assertIsNone(found)

    def test_stats_from_evergreen_should_return_the_stats(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        stat_entries = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(100)]
        evg_api.test_stats_by_project.return_value = stat_entries
        service = build_mock_service(evg_api=evg_api)

        found = service.lookup_historic_stats(self._timeout_params())

        self.assertIsNotNone(found)
        self.assertEqual(len(stat_entries), len(found.historic_test_results))
class TestGetCleanEveryNCadence(unittest.TestCase):
    """Tests for `_get_clean_every_n_cadence` on the service built by `build_mock_service`."""

    @staticmethod
    def _service_with_hooks(hooks):
        """Build a service whose resmoke proxy reports `hooks` as the suite's executor hooks."""
        resmoke_proxy = MagicMock()
        resmoke_proxy.read_suite_config.return_value = {"executor": {"hooks": hooks}}
        return build_mock_service(resmoke_proxy=resmoke_proxy)

    def test_clean_every_n_cadence_on_asan(self):
        """ASAN runs always report a cadence of 1."""
        service = build_mock_service()

        self.assertEqual(1, service._get_clean_every_n_cadence("suite", True))

    def test_clean_every_n_cadence_from_hook_config(self):
        """The `n` configured on the CleanEveryN hook is returned."""
        expected_n = 42
        service = self._service_with_hooks([
            {"class": "hook1"},
            {"class": under_test.CLEAN_EVERY_N_HOOK, "n": expected_n},
        ])

        self.assertEqual(expected_n, service._get_clean_every_n_cadence("suite", False))

    def test_clean_every_n_cadence_no_n_in_hook_config(self):
        """A CleanEveryN hook without `n` reports a cadence of 1."""
        service = self._service_with_hooks([
            {"class": "hook1"},
            {"class": under_test.CLEAN_EVERY_N_HOOK},
        ])

        self.assertEqual(1, service._get_clean_every_n_cadence("suite", False))

    def test_clean_every_n_cadence_no_hook_config(self):
        """A suite without the CleanEveryN hook reports a cadence of 1."""
        service = self._service_with_hooks([{"class": "hook1"}])

        self.assertEqual(1, service._get_clean_every_n_cadence("suite", False))

View File

@ -4,7 +4,7 @@ import unittest
import buildscripts.util.taskname as under_test
# pylint: disable=missing-docstring,protected-access
# pylint: disable=missing-docstring,protected-access,invalid-name
class TestNameTask(unittest.TestCase):
@ -24,3 +24,29 @@ class TestRemoveGenSuffix(unittest.TestCase):
input_task_name = "sharded_multi_stmt_txn_jscore_passthroug"
self.assertEqual("sharded_multi_stmt_txn_jscore_passthroug",
under_test.remove_gen_suffix(input_task_name))
class TestDetermineTaskBaseName(unittest.TestCase):
    """Tests for `determine_task_base_name`."""

    # Build variant passed to every call; it only appears in the task name of the first test.
    BUILD_VARIANT = "enterprise-rhel-80-64-bit-dynamic-required"

    def _assert_base_name_is_auth(self, task_name):
        """Assert that `task_name` resolves to the base name "auth"."""
        resolved = under_test.determine_task_base_name(task_name, self.BUILD_VARIANT)
        self.assertEqual("auth", resolved)

    def test_task_name_with_build_variant_should_strip_bv_and_sub_task_index(self):
        self._assert_base_name_is_auth(f"auth_23_{self.BUILD_VARIANT}")

    def test_task_name_without_build_variant_should_strip_sub_task_index(self):
        self._assert_base_name_is_auth("auth_314")

    def test_task_name_without_build_variant_or_subtask_index_should_self(self):
        self._assert_base_name_is_auth("auth")

View File

@ -0,0 +1 @@
"""Empty."""

View File

@ -1,11 +1,10 @@
"""Timeout information for generating tasks."""
import math
from datetime import timedelta
from inspect import getframeinfo, currentframe
from inspect import currentframe, getframeinfo
from typing import NamedTuple, Optional
import structlog
from buildscripts.patch_builds.task_generation import TimeoutInfo
LOGGER = structlog.getLogger(__name__)

View File

@ -0,0 +1,189 @@
"""Service for determining task timeouts."""
from datetime import datetime
from typing import Any, Dict, NamedTuple, Optional
import inject
import structlog
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
from buildscripts.timeouts.timeout import TimeoutEstimate
from buildscripts.util.teststats import HistoricTaskData
from evergreen import EvergreenApi
LOGGER = structlog.get_logger(__name__)
CLEAN_EVERY_N_HOOK = "CleanEveryN"
class TimeoutParams(NamedTuple):
    """Immutable description of the task whose timeout is being determined."""

    # Evergreen project the task belongs to.
    evg_project: str
    # Evergreen build variant the task is running on.
    build_variant: str
    # Evergreen name of the task.
    task_name: str
    # Test suite being run by the task.
    suite_name: str
    # Whether this run is part of an ASAN build.
    is_asan: bool
class TimeoutSettings(NamedTuple):
    """Settings that control how timeouts are determined."""

    # First bound of the date range handed to the historic stats lookup.
    start_date: datetime
    # Second bound of the date range handed to the historic stats lookup.
    end_date: datetime
class TimeoutService:
    """A service for determining task timeouts."""

    @inject.autoparams()
    def __init__(self, evg_api: EvergreenApi, resmoke_proxy: ResmokeProxyService,
                 timeout_settings: TimeoutSettings) -> None:
        """
        Initialize the service.

        :param evg_api: Evergreen API client.
        :param resmoke_proxy: Proxy to query resmoke.
        :param timeout_settings: Settings for how timeouts are calculated.
        """
        self.evg_api = evg_api
        self.resmoke_proxy = resmoke_proxy
        self.timeout_settings = timeout_settings

    def get_timeout_estimate(self, timeout_params: TimeoutParams) -> TimeoutEstimate:
        """
        Calculate the timeout estimate for the given task based on historic test results.

        No estimate is produced (`TimeoutEstimate.no_timeouts()`) when there is no history,
        when any test of the suite is missing from the history, or when any test has a
        non-positive historic runtime.

        :param timeout_params: Details about the task to query.
        :return: Timeouts to use based on historic test results.
        """
        historic_stats = self.lookup_historic_stats(timeout_params)
        if not historic_stats:
            return TimeoutEstimate.no_timeouts()

        test_set = set(self.resmoke_proxy.list_tests(timeout_params.suite_name))
        test_runtimes = [
            stat for stat in historic_stats.get_tests_runtimes() if stat.test_name in test_set
        ]

        missing_tests = test_set - {stat.test_name for stat in test_runtimes}
        if missing_tests:
            # If we don't have historic runtime information for all the tests, we cannot
            # reliably determine a timeout, so fall back to a default timeout.
            LOGGER.warning(
                "Could not find historic runtime information for test, using default timeout",
                test=min(missing_tests))
            return TimeoutEstimate.no_timeouts()

        total_runtime = 0.0
        max_runtime = 0.0
        for stat in test_runtimes:
            if stat.runtime <= 0.0:
                # A runtime of 0 indicates that the test does not have a proper runtime
                # history, so fall back to a default timeout.
                LOGGER.warning("Found a test with 0 runtime, using default timeouts",
                               test=stat.test_name)
                return TimeoutEstimate.no_timeouts()
            total_runtime += stat.runtime
            max_runtime = max(max_runtime, stat.runtime)

        total_runtime += self.get_task_hook_overhead(
            timeout_params.suite_name, timeout_params.is_asan, len(test_set), historic_stats)

        return TimeoutEstimate(max_test_runtime=max_runtime, expected_task_runtime=total_runtime)

    def get_task_hook_overhead(self, suite_name: str, is_asan: bool, test_count: int,
                               historic_stats: Optional[HistoricTaskData]) -> float:
        """
        Determine how much overhead task-level hooks add to the suite.

        Certain test hooks need to be accounted for on the task level instead of the test level
        in order to calculate accurate timeouts, so their expected runtime is computed here.

        :param suite_name: Name of suite being generated.
        :param is_asan: Whether ASAN is being used.
        :param test_count: Number of tests in sub-suite.
        :param historic_stats: Historic runtime data of the suite.
        :return: Expected total runtime of the CleanEveryN hook, or 0.0 when there is no
            history for it.
        """
        # The CleanEveryN hook is run every 'N' tests. The runtime of the
        # hook will be associated with whichever test happens to be running, which could be
        # different every run. So we need to take its runtime into account at the task level.
        if historic_stats is None:
            return 0.0

        clean_every_n_cadence = self._get_clean_every_n_cadence(suite_name, is_asan)
        avg_clean_every_n_runtime = historic_stats.get_avg_hook_runtime(CLEAN_EVERY_N_HOOK)
        LOGGER.debug("task hook overhead", cadence=clean_every_n_cadence,
                     runtime=avg_clean_every_n_runtime, is_asan=is_asan)
        if avg_clean_every_n_runtime != 0:
            # The cadence is always >= 1 (see `_get_clean_every_n_cadence`), so this division
            # is safe.
            n_expected_runs = test_count / clean_every_n_cadence
            return n_expected_runs * avg_clean_every_n_runtime
        return 0.0

    def lookup_historic_stats(self, timeout_params: TimeoutParams) -> Optional[HistoricTaskData]:
        """
        Lookup historic test results stats for the given task.

        :param timeout_params: Details about the task to lookup.
        :return: Historic test results if they exist, None if there are none or the lookup
            failed.
        """
        try:
            evg_stats = HistoricTaskData.from_evg(
                self.evg_api, timeout_params.evg_project, self.timeout_settings.start_date,
                self.timeout_settings.end_date, timeout_params.task_name,
                timeout_params.build_variant)
            if not evg_stats:
                LOGGER.warning("No historic runtime information available")
                return None
            return evg_stats
        except Exception:  # pylint: disable=broad-except
            # If we have any trouble getting the historic runtime information, log the issue
            # and fall back to default timeouts instead of failing the task.
            LOGGER.warning("Error querying history runtime information from evergreen",
                           exc_info=True)
            return None

    def _get_clean_every_n_cadence(self, suite_name: str, is_asan: bool) -> int:
        """
        Get the N value for the CleanEveryN hook.

        :param suite_name: Name of suite being generated.
        :param is_asan: Whether ASAN is being used.
        :return: How frequently CleanEveryN is run; always an integer >= 1.
        """
        # Default to 1, which is the worst case meaning CleanEveryN would run for every test.
        worst_case_cadence = 1
        if is_asan:
            # ASAN runs hard-code N to 1. See `resmokelib/testing/hooks/cleanup.py`.
            return worst_case_cadence

        clean_every_n_config = self._get_hook_config(suite_name, CLEAN_EVERY_N_HOOK)
        if not clean_every_n_config:
            return worst_case_cadence

        configured_n = clean_every_n_config.get("n", worst_case_cadence)
        if isinstance(configured_n, int) and configured_n >= 1:
            return configured_n

        # A null, zero, negative or non-integer `n` would break the overhead calculation
        # (division by zero / TypeError), so use the worst case instead.
        LOGGER.warning("Invalid 'n' configured for hook, assuming worst case", suite=suite_name,
                       hook=CLEAN_EVERY_N_HOOK, n=configured_n)
        return worst_case_cadence

    def _get_hook_config(self, suite_name: str, hook_name: str) -> Optional[Dict[str, Any]]:
        """
        Get the configuration for the given hook.

        :param suite_name: Name of suite whose configuration should be read.
        :param hook_name: Name of hook to query.
        :return: Configuration for hook, if it exists.
        """
        suite_config = self.resmoke_proxy.read_suite_config(suite_name)
        # Use `or` so that keys which are present but null are treated like missing keys.
        executor_config = suite_config.get("executor") or {}
        hooks_config = executor_config.get("hooks") or []
        for hook in hooks_config:
            if hook.get("class") == hook_name:
                return hook
        return None

View File

@ -1,6 +1,7 @@
"""Functions for working with resmoke task names."""
import math
import re
GEN_SUFFIX = "_gen"
@ -36,3 +37,25 @@ def remove_gen_suffix(task_name: str) -> str:
if task_name.endswith(GEN_SUFFIX):
return task_name[:-4]
return task_name
def determine_task_base_name(task_name: str, build_variant: str) -> str:
    """
    Determine the base name of a task.

    For generated tasks the base name will have the build variant and sub-task index
    stripped off. For other tasks, it is the unmodified task_name.

    A generated task name has the form `<base>_<index>` or `<base>_<index>_<build_variant>`,
    where `<index>` is a number or the literal `misc`. The whole task name must have that
    form; names that merely contain such a prefix (e.g. `sharding_misconfigured`) are
    returned unchanged.

    :param task_name: Name of task to get base name of.
    :param build_variant: Build variant that may be included in task name.
    :return: Base name of given task.
    """
    sub_task_pattern = r"(.*)_(?:[0-9]+|misc)"

    # The build variant is literal text, so escape it before embedding it in the pattern.
    match = re.fullmatch(f"{sub_task_pattern}_{re.escape(build_variant)}", task_name)
    if match is None:
        match = re.fullmatch(sub_task_pattern, task_name)

    return match.group(1) if match else task_name

View File

@ -0,0 +1,5 @@
# Testing in Evergreen
Documentation about how MongoDB is tested in Evergreen.
* [Task Timeouts](task_timeouts.md)

View File

@ -0,0 +1,35 @@
# Evergreen Task Timeouts
## Types of timeouts
There are two types of timeouts that [evergreen supports](https://github.com/evergreen-ci/evergreen/wiki/Project-Commands#timeoutupdate):
* **Exec timeout**: The _exec_ timeout is the overall timeout for a task. Once the total runtime for
a task hits this value, the timeout logic will be triggered. This value is specified by
**exec_timeout_secs** in the evergreen configuration.
* **Idle timeout**: The _idle_ timeout is the amount of time in which evergreen will wait for
output to be created before it considers the task hung and triggers timeout logic. This value
is specified by **timeout_secs** in the evergreen configuration.
**Note**: In most cases, **exec_timeout** is the more useful of the two timeouts.
## Setting the timeout for a task
There are a few ways in which the timeout can be determined for a task running in evergreen.
* **Specified in 'etc/evergreen.yml'**: Timeout can be specified directly in the 'evergreen.yml' file,
both on tasks and build variants. This can be useful for setting default timeout values, but is limited
since different build variants frequently have different runtime characteristics and it is not possible
to set timeouts for a task running on a specific build variant.
* **etc/evergreen_timeouts.yml**: The 'etc/evergreen_timeouts.yml' file is for overriding timeouts
for specific tasks on specific build variants. This provides a work-around for the limitations of
specifying the timeouts directly in the 'evergreen.yml'. In order to use this method, the task
must run the "determine task timeout" and "update task timeout expansions" functions at the beginning
of the task evergreen definition. Most resmoke tasks already do this.
* **buildscripts/evergreen_task_timeout.py**: This is the script that reads the 'etc/evergreen_timeouts.yml'
file and calculates the timeout to use. Additionally, it will check the historic test results of the
task being run and see if there is enough information to calculate timeouts based on that. It can
also be used for more advanced ways of determining timeouts (e.g. the script is used to set much
more aggressive timeouts on tasks that are run in the commit-queue).

View File

@ -1218,6 +1218,9 @@ functions:
- *update_resmoke_jobs_expansions
- *f_expansions_write
- *configure_evergreen_api_credentials
- *determine_task_timeout
- *update_task_timeout_expansions
- *f_expansions_write
- command: subprocess.exec
params:
binary: bash
@ -1243,6 +1246,7 @@ functions:
"run tests":
- *f_expansions_write
- *configure_evergreen_api_credentials
- *determine_task_timeout
- *update_task_timeout_expansions
- *f_expansions_write
@ -2246,7 +2250,9 @@ tasks:
- "./build/**.gcno"
- "./etc/*san.suppressions"
- "./etc/backports_required_for_multiversion_tests.yml"
- "./etc/evergreen_timeouts.yml"
- "./etc/expansions.default.yml"
- "./etc/evergreen.yml"
- "./etc/pip/**"
- "./etc/repo_config.yaml"
- "./etc/scons/**"

View File

@ -0,0 +1,93 @@
# This file defines timeouts in evergreen that will override the default timeouts.
#
# Each key under `overrides` provides the build variant where the override will occur. The
# override should include the `task` that should have its timeout overridden and either the
# `exec_timeout` to override or the `idle_timeout` to override.
#
# The timeouts should be specified in minutes.
# Note: In order to make it easier to find existing entries, please try to keep the build variants
# in alphabetical order.
overrides:
enterprise-macos:
- task: replica_sets_jscore_passthrough
exec_timeout: 150 # 2.5 hours
enterprise-rhel-80-64-bit-coverage:
- task: replica_sets_jscore_passthrough
exec_timeout: 150 # 2.5 hours.
enterprise-ubuntu2004-debug-tsan:
- task: run_unittests
exec_timeout: 24
enterprise-windows:
- task: replica_sets_jscore_passthrough
exec_timeout: 180 # 3 hours.
enterprise-windows-all-feature-flags-suggested:
- task: replica_sets_jscore_passthrough
exec_timeout: 180 # 3 hours.
- task: replica_sets_update_v1_oplog_jscore_passthrough
exec_timeout: 150 # 2.5 hours.
enterprise-windows-inmem:
- task: replica_sets_jscore_passthrough
exec_timeout: 180 # 3 hours.
enterprise-windows-required:
- task: replica_sets_jscore_passthrough
exec_timeout: 180 # 3 hours.
- task: replica_sets_update_v1_oplog_jscore_passthrough
exec_timeout: 150 # 2.5 hours.
linux-64-debug:
- task: auth
exec_timeout: 60 # 1 hour.
linux-64-debug-repeated-execution:
- task: run_unittests
exec_timeout: 120 # 2 hours.
macos:
- task: replica_sets_jscore_passthrough
exec_timeout: 150 # 2.5 hours
ubuntu1804-asan:
- task: run_unittests
exec_timeout: 24
ubuntu1804-debug-asan:
- task: run_unittests
exec_timeout: 24
ubuntu1804-debug-aubsan-lite:
- task: run_unittests
exec_timeout: 24
ubuntu1804-debug-ubsan:
- task: run_unittests
exec_timeout: 24
ubuntu1804-debug-suggested:
- task: replica_sets_jscore_passthrough
exec_timeout: 180 # 3 hours.
ubuntu1804-ubsan:
- task: run_unittests
exec_timeout: 24
windows:
- task: replica_sets
exec_timeout: 180 # 3 hours.
- task: replica_sets_jscore_passthrough
exec_timeout: 150 # 2.5 hours.
windows-debug-suggested:
- task: replica_sets_initsync_jscore_passthrough
exec_timeout: 150 # 2.5 hours.
- task: replica_sets_jscore_passthrough
exec_timeout: 180 # 3 hours.
- task: replica_sets_update_v1_oplog_jscore_passthrough
exec_timeout: 150 # 2.5 hours.

View File

@ -5,11 +5,25 @@ cd src
set -o verbose
set -o errexit
# Set the suite name to be the task name by default; unless overridden with the `suite` expansion.
suite_name=${task_name}
if [[ -n ${suite} ]]; then
suite_name=${suite}
fi
timeout_factor=""
if [[ -n "${exec_timeout_factor}" ]]; then
timeout_factor="--exec-timeout-factor ${exec_timeout_factor}"
fi
activate_venv
$python buildscripts/evergreen_task_timeout.py \
PATH=$PATH:$HOME:/ $python buildscripts/evergreen_task_timeout.py $timeout_factor \
--task-name ${task_name} \
--suite-name ${suite_name} \
--build-variant ${build_variant} \
--evg-alias '${alias}' \
--timeout ${timeout_secs} \
--exec-timeout ${exec_timeout_secs} \
--evg-api-config ./.evergreen.yml \
--out-file task_timeout_expansions.yml