SERVER-63827: Determine task timeout at the start of task execution

2022-03-02 15:37:40 -05:00 · 2022-03-02 15:37:40 -05:00 · 0a079df588
parent 1e72f6d25e
commit 0a079df588
17 changed files with 1143 additions and 113 deletions
--- a/buildscripts/evergreen_task_timeout.py
+++ b/buildscripts/evergreen_task_timeout.py
@ -3,14 +3,35 @@

 import argparse
 import math
+import os
 import sys
-from datetime import timedelta
-from typing import Optional
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Dict, List, Optional

+import inject
+import structlog
 import yaml
+from pydantic import BaseModel
+from evergreen import EvergreenApi, RetryingEvergreenApi

+from buildscripts.ciconfig.evergreen import (EvergreenProjectConfig, parse_evergreen_file)
+from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService, TimeoutSettings)
+from buildscripts.util.cmdutils import enable_logging
+from buildscripts.util.taskname import determine_task_base_name
+
+LOGGER = structlog.get_logger(__name__)
+DEFAULT_TIMEOUT_OVERRIDES = "etc/evergreen_timeouts.yml"
+DEFAULT_EVERGREEN_CONFIG = "etc/evergreen.yml"
+DEFAULT_EVERGREEN_AUTH_CONFIG = "~/.evergreen.yml"
 COMMIT_QUEUE_ALIAS = "__commit_queue"
 UNITTEST_TASK = "run_unittests"
+IGNORED_SUITES = {
+    "integration_tests_replset", "integration_tests_replset_ssl_auth", "integration_tests_sharded",
+    "integration_tests_standalone", "integration_tests_standalone_audit", "mongos_test",
+    "server_selection_json_test"
+}
+HISTORY_LOOKBACK = timedelta(weeks=2)

 COMMIT_QUEUE_TIMEOUT = timedelta(minutes=40)
 DEFAULT_REQUIRED_BUILD_TIMEOUT = timedelta(hours=1, minutes=20)
@ -19,45 +40,92 @@ DEFAULT_NON_REQUIRED_BUILD_TIMEOUT = timedelta(hours=2)
 # which is 5 mins 47 secs, excluding outliers below
 UNITTESTS_TIMEOUT = timedelta(minutes=12)

-SPECIFIC_TASK_OVERRIDES = {
-    "linux-64-debug": {"auth": timedelta(minutes=60)},
-    "enterprise-windows-all-feature-flags-suggested": {
-        "replica_sets_jscore_passthrough": timedelta(hours=3),
-        "replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
-    },
-    "enterprise-windows-required": {
-        "replica_sets_jscore_passthrough": timedelta(hours=3),
-        "replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
-    },
-    "enterprise-windows-inmem": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
-    "enterprise-windows": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
-    "windows-debug-suggested": {
-        "replica_sets_initsync_jscore_passthrough": timedelta(hours=2, minutes=30),
-        "replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
-        "replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
-    },
-    "windows": {
-        "replica_sets": timedelta(hours=3),
-        "replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
-    },
-    "ubuntu1804-debug-suggested": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
-    "enterprise-rhel-80-64-bit-coverage": {
-        "replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
-    },
-    "macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), },
-    "enterprise-macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), },

-    # unittests outliers
-    # repeated execution runs a suite 10 times
-    "linux-64-repeated-execution": {UNITTEST_TASK: 10 * UNITTESTS_TIMEOUT},
-    # some of the a/ub/t san variants need a little extra time
-    "enterprise-ubuntu2004-debug-tsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
-    "ubuntu1804-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
-    "ubuntu1804-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
-    "ubuntu1804-debug-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
-    "ubuntu1804-debug-aubsan-lite": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
-    "ubuntu1804-debug-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
-}
+class TimeoutOverride(BaseModel):
+    """
+    Specification for overriding a task timeout.
+
+    * task: Name of task to override.
+    * exec_timeout: Value to override exec timeout with.
+    * idle_timeout: Value to override idle timeout with.
+    """
+
+    task: str
+    exec_timeout: Optional[int] = None
+    idle_timeout: Optional[int] = None
+
+    def get_exec_timeout(self) -> Optional[timedelta]:
+        """Get a timedelta of the exec timeout to use."""
+        if self.exec_timeout is not None:
+            return timedelta(minutes=self.exec_timeout)
+        return None
+
+    def get_idle_timeout(self) -> Optional[timedelta]:
+        """Get a timedelta of the idle timeout to use."""
+        if self.idle_timeout is not None:
+            return timedelta(minutes=self.idle_timeout)
+        return None
+
+
+class TimeoutOverrides(BaseModel):
+    """Collection of timeout overrides to apply."""
+
+    overrides: Dict[str, List[TimeoutOverride]]
+
+    @classmethod
+    def from_yaml_file(cls, file_path: Path) -> "TimeoutOverrides":
+        """Read the timeout overrides from the given file."""
+        with open(file_path) as file_handler:
+            return cls(**yaml.safe_load(file_handler))
+
+    def _lookup_override(self, build_variant: str, task_name: str) -> Optional[TimeoutOverride]:
+        """
+        Check if the given task on the given build variant has an override defined.
+
+        Note: If multiple overrides are found, an exception will be raised.
+
+        :param build_variant: Build Variant to check.
+        :param task_name: Task name to check.
+        :return: Timeout override if found.
+        """
+        overrides = [
+            override for override in self.overrides.get(build_variant, [])
+            if override.task == task_name
+        ]
+        if overrides:
+            if len(overrides) > 1:
+                LOGGER.error("Found multiple overrides for the same task",
+                             build_variant=build_variant, task=task_name,
+                             overrides=[override.dict() for override in overrides])
+                raise ValueError(f"Found multiple overrides for '{task_name}' on '{build_variant}'")
+            return overrides[0]
+        return None
+
+    def lookup_exec_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
+        """
+        Look up the exec timeout override of the given build variant/task.
+
+        :param build_variant: Build Variant to check.
+        :param task_name: Task name to check.
+        :return: Exec timeout override if found.
+        """
+        override = self._lookup_override(build_variant, task_name)
+        if override is not None:
+            return override.get_exec_timeout()
+        return None
+
+    def lookup_idle_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
+        """
+        Look up the idle timeout override of the given build variant/task.
+
+        :param build_variant: Build Variant to check.
+        :param task_name: Task name to check.
+        :return: Idle timeout override if found.
+        """
+        override = self._lookup_override(build_variant, task_name)
+        if override is not None:
+            return override.get_idle_timeout()
+        return None


 def _is_required_build_variant(build_variant: str) -> bool:
@ -70,63 +138,20 @@ def _is_required_build_variant(build_variant: str) -> bool:
    return build_variant.endswith("-required")


-def _has_override(variant: str, task_name: str) -> bool:
-    """
-    Determine if the given task has a timeout override.
-
-    :param variant: Build Variant task is running on.
-    :param task_name: Task to check.
-    :return: True if override exists for task.
-    """
-    return variant in SPECIFIC_TASK_OVERRIDES and task_name in SPECIFIC_TASK_OVERRIDES[variant]
-
-
-def determine_timeout(task_name: str, variant: str, idle_timeout: Optional[timedelta] = None,
-                      exec_timeout: Optional[timedelta] = None, evg_alias: str = '') -> timedelta:
-    """
-    Determine what exec timeout should be used.
-
-    :param task_name: Name of task being run.
-    :param variant: Name of build variant being run.
-    :param idle_timeout: Idle timeout if specified.
-    :param exec_timeout: Override to use for exec_timeout or 0 if no override.
-    :param evg_alias: Evergreen alias running the task.
-    :return: Exec timeout to use for running task.
-    """
-    determined_timeout = DEFAULT_NON_REQUIRED_BUILD_TIMEOUT
-
-    if exec_timeout and exec_timeout.total_seconds() != 0:
-        determined_timeout = exec_timeout
-
-    elif task_name == UNITTEST_TASK and not _has_override(variant, task_name):
-        determined_timeout = UNITTESTS_TIMEOUT
-
-    elif evg_alias == COMMIT_QUEUE_ALIAS:
-        determined_timeout = COMMIT_QUEUE_TIMEOUT
-
-    elif _has_override(variant, task_name):
-        determined_timeout = SPECIFIC_TASK_OVERRIDES[variant][task_name]
-
-    elif _is_required_build_variant(variant):
-        determined_timeout = DEFAULT_REQUIRED_BUILD_TIMEOUT
-
-    # The timeout needs to be at least as large as the idle timeout.
-    if idle_timeout and determined_timeout.total_seconds() < idle_timeout.total_seconds():
-        return idle_timeout
-
-    return determined_timeout
-
-
-def output_timeout(task_timeout: timedelta, output_file: Optional[str]) -> None:
+def output_timeout(exec_timeout: timedelta, idle_timeout: Optional[timedelta],
+                   output_file: Optional[str]) -> None:
    """
    Output timeout configuration to the specified location.

-    :param task_timeout: Timeout to output.
+    :param exec_timeout: Exec timeout to output.
+    :param idle_timeout: Idle timeout to output.
    :param output_file: Location of output file to write.
    """
    output = {
-        "exec_timeout_secs": math.ceil(task_timeout.total_seconds()),
+        "exec_timeout_secs": math.ceil(exec_timeout.total_seconds()),
    }
+    if idle_timeout is not None:
+        output["timeout_secs"] = math.ceil(idle_timeout.total_seconds())

    if output_file:
        with open(output_file, "w") as outfile:
@ -135,28 +160,216 @@ def output_timeout(task_timeout: timedelta, output_file: Optional[str]) -> None:
    yaml.dump(output, stream=sys.stdout, default_flow_style=False)


+class TaskTimeoutOrchestrator:
+    """An orchestrator for determining task timeouts."""
+
+    @inject.autoparams()
+    def __init__(self, timeout_service: TimeoutService, timeout_overrides: TimeoutOverrides,
+                 evg_project_config: EvergreenProjectConfig) -> None:
+        """
+        Initialize the orchestrator.
+
+        :param timeout_service: Service for calculating historic timeouts.
+        :param timeout_overrides: Timeout overrides for specific tasks.
+        """
+        self.timeout_service = timeout_service
+        self.timeout_overrides = timeout_overrides
+        self.evg_project_config = evg_project_config
+
+    def determine_exec_timeout(
+            self, task_name: str, variant: str, idle_timeout: Optional[timedelta] = None,
+            exec_timeout: Optional[timedelta] = None, evg_alias: str = "") -> timedelta:
+        """
+        Determine what exec timeout should be used.
+
+        :param task_name: Name of task being run.
+        :param variant: Name of build variant being run.
+        :param idle_timeout: Idle timeout if specified.
+        :param exec_timeout: Override to use for exec_timeout or 0 if no override.
+        :param evg_alias: Evergreen alias running the task.
+        :return: Exec timeout to use for running task.
+        """
+        determined_timeout = DEFAULT_NON_REQUIRED_BUILD_TIMEOUT
+
+        override = self.timeout_overrides.lookup_exec_override(variant, task_name)
+
+        if exec_timeout and exec_timeout.total_seconds() != 0:
+            LOGGER.info("Using timeout from cmd line",
+                        exec_timeout_secs=exec_timeout.total_seconds())
+            determined_timeout = exec_timeout
+
+        elif task_name == UNITTEST_TASK and override is None:
+            LOGGER.info("Overriding unittest timeout",
+                        exec_timeout_secs=UNITTESTS_TIMEOUT.total_seconds())
+            determined_timeout = UNITTESTS_TIMEOUT
+
+        elif evg_alias == COMMIT_QUEUE_ALIAS:
+            LOGGER.info("Overriding commit-queue timeout",
+                        exec_timeout_secs=COMMIT_QUEUE_TIMEOUT.total_seconds())
+            determined_timeout = COMMIT_QUEUE_TIMEOUT
+
+        elif override is not None:
+            LOGGER.info("Overriding configured timeout", exec_timeout_secs=override.total_seconds())
+            determined_timeout = override
+
+        elif _is_required_build_variant(variant):
+            LOGGER.info("Overriding required-builder timeout",
+                        exec_timeout_secs=DEFAULT_REQUIRED_BUILD_TIMEOUT.total_seconds())
+            determined_timeout = DEFAULT_REQUIRED_BUILD_TIMEOUT
+
+        # The timeout needs to be at least as large as the idle timeout.
+        if idle_timeout and determined_timeout.total_seconds() < idle_timeout.total_seconds():
+            LOGGER.info("Making exec timeout as large as idle timeout",
+                        exec_timeout_secs=idle_timeout.total_seconds())
+            return idle_timeout
+
+        return determined_timeout
+
+    def determine_idle_timeout(self, task_name: str, variant: str,
+                               idle_timeout: Optional[timedelta] = None) -> Optional[timedelta]:
+        """
+        Determine what idle timeout should be used.
+
+        :param task_name: Name of task being run.
+        :param variant: Name of build variant being run.
+        :param idle_timeout: Override to use for idle_timeout.
+        :return: Idle timeout to use for running task.
+        """
+        determined_timeout = None
+        override = self.timeout_overrides.lookup_idle_override(variant, task_name)
+
+        if idle_timeout and idle_timeout.total_seconds() != 0:
+            LOGGER.info("Using timeout from cmd line",
+                        idle_timeout_secs=idle_timeout.total_seconds())
+            determined_timeout = idle_timeout
+
+        elif override is not None:
+            LOGGER.info("Overriding configured timeout", idle_timeout_secs=override.total_seconds())
+            determined_timeout = override
+
+        return determined_timeout
+
+    def determine_historic_timeout(self, task: str, variant: str, suite_name: str,
+                                   exec_timeout_factor: Optional[float]) -> Optional[timedelta]:
+        """
+        Calculate the timeout based on historic test results.
+
+        :param task: Name of task to query.
+        :param variant: Name of build variant to query.
+        :param suite_name: Name of test suite being run.
+        :param exec_timeout_factor: Scaling factor to use when determining timeout.
+        """
+        if suite_name in IGNORED_SUITES:
+            return None
+
+        timeout_params = TimeoutParams(
+            evg_project="mongodb-mongo-master",
+            build_variant=variant,
+            task_name=task,
+            suite_name=suite_name,
+            is_asan=self.is_build_variant_asan(variant),
+        )
+        timeout_estimate = self.timeout_service.get_timeout_estimate(timeout_params)
+        if timeout_estimate and timeout_estimate.is_specified():
+            exec_timeout = timeout_estimate.calculate_task_timeout(
+                repeat_factor=1, scaling_factor=exec_timeout_factor)
+            if exec_timeout is not None:
+                LOGGER.info("Using historic based timeout", exec_timeout_secs=exec_timeout)
+                return timedelta(seconds=exec_timeout)
+        return None
+
+    def is_build_variant_asan(self, build_variant: str) -> bool:
+        """
+        Determine if the given build variant is an ASAN build variant.
+
+        :param build_variant: Name of build variant to check.
+        :return: True if build variant is an ASAN build variant.
+        """
+        bv = self.evg_project_config.get_variant(build_variant)
+        return bv.is_asan_build()
+
+    def determine_timeouts(self, cli_idle_timeout: Optional[timedelta],
+                           cli_exec_timeout: Optional[timedelta], outfile: Optional[str], task: str,
+                           variant: str, evg_alias: str, suite_name: str,
+                           exec_timeout_factor: Optional[float]) -> None:
+        """
+        Determine the timeouts to use for the given task and write timeouts to expansion file.
+
+        :param cli_idle_timeout: Idle timeout specified by the CLI.
+        :param cli_exec_timeout: Exec timeout specified by the CLI.
+        :param outfile: File to write timeout expansions to.
+        :param variant: Build variant task is being run on.
+        :param evg_alias: Evergreen alias that triggered task.
+        :param suite_name: Name of evergreen suite being run.
+        :param exec_timeout_factor: Scaling factor to use when determining timeout.
+        """
+        idle_timeout = self.determine_idle_timeout(task, variant, cli_idle_timeout)
+        exec_timeout = self.determine_exec_timeout(task, variant, idle_timeout, cli_exec_timeout,
+                                                   evg_alias)
+
+        historic_timeout = self.determine_historic_timeout(task, variant, suite_name,
+                                                           exec_timeout_factor)
+        if historic_timeout:
+            exec_timeout = historic_timeout
+
+        output_timeout(exec_timeout, idle_timeout, outfile)
+
+
 def main():
    """Determine the timeout value a task should use in evergreen."""
    parser = argparse.ArgumentParser(description=main.__doc__)

    parser.add_argument("--task-name", dest="task", required=True, help="Task being executed.")
+    parser.add_argument("--suite-name", dest="suite_name", required=True,
+                        help="Resmoke suite being run against.")
    parser.add_argument("--build-variant", dest="variant", required=True,
                        help="Build variant task is being executed on.")
    parser.add_argument("--evg-alias", dest="evg_alias", required=True,
                        help="Evergreen alias used to trigger build.")
    parser.add_argument("--timeout", dest="timeout", type=int, help="Timeout to use (in sec).")
    parser.add_argument("--exec-timeout", dest="exec_timeout", type=int,
-                        help="Exec timeout ot use (in sec).")
+                        help="Exec timeout to use (in sec).")
+    parser.add_argument("--exec-timeout-factor", dest="exec_timeout_factor", type=float,
+                        help="Scaling factor to apply to the exec timeout.")
    parser.add_argument("--out-file", dest="outfile", help="File to write configuration to.")
+    parser.add_argument("--timeout-overrides", dest="timeout_overrides_file",
+                        default=DEFAULT_TIMEOUT_OVERRIDES,
+                        help="File containing timeout overrides to use.")
+    parser.add_argument("--evg-api-config", dest="evg_api_config",
+                        default=DEFAULT_EVERGREEN_AUTH_CONFIG, help="Evergreen API config file.")
+    parser.add_argument("--evg-project-config", dest="evg_project_config",
+                        default=DEFAULT_EVERGREEN_CONFIG, help="Evergreen project config file.")

    options = parser.parse_args()

+    end_date = datetime.now()
+    start_date = end_date - HISTORY_LOOKBACK
+
    timeout_override = timedelta(seconds=options.timeout) if options.timeout else None
    exec_timeout_override = timedelta(
        seconds=options.exec_timeout) if options.exec_timeout else None
-    task_timeout = determine_timeout(options.task, options.variant, timeout_override,
-                                     exec_timeout_override, options.evg_alias)
-    output_timeout(task_timeout, options.outfile)
+
+    task_name = determine_task_base_name(options.task, options.variant)
+    timeout_overrides = TimeoutOverrides.from_yaml_file(
+        os.path.expanduser(options.timeout_overrides_file))
+
+    enable_logging(verbose=False)
+
+    def dependencies(binder: inject.Binder) -> None:
+        binder.bind(
+            EvergreenApi,
+            RetryingEvergreenApi.get_api(config_file=os.path.expanduser(options.evg_api_config)))
+        binder.bind(TimeoutSettings, TimeoutSettings(start_date=start_date, end_date=end_date))
+        binder.bind(TimeoutOverrides, timeout_overrides)
+        binder.bind(EvergreenProjectConfig,
+                    parse_evergreen_file(os.path.expanduser(options.evg_project_config)))
+
+    inject.configure(dependencies)
+
+    task_timeout_orchestrator = inject.instance(TaskTimeoutOrchestrator)
+    task_timeout_orchestrator.determine_timeouts(
+        timeout_override, exec_timeout_override, options.outfile, task_name, options.variant,
+        options.evg_alias, options.suite_name, options.exec_timeout_factor)


 if __name__ == "__main__":
--- a/buildscripts/task_generation/suite_split.py
+++ b/buildscripts/task_generation/suite_split.py
@ -13,7 +13,7 @@ from evergreen import EvergreenApi

 from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
 from buildscripts.task_generation.suite_split_strategies import SplitStrategy, FallbackStrategy
-from buildscripts.task_generation.timeout import TimeoutEstimate
+from buildscripts.timeouts.timeout import TimeoutEstimate
 from buildscripts.util import taskname
 from buildscripts.util.teststats import HistoricTaskData, TestRuntime, normalize_test_name

--- a/buildscripts/task_generation/task_types/resmoke_tasks.py
+++ b/buildscripts/task_generation/task_types/resmoke_tasks.py
@ -14,7 +14,7 @@ from buildscripts.task_generation.task_types.gentask_options import GenTaskOptio
 from buildscripts.task_generation.task_types.models.resmoke_task_model import ResmokeTask
 from buildscripts.task_generation.task_types.multiversion_decorator import MultiversionGenTaskDecorator, \
    MultiversionDecoratorParams
-from buildscripts.task_generation.timeout import TimeoutEstimate
+from buildscripts.timeouts.timeout import TimeoutEstimate

 LOGGER = structlog.getLogger(__name__)

--- a/buildscripts/tests/test_evergreen_task_timeout.py
+++ b/buildscripts/tests/test_evergreen_task_timeout.py
@ -1,46 +1,213 @@
 """Unit tests for the evergreen_task_timeout script."""
-from datetime import timedelta
 import unittest
+from datetime import timedelta
+from unittest.mock import MagicMock

 import buildscripts.evergreen_task_timeout as under_test
+from buildscripts.ciconfig.evergreen import EvergreenProjectConfig
+from buildscripts.timeouts.timeout_service import TimeoutService

-# pylint: disable=missing-docstring,no-self-use
+# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access


-class DetermineTimeoutTest(unittest.TestCase):
+class TestTimeoutOverride(unittest.TestCase):
+    def test_exec_timeout_should_be_settable(self):
+        timeout_override = under_test.TimeoutOverride(task="my task", exec_timeout=42)
+
+        timeout = timeout_override.get_exec_timeout()
+
+        self.assertIsNotNone(timeout)
+        self.assertEqual(42 * 60, timeout.total_seconds())
+
+    def test_exec_timeout_should_default_to_none(self):
+        timeout_override = under_test.TimeoutOverride(task="my task")
+
+        timeout = timeout_override.get_exec_timeout()
+
+        self.assertIsNone(timeout)
+
+    def test_idle_timeout_should_be_settable(self):
+        timeout_override = under_test.TimeoutOverride(task="my task", idle_timeout=42)
+
+        timeout = timeout_override.get_idle_timeout()
+
+        self.assertIsNotNone(timeout)
+        self.assertEqual(42 * 60, timeout.total_seconds())
+
+    def test_idle_timeout_should_default_to_none(self):
+        timeout_override = under_test.TimeoutOverride(task="my task")
+
+        timeout = timeout_override.get_idle_timeout()
+
+        self.assertIsNone(timeout)
+
+
+class TestTimeoutOverrides(unittest.TestCase):
+    def test_looking_up_a_non_existing_override_should_return_none(self):
+        timeout_overrides = under_test.TimeoutOverrides(overrides={})
+
+        self.assertIsNone(timeout_overrides.lookup_exec_override("bv", "task"))
+        self.assertIsNone(timeout_overrides.lookup_idle_override("bv", "task"))
+
+    def test_looking_up_a_duplicate_override_should_raise_error(self):
+        timeout_overrides = under_test.TimeoutOverrides(
+            overrides={
+                "bv": [{
+                    "task": "task_name",
+                    "exec_timeout": 42,
+                    "idle_timeout": 10,
+                }, {
+                    "task": "task_name",
+                    "exec_timeout": 314,
+                    "idle_timeout": 20,
+                }]
+            })
+
+        with self.assertRaises(ValueError):
+            self.assertIsNone(timeout_overrides.lookup_exec_override("bv", "task_name"))
+
+        with self.assertRaises(ValueError):
+            self.assertIsNone(timeout_overrides.lookup_idle_override("bv", "task_name"))
+
+    def test_looking_up_an_exec_override_should_work(self):
+        timeout_overrides = under_test.TimeoutOverrides(
+            overrides={
+                "bv": [
+                    {
+                        "task": "another_task",
+                        "exec_timeout": 314,
+                        "idle_timeout": 20,
+                    },
+                    {
+                        "task": "task_name",
+                        "exec_timeout": 42,
+                    },
+                ]
+            })
+
+        self.assertEqual(42 * 60,
+                         timeout_overrides.lookup_exec_override("bv", "task_name").total_seconds())
+
+    def test_looking_up_an_idle_override_should_work(self):
+        timeout_overrides = under_test.TimeoutOverrides(
+            overrides={
+                "bv": [
+                    {
+                        "task": "another_task",
+                        "exec_timeout": 314,
+                        "idle_timeout": 20,
+                    },
+                    {
+                        "task": "task_name",
+                        "idle_timeout": 10,
+                    },
+                ]
+            })
+
+        self.assertEqual(10 * 60,
+                         timeout_overrides.lookup_idle_override("bv", "task_name").total_seconds())
+
+
+class TestDetermineExecTimeout(unittest.TestCase):
    def test_timeout_used_if_specified(self):
+        mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
        timeout = timedelta(seconds=42)
        self.assertEqual(
-            under_test.determine_timeout("task_name", "variant", None, timeout), timeout)
+            orchestrator.determine_exec_timeout("task_name", "variant", None, timeout), timeout)

    def test_default_is_returned_with_no_timeout(self):
+        mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
        self.assertEqual(
-            under_test.determine_timeout("task_name", "variant"),
+            orchestrator.determine_exec_timeout("task_name", "variant"),
            under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT)

    def test_default_is_returned_with_timeout_at_zero(self):
+        mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
        self.assertEqual(
-            under_test.determine_timeout("task_name", "variant", timedelta(seconds=0)),
+            orchestrator.determine_exec_timeout("task_name", "variant", timedelta(seconds=0)),
            under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT)

    def test_default_required_returned_on_required_variants(self):
+        mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
        self.assertEqual(
-            under_test.determine_timeout("task_name", "variant-required"),
+            orchestrator.determine_exec_timeout("task_name", "variant-required"),
            under_test.DEFAULT_REQUIRED_BUILD_TIMEOUT)

    def test_task_specific_timeout(self):
+        mock_timeout_overrides = under_test.TimeoutOverrides(
+            overrides={"linux-64-debug": [{"task": "auth", "exec_timeout": 60}]})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
        self.assertEqual(
-            under_test.determine_timeout("auth", "linux-64-debug"), timedelta(minutes=60))
+            orchestrator.determine_exec_timeout("auth", "linux-64-debug"), timedelta(minutes=60))

    def test_commit_queue_items_use_commit_queue_timeout(self):
-        timeout = under_test.determine_timeout("auth", "variant",
-                                               evg_alias=under_test.COMMIT_QUEUE_ALIAS)
+        mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
+        timeout = orchestrator.determine_exec_timeout("auth", "variant",
+                                                      evg_alias=under_test.COMMIT_QUEUE_ALIAS)
        self.assertEqual(timeout, under_test.COMMIT_QUEUE_TIMEOUT)

    def test_use_idle_timeout_if_greater_than_exec_timeout(self):
+        mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
        idle_timeout = timedelta(hours=2)
        exec_timeout = timedelta(minutes=10)
-        timeout = under_test.determine_timeout("task_name", "variant", idle_timeout=idle_timeout,
-                                               exec_timeout=exec_timeout)
+        timeout = orchestrator.determine_exec_timeout(
+            "task_name", "variant", idle_timeout=idle_timeout, exec_timeout=exec_timeout)

        self.assertEqual(timeout, idle_timeout)
+
+
+class TestDetermineIdleTimeout(unittest.TestCase):
+    def test_timeout_used_if_specified(self):
+        mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
+        timeout = timedelta(seconds=42)
+        self.assertEqual(
+            orchestrator.determine_idle_timeout("task_name", "variant", timeout), timeout)
+
+    def test_default_is_returned_with_no_timeout(self):
+        mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
+        self.assertIsNone(orchestrator.determine_idle_timeout("task_name", "variant"))
+
+    def test_task_specific_timeout(self):
+        mock_timeout_overrides = under_test.TimeoutOverrides(
+            overrides={"linux-64-debug": [{"task": "auth", "idle_timeout": 60}]})
+        orchestrator = under_test.TaskTimeoutOrchestrator(
+            timeout_service=MagicMock(spec_set=TimeoutService),
+            timeout_overrides=mock_timeout_overrides,
+            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
+        self.assertEqual(
+            orchestrator.determine_idle_timeout("auth", "linux-64-debug"), timedelta(minutes=60))
--- a/buildscripts/tests/timeouts/__init__.py
+++ b/buildscripts/tests/timeouts/__init__.py
@ -0,0 +1 @@
+"""Empty."""
--- a/buildscripts/tests/task_generation/test_timeout.py
+++ b/buildscripts/tests/task_generation/test_timeout.py
@ -1,7 +1,7 @@
 """Unit tests for timeout.py."""
 import unittest

-from buildscripts.task_generation import timeout as under_test
+from buildscripts.timeouts import timeout as under_test

 # pylint: disable=missing-docstring,invalid-name,unused-argument,no-self-use,protected-access,no-value-for-parameter

--- a/buildscripts/tests/timeouts/test_timeout_service.py
+++ b/buildscripts/tests/timeouts/test_timeout_service.py
@ -0,0 +1,258 @@
+"""Unit tests for timeout_service.py."""
+import random
+import unittest
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock
+
+from requests.exceptions import HTTPError
+from evergreen import EvergreenApi
+
+import buildscripts.timeouts.timeout_service as under_test
+from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
+from buildscripts.util.teststats import HistoricTaskData
+
+# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access
+
+
+def build_mock_service(evg_api=None, resmoke_proxy=None):
+    end_date = datetime.now()
+    start_date = end_date - timedelta(weeks=2)
+    timeout_settings = under_test.TimeoutSettings(
+        end_date=end_date,
+        start_date=start_date,
+    )
+    return under_test.TimeoutService(
+        evg_api=evg_api if evg_api else MagicMock(spec_set=EvergreenApi),
+        resmoke_proxy=resmoke_proxy if resmoke_proxy else MagicMock(spec_set=ResmokeProxyService),
+        timeout_settings=timeout_settings)
+
+
+def tst_stat_mock(file, duration, pass_count):
+    return MagicMock(test_file=file, avg_duration_pass=duration, num_pass=pass_count)
+
+
+class TestGetTimeoutEstimate(unittest.TestCase):
+    def test_no_stats_should_return_default_timeout(self):
+        mock_evg_api = MagicMock(spec_set=EvergreenApi)
+        mock_evg_api.test_stats_by_project.return_value = []
+        timeout_service = build_mock_service(evg_api=mock_evg_api)
+        timeout_params = under_test.TimeoutParams(
+            evg_project="my project",
+            build_variant="bv",
+            task_name="my task",
+            suite_name="my suite",
+            is_asan=False,
+        )
+
+        timeout = timeout_service.get_timeout_estimate(timeout_params)
+
+        self.assertFalse(timeout.is_specified())
+
+    def test_a_test_with_missing_history_should_cause_a_default_timeout(self):
+        mock_evg_api = MagicMock(spec_set=EvergreenApi)
+        test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
+        mock_evg_api.test_stats_by_project.return_value = test_stats
+        mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
+        mock_resmoke_proxy.list_tests.return_value = ["test_with_no_stats.js"]
+        timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy)
+        timeout_params = under_test.TimeoutParams(
+            evg_project="my project",
+            build_variant="bv",
+            task_name="my task",
+            suite_name="my suite",
+            is_asan=False,
+        )
+
+        timeout = timeout_service.get_timeout_estimate(timeout_params)
+
+        self.assertFalse(timeout.is_specified())
+
+    def test_a_test_with_zero_runtime_history_should_cause_a_default_timeout(self):
+        mock_evg_api = MagicMock(spec_set=EvergreenApi)
+        test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
+        test_stats.append(tst_stat_mock("zero.js", 0.0, 1))
+        mock_evg_api.test_stats_by_project.return_value = test_stats
+        mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
+        mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats]
+        timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy)
+        timeout_params = under_test.TimeoutParams(
+            evg_project="my project",
+            build_variant="bv",
+            task_name="my task",
+            suite_name="my suite",
+            is_asan=False,
+        )
+
+        timeout = timeout_service.get_timeout_estimate(timeout_params)
+
+        self.assertFalse(timeout.is_specified())
+
+    def test_all_tests_with_runtime_history_should_use_custom_timeout(self):
+        mock_evg_api = MagicMock(spec_set=EvergreenApi)
+        n_tests = 30
+        test_runtime = 600
+        test_stats = [tst_stat_mock(f"test_{i}.js", test_runtime, 1) for i in range(n_tests)]
+        mock_evg_api.test_stats_by_project.return_value = test_stats
+        mock_resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
+        mock_resmoke_proxy.list_tests.return_value = [ts.test_file for ts in test_stats]
+        timeout_service = build_mock_service(evg_api=mock_evg_api, resmoke_proxy=mock_resmoke_proxy)
+        timeout_params = under_test.TimeoutParams(
+            evg_project="my project",
+            build_variant="bv",
+            task_name="my task",
+            suite_name="my suite",
+            is_asan=False,
+        )
+
+        timeout = timeout_service.get_timeout_estimate(timeout_params)
+
+        self.assertTrue(timeout.is_specified())
+        self.assertEqual(1860, timeout.calculate_test_timeout(1))
+        self.assertEqual(54180, timeout.calculate_task_timeout(1))
+
+
+class TestGetTaskHookOverhead(unittest.TestCase):
+    def test_no_stats_should_return_zero(self):
+        timeout_service = build_mock_service()
+
+        overhead = timeout_service.get_task_hook_overhead("suite", is_asan=False, test_count=30,
+                                                          historic_stats=None)
+
+        self.assertEqual(0.0, overhead)
+
+    def test_stats_with_no_clean_every_n_should_return_zero(self):
+        timeout_service = build_mock_service()
+        test_stats = HistoricTaskData.from_stats_list(
+            [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)])
+
+        overhead = timeout_service.get_task_hook_overhead("suite", is_asan=False, test_count=30,
+                                                          historic_stats=test_stats)
+
+        self.assertEqual(0.0, overhead)
+
+    def test_stats_with_clean_every_n_should_return_overhead(self):
+        test_count = 30
+        runtime = 25
+        timeout_service = build_mock_service()
+        test_stat_list = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(test_count)]
+        test_stat_list.extend([
+            tst_stat_mock(f"test_{i}:{under_test.CLEAN_EVERY_N_HOOK}", runtime, 1)
+            for i in range(10)
+        ])
+        random.shuffle(test_stat_list)
+        test_stats = HistoricTaskData.from_stats_list(test_stat_list)
+
+        overhead = timeout_service.get_task_hook_overhead(
+            "suite", is_asan=True, test_count=test_count, historic_stats=test_stats)
+
+        self.assertEqual(runtime * test_count, overhead)
+
+
+class TestLookupHistoricStats(unittest.TestCase):
+    def test_no_stats_from_evergreen_should_return_none(self):
+        mock_evg_api = MagicMock(spec_set=EvergreenApi)
+        mock_evg_api.test_stats_by_project.return_value = []
+        timeout_service = build_mock_service(evg_api=mock_evg_api)
+        timeout_params = under_test.TimeoutParams(
+            evg_project="my project",
+            build_variant="bv",
+            task_name="my task",
+            suite_name="my suite",
+            is_asan=False,
+        )
+
+        stats = timeout_service.lookup_historic_stats(timeout_params)
+
+        self.assertIsNone(stats)
+
+    def test_errors_from_evergreen_should_return_none(self):
+        mock_evg_api = MagicMock(spec_set=EvergreenApi)
+        mock_evg_api.test_stats_by_project.side_effect = HTTPError("failed to connect")
+        timeout_service = build_mock_service(evg_api=mock_evg_api)
+        timeout_params = under_test.TimeoutParams(
+            evg_project="my project",
+            build_variant="bv",
+            task_name="my task",
+            suite_name="my suite",
+            is_asan=False,
+        )
+
+        stats = timeout_service.lookup_historic_stats(timeout_params)
+
+        self.assertIsNone(stats)
+
+    def test_stats_from_evergreen_should_return_the_stats(self):
+        mock_evg_api = MagicMock(spec_set=EvergreenApi)
+        test_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(100)]
+        mock_evg_api.test_stats_by_project.return_value = test_stats
+        timeout_service = build_mock_service(evg_api=mock_evg_api)
+        timeout_params = under_test.TimeoutParams(
+            evg_project="my project",
+            build_variant="bv",
+            task_name="my task",
+            suite_name="my suite",
+            is_asan=False,
+        )
+
+        stats = timeout_service.lookup_historic_stats(timeout_params)
+
+        self.assertIsNotNone(stats)
+        self.assertEqual(len(test_stats), len(stats.historic_test_results))
+
+
+class TestGetCleanEveryNCadence(unittest.TestCase):
+    def test_clean_every_n_cadence_on_asan(self):
+        timeout_service = build_mock_service()
+
+        cadence = timeout_service._get_clean_every_n_cadence("suite", True)
+
+        self.assertEqual(1, cadence)
+
+    def test_clean_every_n_cadence_from_hook_config(self):
+        expected_n = 42
+        mock_resmoke_proxy = MagicMock()
+        mock_resmoke_proxy.read_suite_config.return_value = {
+            "executor": {
+                "hooks": [{
+                    "class": "hook1",
+                }, {
+                    "class": under_test.CLEAN_EVERY_N_HOOK,
+                    "n": expected_n,
+                }]
+            }
+        }
+        timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy)
+
+        cadence = timeout_service._get_clean_every_n_cadence("suite", False)
+
+        self.assertEqual(expected_n, cadence)
+
+    def test_clean_every_n_cadence_no_n_in_hook_config(self):
+        mock_resmoke_proxy = MagicMock()
+        mock_resmoke_proxy.read_suite_config.return_value = {
+            "executor": {
+                "hooks": [{
+                    "class": "hook1",
+                }, {
+                    "class": under_test.CLEAN_EVERY_N_HOOK,
+                }]
+            }
+        }
+        timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy)
+
+        cadence = timeout_service._get_clean_every_n_cadence("suite", False)
+
+        self.assertEqual(1, cadence)
+
+    def test_clean_every_n_cadence_no_hook_config(self):
+        mock_resmoke_proxy = MagicMock()
+        mock_resmoke_proxy.read_suite_config.return_value = {
+            "executor": {"hooks": [{
+                "class": "hook1",
+            }, ]}
+        }
+        timeout_service = build_mock_service(resmoke_proxy=mock_resmoke_proxy)
+
+        cadence = timeout_service._get_clean_every_n_cadence("suite", False)
+
+        self.assertEqual(1, cadence)
--- a/buildscripts/tests/util/test_taskname.py
+++ b/buildscripts/tests/util/test_taskname.py
@ -4,7 +4,7 @@ import unittest

 import buildscripts.util.taskname as under_test

-# pylint: disable=missing-docstring,protected-access
+# pylint: disable=missing-docstring,protected-access,invalid-name


 class TestNameTask(unittest.TestCase):
@ -24,3 +24,29 @@ class TestRemoveGenSuffix(unittest.TestCase):
        input_task_name = "sharded_multi_stmt_txn_jscore_passthroug"
        self.assertEqual("sharded_multi_stmt_txn_jscore_passthroug",
                         under_test.remove_gen_suffix(input_task_name))
+
+
+class TestDetermineTaskBaseName(unittest.TestCase):
+    def test_task_name_with_build_variant_should_strip_bv_and_sub_task_index(self):
+        bv = "enterprise-rhel-80-64-bit-dynamic-required"
+        task_name = f"auth_23_{bv}"
+
+        base_task_name = under_test.determine_task_base_name(task_name, bv)
+
+        self.assertEqual("auth", base_task_name)
+
+    def test_task_name_without_build_variant_should_strip_sub_task_index(self):
+        bv = "enterprise-rhel-80-64-bit-dynamic-required"
+        task_name = "auth_314"
+
+        base_task_name = under_test.determine_task_base_name(task_name, bv)
+
+        self.assertEqual("auth", base_task_name)
+
+    def test_task_name_without_build_variant_or_subtask_index_should_self(self):
+        bv = "enterprise-rhel-80-64-bit-dynamic-required"
+        task_name = "auth"
+
+        base_task_name = under_test.determine_task_base_name(task_name, bv)
+
+        self.assertEqual("auth", base_task_name)
--- a/buildscripts/timeouts/__init__.py
+++ b/buildscripts/timeouts/__init__.py
@ -0,0 +1 @@
+"""Empty."""
--- a/buildscripts/task_generation/timeout.py
+++ b/buildscripts/task_generation/timeout.py
@ -1,11 +1,10 @@
 """Timeout information for generating tasks."""
 import math
 from datetime import timedelta
-from inspect import getframeinfo, currentframe
+from inspect import currentframe, getframeinfo
 from typing import NamedTuple, Optional

 import structlog
-
 from buildscripts.patch_builds.task_generation import TimeoutInfo

 LOGGER = structlog.getLogger(__name__)
--- a/buildscripts/timeouts/timeout_service.py
+++ b/buildscripts/timeouts/timeout_service.py
@ -0,0 +1,189 @@
+"""Service for determining task timeouts."""
+from datetime import datetime
+from typing import Any, Dict, NamedTuple, Optional
+
+import inject
+import structlog
+from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
+from buildscripts.timeouts.timeout import TimeoutEstimate
+from buildscripts.util.teststats import HistoricTaskData
+from evergreen import EvergreenApi
+
+LOGGER = structlog.get_logger(__name__)
+CLEAN_EVERY_N_HOOK = "CleanEveryN"
+
+
+class TimeoutParams(NamedTuple):
+    """
+    Parameters about task being run.
+
+    * evg_project: Evergreen project.
+    * build_variant: Evergreen build variant.
+    * task_name: Evergreen task_name.
+    * suite_name: Test Suite being run.
+    * is_asan: Whether this run is part of an asan build.
+    """
+
+    evg_project: str
+    build_variant: str
+    task_name: str
+    suite_name: str
+    is_asan: bool
+
+
+class TimeoutSettings(NamedTuple):
+    """Settings for determining timeouts: the date window of historic stats to query."""
+
+    start_date: datetime
+    end_date: datetime
+
+
+class TimeoutService:
+    """A service for determining task timeouts."""
+
+    @inject.autoparams()
+    def __init__(self, evg_api: EvergreenApi, resmoke_proxy: ResmokeProxyService,
+                 timeout_settings: TimeoutSettings) -> None:
+        """
+        Initialize the service.
+
+        :param evg_api: Evergreen API client.
+        :param resmoke_proxy: Proxy to query resmoke.
+        :param timeout_settings: Settings for how timeouts are calculated.
+        """
+        self.evg_api = evg_api
+        self.resmoke_proxy = resmoke_proxy
+        self.timeout_settings = timeout_settings
+
+    def get_timeout_estimate(self, timeout_params: TimeoutParams) -> TimeoutEstimate:
+        """
+        Calculate the timeout estimate for the given task based on historic test results.
+
+        :param timeout_params: Details about the task to query.
+        :return: Historic-based timeout estimate, or `no_timeouts()` if history is incomplete.
+        """
+        historic_stats = self.lookup_historic_stats(timeout_params)
+        if not historic_stats:
+            return TimeoutEstimate.no_timeouts()
+
+        test_set = set(self.resmoke_proxy.list_tests(timeout_params.suite_name))
+        test_runtimes = [
+            stat for stat in historic_stats.get_tests_runtimes() if stat.test_name in test_set
+        ]
+        test_runtime_set = {test.test_name for test in test_runtimes}
+        for test in test_set:
+            if test not in test_runtime_set:
+                # If we don't have historic runtime information for all the tests, we cannot
+                # reliably determine a timeout, so fall back to a default timeout.
+                LOGGER.warning(
+                    "Could not find historic runtime information for test, using default timeout",
+                    test=test)
+                return TimeoutEstimate.no_timeouts()
+
+        total_runtime = 0.0
+        max_runtime = 0.0
+
+        for runtime in test_runtimes:
+            if runtime.runtime > 0.0:
+                total_runtime += runtime.runtime
+                max_runtime = max(max_runtime, runtime.runtime)
+            else:
+                LOGGER.warning("Found a test with 0 runtime, using default timeouts",
+                               test=runtime.test_name)
+                # We found a test with a runtime of 0, which indicates that it does not have a
+                # proper runtime history, so fall back to a default timeout.
+                return TimeoutEstimate.no_timeouts()
+
+        hook_overhead = self.get_task_hook_overhead(
+            timeout_params.suite_name, timeout_params.is_asan, len(test_set), historic_stats)
+        total_runtime += hook_overhead
+
+        return TimeoutEstimate(max_test_runtime=max_runtime, expected_task_runtime=total_runtime)
+
+    def get_task_hook_overhead(self, suite_name: str, is_asan: bool, test_count: int,
+                               historic_stats: Optional[HistoricTaskData]) -> float:
+        """
+        Calculate how much overhead from task-level hooks the suite should account for.
+
+        Certain test hooks must be accounted for at the task level rather than the test level
+        to calculate accurate timeouts, so their expected total runtime is estimated here.
+
+        :param suite_name: Name of suite being generated.
+        :param is_asan: Whether ASAN is being used.
+        :param test_count: Number of tests in sub-suite.
+        :param historic_stats: Historic runtime data of the suite; None yields no overhead.
+        :return: Expected total runtime of the CleanEveryN hook across the task's tests.
+        """
+        # The CleanEveryN hook is run every 'N' tests. The runtime of the
+        # hook will be associated with whichever test happens to be running, which could be
+        # different every run. So we need to take its runtime into account at the task level.
+        if historic_stats is None:
+            return 0.0
+
+        clean_every_n_cadence = self._get_clean_every_n_cadence(suite_name, is_asan)
+        avg_clean_every_n_runtime = historic_stats.get_avg_hook_runtime(CLEAN_EVERY_N_HOOK)
+        LOGGER.debug("task hook overhead", cadence=clean_every_n_cadence,
+                     runtime=avg_clean_every_n_runtime, is_asan=is_asan)
+        if avg_clean_every_n_runtime != 0:
+            n_expected_runs = test_count / clean_every_n_cadence
+            return n_expected_runs * avg_clean_every_n_runtime
+        return 0.0
+
+    def lookup_historic_stats(self, timeout_params: TimeoutParams) -> Optional[HistoricTaskData]:
+        """
+        Lookup historic test results stats for the given task.
+
+        :param timeout_params: Details about the task to lookup.
+        :return: Historic test results if they exist, None if missing or the lookup failed.
+        """
+        try:
+            evg_stats = HistoricTaskData.from_evg(
+                self.evg_api, timeout_params.evg_project, self.timeout_settings.start_date,
+                self.timeout_settings.end_date, timeout_params.task_name,
+                timeout_params.build_variant)
+            if not evg_stats:
+                LOGGER.warning("No historic runtime information available")
+                return None
+            return evg_stats
+        except Exception:  # pylint: disable=broad-except
+            # If we have any trouble getting the historic runtime information, log the issue and
+            # fall back to default timeouts instead of failing.
+            LOGGER.warning("Error querying history runtime information from evergreen",
+                           exc_info=True)
+            return None
+
+    def _get_clean_every_n_cadence(self, suite_name: str, is_asan: bool) -> int:
+        """
+        Get the N value for the CleanEveryN hook.
+
+        :param suite_name: Name of suite being generated.
+        :param is_asan: Whether ASAN is being used.
+        :return: How frequently (every N tests) the CleanEveryN hook is run.
+        """
+        # Default to 1, which is the worst case meaning CleanEveryN would run for every test.
+        clean_every_n_cadence = 1
+        if is_asan:
+            # ASAN runs hard-code N to 1. See `resmokelib/testing/hooks/cleanup.py`.
+            return clean_every_n_cadence
+
+        clean_every_n_config = self._get_hook_config(suite_name, CLEAN_EVERY_N_HOOK)
+        if clean_every_n_config:
+            clean_every_n_cadence = clean_every_n_config.get("n", 1)
+
+        return clean_every_n_cadence
+
+    def _get_hook_config(self, suite_name: str, hook_name: str) -> Optional[Dict[str, Any]]:
+        """
+        Get the configuration for the given hook from the given suite's config.
+
+        :param hook_name: Name of hook to query.
+        :return: Configuration for hook, if it exists.
+        """
+        hooks_config = self.resmoke_proxy.read_suite_config(suite_name).get("executor",
+                                                                            {}).get("hooks")
+        if hooks_config:
+            for hook in hooks_config:
+                if hook.get("class") == hook_name:
+                    return hook
+
+        return None
--- a/buildscripts/util/taskname.py
+++ b/buildscripts/util/taskname.py
@ -1,6 +1,7 @@
 """Functions for working with resmoke task names."""

 import math
+import re

 GEN_SUFFIX = "_gen"

@ -36,3 +37,25 @@ def remove_gen_suffix(task_name: str) -> str:
    if task_name.endswith(GEN_SUFFIX):
        return task_name[:-4]
    return task_name
+
+
+def determine_task_base_name(task_name: str, build_variant: str) -> str:
+    """
+    Determine the base name of a task.
+
+    For generated tasks the base name will have the build variant and sub-task index
+    stripped off. For other tasks, it is the unmodified task_name.
+
+    :param task_name: Name of task to get base name of.
+    :param build_variant: Build variant that may be included in task name.
+    :return: Base name of given task.
+    """
+    match = re.match(f"(.*)_([0-9]+|misc)_{build_variant}", task_name)
+    if match:
+        return match.group(1)
+
+    match = re.match(r"(.*)_([0-9]+|misc)", task_name)
+    if match:
+        return match.group(1)
+
+    return task_name
--- a/docs/evergreen-testing/index.md
+++ b/docs/evergreen-testing/index.md
@ -0,0 +1,5 @@
+# Testing in Evergreen
+
+Documentation about how MongoDB is tested in Evergreen.
+
+* [Task Timeouts](task_timeouts.md)
--- a/docs/evergreen-testing/task_timeouts.md
+++ b/docs/evergreen-testing/task_timeouts.md
@ -0,0 +1,35 @@
+# Evergreen Task Timeouts
+
+## Types of timeouts
+
+There are two types of timeouts that [evergreen supports](https://github.com/evergreen-ci/evergreen/wiki/Project-Commands#timeoutupdate):
+
+* **Exec timeout**: The _exec_ timeout is the overall timeout for a task. Once the total runtime for
+a task hits this value, the timeout logic will be triggered. This value is specified by
+**exec_timeout_secs** in the evergreen configuration.
+* **Idle timeout**: The _idle_ timeout is the amount of time in which evergreen will wait for
+output to be created before it considers the task hung and triggers timeout logic. This value
+is specified by **timeout_secs** in the evergreen configuration.
+
+**Note**: In most cases, **exec_timeout** is the more useful of the two timeouts.
+
+## Setting the timeout for a task
+
+There are a few ways in which the timeout can be determined for a task running in evergreen.
+
+* **Specified in 'etc/evergreen.yml'**: Timeout can be specified directly in the 'evergreen.yml' file,
+both on tasks and build variants. This can be useful for setting default timeout values, but is limited
+since different build variants frequently have different runtime characteristics and it is not possible
+to set timeouts for a task running on a specific build variant.
+
+* **etc/evergreen_timeouts.yml**: The 'etc/evergreen_timeouts.yml' file is for overriding timeouts
+for specific tasks on specific build variants. This provides a work-around for the limitations of
+specifying the timeouts directly in the 'evergreen.yml'. In order to use this method, the task
+must run the "determine task timeout" and "update task timeout expansions" functions at the beginning
+of the task evergreen definition. Most resmoke tasks already do this.
+
+* **buildscripts/evergreen_task_timeout.py**: This is the script that reads the 'etc/evergreen_timeouts.yml'
+file and calculates the timeout to use. Additionally, it will check the historic test results of the
+task being run and see if there is enough information to calculate timeouts based on that. It can
+also be used for more advanced ways of determining timeouts (e.g. the script is used to set much
+more aggressive timeouts on tasks that are run in the commit-queue).
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@ -1218,6 +1218,9 @@ functions:
  - *update_resmoke_jobs_expansions
  - *f_expansions_write
  - *configure_evergreen_api_credentials
+  - *determine_task_timeout
+  - *update_task_timeout_expansions
+  - *f_expansions_write
  - command: subprocess.exec
    params:
      binary: bash
@ -1243,6 +1246,7 @@ functions:

  "run tests":
    - *f_expansions_write
+    - *configure_evergreen_api_credentials
    - *determine_task_timeout
    - *update_task_timeout_expansions
    - *f_expansions_write
@ -2246,7 +2250,9 @@ tasks:
          - "./build/**.gcno"
          - "./etc/*san.suppressions"
          - "./etc/backports_required_for_multiversion_tests.yml"
+          - "./etc/evergreen_timeouts.yml"
          - "./etc/expansions.default.yml"
+          - "./etc/evergreen.yml"
          - "./etc/pip/**"
          - "./etc/repo_config.yaml"
          - "./etc/scons/**"
--- a/etc/evergreen_timeouts.yml
+++ b/etc/evergreen_timeouts.yml
@ -0,0 +1,93 @@
+# This file defines timeouts in evergreen that will override the default timeouts.
+#
+# Each key under `overrides` provides the build variant where the override will occur. The
+# override should include the `task` that should have its timeout overridden and either the
+# `exec_timeout` to override or the `idle_timeout` to override.
+#
+# The timeouts should be specified in minutes.
+
+# Note: In order to make it easier to find existing entries, please try to keep the build variants
+# in alphabetical order.
+
+overrides:
+  enterprise-macos:
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 150  # 2.5 hours
+
+  enterprise-rhel-80-64-bit-coverage:
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 150  # 2.5 hours.
+
+  enterprise-ubuntu2004-debug-tsan:
+  - task: run_unittests
+    exec_timeout: 24
+
+  enterprise-windows:
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 180  # 3 hours.
+
+  enterprise-windows-all-feature-flags-suggested:
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 180  # 3 hours.
+  - task: replica_sets_update_v1_oplog_jscore_passthrough
+    exec_timeout: 150  # 2.5 hours.
+
+  enterprise-windows-inmem:
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 180  # 3 hours.
+
+  enterprise-windows-required:
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 180  # 3 hours.
+  - task: replica_sets_update_v1_oplog_jscore_passthrough
+    exec_timeout: 150  # 2.5 hours.
+
+  linux-64-debug:
+  - task: auth
+    exec_timeout: 60  # 1 hour.
+
+  linux-64-debug-repeated-execution:
+  - task: run_unittests
+    exec_timeout: 120  # 2 hours.
+
+  macos:
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 150  # 2.5 hours
+
+  ubuntu1804-asan:
+  - task: run_unittests
+    exec_timeout: 24
+
+  ubuntu1804-debug-asan:
+  - task: run_unittests
+    exec_timeout: 24
+
+  ubuntu1804-debug-aubsan-lite:
+  - task: run_unittests
+    exec_timeout: 24
+
+  ubuntu1804-debug-ubsan:
+  - task: run_unittests
+    exec_timeout: 24
+
+  ubuntu1804-debug-suggested:
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 180  # 3 hours.
+
+  ubuntu1804-ubsan:
+  - task: run_unittests
+    exec_timeout: 24
+
+  windows:
+  - task: replica_sets
+    exec_timeout: 180  # 3 hours.
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 150  # 2.5 hours.
+
+  windows-debug-suggested:
+  - task: replica_sets_initsync_jscore_passthrough
+    exec_timeout: 150  # 2.5 hours.
+  - task: replica_sets_jscore_passthrough
+    exec_timeout: 180  # 3 hours.
+  - task: replica_sets_update_v1_oplog_jscore_passthrough
+    exec_timeout: 150  # 2.5 hours.
--- a/evergreen/functions/task_timeout_determine.sh
+++ b/evergreen/functions/task_timeout_determine.sh
@ -5,11 +5,25 @@ cd src

 set -o verbose
 set -o errexit
+
+# Set the suite name to be the task name by default; unless overridden with the `suite` expansion.
+suite_name=${task_name}
+if [[ -n ${suite} ]]; then
+  suite_name=${suite}
+fi
+
+timeout_factor=""
+if [[ -n "${exec_timeout_factor}" ]]; then
+  timeout_factor="--exec-timeout-factor ${exec_timeout_factor}"
+fi
+
 activate_venv
-$python buildscripts/evergreen_task_timeout.py \
+PATH=$PATH:$HOME:/ $python buildscripts/evergreen_task_timeout.py $timeout_factor \
  --task-name ${task_name} \
+  --suite-name ${suite_name} \
  --build-variant ${build_variant} \
  --evg-alias '${alias}' \
  --timeout ${timeout_secs} \
  --exec-timeout ${exec_timeout_secs} \
+  --evg-api-config ./.evergreen.yml \
  --out-file task_timeout_expansions.yml