mirror of https://github.com/mongodb/mongo
SERVER-56478 Run powercycle with extended host lifetime for easier debugging
This commit is contained in:
parent
51a860a764
commit
985e4fbf29
|
|
@ -0,0 +1,110 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Generate multiple powercycle tasks to run in evergreen."""
|
||||
from collections import namedtuple
|
||||
from typing import Any, List, Tuple, Set
|
||||
|
||||
import click
|
||||
from shrub.v2 import BuildVariant, FunctionCall, ShrubProject, Task, TaskDependency
|
||||
from shrub.v2.command import BuiltInCommand
|
||||
|
||||
from buildscripts.util.fileops import write_file
|
||||
from buildscripts.util.read_config import read_config_file
|
||||
from buildscripts.util.taskname import name_generated_task
|
||||
|
||||
# Bundle of every expansion-derived setting consumed by task generation.
Config = namedtuple("config", [
    "task_names", "num_tasks", "timeout_params", "remote_credentials_vars",
    "set_up_ec2_instance_vars", "run_powercycle_vars", "build_variant", "distro"
])
|
||||
|
||||
|
||||
def make_config(expansions_file: Any) -> Config:
    """Group expansions into config."""
    expansions = read_config_file(expansions_file)

    raw_task_names = expansions.get("task_names", "powercycle_smoke_skip_compile")
    # Avoid duplicated task names by collapsing the space-separated list into a set.
    task_names = set(raw_task_names.split(" "))

    num_tasks = int(expansions.get("num_tasks", 10))
    timeout_params = {
        "exec_timeout_secs": int(expansions.get("exec_timeout_secs", 7200)),
        "timeout_secs": int(expansions.get("timeout_secs", 1800)),
    }
    remote_credentials_vars = {
        "private_key_file": "src/powercycle.pem",
        "private_key_remote": "${__project_aws_ssh_key_value}",
    }
    set_up_ec2_instance_vars = {
        "set_up_retry_count": int(expansions.get("set_up_retry_count", 2)),
    }
    run_powercycle_vars = {
        "run_powercycle_args": expansions.get("run_powercycle_args"),
    }

    return Config(
        task_names=task_names,
        num_tasks=num_tasks,
        timeout_params=timeout_params,
        remote_credentials_vars=remote_credentials_vars,
        set_up_ec2_instance_vars=set_up_ec2_instance_vars,
        run_powercycle_vars=run_powercycle_vars,
        build_variant=expansions.get("build_variant"),
        distro=expansions.get("distro_id"),
    )
|
||||
|
||||
|
||||
def get_setup_commands() -> Tuple[List[FunctionCall], Set[TaskDependency]]:
    """Return setup commands."""
    commands = [FunctionCall("do setup")]
    dependencies = {TaskDependency("archive_dist_test_debug")}
    return commands, dependencies
|
||||
|
||||
|
||||
def get_skip_compile_setup_commands() -> Tuple[List[FunctionCall], set]:
    """Return skip compile setup commands."""
    # These functions fetch prebuilt binaries instead of compiling from scratch.
    setup_function_names = (
        "set task expansion macros",
        "set up venv",
        "upload pip requirements",
        "f_expansions_write",
        "configure evergreen api credentials",
        "get compiled binaries",
    )
    return [FunctionCall(name) for name in setup_function_names], set()
|
||||
|
||||
|
||||
@click.command()
@click.argument("expansions_file", type=str, default="expansions.yml")
@click.argument("output_file", type=str, default="powercycle_tasks.json")
def main(expansions_file: str = "expansions.yml",
         output_file: str = "powercycle_tasks.json") -> None:
    """Generate multiple powercycle tasks to run in evergreen."""
    config = make_config(expansions_file)
    build_variant = BuildVariant(config.build_variant)

    for task_name in config.task_names:
        # Tasks marked skip_compile fetch prebuilt binaries; all others compile.
        if "skip_compile" in task_name:
            commands, task_dependency = get_skip_compile_setup_commands()
        else:
            commands, task_dependency = get_setup_commands()

        # Shared tail of every generated task: credentials, timeouts, EC2 host, test run.
        commands.extend([
            FunctionCall("set up remote credentials", config.remote_credentials_vars),
            BuiltInCommand("timeout.update", config.timeout_params),
            FunctionCall("set up EC2 instance", config.set_up_ec2_instance_vars),
            FunctionCall("run powercycle test", config.run_powercycle_vars),
        ])

        sub_tasks = {
            Task(
                name_generated_task(task_name, index, config.num_tasks, config.build_variant),
                commands, task_dependency)
            for index in range(config.num_tasks)
        }
        build_variant.display_task(task_name, sub_tasks, distros=[config.distro])

    shrub_project = ShrubProject.empty()
    shrub_project.add_build_variant(build_variant)

    write_file(output_file, shrub_project.json())


if __name__ == '__main__':
    main()
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Powercycle tasks sentinel.
|
||||
|
||||
Error out when any powercycle task on the same buildvariant runs for more than 2 hours.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import List
|
||||
|
||||
import click
|
||||
import structlog
|
||||
from evergreen import RetryingEvergreenApi, EvergreenApi
|
||||
|
||||
from buildscripts.util.read_config import read_config_file
|
||||
|
||||
LOGGER = structlog.getLogger(__name__)

# Base URL used to build human-readable task links in log output.
EVERGREEN_HOST = "https://evergreen.mongodb.com"
# Candidate API config files, checked in order; the first that exists wins.
EVERGREEN_CONFIG_LOCATIONS = (
    # Common for machines in Evergreen
    os.path.join(os.getcwd(), ".evergreen.yml"),
    # Common for local machines
    os.path.expanduser(os.path.join("~", ".evergreen.yml")),
)
# A powercycle task running longer than this (2 hours) is flagged as stuck.
POWERCYCLE_TASK_EXEC_TIMEOUT_SECS = 2 * 60 * 60
# Sleep between polls of still-running tasks (5 minutes).
WATCH_INTERVAL_SECS = 5 * 60
|
||||
|
||||
|
||||
def get_evergreen_api() -> EvergreenApi:
    """Return evergreen API."""
    # Pick up the first config file found in common locations.
    for config_path in EVERGREEN_CONFIG_LOCATIONS:
        if os.path.isfile(config_path):
            return RetryingEvergreenApi.get_api(config_file=config_path)

    # No usable config: log where we looked and abort the sentinel.
    LOGGER.error("Evergreen config not found in locations.", locations=EVERGREEN_CONFIG_LOCATIONS)
    sys.exit(1)
|
||||
|
||||
|
||||
def watch_tasks(task_ids: List[str], evg_api: EvergreenApi, watch_interval_secs: int) -> List[str]:
    """Watch tasks if they run longer than exec timeout.

    Polls every watch_interval_secs until each task either finishes or exceeds
    POWERCYCLE_TASK_EXEC_TIMEOUT_SECS; returns the ids of tasks that exceeded it.
    """
    remaining_task_ids = task_ids[:]  # copy: callers keep their list intact
    long_running_task_ids = []

    while remaining_task_ids:
        LOGGER.info("Looking if powercycle tasks are still running on the current buildvariant.")
        current_tasks = [evg_api.task_by_id(task_id) for task_id in remaining_task_ids]
        for task in current_tasks:
            if task.finish_time:
                # Task completed; stop watching it.
                remaining_task_ids.remove(task.task_id)
                continue
            if task.start_time:
                running_secs = (datetime.now(timezone.utc) - task.start_time).total_seconds()
                if running_secs > POWERCYCLE_TASK_EXEC_TIMEOUT_SECS:
                    long_running_task_ids.append(task.task_id)
                    remaining_task_ids.remove(task.task_id)
        if remaining_task_ids:
            time.sleep(watch_interval_secs)

    return long_running_task_ids
|
||||
|
||||
|
||||
def get_links(task_ids: List[str]) -> str:
    """Return evergreen task urls delimited by newline."""
    urls = (f"{EVERGREEN_HOST}/task/{task_id}" for task_id in task_ids)
    return "\n".join(urls)
|
||||
|
||||
|
||||
@click.command()
@click.argument("expansions_file", type=str, default="expansions.yml")
def main(expansions_file: str = "expansions.yml") -> None:
    """Implementation."""
    logging.basicConfig(
        format="[%(levelname)s] %(message)s",
        level=logging.INFO,
        stream=sys.stdout,
    )
    structlog.configure(logger_factory=structlog.stdlib.LoggerFactory())

    expansions = read_config_file(expansions_file)
    build_id = expansions["build_id"]
    current_task_id = expansions["task_id"]
    gen_task_name = expansions["gen_task"]

    evg_api = get_evergreen_api()

    # Locate the generator task on this build by (sub)string match on its id.
    build_tasks = evg_api.tasks_by_build(build_id)
    candidate_ids = [task.task_id for task in build_tasks if gen_task_name in task.task_id]
    gen_task_id = candidate_ids[0]
    gen_task_url = f"{EVERGREEN_HOST}/task/{gen_task_id}"

    # Nothing to watch until the generator task has finished producing tasks.
    while evg_api.task_by_id(gen_task_id).is_active():
        LOGGER.info(
            f"Waiting for '{gen_task_name}' task to generate powercycle tasks:\n{gen_task_url}")
        time.sleep(WATCH_INTERVAL_SECS)

    def _is_watched_powercycle_task(task):
        # Exclude display tasks, this sentinel itself, and the generator.
        return (not task.display_only and "powercycle" in task.task_id
                and task.task_id != current_task_id and task.task_id != gen_task_id)

    # Re-fetch so the freshly generated powercycle tasks are included.
    build_tasks = evg_api.tasks_by_build(build_id)
    powercycle_task_ids = [
        task.task_id for task in build_tasks if _is_watched_powercycle_task(task)
    ]
    LOGGER.info(f"Watching powercycle tasks:\n{get_links(powercycle_task_ids)}")

    long_running_task_ids = watch_tasks(powercycle_task_ids, evg_api, WATCH_INTERVAL_SECS)
    if long_running_task_ids:
        LOGGER.error(
            f"Found powercycle tasks that are running for more than {POWERCYCLE_TASK_EXEC_TIMEOUT_SECS} "
            f"seconds and most likely something is going wrong in those tasks:\n{get_links(long_running_task_ids)}"
        )
        LOGGER.error(
            "Hopefully hosts from the tasks are still in run at the time you are seeing this "
            "and the Build team is able to check them to diagnose the issue.")
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
|
|
@ -156,6 +156,9 @@ MongoDB Powercycle Tests. To run a powercycle test locally, use the following st
|
|||
f" config values will be used from '{powercycle_config.POWERCYCLE_TASKS_CONFIG}'."
|
||||
f" [default: '%(default)s']", default="powercycle")
|
||||
|
||||
test_options.add_argument("--sshAccessRetryCount", dest="ssh_access_retry_count",
|
||||
help=argparse.SUPPRESS, type=int, default=5)
|
||||
|
||||
# MongoDB options
|
||||
mongodb_options.add_argument(
|
||||
"--downloadUrl", dest="tarball_url",
|
||||
|
|
|
|||
|
|
@ -25,9 +25,7 @@ class PowercycleCommand(Subcommand):
|
|||
def __init__(self):
|
||||
"""Initialize PowercycleCommand."""
|
||||
self.expansions = yaml.safe_load(open(powercycle_constants.EXPANSIONS_FILE))
|
||||
self.ssh_identity = self._get_ssh_identity()
|
||||
self.ssh_connection_options = \
|
||||
f"{self.ssh_identity} {powercycle_constants.DEFAULT_SSH_CONNECTION_OPTIONS}"
|
||||
self.ssh_connection_options = f"-i powercycle.pem {powercycle_constants.DEFAULT_SSH_CONNECTION_OPTIONS}"
|
||||
self.sudo = "" if self.is_windows() else "sudo"
|
||||
# The username on the Windows image that powercycle uses is currently the default user.
|
||||
self.user = "Administrator" if self.is_windows() else getpass.getuser()
|
||||
|
|
@ -52,14 +50,6 @@ class PowercycleCommand(Subcommand):
|
|||
buff = buff_stdout.decode("utf-8", "replace")
|
||||
return process.poll(), buff
|
||||
|
||||
def _get_ssh_identity(self) -> str:
|
||||
workdir = self.expansions['workdir']
|
||||
if self.is_windows():
|
||||
workdir = workdir.replace("\\", "/")
|
||||
pem_file = '/'.join([workdir, 'src', 'powercycle.pem'])
|
||||
|
||||
return f"-i {pem_file}"
|
||||
|
||||
|
||||
def execute_cmd(cmd, use_file=False):
|
||||
"""Execute command and returns return_code, output from command."""
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class RemoteOperations(object): # pylint: disable=too-many-instance-attributes
|
|||
|
||||
def __init__( # pylint: disable=too-many-arguments
|
||||
self, user_host, ssh_connection_options=None, ssh_options=None, scp_options=None,
|
||||
shell_binary="/bin/bash", use_shell=False, ignore_ret=False):
|
||||
shell_binary="/bin/bash", use_shell=False, ignore_ret=False, access_retry_count=5):
|
||||
"""Initialize RemoteOperations."""
|
||||
|
||||
self.user_host = user_host
|
||||
|
|
@ -62,6 +62,7 @@ class RemoteOperations(object): # pylint: disable=too-many-instance-attributes
|
|||
self.ignore_ret = ignore_ret
|
||||
self.shell_binary = shell_binary
|
||||
self.use_shell = use_shell
|
||||
self.access_retry_count = access_retry_count
|
||||
# Check if we can remotely access the host.
|
||||
self._access_code, self._access_buff = self._remote_access()
|
||||
|
||||
|
|
@ -99,7 +100,7 @@ class RemoteOperations(object): # pylint: disable=too-many-instance-attributes
|
|||
"""Check if a remote session is possible."""
|
||||
cmd = "ssh {} {} {} date".format(self.ssh_connection_options, self.ssh_options,
|
||||
self.user_host)
|
||||
return self._call_retries(cmd, 5)
|
||||
return self._call_retries(cmd, self.access_retry_count)
|
||||
|
||||
def _perform_operation(self, cmd, retry, retry_count):
|
||||
if retry:
|
||||
|
|
|
|||
|
|
@ -724,13 +724,13 @@ class LocalToRemoteOperations(object):
|
|||
|
||||
def __init__( # pylint: disable=too-many-arguments
|
||||
self, user_host, ssh_connection_options=None, ssh_options=None,
|
||||
shell_binary="/bin/bash", use_shell=False):
|
||||
shell_binary="/bin/bash", use_shell=False, access_retry_count=5):
|
||||
"""Initialize LocalToRemoteOperations."""
|
||||
|
||||
self.remote_op = remote_operations.RemoteOperations(
|
||||
user_host=user_host, ssh_connection_options=ssh_connection_options,
|
||||
ssh_options=ssh_options, shell_binary=shell_binary, use_shell=use_shell,
|
||||
ignore_ret=True)
|
||||
ignore_ret=True, access_retry_count=access_retry_count)
|
||||
|
||||
def shell(self, cmds, remote_dir=None):
|
||||
"""Return tuple (ret, output) from performing remote shell operation."""
|
||||
|
|
@ -1332,7 +1332,7 @@ def main(parser_actions, options): # pylint: disable=too-many-branches,too-many
|
|||
|
||||
LOGGER.info("powercycle invocation: %s", " ".join(sys.argv))
|
||||
|
||||
task_name = options.task_name
|
||||
task_name = re.sub(r"(_[0-9]+)(_[\w-]+)?$", "", options.task_name)
|
||||
task_config = powercycle_config.get_task_config(task_name, options.remote_operation)
|
||||
|
||||
LOGGER.info("powercycle task config: %s", task_config)
|
||||
|
|
@ -1450,9 +1450,9 @@ def main(parser_actions, options): # pylint: disable=too-many-branches,too-many
|
|||
ssh_options = "" if _IS_WINDOWS else "-tt"
|
||||
|
||||
# Instantiate the local handler object.
|
||||
local_ops = LocalToRemoteOperations(user_host=ssh_user_host,
|
||||
ssh_connection_options=ssh_connection_options,
|
||||
ssh_options=ssh_options, use_shell=True)
|
||||
local_ops = LocalToRemoteOperations(
|
||||
user_host=ssh_user_host, ssh_connection_options=ssh_connection_options,
|
||||
ssh_options=ssh_options, use_shell=True, access_retry_count=options.ssh_access_retry_count)
|
||||
verify_remote_access(local_ops)
|
||||
|
||||
# Pass client_args to the remote script invocation.
|
||||
|
|
@ -1662,7 +1662,8 @@ def main(parser_actions, options): # pylint: disable=too-many-branches,too-many
|
|||
# Reestablish remote access after crash.
|
||||
local_ops = LocalToRemoteOperations(user_host=ssh_user_host,
|
||||
ssh_connection_options=ssh_connection_options,
|
||||
ssh_options=ssh_options, use_shell=True)
|
||||
ssh_options=ssh_options, use_shell=True,
|
||||
access_retry_count=options.ssh_access_retry_count)
|
||||
verify_remote_access(local_ops)
|
||||
ret, output = call_remote_operation(local_ops, remote_python, script_name, client_args,
|
||||
"--remoteOperation noop")
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@ class SetUpEC2Instance(PowercycleCommand):
|
|||
def execute(self) -> None: # pylint: disable=too-many-instance-attributes, too-many-locals, too-many-statements
|
||||
""":return: None."""
|
||||
|
||||
default_retry_count = 2
|
||||
retry_count = int(self.expansions.get("set_up_retry_count", default_retry_count))
|
||||
|
||||
# First operation -
|
||||
# Create remote_dir.
|
||||
group_cmd = f"id -Gn {self.user}"
|
||||
|
|
@ -31,7 +34,7 @@ class SetUpEC2Instance(PowercycleCommand):
|
|||
cmds = f"{self.sudo} mkdir -p {remote_dir}; {self.sudo} chown -R {user_group} {remote_dir}; {set_permission_stmt} {remote_dir}; ls -ld {remote_dir}"
|
||||
cmds = f"{cmds}; {self.sudo} mkdir -p {db_path}; {self.sudo} chown -R {user_group} {db_path}; {set_permission_stmt} {db_path}; ls -ld {db_path}"
|
||||
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2)
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count)
|
||||
|
||||
# Second operation -
|
||||
# Copy buildscripts and mongoDB executables to the remote host.
|
||||
|
|
@ -41,7 +44,8 @@ class SetUpEC2Instance(PowercycleCommand):
|
|||
if os.path.isdir(shared_libs):
|
||||
files.append(shared_libs)
|
||||
|
||||
self.remote_op.operation(SSHOperation.COPY_TO, files, remote_dir, retry=True, retry_count=2)
|
||||
self.remote_op.operation(SSHOperation.COPY_TO, files, remote_dir, retry=True,
|
||||
retry_count=retry_count)
|
||||
|
||||
# Third operation -
|
||||
# Set up virtualenv on remote.
|
||||
|
|
@ -57,7 +61,7 @@ class SetUpEC2Instance(PowercycleCommand):
|
|||
cmds = f"{cmds}; . $activate"
|
||||
cmds = f"{cmds}; pip3 install -r $remote_dir/etc/pip/powercycle-requirements.txt"
|
||||
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2)
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count)
|
||||
|
||||
# Fourth operation -
|
||||
# Enable core dumps on non-Windows remote hosts.
|
||||
|
|
@ -81,7 +85,7 @@ class SetUpEC2Instance(PowercycleCommand):
|
|||
# https://unix.stackexchange.com/a/349558 in order to ensure the ssh client gets a
|
||||
# response from the remote machine before it restarts.
|
||||
cmds = f"{cmds}; nohup {self.sudo} reboot &>/dev/null & exit"
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2)
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count)
|
||||
|
||||
# Fifth operation -
|
||||
# Print the ulimit & kernel.core_pattern
|
||||
|
|
@ -93,7 +97,7 @@ class SetUpEC2Instance(PowercycleCommand):
|
|||
cmds = f"{cmds}; then /sbin/sysctl kernel.core_pattern"
|
||||
cmds = f"{cmds}; fi"
|
||||
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2)
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count)
|
||||
|
||||
# Sixth operation -
|
||||
# Set up curator to collect system & process stats on remote.
|
||||
|
|
@ -120,7 +124,7 @@ class SetUpEC2Instance(PowercycleCommand):
|
|||
cmds = f"{cmds}; crontab -l"
|
||||
cmds = f"{cmds}; {{ {self.sudo} $HOME/curator stat system --file {monitor_system_file} > /dev/null 2>&1 & {self.sudo} $HOME/curator stat process-all --file {monitor_proc_file} > /dev/null 2>&1 & }} & disown"
|
||||
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2)
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count)
|
||||
|
||||
# Seventh operation -
|
||||
# Install NotMyFault, used to crash Windows.
|
||||
|
|
@ -132,4 +136,4 @@ class SetUpEC2Instance(PowercycleCommand):
|
|||
cmds = f"curl -s -o {windows_crash_zip} {windows_crash_dl}"
|
||||
cmds = f"{cmds}; unzip -q {windows_crash_zip} -d {windows_crash_dir}"
|
||||
cmds = f"{cmds}; chmod +x {windows_crash_dir}/*.exe"
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=2)
|
||||
self.remote_op.operation(SSHOperation.SHELL, cmds, retry=True, retry_count=retry_count)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,51 @@
|
|||
"""Unit tests for powercycle_sentinel.py."""
|
||||
# pylint: disable=missing-docstring
|
||||
import unittest
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from unittest.mock import Mock
|
||||
|
||||
from evergreen import EvergreenApi, Task
|
||||
|
||||
from buildscripts.powercycle_sentinel import watch_tasks, POWERCYCLE_TASK_EXEC_TIMEOUT_SECS
|
||||
|
||||
|
||||
def make_task_mock(evg_api, task_id, start_time, finish_time):
    """Build a real evergreen Task from the minimal json the tests need."""
    task_json = {
        "task_id": task_id,
        "start_time": start_time,
        "finish_time": finish_time,
    }
    return Task(task_json, evg_api)
|
||||
|
||||
|
||||
class TestWatchTasks(unittest.TestCase):
    """Test watch_tasks."""

    def test_no_long_running_tasks(self):
        """Finished tasks must never be reported as long-running."""
        evg_api = EvergreenApi()
        task_ids = ["1", "2"]
        now = datetime.now(timezone.utc).isoformat()
        task_1 = make_task_mock(evg_api, task_ids[0], now, now)
        task_2 = make_task_mock(evg_api, task_ids[1], now, now)
        evg_api.task_by_id = Mock(
            side_effect=(lambda task_id: {
                "1": task_1,
                "2": task_2,
            }[task_id]))
        long_running_task_ids = watch_tasks(task_ids, evg_api, 0)
        self.assertEqual([], long_running_task_ids)

    def test_found_long_running_tasks(self):
        """An unfinished task started just past the exec timeout is reported."""
        evg_api = EvergreenApi()
        task_ids = ["1", "2"]
        # Start just over the timeout ago. The previous fixture passed the
        # seconds value to timedelta(hours=...), overshooting the intended
        # boundary by a factor of 3600 and never exercising it.
        past_exec_timeout = (
            datetime.now(timezone.utc) -
            timedelta(seconds=POWERCYCLE_TASK_EXEC_TIMEOUT_SECS + 1)).isoformat()
        now = datetime.now(timezone.utc).isoformat()
        task_1 = make_task_mock(evg_api, task_ids[0], past_exec_timeout, now)
        task_2 = make_task_mock(evg_api, task_ids[1], past_exec_timeout, None)
        evg_api.task_by_id = Mock(
            side_effect=(lambda task_id: {
                "1": task_1,
                "2": task_2,
            }[task_id]))
        long_running_task_ids = watch_tasks(task_ids, evg_api, 0)
        # Only task_2 (no finish_time) counts; task_1 already finished.
        self.assertEqual([task_2.task_id], long_running_task_ids)
|
||||
|
|
@ -121,8 +121,6 @@ variables:
|
|||
- &powercycle_remote_credentials
|
||||
private_key_file: src/powercycle.pem
|
||||
private_key_remote: ${__project_aws_ssh_key_value}
|
||||
aws_key_remote: ${powercycle_aws_key}
|
||||
aws_secret_remote: ${powercycle_aws_secret}
|
||||
|
||||
- &libfuzzertests
|
||||
name: libfuzzertests!
|
||||
|
|
@ -891,12 +889,68 @@ functions:
|
|||
- *f_expansions_write
|
||||
- *do_multiversion_setup
|
||||
|
||||
"move multiversion binaries": &move_multiversion_binaries
|
||||
# Used by generator
|
||||
"get compiled binaries":
|
||||
command: subprocess.exec
|
||||
params:
|
||||
binary: bash
|
||||
args:
|
||||
- "./src/evergreen/move_multiversion_binaries.sh"
|
||||
- "./src/evergreen/compiled_binaries_get.sh"
|
||||
env:
|
||||
workdir: ${workdir}
|
||||
|
||||
"generate powercycle tasks":
|
||||
- *set_task_expansion_macros
|
||||
- *f_expansions_write
|
||||
- *set_up_venv
|
||||
- *upload_pip_requirements
|
||||
|
||||
- command: subprocess.exec
|
||||
params:
|
||||
binary: bash
|
||||
args:
|
||||
- "./src/evergreen/powercycle_tasks_generate.sh"
|
||||
env:
|
||||
workdir: ${workdir}
|
||||
|
||||
- command: archive.targz_pack
|
||||
params:
|
||||
target: powercycle_tasks_config.tgz
|
||||
source_dir: "./"
|
||||
include:
|
||||
- "powercycle_tasks.json"
|
||||
|
||||
- command: s3.put
|
||||
params:
|
||||
aws_key: ${aws_key}
|
||||
aws_secret: ${aws_secret}
|
||||
local_file: powercycle_tasks_config.tgz
|
||||
remote_file: ${project}/${build_variant}/${revision}/powercycle_tasks/${task_name}-${build_id}.tgz
|
||||
bucket: mciuploads
|
||||
permissions: public-read
|
||||
content_type: application/gzip
|
||||
display_name: Generated Task Config - Execution ${execution}
|
||||
|
||||
- command: generate.tasks
|
||||
params:
|
||||
files:
|
||||
- powercycle_tasks.json
|
||||
|
||||
"run powercycle sentinel":
|
||||
- *set_task_expansion_macros
|
||||
- *f_expansions_write
|
||||
- *set_up_venv
|
||||
- *upload_pip_requirements
|
||||
- *configure_evergreen_api_credentials
|
||||
|
||||
- command: subprocess.exec
|
||||
type: system
|
||||
params:
|
||||
binary: bash
|
||||
args:
|
||||
- "./src/evergreen/powercycle_sentinel_run.sh"
|
||||
env:
|
||||
workdir: ${workdir}
|
||||
|
||||
"execute resmoke tests": &execute_resmoke_tests
|
||||
command: subprocess.exec
|
||||
|
|
@ -1607,6 +1661,7 @@ functions:
|
|||
params:
|
||||
provider: ec2
|
||||
distro: ${distro_id}
|
||||
timeout_teardown_secs: 604800 # 7 days
|
||||
security_group_ids:
|
||||
- sg-097bff6dd0d1d31d0
|
||||
|
||||
|
|
@ -6075,26 +6130,24 @@ tasks:
|
|||
resmoke_args: --suites=json_schema --storageEngine=wiredTiger
|
||||
resmoke_jobs_max: 1
|
||||
|
||||
- name: powercycle_smoke_skip_compile
|
||||
exec_timeout_secs: 7200 # 2 hour timeout for the task overall
|
||||
- name: powercycle_smoke_skip_compile_gen
|
||||
commands:
|
||||
- *f_expansions_write
|
||||
- func: "set task expansion macros"
|
||||
- *f_expansions_write
|
||||
- func: "set up venv"
|
||||
- func: "upload pip requirements"
|
||||
- *f_expansions_write
|
||||
- func: "configure evergreen api credentials"
|
||||
- func: "do multiversion setup"
|
||||
- func: "generate powercycle tasks"
|
||||
vars:
|
||||
install_master_bin: true
|
||||
- *move_multiversion_binaries
|
||||
- func: "set up remote credentials"
|
||||
task_names: >-
|
||||
powercycle_smoke_skip_compile
|
||||
num_tasks: 20
|
||||
exec_timeout_secs: 604800 # 7 days
|
||||
timeout_secs: 604800 # 7 days
|
||||
set_up_retry_count: 1000000
|
||||
run_powercycle_args: --sshAccessRetryCount=1000000
|
||||
|
||||
- name: powercycle_sentinel
|
||||
exec_timeout_secs: 604800 # 7 days
|
||||
commands:
|
||||
- func: "run powercycle sentinel"
|
||||
vars:
|
||||
<<: *powercycle_remote_credentials
|
||||
- func: "set up EC2 instance"
|
||||
- func: "run powercycle test"
|
||||
timeout_secs: 1800 # 30 minute timeout for no output
|
||||
gen_task: powercycle_smoke_skip_compile_gen
|
||||
|
||||
- name: powercycle_smoke
|
||||
exec_timeout_secs: 7200 # 2 hour timeout for the task overall
|
||||
|
|
@ -8225,14 +8278,13 @@ buildvariants:
|
|||
run_on:
|
||||
- rhel80-small
|
||||
expansions:
|
||||
multiversion_platform: amazon2
|
||||
multiversion_platform: rhel80
|
||||
multiversion_edition: enterprise
|
||||
stepback: false
|
||||
tasks:
|
||||
- name: lint_fuzzer_sanity_all
|
||||
- name: powercycle_smoke_skip_compile
|
||||
distros:
|
||||
- amazon2-test
|
||||
- name: powercycle_sentinel
|
||||
- name: powercycle_smoke_skip_compile_gen
|
||||
|
||||
- name: security-daily-cron
|
||||
modules:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
. "$DIR/prelude.sh"

cd src

set -o errexit
set -o verbose

activate_venv

# Start from a clean slate so stale binaries are never picked up.
rm -rf /data/install dist-test/bin

edition="${multiversion_edition}"
platform="${multiversion_platform}"
architecture="${multiversion_architecture}"

# Newer release branches may publish under different edition/platform/
# architecture names; later overrides win (4.4-era beats 4.2-era).
if [ -n "${multiversion_edition_42_or_later}" ]; then
  edition="${multiversion_edition_42_or_later}"
fi
if [ -n "${multiversion_platform_42_or_later}" ]; then
  platform="${multiversion_platform_42_or_later}"
fi
if [ -n "${multiversion_architecture_42_or_later}" ]; then
  architecture="${multiversion_architecture_42_or_later}"
fi

if [ -n "${multiversion_edition_44_or_later}" ]; then
  edition="${multiversion_edition_44_or_later}"
fi
if [ -n "${multiversion_platform_44_or_later}" ]; then
  platform="${multiversion_platform_44_or_later}"
fi
if [ -n "${multiversion_architecture_44_or_later}" ]; then
  architecture="${multiversion_architecture_44_or_later}"
fi

# This is primarily for tests for infrastructure which don't always need the latest
# binaries.
# Expansions are quoted so their values are never word-split or glob-expanded.
$python buildscripts/resmoke.py setup-multiversion \
  --installDir /data/install \
  --linkDir dist-test/bin \
  --edition "$edition" \
  --platform "$platform" \
  --architecture "$architecture" \
  --githubOauthToken "${github_token}" \
  --useLatest master
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
set -o verbose
|
||||
|
||||
cd src
|
||||
# powercycle expects the binaries to be in dist-test/bin
|
||||
mkdir -p dist-test/bin
|
||||
mv /data/multiversion/* dist-test/bin/
|
||||
|
|
@ -74,16 +74,3 @@ $python buildscripts/resmoke.py setup-multiversion \
|
|||
--architecture $architecture \
|
||||
--githubOauthToken "${github_token}" \
|
||||
--useLatest 4.4 4.7 4.8 4.9
|
||||
|
||||
# This is primarily for tests for infrastructure which don't always need the latest
|
||||
# binaries.
|
||||
if [ ! -z "${install_master_bin}" ]; then
|
||||
$python buildscripts/resmoke.py setup-multiversion \
|
||||
--installDir /data/install \
|
||||
--linkDir /data/multiversion \
|
||||
--edition $edition \
|
||||
--platform $platform \
|
||||
--architecture $architecture \
|
||||
--githubOauthToken "${github_token}" \
|
||||
--useLatest master
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -18,5 +18,6 @@ trap 'echo $? > error_exit.txt; exit 0' EXIT
|
|||
set +o errexit
|
||||
eval $python -u buildscripts/resmoke.py powercycle run \
|
||||
"--sshUserHost=$(printf "%s@%s" "$user" "${private_ip_address}") \
|
||||
--sshConnection=\"-i ${private_key_file}\" \
|
||||
--taskName=${task_name}"
|
||||
--sshConnection=\"-i powercycle.pem\" \
|
||||
--taskName=${task_name} \
|
||||
${run_powercycle_args}"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,10 @@
|
|||
# Run the powercycle sentinel, which errors out when any powercycle task on
# the same buildvariant runs for longer than its exec timeout.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
. "$DIR/prelude.sh"

cd src

set -o errexit
set -o verbose

activate_venv
# Evergreen writes the expansions file one directory above the checkout.
$python buildscripts/powercycle_sentinel.py ../expansions.yml
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
# Generate the powercycle task definitions consumed by Evergreen's
# generate.tasks command.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
. "$DIR/prelude.sh"

cd src

set -o errexit
set -o verbose

activate_venv
# Reads ../expansions.yml and writes the generated task config next to the
# checkout as ../powercycle_tasks.json.
$python buildscripts/evergreen_gen_powercycle_tasks.py ../expansions.yml ../powercycle_tasks.json
|
||||
Loading…
Reference in New Issue