mongo/buildscripts/golden_test.py

#!/usr/bin/env python3
"""
Utility to interact with golden data test outputs produced by the golden data test framework.

For details on the golden data test framework see: docs/golden_data_test_framework.md.
"""

import os
import pathlib
import platform
import re
import shutil
import sys
from dataclasses import dataclass
from datetime import datetime
from subprocess import CalledProcessError, call, check_call, check_output
from typing import List, Optional

import click

# Get relative imports to work when the package is not installed on the PYTHONPATH.
if __name__ == "__main__" and __package__ is None:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from buildscripts.util.fileops import read_yaml_file

assert sys.version_info >= (3, 7)


@dataclass
class Output:
    """A single test output directory.

    Attributes:
        name: Name of the output directory.
        ctime: Timestamp in seconds since the epoch.
    """

    name: str
    # Filled from os.path.getctime(), which returns a float (not an int).
    ctime: float

    def __repr__(self) -> str:
        """Return the name followed by the timestamp formatted in local time."""
        stamp = datetime.fromtimestamp(self.ctime)
        return f"{self.name} {stamp:%Y-%m-%d %H:%M:%S}"


class AppError(Exception):
    """Error raised for failures during application execution."""


class GoldenTestConfig(object):
    """Represents the golden test configuration.

    See: docs/golden_data_test_framework.md#appendix---config-file-reference

    Every key passed to the constructor becomes an instance attribute. The
    declared fields default to None so that a key missing from the config can be
    detected with an ``is None`` check instead of raising AttributeError.
    """

    outputRootPattern: Optional[str] = None
    diffCmd: Optional[str] = None

    def __init__(self, iterable=(), **kwargs):
        """Initialize the fields.

        Args:
            iterable: Mapping or iterable of (key, value) pairs stored as attributes.
            **kwargs: Further attributes; on a key clash these win over ``iterable``.
        """
        self.__dict__.update(iterable, **kwargs)

    @classmethod
    def from_yaml_file(cls, path: str) -> "GoldenTestConfig":
        """Read the golden test configuration from the given file.

        Args:
            path: Location of the YAML config file.

        Returns:
            A config object whose attributes are the top-level keys of the file.
        """
        # NOTE(review): presumably read_yaml_file returns None for an empty document;
        # treat any falsy result as an empty config rather than failing on `**None`.
        contents = read_yaml_file(path) or {}
        return cls(**contents)


class OutputPaths(object):
    """Represents actual and expected output paths."""

    # Path of the folder holding the actual test output.
    actual: str
    # Path of the folder holding the expected (golden) test output.
    expected: str

    def __init__(self, actual: str, expected: str):
        """Initialize the fields.

        Args:
            actual: Path of the "actual" folder.
            expected: Path of the "expected" folder.
        """
        self.actual = actual
        self.expected = expected


def replace_variables(pattern: str, variables: dict) -> str:
    """Substitute every ``{{name}}`` placeholder in pattern with ``variables[name]``.

    A placeholder whose name is not a key of ``variables`` raises KeyError.
    """

    def _substitute(found):
        return variables[found.group(1)]

    placeholder = re.compile(r"\{\{(\w+)\}\}")
    return placeholder.sub(_substitute, pattern)


def get_path_name_regex(pattern: str) -> str:
    """Build a regex for output names from a name pattern.

    Each ``%`` in the pattern stands for one lowercase hexadecimal digit; all
    other text is escaped so that it matches literally.
    """
    literal_parts = pattern.split("%")
    return "[0-9a-f]".join(map(re.escape, literal_parts))


# Fallback for Python versions older than 3.8, where shutil.copytree does not
# accept dirs_exist_ok=True.
def copytree_dirs_exist_ok_compatibility(src, dest):
    """Recursively copy src onto dest, creating dest directories only when missing."""
    if not os.path.isdir(src):
        # Plain file: copy its contents over the destination path.
        shutil.copyfile(src, dest)
        return

    if not os.path.isdir(dest):
        os.makedirs(dest)
    for entry in os.listdir(src):
        copytree_dirs_exist_ok_compatibility(os.path.join(src, entry), os.path.join(dest, entry))


def copytree_dirs_exist_ok(src, dest):
    """Copy the src tree into dest, allowing dest and its sub-directories to already exist."""
    if sys.version_info < (3, 8):
        # shutil.copytree has no dirs_exist_ok here; use the hand-written fallback.
        copytree_dirs_exist_ok_compatibility(src, dest)
        return
    shutil.copytree(src, dest, dirs_exist_ok=True)


# Root command group. The sub-commands are registered on it with @cli.command(...)
# further down in this file. click renders the function's docstring as the group's
# help text, so that text is user-facing output; the line holding only "\b" tells
# click not to re-wrap the paragraph that follows it (the long URL).
@click.group()
@click.option("-n", "--dry-run", is_flag=True)
@click.option("-v", "--verbose", is_flag=True)
@click.option(
    "--config",
    envvar="GOLDEN_TEST_CONFIG_PATH",
    help="Config file path. Also GOLDEN_TEST_CONFIG_PATH environment variable.",
)
@click.pass_context
def cli(ctx, dry_run, verbose, config):
    """Manage test results from golden data test framework.

    Allows for querying, diffing and accepting the golden data test results.

    \b
    For advanced setup guide see: https://wiki.corp.mongodb.com/display/KERNEL/Golden+Data+test+framework+and+workstation+setup
    """

    # Store the application object on the click context. The sub-commands are
    # decorated with @click.pass_obj and receive this object as their first argument.
    ctx.obj = GoldenTestApp(dry_run, verbose, config)


class GoldenTestApp(object):
    """Represents the golden application.

    Holds the global CLI options and the loaded configuration, and implements the
    sub-commands registered on the ``cli`` group. Commands that operate on test
    outputs call ``init_config()`` before doing anything else.
    """

    verbose: bool = False
    dry_run: bool = False
    config_path: Optional[str] = None
    # Set by init_config(). Quoted so the annotation is not evaluated at class creation.
    config: "Optional[GoldenTestConfig]" = None
    # The three attributes below are derived from config.outputRootPattern by init_config().
    output_parent_path: Optional[pathlib.Path] = None
    output_name_pattern: Optional[str] = None
    output_name_regex: Optional[str] = None

    def __init__(self, dry_run, verbose, config_path):
        """Initialize the app.

        Args:
            dry_run: When true, destructive actions and shell commands are skipped or only printed.
            verbose: When true, vprint() writes progress messages to stderr.
            config_path: Path of the config file, or None when not configured.
        """
        self.verbose = verbose
        self.dry_run = dry_run
        self.config_path = config_path

    def init_config(self):
        """Load the config file and derive the output parent path, name pattern and name regex."""
        self.config = self.load_config(self.config_path)

        output_root = pathlib.Path(self.config.outputRootPattern)
        self.output_parent_path = output_root.parent
        self.output_name_pattern = str(output_root.name)
        self.output_name_regex = get_path_name_regex(self.output_name_pattern)

    def vprint(self, *args, **kwargs):
        """Verbose print, if enabled."""
        if self.verbose:
            print(*args, file=sys.stderr, **kwargs)

    def call_shell(self, cmd):
        """Call shell command, or only print it when dry_run is set.

        The command's exit status is not inspected.
        """
        if not self.dry_run:
            call(cmd, shell=True)
        else:
            print(cmd)

    def get_git_root(self):
        """Return the root for git repo."""
        self.vprint("Querying git repo root")
        repo_root = check_output("git rev-parse --show-toplevel", shell=True, text=True).strip()
        self.vprint(f"Found git repo root: '{repo_root}'")
        return repo_root

    def load_config(self, config_path):
        """Load configuration file.

        Args:
            config_path: Path of the YAML config file, or None.

        Returns:
            The loaded GoldenTestConfig.

        Raises:
            AppError: If config_path is None or the config has no outputRootPattern.
        """
        if config_path is None:
            raise AppError(
                (
                    "Can't load config. GOLDEN_TEST_CONFIG_PATH environment variable is not set. Golden test CLI must be configured before use.\n"
                    "To configure it, follow the instructions in https://github.com/mongodb/mongo/blob/master/docs/golden_data_test_framework.md#how-to-diff-and-accept-new-test-outputs-on-a-workstation\n"
                    "Note: After setup you may need to rerun the tests for this utility to find them."
                )
            )

        self.vprint(f"Loading config from path: '{config_path}'")
        config = GoldenTestConfig.from_yaml_file(config_path)

        # getattr: a key that is absent from the file may not exist as an attribute at all.
        if getattr(config, "outputRootPattern", None) is None:
            raise AppError("Invalid config. outputRootPattern config parameter is not set")

        return config

    def get_output_path(self, output_name):
        """Return the path for given output name.

        Raises:
            AppError: If the name does not match the configured pattern, or no
                directory with that name exists under the output parent path.
        """
        # fullmatch: the whole name must follow the pattern, not merely start with it.
        if not re.fullmatch(self.output_name_regex, output_name):
            raise AppError(
                f"Invalid name: '{output_name}'. "
                + f"Does not match configured pattern: {self.output_name_pattern}"
            )
        output_path = os.path.join(self.output_parent_path, output_name)
        if not os.path.isdir(output_path):
            raise AppError(f"No such directory: '{output_path}'")
        return output_path

    def get_outputs(self) -> List[Output]:
        """Return all available outputs.

        An output is a directory directly under the output parent path whose
        whole name matches the configured name pattern. Returns an empty list
        when the parent path is not a directory.
        """
        self.vprint(
            f"Listing outputs in path: '{self.output_parent_path}' "
            + f"matching '{self.output_name_pattern}'"
        )

        if not os.path.isdir(self.output_parent_path):
            return []

        outputs = []
        for name in os.listdir(self.output_parent_path):
            # fullmatch keeps look-alike names (pattern plus extra suffix) out of the
            # result, which matters because clean() deletes everything returned here.
            if not re.fullmatch(self.output_name_regex, name):
                continue
            output_path = os.path.join(self.output_parent_path, name)
            if os.path.isdir(output_path):
                outputs.append(Output(name=name, ctime=os.path.getctime(output_path)))
        return outputs

    def get_latest_output(self) -> Output:
        """Return the output with the most recent created timestamp.

        Raises:
            AppError: If there are no outputs.
        """
        self.vprint("Searching for output with latest creation time")
        outputs = self.get_outputs()
        if not outputs:
            raise AppError("No outputs found")

        latest = max(outputs, key=lambda output: output.ctime)
        self.vprint(
            f"Found output with latest creation time: {latest.name} "
            + f"created at {latest.ctime}"
        )
        return latest

    def get_latest_or_matching_output(self, output_name) -> Output:
        """Return the output named output_name, or the latest output when it is None.

        Raises:
            AppError: If no output, or more than one output, is found.
        """
        if output_name is None:
            return self.get_latest_output()

        outputs = [output for output in self.get_outputs() if output.name == output_name]
        if len(outputs) == 1:
            return outputs[0]
        if not outputs:
            raise AppError("No outputs found")
        raise AppError("Multiple outputs match the provided filter")

    def get_paths(self, output_name):
        """Return actual and expected paths for given output name."""
        output_path = self.get_output_path(output_name)
        return OutputPaths(
            actual=os.path.join(output_path, "actual"),
            expected=os.path.join(output_path, "expected"),
        )

    def setup_linux(self):
        """Create the default config file and add GOLDEN_TEST_CONFIG_PATH to /etc/environment.

        Both steps are skipped when their result is already in place.
        """
        # Create config file
        config_path = os.path.join(os.path.expanduser("~"), ".golden_test_config.yml")
        if not os.path.isfile(config_path):
            print(f"Creating {config_path}")
            config_contents = (
                r"""outputRootPattern: '/var/tmp/test_output/out-%%%%-%%%%-%%%%-%%%%'"""
                "\n"
                r"""diffCmd: 'git diff --no-index "{{expected}}" "{{actual}}"'"""
                "\n"
            )
            with open(config_path, "w") as file:
                file.write(config_contents)
        else:
            print(f"Skipping creating {config_path}, file exists.")

        # Add global GOLDEN_TEST_CONFIG_PATH environment variable
        etc_environment_path = "/etc/environment"
        env_var_defined = False
        if os.path.isfile(etc_environment_path):
            with open(etc_environment_path, "r") as file:
                env_var_defined = any(
                    line.startswith("GOLDEN_TEST_CONFIG_PATH=") for line in file
                )
        if not env_var_defined:
            print(f"Adding GOLDEN_TEST_CONFIG_PATH to {etc_environment_path}")
            env_var_contents = f'GOLDEN_TEST_CONFIG_PATH="{config_path}"'
            # The append runs through sudo; its exit status is not checked.
            call(["sudo", "/bin/sh", "-c", f"echo '{env_var_contents}' >> {etc_environment_path}"])
        else:
            print(
                f"Skipping adding GOLDEN_TEST_CONFIG_PATH to {etc_environment_path}, variable already defined."
            )

    def setup_windows(self):
        """Create the default config file and set GOLDEN_TEST_CONFIG_PATH through setx.

        Both steps are skipped when their result is already in place.
        """
        # Create config file
        config_path = os.path.join(os.path.expandvars("%LocalAppData%"), ".golden_test_config.yml")
        if not os.path.isfile(config_path):
            print(f"Creating {config_path}")
            config_contents = (
                r"outputRootPattern: 'C:\Users\Administrator\AppData\Local\Temp\test_output\out-%%%%-%%%%-%%%%-%%%%'"
                "\n"
                r"""diffCmd: 'git diff --no-index "{{expected}}" "{{actual}}"'"""
                "\n"
            )
            with open(config_path, "w") as file:
                file.write(config_contents)
        else:
            print(f"Skipping creating {config_path}, file exists.")

        # Add global GOLDEN_TEST_CONFIG_PATH environment variable
        if os.environ.get("GOLDEN_TEST_CONFIG_PATH") is None:
            print("Setting GOLDEN_TEST_CONFIG_PATH global variable")
            call(
                [
                    "runas",
                    "/profile",
                    "/user:administrator",
                    f'setx GOLDEN_TEST_CONFIG_PATH "{config_path}"',
                ]
            )
        else:
            print(
                "Skipping setting GOLDEN_TEST_CONFIG_PATH global variable, variable already defined."
            )

    def accept(self, output_name):
        """Accept the actual test output and copy it as new golden data to the source repo.

        Args:
            output_name: Name of the output to accept, or None for the latest one.
        """
        output = self.get_latest_or_matching_output(output_name)
        self.vprint(f"Accepting actual results from output '{output.name}'")

        repo_root = self.get_git_root()
        paths = self.get_paths(output.name)

        self.vprint(f"Copying files recursively from '{paths.actual}' to '{repo_root}'")
        if not self.dry_run:
            copytree_dirs_exist_ok(paths.actual, repo_root)

    def clean(self):
        """Remove all test outputs."""
        outputs = self.get_outputs()
        self.vprint(f"Deleting {len(outputs)} outputs")
        for output in outputs:
            output_path = self.get_output_path(output.name)
            self.vprint(f"Deleting folder: '{output_path}'")
            if not self.dry_run:
                shutil.rmtree(output_path)

    @cli.command("diff", help="Diff the expected and actual folders of the test output")
    @click.argument("output_name", required=False)
    @click.pass_obj
    def command_diff(self, output_name):
        """Diff the expected and actual folders of the test output."""
        self.init_config()

        # Report a config without diffCmd as an application error rather than
        # failing on the attribute access.
        diff_cmd_template = getattr(self.config, "diffCmd", None)
        if diff_cmd_template is None:
            raise AppError("Invalid config. diffCmd config parameter is not set")

        output = self.get_latest_or_matching_output(output_name)
        self.vprint(f"Diffing results from output '{output.name}'")

        paths = self.get_paths(output.name)
        diff_cmd = replace_variables(
            diff_cmd_template, {"actual": paths.actual, "expected": paths.expected}
        )
        self.vprint(f"Running command: '{diff_cmd}'")
        self.call_shell(diff_cmd)

    @cli.command("get-path", help="Get the root folder path of the test output.")
    @click.argument("output_name", required=False)
    @click.pass_obj
    def command_get_path(self, output_name):
        """Get the root folder path of the test output."""
        self.init_config()

        output = self.get_latest_or_matching_output(output_name)
        print(self.get_output_path(output.name))

    @cli.command(
        "accept",
        help="Accept the actual test output and copy it as new golden data to the source repo.",
    )
    @click.argument("output_name", required=False)
    @click.pass_obj
    def command_accept(self, output_name):
        """Accept the actual test output and copy it as new golden data to the source repo."""
        self.init_config()
        self.accept(output_name)

    @cli.command("clean", help="Remove all test outputs")
    @click.pass_obj
    def command_clean(self):
        """Remove all test outputs."""
        self.init_config()
        self.clean()

    @cli.command("latest", help="Get the name of the most recent test output")
    @click.pass_obj
    def command_latest(self):
        """Get the name of the most recent test output."""
        self.init_config()

        output = self.get_latest_output()
        print(output.name)

    @cli.command("list", help="List all names of the available test outputs")
    @click.pass_obj
    def command_list(self):
        """List all names of the available test outputs."""
        self.init_config()

        for output in sorted(self.get_outputs(), key=lambda output: output.ctime):
            print(output)

    @cli.command("setup", help="Performs default setup based on current platform")
    @click.pass_obj
    def command_setup(self):
        """Performs default setup based on current platform."""
        if platform.platform().startswith("Linux"):
            self.setup_linux()
        elif platform.platform().startswith("Windows"):
            self.setup_windows()
        else:
            raise AppError(f"Platform not supported by this setup utility: {platform.platform()}")

    @cli.command("clean-run-accept", help="Runs the test in all suites and accepts the results.")
    @click.argument("test_name", required=True)
    @click.pass_obj
    def command_clean_run_accept(self, test_name):
        """Runs a given jstest with all its passthrough suites and accepts the results."""
        self.init_config()
        self.clean()

        self.vprint(
            f"Obtaining the list of suites {test_name} belongs to using 'resmoke.py find-suites' ..."
        )
        # split() without arguments already ignores leading and trailing whitespace.
        suites = check_output(
            ["buildscripts/resmoke.py", "find-suites", test_name], text=True
        ).split()
        # Raised as AppError (not assert) so the check survives `python -O` and is
        # reported by main() like every other application error.
        if not suites:
            raise AppError(f"Failed to find any suites for test {test_name}")
        self.vprint(f"Found suites {suites} for test {test_name}")

        resmoke_invocations = [["--suite", suite] for suite in suites]

        if len(suites) == 1 and suites[0] == "query_golden_classic":
            # The test only belongs to the query_golden_classic passthrough, which means that its various expected files
            # are generated by different evergreen build variants, not by different passthroughs. We could try to extract
            # the correct resmoke arguments from the evergreen .yml file, but in practice there are many passthroughs and
            # most define resmoke arguments that have nothing to do with query golden testing. So we hardcode the list
            # of resmoke arguments here.
            self.vprint(
                "Only query_golden_classic passthrough found, will run with various settings for internalQueryFrameworkControl"
            )
            for framework_control in [
                ["--mongodSetParameters={internalQueryFrameworkControl: forceClassicEngine}"],
                ["--mongodSetParameters={internalQueryFrameworkControl: trySbeEngine}"],
                ["--mongodSetParameters={internalQueryFrameworkControl: trySbeRestricted}"],
                ["--additionalFeatureFlags=featureFlagSbeFull"],
            ]:
                resmoke_invocations.append(["--suite", suites[0], *framework_control])

        # Announce the complete plan before starting any run.
        for resmoke_invocation in resmoke_invocations:
            self.vprint(f"Will run resmoke.py with arguments: {resmoke_invocation}")

        for resmoke_invocation in resmoke_invocations:
            try:
                check_call(["buildscripts/resmoke.py", "run", *resmoke_invocation, test_name])
            except CalledProcessError:
                # Golden test failed, accept the new results
                self.accept(None)


def main():
    """Execute main.

    Runs the command group. An AppError is reported on stderr and turned into
    exit status 1.
    """
    try:
        cli()
    except AppError as err:
        # Write to stderr so the message is not mixed into command output that
        # callers may capture (e.g. the path printed by `get-path`).
        print(err, file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()