import argparse
import os
import pathlib
import platform
import subprocess
import sys
import tempfile
from typing import List, Optional

# Directory three levels above this file; it is appended to the import path below.
# NOTE(review): presumably this is the repository root so repo-level packages can be
# imported when the script is run directly -- confirm against the file's real location.
REPO_ROOT = pathlib.Path(__file__).parent.parent.parent
sys.path.append(str(REPO_ROOT))

# Size limit, in bytes, applied by LintRunner.simple_file_size_check (10 MiB).
LARGE_FILE_THRESHOLD = 10 * 1024 * 1024  # 10MiB

# File suffixes kept when the list of files to lint is derived from the git diff
# (see run_rules_lint); changed files with any other suffix are ignored there.
# NOTE(review): run_rules_lint also checks for ".yml" files, which this tuple does not
# contain -- confirm whether that omission is intentional.
SUPPORTED_EXTENSIONS = (
    ".cpp",
    ".c",
    ".h",
    ".hpp",
    ".py",
    ".js",
    ".mjs",
    ".json",
    ".lock",
    ".toml",
    ".defs",
    ".inl",
    ".idl",
)


class LinterFail(Exception):
    """Raised when a lint step fails or linting cannot be run at all."""


def create_build_files_in_new_js_dirs() -> None:
    """Write a default BUILD.bazel into JS test subdirectories that lack one.

    Walks the two jstests trees (paths are relative to the current working
    directory). Every *sub*directory that directly contains at least one
    ``.js`` entry and has no ``BUILD.bazel`` file gets a generated one; the
    walked base directories themselves are never written to. Existing
    BUILD.bazel files are left untouched.
    """
    build_file_template = """load("//bazel:mongo_js_rules.bzl", "mongo_js_library", "all_subpackage_javascript_files")

package(default_visibility = ["//visibility:public"])

mongo_js_library(
    name = "all_javascript_files",
    srcs = glob([
        "*.js",
    ]),
)

all_subpackage_javascript_files()
"""
    for search_root in ("src/mongo/db/modules/enterprise/jstests", "jstests"):
        for parent, subdir_names, _files in os.walk(search_root):
            for subdir_name in subdir_names:
                candidate_dir = os.path.join(parent, subdir_name)
                target_path = os.path.join(candidate_dir, "BUILD.bazel")

                # Never overwrite a BUILD file that is already there.
                if os.path.isfile(target_path):
                    continue
                # Only directories that directly hold JavaScript need a target.
                if not any(entry.endswith(".js") for entry in os.listdir(candidate_dir)):
                    continue

                with open(target_path, "w", encoding="utf-8") as out:
                    out.write(build_file_template)
                print(f"Created BUILD.bazel in {candidate_dir}")


def list_files_with_targets(bazel_bin: str) -> List:
    """Return the output lines of a Bazel query for all source files under ``//...``.

    Runs ``<bazel_bin> query 'kind("source file", deps(//...))' --keep_going``
    and returns every stdout line with surrounding whitespace stripped. The
    exit status is deliberately not checked, so a partially failing query
    still yields whatever was printed to stdout.
    """
    query = subprocess.run(
        [bazel_bin, "query", 'kind("source file", deps(//...))', "--keep_going"],
        capture_output=True,
        text=True,
        check=False,
    )
    return list(map(str.strip, query.stdout.splitlines()))


class LintRunner:
    """Runs individual lint steps and remembers whether any of them failed.

    Every step sets ``self.fail`` on failure. When ``keep_going`` is false the
    step additionally raises ``LinterFail`` right away; when it is true the
    caller is expected to inspect ``self.fail`` after all steps have run.
    """

    def __init__(self, keep_going: bool, bazel_bin: str):
        # When true, failures are only recorded in self.fail instead of raising.
        self.keep_going = keep_going
        # Executable used by run_bazel().
        self.bazel_bin = bazel_bin
        # Becomes True as soon as any step fails.
        self.fail = False

    def list_files_without_targets(
        self,
        files_with_targets: List[str],
        type_name: str,
        ext: str,
        dirs: List[str],
    ) -> None:
        """Report source files with extension ``ext`` that no Bazel target contains.

        rules_lint only checks files that are in targets, so this verifies that
        all files in the source tree are contained within targets.

        Args:
            files_with_targets: Bazel source-file labels (``//pkg:path/file.ext``).
            type_name: Human-readable language name used in messages.
            ext: File extension without the leading dot.
            dirs: Directories searched with ``find`` (relative to the cwd).

        Raises:
            LinterFail: If files are missing targets and ``keep_going`` is false.
            subprocess.CalledProcessError: If ``find`` exits non-zero.
        """
        exempt_list = {
            # TODO(SERVER-101360): Remove the exemptions below once resolved.
            "src/mongo/crypto/fle_options.cpp",
            # TODO(SERVER-101368): Remove the exemptions below once resolved.
            "src/mongo/db/modules/enterprise/src/streams/commands/update_connection.cpp",
            # TODO(SERVER-101370): Remove the exemptions below once resolved.
            "src/mongo/db/modules/enterprise/src/streams/third_party/mongocxx/dist/mongocxx/test_util/client_helpers.cpp",
            # TODO(SERVER-101371): Remove the exemptions below once resolved.
            "src/mongo/db/modules/enterprise/src/streams/util/tests/concurrent_memory_aggregator_test.cpp",
            # TODO(SERVER-101375): Remove the exemptions below once resolved.
            "src/mongo/platform/decimal128_dummy.cpp",
        }

        exempted_subpaths = [
            # Skip files in bazel_rules_mongo, since it has its own Bazel repo
            "bazel_rules_mongo",
            # vim creates temporary c++ files that aren't part of the tree
            "/.vim/",
        ]

        # Turn labels such as "//pkg/dir:sub/file.ext" into "pkg/dir/sub/file.ext" so
        # they are comparable with the paths printed by `find`. str.lstrip takes a
        # set of characters, so this removes every leading "/".
        suffix = f".{ext}"
        typed_files_in_targets_set = {
            label.lstrip("/").replace(":", "/")
            for label in files_with_targets
            if label.endswith(suffix)
        }

        print(f"Checking that all {type_name} files have BUILD.bazel targets...")

        # stderr is folded into stdout, so anything `find` prints there is treated
        # like a path and will be reported below.
        all_typed_files = (
            subprocess.check_output(
                ["find", *dirs, "-name", f"*.{ext}"],
                stderr=subprocess.STDOUT,
            )
            .decode("utf-8")
            .splitlines()
        )

        missing = [
            path
            for path in all_typed_files
            if path not in typed_files_in_targets_set
            and path not in exempt_list
            and not any(subpath in path for subpath in exempted_subpaths)
        ]

        if missing:
            print(f"Found {type_name} files without BUILD.bazel definitions:")
            for path in missing:
                print(f"\t{path}")
            print("")
            print(
                f"Please add these to a {ext}_library target in a BUILD.bazel file in their directory"
            )
            print("Run the following to attempt to fix the issue automatically:")
            print("\tbazel run lint --fix")
            self.fail = True
            if not self.keep_going:
                raise LinterFail("File missing bazel target.")
            # In keep-going mode, do not fall through to the success message.
            return

        print(f"All {type_name} files have BUILD.bazel targets!")

    def run_bazel(self, target: str, args: Optional[List[str]] = None):
        """Run ``<bazel_bin> run <target> [-- args...]`` and record a non-zero exit.

        Raises:
            LinterFail: If the command fails and ``keep_going`` is false.
        """
        command = [self.bazel_bin, "run", target]
        if args:
            command += ["--", *args]
        p = subprocess.run(command)
        if p.returncode != 0:
            self.fail = True
            if not self.keep_going:
                raise LinterFail("Linter failed")

    def simple_file_size_check(self, files_to_lint: List[str]):
        """Fail for every listed file larger than ``LARGE_FILE_THRESHOLD`` bytes.

        Paths that are not existing regular files are skipped: a changed-file
        list taken from ``git diff --name-only`` also names files that were
        deleted, and asking for their size would raise ``FileNotFoundError``.

        Raises:
            LinterFail: If a file is too large and ``keep_going`` is false.
        """
        for file in files_to_lint:
            if not os.path.isfile(file):
                continue
            if os.path.getsize(file) > LARGE_FILE_THRESHOLD:
                print(f"File {file} exceeds large file threshold of {LARGE_FILE_THRESHOLD}")
                self.fail = True
                if not self.keep_going:
                    raise LinterFail("File too large")


def _git_distance(args: list) -> int:
    """Return the commit count printed by ``git rev-list --count <args>``.

    If git exits non-zero, the command line and git's captured stderr/stdout
    are printed and the ``subprocess.CalledProcessError`` is re-raised.
    """
    rev_list_cmd = ["git", "rev-list", "--count", *args]
    try:
        completed = subprocess.run(rev_list_cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        print(f"Error running git command: {' '.join(rev_list_cmd)}")
        print(f"stderr: {err.stderr.strip()}")
        print(f"stdout: {err.stdout.strip()}")
        raise
    count_text = completed.stdout.strip()
    return int(count_text)


def _get_merge_base(args: list) -> str:
    """Return the stripped stdout of ``git merge-base <args>``.

    Raises ``subprocess.CalledProcessError`` if git exits non-zero.
    """
    completed = subprocess.run(
        ["git", "merge-base", *args],
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout.strip()


def _git_diff(args: list) -> str:
    """Return the stripped stdout of ``git diff <args>`` plus one trailing ``os.linesep``.

    The trailing line separator lets callers concatenate several results
    without gluing the last line of one onto the first line of the next.
    Raises ``subprocess.CalledProcessError`` if git exits non-zero.
    """
    completed = subprocess.run(
        ["git", "diff", *args],
        capture_output=True,
        text=True,
        check=True,
    )
    return f"{completed.stdout.strip()}{os.linesep}"


def _git_unstaged_files() -> str:
    """Return the output of ``git ls-files --others --exclude-standard``.

    Despite the function name, those flags list *untracked* files that are not
    excluded by the standard ignore rules. The stripped output is returned with
    one trailing ``os.linesep`` so it can be concatenated with other listings.
    Raises ``subprocess.CalledProcessError`` if git exits non-zero.
    """
    completed = subprocess.run(
        ["git", "ls-files", "--others", "--exclude-standard"],
        capture_output=True,
        text=True,
        check=True,
    )
    return f"{completed.stdout.strip()}{os.linesep}"


def _get_files_changed_since_fork_point(origin_branch: str = "origin/master") -> List[str]:
    """Return the de-duplicated, normalized paths of files changed relative to ``origin_branch``.

    Four listings are combined:
      1. files changed in commits between the merge base with ``origin_branch`` and HEAD,
      2. staged changes (``git diff --cached``),
      3. unstaged changes to tracked files (``git diff``),
      4. the files reported by ``_git_unstaged_files()``.

    The order of the returned list is that of a set and therefore unspecified.
    """
    fork_point = _get_merge_base(["HEAD", origin_branch])

    # The list literal is evaluated left to right, so the git commands run in this order.
    listings = [
        _git_diff(["--name-only", f"{fork_point}..HEAD"]),
        _git_diff(["--name-only", "--cached"]),
        _git_diff(["--name-only"]),
        _git_unstaged_files(),
    ]

    changed = set()
    for raw_line in "".join(listings).splitlines():
        if not raw_line:
            continue
        relative_path = os.path.join(os.curdir, raw_line.rstrip())
        changed.add(os.path.normpath(relative_path))

    return list(changed)


def get_parsed_args(args):
    """Parse the lint wrapper's own options out of ``args``.

    Returns the ``(namespace, remaining_args)`` pair produced by
    ``ArgumentParser.parse_known_args``: options this parser does not know and
    all positional arguments are handed back untouched in ``remaining_args``.
    """
    parser = argparse.ArgumentParser()

    def add_switch(name, **extra):
        # Every boolean option is an off-by-default store_true flag.
        parser.add_argument(name, action="store_true", default=False, **extra)

    def add_text_option(name, default, help_text, **extra):
        parser.add_argument(name, type=str, default=default, help=help_text, **extra)

    add_text_option(
        "--lint-yaml-project",
        "mongodb-mongo-master",
        "Run evergreen yaml linter for specified project",
        required=False,
    )
    add_switch("--fix", help="Apply linter fixes")
    add_switch("--all", help="Run linter on all targets")
    add_switch("--dry-run")
    add_switch("--fail-on-validation")
    add_text_option(
        "--origin-branch",
        "origin/master",
        "Base branch to compare changes against",
    )
    add_switch("--large-files")
    add_switch("--keep-going", help="Keep going after failures")

    return parser.parse_known_args(args)


def lint_mod(lint_runner: LintRunner):
    """Run the module-mapping validation step through ``lint_runner``.

    Invokes the ``//modules_poc:mod_mapping`` target with ``--validate-modules``;
    failure handling is whatever ``lint_runner.run_bazel`` does.
    """
    mod_mapping_target = "//modules_poc:mod_mapping"
    validation_args = ["--validate-modules"]
    lint_runner.run_bazel(mod_mapping_target, validation_args)
    # TODO add support for the following steps:
    #   bazel run //modules_poc:merge_decls
    #   bazel run //modules_poc:browse -- merged_decls.json --parse-only


def _rules_lint_outputs(buildevents_path: str, ext: str) -> List[str]:
    """Return paths of build outputs whose name ends with ``ext``, read from the build event file.

    Shells out to ``jq`` (looked up on PATH). When nothing matches, the result
    is ``[""]``; callers skip that entry because the path does not exist.
    """
    # Maybe this could be hermetic with bazel run @aspect_bazel_lib//tools:jq or sth
    # jq on windows outputs CRLF which breaks this script. https://github.com/jqlang/jq/issues/92
    filter_expr = '.namedSetOfFiles | values | .files[] | select(.name | endswith($ext)) | ((.pathPrefix | join("/")) + "/" + .name)'
    completed = subprocess.run(
        ["jq", "--arg", "ext", ext, "--raw-output", filter_expr, buildevents_path],
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout.strip().split("\n")


def _is_blank_lint_output(path: str) -> bool:
    """Return True for outputs to ignore: coverage data, missing files and empty files."""
    return "coverage.dat" in path or not os.path.exists(path) or not os.path.getsize(path)


def run_rules_lint(bazel_bin: str, args: List[str]):
    """Run every lint step: target-coverage checks, ``bazel run`` linters, then rules_lint.

    Steps, in order:
      1. Parse wrapper options; everything else in ``args`` is kept for Bazel.
      2. With ``--fix``, create missing BUILD.bazel files in JS test directories.
      3. Check that C++/JavaScript/Python sources are covered by Bazel targets.
      4. Decide what to lint: everything (``--all``, ``...`` or ``//...``), the
         paths given on the command line, or -- when none were given -- the
         files changed relative to ``--origin-branch``.
      5. Run the individual ``bazel run`` linters that apply to those files.
      6. Build with the eslint and ruff aspects, print non-empty reports and,
         with ``--fix`` / ``--dry-run``, apply or print the generated patches.

    Args:
        bazel_bin: Bazel executable to invoke.
        args: Command-line arguments (wrapper options, Bazel flags, paths/targets).

    Raises:
        LinterFail: On Windows, when any linter failed, or when reports contain
            findings and no fix mode was requested.
        subprocess.CalledProcessError: If git, ``bazel build``, ``jq`` or ``patch`` fail.
    """
    parsed_args, args = get_parsed_args(args)
    if platform.system() == "Windows":
        print("eslint not supported on windows")
        raise LinterFail("Unsupported platform")

    if parsed_args.fix:
        create_build_files_in_new_js_dirs()

    lr = LintRunner(parsed_args.keep_going, bazel_bin)

    files_with_targets = list_files_with_targets(bazel_bin)
    lr.list_files_without_targets(files_with_targets, "C++", "cpp", ["src/mongo"])
    lr.list_files_without_targets(
        files_with_targets,
        "javascript",
        "js",
        ["src/mongo", "jstests"],
    )
    lr.list_files_without_targets(
        files_with_targets,
        "python",
        "py",
        ["src/mongo", "buildscripts", "evergreen"],
    )

    lint_all = parsed_args.all or "..." in args or "//..." in args
    files_to_lint = [arg for arg in args if not arg.startswith("-")]
    # Only fall back to the git diff when the caller named no files. Explicitly
    # requested files must be linted as given, not replaced by the diff.
    if not lint_all and not files_to_lint:
        origin_branch = parsed_args.origin_branch
        max_distance = 100
        distance = _git_distance([f"{origin_branch}..HEAD"])
        if distance > max_distance:
            print(
                f"The number of commits between current branch and origin branch ({origin_branch}) is too large: {distance} commits (> {max_distance} commits)."
            )
            print(
                "Please update your local branch with the latest changes from origin, or use `bazel run lint -- --origin-branch=other_branch` to select a different origin branch"
            )
            lint_all = True
        else:
            files_to_lint = [
                file
                for file in _get_files_changed_since_fork_point(origin_branch)
                if file.endswith(SUPPORTED_EXTENSIONS)
            ]

    if lint_all or "sbom.json" in files_to_lint:
        lr.run_bazel("//buildscripts:sbom_linter")

    if lint_all or any(file.endswith((".h", ".cpp")) for file in files_to_lint):
        lr.run_bazel("//buildscripts:quickmongolint", ["lint"])

    if lint_all or any(
        file.endswith((".cpp", ".c", ".h", ".py", ".idl")) for file in files_to_lint
    ):
        lr.run_bazel("//buildscripts:errorcodes", ["--quiet"])

    if lint_all:
        lr.run_bazel("//buildscripts:pyrightlint", ["lint-all"])
    elif any(file.endswith(".py") for file in files_to_lint):
        # NOTE(review): the sub-command is "lints" here but "lint-all" above and "lint"
        # for quickmongolint -- confirm that pyrightlint really accepts "lints".
        lr.run_bazel("//buildscripts:pyrightlint", ["lints"] + files_to_lint)

    if lint_all or "poetry.lock" in files_to_lint or "pyproject.toml" in files_to_lint:
        lr.run_bazel("//buildscripts:poetry_lock_check")

    if lint_all or any(file.endswith(".yml") for file in files_to_lint):
        lr.run_bazel(
            "buildscripts:validate_evg_project_config",
            [
                f"--evg-project-name={parsed_args.lint_yaml_project}",
            ],
        )

    if lint_all or parsed_args.large_files:
        lr.run_bazel("buildscripts:large_file_check", ["--exclude", "src/third_party/*"])
    else:
        # The list may name paths that are not files on disk (files deleted on the
        # branch, or Bazel labels given on the command line); only size real files.
        lr.simple_file_size_check([file for file in files_to_lint if os.path.isfile(file)])

    if lint_all or any(
        file.endswith((".cpp", ".c", ".h", ".hpp", ".idl", ".inl", ".defs"))
        for file in files_to_lint
    ):
        lint_mod(lr)

    if lr.fail:
        raise LinterFail("Linter(s) failed")

    # Default to linting everything in rules_lint if no path was passed in.
    if all(arg.startswith("--") for arg in args):
        args = ["//..."] + args

    fix = ""
    # Only the name is needed: Bazel writes the build events into this file.
    with tempfile.NamedTemporaryFile(delete=False) as buildevents:
        buildevents_path = buildevents.name

    try:
        for linter in ["eslint", "ruff"]:
            args.append(f"--aspects=//tools/lint:linters.bzl%{linter}")

        args.extend(
            [
                # Allow lints of code that fails some validation action
                # See https://github.com/aspect-build/rules_ts/pull/574#issuecomment-2073632879
                "--norun_validations",
                f"--build_event_json_file={buildevents_path}",
                "--output_groups=rules_lint_human",
                # NOTE(review): no shell is involved, so the single quotes reach Bazel as
                # part of the regex -- confirm that this is intended.
                "--remote_download_regex='.*AspectRulesLint.*'",
            ]
        )

        if parsed_args.fail_on_validation:
            args.extend(["--@aspect_rules_lint//lint:fail_on_violation", "--keep_going"])

        # Allow a `--fix` option on the command-line.
        # This happens to make output of the linter such as ruff's
        # [*] 1 fixable with the `--fix` option.
        # so that the naive thing of pasting that flag to the lint command will do what
        # the user expects.
        if parsed_args.fix:
            fix = "patch"
            args.extend(["--@aspect_rules_lint//lint:fix", "--output_groups=rules_lint_patch"])

        # --dry-run wins over --fix: patches are printed instead of applied.
        if parsed_args.dry_run:
            fix = "print"

        # Bazel wants all flags first, then "--", then the targets/paths.
        args = (
            [arg for arg in args if arg.startswith("--") and arg != "--"]
            + ["--"]
            + [arg for arg in args if not arg.startswith("--")]
        )

        # Actually run the lint itself
        subprocess.run(
            [bazel_bin, "build"] + args, check=True, stdout=sys.stdout, stderr=sys.stderr
        )

        # Parse out the reports from the build events
        failing_reports = 0
        for report in _rules_lint_outputs(buildevents_path, ".out"):
            # Exclude coverage reports, and check if the output is empty.
            if _is_blank_lint_output(report):
                # Report is empty. No linting errors.
                continue
            with open(report, "r", encoding="utf-8") as f:
                file_contents = f.read().strip()
            if file_contents == "All checks passed!":
                # Report is successful. No linting errors.
                continue

            print(f"From {report}:")
            print(file_contents)
            print()
            failing_reports += 1

        # Apply fixes if requested
        if fix:
            for patch in _rules_lint_outputs(buildevents_path, ".patch"):
                # Exclude coverage, and check if the patch is empty.
                if _is_blank_lint_output(patch):
                    # Patch is empty. No linting errors.
                    continue

                if fix == "print":
                    print(f"From {patch}:")
                    with open(patch, "r", encoding="utf-8") as f:
                        print(f.read())
                    print()
                elif fix == "patch":
                    # Close the patch file again once `patch` has consumed it.
                    with open(patch, "r", encoding="utf-8") as patch_file:
                        subprocess.run(["patch", "-p1"], check=True, stdin=patch_file)
                else:
                    print(f"ERROR: unknown fix type {fix}", file=sys.stderr)
                    raise LinterFail("Unknown fix type")
        elif failing_reports != 0:
            raise LinterFail("Failing reports")
    finally:
        # Best-effort cleanup of the temporary build event file; a failure to
        # delete it must not mask the lint result.
        try:
            os.unlink(buildevents_path)
        except OSError:
            pass