# mongo/bazel/wrapper_hook/lint.py
#
# 424 lines
# 15 KiB
# Python

import argparse
import os
import pathlib
import platform
import subprocess
import sys
import tempfile
from typing import List
# Third ancestor of this file's path: the directory two levels above the one
# that contains this module.
REPO_ROOT = pathlib.Path(__file__).parent.parent.parent
# Add that directory to the import search path for the rest of the process.
sys.path.append(str(REPO_ROOT))
# Size in bytes (10 MiB) above which the simple per-file size check in
# run_rules_lint reports a file as too large.
LARGE_FILE_THRESHOLD = 10 * 1024 * 1024 # 10 MiB
# Suffixes used by run_rules_lint to filter the git-changed file list; a tuple
# so it can be passed straight to str.endswith.
SUPPORTED_EXTENSIONS = (".cpp", ".c", ".h", ".hpp", ".py", ".js", ".mjs", ".json", ".lock", ".toml", ".defs", ".inl", ".idl")
def create_build_files_in_new_js_dirs() -> None:
    """Write a default BUILD.bazel into JS test directories that lack one.

    Every subdirectory found beneath the two jstests roots (paths are relative
    to the current working directory) is inspected. A subdirectory that has no
    ``BUILD.bazel`` file and directly contains at least one ``.js`` file gets a
    new ``BUILD.bazel`` declaring one ``mongo_js_library`` target that globs
    ``*.js``. A line is printed for each file created.
    """
    # Assembled line by line so the emitted text does not depend on how this
    # function itself is indented.
    build_file_text = (
        'load("//bazel:mongo_js_rules.bzl", "mongo_js_library")\n'
        "mongo_js_library(\n"
        'name = "all_javascript_files",\n'
        "srcs = glob([\n"
        '"*.js",\n'
        "]),\n"
        'visibility = ["//visibility:public"],\n'
        ")\n"
    )
    for js_root in ("src/mongo/db/modules/enterprise/jstests", "jstests"):
        for parent, subdir_names, _ in os.walk(js_root):
            for subdir_name in subdir_names:
                candidate = os.path.join(parent, subdir_name)
                build_path = os.path.join(candidate, "BUILD.bazel")
                if os.path.isfile(build_path):
                    # Never overwrite an existing build file.
                    continue
                if not any(entry.endswith(".js") for entry in os.listdir(candidate)):
                    continue
                with open(build_path, "w", encoding="utf-8") as handle:
                    handle.write(build_file_text)
                print(f"Created BUILD.bazel in {candidate}")
def list_files_with_targets(bazel_bin: str) -> List:
    """Return the lines printed by a Bazel query for all source-file dependencies.

    Runs ``<bazel_bin> query 'kind("source file", deps(//...))' --keep_going``
    and returns each stdout line with surrounding whitespace removed. The exit
    status is not checked, so a failing query yields whatever was printed to
    stdout (possibly nothing).

    Args:
        bazel_bin: Executable used to run the query.
    """
    query = subprocess.run(
        [bazel_bin, "query", 'kind("source file", deps(//...))', "--keep_going"],
        capture_output=True,
        text=True,
        check=False,
    )
    return list(map(str.strip, query.stdout.splitlines()))
def _label_to_path(label: str) -> str:
    """Convert a source-file label such as ``//pkg/dir:file`` to ``pkg/dir/file``."""
    # Remove exactly one leading "//". str.lstrip("//") treats its argument as
    # a set of characters rather than a prefix.
    if label.startswith("//"):
        label = label[2:]
    # A root-package label ("//:file") has an empty package; drop the colon
    # instead of turning it into a leading "/".
    if label.startswith(":"):
        label = label[1:]
    return label.replace(":", "/")


def list_files_without_targets(
    files_with_targets: List[str],
    type_name: str,
    ext: str,
    dirs: List[str],
) -> bool:
    """Check that every ``*.<ext>`` file under ``dirs`` appears in a Bazel target.

    rules_lint only checks files that are in targets, so this verifies that all
    files in the source tree are contained within targets. Files are discovered
    with the external ``find`` command, relative to the current directory.

    Args:
        files_with_targets: Source-file labels (``//pkg:name``) known to Bazel.
        type_name: Human-readable file type used in the printed messages.
        ext: File extension without the leading dot.
        dirs: Paths handed to ``find`` as search roots.

    Returns:
        True when every discovered file is covered by a label, is on the
        exemption list, or has ``bazel_rules_mongo`` in its path; False
        otherwise, after printing the uncovered files and a hint.

    Raises:
        subprocess.CalledProcessError: If ``find`` exits with a non-zero status.
    """
    exempt_list = {
        # TODO(SERVER-101360): Remove the exemptions below once resolved.
        "src/mongo/crypto/fle_options.cpp",
        # TODO(SERVER-101368): Remove the exemptions below once resolved.
        "src/mongo/db/modules/enterprise/src/streams/commands/update_connection.cpp",
        # TODO(SERVER-101370): Remove the exemptions below once resolved.
        "src/mongo/db/modules/enterprise/src/streams/third_party/mongocxx/dist/mongocxx/test_util/client_helpers.cpp",
        # TODO(SERVER-101371): Remove the exemptions below once resolved.
        "src/mongo/db/modules/enterprise/src/streams/util/tests/concurrent_memory_aggregator_test.cpp",
        # TODO(SERVER-101373): Remove the exemptions below once resolved.
        "src/mongo/executor/network_interface_thread_pool_test.cpp",
        # TODO(SERVER-101375): Remove the exemptions below once resolved.
        "src/mongo/platform/decimal128_dummy.cpp",
        # TODO(SERVER-101377): Remove the exemptions below once resolved.
        "src/mongo/util/icu_init_stub.cpp",
        # TODO(SERVER-101377): Remove the exemptions below once resolved.
        "src/mongo/util/processinfo_emscripten.cpp",
        "src/mongo/util/processinfo_macOS.cpp",
        "src/mongo/util/processinfo_solaris.cpp",
    }

    suffix = f".{ext}"
    # Paths of the labelled files with the requested extension, as a set for
    # constant-time membership tests below.
    typed_files_in_targets_set = {
        _label_to_path(label) for label in files_with_targets if label.endswith(suffix)
    }

    print(f"Checking that all {type_name} files have BUILD.bazel targets...")

    all_typed_files = (
        subprocess.check_output(
            ["find", *dirs, "-name", f"*.{ext}"],
            stderr=subprocess.STDOUT,
        )
        .decode("utf-8")
        .splitlines()
    )

    # Sorted so the report is stable regardless of the order `find` emits.
    missing = sorted(
        file
        for file in all_typed_files
        if file not in typed_files_in_targets_set
        and file not in exempt_list
        # Skip files in bazel_rules_mongo, since it has its own Bazel repo
        and "bazel_rules_mongo" not in file
    )

    if missing:
        print(f"Found {type_name} files without BUILD.bazel definitions:")
        for file in missing:
            print(f"\t{file}")
        print("")
        print(
            f"Please add these to a {ext}_library target in a BUILD.bazel file in their directory"
        )
        print("Run the following to attempt to fix the issue automatically:")
        print("\tbazel run lint --fix")
        return False

    print(f"All {type_name} files have BUILD.bazel targets!")
    return True
def _git_distance(args: list) -> int:
    """Return the integer printed by ``git rev-list --count <args>``.

    Args:
        args: Extra arguments appended to the ``git rev-list --count`` command.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status. The
            command line and the captured stderr/stdout are printed before the
            exception is re-raised.
    """
    command = ["git", "rev-list", "--count", *args]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        for message in (
            f"Error running git command: {' '.join(command)}",
            f"stderr: {err.stderr.strip()}",
            f"stdout: {err.stdout.strip()}",
        ):
            print(message)
        raise
    return int(completed.stdout.strip())
def _get_merge_base(args: list) -> str:
    """Return the stripped stdout of ``git merge-base <args>``.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    completed = subprocess.run(
        ["git", "merge-base", *args],
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout.strip()
def _git_diff(args: list) -> str:
    """Return the stripped stdout of ``git diff <args>`` followed by ``os.linesep``.

    The trailing separator lets callers concatenate several results without
    the last line of one running into the first line of the next.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    completed = subprocess.run(
        ["git", "diff", *args],
        capture_output=True,
        text=True,
        check=True,
    )
    return f"{completed.stdout.strip()}{os.linesep}"
def _git_unstaged_files() -> str:
    """Return the output of ``git ls-files --others --exclude-standard``.

    Despite the function name, this command lists files git does not track
    (honouring the standard ignore rules). The stripped stdout is returned
    with ``os.linesep`` appended.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    listing = subprocess.run(
        ["git", "ls-files", "--others", "--exclude-standard"],
        capture_output=True,
        text=True,
        check=True,
    )
    return "".join((listing.stdout.strip(), os.linesep))
def _get_files_changed_since_fork_point(origin_branch: str = "origin/master") -> List[str]:
    """Query git for the files that changed relative to ``origin_branch``.

    Four listings are combined:
      1. files changed in commits between the merge base with
         ``origin_branch`` and HEAD,
      2. staged files (``git diff --cached``),
      3. modified tracked files in the working tree,
      4. the output of ``_git_unstaged_files()``.

    Args:
        origin_branch: Ref whose merge base with HEAD is used as the fork point.

    Returns:
        The de-duplicated, normalised paths, in no particular order.
    """
    fork_point = _get_merge_base(["HEAD", origin_branch])
    listings = [
        _git_diff(["--name-only", f"{fork_point}..HEAD"]),
        _git_diff(["--name-only", "--cached"]),
        _git_diff(["--name-only"]),
        _git_unstaged_files(),
    ]
    unique_paths = set()
    for raw_line in "".join(listings).splitlines():
        if not raw_line:
            # Each listing ends with a separator, so empty lines are expected.
            continue
        unique_paths.add(os.path.normpath(os.path.join(os.curdir, raw_line.rstrip())))
    return list(unique_paths)
def get_parsed_args(args):
    """Parse the lint wrapper's own options out of ``args``.

    Args:
        args: Command-line arguments to parse.

    Returns:
        The ``(namespace, remaining)`` pair produced by
        ``ArgumentParser.parse_known_args``: the recognised options and the
        list of arguments the parser did not recognise.
    """
    parser = argparse.ArgumentParser()
    # Keyword arguments shared by every boolean switch below.
    switch = {"action": "store_true", "default": False}

    parser.add_argument(
        "--lint-yaml-project",
        type=str,
        default="mongodb-mongo-master",
        required=False,
        help="Run evergreen yaml linter for specified project",
    )
    parser.add_argument("--fix", help="Apply linter fixes", **switch)
    parser.add_argument("--all", help="Run linter on all targets", **switch)
    parser.add_argument("--dry-run", **switch)
    parser.add_argument("--fail-on-validation", **switch)
    parser.add_argument(
        "--origin-branch",
        type=str,
        default="origin/master",
        help="Base branch to compare changes against",
    )
    parser.add_argument("--large-files", **switch)

    return parser.parse_known_args(args)
def lint_mod(bazel_bin: str) -> bool:
    """Run the module-mapping validation through Bazel.

    Args:
        bazel_bin: Executable used to invoke ``bazel run``.

    Returns:
        True when the validation command exits successfully. (Previously the
        function fell off the end and returned None despite its annotation.)

    Raises:
        subprocess.CalledProcessError: If the validation command exits with a
            non-zero status.
    """
    subprocess.run(
        [bazel_bin, "run", "//modules_poc:mod_mapping", "--", "--validate-modules"],
        check=True,
    )
    # TODO add support for the following steps
    # subprocess.run([bazel_bin, "run", "//modules_poc:merge_decls"], check=True)
    # subprocess.run([bazel_bin, "run", "//modules_poc:browse", "--", "merged_decls.json", "--parse-only"], check=True)
    return True
def _run_auxiliary_linters(
    bazel_bin: str,
    parsed_args: argparse.Namespace,
    lint_all: bool,
    files_to_lint: List[str],
) -> bool:
    """Run the standalone linters selected by the files being linted.

    Returns False only when the simple size check finds an oversized file.
    """
    if lint_all or "sbom.json" in files_to_lint:
        subprocess.run([bazel_bin, "run", "//buildscripts:sbom_linter"], check=True)

    if lint_all or any(file.endswith((".h", ".cpp")) for file in files_to_lint):
        subprocess.run(
            [bazel_bin, "run", "//buildscripts:quickmongolint", "--", "lint"], check=True
        )

    if lint_all or any(
        file.endswith((".cpp", ".c", ".h", ".py", ".idl")) for file in files_to_lint
    ):
        subprocess.run(
            [bazel_bin, "run", "//buildscripts:errorcodes", "--", "--quiet"], check=True
        )

    if lint_all or "poetry.lock" in files_to_lint or "pyproject.toml" in files_to_lint:
        subprocess.run([bazel_bin, "run", "//buildscripts:poetry_lock_check"], check=True)

    if lint_all or any(file.endswith(".yml") for file in files_to_lint):
        subprocess.run(
            [
                bazel_bin,
                "run",
                "//buildscripts:validate_evg_project_config",
                "--",
                f"--evg-project-name={parsed_args.lint_yaml_project}",
                "--evg-auth-config=.evergreen.yml",
            ],
            check=True,
        )

    if lint_all or parsed_args.large_files:
        subprocess.run(
            [
                bazel_bin,
                "run",
                "//buildscripts:large_file_check",
                "--",
                "--exclude",
                "src/third_party/*",
            ],
            check=True,
        )
    else:
        # simple check
        for file in files_to_lint:
            # Entries may be files deleted on this branch or Bazel labels;
            # neither has a size on disk, so only stat regular files.
            if os.path.isfile(file) and os.path.getsize(file) > LARGE_FILE_THRESHOLD:
                print(f"File {file} exceeds large file threshold of {LARGE_FILE_THRESHOLD}")
                return False
    return True


def _lint_output_paths(buildevents_path: str, ext: str) -> List[str]:
    """Return paths of build outputs whose name ends with ``ext``, via ``jq``."""
    filter_expr = '.namedSetOfFiles | values | .files[] | select(.name | endswith($ext)) | ((.pathPrefix | join("/")) + "/" + .name)'
    # Maybe this could be hermetic with bazel run @aspect_bazel_lib//tools:jq or sth
    # jq on windows outputs CRLF which breaks this script. https://github.com/jqlang/jq/issues/92
    listing = subprocess.run(
        ["jq", "--arg", "ext", ext, "--raw-output", filter_expr, buildevents_path],
        capture_output=True,
        text=True,
        check=True,
    )
    # Empty jq output yields [""], which callers skip as a non-existent path.
    return listing.stdout.strip().split("\n")


def _is_skippable_output(path: str) -> bool:
    """Return True for coverage files and for missing or empty outputs."""
    return "coverage.dat" in path or not os.path.exists(path) or not os.path.getsize(path)


def _print_failing_reports(reports: List[str]) -> int:
    """Print every non-empty, non-passing report and return how many there were."""
    failing_reports = 0
    for report in reports:
        if _is_skippable_output(report):
            # Report is empty. No linting errors.
            continue
        with open(report, "r", encoding="utf-8") as f:
            file_contents = f.read().strip()
        if file_contents == "All checks passed!":
            # Report is successful. No linting errors.
            continue
        print(f"From {report}:")
        print(file_contents)
        print()
        failing_reports += 1
    return failing_reports


def _handle_patches(patches: List[str], fix: str) -> bool:
    """Print (``fix == "print"``) or apply (``fix == "patch"``) each patch file.

    Returns False when ``fix`` is neither of the two known modes.
    """
    for patch in patches:
        if _is_skippable_output(patch):
            # Patch is empty. No linting errors.
            continue
        if fix == "print":
            print(f"From {patch}:")
            with open(patch, "r", encoding="utf-8") as f:
                print(f.read())
            print()
        elif fix == "patch":
            # Close the patch file once `patch` has consumed it.
            with open(patch, "r", encoding="utf-8") as patch_file:
                subprocess.run(["patch", "-p1"], check=True, stdin=patch_file)
        else:
            print(f"ERROR: unknown fix type {fix}", file=sys.stderr)
            return False
    return True


def _build_and_report(
    bazel_bin: str,
    args: List[str],
    parsed_args: argparse.Namespace,
    buildevents_path: str,
) -> bool:
    """Run the rules_lint aspects with ``bazel build`` and process their outputs."""
    fix = ""
    args = list(args)
    for linter in ["eslint", "ruff"]:
        args.append(f"--aspects=//tools/lint:linters.bzl%{linter}")

    args.extend(
        [
            # Allow lints of code that fails some validation action
            # See https://github.com/aspect-build/rules_ts/pull/574#issuecomment-2073632879
            "--norun_validations",
            f"--build_event_json_file={buildevents_path}",
            "--output_groups=rules_lint_human",
            # NOTE(review): no shell is involved, so the single quotes reach
            # Bazel as part of the regex - confirm that this is intended.
            "--remote_download_regex='.*AspectRulesLint.*'",
        ]
    )

    if parsed_args.fail_on_validation:
        args.extend(["--@aspect_rules_lint//lint:fail_on_violation", "--keep_going"])

    # Allow a `--fix` option on the command-line.
    # This happens to make output of the linter such as ruff's
    # [*] 1 fixable with the `--fix` option.
    # so that the naive thing of pasting that flag to lint.sh will do what the user expects.
    if parsed_args.fix:
        fix = "patch"
        args.extend(["--@aspect_rules_lint//lint:fix", "--output_groups=rules_lint_patch"])

    # --dry-run switches from applying patches to printing them.
    # NOTE(review): the patch output group is only requested together with
    # --fix, so --dry-run on its own presumably finds no patches - confirm.
    if parsed_args.dry_run:
        fix = "print"

    # Bazel options first, then "--", then everything else (targets).
    args = (
        [arg for arg in args if arg.startswith("--") and arg != "--"]
        + ["--"]
        + [arg for arg in args if not arg.startswith("--")]
    )

    # Actually run the lint itself
    subprocess.run([bazel_bin, "build"] + args, check=True)

    # Parse out the reports from the build events
    failing_reports = _print_failing_reports(_lint_output_paths(buildevents_path, ".out"))

    # Apply fixes if requested; in that mode failing reports do not fail the run.
    if fix:
        return _handle_patches(_lint_output_paths(buildevents_path, ".patch"), fix)
    return failing_reports == 0


def run_rules_lint(bazel_bin: str, args: List[str]) -> bool:
    """Run the repository lint checks and the rules_lint aspects.

    Steps, in order: verify that C++, JavaScript and Python sources belong to
    Bazel targets; decide which files to lint (everything, the files named in
    ``args``, or the files changed relative to ``--origin-branch``); run the
    standalone linters those files select; then build the lint aspects and
    print their reports, printing or applying patches when requested.

    Args:
        bazel_bin: Executable used for every Bazel invocation.
        args: Command-line arguments; options understood by
            ``get_parsed_args`` are consumed and the rest are treated as
            targets/files and Bazel flags.

    Returns:
        True when linting succeeded. False on Windows, when a source file has
        no target, when a file exceeds the size threshold, when a report
        contains findings (and no fix mode is active), or when the fix mode is
        unknown.

    Raises:
        subprocess.CalledProcessError: If any subprocess started with
            ``check=True`` exits with a non-zero status.
    """
    parsed_args, args = get_parsed_args(args)
    if platform.system() == "Windows":
        print("eslint not supported on windows")
        return False

    if parsed_args.fix:
        create_build_files_in_new_js_dirs()

    files_with_targets = list_files_with_targets(bazel_bin)
    target_coverage_checks = (
        ("C++", "cpp", ["src/mongo"]),
        ("javascript", "js", ["src/mongo", "jstests"]),
        ("python", "py", ["src/mongo", "buildscripts", "evergreen"]),
    )
    for type_name, ext, dirs in target_coverage_checks:
        if not list_files_without_targets(files_with_targets, type_name, ext, dirs):
            return False

    lint_all = parsed_args.all or "..." in args or "//..." in args
    files_to_lint = [arg for arg in args if not arg.startswith("-")]
    # Fall back to the git diff only when the caller named no files. The
    # previous condition was inverted: it replaced explicitly named files with
    # the diff and never consulted git when no files were given.
    if not lint_all and not files_to_lint:
        origin_branch = parsed_args.origin_branch
        max_distance = 100
        distance = _git_distance([f"{origin_branch}..HEAD"])
        if distance > max_distance:
            print(
                f"The number of commits between current branch and origin branch ({origin_branch}) is too large: {distance} commits (> {max_distance} commits)."
            )
            print(
                "Please update your local branch with the latest changes from origin, or use `bazel run lint -- --origin-branch=other_branch` to select a different origin branch"
            )
            lint_all = True
        else:
            files_to_lint = [
                file
                for file in _get_files_changed_since_fork_point(origin_branch)
                if file.endswith(SUPPORTED_EXTENSIONS)
            ]

    if not _run_auxiliary_linters(bazel_bin, parsed_args, lint_all, files_to_lint):
        return False

    # Default to linting everything in rules_lint if no path was passed in.
    if all(arg.startswith("--") for arg in args):
        args = ["//..."] + args

    if lint_all or any(
        file.endswith((".cpp", ".c", ".h", ".hpp", ".idl", ".inl", ".defs"))
        for file in files_to_lint
    ):
        lint_mod(bazel_bin)

    # Only the name is needed; Bazel writes the build events to this path.
    with tempfile.NamedTemporaryFile(delete=False) as buildevents:
        buildevents_path = buildevents.name
    try:
        return _build_and_report(bazel_bin, args, parsed_args, buildevents_path)
    finally:
        # Best-effort cleanup of the build-event file created above.
        try:
            os.remove(buildevents_path)
        except OSError:
            pass