From 18cfe7e7f2afd9d36b18effb6f1a187119443cc9 Mon Sep 17 00:00:00 2001 From: Zac Date: Wed, 6 Aug 2025 16:42:19 -0700 Subject: [PATCH] SERVER-107600 Move the first stage of lint_modules to bazel run lint (#38639) GitOrigin-RevId: 87dd5ed68da2da5237891420c321886c1352004f --- bazel/wrapper_hook/lint.py | 16 +++- .../tasks/misc_tasks.yml | 9 --- modules_poc/BUILD.bazel | 76 +++++++++++++++++++ modules_poc/browse.py | 3 +- modules_poc/merge_decls.py | 10 +-- modules_poc/mod_mapping.py | 3 +- 6 files changed, 100 insertions(+), 17 deletions(-) diff --git a/bazel/wrapper_hook/lint.py b/bazel/wrapper_hook/lint.py index 9daeea22a4a..5ba4cab610b 100644 --- a/bazel/wrapper_hook/lint.py +++ b/bazel/wrapper_hook/lint.py @@ -12,6 +12,8 @@ sys.path.append(str(REPO_ROOT)) LARGE_FILE_THRESHOLD = 10 * 1024 * 1024 #10MiB +SUPPORTED_EXTENSIONS = (".cpp", ".c", ".h", ".hpp", ".py", ".js", ".mjs", ".json", ".lock", ".toml", ".defs", ".inl", ".idl") + def create_build_files_in_new_js_dirs() -> None: base_dirs = ["src/mongo/db/modules/enterprise/jstests", "jstests"] for base_dir in base_dirs: @@ -220,6 +222,12 @@ def get_parsed_args(args): ) return parser.parse_known_args(args) +def lint_mod(bazel_bin: str) -> bool: + subprocess.run([bazel_bin, "run", "//modules_poc:mod_mapping", "--", "--validate-modules"], check=True) + #TODO add support for the following steps + #subprocess.run([bazel_bin, "run", "//modules_poc:merge_decls"], check=True) + #subprocess.run([bazel_bin, "run", "//modules_poc:browse", "--", "merged_decls.json", "--parse-only"], check=True) + def run_rules_lint(bazel_bin: str, args: List[str]) -> bool: parsed_args, args = get_parsed_args(args) if platform.system() == "Windows": @@ -261,7 +269,7 @@ def run_rules_lint(bazel_bin: str, args: List[str]) -> bool: files_to_lint = [ file for file in _get_files_changed_since_fork_point(origin_branch) - if file.endswith((".cpp", ".c", ".h", ".py", ".js", ".mjs", ".json", ".lock", ".toml")) + if file.endswith((SUPPORTED_EXTENSIONS)) 
] if lint_all or "sbom.json" in files_to_lint: @@ -297,6 +305,12 @@ def run_rules_lint(bazel_bin: str, args: List[str]) -> bool: if len([arg for arg in args if not arg.startswith("--")]) == 0: args = ["//..."] + args + if lint_all or any( + file.endswith((".cpp", ".c", ".h", ".hpp", ".idl", ".inl", ".defs")) + for file in files_to_lint + ): + lint_mod(bazel_bin) + fix = "" with tempfile.NamedTemporaryFile(delete=False) as buildevents: buildevents_path = buildevents.name diff --git a/etc/evergreen_yml_components/tasks/misc_tasks.yml b/etc/evergreen_yml_components/tasks/misc_tasks.yml index 593cffc1a00..dbcce0ae10a 100644 --- a/etc/evergreen_yml_components/tasks/misc_tasks.yml +++ b/etc/evergreen_yml_components/tasks/misc_tasks.yml @@ -822,15 +822,6 @@ tasks: - func: "set up venv" - func: "upload pip requirements" - func: "get engflow creds" - - command: subprocess.exec - type: test - params: - binary: bash - args: - - "./src/evergreen/run_python_script_with_report.sh" - - "validate-modules-yaml" - - "modules_poc/mod_mapping.py" - - "--validate-modules" - func: "bazel compile" vars: targets: //src/mongo/... 
diff --git a/modules_poc/BUILD.bazel b/modules_poc/BUILD.bazel index 1c76f5ae496..72822ed73e0 100644 --- a/modules_poc/BUILD.bazel +++ b/modules_poc/BUILD.bazel @@ -37,3 +37,79 @@ py_binary( ), ], ) + +py_binary( + name = "merge_decls", + srcs = [ + "merge_decls.py", + ], + deps = [ + dependency( + "typer", + group = "core", + ), + dependency( + "pyzstd", + group = "modules_poc", + ), + dependency( + "progressbar2", + group = "modules_poc", + ), + ], +) + +py_binary( + name = "browse", + srcs = [ + "browse.py", + ], + data = [ + "cpp-highlights.scm", + ], + deps = [ + dependency( + "textual", + group = "modules_poc", + ), + dependency( + "tree-sitter", + group = "modules_poc", + ), + dependency( + "tree-sitter-cpp", + group = "modules_poc", + ), + ], +) + +py_binary( + name = "mod_mapping", + srcs = [ + "mod_mapping.py", + ], + data = [ + "modules.yaml", + "//.github:CODEOWNERS", + # These are runtime deps, but switched to getting them via the dependency on + # cc_toolchain.all_files injected by the aspect that is needed in order to get + # access to the toolchain headers. Ideally there would be an all_headers file list + # that we could depend on. 
+ # "@mongo_toolchain//:v4/lib/libLLVM-12.so", + # "@mongo_toolchain//:v4/lib/libclang.so", + ], + deps = [ + dependency( + "regex", + group = "compile", + ), + dependency( + "pyyaml", + group = "core", + ), + dependency( + "codeowners", + group = "modules_poc", + ), + ], +) diff --git a/modules_poc/browse.py b/modules_poc/browse.py index 8727e30f13a..1a96838dc21 100755 --- a/modules_poc/browse.py +++ b/modules_poc/browse.py @@ -24,6 +24,7 @@ from textual.widgets.tree import TreeNode cpp_language = tree_sitter.Language(tree_sitter_cpp.language()) cpp_highlight_query = (Path(__file__).parent / "cpp-highlights.scm").read_text() +REPO_ROOT = os.environ.get("BUILD_WORKSPACE_DIRECTORY", ".") class Loc(NamedTuple): file: str @@ -648,7 +649,7 @@ input_path = "merged_decls.json" def load_decls() -> list[File]: files = dict[str, File]() - with open(input_path, "rb") as file: + with open(REPO_ROOT + "/" + input_path, "rb") as file: raw_decls = json.load(file) for d in raw_decls: diff --git a/modules_poc/merge_decls.py b/modules_poc/merge_decls.py index d1821bb44b3..48359285579 100755 --- a/modules_poc/merge_decls.py +++ b/modules_poc/merge_decls.py @@ -13,6 +13,7 @@ import pyzstd import typer # nicer error dump on exceptions from progressbar import ProgressBar, progressbar +REPO_ROOT = os.environ.get("BUILD_WORKSPACE_DIRECTORY", os.path.dirname(os.path.abspath(sys.argv[0])) + "/..") class Decl(TypedDict): display_name: str @@ -121,11 +122,10 @@ def is_submodule_usage(decl: Decl, mod: str) -> bool: def get_paths(timer: Timer): - project_root = os.path.dirname(os.path.abspath(sys.argv[0])) + "/.." 
proc = subprocess.run( ["bazel", "build", "--config=mod-scanner", "//src/mongo/..."], text=True, # unnecessary since we don't use stdout, but makes the types match - cwd=project_root, + cwd=REPO_ROOT, check=False, ) timer.mark("scanned sources") @@ -143,7 +143,7 @@ def get_paths(timer: Timer): ], capture_output=True, text=True, - cwd=project_root, + cwd=REPO_ROOT, check=True, ) @@ -152,7 +152,7 @@ def get_paths(timer: Timer): if line.startswith(" Environment:") and "MOD_SCANNER_OUTPUT=" in line: m = re.search("MOD_SCANNER_OUTPUT=([^,]+),", line) if m: - outputs.append(m.group(1)) + outputs.append(REPO_ROOT + "/" + m.group(1)) timer.mark("queried bazel for mod_scanner outputs") return outputs @@ -214,7 +214,7 @@ def main( decl["other_mods"] = {k: sorted(v) for k, v in decl["other_mods"].items()} # type: ignore timer.mark("massaged output for json") - with open("merged_decls.json", "w") as f: + with open(f"{REPO_ROOT}/merged_decls.json", "w") as f: json.dump(out, f) timer.mark("dumped json") diff --git a/modules_poc/mod_mapping.py b/modules_poc/mod_mapping.py index 177393be599..91e9abd18e4 100755 --- a/modules_poc/mod_mapping.py +++ b/modules_poc/mod_mapping.py @@ -127,7 +127,8 @@ def teams_for_file(f: ClangFile | str | None): def glob_paths(): - for path in glob("src/mongo/**/*", recursive=True): + repo_root = os.environ.get("BUILD_WORKSPACE_DIRECTORY", ".") + for path in glob("src/mongo/**/*", recursive=True, root_dir=repo_root): if "/third_party/" in path: continue extensions = ("h", "cpp", "idl", "c", "defs", "inl", "hpp")