SERVER-107600 Move the first stage of lint_modules to bazel run lint (#38639)

GitOrigin-RevId: 87dd5ed68da2da5237891420c321886c1352004f
This commit is contained in:
Zac 2025-08-06 16:42:19 -07:00 committed by MongoDB Bot
parent 454126a36c
commit 18cfe7e7f2
6 changed files with 100 additions and 17 deletions

View File

@ -12,6 +12,8 @@ sys.path.append(str(REPO_ROOT))
LARGE_FILE_THRESHOLD = 10 * 1024 * 1024 # 10 MiB
# File extensions that make a changed file eligible for linting; run_rules_lint
# filters the files changed since the fork point with str.endswith on this tuple.
SUPPORTED_EXTENSIONS = (".cpp", ".c", ".h", ".hpp", ".py", ".js", ".mjs", ".json", ".lock", ".toml", ".defs", ".inl", ".idl")
def create_build_files_in_new_js_dirs() -> None:
base_dirs = ["src/mongo/db/modules/enterprise/jstests", "jstests"]
for base_dir in base_dirs:
@ -220,6 +222,12 @@ def get_parsed_args(args):
)
return parser.parse_known_args(args)
def lint_mod(bazel_bin: str) -> bool:
    """Run the modules validation step through bazel.

    Invokes ``<bazel_bin> run //modules_poc:mod_mapping -- --validate-modules``
    and waits for it to finish.

    Args:
        bazel_bin: Name of, or path to, the bazel executable to invoke.

    Returns:
        True when the validation command exits successfully.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (a consequence of ``check=True``).
    """
    subprocess.run(
        [bazel_bin, "run", "//modules_poc:mod_mapping", "--", "--validate-modules"],
        check=True,
    )
    # TODO add support for the following steps
    # subprocess.run([bazel_bin, "run", "//modules_poc:merge_decls"], check=True)
    # subprocess.run([bazel_bin, "run", "//modules_poc:browse", "--", "merged_decls.json", "--parse-only"], check=True)

    # check=True raises on failure, so reaching this line means the step
    # succeeded. Return an explicit bool to honor the annotated return type
    # instead of implicitly returning None.
    return True
def run_rules_lint(bazel_bin: str, args: List[str]) -> bool:
parsed_args, args = get_parsed_args(args)
if platform.system() == "Windows":
@ -261,7 +269,7 @@ def run_rules_lint(bazel_bin: str, args: List[str]) -> bool:
files_to_lint = [
file
for file in _get_files_changed_since_fork_point(origin_branch)
if file.endswith((".cpp", ".c", ".h", ".py", ".js", ".mjs", ".json", ".lock", ".toml"))
if file.endswith((SUPPORTED_EXTENSIONS))
]
if lint_all or "sbom.json" in files_to_lint:
@ -297,6 +305,12 @@ def run_rules_lint(bazel_bin: str, args: List[str]) -> bool:
if len([arg for arg in args if not arg.startswith("--")]) == 0:
args = ["//..."] + args
if lint_all or any(
file.endswith((".cpp", ".c", ".h", ".hpp", ".idl", ".inl", ".defs"))
for file in files_to_lint
):
lint_mod(bazel_bin)
fix = ""
with tempfile.NamedTemporaryFile(delete=False) as buildevents:
buildevents_path = buildevents.name

View File

@ -822,15 +822,6 @@ tasks:
- func: "set up venv"
- func: "upload pip requirements"
- func: "get engflow creds"
- command: subprocess.exec
type: test
params:
binary: bash
args:
- "./src/evergreen/run_python_script_with_report.sh"
- "validate-modules-yaml"
- "modules_poc/mod_mapping.py"
- "--validate-modules"
- func: "bazel compile"
vars:
targets: //src/mongo/...

View File

@ -37,3 +37,79 @@ py_binary(
),
],
)
# Runnable target for merge_decls.py (`bazel run //modules_poc:merge_decls`).
py_binary(
    name = "merge_decls",
    srcs = [
        "merge_decls.py",
    ],
    # Third-party Python packages imported by the script.
    deps = [
        dependency(
            "typer",
            group = "core",
        ),
        dependency(
            "pyzstd",
            group = "modules_poc",
        ),
        dependency(
            "progressbar2",
            group = "modules_poc",
        ),
    ],
)
# Runnable target for browse.py (`bazel run //modules_poc:browse`).
py_binary(
    name = "browse",
    srcs = [
        "browse.py",
    ],
    # browse.py reads cpp-highlights.scm from the directory it lives in, so the
    # query file has to be present in the runfiles next to the script.
    data = [
        "cpp-highlights.scm",
    ],
    deps = [
        dependency(
            "textual",
            group = "modules_poc",
        ),
        dependency(
            "tree-sitter",
            group = "modules_poc",
        ),
        dependency(
            "tree-sitter-cpp",
            group = "modules_poc",
        ),
    ],
)
# Runnable target for mod_mapping.py (`bazel run //modules_poc:mod_mapping`).
py_binary(
    name = "mod_mapping",
    srcs = [
        "mod_mapping.py",
    ],
    data = [
        "modules.yaml",
        "//.github:CODEOWNERS",
        # These are runtime deps, but we switched to getting them via the
        # dependency on cc_toolchain.all_files injected by the aspect that is
        # needed in order to get access to the toolchain headers. Ideally there
        # would be an all_headers file list that we could depend on.
        # "@mongo_toolchain//:v4/lib/libLLVM-12.so",
        # "@mongo_toolchain//:v4/lib/libclang.so",
    ],
    deps = [
        dependency(
            "regex",
            group = "compile",
        ),
        dependency(
            "pyyaml",
            group = "core",
        ),
        dependency(
            "codeowners",
            group = "modules_poc",
        ),
    ],
)

View File

@ -24,6 +24,7 @@ from textual.widgets.tree import TreeNode
cpp_language = tree_sitter.Language(tree_sitter_cpp.language())
cpp_highlight_query = (Path(__file__).parent / "cpp-highlights.scm").read_text()
REPO_ROOT = os.environ.get("BUILD_WORKSPACE_DIRECTORY", ".")
class Loc(NamedTuple):
file: str
@ -648,7 +649,7 @@ input_path = "merged_decls.json"
def load_decls() -> list[File]:
files = dict[str, File]()
with open(input_path, "rb") as file:
with open(REPO_ROOT + "/" + input_path, "rb") as file:
raw_decls = json.load(file)
for d in raw_decls:

View File

@ -13,6 +13,7 @@ import pyzstd
import typer # nicer error dump on exceptions
from progressbar import ProgressBar, progressbar
REPO_ROOT = os.environ.get("BUILD_WORKSPACE_DIRECTORY", os.path.dirname(os.path.abspath(sys.argv[0])) + "/..")
class Decl(TypedDict):
display_name: str
@ -121,11 +122,10 @@ def is_submodule_usage(decl: Decl, mod: str) -> bool:
def get_paths(timer: Timer):
project_root = os.path.dirname(os.path.abspath(sys.argv[0])) + "/.."
proc = subprocess.run(
["bazel", "build", "--config=mod-scanner", "//src/mongo/..."],
text=True, # unnecessary since we don't use stdout, but makes the types match
cwd=project_root,
cwd=REPO_ROOT,
check=False,
)
timer.mark("scanned sources")
@ -143,7 +143,7 @@ def get_paths(timer: Timer):
],
capture_output=True,
text=True,
cwd=project_root,
cwd=REPO_ROOT,
check=True,
)
@ -152,7 +152,7 @@ def get_paths(timer: Timer):
if line.startswith(" Environment:") and "MOD_SCANNER_OUTPUT=" in line:
m = re.search("MOD_SCANNER_OUTPUT=([^,]+),", line)
if m:
outputs.append(m.group(1))
outputs.append(REPO_ROOT + "/" + m.group(1))
timer.mark("queried bazel for mod_scanner outputs")
return outputs
@ -214,7 +214,7 @@ def main(
decl["other_mods"] = {k: sorted(v) for k, v in decl["other_mods"].items()} # type: ignore
timer.mark("massaged output for json")
with open("merged_decls.json", "w") as f:
with open(f"{REPO_ROOT}/merged_decls.json", "w") as f:
json.dump(out, f)
timer.mark("dumped json")

View File

@ -127,7 +127,8 @@ def teams_for_file(f: ClangFile | str | None):
def glob_paths():
for path in glob("src/mongo/**/*", recursive=True):
repo_root = os.environ.get("BUILD_WORKSPACE_DIRECTORY", ".")
for path in glob("src/mongo/**/*", recursive=True, root_dir=repo_root):
if "/third_party/" in path:
continue
extensions = ("h", "cpp", "idl", "c", "defs", "inl", "hpp")