#!/usr/bin/env python3 import os import sys from glob import glob from pathlib import Path # if you haven't already done so from typing import NamedTuple, NoReturn import codeowners import regex as re import yaml from codeowners import CodeOwners try: from yaml import CLoader as Loader except ImportError: raise RuntimeError("Why no cYaml?") file = Path(__file__).resolve() parent, root = file.parent, file.parents[1] sys.path.append(str(file.parent)) from cindex import Cursor from cindex import File as ClangFile def perr(*values): print(*values, file=sys.stderr) def perr_exit(*values) -> NoReturn: perr(*values) sys.exit(1) if codeowners.path_to_regex("/**/bar").match("/foobar"): # Detect an outdated version suffering from https://github.com/sbdchd/codeowners/issues/43. # We need to update to at least 0.8.0 to get the fix. perr_exit("please run buildscripts/poetry_sync.sh to update dependencies") with open(root / ".github/CODEOWNERS") as f: code_owners = CodeOwners(f.read()) with open(parent / "modules.yaml") as f: def parseModules(): raw_mods = yaml.load(f, Loader=Loader) lines = [] fully_marked = set[str]() for mod, info in raw_mods.items(): for glob in info["files"]: lines.append(f"/{glob} @10gen/{mod}") if info.get("meta", {}).get("fully_marked", False): fully_marked.add(mod) # If multiple rules match, later wins. So put rules with more # specificity later. For all of our current rules, longer means more # specific. lines.sort(key=lambda l: len(l.split()[0])) return "\n".join(lines), fully_marked modules_text, fully_marked_modules = parseModules() modules = CodeOwners(modules_text) def normpath_for_file(f: Cursor | ClangFile | str | None) -> str | None: if f is None: return None if isinstance(f, Cursor): return normpath_for_file(f.location.file) name = f.name if type(f) == ClangFile else f if "/third_party/" in name: return None offset = name.find("src/mongo") if offset == -1: return None name = name[offset:] return os.path.normpath(name) # fix up a/X/../b/c.h -> a/b/c.h file_mod_map: dict[str, str] = {} def mod_for_file(f: ClangFile | str | None) -> str | None: name = normpath_for_file(f) if not name: return None if name and name.endswith("_gen.h") or name.endswith("_gen.cpp"): name = re.sub(r"_gen\.(h|cpp)$", ".idl", name) if name in file_mod_map: return file_mod_map[name] match modules.of(name): case []: mod = "__NONE__" case [[kind, mod]]: assert kind == "TEAM" ignore = "@10gen/" assert mod.startswith(ignore) mod = mod[len(ignore) :] case owners: perr_exit( f"ERROR: multiple owners for file {name}: {', '.join(mod for (_, mod) in owners)}" ) file_mod_map[name] = mod return mod def is_module_fully_marked(mod: str | None) -> bool: return mod in fully_marked_modules def teams_for_file(f: ClangFile | str | None): name = normpath_for_file(f) if name is None: return [] # No need to cache since this is called once per file teams = [] for kind, owner in code_owners.of(name): if kind != "TEAM": # ignore both individual engineers and svc-auto-approve-bot continue ignore = "@10gen/" assert owner.startswith(ignore) owner = owner[len(ignore) :] owner = owner.replace("-", "_") # easier for processing with jq teams.append(owner) return teams if teams else ["__NO_OWNER__"] def glob_paths(): repo_root = os.environ.get("BUILD_WORKSPACE_DIRECTORY", ".") for path in glob("src/mongo/**/*", recursive=True, root_dir=repo_root): if "/third_party/" in path: continue extensions = ("h", "cpp", "idl", "c", "defs", "inl", "hpp") if not any(path.endswith(f".{ext}") for ext in extensions): continue yield path def dump_modules() -> None: out: dict[str, dict[str, dict[str, list[str]]]] = {} for path in glob_paths(): mod = mod_for_file(path) assert mod # None would mean not first-party, but that is already filtered out. (dir, leaf) = path.rsplit("/", 1) for team in teams_for_file(path): # In cases where multiple teams own a file, this will list the file multiple times. # This is intended to play nicely with teams trying to filter to just the files they own. out.setdefault(mod, {}).setdefault(team, {}).setdefault(dir, []).append(leaf) for teams in out.values(): for dirs in teams.values(): for files in dirs.values(): files.sort() yaml.dump(out, open("modules_dump.yaml", "w")) def dump_list() -> None: for line in sorted(f"{path} -- {mod_for_file(path)}" for path in glob_paths()): print(line) def validate_modules() -> bool: def glob_is_prefix(short: str, long: str): # Simplistic but good enough for now. In particular, I want to make sure we would # catch things like "foo*" and "*bar*" both matching "foobar". assert len(short) <= len(long) # argument are sorted by length before calling if short == long: return False # duplicates are treated as errors if long.startswith(short): return True # foo and foo/ are prefixes of foo/bar if short.endswith("*") and long.startswith(short[:-1]): return True # foo* is a prefix of foo/bar and foobar return False class Info(NamedTuple): mod: str glob: str info_for_line = { info[3]: Info( mod=info[2][0][1].removeprefix("@10gen/"), glob=info[1][1:], ) for info in modules.paths } seen_lines = set[int]() failed = False for path in glob_paths(): matches = list(modules.matching_lines(path)) for match in matches: seen_lines.add(match[1]) if not matches: teams = " and ".join(teams_for_file(path)) perr(f"Error: {path} owned by {teams} doesn't match any globs in modules.yaml") failed = True if len(matches) <= 1: continue infos = sorted((info_for_line[match[1]] for match in matches), key=lambda i: len(i.glob)) for i in range(0, len(infos)): for j in range(i, len(infos)): a = infos[i] b = infos[j] if a.mod != b.mod and not glob_is_prefix(a.glob, b.glob): perr( f"Error: {path} matches multiple globs that are neither prefixes nor same module:" ) for info in infos: perr(f" {info.glob} ({info.mod})") failed = True break else: continue break # break out of outer loop for line, info in info_for_line.items(): if line not in seen_lines: perr(f"Error: glob '{info.glob}' in module {info.mod} doesn't match any files") failed = True return failed def main(): if len(sys.argv) == 2: match sys.argv[1]: case "--dump-modules": sys.exit(dump_modules()) case "--dump-modules-list": sys.exit(dump_list()) case "--validate-modules": sys.exit(validate_modules()) perr_exit(f"Usage: {sys.argv[0]} (--dump-modules|--dump-modules-list|--validate-modules)") if __name__ == "__main__": main() # cspell: perr cindex