From 504c4bc8d1852aaf6b8a57a06ec22b8e421fd4f7 Mon Sep 17 00:00:00 2001 From: Mathias Stearn Date: Wed, 8 Oct 2025 20:45:17 +0200 Subject: [PATCH] SERVER-98435 Allow marking modules as fully_marked so they stay that way even when new files are added (#42270) GitOrigin-RevId: cb96c2d48b51302a5c30fe1ce0f7a23a5b2f9132 --- modules_poc/mod_mapping.py | 12 ++++++++++-- modules_poc/mod_scanner.py | 10 +++++++++- modules_poc/modules.yaml | 9 +++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/modules_poc/mod_mapping.py b/modules_poc/mod_mapping.py index 91e9abd18e4..376d5be29e6 100755 --- a/modules_poc/mod_mapping.py +++ b/modules_poc/mod_mapping.py @@ -47,16 +47,20 @@ with open(parent / "modules.yaml") as f: def parseModules(): raw_mods = yaml.load(f, Loader=Loader) lines = [] + fully_marked = set[str]() for mod, info in raw_mods.items(): for glob in info["files"]: lines.append(f"/{glob} @10gen/{mod}") + if info.get("meta", {}).get("fully_marked", False): + fully_marked.add(mod) # If multiple rules match, later wins. So put rules with more # specificity later. For all of our current rules, longer means more # specific. 
lines.sort(key=lambda l: len(l.split()[0])) - return "\n".join(lines) + return "\n".join(lines), fully_marked - modules = CodeOwners(parseModules()) + modules_text, fully_marked_modules = parseModules() + modules = CodeOwners(modules_text) def normpath_for_file(f: Cursor | ClangFile | str | None) -> str | None: @@ -107,6 +111,10 @@ def mod_for_file(f: ClangFile | str | None) -> str | None: return mod +def is_module_fully_marked(mod: str | None) -> bool: + return mod in fully_marked_modules + + def teams_for_file(f: ClangFile | str | None): name = normpath_for_file(f) if name is None: diff --git a/modules_poc/mod_scanner.py b/modules_poc/mod_scanner.py index a6a70188b87..4cded77e1b9 100755 --- a/modules_poc/mod_scanner.py +++ b/modules_poc/mod_scanner.py @@ -38,7 +38,7 @@ from cindex import ( RefQualifierKind, TranslationUnit, ) -from mod_mapping import mod_for_file, normpath_for_file +from mod_mapping import is_module_fully_marked, mod_for_file, normpath_for_file def perr(*values): @@ -868,6 +868,14 @@ def parseTU(args: list[str] | str): complete_headers.add(normpath_for_file(include.include)) continue + # Treat all headers from fully marked modules as complete. This makes newly-added + # headers in that module private by default, requiring explicit marking of the public + # API. + header_mod = mod_for_file(include.include) + if is_module_fully_marked(header_mod): + complete_headers.add(normpath_for_file(include.include)) + continue + # Note: using bytes to avoid unicode handling overhead since the # needles we are looking for are ascii-only. content = Path(include.include.name).read_bytes() diff --git a/modules_poc/modules.yaml b/modules_poc/modules.yaml index 8718d9854f9..7d615cdf4dd 100644 --- a/modules_poc/modules.yaml +++ b/modules_poc/modules.yaml @@ -15,6 +15,14 @@ # jira: # Jira "Assigned Team" that should be used when filing tickets for the module. 
# +# fully_marked: +# If present and true, behaves as if all headers in the module have been fully +# marked, meaning that any unmarked APIs will be treated as private, regardless +# of whether they have included modules.h. This lets us lock in the progress of +# marking that module so that it stays fully marked even as new files are added. +# Unlike other metadata, this is not inherited by submodules so that it is +# possible to have a fully_marked parent module before marking all submodules. +# # If unsure about these, or if multiple teams are involved, please pick a good place # to send questions/tickets to initially, understanding that we can always send them # somewhere else if needed. @@ -931,6 +939,7 @@ atlas_streams: meta: slack: streams-engine jira: Atlas Streams + fully_marked: true files: - src/mongo/db/modules/enterprise/src/streams