mirror of https://github.com/mongodb/mongo
SERVER-104399 SERVER-103542 implement submodule and file_private visibility rules (#35630)
GitOrigin-RevId: 3072be4a4cda82cae272260c0f74c70c32ea9558
This commit is contained in:
parent
58d505a6b9
commit
ded9a0e961
|
|
@ -3,24 +3,16 @@ import glob
|
||||||
import json
|
import json
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
from concurrent.futures import ProcessPoolExecutor, as_completed, wait
|
from concurrent.futures import ProcessPoolExecutor, as_completed, wait
|
||||||
from copy import deepcopy
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, TypedDict
|
from typing import Any, TypedDict
|
||||||
|
|
||||||
import pyzstd
|
import pyzstd
|
||||||
import typer # nicer error dump on exceptions
|
import typer # nicer error dump on exceptions
|
||||||
import yaml
|
|
||||||
from progressbar import ProgressBar, progressbar
|
from progressbar import ProgressBar, progressbar
|
||||||
|
|
||||||
try:
|
|
||||||
from yaml import CDumper as Dumper
|
|
||||||
from yaml import CLoader as Loader # noqa: F401 Not used right now but may be again
|
|
||||||
except ImportError:
|
|
||||||
raise RuntimeError("Why no cYaml?")
|
|
||||||
# from yaml import Loader, Dumper
|
|
||||||
|
|
||||||
|
|
||||||
class Decl(TypedDict):
|
class Decl(TypedDict):
|
||||||
display_name: str
|
display_name: str
|
||||||
|
|
@ -124,10 +116,13 @@ def worker(paths: list[bytes]):
|
||||||
merge_decls(json.loads(f.read()))
|
merge_decls(json.loads(f.read()))
|
||||||
|
|
||||||
|
|
||||||
|
def is_submodule_usage(decl: Decl, mod: str) -> bool:
    """Return True when ``mod`` is ``decl``'s own module or nested beneath it.

    ``mod`` counts as nested when it begins with the declaring module's name
    followed by a ``.``; requiring the separator keeps a name that merely
    shares a prefix (e.g. ``foo_bar`` vs ``foo``) from matching.
    """
    owner = decl["mod"]
    if owner == mod:
        return True
    return mod.startswith(owner + ".")
|
||||||
|
|
||||||
|
|
||||||
def main(
|
def main(
|
||||||
jobs: int = typer.Option(os.cpu_count(), "--jobs", "-j"),
|
jobs: int = typer.Option(os.cpu_count(), "--jobs", "-j"),
|
||||||
intra_module: bool = typer.Option(False, help="Include intra-module accesses"),
|
intra_module: bool = typer.Option(False, help="Include intra-module accesses"),
|
||||||
generate_yaml: bool = False,
|
|
||||||
):
|
):
|
||||||
timer = Timer()
|
timer = Timer()
|
||||||
paths = glob.glob(b"bazel-bin/**/*.mod_scanner_decls.json.zst", recursive=True)
|
paths = glob.glob(b"bazel-bin/**/*.mod_scanner_decls.json.zst", recursive=True)
|
||||||
|
|
@ -165,14 +160,16 @@ def main(
|
||||||
merge_decls(json.loads(f.read()))
|
merge_decls(json.loads(f.read()))
|
||||||
timer.mark("processed input")
|
timer.mark("processed input")
|
||||||
|
|
||||||
no_self_decls = deepcopy(all_decls)
|
out: Any = [dict(d) for d in all_decls.values()] # shallow copy each decl
|
||||||
for decl in no_self_decls.values():
|
if not intra_module:
|
||||||
if decl["mod"] in decl["used_from"]:
|
for decl in out:
|
||||||
del decl["used_from"][decl["mod"]]
|
decl["used_from"] = {
|
||||||
|
mod: locs
|
||||||
|
for mod, locs in decl["used_from"].items()
|
||||||
|
if not is_submodule_usage(decl, mod)
|
||||||
|
}
|
||||||
|
out = [d for d in out if d["used_from"]]
|
||||||
|
|
||||||
out: Any = deepcopy(
|
|
||||||
[d for d in (no_self_decls if not intra_module else all_decls).values() if d["used_from"]]
|
|
||||||
)
|
|
||||||
for decl in out:
|
for decl in out:
|
||||||
# go from {$MOD: $LOCS} map to [{mod: $MOD, locs: $LOCS}] list of
|
# go from {$MOD: $LOCS} map to [{mod: $MOD, locs: $LOCS}] list of
|
||||||
# objects which is easier to work with in mongo aggregations
|
# objects which is easier to work with in mongo aggregations
|
||||||
|
|
@ -185,30 +182,51 @@ def main(
|
||||||
json.dump(out, f)
|
json.dump(out, f)
|
||||||
timer.mark("dumped json")
|
timer.mark("dumped json")
|
||||||
|
|
||||||
if generate_yaml:
|
found_violations = False
|
||||||
for decl in out:
|
for decl in sorted(all_decls.values(), key=lambda d: d["display_name"]):
|
||||||
decl["used_from"] = {u["mod"]: u["locs"] for u in decl["used_from"]} # type: ignore
|
violations = []
|
||||||
# sort by file to make it easier to use
|
match decl["visibility"]:
|
||||||
out.sort(key=lambda d: d["loc"])
|
case "private":
|
||||||
|
err = f"Illegal use of {decl['display_name']} outside of module {decl['mod']}:"
|
||||||
|
for mod, locs in decl["used_from"].items():
|
||||||
|
if not is_submodule_usage(decl, mod):
|
||||||
|
for loc in locs:
|
||||||
|
violations.append(f" {loc} ({mod})")
|
||||||
|
|
||||||
timer.mark("massaged output for yaml")
|
case "file_private":
|
||||||
|
err = f"Illegal use of {decl['display_name']} outside of its file family:"
|
||||||
|
|
||||||
with open("merged_decls.yaml", "w") as f:
|
# file_base is the portion of the file name that defines the family
|
||||||
yaml.dump(out, f, Dumper=Dumper, width=1000000)
|
# e.g. bazel-out/blah/src/mongo/db/foo_details.h -> src/mongo/db/foo
|
||||||
timer.mark("dumped yaml")
|
file_base = decl["loc"].split(".")[0]
|
||||||
|
if index := file_base.index("src/mongo/"):
|
||||||
|
file_base = file_base[index:]
|
||||||
|
file_base = re.sub(r"_(internal|detail)s?$", "", file_base)
|
||||||
|
assert file_base.startswith("src/mongo/")
|
||||||
|
|
||||||
out = [d for d in no_self_decls.values() if d["used_from"] and d["visibility"] == "private"]
|
file_family_regex = re.compile(
|
||||||
out.sort(key=lambda d: d["display_name"])
|
rf"[\w/]*{file_base}(?:_(?:internals?|details?|test|bm|mock)(_.*)?)?\."
|
||||||
for decl in out:
|
)
|
||||||
print(f"Illegal use of {decl['display_name']} outside of module {decl['mod']}:")
|
assert file_family_regex.match(decl["loc"]) # sanity check
|
||||||
print(f" loc: {decl['loc']}")
|
|
||||||
print(" usages:")
|
for mod, locs in decl["used_from"].items():
|
||||||
for mod, locs in decl["used_from"].items():
|
for loc in locs:
|
||||||
for loc in locs:
|
# Must be in the same module even if file family matches.
|
||||||
print(f" {loc} ({mod})")
|
# This helps prevent accidental matches.
|
||||||
|
if mod != decl["mod"] or not file_family_regex.match(loc):
|
||||||
|
violations.append(f" {loc} ({mod})")
|
||||||
|
case _: # ignore other visibility types
|
||||||
|
continue
|
||||||
|
|
||||||
|
if violations:
|
||||||
|
found_violations = True
|
||||||
|
print(err)
|
||||||
|
print(f" loc: {decl['loc']}")
|
||||||
|
print(" usages:")
|
||||||
|
print("\n".join(violations))
|
||||||
timer.mark("checked for privacy violations")
|
timer.mark("checked for privacy violations")
|
||||||
if out:
|
|
||||||
sys.exit(1)
|
sys.exit(found_violations) # bools are ints, so False(0) is success and True(1) is failure
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue