SERVER-104208 Disallow any new unowned files (#35304)

GitOrigin-RevId: ee2b90adc4b9ff0b8f9211be066cc010509e2660
Trevor Guidry 2025-04-30 13:42:36 -05:00 committed by MongoDB Bot
parent 377f7087ee
commit f1139e838e
14 changed files with 519 additions and 10 deletions

.gitignore vendored
View File

@@ -330,3 +330,8 @@ etc/evaluated_evergreen.yml
etc/evaluated_evergreen_nightly.yml
etc/evaluated_system_perf.yml
etc/trimmed_system_perf.yml
# evergreen files to ignore when detecting patch changes
/version_expansions.yml
/engflow.*
/.bazelrc.evergreen

View File

@@ -203,6 +203,10 @@ load("@bazel_rules_mongo//codeowners:codeowners_validator.bzl", "codeowners_vali
codeowners_validator()
load("@bazel_rules_mongo//codeowners:codeowners_binary.bzl", "codeowners_binary")
codeowners_binary()
poetry(
name = "poetry_bazel_rules_mongo",
lockfile = "@bazel_rules_mongo//:poetry.lock",

View File

@@ -39,6 +39,10 @@ load("@bazel_rules_mongo//codeowners:codeowners_validator.bzl", "codeowners_vali
codeowners_validator()
load("@bazel_rules_mongo//codeowners:codeowners_binary.bzl", "codeowners_binary")
codeowners_binary()
poetry(
name = "poetry_bazel_rules_mongo",
lockfile = "@bazel_rules_mongo//:poetry.lock",

View File

@@ -29,3 +29,7 @@ poetry(
load("//codeowners:codeowners_validator.bzl", "codeowners_validator")
codeowners_validator()
load("//codeowners:codeowners_binary.bzl", "codeowners_binary")
codeowners_binary()

View File

@@ -5,12 +5,15 @@ py_binary(
srcs = [
"codeowners_generate.py",
"validate_codeowners.py",
"//utils:evergreen_git.py",
],
data = [
"@codeowners_binary//:codeowners-binary",
"@codeowners_validator//:codeowners-validator",
],
env = {
"CODEOWNERS_VALIDATOR_PATH": "$(execpath @codeowners_validator//:codeowners-validator)",
"CODEOWNERS_BINARY_PATH": "$(execpath @codeowners_binary//:codeowners-binary)",
},
main = "codeowners_generate.py",
visibility = ["//visibility:public"],
@@ -18,6 +21,9 @@ py_binary(
dependency(
"pyyaml",
),
dependency(
"gitpython",
),
],
)
@@ -26,12 +32,15 @@ py_binary(
srcs = [
"codeowners_generate.py",
"validate_codeowners.py",
"//utils:evergreen_git.py",
],
data = [
"@codeowners_binary//:codeowners",
"@codeowners_validator//:codeowners-validator",
],
env = {
"CODEOWNERS_VALIDATOR_PATH": "$(execpath @codeowners_validator//:codeowners-validator)",
"CODEOWNERS_BINARY_PATH": "$(execpath @codeowners_binary//:codeowners)",
"ADD_AUTO_APPROVE_USER": "true",
},
main = "codeowners_generate.py",
@@ -40,5 +49,8 @@ py_binary(
dependency(
"pyyaml",
),
dependency(
"gitpython",
),
],
)
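
The env block above is the handoff point between Bazel and the generator: $(execpath ...) expands to the runfiles path of each data dependency and is exported under the named variable. A minimal sketch of the consuming side, mirroring the lookups codeowners_generate.py performs further down (nothing here beyond the two variable names defined in this BUILD file):

import os

# Resolve the binary paths Bazel injected through the py_binary env attribute.
validator_path = os.path.abspath(os.environ["CODEOWNERS_VALIDATOR_PATH"])
codeowners_path = os.path.abspath(os.environ["CODEOWNERS_BINARY_PATH"])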

View File

@@ -0,0 +1,61 @@
"""Repository rules for codeowners validator download"""
load("//utils:downloads.bzl", "retry_download_and_extract")
load("//utils:platforms_normalize.bzl", "ARCH_NORMALIZE_MAP", "OS_NORMALIZE_MAP")
URLS_MAP = {
"linux_aarch64": {
"sha": "bb3a283e2bd6c50d8b383c5a8b99179ded65eefdbd95945826a61f860ce531f4",
"url": "https://github.com/hmarr/codeowners/releases/download/v1.2.1/codeowners_1.2.1_linux_arm64.tar.gz",
},
"linux_x86_64": {
"sha": "94f9f9ec43dba151816b5c2fd98698afbfd03d5ac63db77d2d8c2cf77b326bb0",
"url": "https://github.com/hmarr/codeowners/releases/download/v1.2.1/codeowners_1.2.1_linux_amd64.tar.gz",
},
"macos_aarch64": {
"sha": "1a271d2a3960491d7fceffdca741e7a3830cb2ab5013723ed8f9efe04dd3d9c1",
"url": "https://github.com/hmarr/codeowners/releases/download/v1.2.1/codeowners_1.2.1_darwin_arm64.tar.gz",
},
"macos_x86_64": {
"sha": "39d5868f50a3716af61c1bd4722b9f840f07a005d3018b20483de26b10ced19a",
"url": "https://github.com/hmarr/codeowners/releases/download/v1.2.1/codeowners_1.2.1_darwin_amd64.tar.gz",
},
}
def _codeowners_binary_download(ctx):
"""
Downloads the codeowners binary
Args:
ctx: Repository context.
"""
os = ctx.os.name
arch = ctx.os.arch
os_constraint = OS_NORMALIZE_MAP[os]
arch_constraint = ARCH_NORMALIZE_MAP[arch]
platform_info = URLS_MAP["{os}_{arch}".format(os = os_constraint, arch = arch_constraint)]
ctx.report_progress("downloading codeowners binary")
retry_download_and_extract(
ctx = ctx,
tries = 5,
url = platform_info["url"],
sha256 = platform_info["sha"],
)
ctx.file(
"BUILD.bazel",
"""
package(default_visibility = ["//visibility:public"])
exports_files(["codeowners"])
""",
)
return None
_codeowners_binary = repository_rule(
implementation = _codeowners_binary_download,
attrs = {},
)
def codeowners_binary():
_codeowners_binary(name = "codeowners_binary")
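
The repository rule keys the download on a normalized "{os}_{arch}" string and pins each archive to a SHA-256 before exporting the codeowners binary. A rough Python equivalent of that lookup-and-verify step, using the linux_x86_64 entry from URLS_MAP; the function name and direct use of urllib are illustrative only, the real rule delegates to retry_download_and_extract:

import hashlib
import urllib.request

URL = "https://github.com/hmarr/codeowners/releases/download/v1.2.1/codeowners_1.2.1_linux_amd64.tar.gz"
SHA = "94f9f9ec43dba151816b5c2fd98698afbfd03d5ac63db77d2d8c2cf77b326bb0"

def fetch_and_verify(url: str = URL, sha: str = SHA) -> bytes:
    # Download the release archive and compare its SHA-256 to the pinned value.
    data = urllib.request.urlopen(url).read()
    digest = hashlib.sha256(data).hexdigest()
    if digest != sha:
        raise RuntimeError(f"checksum mismatch: expected {sha}, got {digest}")
    return data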

View File

@@ -5,10 +5,12 @@ import os
import pathlib
import subprocess
import sys
import tempfile
from functools import cache, lru_cache
import yaml
from codeowners.validate_codeowners import run_validator
from utils import evergreen_git
OWNERS_FILE_NAME = "OWNERS"
OWNERS_FILE_EXTENSIONS = (".yml", ".yaml")
@@ -207,6 +209,65 @@ def validate_generated_codeowners(validator_path: str) -> int:
return 1
def check_new_files(codeowners_binary_path: str, expansions_file: str, branch: str) -> int:
new_files = evergreen_git.get_new_files(expansions_file, branch)
if not new_files:
print("No new files were detected.")
return 0
print(f"The following new files were detected: {new_files}")
temp_output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt")
temp_output_file.close()
# The output can be bigger than the allowed subprocess buffer, so we redirect it into a file
command = f"{codeowners_binary_path} --unowned > {temp_output_file.name}"
process = subprocess.run(command, shell=True, stderr=subprocess.PIPE, text=True)
if process.returncode != 0:
print(process.stderr)
print("Error while trying to find unowned files")
return process.returncode
unowned_files = set()
with open(temp_output_file.name, "r") as file:
for line in file.read().split("\n"):
if not line:
continue
parts = line.split()
file_name = parts[0].strip()
unowned_files.add(file_name)
unowned_new_files = []
for file in new_files:
if file in unowned_files:
unowned_new_files.append(file)
if unowned_new_files:
print("The following new files are unowned:")
for file in unowned_new_files:
print(f"- {file}")
print("New files are required to have code owners. See http://go/codeowners-ug")
return 1
print("There are no new files added that are unowned.")
return 0
def post_generation_checks(
validator_path: str,
should_run_validation: bool,
codeowners_binary_path: str,
should_check_new_files: bool,
expansions_file: str,
branch: str,
) -> int:
status = 0
if should_run_validation:
status |= validate_generated_codeowners(validator_path)
if should_check_new_files:
status |= check_new_files(codeowners_binary_path, expansions_file, branch)
return status
def main():
# If we are running in bazel, default the directory to the workspace
default_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY")
@@ -221,6 +282,13 @@ def main():
raise RuntimeError("no CODEOWNERS_VALIDATOR_PATH env var found")
codeowners_validator_path = os.path.abspath(codeowners_validator_path)
codeowners_binary_path = os.environ.get("CODEOWNERS_BINARY_PATH")
if not codeowners_binary_path:
raise RuntimeError("no CODEOWNERS_BINARY_PATH env var found")
codeowners_binary_path = os.path.abspath(codeowners_binary_path)
parser = argparse.ArgumentParser(
prog="GenerateCodeowners",
description="This generates a CODEOWNERS file based off of our OWNERS.yml files. "
@@ -242,6 +310,30 @@ def main():
default=False,
action="store_true",
)
parser.add_argument(
"--run-validation",
help="When set, validation will be run against the resulting CODEOWNERS file.",
default=True,
action="store_false",
)
parser.add_argument(
"--check-new-files",
help="When set, this script will check new files to ensure they are owned.",
default=True,
action="store_false",
)
parser.add_argument(
"--expansions-file",
help="When set, implements CI specific logic around getting new files in a specific patch.",
default=None,
action="store",
)
parser.add_argument(
"--branch",
help="Helps the script understand what branch to compare against to see what new files are added when run locally. Defaults to master or main.",
default=None,
action="store",
)
args = parser.parse_args()
os.chdir(args.repo_dir)
@@ -285,14 +377,28 @@ def main():
return 1
print("CODEOWNERS file is up to date")
return validate_generated_codeowners(codeowners_validator_path)
return post_generation_checks(
codeowners_validator_path,
args.run_validation,
codeowners_binary_path,
args.check_new_files,
args.expansions_file,
args.branch,
)
with open(output_file, "w") as file:
file.write(new_contents)
print(f"Successfully wrote to the CODEOWNERS file at: {os.path.abspath(output_file)}")
# Add validation after generating CODEOWNERS file
return validate_generated_codeowners(codeowners_validator_path)
return post_generation_checks(
codeowners_validator_path,
args.run_validation,
codeowners_binary_path,
args.check_new_files,
args.expansions_file,
args.branch,
)
if __name__ == "__main__":
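
A condensed sketch of the new-file ownership check added above, assuming (as check_new_files does) that codeowners --unowned prints one line per unowned file with the path as the first whitespace-separated token; the helper name is hypothetical:

import subprocess
import tempfile

def unowned_new_files(codeowners_binary_path: str, new_files: list[str]) -> list[str]:
    # Spool output through a temp file; it can exceed the subprocess pipe buffer.
    with tempfile.NamedTemporaryFile(suffix=".txt") as out:
        subprocess.run(
            f"{codeowners_binary_path} --unowned > {out.name}",
            shell=True, check=True, text=True,
        )
        with open(out.name, "r") as fh:
            unowned = {line.split()[0] for line in fh if line.strip()}
    return [f for f in new_files if f in unowned]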

View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand.
[[package]]
name = "decorator"
@@ -6,17 +6,53 @@ version = "5.2.1"
description = "Decorators for Humans"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"},
{file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"},
]
[[package]]
name = "gitdb"
version = "4.0.12"
description = "Git Object Database"
optional = false
python-versions = ">=3.7"
groups = ["main"]
files = [
{file = "gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf"},
{file = "gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571"},
]
[package.dependencies]
smmap = ">=3.0.1,<6"
[[package]]
name = "gitpython"
version = "3.1.44"
description = "GitPython is a Python library used to interact with Git repositories"
optional = false
python-versions = ">=3.7"
groups = ["main"]
files = [
{file = "GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110"},
{file = "gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269"},
]
[package.dependencies]
gitdb = ">=4.0.1,<5"
[package.extras]
doc = ["sphinx (>=7.1.2,<7.2)", "sphinx-autodoc-typehints", "sphinx_rtd_theme"]
test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"]
[[package]]
name = "py"
version = "1.11.0"
description = "library with cross-python path, ini-parsing, io, code, log facilities"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
groups = ["main"]
files = [
{file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
@@ -28,6 +64,7 @@ version = "6.0.2"
description = "YAML parser and emitter for Python"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
{file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
@@ -90,6 +127,7 @@ version = "0.9.2"
description = "Easy to use retry decorator."
optional = false
python-versions = "*"
groups = ["main"]
files = [
{file = "retry-0.9.2-py2.py3-none-any.whl", hash = "sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606"},
{file = "retry-0.9.2.tar.gz", hash = "sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4"},
@@ -99,7 +137,19 @@ files = [
decorator = ">=3.4.2"
py = ">=1.4.26,<2.0.0"
[[package]]
name = "smmap"
version = "5.0.2"
description = "A pure Python implementation of a sliding window memory map manager"
optional = false
python-versions = ">=3.7"
groups = ["main"]
files = [
{file = "smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e"},
{file = "smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5"},
]
[metadata]
lock-version = "2.0"
lock-version = "2.1"
python-versions = "^3.10"
content-hash = "bf0a4d91d4bb05ca502b55ea476b6035ae2f72e0c1e6198e7b58733693e2b908"
content-hash = "ee6675fea24b574ad815d5ca55bbac594a456899ccf18ccc2637d3163a7951c7"

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "bazel_rules_mongo"
version = "0.1.3"
version = "0.1.4"
description = "Bazel rule we use to ship common code between bazel repos"
authors = ["Trevor Guidry <trevor.guidry@mongodb.com>"]
readme = "README.md"
@@ -9,6 +9,7 @@ readme = "README.md"
python = "^3.10"
pyyaml = "^6.0.2"
retry = "^0.9.2"
gitpython = "^3.1.44"
[build-system]

View File

@@ -0,0 +1,157 @@
import os
import shutil
import tempfile
import unittest
from git import Repo
from mock import MagicMock
from buildscripts.bazel_rules_mongo.utils import evergreen_git
changed_file_name = "changed_file.txt"
new_file_name = "new_file.txt"
def write_file(repo: Repo, file_name: str) -> None:
# append text to the file so git sees it as changed (or newly created)
with open(os.path.join(repo.working_tree_dir, file_name), "a+") as file:
file.write("change\n")
class TestChangedFiles(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.tmp_dir = tempfile.mkdtemp()
root_repo = Repo()
# commit of HEAD
commit = root_repo.head.commit.hexsha
files_to_copy = []
# copy the current repo into a temp dir to do testing on
root_repo.git.execute(["git", "worktree", "add", cls.tmp_dir, commit])
# get files tracked by git that have been changed
diff_output = root_repo.git.execute(
["git", "diff", "--name-only", "--diff-filter=d", commit]
)
files_to_copy.extend(diff_output.split("\n"))
# gets all the untracked changes in the current repo
untracked_changes = root_repo.git.execute(["git", "add", ".", "-n"])
for line in untracked_changes.split("\n"):
if not line:
continue
files_to_copy.append(line.strip()[5:-1])
# copy all changed files from the current repo to the new worktree for testing.
for file in files_to_copy:
if not file:
continue
if not os.path.exists(file):
raise RuntimeError(f"Changed file was found and does not exist: {file}")
new_dest = os.path.join(cls.tmp_dir, file)
os.makedirs(os.path.dirname(new_dest), exist_ok=True)
shutil.copy(file, new_dest)
cls.repo = Repo(cls.tmp_dir)
# add a testing file to this original commit so we can treat it as a preexisting file that
# is going to be modified
write_file(cls.repo, changed_file_name)
cls.repo.git.execute(["git", "add", "."])
cls.repo.git.execute(["git", "commit", "-m", "Commit changed files"])
# this new commit is our base revision to compare changes against
cls.base_revision = cls.repo.head.commit.hexsha
os.chdir(cls.tmp_dir)
@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.tmp_dir)
pass
def setUp(self):
# change the file already committed to the repo
write_file(self.repo, changed_file_name)
# make a new file that has not been committed yet
write_file(self.repo, new_file_name)
def tearDown(self):
# reset to the original state between tests
self.repo.git.execute(["git", "reset", "--hard", self.base_revision])
pass
def test_local_unchanged_files(self):
evergreen_git.get_remote_branch_ref = MagicMock(return_value=self.base_revision)
new_files = evergreen_git.get_new_files()
self.assertEqual(
new_files, [], msg="New files list was not empty when no new files were added to git."
)
changed_files = evergreen_git.get_changed_files()
self.assertEqual(
changed_files, [changed_file_name], msg="Changed file list was not as expected."
)
self.repo.git.execute(["git", "add", "."])
# random file not tracked by git
write_file(self.repo, "random_other_untracked_file.txt")
new_files = evergreen_git.get_new_files()
self.assertEqual(
new_files,
[new_file_name],
msg="New file list did not contain the new file added to git.",
)
changed_files = evergreen_git.get_changed_files()
self.assertEqual(
changed_files,
[changed_file_name, new_file_name],
msg="Changed file list was not as expected.",
)
def test_evergreen_patch(self):
# files in evergreen patches are normally left untracked, so we don't have to do
# anything to the git state
with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as tmp:
tmp.write("is_patch: true\n")
tmp.write(f"revision: {self.base_revision}\n")
tmp.flush()
new_files = evergreen_git.get_new_files(expansions_file=tmp.name)
self.assertEqual(
new_files, [new_file_name], msg="New file list did not contain the new file."
)
changed_files = evergreen_git.get_changed_files(expansions_file=tmp.name)
self.assertEqual(
changed_files,
[changed_file_name, new_file_name],
msg="Changed file list was not as expected.",
)
def test_evergreen_waterfall(self):
# Evergreen waterfall runs just check against the last commit so we need to commit the changes
self.repo.git.execute(["git", "add", "."])
self.repo.git.execute(["git", "commit", "-m", "Fake waterfall changes"])
with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as tmp:
tmp.write("fake_expansion: true")
tmp.flush()
new_files = evergreen_git.get_new_files(expansions_file=tmp.name)
self.assertEqual(
new_files, [new_file_name], msg="New file list did not contain the new file."
)
changed_files = evergreen_git.get_changed_files(expansions_file=tmp.name)
self.assertEqual(
changed_files,
[changed_file_name, new_file_name],
msg="Changed file list was not as expected.",
)
def test_remote_picker(self):
remote = evergreen_git.get_mongodb_remote(self.repo)
self.assertIn("10gen/mongo", remote.url, msg="The wrong remote was found.")
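
These tests exercise evergreen_git against a throwaway worktree of the current repo. A rough way to run them locally from the repo root with plain unittest (the discovery path is taken from the resmoke selector change below; adjust if the tests are invoked through Bazel instead):

import unittest

# Discover and run the new tests under buildscripts/bazel_rules_mongo/tests.
suite = unittest.defaultTestLoader.discover(
    "buildscripts/bazel_rules_mongo/tests", pattern="test_*.py"
)
unittest.TextTestRunner(verbosity=2).run(suite)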

View File

@@ -1 +1 @@
# This file intentionally left blank
exports_files(["evergreen_git.py"])

View File

@@ -0,0 +1,105 @@
import os
from functools import cache
from typing import Dict, List
import yaml
from git import Remote, Repo
@cache
def get_expansions(expansions_file: str) -> Dict[str, any]:
if not expansions_file:
return None
if not os.path.exists(expansions_file):
raise RuntimeError(f"Expansions file not found at {expansions_file}")
with open(expansions_file, "r") as file:
return yaml.safe_load(file)
def get_mongodb_remote(repo: Repo) -> Remote:
remotes = repo.remotes
picked_remote = None
for remote in remotes:
url = remote.url
# local repository pointing to a local dir
if not url.endswith(".git"):
continue
# all other remote urls should end with owner/project.git
parts = url[:-4].split("/")
assert len(parts) >= 2, f"Unexpected git remote url: {url}"
owner = parts[-2].split(":")[-1]
if owner in ("10gen", "mongodb", "evergreen-ci", "mongodb-ets", "realm", "mongodb-js"):
picked_remote = remote
print(f"Selected remote: {remote.url}")
break
if picked_remote is None:
print(
"Could not find remote from any mongodb github org, falling back to the first remote found"
)
picked_remote = next(repo.remotes)
if picked_remote is None:
raise RuntimeError("Could not find valid remote")
return picked_remote
def get_remote_branch_ref(repo: Repo, branch: str = None) -> str:
# If branch is not specified, default to master or main
if branch is None:
for branch in repo.branches:
if branch.name in ("main", "master"):
branch = branch.name
break
if branch is None:
raise RuntimeError("Could not infer correct branch name")
# pick a remote from a mongodb org
picked_remote = get_mongodb_remote(repo)
picked_remote.fetch()
# find the latest commit on the remote branch to check for a valid merge-base with the current branch
remote_branch = repo.refs[f"{picked_remote.name}/{branch}"]
diff_commit = repo.git.execute(["git", "merge-base", remote_branch.commit.hexsha, "HEAD"])
return diff_commit
def get_new_files(expansions_file: str = None, branch: str = None) -> List[str]:
# docs on the diff-filter are here https://www.kernel.org/pub/software/scm/git/docs/git-diff.html
# This gets added, renamed, and copied files from the git diff.
return get_changed_files(expansions_file, branch, diff_filter="ARC")
def get_changed_files(
expansions_file: str = None, branch: str = None, diff_filter: str = "d"
) -> List[str]:
expansions = get_expansions(expansions_file)
in_ci = expansions_file is not None
diff_commit = None
repo = Repo()
if not in_ci:
diff_commit = get_remote_branch_ref(repo, branch)
else:
if expansions.get("is_patch", None):
# patches from the cli have the changes uncommitted; we need to add them to git for git diff to work
# we add the files in github patches as well to make it fail consistently if new files
# are generated in CI before this point.
repo.git.execute(["git", "add", "."])
diff_commit = expansions.get("revision")
else:
# in waterfall runs we just want to compare to the previous commit
diff_commit = repo.git.execute(["git", "rev-parse", "HEAD^1"])
output = repo.git.execute(
["git", "diff", "--name-only", f"--diff-filter={diff_filter}", diff_commit]
)
files = output.split("\n")
return [file for file in files if file]
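
Typical call sites for the two entry points in this module, matching how codeowners_generate.py and the tests above use them; the ../expansions.yml path is the one the evergreen task passes and is only an example:

from buildscripts.bazel_rules_mongo.utils import evergreen_git

# Local run: diff against the merge-base with the mongodb remote's master/main.
new_files = evergreen_git.get_new_files()
changed_files = evergreen_git.get_changed_files()

# CI run: patch builds diff against the expansions "revision", waterfall builds
# diff against HEAD^1.
ci_new_files = evergreen_git.get_new_files(expansions_file="../expansions.yml")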

View File

@@ -4,6 +4,7 @@ selector:
roots:
- buildscripts/tests/**/test_*.py
- buildscripts/idl/tests/**/test_*.py
- buildscripts/bazel_rules_mongo/tests/test_*.py
exclude_files:
# These tests are also @unittest.skip'ed. SERVER-48969 tracks re-enabling them.
- buildscripts/tests/resmokelib/test_selector.py # Test assumes POSIX path.

View File

@@ -615,8 +615,7 @@ tasks:
target: >-
//:format -- --check
# TODO(SERVER-97804): rename if display_name appears on the evergreen UI
- name: bazel_run_//:codeowners
- name: bazel_run_codeowners
tags:
[
"assigned_to_jira_team_devprod_build",
@@ -644,7 +643,7 @@ tasks:
- func: "bazel run"
vars:
target: >-
//:codeowners -- --check
//:codeowners -- --check --expansions-file ../expansions.yml
# TODO(SERVER-97804): rename if display_name appears on the evergreen UI
- name: bazel_run_//buildscripts:resmoke