From f1139e838eec4e0a4fd35d65cf5583eab007b0e4 Mon Sep 17 00:00:00 2001 From: Trevor Guidry Date: Wed, 30 Apr 2025 13:42:36 -0500 Subject: [PATCH] SERVER-104208 Disallow any new unowned files (#35304) GitOrigin-RevId: ee2b90adc4b9ff0b8f9211be066cc010509e2660 --- .gitignore | 5 + WORKSPACE.bazel | 4 + buildscripts/bazel_rules_mongo/README.md | 4 + .../bazel_rules_mongo/WORKSPACE.bazel | 4 + .../bazel_rules_mongo/codeowners/BUILD.bazel | 12 ++ .../codeowners/codeowners_binary.bzl | 61 +++++++ .../codeowners/codeowners_generate.py | 110 +++++++++++- buildscripts/bazel_rules_mongo/poetry.lock | 56 ++++++- buildscripts/bazel_rules_mongo/pyproject.toml | 3 +- .../tests/test_changed_files.py | 157 ++++++++++++++++++ .../bazel_rules_mongo/utils/BUILD.bazel | 2 +- .../bazel_rules_mongo/utils/evergreen_git.py | 105 ++++++++++++ .../suites/buildscripts_test.yml | 1 + .../tasks/misc_tasks.yml | 5 +- 14 files changed, 519 insertions(+), 10 deletions(-) create mode 100644 buildscripts/bazel_rules_mongo/codeowners/codeowners_binary.bzl create mode 100644 buildscripts/bazel_rules_mongo/tests/test_changed_files.py create mode 100644 buildscripts/bazel_rules_mongo/utils/evergreen_git.py diff --git a/.gitignore b/.gitignore index 3d7e66feea4..40eb29603d8 100644 --- a/.gitignore +++ b/.gitignore @@ -330,3 +330,8 @@ etc/evaluated_evergreen.yml etc/evaluated_evergreen_nightly.yml etc/evaluated_system_perf.yml etc/trimmed_system_perf.yml + +# evergreen files to ignore when detecting patch changes +/version_expansions.yml +/engflow.* +/.bazelrc.evergreen diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index 0cce61888e7..b931d6468d0 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -203,6 +203,10 @@ load("@bazel_rules_mongo//codeowners:codeowners_validator.bzl", "codeowners_vali codeowners_validator() +load("@bazel_rules_mongo//codeowners:codeowners_binary.bzl", "codeowners_binary") + +codeowners_binary() + poetry( name = "poetry_bazel_rules_mongo", lockfile = 
"@bazel_rules_mongo//:poetry.lock", diff --git a/buildscripts/bazel_rules_mongo/README.md b/buildscripts/bazel_rules_mongo/README.md index 3167ba9bc9b..62e2497f8d2 100644 --- a/buildscripts/bazel_rules_mongo/README.md +++ b/buildscripts/bazel_rules_mongo/README.md @@ -39,6 +39,10 @@ load("@bazel_rules_mongo//codeowners:codeowners_validator.bzl", "codeowners_vali codeowners_validator() +load("@bazel_rules_mongo//codeowners:codeowners_binary.bzl", "codeowners_binary") + +codeowners_binary() + poetry( name = "poetry_bazel_rules_mongo", lockfile = "@bazel_rules_mongo//:poetry.lock", diff --git a/buildscripts/bazel_rules_mongo/WORKSPACE.bazel b/buildscripts/bazel_rules_mongo/WORKSPACE.bazel index fd24d62d526..bc999d7b659 100644 --- a/buildscripts/bazel_rules_mongo/WORKSPACE.bazel +++ b/buildscripts/bazel_rules_mongo/WORKSPACE.bazel @@ -29,3 +29,7 @@ poetry( load("//codeowners:codeowners_validator.bzl", "codeowners_validator") codeowners_validator() + +load("//codeowners:codeowners_binary.bzl", "codeowners_binary") + +codeowners_binary() diff --git a/buildscripts/bazel_rules_mongo/codeowners/BUILD.bazel b/buildscripts/bazel_rules_mongo/codeowners/BUILD.bazel index 704a92a85ba..1a0c6e5f9d6 100644 --- a/buildscripts/bazel_rules_mongo/codeowners/BUILD.bazel +++ b/buildscripts/bazel_rules_mongo/codeowners/BUILD.bazel @@ -5,12 +5,15 @@ py_binary( srcs = [ "codeowners_generate.py", "validate_codeowners.py", + "//utils:evergreen_git.py", ], data = [ + "@codeowners_binary//:codeowners-binary", "@codeowners_validator//:codeowners-validator", ], env = { "CODEOWNERS_VALIDATOR_PATH": "$(execpath @codeowners_validator//:codeowners-validator)", + "CODEOWNERS_BINARY_PATH": "$(execpath @codeowners_binary//:codeowners-binary)", }, main = "codeowners_generate.py", visibility = ["//visibility:public"], @@ -18,6 +21,9 @@ py_binary( dependency( "pyyaml", ), + dependency( + "gitpython", + ), ], ) @@ -26,12 +32,15 @@ py_binary( srcs = [ "codeowners_generate.py", "validate_codeowners.py", + 
"//utils:evergreen_git.py", ], data = [ + "@codeowners_binary//:codeowners", "@codeowners_validator//:codeowners-validator", ], env = { "CODEOWNERS_VALIDATOR_PATH": "$(execpath @codeowners_validator//:codeowners-validator)", + "CODEOWNERS_BINARY_PATH": "$(execpath @codeowners_binary//:codeowners)", "ADD_AUTO_APPROVE_USER": "true", }, main = "codeowners_generate.py", @@ -40,5 +49,8 @@ py_binary( dependency( "pyyaml", ), + dependency( + "gitpython", + ), ], ) diff --git a/buildscripts/bazel_rules_mongo/codeowners/codeowners_binary.bzl b/buildscripts/bazel_rules_mongo/codeowners/codeowners_binary.bzl new file mode 100644 index 00000000000..03abf460ed7 --- /dev/null +++ b/buildscripts/bazel_rules_mongo/codeowners/codeowners_binary.bzl @@ -0,0 +1,61 @@ +"""Repository rules for codeowners validator download""" + +load("//utils:downloads.bzl", "retry_download_and_extract") +load("//utils:platforms_normalize.bzl", "ARCH_NORMALIZE_MAP", "OS_NORMALIZE_MAP") + +URLS_MAP = { + "linux_aarch64": { + "sha": "bb3a283e2bd6c50d8b383c5a8b99179ded65eefdbd95945826a61f860ce531f4", + "url": "https://github.com/hmarr/codeowners/releases/download/v1.2.1/codeowners_1.2.1_linux_arm64.tar.gz", + }, + "linux_x86_64": { + "sha": "94f9f9ec43dba151816b5c2fd98698afbfd03d5ac63db77d2d8c2cf77b326bb0", + "url": "https://github.com/hmarr/codeowners/releases/download/v1.2.1/codeowners_1.2.1_linux_amd64.tar.gz", + }, + "macos_aarch64": { + "sha": "1a271d2a3960491d7fceffdca741e7a3830cb2ab5013723ed8f9efe04dd3d9c1", + "url": "https://github.com/hmarr/codeowners/releases/download/v1.2.1/codeowners_1.2.1_darwin_arm64.tar.gz", + }, + "macos_x86_64": { + "sha": "39d5868f50a3716af61c1bd4722b9f840f07a005d3018b20483de26b10ced19a", + "url": "https://github.com/hmarr/codeowners/releases/download/v1.2.1/codeowners_1.2.1_darwin_amd64.tar.gz", + }, +} + +def _codeowners_binary_download(ctx): + """ + Downloads a codeowners validator binary + + Args: + ctx: Repository context. 
def check_new_files(codeowners_binary_path: str, expansions_file: str, branch: str) -> int:
    """Fail when a file newly added by the current change has no code owner.

    Args:
        codeowners_binary_path: Path to the ``codeowners`` executable.
        expansions_file: Evergreen expansions file, or ``None`` when run locally.
        branch: Branch to compare against when run locally, or ``None`` to infer it.

    Returns:
        0 when no new files exist or all of them are owned, 1 when at least one
        new file is unowned or the binary could not be started, otherwise the
        non-zero exit status of the binary.
    """
    new_files = evergreen_git.get_new_files(expansions_file, branch)
    if not new_files:
        print("No new files were detected.")
        return 0
    print(f"The following new files were detected: {new_files}")

    # The output can be bigger than the allowed subprocess buffer, so it is sent straight to a
    # temporary file. The context manager removes the file again, and passing an argv list
    # (no shell) keeps paths with spaces or shell metacharacters intact.
    with tempfile.TemporaryFile(mode="w+", suffix=".txt") as output_file:
        try:
            process = subprocess.run(
                [codeowners_binary_path, "--unowned"],
                stdout=output_file,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
            )
        except OSError as err:
            # Without a shell a missing/non-executable binary raises instead of exiting non-zero.
            print(err)
            print("Error while trying to find unowned files")
            return 1

        if process.returncode != 0:
            print(process.stderr)
            print("Error while trying to find unowned files")
            return process.returncode

        output_file.seek(0)
        unowned_files = set()
        for line in output_file:
            # The first whitespace-separated token of each line is taken as the file path.
            parts = line.split()
            if parts:
                unowned_files.add(parts[0])

    unowned_new_files = [file for file in new_files if file in unowned_files]

    if unowned_new_files:
        print("The following new files are unowned:")
        for file in unowned_new_files:
            print(f"- {file}")
        print("New files are required to have code owners. See http://go/codeowners-ug")
        return 1

    print("There are no new files added that are unowned.")
    return 0


def post_generation_checks(
    validator_path: str,
    should_run_validation: bool,
    codeowners_binary_path: str,
    should_check_new_files: bool,
    expansions_file: str,
    branch: str,
) -> int:
    """Run the enabled checks against the generated CODEOWNERS file.

    Both checks run even if the first one fails so that every problem is reported.

    Returns:
        0 when every enabled check passed, otherwise the bitwise OR of the
        failing checks' statuses (non-zero).
    """
    status = 0
    if should_run_validation:
        status |= validate_generated_codeowners(validator_path)
    if should_check_new_files:
        status |= check_new_files(codeowners_binary_path, expansions_file, branch)

    return status
" @@ -242,6 +310,30 @@ def main(): default=False, action="store_true", ) + parser.add_argument( + "--run-validation", + help="When set, validation will be run against the resulting CODEOWNERS file.", + default=True, + action="store_false", + ) + parser.add_argument( + "--check-new-files", + help="When set, this script will check new files to ensure they are owned.", + default=True, + action="store_false", + ) + parser.add_argument( + "--expansions-file", + help="When set, implements CI specific logic around getting new files in a specific patch.", + default=None, + action="store", + ) + parser.add_argument( + "--branch", + help="Helps the script understand what branch to compare against to see what new files are added when run locally. Defaults to master or main.", + default=None, + action="store", + ) args = parser.parse_args() os.chdir(args.repo_dir) @@ -285,14 +377,28 @@ def main(): return 1 print("CODEOWNERS file is up to date") - return validate_generated_codeowners(codeowners_validator_path) + return post_generation_checks( + codeowners_validator_path, + args.run_validation, + codeowners_binary_path, + args.check_new_files, + args.expansions_file, + args.branch, + ) with open(output_file, "w") as file: file.write(new_contents) print(f"Successfully wrote to the CODEOWNERS file at: {os.path.abspath(output_file)}") # Add validation after generating CODEOWNERS file - return validate_generated_codeowners(codeowners_validator_path) + return post_generation_checks( + codeowners_validator_path, + args.run_validation, + codeowners_binary_path, + args.check_new_files, + args.expansions_file, + args.branch, + ) if __name__ == "__main__": diff --git a/buildscripts/bazel_rules_mongo/poetry.lock b/buildscripts/bazel_rules_mongo/poetry.lock index 6fae8fb44b1..1bb83190e10 100644 --- a/buildscripts/bazel_rules_mongo/poetry.lock +++ b/buildscripts/bazel_rules_mongo/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by 
hand. +# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand. [[package]] name = "decorator" @@ -6,17 +6,53 @@ version = "5.2.1" description = "Decorators for Humans" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"}, {file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"}, ] +[[package]] +name = "gitdb" +version = "4.0.12" +description = "Git Object Database" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf"}, + {file = "gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.44" +description = "GitPython is a Python library used to interact with Git repositories" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110"}, + {file = "gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +doc = ["sphinx (>=7.1.2,<7.2)", "sphinx-autodoc-typehints", "sphinx_rtd_theme"] +test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] + [[package]] name = "py" version = "1.11.0" description = "library with cross-python path, ini-parsing, io, code, log facilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 
+groups = ["main"] files = [ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, @@ -28,6 +64,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -90,6 +127,7 @@ version = "0.9.2" description = "Easy to use retry decorator." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "retry-0.9.2-py2.py3-none-any.whl", hash = "sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606"}, {file = "retry-0.9.2.tar.gz", hash = "sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4"}, @@ -99,7 +137,19 @@ files = [ decorator = ">=3.4.2" py = ">=1.4.26,<2.0.0" +[[package]] +name = "smmap" +version = "5.0.2" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e"}, + {file = "smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5"}, +] + [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = "^3.10" -content-hash = "bf0a4d91d4bb05ca502b55ea476b6035ae2f72e0c1e6198e7b58733693e2b908" +content-hash = "ee6675fea24b574ad815d5ca55bbac594a456899ccf18ccc2637d3163a7951c7" diff --git a/buildscripts/bazel_rules_mongo/pyproject.toml b/buildscripts/bazel_rules_mongo/pyproject.toml index 
ac6c1e47418..95baefdb28f 100644 --- a/buildscripts/bazel_rules_mongo/pyproject.toml +++ b/buildscripts/bazel_rules_mongo/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bazel_rules_mongo" -version = "0.1.3" +version = "0.1.4" description = "Bazel rule we use to ship common code between bazel repos" authors = ["Trevor Guidry "] readme = "README.md" @@ -9,6 +9,7 @@ readme = "README.md" python = "^3.10" pyyaml = "^6.0.2" retry = "^0.9.2" +gitpython = "^3.1.44" [build-system] diff --git a/buildscripts/bazel_rules_mongo/tests/test_changed_files.py b/buildscripts/bazel_rules_mongo/tests/test_changed_files.py new file mode 100644 index 00000000000..40152b71f19 --- /dev/null +++ b/buildscripts/bazel_rules_mongo/tests/test_changed_files.py @@ -0,0 +1,157 @@ +import os +import shutil +import tempfile +import unittest + +from git import Repo +from mock import MagicMock + +from buildscripts.bazel_rules_mongo.utils import evergreen_git + +changed_file_name = "changed_file.txt" +new_file_name = "new_file.txt" + + +def write_file(repo: Repo, file_name: str) -> None: + # just adding more text to the file so git thinks it has changed or is created + with open(os.path.join(repo.working_tree_dir, file_name), "a+") as file: + file.write("change\n") + + +class TestChangedFiles(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.tmp_dir = tempfile.mkdtemp() + root_repo = Repo() + + # commit of HEAD + commit = root_repo.head.commit.hexsha + + files_to_copy = [] + + # copy the current repo into a temp dir to do testing on + root_repo.git.execute(["git", "worktree", "add", cls.tmp_dir, commit]) + + # get tracked files that have been changed that are tracked by git + diff_output = root_repo.git.execute( + ["git", "diff", "--name-only", "--diff-filter=d", commit] + ) + files_to_copy.extend(diff_output.split("\n")) + + # gets all the untracked changes in the current repo + untracked_changes = root_repo.git.execute(["git", "add", ".", "-n"]) + for line in 
untracked_changes.split("\n"): + if not line: + continue + files_to_copy.append(line.strip()[5:-1]) + + # copy all changed files from the current repo to the new worktree for testing. + for file in files_to_copy: + if not file: + continue + + if not os.path.exists(file): + raise RuntimeError(f"Changed file was found and does not exist: {file}") + + new_dest = os.path.join(cls.tmp_dir, file) + os.makedirs(os.path.dirname(new_dest), exist_ok=True) + shutil.copy(file, new_dest) + + cls.repo = Repo(cls.tmp_dir) + # add a testing file to this original commit so we can treat it as a preexisting file that + # is going to be modified + write_file(cls.repo, changed_file_name) + cls.repo.git.execute(["git", "add", "."]) + cls.repo.git.execute(["git", "commit", "-m", "Commit changed files"]) + # this new commit is out base revision to compare changes against + cls.base_revision = cls.repo.head.commit.hexsha + os.chdir(cls.tmp_dir) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.tmp_dir) + pass + + def setUp(self): + # change the file already commited to the repo + write_file(self.repo, changed_file_name) + # make a new file that has not been commited yet + write_file(self.repo, new_file_name) + + def tearDown(self): + # reset to the original state between tests + self.repo.git.execute(["git", "reset", "--hard", self.base_revision]) + pass + + def test_local_unchanged_files(self): + evergreen_git.get_remote_branch_ref = MagicMock(return_value=self.base_revision) + new_files = evergreen_git.get_new_files() + self.assertEqual( + new_files, [], msg="New files list was not empty when no new files were added to git." + ) + + changed_files = evergreen_git.get_changed_files() + self.assertEqual( + changed_files, [changed_file_name], msg="Changed file list was not as expected." 
+ ) + + self.repo.git.execute(["git", "add", "."]) + + # random file not tracked by git + write_file(self.repo, "random_other_untracked_file.txt") + + new_files = evergreen_git.get_new_files() + self.assertEqual( + new_files, + [new_file_name], + msg="New file list did not contain the new file added to git.", + ) + + changed_files = evergreen_git.get_changed_files() + self.assertEqual( + changed_files, + [changed_file_name, new_file_name], + msg="Changed file list was not as expected.", + ) + + def test_evergreen_patch(self): + # the files in evergreen patches just live untracked normally so we don't have to do + # anything to the git state + with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as tmp: + tmp.write("is_patch: true\n") + tmp.write(f"revision: {self.base_revision}\n") + tmp.flush() + new_files = evergreen_git.get_new_files(expansions_file=tmp.name) + self.assertEqual( + new_files, [new_file_name], msg="New file list did not contain the new file." + ) + + changed_files = evergreen_git.get_changed_files(expansions_file=tmp.name) + self.assertEqual( + changed_files, + [changed_file_name, new_file_name], + msg="Changed file list was not as expected.", + ) + + def test_evergreen_waterfall(self): + # Evergreen waterfall runs just check against the last commit so we need to commit the changes + self.repo.git.execute(["git", "add", "."]) + self.repo.git.execute(["git", "commit", "-m", "Fake waterfall changes"]) + with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as tmp: + tmp.write("fake_expansion: true") + tmp.flush() + new_files = evergreen_git.get_new_files(expansions_file=tmp.name) + self.assertEqual( + new_files, [new_file_name], msg="New file list did not contain the new file." 
+ ) + + changed_files = evergreen_git.get_changed_files(expansions_file=tmp.name) + self.assertEqual( + changed_files, + [changed_file_name, new_file_name], + msg="Changed file list was not as expected.", + ) + + def test_remote_picker(self): + remote = evergreen_git.get_mongodb_remote(self.repo) + self.assertIn("10gen/mongo", remote.url, msg="The wrong remote was found.") diff --git a/buildscripts/bazel_rules_mongo/utils/BUILD.bazel b/buildscripts/bazel_rules_mongo/utils/BUILD.bazel index 6e7633bfe01..50f248b9f40 100644 --- a/buildscripts/bazel_rules_mongo/utils/BUILD.bazel +++ b/buildscripts/bazel_rules_mongo/utils/BUILD.bazel @@ -1 +1 @@ -# This file intentionally left blank +exports_files(["evergreen_git.py"]) diff --git a/buildscripts/bazel_rules_mongo/utils/evergreen_git.py b/buildscripts/bazel_rules_mongo/utils/evergreen_git.py new file mode 100644 index 00000000000..95191e0a916 --- /dev/null +++ b/buildscripts/bazel_rules_mongo/utils/evergreen_git.py @@ -0,0 +1,105 @@ +import os +from functools import cache +from typing import Dict, List + +import yaml +from git import Remote, Repo + + +@cache +def get_expansions(expansions_file: str) -> Dict[str, any]: + if not expansions_file: + return None + + if not os.path.exists(expansions_file): + raise RuntimeError(f"Expansions file not found at {expansions_file}") + + with open(expansions_file, "r") as file: + return yaml.safe_load(file) + + +def get_mongodb_remote(repo: Repo) -> Remote: + remotes = repo.remotes + picked_remote = None + for remote in remotes: + url = remote.url + # local repository pointing to a local dir + if not url.endswith(".git"): + continue + + # all other remote urls should end with owner/project.git + parts = url[:-4].split("/") + assert len(parts) >= 2, f"Unexpected git remote url: {url}" + owner = parts[-2].split(":")[-1] + + if owner in ("10gen", "mongodb", "evergreen-ci", "mongodb-ets", "realm", "mongodb-js"): + picked_remote = remote + print(f"Selected remote: {remote.url}") + break + 
+ if picked_remote is None: + print( + "Could not find remote from any mongodb github org, falling back to the first remote found" + ) + picked_remote = next(repo.remotes) + + if picked_remote is None: + raise RuntimeError("Could not find valid remote") + + return picked_remote + + +def get_remote_branch_ref(repo: Repo, branch: str = None) -> str: + # If branch is not specified, default to master or main + if branch is None: + for branch in repo.branches: + if branch.name in ("main", "master"): + branch = branch.name + break + + if branch is None: + raise RuntimeError("Could not infer correct branch name") + + # pick a remote from a mongodb org + picked_remote = get_mongodb_remote(repo) + picked_remote.fetch() + # find the latest commit on the remote branch to check for a valid merge-base with the current branch + remote_branch = repo.refs[f"{picked_remote.name}/{branch}"] + diff_commit = repo.git.execute(["git", "merge-base", remote_branch.commit.hexsha, "HEAD"]) + return diff_commit + + +def get_new_files(expansions_file: str = None, branch: str = None) -> List[str]: + # docs on the diff-filter are here https://www.kernel.org/pub/software/scm/git/docs/git-diff.html + # This gets added, renamed, and copied files from the git diff. + return get_changed_files(expansions_file, branch, diff_filter="ARC") + + +def get_changed_files( + expansions_file: str = None, branch: str = None, diff_filter: str = "d" +) -> List[str]: + expansions = get_expansions(expansions_file) + in_ci = expansions_file is not None + + diff_commit = None + + repo = Repo() + + if not in_ci: + diff_commit = get_remote_branch_ref(repo, branch) + else: + if expansions.get("is_patch", None): + # patches from the cli have the changes uncommited, we need to add them to git for git diff to work + # we add the files in github patches as well to make it fail consistently if new files + # are generated in CI before this point. 
+ repo.git.execute(["git", "add", "."]) + diff_commit = expansions.get("revision") + else: + # in waterfall runs we just want to compare to the previous commit + diff_commit = repo.git.execute(["git", "rev-parse", "HEAD^1"]) + + output = repo.git.execute( + ["git", "diff", "--name-only", f"--diff-filter={diff_filter}", diff_commit] + ) + files = output.split("\n") + return [file for file in files if file] diff --git a/buildscripts/resmokeconfig/suites/buildscripts_test.yml b/buildscripts/resmokeconfig/suites/buildscripts_test.yml index 5ba1cfa4393..0bee6220576 100644 --- a/buildscripts/resmokeconfig/suites/buildscripts_test.yml +++ b/buildscripts/resmokeconfig/suites/buildscripts_test.yml @@ -4,6 +4,7 @@ selector: roots: - buildscripts/tests/**/test_*.py - buildscripts/idl/tests/**/test_*.py + - buildscripts/bazel_rules_mongo/tests/test_*.py exclude_files: # These tests are also @unittest.skip'ed. SERVER-48969 tracks re-enabling them. - buildscripts/tests/resmokelib/test_selector.py # Test assumes POSIX path. diff --git a/etc/evergreen_yml_components/tasks/misc_tasks.yml b/etc/evergreen_yml_components/tasks/misc_tasks.yml index 2f747470ce6..58009abd25a 100644 --- a/etc/evergreen_yml_components/tasks/misc_tasks.yml +++ b/etc/evergreen_yml_components/tasks/misc_tasks.yml @@ -615,8 +615,7 @@ tasks: target: >- //:format -- --check - # TODO(SERVER-97804): rename if display_name appears on the evergreen UI - - name: bazel_run_//:codeowners + - name: bazel_run_codeowners tags: [ "assigned_to_jira_team_devprod_build", @@ -644,7 +643,7 @@ tasks: - func: "bazel run" vars: target: >- - //:codeowners -- --check + //:codeowners -- --check --expansions-file ../expansions.yml # TODO(SERVER-97804): rename if display_name appears on the evergreen UI - name: bazel_run_//buildscripts:resmoke