SERVER-90006 Fix up commit message validation logic (#21906)

Simplifies commit message validation to the simplest possible logic: a
straightforward regex on the PR title.

### Testing

* (Note: the testing procedure below is based on v6.0, not v5.0;
however, the logic at play will apply regardless of v5.0 vs v6.0)
* I spawned a new "parent" branch from `v6.0`:
(`stevegrossmongodb-v6.0-mq`)
* I established GitHub configuration and Evergreen project to match what
is used in v6.0.
* I created [this PR](https://github.com/10gen/mongo/pull/22012) to test
changes to the message validation logic.
* I ran this [PB for a commit with an invalid
title](https://spruce.mongodb.com/task/stevegrossmongodb_v6.0_mq_commit_queue_validate_commit_message_patch_834c3296202453d3e555b76f06e91eb505ebf006_663b98b0e2c04200074f34a4_24_05_08_15_22_29/logs?execution=0&logtype=all),
which correct failed
* I ran this [PB for commit with a valid
title](https://evergreen.mongodb.com/version/663b9f0b1542760007b768d2?redirect_spruce_users=true),
which correctly passed
* I ran the unit tests for validate_commit_message.py:
```
> python -m unittest buildscripts.tests.test_validate_commit_message
Found a commit without a ticket
SERVER-
Found a commit without a ticket
Revert SERVER-60
Found a commit without a ticket

Found a commit without a ticket
nonsense
.Must specify non-empty value for --message
..
----------------------------------------------------------------------
Ran 3 tests in 0.003s

OK
```

GitOrigin-RevId: ec0b8cfc4a6433b1ad4572e1b910e6ad71ee36fd
This commit is contained in:
stevegrossmongodb 2024-05-08 13:49:12 -04:00 committed by MongoDB Bot
parent 399cd5300f
commit 68f755aff6
3 changed files with 33 additions and 167 deletions

View File

@ -1,70 +1,31 @@
"""Unit tests for the evergreen_task_timeout script."""
import itertools
import unittest
from mock import MagicMock, patch
from buildscripts.validate_commit_message import main, STATUS_OK, STATUS_ERROR, GIT_SHOW_COMMAND
from buildscripts.validate_commit_message import main, STATUS_OK, STATUS_ERROR
# pylint: disable=missing-docstring,no-self-use
INVALID_MESSAGES = [
"", # You must provide a message
"RevertEVG-1", # revert and ticket must be formatted
"revert EVG-1", # revert must be capitalized
"This is not a valid message", # message must be valid
"Fix Lint", # Fix lint is strict in terms of caps
]
NS = "buildscripts.validate_commit_message"
def ns(relative_name): # pylint: disable=invalid-name
"""Return a full name from a name relative to the test module"s name space."""
return NS + "." + relative_name
def interleave_new_format(older):
"""Create a new list containing a new and old format copy of each string."""
newer = [
f"Commit Queue Merge: '{old}' into 'mongodb/mongo:SERVER-45949-validate-message-format'"
for old in older
]
return list(itertools.chain(*zip(older, newer)))
class ValidateCommitMessageTest(unittest.TestCase):
def test_valid(self):
messages = [
"Fix lint",
"EVG-1", # Test valid projects with various number lengths
"SERVER-20",
"WT-300",
"SERVER-44338",
"Revert EVG-5",
"Revert SERVER-60",
"Revert WT-700",
"Revert 'SERVER-8000",
'Revert "SERVER-90000',
"Revert \"SERVER-60",
"Import wiredtiger: 58115abb6fbb3c1cc7bfd087d41a47347bce9a69 from branch mongodb-4.4",
"Import tools: 58115abb6fbb3c1cc7bfd087d41a47347bce9a69 from branch mongodb-4.4",
'Revert "Import wiredtiger: 58115abb6fbb3c1cc7bfd087d41a47347bce9a69 from branch mongodb-4.4"',
]
self.assertTrue(
all(main([message]) == STATUS_OK for message in interleave_new_format(messages)))
self.assertTrue(all(main([message]) == STATUS_OK for message in messages))
def test_private(self):
self.assertEqual(main(["XYZ-1"]), STATUS_ERROR)
def test_invalid(self):
messages = [
"SERVER-", # missing number
"Revert SERVER-60", # missing quote before SERVER
"", # empty value
"nonsense", # nonsense value
]
def test_catch_all(self):
self.assertTrue(
all(
main([message]) == STATUS_ERROR
for message in interleave_new_format(INVALID_MESSAGES)))
self.assertTrue(all(main([message]) == STATUS_ERROR for message in messages))
def test_last_git_commit_success(self):
with patch(
ns("subprocess.check_output"),
return_value=bytearray('SERVER-1111 this is a test', 'utf-8')) as check_output_mock:
self.assertEqual(main([]), 0)
check_output_mock.assert_called_with(GIT_SHOW_COMMAND)
def test_message_is_empty_list(self):
self.assertEqual(main([]), STATUS_ERROR)

116
buildscripts/validate_commit_message.py Executable file → Normal file
View File

@ -28,108 +28,15 @@
#
"""Validate that the commit message is ok."""
import argparse
import os
import re
import subprocess
import sys
import logging
LOGGER = logging.getLogger(__name__)
COMMON_PUBLIC_PATTERN = r'''
((?P<revert>Revert)\s+[\"\']?)? # Revert (optional)
((?P<ticket>(?:EVG|SERVER|WT)-[0-9]+)[\"\']?\s*) # ticket identifier
(?P<body>(?:(?!\(cherry\spicked\sfrom).)*)? # To also capture the body
(?P<backport>\(cherry\spicked\sfrom.*)? # back port (optional)
'''
"""Common Public pattern format."""
COMMON_10GENREPO_COMMIT_QUEUE_PATTERN = r' ^\'(?P<repo>10gen/mongo)\'\s.*commit\squeue\smerge.*SERVER-[0-9]+'
"""Common commit queue format."""
COMMON_LINT_PATTERN = r'(?P<lint>Fix\slint)'
"""Common Lint pattern format."""
COMMON_IMPORT_PATTERN = r'(?P<imported>Import\s(wiredtiger|tools):\s.*)'
"""Common Import pattern format."""
COMMON_REVERT_IMPORT_PATTERN = r'Revert\s+[\"\']?(?P<imported>Import\s(wiredtiger|tools):\s.*)'
"""Common revert Import pattern format."""
COMMON_PRIVATE_PATTERN = r'''
((?P<revert>Revert)\s+[\"\']?)? # Revert (optional)
((?P<ticket>[A-Z]+-[0-9]+)[\"\']?\s*) # ticket identifier
(?P<body>(?:(?!('\s(into\s'(([^/]+))/(([^:]+)):(([^']+))'))).)*)? # To also capture the body
'''
"""Common Private pattern format."""
STATUS_OK = 0
STATUS_ERROR = 1
GIT_SHOW_COMMAND = ["git", "show", "-1", "-s", "--format=%s"]
def new_patch_description(pattern: str) -> str:
"""
Wrap the pattern to conform to the new commit queue patch description format.
Add the commit queue prefix and suffix to the pattern. The format looks like:
Commit Queue Merge: '<commit message>' into '<owner>/<repo>:<branch>'
:param pattern: The pattern to wrap.
:return: A pattern to match the new format for the patch description.
"""
return (r"""^((?P<commitqueue>Commit\sQueue\sMerge:)\s')"""
f'{pattern}'
# r"""('\s(?P<into>into\s'((?P<owner>[^/]+))/((?P<repo>[^:]+)):((?P<branch>[^']+))'))"""
)
def old_patch_description(pattern: str) -> str:
"""
Wrap the pattern to conform to the new commit queue patch description format.
Just add a start anchor. The format looks like:
<commit message>
:param pattern: The pattern to wrap.
:return: A pattern to match the old format for the patch description.
"""
return r'^' f'{pattern}'
# NOTE: re.VERBOSE is for visibility / debugging. As such significant white space must be
# escaped (e.g ' ' to \s).
VALID_PATTERNS = [
re.compile(new_patch_description(COMMON_PUBLIC_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(old_patch_description(COMMON_PUBLIC_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(
new_patch_description(COMMON_10GENREPO_COMMIT_QUEUE_PATTERN),
re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(
old_patch_description(COMMON_10GENREPO_COMMIT_QUEUE_PATTERN),
re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(new_patch_description(COMMON_LINT_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(old_patch_description(COMMON_LINT_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(new_patch_description(COMMON_IMPORT_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(old_patch_description(COMMON_IMPORT_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(
new_patch_description(COMMON_REVERT_IMPORT_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(
old_patch_description(COMMON_REVERT_IMPORT_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
]
"""valid public patterns."""
PRIVATE_PATTERNS = [
re.compile(
new_patch_description(COMMON_PRIVATE_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
re.compile(
old_patch_description(COMMON_PRIVATE_PATTERN), re.MULTILINE | re.DOTALL | re.VERBOSE),
]
"""private patterns."""
def main(argv=None):
"""Execute Main function to validate commit messages."""
@ -145,20 +52,21 @@ def main(argv=None):
args = parser.parse_args(argv)
if not args.message:
print('Validating last git commit message')
result = subprocess.check_output(GIT_SHOW_COMMAND)
message = result.decode('utf-8')
else:
message = " ".join(args.message)
LOGGER.error("Must specify non-empty value for --message")
return STATUS_ERROR
message = " ".join(args.message)
if any(valid_pattern.match(message) for valid_pattern in VALID_PATTERNS):
# Valid values look like:
# 1. SERVER-\d+
# 2. Revert "SERVER-\d+
# 3. Import wiredtiger
# 4. Revert "Import wiredtiger
valid_pattern = re.compile(r'(Revert ")?(SERVER-[0-9]+|Import wiredtiger)')
if valid_pattern.match(message):
return STATUS_OK
else:
if any(private_pattern.match(message) for private_pattern in PRIVATE_PATTERNS):
error_type = "Found a reference to a private project"
else:
error_type = "Found a commit without a ticket"
LOGGER.error(f"{error_type}\n{message}") # pylint: disable=logging-fstring-interpolation
LOGGER.error(f"Found a commit without a ticket\n{message}") # pylint: disable=logging-fstring-interpolation
return STATUS_ERROR

View File

@ -1,28 +1,25 @@
# Forced early return indicating success.
# Note: This is part of the migration to the GitHub queue; we're going to
# switch from the validate_commit_message approach to using GH native
# rulesets to validate commit message formatting.
exit 0
# Note: This script is called "commit_message_validate", but it does not actually
# evaluate the commit message. Rather, it leverages the fact that a GitHub merge queue
# triggered branch will have a single commit message that is the name of the PR.
# This script ensures the PR name matches a well-known regex (see below for details).
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
. "$DIR/prelude.sh"
cd src
set -o verbose
set -o errexit
if [ "${is_commit_queue}" = "true" ]; then
# Fetch the gitlog from the last merge commit to HEAD, which is guaranteed
# to be the commit message of the PR.
gitlog=$(git log --pretty=format:'%s' v5.0-test-merge-queue...HEAD 2>&1)
gitlog=$(git log --pretty=format:'%s' ${branch_name}...HEAD 2>&1)
# In case there are any special characters that could cause issues (like "). To
# do this, we will write it out to a file, then read that file into a variable.
cat > commit_message.txt << END_OF_COMMIT_MSG
${gitlog}
END_OF_COMMIT_MSG
commit_message_content=$(cat commit_message.txt)
activate_venv
$python buildscripts/validate_commit_message.py "$commit_message_content"
else
echo "Not a commit queue PR, skipping commit message validation"
exit 0
fi