SERVER-92573 apply clang-tidy replacements to correct files (#30725)

GitOrigin-RevId: ec8af07eb3af9380d40243f4b50aecc83cf03e1f
This commit is contained in:
Sean Lyons 2025-01-02 08:09:53 -05:00 committed by MongoDB Bot
parent e674443d53
commit 1ffbb36559
3 changed files with 172 additions and 53 deletions

View File

@ -1,69 +1,82 @@
#!/usr/bin/env python3
"""Applies fixes, generated by clang-tidy, across the codebase."""
"""Applies fixes, generated by buildscripts/clang-tidy.py, across the codebase."""
import argparse
import hashlib
import json
import os
import sys
from collections import defaultdict
def get_replacements_for_file(filepath: str, file_data: dict, fixes: dict):
# We cannot reliably make any changes unless we can use the md5 to make
# sure the offsets are going to match up.
if file_data.get("md5"):
for byte_offset in file_data:
if byte_offset != "md5":
if file_data["md5"] in fixes:
fixes[file_data["md5"]]["replacements"].extend(
file_data[byte_offset].get("replacements", [])
)
else:
fixes[file_data["md5"]] = {
"replacements": file_data[byte_offset].get("replacements", []),
"filepath": filepath,
}
def is_writeable(file) -> bool:
try:
with open(file, "a") as f: # noqa: F841
pass
return True
except OSError:
return False
def main():
parser = argparse.ArgumentParser()
parser.add_argument(dest="fixes_file", help="Path to fixes file.")
args = parser.parse_args()
fixes = {}
# Extract just the replacement data, organizing by file
with open(args.fixes_file) as fin:
fixes_data = json.load(fin)
for check in fixes_data:
for file in fixes_data[check]:
get_replacements_for_file(file, fixes_data[check][file], fixes)
# This first for loop is for each file we intended to make changes to
for recorded_md5 in fixes:
current_md5 = None
file_bytes = None
if os.path.exists(fixes[recorded_md5]["filepath"]):
with open(fixes[recorded_md5]["filepath"], "rb") as fin:
def can_replacements_be_applied(replacements) -> bool:
"""Checks if the files containing replacements are unchanged since clang-tidy was run and are writeable.
For any replacement to be valid, all impacted files must be unchanged."""
for replacement in replacements:
if (
os.path.exists(replacement["FilePath"])
and is_writeable(replacement["FilePath"])
and replacement["FileContentsMD5"]
):
with open(replacement["FilePath"], "rb") as fin:
file_bytes = fin.read()
current_md5 = hashlib.md5(file_bytes).hexdigest()
if current_md5 != replacement["FileContentsMD5"]:
return False
else:
return False
return True
# If the md5 is not matching up the file must have changed and our offset may
# not be correct, we will need to bail on this file.
if current_md5 != recorded_md5:
print(
f'ERROR: not applying replacements for {fixes[recorded_md5]["filepath"]}, the file has changed since the replacement offset was recorded.'
)
continue
def get_replacements_to_apply(fixes_file) -> dict:
"""Gets a per file listing of the valid replacements to apply."""
replacements_to_apply = defaultdict(list)
with open(fixes_file) as fin:
fixes_data = json.load(fin)
for clang_tidy_check in fixes_data:
for main_source_file in fixes_data[clang_tidy_check]:
for violation_instance in fixes_data[clang_tidy_check][main_source_file]:
replacements = fixes_data[clang_tidy_check][main_source_file][
violation_instance
]["replacements"]
if can_replacements_be_applied(replacements):
for replacement in replacements:
replacements_to_apply[replacement["FilePath"]].append(replacement)
else:
print(
f"""WARNING: not applying replacements for {clang_tidy_check} in {main_source_file} at offset {violation_instance}, at least one file that is part of the automatic replacement has changed since clang-tidy was run, or is not writeable."""
)
return replacements_to_apply
def main(argv=sys.argv[1:]):
parser = argparse.ArgumentParser()
parser.add_argument(dest="fixes_file", help="Path to fixes file.")
args = parser.parse_args(argv)
replacements_to_apply = get_replacements_to_apply(args.fixes_file)
for file in replacements_to_apply:
with open(file, "rb") as fin:
file_bytes = fin.read()
# perform the swap replacement of the binary data
file_bytes = bytearray(file_bytes)
fixes[recorded_md5]["replacements"].sort(key=lambda r: r["Offset"])
replacements_to_apply[file].sort(key=lambda r: r["Offset"])
adjustments = 0
if len(fixes[recorded_md5]["replacements"]) == 0:
print(f'Warning: no replacements for {fixes[recorded_md5]["filepath"]}')
for replacement in fixes[recorded_md5]["replacements"]:
for replacement in replacements_to_apply[file]:
file_bytes[
replacement["Offset"] + adjustments : replacement["Offset"]
+ adjustments
@ -73,7 +86,7 @@ def main():
if replacement["Length"] != len(replacement["ReplacementText"]):
adjustments += len(replacement["ReplacementText"]) - replacement["Length"]
with open(fixes[recorded_md5]["filepath"], "wb") as fout:
with open(file, "wb") as fout:
fout.write(bytes(file_bytes))

View File

@ -110,12 +110,13 @@ def _combine_errors(fixes_filename: str, files_to_parse: List[str]) -> int:
},
)
)
for replacement in fix_data["replacements"]:
if replacement.get("FilePath") and os.path.exists(replacement.get("FilePath")):
with open(replacement.get("FilePath"), "rb") as contents:
replacement["FileContentsMD5"] = hashlib.md5(contents.read()).hexdigest()
fix_data["count"] += 1
fix_data["source_files"].append(fixes["MainSourceFile"])
if fix_msg.get("FilePath") and os.path.exists(fix_msg.get("FilePath")):
all_fixes[fix["DiagnosticName"]][fix_msg.get("FilePath")]["md5"] = hashlib.md5(
open(fix_msg.get("FilePath"), "rb").read()
).hexdigest()
with open(fixes_filename, "w") as files_file:
json.dump(all_fixes, files_file, indent=4, sort_keys=True)

View File

@ -0,0 +1,105 @@
"""Unit tests for clang_tidy.py and apply_clang_tidy_fixes.py."""
import os
import sys
import tempfile
import unittest
from pathlib import Path
import yaml
sys.path.append("buildscripts")
import apply_clang_tidy_fixes
from clang_tidy import _clang_tidy_executor, _combine_errors
@unittest.skipIf(
sys.platform == "win32" or sys.platform == "darwin",
reason="clang_tidy.py is only run on linux",
)
class TestClangTidy(unittest.TestCase):
def setUp(self):
"""Create a working directory that contains a header and source that clang-tidy will suggest a replacement for."""
source_contents = """
#include "clang_tidy_test.h"
#include <iostream>
void f(std::string s) { std::cout << s; }
"""
header_contents = """
#include <string>
void f(std::string s);
"""
self.tempdir = tempfile.TemporaryDirectory()
self.source = os.path.join(self.tempdir.name, "clang_tidy_test.cpp")
with open(self.source, "w") as source:
source.write(source_contents)
self.header = os.path.join(self.tempdir.name, "clang_tidy_test.h")
with open(self.header, "w") as header:
header.write(header_contents)
self.fixes_dir = os.path.join(self.tempdir.name, "fixes")
os.mkdir(self.fixes_dir)
self.clang_tidy_binary = "/opt/mongodbtoolchain/v4/bin/clang-tidy"
clang_tidy_cfg = "Checks: 'performance-unnecessary-value-param'"
self.clang_tidy_cfg = yaml.safe_load(clang_tidy_cfg)
self.oldpwd = os.getcwd()
os.chdir(self.tempdir.name)
def tearDown(self):
os.chdir(self.oldpwd)
self.tempdir.cleanup()
def test_clang_tidy_and_apply_replacements(self):
_, files_to_parse = _clang_tidy_executor(
Path(self.source),
self.clang_tidy_binary,
self.clang_tidy_cfg,
self.fixes_dir,
False,
"",
)
_combine_errors(Path(self.fixes_dir, "clang_tidy_fixes.json"), [files_to_parse])
apply_clang_tidy_fixes.main([os.path.join(self.fixes_dir, "clang_tidy_fixes.json")])
with open(self.source, "r") as source:
self.assertIn(
"void f(const std::string& s)", source.read(), "The clang tidy fix was not applied."
)
with open(self.header, "r") as header:
self.assertIn(
"void f(const std::string& s)", header.read(), "The clang tidy fix was not applied."
)
def test_source_changed_between_tidy_and_apply_fixes(self):
_, files_to_parse = _clang_tidy_executor(
Path(self.source),
self.clang_tidy_binary,
self.clang_tidy_cfg,
self.fixes_dir,
False,
"",
)
_combine_errors(Path(self.fixes_dir, "clang_tidy_fixes.json"), [files_to_parse])
# Make a modification to one of the files. The clang tidy fix is no longer safe to apply.
with open(self.header, "a") as header:
header.write("\n")
apply_clang_tidy_fixes.main([os.path.join(self.fixes_dir, "clang_tidy_fixes.json")])
with open(self.source, "r") as source:
self.assertIn(
"void f(std::string s)",
source.read(),
"The clang-tidy fix should not have been applied.",
)
with open(self.header, "r") as header:
self.assertIn(
"void f(std::string s)",
header.read(),
"The clang-tidy fix should not have been applied.",
)