SERVER-105866 add vscode clang tidy limiter and caching (#36873)

GitOrigin-RevId: 7e108ca0b458cbbe3fb1126f1da79a9d5984f68d
This commit is contained in:
Daniel Moody 2025-06-10 09:13:43 -05:00 committed by MongoDB Bot
parent 6070fbf65d
commit eaeaa0f8f3
4 changed files with 335 additions and 36 deletions

View File

@ -22,15 +22,13 @@ import yaml
# Get relative imports to work when the package is not installed on the PYTHONPATH.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from clang_tidy_vscode import CHECKS_SO
from mongo_toolchain import get_mongo_toolchain
from simple_report import make_report, put_report, try_combine_reports
checks_so = ""
for module in CHECKS_SO:
if os.path.exists(module):
checks_so = module
break
checks_so = None
if os.path.exists(".mongo_checks_module_path"):
with open(".mongo_checks_module_path") as f:
checks_so = f.read().strip()
config_file = ""

View File

@ -1,4 +1,4 @@
#!/opt/mongodbtoolchain/v4/bin/python3
#!/usr/bin/env python3
"""
Wraps clang tidy to include our custom checks.
@ -18,64 +18,145 @@ Output:
- Standard output and error from the `clang-tidy` process are captured and printed.
Expected Format:
- command line example: buildscripts/clang_tidy_vscode.py /path/to/file/filename1 --export-fixes=-
- buildscripts/clang_tidy_vscode.py /path/to/file/filename1 /path/to/file/filename2 --export-fixes=-
- command line example: buildscripts/clang_tidy_vscode.py /path/to/file/filename --export-fixes=-
- buildscripts/clang_tidy_vscode.py /path/to/file/filename --export-fixes=-
"""
# TODO: if https://github.com/notskm/vscode-clang-tidy/pull/77#issuecomment-1422910143 is resolved then this script can be removed
import json
import multiprocessing
import os
import pathlib
import subprocess
import sys
# Get relative imports to work when the package is not installed on the PYTHONPATH.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import time
from mongo_toolchain import get_mongo_toolchain
CHECKS_SO = [
"build/install/lib/libmongo_tidy_checks.so",
]
# Template for the cltcache.cfg file that main() writes under ~/.cltcache.
# The single %s placeholder is filled with the compiler executable taken from
# the matching compile_commands.json entry.
# NOTE(review): the three ".mongo_checks_module_path" lines inside the literal
# look like removed-diff residue rather than intended config text -- confirm.
CLTCONFIG = """
# This file is intended to document the configuration options available
[preprocessor]
# Compiler command used when running preprocessor
command=%s
# Ignore errors from preprocessor and use whatever output it generated as cache key
# May cause weird issues when no output is generated
ignore_errors=false
if os.path.exists(".mongo_checks_module_path"):
with open(".mongo_checks_module_path") as f:
CHECKS_SO = [f.read().strip()] + CHECKS_SO
# "" => NOLINT-comments don't work properly
# "-C" => NOLINT-comments work for regular code, but not in preprocessor macro expansion
# "-CC" => NOLINT-comments should work everywhere, but valid code may fail preprocessor stage. Combine with ignore_errors if you are paranoid about issues with NOLINT-comments
preserve_comments=-C
# Increase cache hit rate by ignoring some types of string contents
strip_string_versions=true
strip_string_hex_hashes=true
[behavior]
# Cache results even when clang-tidy fails
cache_failure=true
# Print cltcache errors and info in stderr and stdout
verbose=false
"""
def get_half_cpu_mask():
    """Return half of the reported CPU count, never less than one.

    Despite the name, the result is a plain integer (used by the caller as a
    job limit), not a bit mask.
    """
    half = multiprocessing.cpu_count() // 2
    return half if half > 1 else 1
def count_running_clang_tidy(cmd_path):
    """Count process-table lines whose text contains *cmd_path*.

    The process list comes from ``ps -axww -o pid=,command=``. Matching is a
    plain substring test on each output line, so any process that mentions
    ``cmd_path`` anywhere in its command line is counted.

    Returns:
        The number of matching lines, or 0 (after printing a warning) when the
        process list cannot be obtained or scanned.
    """
    ps_cmd = ["ps", "-axww", "-o", "pid=,command="]
    try:
        listing = subprocess.check_output(ps_cmd, text=True)
        running = 0
        for entry in listing.splitlines():
            if cmd_path in entry:
                running += 1
        return running
    except Exception as e:
        # Best effort: an unreadable process table must not block linting.
        print(f"WARNING: failed to check running clang-tidy processes: {e}")
        return 0
def wait_for_available_slot(cmd_path, max_jobs, check_interval):
    """Block until fewer than *max_jobs* matching processes are running.

    Polls ``count_running_clang_tidy(cmd_path)`` and sleeps *check_interval*
    seconds between polls. There is no timeout.
    """
    while count_running_clang_tidy(cmd_path) >= max_jobs:
        time.sleep(check_interval)
def main():
    """Run clang-tidy, via the cltcache wrapper, on one src/mongo source file.

    Command-line arguments that name existing files are treated as candidate
    sources; every other argument is forwarded to clang-tidy unchanged. Only
    ``.cpp``/``.h`` files under ``src/mongo`` (excluding the enterprise streams
    third_party tree) are linted, and only the first such file is run.

    Returns:
        The exit code of the wrapped clang-tidy invocation, or 0 when no
        argument names a file that should be checked.

    Exits with status 1 when ``compile_commands.json`` is missing from the
    current working directory.
    """
    cltcache_path = pathlib.Path(__file__).parent / "cltcache" / "cltcache.py.txt"
    toolchain = get_mongo_toolchain(version="v5", from_bazel=True)
    clang_tidy_path = toolchain.get_tool_path("clang-tidy")

    # Throttle: wait until fewer than half-the-cores matching processes exist.
    max_jobs = get_half_cpu_mask()
    wait_for_available_slot(clang_tidy_path, max_jobs, check_interval=0.2)

    clang_tidy_cmd = [clang_tidy_path]

    checks_so = None
    if os.path.exists(".mongo_checks_module_path"):
        with open(".mongo_checks_module_path") as f:
            checks_so = f.read().strip()

    # checks_so is still None when the marker file is absent; os.path.isfile(None)
    # raises TypeError, so guard on truthiness before touching the filesystem.
    if checks_so and os.path.isfile(checks_so):
        clang_tidy_cmd += [f"-load={checks_so}"]
    else:
        print("ERROR: failed to find mongo tidy checks, run `bazel build compiledb'")

    # Filter out non src/mongo files for clang tidy checks
    repo_root = os.path.dirname(os.path.dirname(__file__))
    files_to_check = []
    other_args = []
    for arg in sys.argv[1:]:
        if os.path.isfile(arg):
            rel = os.path.relpath(arg, repo_root)
            if (
                (arg.endswith(".cpp") or arg.endswith(".h"))
                and rel.startswith("src/mongo")
                and not rel.startswith("src/mongo/db/modules/enterprise/src/streams/third_party")
            ):
                files_to_check.append(rel)
        else:
            other_args.append(arg)

    # No files to check in src/mongo. Skipping clang-tidy
    if not files_to_check:
        return 0

    if len(files_to_check) > 1:
        print(
            f"ERROR: more than one file passed: {files_to_check}, only running {files_to_check[0]}"
        )
    # Honour the message above: only the first file is looked up in the
    # compilation database, so only that file is handed to clang-tidy.
    target = files_to_check[0]

    if not os.path.exists("compile_commands.json"):
        print("ERROR: failed to find compile_commands.json, run 'bazel build compiledb'")
        sys.exit(1)

    with open("compile_commands.json") as f:
        compdb = json.load(f)

    compile_args = []
    executable = None
    for entry in compdb:
        if entry["file"] == target:
            compile_args = entry["arguments"][1:]
            executable = entry["arguments"][0]
            try:
                # Drop "-MD" together with the two arguments that follow it
                # (presumably "-MF <depfile>" -- confirm against the compdb).
                index = compile_args.index("-MD")
                compile_args = compile_args[:index] + compile_args[index + 3 :]
            except ValueError:
                pass
            break

    # cltcache reads its settings from ~/.cltcache/cltcache.cfg; rewrite it so
    # the preprocessor command matches this file's compiler.
    cfg_dir = pathlib.Path.home() / ".cltcache"
    cfg_dir.mkdir(parents=True, exist_ok=True)
    with open(cfg_dir / "cltcache.cfg", "w") as f:
        f.write(CLTCONFIG % executable)

    full_cmd = (
        [sys.executable, cltcache_path]
        + clang_tidy_cmd
        + [target]
        + other_args
        + ["--"]
        + compile_args
    )

    proc = subprocess.run(full_cmd, capture_output=True)
    # Write to output buffer here because that is how to copy directly from stdin to stdout without making assumptions about encoding
    sys.stdout.buffer.write(proc.stdout)
    sys.stderr.buffer.write(proc.stderr)
    return proc.returncode

View File

@ -0,0 +1,6 @@
This is cltcache.py.txt taken from
CLTCACHE_URL = "https://raw.githubusercontent.com/freedick/cltcache/1.2.2/src/cltcache/cltcache.py"
CLTCACHE_SHA256 = "30d9bf6d3615eab1826d5e24aea54873de034014c1e77506c9ff983e1e858b3c"
A small, simple clang-tidy cache used by the VS Code integration, which runs clang-tidy directly rather than through Bazel. The `.txt` extension keeps linters and formatters from touching the file, so it stays identical to its upstream source.

View File

@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""
A simple clang-tidy caching application. Prefix calls to clang-tidy to cache
their results for faster static code analysis.
"""
import gzip
import hashlib
import os
import pathlib
import re
import subprocess
import sys
import time
import configparser
def save_to_file_raw(data, filename):
    """Write the bytes *data* to *filename*, replacing any existing content."""
    with open(filename, "wb") as sink:
        sink.write(data)
def save_to_file(string, filename):
    """Write *string* to *filename* encoded as UTF-8 bytes."""
    encoded = string.encode("utf-8")
    with open(filename, "wb") as sink:
        sink.write(encoded)
def compress_to_file(data, filename):
    """Gzip-compress the bytes *data* and write the result to *filename*."""
    packed = gzip.compress(data)
    with open(filename, "wb") as sink:
        sink.write(packed)
def read_from_file_raw(filename):
    """Return the entire contents of *filename* as bytes."""
    with open(filename, "rb") as source:
        contents = source.read()
    return contents
def read_from_file(filename):
    """Return the contents of *filename* decoded as UTF-8 text."""
    with open(filename, "rb") as source:
        raw = source.read()
    return raw.decode("utf-8")
def decompress_from_file(filename):
    """Read the gzip file *filename* and return its decompressed bytes."""
    with open(filename, "rb") as source:
        packed = source.read()
    return gzip.decompress(packed)
def sha256(string):
    """Return the hex SHA-256 digest of *string* encoded as UTF-8."""
    return hashlib.sha256(string.encode("utf-8")).hexdigest()
def file_age(filepath):
    """Return the seconds elapsed since *filepath* was last modified."""
    now = time.time()
    return now - os.path.getmtime(filepath)
def run_command(command):
    """Run *command*, capturing stdout and stderr as bytes.

    A non-zero exit status does not raise; the completed-process object is
    returned so the caller can inspect ``returncode``.
    """
    return subprocess.run(command, capture_output=True, check=False)
def run_get_stdout(command, ignore_errors=False):
    """Run *command* and return its stdout decoded as UTF-8.

    Raises:
        Exception: when the command exits with a non-zero status and
            *ignore_errors* is false. With *ignore_errors* true, whatever was
            written to stdout is returned regardless of the exit status.
    """
    completed = subprocess.run(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, check=False)
    failed = completed.returncode != 0
    if failed and not ignore_errors:
        raise Exception(f"Bad exit code when running: {command}")
    return completed.stdout.decode("utf-8")
def remove_o_flag(compile_args):
    """Return *compile_args* without the first ``-o`` flag and its argument.

    When no ``-o`` is present the input is returned unchanged.
    """
    if "-o" not in compile_args:
        return compile_args
    pos = compile_args.index("-o")
    before, after = compile_args[:pos], compile_args[pos + 2:]
    return before + after
def postprocess_source(source, config):
    """Normalise volatile string contents in preprocessed *source*.

    Two optional rewrites, each enabled by a boolean option in the
    ``[preprocessor]`` section of *config* (both default to enabled):

    * ``strip_string_versions``: dotted version numbers that follow a double
      quote on the same line are replaced by ``<version>``.
    * ``strip_string_hex_hashes``: runs of 5 to 128 hex digits that follow a
      double quote on the same line are replaced by zeros of equal length.

    Each rewrite is re-applied until the text stops changing, at most 20 extra
    passes, because one pass only rewrites the first hit after each quote.

    Args:
        source: Preprocessor output as text.
        config: A ``configparser.ConfigParser`` holding the options above.

    Returns:
        The rewritten source text.
    """
    def hash_replace(match):
        return match.group(0).replace(match.group(1), len(match.group(1)) * "0")

    replacements = []
    # getboolean, not get: get() returns the raw string, and a configured
    # "false" is a non-empty (truthy) string, so the options could never be
    # switched off.
    if config.getboolean("preprocessor", "strip_string_versions", fallback=True):
        replacements.append(
            (r'("[^"^\n]*?)([0-9]+(\.[0-9]+)+)', r'\1<version>'))
    if config.getboolean("preprocessor", "strip_string_hex_hashes", fallback=True):
        replacements.append((r'"[^"^\n]*?([0-9a-fA-F]{5,128})', hash_replace))

    for pattern, replacement in replacements:
        changed_source = re.sub(pattern, replacement, source)
        attempts = 0
        while changed_source != source and attempts < 20:
            source = changed_source
            changed_source = re.sub(pattern, replacement, source)
            attempts += 1
    return source
def get_preproc_hash(compile_args, config):
    """Return a SHA-256 hex digest of the normalised preprocessor output.

    The compiler named by ``[preprocessor] command`` (default ``c++``) is run
    with *compile_args* (minus any ``-o`` flag), ``-E`` and the configured
    ``preserve_comments`` flag. Its stdout is passed through
    ``postprocess_source`` and hashed.

    Args:
        compile_args: Compiler arguments for the translation unit.
        config: A ``configparser.ConfigParser`` with cltcache settings.

    Raises:
        Exception: from ``run_get_stdout`` when the preprocessor exits
            non-zero and ``[preprocessor] ignore_errors`` is false.
    """
    compile_args = remove_o_flag(compile_args)
    preproc_command = config.get("preprocessor", "command", fallback="c++")
    keep_comments_flag = config.get(
        "preprocessor", "preserve_comments", fallback="-C")
    # An empty value (or a literal pair of quotes) means "no comment flag".
    # Passing it on would hand the compiler a bogus empty/quoted argument.
    keep_comments_flag = keep_comments_flag.strip().strip("\"'")

    preproc_call = [preproc_command] + compile_args + ["-E"]
    if keep_comments_flag:
        preproc_call.append(keep_comments_flag)

    preproc_source = run_get_stdout(
        preproc_call,
        config.getboolean("preprocessor", "ignore_errors", fallback=False))
    verbose = config.getboolean("behavior", "verbose", fallback=False)
    if verbose:
        print("cltcache length of preproccesed source:", len(preproc_source))
    postproc_source = postprocess_source(preproc_source, config)
    if verbose:
        print("cltcache length of postproccesed source:", len(postproc_source))
    return sha256(postproc_source)
def compute_cache_key(clang_tidy_call, config):
    """Derive the cache key for one clang-tidy invocation.

    *clang_tidy_call* is ``[clang-tidy, <tidy options...>, "--", <compiler
    options...>]``. The key combines three digests: the preprocessed source,
    the output of ``--dump-config`` for the given tidy options, and the version
    numbers printed by ``--version``. The combined SHA-256 hex digest is
    returned with its last 16 characters removed.

    Raises:
        Exception: when the ``--`` separator is missing, or when one of the
            helper commands fails.
    """
    clang_tidy = clang_tidy_call[0]
    if "--" not in clang_tidy_call:
        raise Exception("Missing '--' flag in compiler options")
    separator = clang_tidy_call.index("--")
    tidy_options = clang_tidy_call[1:separator]
    compiler_options = clang_tidy_call[separator + 1:]

    source_digest = get_preproc_hash(compiler_options, config)

    version_text = run_get_stdout([clang_tidy] + ["--version"])
    version_numbers = re.findall(r'[0-9]+\.[0-9]+\.?[0-9]*', version_text)
    version_digest = sha256(",".join(version_numbers))

    dumped_config = run_get_stdout(
        [clang_tidy] + tidy_options + ["--dump-config"])
    config_digest = sha256(dumped_config)

    combined = "".join((source_digest, config_digest, version_digest))
    return sha256(combined)[:-16]
def init_cltcache():
    """Create the cache directory if needed and load its configuration.

    The directory is taken from the ``CLTCACHE_DIR`` environment variable when
    that is set to a non-empty value, otherwise ``~/.cltcache`` is used.
    ``cltcache.cfg`` inside that directory is read if it exists.

    Returns:
        A ``(cache_dir, config)`` tuple: a ``pathlib.Path`` and a
        ``configparser.ConfigParser`` (empty when no config file exists).
    """
    # os.environ values are plain strings; wrap in Path so the .mkdir() and
    # "/" operations below work when CLTCACHE_DIR is set.
    env_dir = os.environ.get("CLTCACHE_DIR")
    if env_dir:
        cltcache_path = pathlib.Path(env_dir)
    else:
        cltcache_path = pathlib.Path.home() / ".cltcache"
    cltcache_path.mkdir(parents=True, exist_ok=True)
    config = configparser.ConfigParser()
    config.read(cltcache_path / "cltcache.cfg")
    return cltcache_path, config
def cache_clang_tidy(clang_tidy_call):
    """Run clang-tidy through the cache and exit with its status.

    A cache key is computed for *clang_tidy_call*. On a hit, the stored stdout
    and stderr are replayed and the process exits with the stored return code.
    Otherwise clang-tidy is run, its output is printed, and the results are
    stored when a key was computed and either the run succeeded or
    ``[behavior] cache_failure`` is true (the default).

    If computing the key or reading the cache raises, the call is forwarded to
    clang-tidy.

    This function never returns; it always ends in ``sys.exit``.
    """
    cache_root, config = init_cltcache()
    cache_path = cat_path = out_path = err_path = None
    verbose = config.getboolean("behavior", "verbose", fallback=False)
    if verbose:
        print("cltcache computing cache key")
    try:
        cache_key = compute_cache_key(clang_tidy_call, config)
        if verbose:
            print("cltcache key:", cache_key)
        # Entries are sharded into sub-directories by the key's first character.
        cat_path = cache_root / cache_key[0]
        cache_path = cat_path / cache_key
        out_path = cache_path.with_suffix(".out.gz")
        err_path = cache_path.with_suffix(".err.gz")
        if os.path.exists(cache_path):
            if verbose:
                print("cltcache hit!")
            for stored, stream in ((out_path, sys.stdout), (err_path, sys.stderr)):
                if not os.path.exists(stored):
                    continue
                replay = decompress_from_file(stored)
                if replay:
                    print(replay.decode("utf-8"), file=stream)
            # The marker file holds the cached exit status as text.
            sys.exit(int(read_from_file(cache_path)))
        elif verbose:
            print("cltcache miss...")
    except Exception as e:
        if verbose:
            print("cltcache", e)
        print(
            "cltcache Preprocessing failed! Forwarding call without caching...",
            file=sys.stderr)

    result = run_command(clang_tidy_call)
    clt_success = result.returncode == 0
    # cache_path is only assigned once a key was successfully computed.
    preproc_success = cache_path is not None
    cache_results = (clt_success or config.getboolean(
        "behavior", "cache_failure", fallback=True)) and preproc_success
    if cache_results:
        cat_path.mkdir(parents=True, exist_ok=True)

    captured = (
        (result.stdout, sys.stdout, out_path),
        (result.stderr, sys.stderr, err_path),
    )
    for payload, stream, destination in captured:
        if payload:
            print(payload.decode("utf-8"), file=stream)
            if cache_results:
                compress_to_file(payload, destination)

    if cache_results:
        if verbose:
            print("cltcache caching results...")
        # Written last: the marker's existence is what counts as a hit.
        save_to_file(str(result.returncode), cache_path)
    sys.exit(result.returncode)
def main():
    """Entry point: print usage when run without arguments, else run the cache.

    All arguments after the script name are handed to ``cache_clang_tidy`` as
    the clang-tidy call.
    """
    if len(sys.argv) > 1:
        cache_clang_tidy(sys.argv[1:])
        return
    command = sys.argv[0]
    usage_lines = (
        "Usage:\n",
        f" {command}\n",
        f" {command} clang-tidy [clang-tidy options] -- ",
        "-o output [compiler options]",
    )
    print("".join(usage_lines))


if __name__ == "__main__":
    main()