# Common helpers for CI Bazel scripts (build/run/test)
set -o errexit
set -o pipefail

# --- Pre-flight (assumes prelude.sh already sourced by caller) -------------

# Changes into ./src, switches on verbose command echo, then activates the venv.
# activate_venv is not defined in this file; presumably it comes from prelude.sh
# (see the section header above) - confirm against the caller.
bazel_evergreen_shutils::activate_and_cd_src() {
  cd src
  set -o verbose
  activate_venv
}

# --- Distro quirks -----------------------------------------------------

# NOTE(review): bazel_evergreen_shutils::export_ssl_paths_if_needed is truncated
# in this copy of the file: the text breaks off inside its awk invocation and
# resumes in the middle of the header comment of timeout_prefix. Everything that
# sat in between is missing (this likely includes the is_macos and
# is_s390x_or_ppc64le helpers that other functions call - confirm). The
# surviving fragment is kept verbatim below, commented out so that the rest of
# the file parses. Restore the full text from the upstream file.
#
# bazel_evergreen_shutils::export_ssl_paths_if_needed() {
#   if [[ -f /etc/os-release ]]; then
#     local DISTRO
#     DISTRO=$(awk -F '[="]*' '/^PRETTY_NAME/ { print $2 }' "

# --- Timeout prefix ----------------------------------------------------

# Prints a timeout command prefix ("TIMEOUT_BINARY SECONDS") or an empty string.
#
# Arguments:
#   $1  optional; "on" requests a 3600s fallback timeout (used for remote
#       builds) when build_timeout_seconds is unset or empty.
# Reads:
#   build_timeout_seconds  explicit timeout in seconds; wins over the fallback.
# Output:
#   stdout  the prefix, or an empty line when no timeout applies or no timeout
#           binary is installed.
#   stderr  a warning if a timeout was requested but neither 'timeout' nor
#           'gtimeout' (macOS, via coreutils) is available.
# Always returns 0.
#
# The warning is suppressed once _BAZEL_EVG_TIMEOUT_WARNED is set. The flag is
# assigned in whatever shell runs this function, so when the function is called
# through command substitution the assignment stays in the subshell and the
# warning can repeat on a later call.
bazel_evergreen_shutils::timeout_prefix() {
  local fallback_remote="${1:-}" # "on" = use 3600s default for remote builds
  local need_timeout=""          # "explicit" | "fallback" | ""
  local timeout_bin=""

  # Do we want a timeout?
  if [[ -n "${build_timeout_seconds:-}" ]]; then
    need_timeout="explicit"
  elif [[ "$fallback_remote" == "on" ]]; then
    need_timeout="fallback"
  fi

  # Nothing requested: no binary lookup and no warning needed.
  if [[ -z "$need_timeout" ]]; then
    echo ""
    return 0
  fi

  # Find a timeout binary (GNU coreutils 'timeout' or macOS 'gtimeout').
  if command -v timeout >/dev/null 2>&1; then
    timeout_bin="timeout"
  elif command -v gtimeout >/dev/null 2>&1; then
    timeout_bin="gtimeout"
  fi

  # Requested but unavailable: warn (once per shell) and emit no prefix.
  if [[ -z "$timeout_bin" ]]; then
    if [[ -z "${_BAZEL_EVG_TIMEOUT_WARNED:-}" ]]; then
      if [[ "$need_timeout" == "explicit" ]]; then
        echo "[warn] 'timeout' not found; requested ${build_timeout_seconds}s timeout will be ignored." >&2
      else
        echo "[warn] 'timeout' not found; remote-build fallback timeout (3600s) will be ignored." >&2
      fi
      # Helpful hint for macOS users. is_macos is not defined in the visible
      # part of this file (see the truncation note above).
      if bazel_evergreen_shutils::is_macos; then
        echo "[hint] On macOS, install coreutils: 'brew install coreutils' (provides 'gtimeout')." >&2
      fi
      _BAZEL_EVG_TIMEOUT_WARNED=1
    fi
    echo "" # no timeout prefix
    return 0
  fi

  case "$need_timeout" in
    explicit) echo "$timeout_bin ${build_timeout_seconds}" ;;
    fallback) echo "$timeout_bin 3600" ;;
  esac
}

# --- Bazel server lifecycle & OOM-detect retry -----------------------------

# Prints the output base reported by "$1 info output_base"; stderr is discarded.
# Returns the exit status of that bazel invocation.
bazel_evergreen_shutils::bazel_output_base() {
  local BAZEL_BINARY="$1"
  "$BAZEL_BINARY" info output_base 2>/dev/null
}

# Prints the path of the server pidfile (OUTPUT_BASE/server/server.pid.txt) for
# bazel binary $1. Returns 1 when the output base cannot be determined or is
# empty, so callers never receive a path rooted at "/".
bazel_evergreen_shutils::bazel_pidfile_path() {
  local BAZEL_BINARY="$1"
  local ob
  ob="$(bazel_evergreen_shutils::bazel_output_base "$BAZEL_BINARY")" || return 1
  [[ -n "$ob" ]] || return 1
  echo "${ob}/server/server.pid.txt"
}

# Returns 0 if the pidfile for bazel binary $1 exists and names a process that
# kill -0 can signal; returns 1 otherwise (no pidfile, unreadable, empty or
# non-numeric content, or process gone).
bazel_evergreen_shutils::is_bazel_server_running() {
  local BAZEL_BINARY="$1"
  local pf pid
  pf="$(bazel_evergreen_shutils::bazel_pidfile_path "$BAZEL_BINARY")" || return 1
  [[ -f "$pf" ]] || return 1
  pid="$(cat "$pf" 2>/dev/null || true)"
  # Only a plain positive integer may reach kill; anything else is "not running".
  [[ "$pid" =~ ^[0-9]+$ ]] || return 1
  kill -0 "$pid" 2>/dev/null
}

# Prints one human-readable line describing the server pidfile for bazel binary
# $1: its path and PID when the file exists, otherwise a "not found" message
# that includes the output base. Always returns 0.
bazel_evergreen_shutils::print_bazel_server_pid() {
  local BAZEL_BINARY="$1"
  local pf pid
  pf="$(bazel_evergreen_shutils::bazel_pidfile_path "$BAZEL_BINARY")" || {
    echo "Bazel server pidfile not found (output_base: $(bazel_evergreen_shutils::bazel_output_base "$BAZEL_BINARY" 2>/dev/null || true))"
    return 0
  }
  if [[ -f "$pf" ]]; then
    pid="$(cat "$pf" 2>/dev/null || true)"
    echo "Bazel server pidfile: $pf (PID=${pid:-unknown})"
  else
    echo "Bazel server pidfile not found yet (output_base: $(bazel_evergreen_shutils::bazel_output_base "$BAZEL_BINARY" 2>/dev/null || true))"
  fi
}

# Starts server (if needed) and prints PID. Safe to call multiple times.
bazel_evergreen_shutils::ensure_server_and_print_pid() {
  local BAZEL_BINARY="$1"
  # Run "info" purely for its side effect; output and exit status are ignored.
  "$BAZEL_BINARY" info >/dev/null 2>&1 || true
  bazel_evergreen_shutils::print_bazel_server_pid "$BAZEL_BINARY"
}

# Extracts the last sodalite.cluster.engflow.com URL from the file named by the
# global last_command_log_path (set by retry_bazel_cmd) and writes it, followed
# by a newline, to ./.engflow_link. The URL is also left in the global
# engflow_link.
#
# Best effort: when the log is missing or contains no matching URL, the pipeline
# failure is tolerated (so errexit/pipefail do not abort the caller) and an
# empty line is written.
bazel_evergreen_shutils::write_last_engflow_link() {
  engflow_link=$(
    grep -Eo 'https://[a-zA-Z0-9./?_=-]+' "${last_command_log_path:-}" |
      grep 'sodalite\.cluster\.engflow\.com' |
      tail -n 1
  ) || true
  # printf with a quoted argument: the URL may contain '?', which an unquoted
  # echo would expose to pathname expansion.
  printf '%s\n' "${engflow_link}" >.engflow_link
}

# Generic retry wrapper:
#   $1:   attempts
#   $2:   bazel binary
#   $3..: full bazel subcommand + args (e.g. "build --verbose_failures ...")
# Reads:
#   evergreen_remote_exec  passed to timeout_prefix ("on" enables the fallback)
#   RETRY_ON_FAIL          1 = retry ordinary failures too (default 0)
#   env                    optional words placed in front of the command before
#                          eval; presumably VAR=value assignments supplied by
#                          the caller - confirm
# Sets:
#   last_command_log_path  global; output of "BAZEL_BINARY info command_log"
# Special handling:
#   - server death (pidfile/process missing) -> enable OOM guard, restart, retry
#   - exit 124 -> reported as a timeout, server shut down, retry
#   - any other failure -> retry only when RETRY_ON_FAIL=1, otherwise stop
# Returns the exit status of the last attempt (0 on success). RET is local to
# this function; callers must use the return status.
bazel_evergreen_shutils::retry_bazel_cmd() {
  local attempts="$1"
  shift
  local BAZEL_BINARY="$1"
  shift

  local timeout_str
  timeout_str="$(bazel_evergreen_shutils::timeout_prefix "${evergreen_remote_exec:-}")"

  # Get command log path for usage afterwards (see write_last_engflow_link).
  last_command_log_path=$("$BAZEL_BINARY" info command_log)

  # Everything else is the Bazel subcommand + flags (and possibly
  # redirections/pipes). The words are deliberately joined into one string for
  # eval, which restores legacy behavior where quoted pieces inside variables
  # (e.g., --base_dir="..") are honored by the shell. The string does not change
  # between attempts, so it is assembled once.
  local base_cmd="\"$BAZEL_BINARY\""
  local word
  for word in "$@"; do
    base_cmd+=" $word"
  done

  # Once an OOM/server-death is detected, the guard is added on later attempts.
  local use_oom_guard=false
  local -r OOM_GUARD_FLAG='--local_resources=HOST_CPUS*.5'

  local RET=1
  local i
  local cmd
  for ((i = 1; i <= attempts; i++)); do
    echo "Attempt ${i}/${attempts}…" >&2

    # Ensure/refresh server & pid before we run (helps produce a fresh pidfile too).
    if ! bazel_evergreen_shutils::is_bazel_server_running "$BAZEL_BINARY"; then
      echo "[retry ${i}] Bazel server not running (likely OOM/killed); restarting…" >&2
      "$BAZEL_BINARY" info >/dev/null 2>&1 || true
      bazel_evergreen_shutils::print_bazel_server_pid "$BAZEL_BINARY" >&2
    fi

    cmd="$base_cmd"

    # If the OOM guard is enabled and the caller did not pass its own
    # --local_resources, append it. It is single-quoted because the flag
    # contains '*', which eval would otherwise offer to pathname expansion.
    if [[ "$use_oom_guard" == true && "$cmd" != *"--local_resources"* ]]; then
      echo "[retry ${i}] Applying OOM guard: ${OOM_GUARD_FLAG}" >&2
      cmd+=" '${OOM_GUARD_FLAG}'"
    fi

    # Prefix timeout, if any.
    if [[ -n "$timeout_str" ]]; then
      cmd="${timeout_str} ${cmd}"
    fi

    # Run it.
    # NOTE: We *do not* add any redirections here; caller controls logging completely.
    # ${env:-} is intentionally unquoted so that it word-splits as before.
    if eval ${env:-} "$cmd"; then
      RET=0
      break
    else
      RET=$?
    fi

    if ! bazel_evergreen_shutils::is_bazel_server_running "$BAZEL_BINARY"; then
      echo "[retry ${i}] Bazel server down (OOM/killed). Enabling OOM guard for next attempt and restarting…" >&2
      use_oom_guard=true
      "$BAZEL_BINARY" shutdown || true
      "$BAZEL_BINARY" info >/dev/null 2>&1 || true
      bazel_evergreen_shutils::print_bazel_server_pid "$BAZEL_BINARY" >&2
    elif [[ $RET -eq 124 ]]; then
      echo "Bazel timed out." >&2
      "$BAZEL_BINARY" shutdown || true
    elif [[ ${RETRY_ON_FAIL:-0} -eq 1 ]]; then
      echo "Bazel failed (exit=$RET); restarting server before retry..." >&2
      "$BAZEL_BINARY" shutdown || true
    else
      break
    fi

    # Back off before the next attempt; pointless after the final one.
    if ((i < attempts)); then
      sleep 60
    fi
  done

  return "$RET"
}

# --- Test helpers ----------------------------------------------------------

# Appends " --test_timeout=N" to the global bazel_args when test_timeout_sec is
# set and non-empty. N is test_timeout_sec, multiplied by 4 when
# bazel_evergreen_shutils::is_s390x_or_ppc64le (defined outside this section)
# succeeds. Does nothing when test_timeout_sec is unset or empty.
bazel_evergreen_shutils::maybe_scale_test_timeout_and_append() {
  [[ -n "${test_timeout_sec:-}" ]] || return 0

  local scaled="$test_timeout_sec"
  if bazel_evergreen_shutils::is_s390x_or_ppc64le; then
    scaled=$((test_timeout_sec * 4))
  fi
  bazel_args="${bazel_args:-} --test_timeout=${scaled}"
}