"""GDB Pretty-printers for MongoDB.""" import os import re import struct import sys import uuid from pathlib import Path import gdb import gdb.printing ROOT_PATH = str(Path(os.path.abspath(__file__)).parent.parent.parent) if ROOT_PATH not in sys.path: sys.path.insert(0, ROOT_PATH) from src.third_party.immer.dist.tools.gdb_pretty_printers.printers import ( ListIter as ImmerListIter, ) if not gdb: from buildscripts.gdb.mongo import ( get_boost_optional, get_decorable_info, get_object_decoration, lookup_type, ) from buildscripts.gdb.optimizer_printers import register_optimizer_printers try: import collections import bson import bson.json_util from bson.codec_options import CodecOptions except ImportError: print("Warning: Could not load bson library for Python '" + str(sys.version) + "'.") print("Check with the pip command if pymongo 3.x is installed.") bson = None if sys.version_info[0] < 3: raise gdb.GdbError( "MongoDB gdb extensions only support Python 3. Your GDB was compiled against Python 2" ) def get_unique_ptr_bytes(obj): """Read the value of a libstdc++ std::unique_ptr. Returns a gdb.Value where its type resolves to `unsigned char*`. The caller must take care to cast the returned value themselves. This function is particularly useful in the context of mongo::Decorable<> types which store the decorations as a slab of memory with std::unique_ptr. In all other cases get_unique_ptr() can be preferred. """ return obj.cast(gdb.lookup_type("std::_Head_base<0, unsigned char*, false>"))["_M_head_impl"] def get_unique_ptr(obj): """Read the value of a libstdc++ std::unique_ptr.""" return get_unique_ptr_bytes(obj).cast(obj.type.template_argument(0).pointer()) ################################################################################################### # # Pretty-Printers # ################################################################################################### class StatusPrinter(object): """Pretty-printer for mongo::Status.""" @staticmethod def extract_error(val): """Extract the error object (if any) from a Status/StatusWith.""" error = val["_error"] if "px" in error.type.iterkeys(): return error["px"] return error @staticmethod def generate_error_details(error): """Generate a (code,reason) tuple from a Status/StatusWith error object.""" info = error.dereference() code = info["code"] # Remove the mongo::ErrorCodes:: prefix. Does nothing if not a real ErrorCode. code = str(code).split("::")[-1] return (code, info["reason"]) def __init__(self, val): """Initialize StatusPrinter.""" self.val = val def to_string(self): """Return status for printing.""" error = StatusPrinter.extract_error(self.val) if not error: return "Status::OK()" return "Status(%s, %s)" % StatusPrinter.generate_error_details(error) class StatusWithPrinter(object): """Pretty-printer for mongo::StatusWith<>.""" def __init__(self, val): """Initialize StatusWithPrinter.""" self.val = val def to_string(self): """Return status for printing.""" error = StatusPrinter.extract_error(self.val["_status"]) if not error: return "StatusWith(OK, %s)" % (self.val["_t"]) return "StatusWith(%s, %s)" % StatusPrinter.generate_error_details(error) class StringDataPrinter(object): """Pretty-printer for mongo::StringData.""" def __init__(self, val): """Initialize StringDataPrinter.""" self.val = val @staticmethod def display_hint(): """Display hint.""" return "string" def to_string(self): """Return data for printing.""" # As of SERVER-82604, StringData is based on std::string_view, so try with that first sv = self.val["_sv"] if sv is not None: return sv # ... back-off to the legacy format otherwise size = self.val["_size"] if size == -1: return self.val["_data"].lazy_string() return self.val["_data"].lazy_string(length=size) class BoostOptionalPrinter(object): """Pretty-printer for boost::optional.""" def __init__(self, val): """Initialize BoostOptionalPriner.""" self.val = val def to_string(self): """Return data for printing.""" return get_boost_optional(self.val) class BSONObjPrinter(object): """Pretty-printer for mongo::BSONObj.""" def __init__(self, val): """Initialize BSONObjPrinter.""" self.val = val self.ptr = self.val["_objdata"].cast(lookup_type("void").pointer()) self.is_valid = False # Handle the endianness of the BSON object size, which is represented as a 32-bit integer # in little-endian format. inferior = gdb.selected_inferior() if self.ptr.is_optimized_out: # If the value has been optimized out, we cannot decode it. self.size = -1 self.raw_memory = None else: self.size = struct.unpack(" 17 * 1024 * 1024: return options = CodecOptions(document_class=collections.OrderedDict) bsondoc = bson.decode(self.raw_memory, codec_options=options) for key, val in list(bsondoc.items()): yield "key", key yield "value", bson.json_util.dumps(val) def to_string(self): """Return BSONObj for printing.""" # The value has been optimized out. if self.size == -1: return "BSONObj @ %s - optimized out" % (self.ptr) ownership = "owned" if self.val["_ownedBuffer"]["_buffer"]["_holder"]["px"] else "unowned" size = self.size # Print an invalid BSONObj size in hex. if size < 5 or size > 17 * 1024 * 1024: size = hex(size) if size == 5: return "%s empty BSONObj @ %s" % (ownership, self.ptr) suffix = "" if not self.is_valid: # Wondering why this is unprintable? See PYTHON-1824. The Python # driver's BSON implementation does not support all possible BSON # datetimes. (specifically any BSON datetime where the year is > # datetime.MAXYEAR (usually 9999)). # Attempting to print any BSONObj that contains an out of range # datetime at any level of the document will cause an exception. # There exists no workaround for this in the driver; not even the # TypeDecoder API works for this because the BSON implementation # errors out early when the date is out of range. suffix = " - unprintable or invalid" return "%s BSONObj %s bytes @ %s%s" % (ownership, size, self.ptr, suffix) class OplogEntryPrinter(object): """Pretty-printer for mongo::repl::OplogEntry.""" def __init__(self, val): """Initialize OplogEntryPrinter.""" self.val = val def to_string(self): """Return OplogEntry for printing.""" optime = self.val["_entry"]["_opTimeBase"] optime_str = "ts(%s, %s)" % (optime["_timestamp"]["secs"], optime["_timestamp"]["i"]) return "OplogEntry(%s, %s, %s, %s)" % ( str(self.val["_entry"]["_durableReplOperation"]["_opType"]).split("::")[-1], str(self.val["_entry"]["_commandType"]).split("::")[-1], self.val["_entry"]["_durableReplOperation"]["_nss"], optime_str, ) class UUIDPrinter(object): """Pretty-printer for mongo::UUID.""" def __init__(self, val): """Initialize UUIDPrinter.""" self.val = val @staticmethod def display_hint(): """Display hint.""" return "string" def to_string(self): """Return UUID for printing.""" raw_bytes = [self.val["_uuid"]["_M_elems"][i] for i in range(16)] uuid_hex_bytes = [hex(int(b))[2:].zfill(2) for b in raw_bytes] return str(uuid.UUID("".join(uuid_hex_bytes))) class OIDPrinter(object): """Pretty-printer for mongo::OID.""" def __init__(self, val): """Initialize OIDPrinter.""" self.val = val @staticmethod def display_hint(): """Display hint.""" return "string" def to_string(self): """Return OID for printing.""" raw_bytes = [int(self.val["_data"][i]) for i in range(OBJECT_ID_WIDTH)] oid_hex_bytes = [hex(b & 0xFF)[2:].zfill(2) for b in raw_bytes] return "ObjectID('%s')" % "".join(oid_hex_bytes) class RecordIdPrinter(object): """Pretty-printer for mongo::RecordId.""" def __init__(self, val): """Initialize RecordIdPrinter.""" self.val = val @staticmethod def display_hint(): """Display hint.""" return "string" ## Get the address at given offset of data as the selected pointer type def __get_data_address(self, ptr, offset): ptr_type = gdb.lookup_type(ptr).pointer() return self.val["_data"]["_M_elems"][offset].address.cast(ptr_type) def to_string(self): """Return RecordId for printing.""" rid_format = int(self.val["_format"]) if rid_format == 0: return "null RecordId" elif rid_format == 1: koffset = 8 - 1 ## std::alignment_of_v - sizeof(Format); (see record_id.h) rid_address = self.__get_data_address("int64_t", koffset) return "RecordId long: %d" % int(rid_address.dereference()) elif rid_format == 2: str_len = self.__get_data_address("int8_t", 0).dereference() array_address = self.__get_data_address("int8_t", 1) raw_bytes = [array_address[i] for i in range(0, str_len)] hex_bytes = [hex(b & 0xFF)[2:].zfill(2) for b in raw_bytes] return "RecordId small string %d hex bytes: %s" % (str_len, str("".join(hex_bytes))) elif rid_format == 3: koffset = ( 8 - 1 ) ## std::alignment_of_v - sizeof(Format); (see record_id.h) buffer = self.__get_data_address("mongo::ConstSharedBuffer", koffset).dereference() holder_ptr = holder = buffer["_buffer"]["_holder"]["px"] holder = holder.dereference() str_len = int(holder["_capacity"]) # Start of data is immediately after pointer for holder start_ptr = (holder_ptr + 1).dereference().cast(lookup_type("char")).address raw_bytes = [start_ptr[i] for i in range(0, str_len)] hex_bytes = [hex(b & 0xFF)[2:].zfill(2) for b in raw_bytes] return "RecordId big string %d hex bytes @ %s: %s" % ( str_len, holder_ptr + 1, str("".join(hex_bytes)), ) else: return "unknown RecordId format: %d" % rid_format MAX_DB_NAME_LENGTH = 63 TENANT_ID_MASK = 0x80 OBJECT_ID_WIDTH = 12 def extract_tenant_id(data): raw_bytes = [int(data[i]) for i in range(1, OBJECT_ID_WIDTH + 1)] return "".join([hex(b & 0xFF)[2:].zfill(2) for b in raw_bytes]) def is_small_string(flags): return bool(flags & 0b00000010) def small_string_size(flags): return flags >> 2 class DatabaseNamePrinter(object): """Pretty-printer for mongo::DatabaseName.""" def __init__(self, val): """Initialize DatabaseNamePrinter.""" self.val = val @staticmethod def display_hint(): """Display hint.""" return "string" def _get_storage_data(self): """Return the data pointer from the _data Storage class.""" data = self.val["_data"] flags = data["_flags"] data_ptr = data["_data"] if is_small_string(flags): return data_ptr.address, small_string_size(flags) else: return data_ptr, data["_length"] def _get_string(self, address, size): data = gdb.selected_inferior().read_memory(address, size).tobytes() tenant = data[0] & TENANT_ID_MASK if tenant: return f"{extract_tenant_id(data)}_{data[1 + OBJECT_ID_WIDTH :].decode()}" else: return data[1:].decode() def to_string(self): """Return string representation of NamespaceString.""" address, size = self._get_storage_data() # Do not decode the DatabaseName if the parsed size exceeds maximum. maxSize = 1 + OBJECT_ID_WIDTH + MAX_DB_NAME_LENGTH if size > maxSize: return "DatabaseName with size {} exceeds maximum {}, _data = {}".format( size, maxSize, self.val["_data"] ) else: return self._get_string(address, size) class DecorablePrinter(object): """Pretty-printer for mongo::Decorable<>.""" def __init__(self, val): """Initialize DecorablePrinter.""" self.val = val self.start, self.count = get_decorable_info(val) @staticmethod def display_hint(): """Display hint.""" return "map" def to_string(self): """Return Decorable for printing.""" return "Decorable<{}> with {} elems ".format(self.val.type.template_argument(0), self.count) def children(self): """Children.""" for index in range(self.count): try: deco_type_name, obj, obj_addr = get_object_decoration(self.val, self.start, index) yield ("key", "{}:{}:{}".format(index, obj_addr, deco_type_name)) yield ("value", obj) except Exception as err: print("Failed to look up decoration type: " + deco_type_name + ": " + str(err)) def _get_flags(flag_val, flags): """ Return a list of flag name strings. `flags` is a list of `(flag_name, flag_value)` pairs. The list must be in sorted in order of the highest `flag_value` first and the lowest last. """ if not flags: return "Flags not parsed from source." ret = [] for name, hex_val in flags: dec_val = int(hex_val, 16) if flag_val < dec_val: continue ret.append(name) flag_val -= dec_val return ret class WtCursorPrinter(object): """ Pretty-printer for WT_CURSOR objects. Complement the `flags: int` field with the macro names used in the source code. """ try: with open("./src/third_party/wiredtiger/src/include/wiredtiger.h.in") as wiredtiger_header: file_contents = wiredtiger_header.read() cursor_flags_re = re.compile(r"#define\s+WT_CURSTD_(\w+)\s+0x(\d+)u") cursor_flags = cursor_flags_re.findall(file_contents)[::-1] except IOError: cursor_flags = [] def __init__(self, val): """Initializer.""" self.val = val def to_string(self): """to_string.""" return None def children(self): """children.""" for field in self.val.type.fields(): field_val = self.val[field.name] if field.name == "flags": yield ( "flags", "{} ({})".format(field_val, str(_get_flags(field_val, self.cursor_flags))), ) else: yield (field.name, field_val) class WtSessionImplPrinter(object): """ Pretty-printer for WT_SESSION_IMPL objects. Complement the `flags: int` field with the macro names used in the source code. """ try: with open("./src/third_party/wiredtiger/src/include/session.h") as session_header: file_contents = session_header.read() session_flags_re = re.compile(r"#define\s+WT_SESSION_(\w+)\s+0x(\d+)u") session_flags = session_flags_re.findall(file_contents)[::-1] except IOError: session_flags = [] def __init__(self, val): """Initializer.""" self.val = val def to_string(self): """to_string.""" return None def children(self): """children.""" for field in self.val.type.fields(): field_val = self.val[field.name] if field.name == "flags": yield ( "flags", "{} ({})".format(field_val, str(_get_flags(field_val, self.session_flags))), ) else: yield (field.name, field_val) class WtTxnPrinter(object): """ Pretty-printer for WT_TXN objects. Complement the `flags: int` field with the macro names used in the source code. """ try: with open("./src/third_party/wiredtiger/src/include/txn.h") as txn_header: file_contents = txn_header.read() txn_flags_re = re.compile(r"#define\s+WT_TXN_(\w+)\s+0x(\d+)u") txn_flags = txn_flags_re.findall(file_contents)[::-1] except IOError: txn_flags = [] def __init__(self, val): """Initializer.""" self.val = val def to_string(self): """to_string.""" return None def children(self): """children.""" for field in self.val.type.fields(): field_val = self.val[field.name] if field.name == "flags": yield ( "flags", "{} ({})".format(field_val, str(_get_flags(field_val, self.txn_flags))), ) else: yield (field.name, field_val) def absl_insert_version_after_absl(cpp_name): """Insert version inline namespace after the first `absl` namespace found in the given string.""" # See more: # https://github.com/abseil/abseil-cpp/blob/929c17cf481222c35ff1652498994871120e832a/absl/base/options.h#L203 ABSL_OPTION_INLINE_NAMESPACE_NAME = "lts_20250512" absl_ns_str = "absl::" absl_ns_start = cpp_name.find(absl_ns_str) if absl_ns_start == -1: raise ValueError("No `absl` namespace found in " + cpp_name) absl_ns_end = absl_ns_start + len(absl_ns_str) return ( cpp_name[:absl_ns_end] + ABSL_OPTION_INLINE_NAMESPACE_NAME + "::" + cpp_name[absl_ns_end:] ) # Cache for types found via find_type_from_info_types as they can be expensive to look up. _type_cache: dict[str, gdb.Type] = {} # Helper to find the gdb.Type of the given symbol given by a regex. # This is useful when compilers disagree about the spelling of a symbol/template instantiation. # Uses `info types ` gdb command to find the type, parses the output and then looks up the type. def find_type_from_info_types(regex): if regex in _type_cache: return _type_cache[regex] output = gdb.execute(f"info types {regex}", to_string=True) # Example output: # All types matching regular expression "absl::lts_.*::container_internal::internal_compressed_tuple::Storage": # File src/third_party/abseil-cpp/dist/absl/container/internal/compressed_tuple.h: # 85: absl::lts_20250512::container_internal::internal_compressed_tuple::Storage; # Regex looking for: number + colon + whitespace + capture group + semicolon type_pattern = re.compile(r"^\s*\d+:\s+(.*?);$", re.MULTILINE) match = re.search(type_pattern, output) if match: type_str = match.group(1) res = gdb.lookup_type(type_str) _type_cache[regex] = res return res raise RuntimeError(f"No types found for regex: {regex}") def absl_get_settings(val): """Gets the settings_ field for abseil (flat/node)_hash_(map/set).""" # Find the type of the CompressedTuple Storage template. # Abseil uses an inline namespace for versioning, so it may contain '::lts_20250512' in the middle of the symbol name. # Clang and GCC may mangle the templates differently for the 0 size_t parameter, so we use '0.*' to match both '0' and '0ul'. common_fields_storage_type = find_type_from_info_types( "absl.*::container_internal::internal_compressed_tuple::Storage", ) # The Hash, Eq, or Alloc functors may not be zero-sized objects. # mongo::LogicalSessionIdHash is one such example. An explicit cast is needed to # disambiguate which `value` member variable of the CompressedTuple is to be accessed. return val["settings_"].cast(common_fields_storage_type)["value"] def absl_container_size(settings): return settings["size_"]["data_"] >> 17 def absl_get_nodes(val): """Return a generator of every node in absl::container_internal::raw_hash_set and derived classes.""" settings = absl_get_settings(val) size = absl_container_size(settings) if size == 0: return capacity = int(settings["capacity_"]) heap = settings["heap_or_soo_"]["heap"] ctrl = heap["control"] # Derive the underlying type stored in the container. slot_type = lookup_type(str(val.type.strip_typedefs()) + "::slot_type").strip_typedefs() # Using the array of ctrl bytes, search for in-use slots and return them # https://github.com/abseil/abseil-cpp/blob/8a3caf7dea955b513a6c1b572a2423c6b4213402/absl/container/internal/raw_hash_set.h#L2108-L2113 for item in range(capacity): ctrl_t = int(ctrl[item]) if ctrl_t >= 0: yield heap["slot_array"]["p"].cast(slot_type.pointer())[item] class AbslHashSetPrinterBase(object): """Pretty-printer base class for absl::[node/flat]_hash_set<>.""" def __init__(self, val, to_str): """Initialize absl::[node/flat]_hash_set.""" self.val = val self.to_str = to_str @staticmethod def display_hint(): """Display hint.""" return "array" def to_string(self): """Return absl::[node/flat]_hash_set for printing.""" return "absl::%s_hash_set<%s> with %s elems " % ( self.to_str, self.val.type.template_argument(0), absl_container_size(absl_get_settings(self.val)), ) class AbslNodeHashSetPrinter(AbslHashSetPrinterBase): """Pretty-printer for absl::node_hash_set<>.""" def __init__(self, val): """Initialize absl::node_hash_set.""" AbslHashSetPrinterBase.__init__(self, val, "node") def children(self): """Children.""" count = 0 for val in absl_get_nodes(self.val): yield (str(count), val.dereference()) count += 1 class AbslFlatHashSetPrinter(AbslHashSetPrinterBase): """Pretty-printer for absl::flat_hash_set<>.""" def __init__(self, val): """Initialize absl::flat_hash_set.""" AbslHashSetPrinterBase.__init__(self, val, "flat") def children(self): """Children.""" count = 0 for val in absl_get_nodes(self.val): yield (str(count), val.reference_value()) count += 1 class AbslHashMapPrinterBase(object): """Pretty-printer base class for absl::[node/flat]_hash_map<>.""" def __init__(self, val, to_str): """Initialize absl::[node/flat]_hash_map.""" self.val = val self.to_str = to_str @staticmethod def display_hint(): """Display hint.""" return "map" def to_string(self): """Return absl::[node/flat]_hash_map for printing.""" return "absl::%s_hash_map<%s, %s> with %s elems " % ( self.to_str, self.val.type.template_argument(0), self.val.type.template_argument(1), absl_container_size(absl_get_settings(self.val)), ) class AbslNodeHashMapPrinter(AbslHashMapPrinterBase): """Pretty-printer for absl::node_hash_map<>.""" def __init__(self, val): """Initialize absl::node_hash_map.""" AbslHashMapPrinterBase.__init__(self, val, "node") def children(self): """Children.""" for kvp in absl_get_nodes(self.val): yield ("key", kvp["first"]) yield ("value", kvp["second"]) class AbslFlatHashMapPrinter(AbslHashMapPrinterBase): """Pretty-printer for absl::flat_hash_map<>.""" def __init__(self, val): """Initialize absl::flat_hash_map.""" AbslHashMapPrinterBase.__init__(self, val, "flat") def children(self): """Children.""" for kvp in absl_get_nodes(self.val): yield ("key", kvp["key"]) yield ("value", kvp["value"]["second"]) class ImmutableMapIter(ImmerListIter): def __init__(self, val): super().__init__(val) self.max = (1 << 64) - 1 self.pair = None self.curr = (None, self.max, self.max) def __next__(self): if self.pair: result = ("value", self.pair["second"]) self.pair = None self.i += 1 return result if self.i == self.size: raise StopIteration if self.i < self.curr[1] or self.i >= self.curr[2]: self.curr = self.region() self.pair = self.curr[0][self.i - self.curr[1]].cast( gdb.lookup_type(self.v.type.template_argument(0).name) ) result = ("key", self.pair["first"]) return result class ImmutableMapPrinter: """Pretty-printer for mongo::immutable::map<>.""" def __init__(self, val): self.val = val def to_string(self): return "%s of size %d" % (self.val.type, int(self.val["_storage"]["impl_"]["size"])) def children(self): return ImmutableMapIter(self.val["_storage"]) def display_hint(self): return "map" class ImmutableSetPrinter: """Pretty-printer for mongo::immutable::set<>.""" def __init__(self, val): self.val = val def to_string(self): return "%s of size %d" % (self.val.type, int(self.val["_storage"]["impl_"]["size"])) def children(self): return ImmerListIter(self.val["_storage"]) def display_hint(self): return "array" class MatchExpressionPrinter: """Pretty-printer for mongo::MatchExpression.""" def __init__(self, val): self.val = val def to_string(self): gdb.set_convenience_variable("_mep_tmp", self.val.reference_value()) return ( gdb.parse_and_eval("$_mep_tmp.toString()") .format_string() .encode() .decode("unicode_escape")[1:-1] ) def display_hint(self): return "map" def find_match_brackets(search, opening="<", closing=">"): """Return the index of the closing bracket that matches the first opening bracket. Return -1 if no last matching bracket is found, i.e. not a template. Example: 'Foo::iterator'' returns 5 """ index = search.find(opening) if index == -1: return -1 start = index + 1 count = 1 str_len = len(search) for index in range(start, str_len): char = search[index] if char == opening: count += 1 elif char == closing: count -= 1 if count == 0: return index return -1 class MongoSubPrettyPrinter(gdb.printing.SubPrettyPrinter): """Sub pretty printer managed by the pretty-printer collection.""" def __init__(self, name, prefix, is_template, printer): """Initialize MongoSubPrettyPrinter.""" super(MongoSubPrettyPrinter, self).__init__(name) self.prefix = prefix self.printer = printer self.is_template = is_template class MongoPrettyPrinterCollection(gdb.printing.PrettyPrinter): """MongoDB-specific printer printer collection that ignores subtypes. It will match 'HashTable but not 'HashTable::iterator' when asked for 'HashTable'. """ def __init__(self): """Initialize MongoPrettyPrinterCollection.""" super(MongoPrettyPrinterCollection, self).__init__("mongo", []) def add(self, name, prefix, is_template, printer): """Add a subprinter.""" self.subprinters.append(MongoSubPrettyPrinter(name, prefix, is_template, printer)) def __call__(self, val): """Return matched printer type.""" # Get the type name. lookup_tag = gdb.types.get_basic_type(val.type).tag if not lookup_tag: lookup_tag = val.type.name if not lookup_tag: return None index = find_match_brackets(lookup_tag) for printer in self.subprinters: if not printer.enabled: continue # Ignore subtypes of templated classes. # We do not want HashTable::iterator as an example, just HashTable if printer.is_template: if index + 1 == len(lookup_tag) and lookup_tag.find(printer.prefix) == 0: return printer.printer(val) elif lookup_tag == printer.prefix: return printer.printer(val) return None class WtUpdateToBsonPrinter(object): """Pretty printer for WT_UPDATE. Interpreting the `data` field as bson.""" def __init__(self, val): """Initializer.""" self.val = val self.size = self.val["size"] self.ptr = self.val["data"] @staticmethod def display_hint(): """DisplayHint.""" return "map" def to_string(self): """ToString.""" elems = [] for idx in range(len(self.val.type.fields())): fld = self.val.type.fields()[idx] val = self.val[fld.name] elems.append(str((fld.name, str(val)))) return "WT_UPDATE: \n %s" % ("\n ".join(elems)) def children(self): """children.""" if self.val["type"] != 3: # Type 3 is a "normal" update. Notably type 4 is a deletion and type 1 represents a # delta relative to the previous committed version in the update chain. Only attempt # to parse type 3 as bson. return memory = gdb.selected_inferior().read_memory(self.ptr, self.size).tobytes() bsonobj = None try: bsonobj = next(bson.decode_iter(memory)) except bson.errors.InvalidBSON: return for key, value in list(bsonobj.items()): yield "key", key yield "value", bson.json_util.dumps(value) def make_inverse_enum_dict(enum_type_name): """ Create a dictionary that maps enum values to the unqualified names of the enum elements. For example, if the enum type is 'mongo::sbe::vm::Builtin' with an element 'regexMatch', the dictionary will contain 'regexMatch' value and not 'mongo::sbe::vm::Builtin::regexMatch'. """ enum_dict = gdb.types.make_enum_dict(lookup_type(enum_type_name)) enum_inverse_dic = dict() for key, value in enum_dict.items(): enum_inverse_dic[int(value)] = key.split("::")[-1] # take last element return enum_inverse_dic def read_as_integer(pmem, size): """Read 'size' bytes at 'pmem' as an integer.""" # We assume the same platform for the debugger and the debuggee (thus, 'sys.byteorder'). If # this becomes a problem look into whether it's possible to determine the byteorder of the # inferior. return int.from_bytes( gdb.selected_inferior().read_memory(pmem, size).tobytes(), sys.byteorder, ) def read_as_integer_signed(pmem, size): """Read 'size' bytes at 'pmem' as an integer.""" # We assume the same platform for the debugger and the debuggee (thus, 'sys.byteorder'). If # this becomes a problem look into whether it's possible to determine the byteorder of the # inferior. return int.from_bytes( gdb.selected_inferior().read_memory(pmem, size).tobytes(), sys.byteorder, signed=True, ) class SbeCodeFragmentPrinter(object): """ Pretty-printer for mongo::sbe::vm::CodeFragment. Objects of 'mongo::sbe::vm::CodeFragment' type contain a stream of op-codes to be executed by the 'sbe::vm::ByteCode' class. The pretty printer decodes the stream and outputs it as a list of named instructions. """ def __init__(self, val): """Initialize SbeCodeFragmentPrinter.""" self.val = val # The instructions stream is stored using 'absl::InlinedVector' type, which can # either use an inline buffer or an allocated one. The choice of storage is decoded in the # last bit of the 'metadata_' field. storage = self.val["_instrs"]["storage_"] meta = storage["metadata_"].cast(lookup_type("size_t")) self.is_inlined = meta % 2 == 0 self.size = meta >> 1 self.pdata = ( storage["data_"]["inlined"]["inlined_data"].cast(lookup_type("uint8_t").pointer()) if self.is_inlined else storage["data_"]["allocated"]["allocated_data"] ) # Precompute lookup tables for Instructions and Builtins. self.optags_lookup = make_inverse_enum_dict("mongo::sbe::vm::Instruction::Tags") self.builtins_lookup = make_inverse_enum_dict("mongo::sbe::vm::Builtin") self.valuetags_lookup = make_inverse_enum_dict("mongo::sbe::value::TypeTags") def to_string(self): """Return sbe::vm::CodeFragment for printing.""" return "%s" % (self.val.type) def children(self): """children.""" yield "_instrs", '{... (to see raw output, run "disable pretty-printer")}' yield "_fixUps", self.val["_fixUps"] yield "_stackSize", self.val["_stackSize"] yield "inlined", self.is_inlined yield "instrs data at", "[{} - {}]".format(hex(self.pdata), hex(self.pdata + self.size)) yield "instrs total size", self.size # Sizes for types we'll use when parsing the insructions stream. int_size = lookup_type("int").sizeof ptr_size = lookup_type("void").pointer().sizeof tag_size = lookup_type("mongo::sbe::value::TypeTags").sizeof value_size = lookup_type("mongo::sbe::value::Value").sizeof uint8_size = lookup_type("uint8_t").sizeof uint32_size = lookup_type("uint32_t").sizeof uint64_size = lookup_type("uint64_t").sizeof builtin_size = lookup_type("mongo::sbe::vm::Builtin").sizeof time_unit_size = lookup_type("mongo::TimeUnit").sizeof timezone_size = lookup_type("mongo::TimeZone").sizeof day_of_week_size = lookup_type("mongo::DayOfWeek").sizeof cur_op = self.pdata end_op = self.pdata + self.size instr_count = 0 error = False while cur_op < end_op: op_addr = cur_op op_tag = read_as_integer(op_addr, 1) if op_tag not in self.optags_lookup: yield hex(op_addr), "unknown op tag: {}".format(op_tag) error = True break op_name = self.optags_lookup[op_tag] cur_op += 1 instr_count += 1 # Some instructions have extra arguments, embedded into the ops stream. args = "" if op_name in [ "pushLocalVal", "pushMoveLocalVal", "pushOneArgLambda", "pushTwoArgLambda", ]: args = "arg: " + str(read_as_integer(cur_op, int_size)) cur_op += int_size elif op_name in ["jmp", "jmpTrue", "jmpFalse", "jmpNothing", "jmpNotNothing"]: offset = read_as_integer_signed(cur_op, int_size) cur_op += int_size args = "offset: " + str(offset) + ", target: " + hex(cur_op + offset) elif op_name in ["pushConstVal", "getFieldImm"]: tag = read_as_integer(cur_op, tag_size) args = ( "tag: " + self.valuetags_lookup.get(tag, "unknown") + ", value: " + hex(read_as_integer(cur_op + tag_size, value_size)) ) cur_op += tag_size + value_size elif op_name in ["pushAccessVal", "pushMoveVal"]: args = "accessor: " + hex(read_as_integer(cur_op, ptr_size)) cur_op += ptr_size elif op_name in ["numConvert"]: args = "convert to: " + self.valuetags_lookup.get( read_as_integer(cur_op, tag_size), "unknown" ) cur_op += tag_size elif op_name in ["typeMatchImm"]: args = "mask: " + hex(read_as_integer(cur_op, uint32_size)) cur_op += uint32_size elif op_name in ["function", "functionSmall"]: arity_size = ( lookup_type("mongo::sbe::vm::ArityType").sizeof if op_name == "function" else lookup_type("mongo::sbe::vm::SmallArityType").sizeof ) builtin_id = read_as_integer(cur_op, builtin_size) args = "builtin: " + self.builtins_lookup.get(builtin_id, "unknown") args += " arity: " + str(read_as_integer(cur_op + builtin_size, arity_size)) cur_op += builtin_size + arity_size elif op_name in ["fillEmptyImm"]: args = "Instruction::Constants: " + str(read_as_integer(cur_op, uint8_size)) cur_op += uint8_size elif op_name in ["traverseFImm", "traversePImm"]: position = read_as_integer(cur_op, uint8_size) cur_op += uint8_size const_enum = read_as_integer(cur_op, uint8_size) cur_op += uint8_size args = ( "providePosition: " + str(position) + ", Instruction::Constants: " + str(const_enum) + ", offset: " + str(read_as_integer_signed(cur_op, int_size)) ) cur_op += int_size elif op_name in ["dateTruncImm"]: unit = read_as_integer(cur_op, time_unit_size) cur_op += time_unit_size args = "unit: " + str(unit) bin_size = read_as_integer(cur_op, uint64_size) cur_op += uint64_size args += ", binSize: " + str(bin_size) timezone = read_as_integer(cur_op, timezone_size) cur_op += timezone_size args += ", timezone: " + hex(timezone) day_of_week = read_as_integer(cur_op, day_of_week_size) cur_op += day_of_week_size args += ", dayOfWeek: " + str(day_of_week) elif op_name in ["traverseCsiCellValues", "traverseCsiCellTypes"]: offset = read_as_integer_signed(cur_op, int_size) cur_op += int_size args = "lambda at: " + hex(cur_op + offset) yield hex(op_addr), "{} ({})".format(op_name, args) yield ( "instructions count", instr_count if not error else "? (successfully parsed {})".format(instr_count), ) def build_pretty_printer(): """Build a pretty printer.""" pp = MongoPrettyPrinterCollection() pp.add("BSONObj", "mongo::BSONObj", False, BSONObjPrinter) pp.add("DatabaseName", "mongo::DatabaseName", False, DatabaseNamePrinter) pp.add("NamespaceString", "mongo::NamespaceString", False, DatabaseNamePrinter) pp.add("Decorable", "mongo::Decorable", True, DecorablePrinter) pp.add("Status", "mongo::Status", False, StatusPrinter) pp.add("StatusWith", "mongo::StatusWith", True, StatusWithPrinter) pp.add("StringData", "mongo::StringData", False, StringDataPrinter) pp.add( "node_hash_map", absl_insert_version_after_absl("absl::node_hash_map"), True, AbslNodeHashMapPrinter, ) pp.add( "node_hash_set", absl_insert_version_after_absl("absl::node_hash_set"), True, AbslNodeHashSetPrinter, ) pp.add( "flat_hash_map", absl_insert_version_after_absl("absl::flat_hash_map"), True, AbslFlatHashMapPrinter, ) pp.add( "flat_hash_set", absl_insert_version_after_absl("absl::flat_hash_set"), True, AbslFlatHashSetPrinter, ) pp.add("RecordId", "mongo::RecordId", False, RecordIdPrinter) pp.add("UUID", "mongo::UUID", False, UUIDPrinter) pp.add("OID", "mongo::OID", False, OIDPrinter) pp.add("OplogEntry", "mongo::repl::OplogEntry", False, OplogEntryPrinter) pp.add("__wt_cursor", "__wt_cursor", False, WtCursorPrinter) pp.add("__wt_session_impl", "__wt_session_impl", False, WtSessionImplPrinter) pp.add("__wt_txn", "__wt_txn", False, WtTxnPrinter) pp.add("__wt_update", "__wt_update", False, WtUpdateToBsonPrinter) pp.add("CodeFragment", "mongo::sbe::vm::CodeFragment", False, SbeCodeFragmentPrinter) pp.add("boost::optional", "boost::optional", True, BoostOptionalPrinter) pp.add("immutable::map", "mongo::immutable::map", True, ImmutableMapPrinter) pp.add("immutable::set", "mongo::immutable::set", True, ImmutableSetPrinter) pp.add("MatchExpression", "mongo::MatchExpression", False, MatchExpressionPrinter) # Optimizer/ABT related pretty printers that can be used only with a running process. register_optimizer_printers(pp) return pp ################################################################################################### # # Setup # ################################################################################################### # Register pretty-printers, replace existing mongo printers gdb.printing.register_pretty_printer(gdb.current_objfile(), build_pretty_printer(), True) print("MongoDB GDB pretty-printers loaded")