SERVER-98102 Workload to determine cost of scanning an index with one more field (#41724)

GitOrigin-RevId: 515a3d9c64e3a430bfc9c3ea62b3115d730f8591
This commit is contained in:
Militsa Sotirova 2025-09-25 14:18:25 -04:00 committed by MongoDB Bot
parent f87506c179
commit b1a79c778c
6 changed files with 79 additions and 1 deletions

View File

@ -330,6 +330,41 @@ def create_intersection_collection_template(
)
def create_ixscan_diff_num_fields_template():
    """Build the collection template used to measure index-scan cost vs. index width.

    Creates one collection of `card` documents with ten uniformly-distributed
    integer fields "a".."j", a single-field index on "a", and compound indexes
    on every prefix {a,b}, {a,b,c}, ... up to all ten fields.
    """
    card = 10000
    # Field names "a" through "j".
    field_names = [chr(ord("a") + offset) for offset in range(10)]

    fields = []
    for name in field_names:
        fields.append(
            config.FieldTemplate(
                name=name,
                data_type=config.DataType.INTEGER,
                distribution=RandomDistribution.uniform(RangeGenerator(DataType.INTEGER, 1, card)),
                # Only "a" gets a single-field index; the rest are covered by
                # the compound prefixes below.
                indexed=(name == "a"),
            )
        )

    # Every prefix of length 2..10: ["a","b"], ["a","b","c"], ...
    # (The single-field index {a: 1} comes from the FieldTemplate above.)
    compound_indexes = [field_names[:length] for length in range(2, len(field_names) + 1)]

    return config.CollectionTemplate(
        name="index_scan_diff_num_fields",
        fields=fields,
        compound_indexes=compound_indexes,
        cardinalities=[card],
    )
collection_cardinalities = list(range(10000, 50001, 10000))
c_int_05 = config.CollectionTemplate(
@ -416,6 +451,8 @@ intersection_hash_collections = create_intersection_collection_template(
value_range=10,
)
index_scan_diff_num_fields_collections = create_ixscan_diff_num_fields_template()
# Data Generator settings
data_generator = config.DataGeneratorConfig(
enabled=True,
@ -429,6 +466,7 @@ data_generator = config.DataGeneratorConfig(
or_collections,
intersection_sorted_collections,
intersection_hash_collections,
index_scan_diff_num_fields_collections,
c_int_05,
c_arr_01,
],
@ -473,6 +511,14 @@ qsn_nodes = [
axis=1,
),
),
config.QsNodeCalibrationConfig(
name="IXSCANS_W_DIFF_NUM_FIELDS",
type="IXSCAN",
variables_override=lambda df: pd.concat(
[df["n_index_fields"].rename("Number of fields in index")],
axis=1,
),
),
config.QsNodeCalibrationConfig(type="FETCH"),
config.QsNodeCalibrationConfig(
type="AND_HASH",

View File

@ -49,6 +49,7 @@ class ExecutionStats:
# Technically superfluous, because it's len(n_processed_per_child), but improves readability
n_children: int
seeks: Optional[int]
n_index_fields: Optional[int]
@dataclass

View File

@ -44,6 +44,7 @@ class Node:
n_processed: int
seeks: Optional[int]
children: list[Node]
n_index_fields: Optional[int]
def get_execution_time(self):
"""Execution time of this node without execution time of its children"""
@ -54,7 +55,7 @@ class Node:
def print(self, level=0):
"""Pretty print the execution tree"""
print(
f'{"| " * level}{self.stage}, totalExecutionTime: {self.execution_time_nanoseconds:,}ns, seeks: {self.seeks}, nReturned: {self.n_returned}, nProcessed: {self.n_processed}'
f'{"| " * level}{self.stage}, totalExecutionTime: {self.execution_time_nanoseconds:,}ns, seeks: {self.seeks}, nReturned: {self.n_returned}, nProcessed: {self.n_processed}, nIndexFields: {self.n_index_fields}'
)
for child in self.children:
child.print(level + 1)
@ -166,4 +167,5 @@ def get_common_fields(json_stage: dict[str, Any]) -> dict[str, Any]:
"execution_time_nanoseconds": json_stage["executionTimeNanos"],
"n_returned": json_stage["nReturned"],
"seeks": json_stage.get("seeks"),
"n_index_fields": len(json_stage.get("keyPattern")) if "keyPattern" in json_stage else None,
}

View File

@ -101,6 +101,8 @@ def get_execution_stats(
n_children=len(enode.children),
# Seeks will be None for any node but IXSCAN.
seeks=enode.seeks,
# n_index_fields will be None for any node but IXSCAN.
n_index_fields=enode.n_index_fields,
)
)
return result

View File

@ -106,6 +106,7 @@ class ParametersBuilderClassic:
"keys_length_in_bytes",
"average_document_size_in_bytes",
"number_of_fields",
"n_index_fields",
],
)
@ -122,6 +123,7 @@ class ParametersBuilderClassic:
execution_time=node.execution_time_nanoseconds,
n_processed=node.n_processed,
seeks=node.seeks,
n_index_fields=node.n_index_fields,
)
@staticmethod
@ -131,12 +133,14 @@ class ParametersBuilderClassic:
execution_time: int = None,
n_processed: int = None,
seeks: int = None,
n_index_fields=None,
):
return [
stage,
execution_time,
n_processed,
seeks,
n_index_fields,
params.note,
params.keys_length_in_bytes,
params.average_document_size_in_bytes,

View File

@ -347,6 +347,28 @@ async def execute_fetches(database: DatabaseInstance, collections: Sequence[Coll
)
async def execute_index_scans_w_diff_num_fields(
    database: DatabaseInstance, collections: Sequence[CollectionInfo]
):
    """Run the same range query hinted to each index of increasing field count.

    Issues one `{a: {$lt: 10000}}` query per index — the single-field index
    {a: 1} plus every compound prefix — so calibration can isolate the cost
    of scanning an index with one more field.
    """
    candidates = [c for c in collections if c.name.startswith("index_scan_diff_num_fields")]
    collection = candidates[0]

    # compound_indexes does not include the single-field index {a: 1},
    # so prepend "a" (iterating the one-char string yields the key "a").
    requests = [
        Query(
            {"filter": {"a": {"$lt": 10000}}, "hint": dict.fromkeys(index_fields, 1)},
            note="IXSCANS_W_DIFF_NUM_FIELDS",
        )
        for index_fields in ["a"] + collection.compound_indexes
    ]

    await workload_execution.execute(
        database, main_config.workload_execution, [collection], requests
    )
async def main():
"""Entry point function."""
script_directory = os.path.abspath(os.path.dirname(__file__))
@ -372,6 +394,7 @@ async def main():
execute_sort_intersections,
execute_hash_intersections,
execute_fetches,
execute_index_scans_w_diff_num_fields,
]
for execute_query in execution_query_functions:
await execute_query(database, generator.collection_infos)