mirror of https://github.com/mongodb/mongo
SERVER-98102 Workload to determine cost of scanning an index with one more field (#41724)
GitOrigin-RevId: 515a3d9c64e3a430bfc9c3ea62b3115d730f8591
This commit is contained in:
parent f87506c179
commit b1a79c778c
@@ -330,6 +330,41 @@ def create_intersection_collection_template(
)


def create_ixscan_diff_num_fields_template():
    card = 10000
    # Generate fields "a", "b", ... "j"
    field_names = [chr(ord("a") + i) for i in range(10)]
    fields = [
        config.FieldTemplate(
            name=field_name,
            data_type=config.DataType.INTEGER,
            distribution=RandomDistribution.uniform(RangeGenerator(DataType.INTEGER, 1, card)),
            # We only want a single field index on 'a'.
            indexed=(field_name == "a"),
        )
        for field_name in field_names
    ]
    compound_indexes = [
        # Note the single field index is created in the FieldTemplate for 'a' above.
        ["a", "b"],
        ["a", "b", "c"],
        ["a", "b", "c", "d"],
        ["a", "b", "c", "d", "e"],
        ["a", "b", "c", "d", "e", "f"],
        ["a", "b", "c", "d", "e", "f", "g"],
        ["a", "b", "c", "d", "e", "f", "g", "h"],
        ["a", "b", "c", "d", "e", "f", "g", "h", "i"],
        ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"],
    ]

    return config.CollectionTemplate(
        name="index_scan_diff_num_fields",
        fields=fields,
        compound_indexes=compound_indexes,
        cardinalities=[card],
    )


collection_cardinalities = list(range(10000, 50001, 10000))

c_int_05 = config.CollectionTemplate(

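The compound indexes above are just the growing prefixes of field_names, so each template differs from the previous one by exactly one trailing index field. As a minimal sketch (not part of the diff; names here are hypothetical), the same list could be produced programmatically:

field_names = [chr(ord("a") + i) for i in range(10)]  # "a" .. "j", as in the template
compound_prefixes = [field_names[: n + 1] for n in range(1, len(field_names))]
# [['a', 'b'], ['a', 'b', 'c'], ..., ['a', 'b', ..., 'j']] -- 9 lists, matching the literal above
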
@@ -416,6 +451,8 @@ intersection_hash_collections = create_intersection_collection_template(
    value_range=10,
)

index_scan_diff_num_fields_collections = create_ixscan_diff_num_fields_template()

# Data Generator settings
data_generator = config.DataGeneratorConfig(
    enabled=True,

@@ -429,6 +466,7 @@ data_generator = config.DataGeneratorConfig(
        or_collections,
        intersection_sorted_collections,
        intersection_hash_collections,
        index_scan_diff_num_fields_collections,
        c_int_05,
        c_arr_01,
    ],

@@ -473,6 +511,14 @@ qsn_nodes = [
            axis=1,
        ),
    ),
    config.QsNodeCalibrationConfig(
        name="IXSCANS_W_DIFF_NUM_FIELDS",
        type="IXSCAN",
        variables_override=lambda df: pd.concat(
            [df["n_index_fields"].rename("Number of fields in index")],
            axis=1,
        ),
    ),
    config.QsNodeCalibrationConfig(type="FETCH"),
    config.QsNodeCalibrationConfig(
        type="AND_HASH",

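The variables_override above reduces the calibration input to a single explanatory variable, the number of fields in the scanned index, under a readable column name. A minimal, self-contained pandas sketch of that transformation (the sample data below is made up):

import pandas as pd

df = pd.DataFrame({"n_index_fields": [2, 5, 10], "seeks": [100, 100, 100]})
variables = pd.concat([df["n_index_fields"].rename("Number of fields in index")], axis=1)
print(list(variables.columns))  # ['Number of fields in index']
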
@@ -49,6 +49,7 @@ class ExecutionStats:
    # Technically superfluous, because it's len(n_processed_per_child), but improves readability
    n_children: int
    seeks: Optional[int]
    n_index_fields: Optional[int]


@dataclass

@@ -44,6 +44,7 @@ class Node:
    n_processed: int
    seeks: Optional[int]
    children: list[Node]
    n_index_fields: Optional[int]

    def get_execution_time(self):
        """Execution time of this node without execution time of its children"""

@@ -54,7 +55,7 @@ class Node:
    def print(self, level=0):
        """Pretty print the execution tree"""
        print(
-            f'{"| " * level}{self.stage}, totalExecutionTime: {self.execution_time_nanoseconds:,}ns, seeks: {self.seeks}, nReturned: {self.n_returned}, nProcessed: {self.n_processed}'
+            f'{"| " * level}{self.stage}, totalExecutionTime: {self.execution_time_nanoseconds:,}ns, seeks: {self.seeks}, nReturned: {self.n_returned}, nProcessed: {self.n_processed}, nIndexFields: {self.n_index_fields}'
        )
        for child in self.children:
            child.print(level + 1)

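With the extra field, a pretty-printed index scan node would look roughly like the line below (values invented for illustration; non-IXSCAN stages print seeks and nIndexFields as None):

IXSCAN, totalExecutionTime: 1,234,567ns, seeks: 101, nReturned: 100, nProcessed: 100, nIndexFields: 3
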
@@ -166,4 +167,5 @@ def get_common_fields(json_stage: dict[str, Any]) -> dict[str, Any]:
        "execution_time_nanoseconds": json_stage["executionTimeNanos"],
        "n_returned": json_stage["nReturned"],
        "seeks": json_stage.get("seeks"),
        "n_index_fields": len(json_stage.get("keyPattern")) if "keyPattern" in json_stage else None,
    }

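n_index_fields is derived from the keyPattern of the explain stage, so it counts the keys of the index being scanned and stays None for stages without a keyPattern. A small sketch using abbreviated, made-up explain fragments:

ixscan_stage = {"stage": "IXSCAN", "keyPattern": {"a": 1, "b": 1, "c": 1}}
fetch_stage = {"stage": "FETCH"}
for stage in (ixscan_stage, fetch_stage):
    n_index_fields = len(stage.get("keyPattern")) if "keyPattern" in stage else None
    print(stage["stage"], n_index_fields)  # IXSCAN 3, then FETCH None
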
@@ -101,6 +101,8 @@ def get_execution_stats(
                n_children=len(enode.children),
                # Seeks will be None for any node but IXSCAN.
                seeks=enode.seeks,
                # n_index_fields will be None for any node but IXSCAN.
                n_index_fields=enode.n_index_fields,
            )
        )
    return result

@@ -106,6 +106,7 @@ class ParametersBuilderClassic:
                "keys_length_in_bytes",
                "average_document_size_in_bytes",
                "number_of_fields",
                "n_index_fields",
            ],
        )

@@ -122,6 +123,7 @@ class ParametersBuilderClassic:
            execution_time=node.execution_time_nanoseconds,
            n_processed=node.n_processed,
            seeks=node.seeks,
            n_index_fields=node.n_index_fields,
        )

    @staticmethod

@@ -131,12 +133,14 @@ class ParametersBuilderClassic:
        execution_time: int = None,
        n_processed: int = None,
        seeks: int = None,
        n_index_fields=None,
    ):
        return [
            stage,
            execution_time,
            n_processed,
            seeks,
            n_index_fields,
            params.note,
            params.keys_length_in_bytes,
            params.average_document_size_in_bytes,

@@ -347,6 +347,28 @@ async def execute_fetches(database: DatabaseInstance, collections: Sequence[Coll
    )


async def execute_index_scans_w_diff_num_fields(
    database: DatabaseInstance, collections: Sequence[CollectionInfo]
):
    collection = [c for c in collections if c.name.startswith("index_scan_diff_num_fields")][0]
    requests = []

    # The compound_indexes list does not contain the single-field index {a: 1}.
    for index in ["a"] + collection.compound_indexes:
        hint_obj = {key: 1 for key in index}

        requests.append(
            Query(
                {"filter": {"a": {"$lt": 10000}}, "hint": hint_obj},
                note="IXSCANS_W_DIFF_NUM_FIELDS",
            )
        )

    await workload_execution.execute(
        database, main_config.workload_execution, [collection], requests
    )


async def main():
    """Entry point function."""
    script_directory = os.path.abspath(os.path.dirname(__file__))

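Each request hints a different index, so the same predicate on 'a' (which matches nearly every document, since values are drawn uniformly from 1..card) is answered by index scans of increasing key width; the single-field case is covered by the string "a", whose iteration yields the one key. A sketch of the hint objects the loop builds (compound index list abbreviated here):

compound_indexes = [["a", "b"], ["a", "b", "c"]]  # stand-in for the prefixes defined in the template
for index in ["a"] + compound_indexes:
    hint_obj = {key: 1 for key in index}
    print(hint_obj)
# {'a': 1}
# {'a': 1, 'b': 1}
# {'a': 1, 'b': 1, 'c': 1}
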
@@ -372,6 +394,7 @@ async def main():
        execute_sort_intersections,
        execute_hash_intersections,
        execute_fetches,
        execute_index_scans_w_diff_num_fields,
    ]
    for execute_query in execution_query_functions:
        await execute_query(database, generator.collection_infos)