mirror of https://github.com/mongodb/mongo
SERVER-113632 Create workload for incremental filter leaf cost (#43737)
GitOrigin-RevId: ae12546684c5ca54b5fa99be0847d787f989b90e
parent: 12e61864bf
commit: 3ebe2a40b9

@@ -179,17 +179,23 @@ def create_coll_scan_collection_template(
                 ),
                 indexed=True,
             ),
-            config.FieldTemplate(
-                name="int_uniform_unindexed",
-                data_type=config.DataType.INTEGER,
-                distribution=RandomDistribution.uniform(RangeGenerator(DataType.INTEGER, 1, 2)),
-                indexed=False,
-            ),
         ],
         compound_indexes=[],
         cardinalities=cardinalities,
     )

+    # 10 more unindexed fields whose value is always 1.
+    filter_fields = [
+        config.FieldTemplate(
+            name=f"int_uniform_unindexed_{i}",
+            data_type=config.DataType.INTEGER,
+            distribution=RandomDistribution.uniform(RangeGenerator(DataType.INTEGER, 1, 2)),
+            indexed=False,
+        )
+        for i in range(10)
+    ]
+    template.fields.extend(filter_fields)
+
     if payload_size > 0:
         payload_distr = random_strings_distr(payload_size, 1000)
         template.fields.append(

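Editor's note on the hunk above: the generated fields draw from the integer range [1, 2), so, as the comment says, every document stores 1 in each of them; a predicate such as {"$gt": 0} therefore matches every document while still being evaluated. A minimal standalone sketch (not part of the commit) of the names and values produced:

    field_names = [f"int_uniform_unindexed_{i}" for i in range(10)]
    # ['int_uniform_unindexed_0', 'int_uniform_unindexed_1', ..., 'int_uniform_unindexed_9']
    sample_doc = {name: 1 for name in field_names}  # each field always holds 1
    assert all(value > 0 for value in sample_doc.values())  # {"$gt": 0} is universally true
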
@@ -442,6 +448,30 @@ qsn_nodes = [
             axis=1,
         ),
     ),
+    config.QsNodeCalibrationConfig(
+        name="FETCH_W_FILTERS_W_DIFF_NUM_LEAVES",
+        type="FETCH",
+        variables_override=lambda df: pd.concat(
+            [df["n_top_level_and_children"].rename("Number of filters")],
+            axis=1,
+        ),
+    ),
+    config.QsNodeCalibrationConfig(
+        name="COLLSCAN_W_FILTERS_W_DIFF_NUM_LEAVES",
+        type="COLLSCAN",
+        variables_override=lambda df: pd.concat(
+            [df["n_top_level_and_children"].rename("Number of filters")],
+            axis=1,
+        ),
+    ),
+    config.QsNodeCalibrationConfig(
+        name="IXSCAN_W_FILTERS_W_DIFF_NUM_LEAVES",
+        type="IXSCAN",
+        variables_override=lambda df: pd.concat(
+            [df["n_top_level_and_children"].rename("Number of filters")],
+            axis=1,
+        ),
+    ),
     config.QsNodeCalibrationConfig(
         name="IXSCAN_W_FILTER",
         type="IXSCAN",

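Editor's note: variables_override lets a calibration config supply its own explanatory variables; each config above projects the execution stats down to a single "Number of filters" column. A standalone sketch (not part of the commit) of the lambda applied to a toy DataFrame:

    import pandas as pd

    df = pd.DataFrame({"n_top_level_and_children": [1, 2, 3], "n_processed": [10, 20, 30]})
    override = lambda df: pd.concat(
        [df["n_top_level_and_children"].rename("Number of filters")],
        axis=1,
    )
    print(override(df))
    #    Number of filters
    # 0                  1
    # 1                  2
    # 2                  3
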
@@ -50,6 +50,7 @@ class ExecutionStats:
     n_children: int
     seeks: Optional[int]
     n_index_fields: Optional[int]
+    n_top_level_and_children: Optional[int]


 @dataclass

@@ -45,6 +45,7 @@ class Node:
     seeks: Optional[int]
     children: list[Node]
     n_index_fields: Optional[int]
+    n_top_level_and_children: Optional[int]

     def get_execution_time(self):
         """Execution time of this node without execution time of its children"""

@@ -55,7 +56,7 @@
     def print(self, level=0):
         """Pretty print the execution tree"""
         print(
-            f'{"| " * level}{self.stage}, totalExecutionTime: {self.execution_time_nanoseconds:,}ns, seeks: {self.seeks}, nReturned: {self.n_returned}, nProcessed: {self.n_processed}, nIndexFields: {self.n_index_fields}'
+            f'{"| " * level}{self.stage}, totalExecutionTime: {self.execution_time_nanoseconds:,}ns, seeks: {self.seeks}, nReturned: {self.n_returned}, nProcessed: {self.n_processed}, nIndexFields: {self.n_index_fields}, nTopLevelAndChildren: {self.n_top_level_and_children}'
         )
         for child in self.children:
             child.print(level + 1)

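Editor's note: with the new field appended, a line printed by Node.print would read, for hypothetical values not taken from the commit:

    COLLSCAN, totalExecutionTime: 1,234,567ns, seeks: None, nReturned: 100, nProcessed: 100000, nIndexFields: None, nTopLevelAndChildren: 3
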
@@ -168,4 +169,9 @@ def get_common_fields(json_stage: dict[str, Any]) -> dict[str, Any]:
         "n_returned": json_stage["nReturned"],
         "seeks": json_stage.get("seeks"),
         "n_index_fields": len(json_stage.get("keyPattern")) if "keyPattern" in json_stage else None,
+        "n_top_level_and_children": (
+            len(json_stage.get("filter").get("$and")) if "$and" in json_stage.get("filter") else 1
+        )
+        if "filter" in json_stage
+        else None,
     }

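Editor's note: n_top_level_and_children counts a stage filter's leaves: the number of branches of a top-level $and, 1 for a single-predicate filter, and None when the stage has no filter. A standalone sketch (hypothetical stage dict, not from the commit):

    stage = {"nReturned": 5, "filter": {"$and": [{"a": {"$gt": 0}}, {"b": {"$gt": 0}}]}}
    n = (
        len(stage.get("filter").get("$and")) if "$and" in stage.get("filter") else 1
    ) if "filter" in stage else None
    assert n == 2
    # With "filter": {"a": {"$gt": 0}} the count is 1; with no "filter" key it is None.
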
@@ -105,6 +105,7 @@ def get_execution_stats(
                 seeks=enode.seeks,
                 # n_index_fields will be None for any node but IXSCAN.
                 n_index_fields=enode.n_index_fields,
+                n_top_level_and_children=enode.n_top_level_and_children,
             )
         )
     return result

@@ -101,6 +101,7 @@ class ParametersBuilderClassic:
                 "average_document_size_in_bytes",
                 "number_of_fields",
                 "n_index_fields",
+                "n_top_level_and_children",
             ],
         )

@@ -118,6 +119,7 @@ class ParametersBuilderClassic:
             n_processed=node.n_processed,
             seeks=node.seeks,
             n_index_fields=node.n_index_fields,
+            n_top_level_and_children=node.n_top_level_and_children,
         )

     @staticmethod

@@ -128,6 +130,7 @@ class ParametersBuilderClassic:
         n_processed: int = None,
         seeks: int = None,
         n_index_fields=None,
+        n_top_level_and_children=None,
     ):
         return [
             stage,

@@ -135,6 +138,7 @@ class ParametersBuilderClassic:
             n_processed,
             seeks,
             n_index_fields,
+            n_top_level_and_children,
             params.note,
             params.keys_length_in_bytes,
             params.average_document_size_in_bytes,

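Editor's note: in the three ParametersBuilderClassic hunks above, the new n_top_level_and_children entry occupies the same position in the DataFrame column list, the keyword-argument signature, and the positional row list, so the column headers stay aligned with the row values.
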
@@ -179,7 +179,7 @@ async def execute_collection_scans(
             Query(
                 {
                     "limit": limit,
-                    "filter": {"int_uniform_unindexed": {"$gt": 0, "$lt": 2}},
+                    "filter": {"int_uniform_unindexed_0": {"$gt": 0}},
                     "sort": {"$natural": direction},
                 },
                 note="COLLSCAN_W_FILTER",

@@ -188,8 +188,7 @@
                     "direction": dir_text.lower(),
                     "filter": {
                         "$and": [
-                            {"int_uniform_unindexed": {"$lt": 2}},
-                            {"int_uniform_unindexed": {"$gt": 0}},
+                            {"int_uniform_unindexed_0": {"$gt": 0}},
                         ]
                     },
                 }

@@ -466,11 +465,11 @@ async def execute_fetches(database: DatabaseInstance, collections: Sequence[CollectionInfo]

         requests.append(
             Query(
-                # 'int_uniform_unindexed' is not indexed, so the fetch will have a filter.
+                # 'int_uniform_unindexed_0' is not indexed, so the fetch will have a filter.
                 {
                     "filter": {
                         "int_uniform": {"$lt": card},
-                        "int_uniform_unindexed": {"$gt": 0, "$lt": 2},
+                        "int_uniform_unindexed_0": {"$gt": 0},
                     }
                 },
                 note="FETCH_W_FILTER",

@@ -478,8 +477,7 @@
                 "FETCH": {
                     "filter": {
                         "$and": [
-                            {"int_uniform_unindexed": {"$lt": 2}},
-                            {"int_uniform_unindexed": {"$gt": 0}},
+                            {"int_uniform_unindexed_0": {"$gt": 0}},
                         ]
                     }
                 }

@@ -517,6 +515,121 @@ async def execute_index_scans_w_diff_num_fields(
     )


+async def execute_fetch_w_filters_w_diff_num_leaves(
+    database: DatabaseInstance, collections: Sequence[CollectionInfo]
+):
+    collections = [c for c in collections if c.name == "doc_scan_100000"]
+    assert len(collections) == 1
+
+    requests = []
+
+    unindexed_fields = [field.name for field in collections[0].fields if "unindexed" in field.name]
+    assert len(unindexed_fields) == 10
+
+    for fields_w_preds in [unindexed_fields[:i] for i in range(1, len(unindexed_fields) + 1)]:
+        # We build up queries of the shape
+        # {'int_uniform_unindexed_0': {'$gt': 0}, 'int_uniform': {'$lt': 50000}},
+        # {'int_uniform_unindexed_0': {'$gt': 0}, 'int_uniform_unindexed_1': {'$gt': 0}, 'int_uniform': {'$lt': 50000}}
+        # and so on, until we have all 10 unindexed fields in the filter.
+        filter = {f: {"$gt": 0} for f in fields_w_preds}
+        filter["int_uniform"] = {"$lt": 50000}
+
+        requests.append(
+            Query(
+                {"filter": filter},
+                note="FETCH_W_FILTERS_W_DIFF_NUM_LEAVES",
+                expected_stage={
+                    "FETCH": {
+                        "filter": {fields_w_preds[0]: {"$gt": 0}}
+                        if len(fields_w_preds) == 1
+                        else {"$and": [{k: v} for k, v in filter.items() if k != "int_uniform"]}
+                    }
+                },
+            )
+        )
+
+    await workload_execution.execute(
+        database, main_config.workload_execution, collections, requests
+    )
+
+
+async def execute_collscan_w_filters_w_diff_num_leaves(
+    database: DatabaseInstance, collections: Sequence[CollectionInfo]
+):
+    collections = [c for c in collections if c.name == "doc_scan_100000"]
+    assert len(collections) == 1
+
+    requests = []
+
+    unindexed_fields = [field.name for field in collections[0].fields if "unindexed" in field.name]
+    assert len(unindexed_fields) == 10
+
+    for fields_w_preds in [unindexed_fields[:i] for i in range(1, len(unindexed_fields) + 1)]:
+        # We build up queries of the shape
+        # {'int_uniform_unindexed_0': {'$gt': 0}},
+        # {'int_uniform_unindexed_0': {'$gt': 0}, 'int_uniform_unindexed_1': {'$gt': 0}}
+        # and so on, until we have all 10 unindexed fields in the filter.
+        filter = {f: {"$gt": 0} for f in fields_w_preds}
+
+        requests.append(
+            Query(
+                {"filter": filter, "sort": {"$natural": 1}, "limit": 50000},
+                note="COLLSCAN_W_FILTERS_W_DIFF_NUM_LEAVES",
+                expected_stage={
+                    "COLLSCAN": {
+                        "filter": {fields_w_preds[0]: {"$gt": 0}}
+                        if len(fields_w_preds) == 1
+                        else {"$and": [{k: v} for k, v in filter.items()]}
+                    }
+                },
+            )
+        )
+
+    await workload_execution.execute(
+        database, main_config.workload_execution, collections, requests
+    )
+
+
+async def execute_ixscan_w_filters_w_diff_num_leaves(
+    database: DatabaseInstance, collections: Sequence[CollectionInfo]
+):
+    collections = [c for c in collections if c.name == "index_scan_10000"]
+    assert len(collections) == 1
+
+    requests = []
+
+    field_names = [chr(ord("a") + i) for i in range(10)]
+
+    # Note we do not include a filter that has only one leaf. We noticed that there is a
+    # large jump between 1 and 2 leaves for the cost of an ixscan filter, so we omitted
+    # it to get a better fit.
+    for fields_w_preds in [field_names[:i] for i in range(2, len(field_names) + 1)]:
+        # We build up queries of the shape
+        # {'a': {"$mod": [1, 0]}, 'b': {"$mod": [1, 0]}},
+        # {'a': {"$mod": [1, 0]}, 'b': {"$mod": [1, 0]}, 'c': {"$mod": [1, 0]}},
+        # and so on, until we have all 10 fields in the filter.
+        filter = {f: {"$mod": [1, 0]} for f in fields_w_preds}
+
+        requests.append(
+            Query(
+                # hint the compound index on {a: 1, b: 1, ..., j: 1}
+                {"filter": filter, "hint": {k: 1 for k in field_names}},
+                note="IXSCAN_W_FILTERS_W_DIFF_NUM_LEAVES",
+                expected_stage={
+                    "IXSCAN": {
+                        "filter": {fields_w_preds[0]: {"$mod": [1, 0]}}
+                        if len(fields_w_preds) == 1
+                        else {"$and": [{k: v} for k, v in filter.items()]}
+                    }
+                },
+            )
+        )
+
+    await workload_execution.execute(
+        database, main_config.workload_execution, collections, requests
+    )
+
+
 async def main():
     """Entry point function."""
     script_directory = os.path.abspath(os.path.dirname(__file__))

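Editor's note on the three workloads above: {"$mod": [1, 0]} is satisfied by every integer (n % 1 == 0 for any n), and the unindexed fields always hold 1, so {"$gt": 0} is likewise always true; growing the filter therefore changes only the per-document filter-evaluation cost, never the result set. A standalone sketch (not part of the commit) of the first filters the collscan loop builds:

    unindexed_fields = [f"int_uniform_unindexed_{i}" for i in range(10)]
    for fields_w_preds in [unindexed_fields[:i] for i in range(1, 3)]:
        print({f: {"$gt": 0} for f in fields_w_preds})
    # {'int_uniform_unindexed_0': {'$gt': 0}}
    # {'int_uniform_unindexed_0': {'$gt': 0}, 'int_uniform_unindexed_1': {'$gt': 0}}
    assert 12345 % 1 == 0  # why the ixscan workload's {"$mod": [1, 0]} matches every key
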
@@ -543,6 +656,9 @@ async def main():
         execute_hash_intersections,
         execute_fetches,
         execute_index_scans_w_diff_num_fields,
+        execute_fetch_w_filters_w_diff_num_leaves,
+        execute_collscan_w_filters_w_diff_num_leaves,
+        execute_ixscan_w_filters_w_diff_num_leaves,
     ]
     for execute_query in execution_query_functions:
         await execute_query(database, generator.collection_infos)