// mongo/jstests/libs/query/group_to_distinct_scan_util...
//
// 218 lines
// 8.1 KiB
// JavaScript

/**
 * Common utility functions and variables for the $group to DISTINCT_SCAN optimization.
*/
import {FeatureFlagUtil} from "jstests/libs/feature_flag_util.js";
import {getAggPlanStages, getQueryPlanners} from "jstests/libs/query/analyze_plan.js";
// Handle to the collection under test. It starts out undefined and is assigned by
// prepareCollection().
export let coll;

// Index specifications built on 'coll' by createIndexes(). Fields prefixed with "mk" are
// multikey. Every built-in spec uses an empty options object. The binding is declared with
// 'let' because removeIndex() replaces the whole array.
export let indexList = [
    {a: 1, b: 1, c: 1},
    {mkA: 1, b: 1, c: 1},
    {aa: 1, mkB: 1, c: 1},
    {aa: 1, bb: 1, c: 1},
    {"foo.a": 1, "foo.b": 1},
    {"mkFoo.a": 1, "mkFoo.b": 1},
    {"foo.a": 1, "mkFoo.b": 1},
].map((pattern) => ({pattern, option: {}}));
// Builds every index currently listed in 'indexList' on 'coll', in list order, asserting that
// each createIndex command succeeds.
export function createIndexes() {
    indexList.forEach(({pattern, option}) => {
        assert.commandWorked(coll.createIndex(pattern, option));
    });
}
// Test documents inserted into 'coll' by prepareCollection(). Fields prefixed with "mk" are the
// ones that hold array values in at least one document.
export const documents = [
    // Scalar 'a'/'b'/'c' values, including a document without 'a' (_id 5) and one whose 'a' is
    // an explicit null (_id 6).
    {_id: 0, a: 1, b: 1, c: 1},
    {_id: 1, a: 1, b: 2, c: 2},
    {_id: 2, a: 1, b: 2, c: 3},
    {_id: 3, a: 1, b: 3, c: 2},
    {_id: 4, a: 2, b: 2, c: 2},
    {_id: 5, b: 1, c: 1},
    {_id: 6, a: null, b: 1, c: 1.5},

    // 'mkB' is a scalar, a two-element array and an empty array respectively.
    {_id: 7, aa: 1, mkB: 2, bb: 2},
    {_id: 8, aa: 1, mkB: [1, 3], bb: 1},
    {_id: 9, aa: 2, mkB: [], bb: 3},

    // 'mkA' is an array only in _id 11.
    {_id: 10, mkA: 1, c: 3},
    {_id: 11, mkA: [2, 3, 4], c: 3},
    {_id: 12, mkA: 2, c: 2},
    {_id: 13, mkA: 3, c: 4},

    // Embedded documents addressed through dotted paths; 'mkFoo' is an array of subdocuments
    // only in _id 19.
    {_id: 14, foo: {a: 1, b: 1}, mkFoo: {a: 1, b: 1}},
    {_id: 15, foo: {a: 1, b: 2}, mkFoo: {a: 1, b: 2}},
    {_id: 16, foo: {a: 2, b: 2}, mkFoo: {a: 2, b: 2}},
    {_id: 17, foo: {b: 1}, mkFoo: {b: 1}},
    {_id: 18, foo: {a: null, b: 1}, mkFoo: {a: null, b: 1}},
    {_id: 19, foo: {a: 3}, mkFoo: [{a: 3, b: 4}, {a: 4, b: 3}]},

    // String values that differ only by letter case.
    {_id: 20, str: "foo", d: 1},
    {_id: 21, str: "FoO", d: 2},
    {_id: 22, str: "bar", d: 4},
    {_id: 23, str: "bAr", d: 3},
];
/**
 * Drops an index from 'coll' and removes it from 'indexList', so that later createIndexes()
 * calls do not rebuild it.
 *
 * The index is dropped first: if the drop command fails, the assertion throws and 'indexList'
 * is left untouched.
 *
 * @param {Object} pattern - Key pattern of the index to drop. Entries of 'indexList' whose
 *     pattern compares equal under bsonWoCompare() are removed.
 */
export function removeIndex(pattern) {
    assert.commandWorked(coll.dropIndex(pattern));
    // Reassign rather than mutate in place, keeping every entry whose pattern differs.
    indexList = indexList.filter((ix) => bsonWoCompare(ix.pattern, pattern) !== 0);
}
/**
 * Creates an index on 'coll' and records it in 'indexList' so that later createIndexes() calls
 * rebuild it.
 *
 * The index is created before it is recorded: if the createIndex command fails, the assertion
 * throws and 'indexList' is not left holding a spec for an index that was never built.
 *
 * @param {Object} pattern - Key pattern of the index to create.
 * @param {Object} [option={}] - Options passed to createIndex(); defaults to an empty object,
 *     matching the built-in entries of 'indexList'.
 */
export function addIndex(pattern, option = {}) {
    assert.commandWorked(coll.createIndex(pattern, option));
    indexList.push({pattern, option});
}
/**
 * Prepares the 'coll' collection for testing: points 'coll' at the collection named after the
 * running test, drops it, builds the indexes in 'indexList' and inserts 'documents'.
 *
 * @param {Object} [database=null] - Database that owns the collection. The global 'db' is used
 *     when this is omitted (or otherwise falsy).
 */
export function prepareCollection(database = null) {
    const targetDb = database || db;
    coll = targetDb[jsTestName()];
    assert(coll.drop());
    // Indexes are created on the still-empty collection, before the documents are inserted.
    createIndexes();
    assert.commandWorked(coll.insert(documents));
}
/**
 * Shards the 'coll' collection and inserts orphan documents on the primary shard and on one
 * non-primary shard.
 *
 * Assumes 'st' has been set up with at least two shards: 'st.shard0' becomes the primary shard
 * of the "test" database and 'st.shard1' receives the chunk holding 'a' >= 2.
 *
 * @param {Object} st - Sharded cluster fixture exposing getDB(), s, shard0 and shard1.
 * @returns {Object} The "test" database handle obtained from 'st'.
 */
export function prepareShardedCollectionWithOrphans(st) {
    const db = st.getDB("test");
    const primaryShardName = st.shard0.shardName;
    const otherShardName = st.shard1.shardName;

    assert.commandWorked(
        st.s.adminCommand({enableSharding: db.getName(), primaryShard: primaryShardName}),
    );
    prepareCollection(db);

    // Shard the collection and move all docs where 'a' >= 2 to the non-primary shard.
    const ns = coll.getFullName();
    assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {a: 1}}));
    assert.commandWorked(st.s.adminCommand({split: ns, middle: {a: 2}}));
    assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {a: 2}, to: otherShardName}));

    // Builds one orphan document; only the shard key value and the 'mkFoo' payload vary.
    const makeOrphan = (a, mkFoo) => ({
        a,
        b: "orphan",
        c: "orphan",
        mkA: ["orphan"],
        mkB: ["orphan"],
        mkFoo,
    });

    // Insert orphans to both shards. Both shards must include multikey values in order to not
    // break sharded passthrough tests which rely on the assumption that multikey indexes are
    // indeed multikey on both shards.
    //
    // The primary shard receives shard key values from the range that was moved away ('a' >= 2);
    // the other shard receives values from the range it does not own ('a' < 2).
    const primaryShardOrphanDocs = [
        makeOrphan(2.1, [{a: "orphan"}]),
        makeOrphan(2.2, ["orphan"]),
        makeOrphan(2.3, ["orphan"]),
        makeOrphan(999.1, ["orphan"]),
    ];
    const otherShardOrphanDocs = [
        makeOrphan(0.1, [{a: "orphan"}]),
        makeOrphan(1.1, ["orphan"]),
        makeOrphan(1.2, ["orphan"]),
        makeOrphan(1.3, ["orphan"]),
    ];

    // Write through direct shard connections rather than through 'st.s'.
    assert.commandWorked(st.shard0.getCollection(ns).insert(primaryShardOrphanDocs));
    assert.commandWorked(st.shard1.getCollection(ns).insert(otherShardOrphanDocs));
    return db;
}
/**
 * Checks that 'pipeline' returns the correct results with and without a hint added to the
 * query, and with and without indexes, to cover all the possibilities.
 *
 * The pipeline runs three times: with every index dropped, with the indexes from 'indexList'
 * rebuilt, and with the indexes plus a hint. The three result sets and 'expectedResults' must
 * all contain the same members (compared with assert.sameMembers()).
 *
 * Side effect: all indexes on 'coll' are dropped and then recreated via createIndexes().
 *
 * @param {Array} pipeline - Aggregation pipeline to run.
 * @param {Array} expectedResults - Documents the pipeline is expected to return.
 * @param {Object} [hintObj={$natural: 1}] - Hint for the hinted run; defaults to a $natural hint.
 * @param {Object} [options={}] - Options passed to aggregate(). A 'hint' key in here takes
 *     precedence over 'hintObj'.
 */
export function assertResultsMatchWithAndWithoutHintandIndexes(
    pipeline,
    expectedResults,
    hintObj = {$natural: 1},
    options = {},
) {
    const runPipeline = (aggOptions) => coll.aggregate(pipeline, aggOptions).toArray();

    assert.commandWorked(coll.dropIndexes());
    const unindexedResults = runPipeline(options);

    createIndexes();
    const indexedResults = runPipeline(options);
    // 'options' is spread last so that its keys override the hint.
    const hintedResults = runPipeline({hint: hintObj, ...options});

    assert.sameMembers(unindexedResults, indexedResults, "no index != with index");
    assert.sameMembers(indexedResults, hintedResults, "with index != with hint");
    assert.sameMembers(hintedResults, expectedResults, "with hint != expected");
}
/**
 * Asserts that an aggregation explain output uses the DISTINCT_SCAN optimization.
 *
 * Every assertion passes 'explain' as its failure message so that a failing check shows the
 * plan that was inspected.
 *
 * @param {Object} testDB - Database handle used to look up the ShardFilteringDistinctScan
 *     feature flag; only consulted when 'shouldFetch' is truthy.
 * @param {Object} explain - Explain output of the aggregation.
 * @param {Object} [keyPattern] - When truthy, the key pattern the first DISTINCT_SCAN stage
 *     must have.
 * @param {boolean} [shouldFetch] - When truthy, additionally asserts that the plan fetches.
 */
export function assertPlanUsesDistinctScan(testDB, explain, keyPattern, shouldFetch) {
    const distinctScanStages = getAggPlanStages(explain, "DISTINCT_SCAN");
    assert.neq(0, distinctScanStages.length, explain);

    // Only the first DISTINCT_SCAN stage found is inspected further.
    const [distinctScan] = distinctScanStages;
    if (keyPattern) {
        assert.eq(keyPattern, distinctScan.keyPattern, explain);
    }

    // Pipelines that use the DISTINCT_SCAN optimization should not also have a blocking sort.
    assert.eq(0, getAggPlanStages(explain, "SORT").length, explain);

    if (!shouldFetch) {
        return;
    }

    // FETCH is pushed into DISTINCT_SCAN iff featureFlagShardFilteringDistinctScan is enabled;
    // otherwise a separate FETCH stage must be present.
    if (FeatureFlagUtil.isEnabled(testDB, "ShardFilteringDistinctScan")) {
        assert(distinctScan.isFetching, explain);
    } else {
        assert(getAggPlanStages(explain, "FETCH").length > 0, explain);
    }
}
/**
 * Asserts that an aggregation explain output contains no DISTINCT_SCAN stage.
 *
 * @param {Object} explain - Explain output of the aggregation; also used as the failure message.
 */
export function assertPlanDoesNotUseDistinctScan(explain) {
    const distinctScanStages = getAggPlanStages(explain, "DISTINCT_SCAN");
    assert.eq(0, distinctScanStages.length, explain);
}
/**
 * Asserts that an aggregation explain output uses an IXSCAN (and no DISTINCT_SCAN) and that the
 * first IXSCAN stage found has the given key pattern.
 *
 * @param {Object} explain - Explain output of the aggregation; also used as the failure message
 *     of every assertion.
 * @param {Object} keyPattern - Key pattern the first IXSCAN stage must have.
 */
export function assertPlanUsesIndexScan(explain, keyPattern) {
    assertPlanDoesNotUseDistinctScan(explain);
    // Look the IXSCAN stages up once and reuse the result for both checks.
    const indexScanStages = getAggPlanStages(explain, "IXSCAN");
    assert.neq(0, indexScanStages.length, explain);
    assert.eq(keyPattern, indexScanStages[0].keyPattern, explain);
}
/**
 * Asserts that an aggregation explain output uses a COLLSCAN, with neither a DISTINCT_SCAN nor
 * an IXSCAN stage present.
 *
 * @param {Object} explain - Explain output of the aggregation; also used as the failure message.
 */
export function assertPlanUsesCollScan(explain) {
    assertPlanDoesNotUseDistinctScan(explain);
    const countStages = (stageName) => getAggPlanStages(explain, stageName).length;
    assert.eq(0, countStages("IXSCAN"), explain);
    assert.neq(0, countStages("COLLSCAN"), explain);
}
/**
 * Verifies both the results and the explain output of an aggregation.
 *
 * The results are checked with assertResultsMatchWithAndWithoutHintandIndexes(). The pipeline is
 * then explained (with 'hint' merged into 'options' only when a hint is given) and the explain
 * output is handed to 'validateExplain'.
 *
 * @param {Object} args
 * @param {Array} args.pipeline - Aggregation pipeline to run.
 * @param {Object} [args.options={}] - Options passed to aggregate(); a 'hint' key in here
 *     overrides 'args.hint'.
 * @param {Object} [args.hint] - Optional hint. When omitted, the results check falls back to
 *     its own default hint while the explain runs without one.
 * @param {boolean} [args.expectsIndexFilter=false] - When true, every query planner section
 *     of the explain output must report 'indexFilterSet: true'.
 * @param {Array} args.expectedOutput - Documents the pipeline is expected to return.
 * @param {Function} args.validateExplain - Callback invoked with the explain output.
 */
export function assertPipelineResultsAndExplain({
    pipeline,
    options = {},
    hint = undefined,
    expectsIndexFilter = false,
    expectedOutput,
    validateExplain,
}) {
    assertResultsMatchWithAndWithoutHintandIndexes(pipeline, expectedOutput, hint, options);

    // 'options' is spread last so that its keys override the hint.
    const explainOptions = hint ? {hint, ...options} : options;
    const explain = coll.explain().aggregate(pipeline, explainOptions);
    validateExplain(explain);

    if (!expectsIndexFilter) {
        return;
    }
    for (const queryPlanner of getQueryPlanners(explain)) {
        assert.eq(true, queryPlanner.indexFilterSet, queryPlanner);
    }
}