mongo/jstests/aggregation/accumulators/accumulator_js.js

379 lines
12 KiB
JavaScript

// Test the behavior of user-defined (Javascript) accumulators.
//
// @tags: [
// requires_scripting,
// ]
import {resultsEq} from "jstests/aggregation/extras/utils.js";
// Start from an empty collection and seed it with one document per word occurrence.
// NOTE(review): drop() is left unasserted here, presumably because the collection may not
// exist yet on a fresh run - confirm.
db.accumulator_js.drop();
for (const word of ["hello", "world", "world", "hello", "hi"]) {
    // Check every write, like the other inserts in this file, so a failed insert surfaces
    // here rather than as a confusing mismatch in a later aggregation assertion.
    assert.commandWorked(db.accumulator_js.insert({word: word, val: 1}));
}
// Base aggregate command, reused and modified by the tests that follow. It groups the documents
// by "word" and counts each group by summing "val" with a user-defined accumulator.
const command = {
    aggregate: "accumulator_js",
    cursor: {},
    pipeline: [
        {
            $group: {
                _id: "$word",
                wordCount: {
                    $accumulator: {
                        init: function () {
                            return 0;
                        },
                        accumulateArgs: ["$val"],
                        accumulate: function (state, val) {
                            return state + val;
                        },
                        merge: function (state1, state2) {
                            return state1 + state2;
                        },
                        // Identity finalizer: the running count is already the final result.
                        finalize: function (state) {
                            return state;
                        },
                        // Declare the implementation language explicitly, as every other
                        // $accumulator spec in this file does.
                        lang: "js",
                    },
                },
            },
        },
    ],
};
// One group per distinct word, with the number of documents that carry it.
let expectedResults = [
    {_id: "hello", wordCount: 2},
    {_id: "world", wordCount: 2},
    {_id: "hi", wordCount: 1},
];
let res = assert.commandWorked(db.runCommand(command));
assert(resultsEq(res.cursor.firstBatch, expectedResults), res.cursor);
// The accumulator functions may also be supplied as strings of source code: replace every
// function in the shared spec with its source text and expect the same results as before.
{
    const spec = command.pipeline[0].$group.wordCount.$accumulator;
    for (const fnName of ["init", "accumulate", "merge", "finalize"]) {
        spec[fnName] = spec[fnName].toString();
    }
}
res = db.runCommand(command);
assert.commandWorked(res);
assert(resultsEq(res.cursor.firstBatch, expectedResults), res.cursor);
// finalize is optional: with it removed from the shared spec, the command must still succeed
// and produce the same word counts.
{
    const {$accumulator: spec} = command.pipeline[0].$group.wordCount;
    delete spec.finalize;
}
res = db.runCommand(command);
assert.commandWorked(res);
assert(resultsEq(res.cursor.firstBatch, expectedResults), res.cursor);
// Exercise a finalizer that is not the identity function. This is the case finalizers exist
// for: the intermediate state ({count, sum}) has a different shape from the final result
// (the average word length).
{
    const avgWordLenSpec = {
        init: function () {
            return {count: 0, sum: 0};
        },
        accumulateArgs: [{$strLenCP: "$word"}],
        accumulate: function (state, wordLen) {
            return {count: state.count + 1, sum: state.sum + wordLen};
        },
        merge: function (left, right) {
            return {count: left.count + right.count, sum: left.sum + right.sum};
        },
        finalize: function (state) {
            return state.sum / state.count;
        },
        lang: "js",
    };
    // NOTE(review): Object.merge presumably returns a new object and leaves `command` itself
    // untouched, since the next test re-runs `command` with its original pipeline - confirm.
    const avgCommand = Object.merge(command, {
        pipeline: [{$group: {_id: 1, avgWordLen: {$accumulator: avgWordLenSpec}}}],
    });
    res = assert.commandWorked(db.runCommand(avgCommand));
}
assert(resultsEq(res.cursor.firstBatch, [{_id: 1, avgWordLen: 22 / 5}]), res.cursor);
// A null word is a valid grouping value: the new document forms a group of its own, on top of
// the groups that were already expected.
{
    const nullWordDoc = {word: null, val: 1};
    assert.commandWorked(db.accumulator_js.insert(nullWordDoc));
}
expectedResults.push({_id: null, wordCount: 1});
res = db.runCommand(command);
assert.commandWorked(res);
assert(resultsEq(res.cursor.firstBatch, expectedResults), res.cursor);
// A field path that matches nothing must reach the accumulate function as JS null, which is
// similar to how most other aggregation operators treat missing fields.
assert(db.accumulator_js.drop());
assert.commandWorked(db.accumulator_js.insert({sentinel: 1}));
{
    // Collects every argument it is given, so the result shows exactly what was passed in.
    const collectArgsSpec = {
        init: function () {
            return [];
        },
        accumulateArgs: ["$no_such_field"],
        accumulate: function (seen, arg) {
            return seen.concat([arg]);
        },
        merge: function (left, right) {
            return left.concat(right);
        },
        lang: "js",
    };
    command.pipeline = [{$group: {_id: 1, value: {$accumulator: collectArgsSpec}}}];
}
res = db.runCommand(command);
assert.commandWorked(res);
assert(resultsEq(res.cursor.firstBatch, [{_id: 1, value: [null]}]), res.cursor);
// initArgs has to evaluate to an array: a constant number must make the command fail.
{
    const nonArrayInitArgsSpec = {
        init: function () {},
        initArgs: {$const: 5},
        accumulateArgs: [],
        accumulate: function () {},
        merge: function () {},
        lang: "js",
    };
    command.pipeline = [{$group: {_id: 1, value: {$accumulator: nonArrayInitArgsSpec}}}];

    const expectedErrorCode = 4544711;
    assert.commandFailedWithCode(db.runCommand(command), expectedErrorCode);
}
// The elements of initArgs are handed to init as its arguments. The state built by init is
// carried through unchanged so the final value reveals what init received.
{
    const echoInitArgsSpec = {
        init: function (first, second) {
            return "initial_state_set_from_" + first + "_and_" + second;
        },
        initArgs: ["ABC", "DEF"],
        accumulateArgs: [],
        accumulate: function (state) {
            return state;
        },
        merge: function (s1, s2) {
            return s1;
        },
        lang: "js",
    };
    command.pipeline = [{$group: {_id: 1, value: {$accumulator: echoInitArgsSpec}}}];
}
res = db.runCommand(command);
assert.commandWorked(res);
{
    const expectedGroups = [{_id: 1, value: "initial_state_set_from_ABC_and_DEF"}];
    assert(resultsEq(res.cursor.firstBatch, expectedGroups), res.cursor);
}
// When evaluating initArgs raises an error, the command must fail gracefully and init must not
// be called. Each user-defined function throws so that an unexpected call would be visible.
{
    const throwingSpec = {
        init: function () {
            throw "init should not be called";
        },
        // $cond thwarts constant folding, which ensures the failure is produced by evaluate
        // rather than by optimize.
        initArgs: {$add: {$cond: ["$foo", "", ""]}},
        accumulateArgs: [],
        accumulate: function () {
            throw "accumulate should not be called";
        },
        merge: function () {
            throw "merge should not be called";
        },
        lang: "js",
    };
    command.pipeline = [{$group: {_id: 1, value: {$accumulator: throwingSpec}}}];
}
{
    // ErrorCodes.TypeMismatch stands for "$add only supports numeric or date types"; before
    // 6.1 this specific check reported the type mismatch as code 16554.
    const acceptedCodes = [16554, ErrorCodes.TypeMismatch];
    assert.commandFailedWithCode(db.runCommand(command), acceptedCodes);
}
// initArgs may have a different length for each group: here it is the grouped array itself,
// which has three elements in one group and two in the other.
assert(db.accumulator_js.drop());
{
    const docs = [
        {_id: 1, a: ["A", "B", "C"]},
        {_id: 2, a: ["A", "B", "C"]},
        {_id: 3, a: ["X", "Y"]},
        {_id: 4, a: ["X", "Y"]},
    ];
    assert.commandWorked(db.accumulator_js.insert(docs));
}
{
    // init joins whatever arguments it receives; the state is then passed through untouched.
    const joinInitArgsSpec = {
        init: function (...args) {
            return args.toString();
        },
        initArgs: "$a",
        accumulateArgs: [],
        accumulate: function (state) {
            return state;
        },
        merge: function (s1, s2) {
            return s1;
        },
        lang: "js",
    };
    command.pipeline = [{$group: {_id: {a: "$a"}, value: {$accumulator: joinInitArgsSpec}}}];
}
res = db.runCommand(command);
assert.commandWorked(res);
{
    const expectedGroups = [
        {_id: {a: ["A", "B", "C"]}, value: "A,B,C"},
        {_id: {a: ["X", "Y"]}, value: "X,Y"},
    ];
    assert(resultsEq(res.cursor.firstBatch, expectedGroups), res.cursor);
}
// accumulateArgs has to evaluate to an array: a constant number must make the command fail.
{
    const nonArrayAccumulateArgsSpec = {
        init: function () {},
        accumulateArgs: {$const: 5},
        accumulate: function (state, value) {},
        merge: function (s1, s2) {},
        lang: "js",
    };
    command.pipeline = [
        {$group: {_id: 1, value: {$accumulator: nonArrayAccumulateArgsSpec}}},
    ];

    const expectedErrorCode = 4544712;
    assert.commandFailedWithCode(db.runCommand(command), expectedErrorCode);
}
// accumulateArgs may hold more than one element; each element becomes a separate argument of
// accumulate, following the state.
{
    const concatArgsSpec = {
        init: function () {},
        accumulateArgs: ["ABC", "DEF"],
        accumulate: function (state, first, second) {
            return first + second;
        },
        merge: function (s1, s2) {
            return s1 || s2;
        },
        lang: "js",
    };
    command.pipeline = [{$group: {_id: 1, value: {$accumulator: concatArgsSpec}}}];
}
expectedResults = [{_id: 1, value: "ABCDEF"}];
res = db.runCommand(command);
assert.commandWorked(res);
assert(resultsEq(res.cursor.firstBatch, expectedResults), res.cursor);
// accumulateArgs may have a different length for each document: here it is the document's own
// array "a", so accumulate receives a varying number of trailing arguments.
{
    const gatherArgsSpec = {
        init: function () {
            return [];
        },
        accumulateArgs: "$a",
        // Records the arguments of each call as one entry and keeps the entries sorted.
        accumulate: function (state, ...values) {
            state.push(values);
            state.sort();
            return state;
        },
        merge: function (left, right) {
            return left.concat(right);
        },
        lang: "js",
    };
    command.pipeline = [{$group: {_id: 1, value: {$accumulator: gatherArgsSpec}}}];
}
expectedResults = [
    {
        _id: 1,
        value: [
            ["A", "B", "C"],
            ["A", "B", "C"],
            ["X", "Y"],
            ["X", "Y"],
        ],
    },
];
res = db.runCommand(command);
assert.commandWorked(res);
assert(resultsEq(res.cursor.firstBatch, expectedResults), res.cursor);
// accumulateArgs may contain expressions that evaluate to null or to missing. This behaves
// like ExpressionArray: an array cannot hold missing, so every such element is converted to
// null, and the nulls are then serialized to BSON and passed to JS as usual.
assert(db.accumulator_js.drop());
assert.commandWorked(db.accumulator_js.insert({}));
{
    const nullishArgs = [
        null,
        "$no_such_field",
        {$let: {vars: {not_an_object: 5}, in: "$not_an_object.field"}},
    ];
    // Reports how many arguments arrived, their JS types, and their values.
    const describeArgsSpec = {
        init: function () {
            return null;
        },
        accumulateArgs: nullishArgs,
        accumulate: function (state, ...values) {
            return {
                len: values.length,
                types: values.map((v) => typeof v),
                values: values,
            };
        },
        merge: function (s1, s2) {
            return s1 || s2;
        },
        lang: "js",
    };
    command.pipeline = [{$group: {_id: 1, value: {$accumulator: describeArgsSpec}}}];
}
expectedResults = [
    {
        _id: 1,
        value: {len: 3, types: ["object", "object", "object"], values: [null, null, null]},
    },
];
res = db.runCommand(command);
assert.commandWorked(res);
assert(resultsEq(res.cursor.firstBatch, expectedResults), res.cursor);