mirror of https://github.com/mongodb/mongo
SERVER-107540 Implement $hash expression for XXH64 (#45257)
GitOrigin-RevId: ae0bc33c886cd35f32d973aa3f4e3c9017ed01bd
This commit is contained in:
parent 133badbf1e
commit 9a8289c145
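Before the diff itself, a quick orientation: $hash takes a named 'input' argument (a UTF-8 string or binData) and an 'algorithm' name, and returns the digest as BinData (or null for a nullish input). A minimal shell sketch of the call shape, following the jstest below; the collection and field names here are hypothetical:

// Hypothetical collection/field names; the $hash argument shape matches the jstest below.
db.users.aggregate([
    {$project: {nameDigest: {$hash: {input: "$name", algorithm: "xxh64"}}}}
]);
// Each nameDigest is a BinData(0, ...) value holding the 8-byte XXH64 digest,
// or null when the referenced field is missing or null.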
@@ -0,0 +1,83 @@
/**
 * Test the $hash expression for generating hashes of string or binary inputs.
 *
 * @tags: [
 *   requires_fcv_83
 * ]
 */

import {beforeEach, describe, it} from "jstests/libs/mochalite.js";
import {assertDropCollection} from "jstests/libs/collection_drop_recreate.js";
import {assertErrorCode} from "jstests/aggregation/extras/utils.js";

const collName = jsTestName();
const coll = db[collName];

const successTests = [
    // Normal strings
    {
        expressionInput: {input: {$concat: ["Hello", " ", "World"]}, algorithm: "xxh64"},
        expectedHash: BinData(0, "YzTSBxkkW8I="),
    },
    {
        expressionInput: {input: "[1, 2, 3]", algorithm: {$concat: ["xxh", "64"]}},
        expectedHash: BinData(0, "wkwP8yqWE2o="),
    },
    {expressionInput: {input: "🧐🤓😎", algorithm: "xxh64"}, expectedHash: BinData(0, "tWWU4BmD+Z4=")},

    // Empty
    {expressionInput: {input: "", algorithm: "xxh64"}, expectedHash: BinData(0, "70bbN1HY6Zk=")},
    {expressionInput: {input: BinData(0, ""), algorithm: "xxh64"}, expectedHash: BinData(0, "70bbN1HY6Zk=")},

    // Binary input
    {expressionInput: {input: BinData(0, "aGV5"), algorithm: "xxh64"}, expectedHash: BinData(0, "Wv5M8jFeEv4=")},
    {expressionInput: {input: BinData(4, "aGV5"), algorithm: "xxh64"}, expectedHash: BinData(0, "Wv5M8jFeEv4=")},

    // Nested
    {
        expressionInput: {input: {$hash: {input: "hey", algorithm: "xxh64"}}, algorithm: "xxh64"},
        expectedHash: BinData(0, "B+qrIpasjj4="),
    },

    // Nullish input
    {expressionInput: {input: null, algorithm: "xxh64"}, expectedHash: null},
    {expressionInput: {input: undefined, algorithm: "xxh64"}, expectedHash: null},
    {expressionInput: {input: "$missing", algorithm: "xxh64"}, expectedHash: null},
];

const failureTests = [
    {expressionInput: {}, expectedCode: ErrorCodes.FailedToParse},
    {expressionInput: {input: "string"}, expectedCode: ErrorCodes.FailedToParse},
    {expressionInput: {algorithm: "xxh64"}, expectedCode: ErrorCodes.FailedToParse},
    {expressionInput: {input: "string", algorithm: "xxh64", extra: 1}, expectedCode: ErrorCodes.FailedToParse},
    {expressionInput: {input: [1, 2, 3], algorithm: "xxh64"}, expectedCode: 10754000},
    {expressionInput: {input: "string", algorithm: [5]}, expectedCode: 10754001},
    {expressionInput: {input: "string", algorithm: null}, expectedCode: 10754001},
    {expressionInput: {input: "string", algorithm: "sha1"}, expectedCode: 10754002},
    {expressionInput: {input: "string", algorithm: "XXH64"}, expectedCode: 10754002},

    {expressionInput: {input: "string", algorithm: "md5"}, expectedCode: ErrorCodes.NotImplemented},
    {expressionInput: {input: "string", algorithm: "sha256"}, expectedCode: ErrorCodes.NotImplemented},
];

describe("$hash", () => {
    beforeEach(() => {
        assertDropCollection(db, collName);
        assert.commandWorked(coll.insert({_id: 0}));
    });

    it("works with various inputs", () => {
        for (const {expressionInput, expectedHash} of successTests) {
            const actualHash = coll.aggregate([{$project: {hash: {$hash: expressionInput}}}]).toArray()[0].hash;
            assert(
                bsonBinaryEqual(actualHash, expectedHash),
                `input=${tojson(expressionInput)}, expected=${expectedHash}, actual=${actualHash}`,
            );
        }
    });

    it("fails for various inputs", () => {
        for (const {expressionInput, expectedCode} of failureTests) {
            assertErrorCode(coll, [{$project: {hash: {$hash: expressionInput}}}], expectedCode);
        }
    });
});
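As a sanity check on the expected values above: the BinData payload is the base64 encoding of the 8 canonical (big-endian) digest bytes, as the evaluate code later in this diff makes explicit. A small illustrative sketch in Node (not part of the test suite):

// Decode the expected "Hello World" digest from the success table above.
const canonicalBytes = Buffer.from("YzTSBxkkW8I=", "base64");
console.log(canonicalBytes.toString("hex")); // 6334d20719245bc2 (XXH64 digest, big-endian byte order)
console.log(canonicalBytes.length);          // 8 bytes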
@@ -1678,6 +1678,7 @@ mongo_cc_library(
        "//src/mongo/db/exec/expression:evaluate_date.cpp",
        "//src/mongo/db/exec/expression:evaluate_find_internal.cpp",
        "//src/mongo/db/exec/expression:evaluate_fle.cpp",
        "//src/mongo/db/exec/expression:evaluate_hash.cpp",
        "//src/mongo/db/exec/expression:evaluate_index.cpp",
        "//src/mongo/db/exec/expression:evaluate_javascript.cpp",
        "//src/mongo/db/exec/expression:evaluate_logical.cpp",
@@ -1788,6 +1789,7 @@ mongo_cc_library(
        "//src/mongo/util:pcre_util",
        "//src/mongo/util:pcre_wrapper",
        "//src/mongo/util:summation",
        "//src/third_party/zstandard:zstd",
    ],
)
@@ -506,5 +506,7 @@ Value evaluate(const ExpressionEncStrNormalizedEq& expr,
Value evaluate(const ExpressionSerializeEJSON& expr, const Document& root, Variables* variables);
Value evaluate(const ExpressionDeserializeEJSON& expr, const Document& root, Variables* variables);

Value evaluate(const ExpressionHash& expr, const Document& root, Variables* variables);

} // namespace exec::expression
} // namespace mongo
@@ -0,0 +1,109 @@
/**
 * Copyright (C) 2025-present MongoDB, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the Server Side Public License in all respects for
 * all of the code used other than as permitted herein. If you modify file(s)
 * with this exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do so,
 * delete this exception statement from your version. If you delete this
 * exception statement from all source files in the program, then also delete
 * it in the license file.
 */

#include "mongo/bson/bsontypes.h"
#include "mongo/db/exec/expression/evaluate.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/util/text.h"

#include <common/xxhash.h>

namespace mongo {

namespace exec::expression {

namespace {

HashAlgorithm parseAlgorithm(Value algorithm) {
    uassert(10754001,
            str::stream() << "$hash requires that 'algorithm' be a string, found: "
                          << typeName(algorithm.getType()) << " with value "
                          << algorithm.toString(),
            algorithm.getType() == BSONType::string);

    static const StringDataMap<HashAlgorithm> stringToAlgorithm{
        {toStringData(HashAlgorithm::md5), HashAlgorithm::md5},
        {toStringData(HashAlgorithm::sha256), HashAlgorithm::sha256},
        {toStringData(HashAlgorithm::xxh64), HashAlgorithm::xxh64},
    };

    auto algorithmString = algorithm.getStringData();
    auto algorithmPair = stringToAlgorithm.find(algorithmString);

    uassert(10754002,
            str::stream() << "Currently, the only supported algorithms for $hash are 'md5', "
                             "'sha256' and 'xxh64', found: "
                          << algorithmString,
            algorithmPair != stringToAlgorithm.end());

    return algorithmPair->second;
}

} // namespace

Value evaluate(const ExpressionHash& expr, const Document& root, Variables* variables) {
    auto input = expr.getInput().evaluate(root, variables);
    if (input.nullish()) {
        return Value(BSONNULL);
    }

    auto inputType = input.getType();
    uassert(
        10754000,
        str::stream() << "$hash requires that 'input' be a valid UTF-8 string or binData, found: "
                      << typeName(input.getType()) << " with value " << input.toString(),
        (inputType == BSONType::string && isValidUTF8(input.getStringData())) ||
            inputType == BSONType::binData);

    auto algorithm = parseAlgorithm(expr.getAlgorithm().evaluate(root, variables));

    ConstDataRange inputBytes = inputType == BSONType::string
        ? ConstDataRange(input.getStringData().data(), input.getStringData().size())
        : ConstDataRange(static_cast<const char*>(input.getBinData().data),
                         input.getBinData().length);

    switch (algorithm) {
        case HashAlgorithm::md5:
            uasserted(ErrorCodes::NotImplemented, "md5 not yet implemented");
        case HashAlgorithm::sha256:
            uasserted(ErrorCodes::NotImplemented, "sha256 not yet implemented");
        case HashAlgorithm::xxh64: {
            // We use the canonical (big endian) form for a platform-independent representation.
            XXH64_hash_t hash = ZSTD_XXH64(inputBytes.data(), inputBytes.length(), 0);
            XXH64_canonical_t canonical;
            ZSTD_XXH64_canonicalFromHash(&canonical, hash);
            return Value(BSONBinData(canonical.digest, sizeof(canonical.digest), BinDataGeneral));
        }
    }

    MONGO_UNREACHABLE;
}

} // namespace exec::expression
} // namespace mongo
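Note that evaluate() hashes the raw bytes regardless of whether they arrive as a string or as binData, so both forms of the same payload produce the same digest (the jstest's BinData(0, "aGV5") is just the bytes of "hey"). A hedged shell sketch; the collection name is hypothetical:

// Both projections should yield BinData(0, "Wv5M8jFeEv4="), per the jstest above.
db.example.aggregate([{
    $project: {
        fromString: {$hash: {input: "hey", algorithm: "xxh64"}},
        fromBinData: {$hash: {input: BinData(0, "aGV5"), algorithm: "xxh64"}},
    }
}]);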
@@ -1182,6 +1182,7 @@ mongo_cc_unit_test(
        "expression_field_path_test.cpp",
        "expression_find_internal_test.cpp",
        "expression_function_test.cpp",
        "expression_hash_test.cpp",
        "expression_hasher_test.cpp",
        "expression_let_test.cpp",
        "expression_map_reduce_filter_test.cpp",
@@ -5583,6 +5583,89 @@ const Expression* ExpressionDeserializeEJSON::getOnError() const {
    return _children[_kOnErrorIdx].get();
}

/* --------------------------------- ExpressionHash --------------------------------------------- */

REGISTER_EXPRESSION_WITH_FEATURE_FLAG(hash,
                                      ExpressionHash::parse,
                                      AllowedWithApiStrict::kAlways,
                                      AllowedWithClientType::kAny,
                                      &feature_flags::gFeatureFlagMqlJsEngineGap);

ExpressionHash::ExpressionHash(ExpressionContext* const expCtx,
                               boost::intrusive_ptr<Expression> input,
                               boost::intrusive_ptr<Expression> algorithm)
    : Expression(expCtx,
                 {
                     std::move(input),
                     std::move(algorithm),
                 }) {
    expCtx->setSbeCompatibility(SbeCompatibility::notCompatible);
}

intrusive_ptr<Expression> ExpressionHash::parse(ExpressionContext* const expCtx,
                                                BSONElement expr,
                                                const VariablesParseState& vps) {
    uassert(ErrorCodes::FailedToParse,
            str::stream() << "$hash expects an object of named arguments but found: "
                          << typeName(expr.type()),
            expr.type() == BSONType::object);

    boost::intrusive_ptr<Expression> input;
    boost::intrusive_ptr<Expression> algorithm;

    for (auto&& elem : expr.embeddedObject()) {
        const auto field = elem.fieldNameStringData();
        if (field == _kInput) {
            input = parseOperand(expCtx, elem, vps);
        } else if (field == _kAlgorithm) {
            algorithm = parseOperand(expCtx, elem, vps);
        } else {
            uasserted(ErrorCodes::FailedToParse,
                      str::stream()
                          << "$hash found an unknown argument: " << elem.fieldNameStringData());
        }
    }

    uassert(ErrorCodes::FailedToParse, "Missing 'input' parameter to $hash", input);
    uassert(ErrorCodes::FailedToParse, "Missing 'algorithm' parameter to $hash", algorithm);
    return new ExpressionHash(expCtx, std::move(input), std::move(algorithm));
}

const char* ExpressionHash::getOpName() const {
    return "$hash";
}

Value ExpressionHash::evaluate(const Document& root, Variables* variables) const {
    return exec::expression::evaluate(*this, root, variables);
}

intrusive_ptr<Expression> ExpressionHash::optimize() {
    _children[_kInputIdx] = _children[_kInputIdx]->optimize();
    _children[_kAlgorithmIdx] = _children[_kAlgorithmIdx]->optimize();
    return this;
}

Value ExpressionHash::serialize(const SerializationOptions& options) const {
    return Value(Document{{
        getOpName(),
        Document{{_kInput, getInput().serialize(options)},
                 {_kAlgorithm, getAlgorithm().serialize(options)}},
    }});
}

boost::intrusive_ptr<Expression> ExpressionHash::clone() const {
    return make_intrusive<ExpressionHash>(
        getExpressionContext(), cloneChild(_kInputIdx), cloneChild(_kAlgorithmIdx));
}

const Expression& ExpressionHash::getInput() const {
    return *_children[_kInputIdx];
}

const Expression& ExpressionHash::getAlgorithm() const {
    return *_children[_kAlgorithmIdx];
}

/* --------------------------------- Parenthesis ---------------------------------------------
 */
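As the ParseAndSerialize unit test later in this diff shows, serialize() preserves the named-argument shape and only wraps the constant algorithm in $const:

// Input spec:      {$hash: {input: "$path", algorithm: "xxh64"}}
// Serialized form: {$hash: {input: "$path", algorithm: {$const: "xxh64"}}}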
@@ -6131,6 +6131,53 @@ private:
    static constexpr int _kOnErrorIdx = 1;
};

// Represents the hashing algorithm used for $hash.
#define HASH_ALGORITHM(F) \
    F(md5)                \
    F(sha256)             \
    F(xxh64)
QUERY_UTIL_NAMED_ENUM_DEFINE(HashAlgorithm, HASH_ALGORITHM);
#undef HASH_ALGORITHM

class ExpressionHash final : public Expression {
public:
    explicit ExpressionHash(ExpressionContext* expCtx,
                            boost::intrusive_ptr<Expression> input,
                            boost::intrusive_ptr<Expression> algorithm);

    static boost::intrusive_ptr<Expression> parse(ExpressionContext* expCtx,
                                                  BSONElement exprElement,
                                                  const VariablesParseState& vps);

    Value serialize(const SerializationOptions& options = {}) const final;

    Value evaluate(const Document& root, Variables* variables) const final;

    [[nodiscard]] boost::intrusive_ptr<Expression> optimize() final;

    const char* getOpName() const;

    void acceptVisitor(ExpressionMutableVisitor* visitor) final {
        return visitor->visit(this);
    }

    void acceptVisitor(ExpressionConstVisitor* visitor) const final {
        return visitor->visit(this);
    }

    boost::intrusive_ptr<Expression> clone() const final;

    const Expression& getInput() const;
    const Expression& getAlgorithm() const;

private:
    static constexpr StringData _kInput = "input"_sd;
    static constexpr size_t _kInputIdx = 0;

    static constexpr StringData _kAlgorithm = "algorithm"_sd;
    static constexpr size_t _kAlgorithmIdx = 1;
};

/**
 * ExpressionEncTextSearch is the base class for all encrypted text search expressions. The first
 * operand (input) must be a field path expression, and the second operand (text) a constant
@@ -253,6 +253,7 @@ MONGO_INITIALIZER_GENERAL(InitExpressionsForCloneTest, ("EndExpressionRegistrati
    fromjson("{$first: '$foo'}"),
    fromjson("{$firstN: {input: '$foo', n: 2}}"),
    fromjson("{$getField: {field: '$foo', input: '$bar'}}"),
    fromjson("{$hash: {input: '$foo', algorithm: '$bar'}}"),
    fromjson("{$ifNull: ['$a', '$b', '$c']}"),
    fromjson("{$isNumber: '$foo'}"),
    fromjson("{$last: '$foo'}"),
@@ -0,0 +1,209 @@
/**
 * Copyright (C) 2025-present MongoDB, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the Server Side Public License in all respects for
 * all of the code used other than as permitted herein. If you modify file(s)
 * with this exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do so,
 * delete this exception statement from your version. If you delete this
 * exception statement from all source files in the program, then also delete
 * it in the license file.
 */

#include "mongo/bson/bsontypes_util.h"
#include "mongo/db/exec/document_value/document_value_test_util.h"
#include "mongo/db/pipeline/aggregation_context_fixture.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/unittest/unittest.h"

namespace mongo {
namespace ExpressionHashTest {

using ExpressionHashTest = AggregationContextFixture;

TEST_F(ExpressionHashTest, ParseAndSerialize) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: '$path', algorithm: 'xxh64'}}");
    auto hashExp = Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState);

    ASSERT_VALUE_EQ(hashExp->serialize(),
                    Value(fromjson("{$hash: {input: '$path', algorithm: {$const: 'xxh64'}}}")));
}

TEST_F(ExpressionHashTest, ParseFailsWithoutInput) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {algorithm: 'xxh64'}}");
    ASSERT_THROWS_WITH_CHECK(
        Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState),
        AssertionException,
        [](const AssertionException& exception) {
            ASSERT_EQ(exception.code(), ErrorCodes::FailedToParse);
            ASSERT_STRING_CONTAINS(exception.reason(), "Missing 'input' parameter to $hash");
        });
}

TEST_F(ExpressionHashTest, ParseFailsWithoutAlgorithm) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: '$field'}}");
    ASSERT_THROWS_WITH_CHECK(
        Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState),
        AssertionException,
        [](const AssertionException& exception) {
            ASSERT_EQ(exception.code(), ErrorCodes::FailedToParse);
            ASSERT_STRING_CONTAINS(exception.reason(), "Missing 'algorithm' parameter to $hash");
        });
}

TEST_F(ExpressionHashTest, ParseFailsWithUnknownArgument) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: '$f', algorithm: 'xxh64', extra: 1}}");
    ASSERT_THROWS_WITH_CHECK(
        Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState),
        AssertionException,
        [](const AssertionException& exception) {
            ASSERT_EQ(exception.code(), ErrorCodes::FailedToParse);
            ASSERT_STRING_CONTAINS(exception.reason(), "$hash found an unknown argument: extra");
        });
}

TEST_F(ExpressionHashTest, ParseFailsWithNonObjectArg) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: 'hey'}");
    ASSERT_THROWS_WITH_CHECK(
        Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState),
        AssertionException,
        [](const AssertionException& exception) {
            ASSERT_EQ(exception.code(), ErrorCodes::FailedToParse);
            ASSERT_STRING_CONTAINS(exception.reason(),
                                   "$hash expects an object of named arguments but found: string");
        });
}

TEST_F(ExpressionHashTest, InvalidUtf8StringFails) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: '$path', algorithm: 'xxh64'}}");
    auto hashExp = Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState);

    Document input{{"path", "\xc2"_sd}};
    ASSERT_THROWS_WITH_CHECK(
        hashExp->evaluate(input, &expCtx->variables),
        AssertionException,
        [](const AssertionException& exception) {
            ASSERT_STRING_CONTAINS(
                exception.reason(),
                "$hash requires that 'input' be a valid UTF-8 string or binData, found: string");
        });
}

TEST_F(ExpressionHashTest, InputArrayFails) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: \"$path\", algorithm: 'xxh64'}}");
    auto hashExp = Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState);

    Document input{{"path", BSON_ARRAY(1 << 2 << 3)}};
    ASSERT_THROWS_WITH_CHECK(hashExp->evaluate(input, &expCtx->variables),
                             AssertionException,
                             [](const AssertionException& exception) {
                                 ASSERT_STRING_CONTAINS(
                                     exception.reason(),
                                     "$hash requires that 'input' be a valid UTF-8 string or "
                                     "binData, found: array with value [1, 2, 3]");
                             });
}

TEST_F(ExpressionHashTest, NullAlgorithmFails) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: '$path', algorithm: null}}");
    auto hashExp = Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState);

    Document input{{"path", "test"_sd}};
    ASSERT_THROWS_WITH_CHECK(
        hashExp->evaluate(input, &expCtx->variables),
        AssertionException,
        [](const AssertionException& exception) {
            ASSERT_STRING_CONTAINS(
                exception.reason(),
                "$hash requires that 'algorithm' be a string, found: null with value null");
        });
}

TEST_F(ExpressionHashTest, InvalidAlgorithmNameFails) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: '$path', algorithm: 'sha1'}}");
    auto hashExp = Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState);

    Document input{{"path", "test"_sd}};
    ASSERT_THROWS_WITH_CHECK(
        hashExp->evaluate(input, &expCtx->variables),
        AssertionException,
        [](const AssertionException& exception) {
            ASSERT_STRING_CONTAINS(exception.reason(),
                                   "Currently, the only supported algorithms for $hash are 'md5', "
                                   "'sha256' and 'xxh64', found: sha1");
        });
}

TEST_F(ExpressionHashTest, AlgorithmCaseSensitiveFails) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: '$path', algorithm: 'MD5'}}");
    auto hashExp = Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState);

    Document input{{"path", "test"_sd}};
    ASSERT_THROWS_WITH_CHECK(
        hashExp->evaluate(input, &expCtx->variables),
        AssertionException,
        [](const AssertionException& exception) {
            ASSERT_STRING_CONTAINS(exception.reason(),
                                   "Currently, the only supported algorithms for $hash are 'md5', "
                                   "'sha256' and 'xxh64', found: MD5");
        });
}

TEST_F(ExpressionHashTest, HashWithXxh64WorksForString) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: '$path', algorithm: 'xxh64'}}");
    auto hashExp = Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState);

    Document input{{"path", "Hello World"_sd}};
    auto result = hashExp->evaluate(input, &expCtx->variables);

    auto expectedBytes = base64::decode("YzTSBxkkW8I=");
    ASSERT_EQ(result.getType(), BSONType::binData);
    ASSERT_VALUE_EQ(result,
                    Value(BSONBinData(expectedBytes.data(), expectedBytes.size(), BinDataGeneral)));
}

TEST_F(ExpressionHashTest, HashWithXxh64WorksForBinaryData) {
    auto expCtx = getExpCtx();
    auto spec = fromjson("{$hash: {input: '$path', algorithm: 'xxh64'}}");
    auto hashExp = Expression::parseExpression(expCtx.get(), spec, expCtx->variablesParseState);

    StringData helloWorld = "Hello World"_sd;
    Document input{{"path", BSONBinData(helloWorld.data(), helloWorld.size(), BinDataGeneral)}};
    auto result = hashExp->evaluate(input, &expCtx->variables);

    auto expectedBytes = base64::decode("YzTSBxkkW8I=");
    ASSERT_EQ(result.getType(), BSONType::binData);
    ASSERT_VALUE_EQ(result,
                    Value(BSONBinData(expectedBytes.data(), expectedBytes.size(), BinDataGeneral)));
}

} // namespace ExpressionHashTest
} // namespace mongo
@@ -213,6 +213,7 @@ public:
        kTestFeatureFlagLastLTS,
        kSerializeEJSON,
        kDeserializeEJSON,
        kHash
    };

    explicit ExpressionHashVisitor(H hashState) : _hashState(std::move(hashState)) {}
@@ -915,6 +916,10 @@ public:
        combine(OpType::kDeserializeEJSON);
    }

    void visit(const ExpressionHash* expr) final {
        combine(OpType::kHash);
    }

    H moveHashState() {
        return std::move(_hashState);
    }
@@ -190,6 +190,7 @@ class ExpressionTestFeatureFlagLatest;
class ExpressionTestFeatureFlagLastLTS;
class ExpressionSerializeEJSON;
class ExpressionDeserializeEJSON;
class ExpressionHash;

class AccumulatorAvg;
class AccumulatorFirstN;
@@ -435,6 +436,7 @@ public:
        expression_walker::MaybeConstPtr<IsConst, ExpressionTestFeatureFlagLastLTS>) = 0;
    virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionSerializeEJSON>) = 0;
    virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionDeserializeEJSON>) = 0;
    virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionHash>) = 0;
};

using ExpressionMutableVisitor = ExpressionVisitor<false>;
@@ -624,5 +626,6 @@ struct SelectiveConstExpressionVisitorBase : public ExpressionConstVisitor {
    void visit(const ExpressionTestFeatureFlagLastLTS*) override {}
    void visit(const ExpressionSerializeEJSON*) override {}
    void visit(const ExpressionDeserializeEJSON*) override {}
    void visit(const ExpressionHash*) override {}
};
} // namespace mongo
@@ -218,6 +218,7 @@ public:
    void visit(const ExpressionTestFeatureFlagLastLTS*) override {}
    void visit(const ExpressionSerializeEJSON*) override {}
    void visit(const ExpressionDeserializeEJSON*) override {}
    void visit(const ExpressionHash*) override {}
};

class DependencyVisitor : public DefaultDependencyVisitor {
@@ -423,6 +423,7 @@ public:
    void visit(const ExpressionTestFeatureFlagLastLTS* expr) final {}
    void visit(const ExpressionSerializeEJSON*) final {}
    void visit(const ExpressionDeserializeEJSON*) final {}
    void visit(const ExpressionHash* expr) final {}

private:
    ExpressionVisitorContext* _context;
@@ -619,6 +620,7 @@ public:
    void visit(const ExpressionTestFeatureFlagLastLTS* expr) final {}
    void visit(const ExpressionSerializeEJSON*) final {}
    void visit(const ExpressionDeserializeEJSON*) final {}
    void visit(const ExpressionHash* expr) final {}

private:
    ExpressionVisitorContext* _context;
@@ -3470,6 +3472,11 @@ public:
        unsupportedExpression("$deserializeEJSON");
    }

    void visit(const ExpressionHash* expr) final {
        // TODO(SERVER-115462): Support $hash in SBE.
        unsupportedExpression("$hash");
    }

    void visit(const ExpressionTsSecond* expr) final {
        _context->ensureArity(1);