SERVER-112519 Add integration tests for explain on an extension stage (#44669)

GitOrigin-RevId: b3399c3787c48fd15a239f2d8183ada90b57d2bd
This commit is contained in:
Mariano Shaar 2025-12-05 12:04:41 -05:00 committed by MongoDB Bot
parent d2659d3500
commit 929d44bfc8
7 changed files with 98 additions and 50 deletions

View File

@ -34,8 +34,8 @@ function runTest(verbosity, pipeline, expectedStages) {
assert.gt(shardStage.nReturned, 0, result);
}
}
// TODO SERVER-112519 Validate stage-specific execution stats.
assert.eq(stageOutput.execMetricField, "execMetricValue");
assert.gte(stageOutput.executionTimeMillisEstimate, 0);
}
}
}
@ -46,5 +46,3 @@ runTest("executionStats", [{$explain: {input: "hello"}}], {$explain: {input: "he
runTest("allPlansExecution", [{$explain: {input: "hello"}}], {
$explain: {input: "hello", verbosity: "allPlansExecution"},
});
// TODO SERVER-112519 Add tests covering explain on a desugar stage.

View File

@ -0,0 +1,65 @@
/**
* Tests that a desugar extension stage serializes itself correctly for explain.
*
* @tags: [
* featureFlagExtensionsAPI,
* featureFlagVectorSimilarityExpressions,
* requires_fcv_82,
* ]
*/
import {getAggPlanStages} from "jstests/libs/query/analyze_plan.js";
// Start from an empty collection and seed it with two documents that each carry a
// two-dimensional "embedding" vector for $nativeVectorSearch to score.
const coll = db[jsTestName()];
coll.drop();
const seedDocs = [
    {_id: 1, embedding: [1, 0]},
    {_id: 2, embedding: [0.5, 0.5]},
];
coll.insertMany(seedDocs);
/**
 * Runs explain on a pipeline consisting of a single $nativeVectorSearch stage against the
 * test collection.
 *
 * @param {Object} spec - The $nativeVectorSearch stage specification.
 * @param {string} [verbosity="queryPlanner"] - Explain verbosity passed to coll.explain().
 * @returns The explain output returned for the aggregation.
 */
function explainNativeVectorSearch(spec, verbosity = "queryPlanner") {
    const pipeline = [{$nativeVectorSearch: spec}];
    const explainable = coll.explain(verbosity);
    return explainable.aggregate(pipeline);
}
/**
 * Asserts that the explain output of a $nativeVectorSearch aggregation exposes the desugared
 * $vectorSearchMetrics and $sort stages with the expected contents.
 *
 * Every assertion carries the full explain output as its message so a failure shows what the
 * server actually returned.
 *
 * @param {Object} expl - Explain output for the aggregation.
 * @param {string} verbosity - Verbosity the explain was run with. Execution statistics are
 *     only checked when this is not "queryPlanner".
 */
function assertVectorSearchExplainContainsMetrics(expl, verbosity) {
    // Expected:
    //   1. $cursor
    //   2. $vectorSearchMetrics
    //   3. $setMetadata
    //   4. $sort (limit is folded into sort)
    // Only $vectorSearchMetrics and $sort are asserted on below.
    const metricsStageName = "$vectorSearchMetrics";
    const metricsStages = getAggPlanStages(expl, metricsStageName);
    // Guard before indexing: a missing stage should fail with the explain output, not with a
    // TypeError from reading a property of undefined.
    assert.gt(metricsStages.length, 0, expl);
    assert.eq(metricsStages[0][metricsStageName], {"algorithm": "cosine"}, expl);

    if (verbosity !== "queryPlanner") {
        if (metricsStages.length === 1) {
            assert.eq(metricsStages[0].nReturned, 2, expl);
        } else {
            for (const shardStage of metricsStages) {
                // In a sharded cluster the results can be spread across shards, so relax the
                // assertion.
                assert.gte(shardStage.nReturned, 0, expl);
            }
        }
        assert.gte(metricsStages[0].executionTimeMillisEstimate, 0, expl);
        // latestStart is the stage-specific execution stat reported by the extension stage.
        assert.lte(metricsStages[0].latestStart, Date.now(), expl);
    }

    const sortStageName = "$sort";
    const sortStages = getAggPlanStages(expl, sortStageName);
    assert.gt(sortStages.length, 0, expl);
    assert.eq(sortStages[0][sortStageName].limit, 2, expl);
}
/**
 * Explains a cosine $nativeVectorSearch over the "embedding" path with the given verbosity
 * and validates the resulting explain output.
 *
 * @param {string} verbosity - Explain verbosity to run with.
 */
function runTest(verbosity) {
    const expl = explainNativeVectorSearch(
        {
            path: "embedding",
            queryVector: [1, 0],
            limit: 2,
            metric: "cosine",
        },
        verbosity,
    );
    assertVectorSearchExplainContainsMetrics(expl, verbosity);
}
// "queryPlanner": explain output contains post-desugar stages.
// "executionStats": explain gets the custom metrics from $vectorSearchMetrics.
for (const verbosity of ["queryPlanner", "executionStats"]) {
    runTest(verbosity);
}

View File

@ -168,32 +168,3 @@ for (const metric of ["cosine", "dotProduct", "euclidean"]) {
norm.map((d) => d.score),
);
}
// Explain output contains post-desugar stages.
{
resetCollWithDocs([
{_id: 1, embedding: [1, 0]},
{_id: 2, embedding: [0.5, 0.5]},
]);
const expl = coll.explain().aggregate([
{
$nativeVectorSearch: {
path: "embedding",
queryVector: [1, 0],
limit: 2,
metric: "cosine",
},
},
]);
// Expected:
// 1. $cursor
// 2. $vectorSearchMetrics
// 3. $setMetadata
// 4. $sort (limit is folded into sort)
const stages = expl.stages ? expl.stages : expl.shards[Object.keys(expl.shards)[0]].stages;
const names = stages.map((s) => Object.keys(s)[0]);
assert.eq(names, ["$cursor", "$vectorSearchMetrics", "$setMetadata", "$sort"]);
assert.eq(stages[3].$sort.limit, 2);
}

View File

@ -208,7 +208,7 @@ TEST_F(LoadNativeVectorSearchTest, FullParseExpandsWithFilter) {
auto stages = desugarAndSerialize(expCtx, spec);
ASSERT_EQ(stages.size(), 5U);
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: {} })JSON");
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: { metric: "cosine" } })JSON");
expectStageEq(stages, 1, R"JSON({ $match: { x: 1 } })JSON");
expectStageEq(stages, 2, R"JSON({ $setMetadata: { vectorSearchScore: { $similarityCosine: {
vectors: [ [ { $const: 1.0 }, { $const: 2.0 }, { $const: 3.0 } ], "$embedding" ],
@ -223,7 +223,7 @@ TEST_F(LoadNativeVectorSearchTest, FullParseExpandsWithoutFilter) {
auto stages = desugarAndSerialize(expCtx, spec);
ASSERT_EQ(stages.size(), 4U);
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: {} })JSON");
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: { metric: "cosine" } })JSON");
expectStageEq(stages, 1, R"JSON({ $setMetadata: { vectorSearchScore: { $similarityCosine: {
vectors: [ [ { $const: 1.0 }, { $const: 2.0 }, { $const: 3.0 } ], "$embedding" ],
score: false } } } }
@ -236,7 +236,7 @@ TEST_F(LoadNativeVectorSearchTest, CosineNormalizedSerializesAsExpected) {
auto spec = makeNativeVectorSearchSpec(/*filter*/ false, "cosine", /*normalize*/ true);
auto stages = desugarAndSerialize(expCtx, spec);
ASSERT_EQ(stages.size(), 4U);
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: {} })JSON");
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: { metric: "cosine" } })JSON");
expectStageEq(stages, 1, R"JSON({ $setMetadata: { vectorSearchScore: { $similarityCosine: {
vectors: [ [ { $const: 1.0 }, { $const: 2.0 }, { $const: 3.0 } ], "$embedding" ],
score: true } } } })JSON");
@ -249,7 +249,7 @@ TEST_F(LoadNativeVectorSearchTest, DotProductNoNormalizeSerializesAsExpected) {
auto stages = desugarAndSerialize(expCtx, spec);
ASSERT_EQ(stages.size(), 4U);
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: {} })JSON");
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: { metric: "dotProduct" } })JSON");
expectStageEq(stages, 1, R"JSON({ $setMetadata: { vectorSearchScore: { $similarityDotProduct: {
vectors: [ [ { $const: 1.0 }, { $const: 2.0 }, { $const: 3.0 } ], "$embedding" ],
score: false } } } }
@ -263,7 +263,7 @@ TEST_F(LoadNativeVectorSearchTest, DotProductNormalizedSerializesAsExpected) {
auto stages = desugarAndSerialize(expCtx, spec);
ASSERT_EQ(stages.size(), 4U);
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: {} })JSON");
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: { metric: "dotProduct" } })JSON");
expectStageEq(stages, 1, R"JSON({ $setMetadata: { vectorSearchScore: { $similarityDotProduct: {
vectors: [ [ { $const: 1.0 }, { $const: 2.0 }, { $const: 3.0 } ], "$embedding" ],
score: true } } } })JSON");
@ -277,7 +277,7 @@ TEST_F(LoadNativeVectorSearchTest, NativeVectorSearchEuclideanNoNormalizeUsesMul
auto stages = desugarAndSerialize(expCtx, spec);
ASSERT_EQ(stages.size(), 4U);
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: {} })JSON");
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: { metric: "euclidean" } })JSON");
expectStageEq(stages, 1, R"JSON({ $setMetadata: { vectorSearchScore: { $multiply: [
{ $const: -1 },
{ $similarityEuclidean: {
@ -295,7 +295,7 @@ TEST_F(LoadNativeVectorSearchTest, NativeVectorSearchEuclideanNormalizedSerializ
auto stages = desugarAndSerialize(expCtx, spec);
ASSERT_EQ(stages.size(), 4U);
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: {} })JSON");
expectStageEq(stages, 0, R"JSON({ $vectorSearchMetrics: { metric: "euclidean" } })JSON");
expectStageEq(stages, 1, R"JSON({ $setMetadata: { vectorSearchScore: { $similarityEuclidean: {
vectors: [ [ { $const: 1.0 }, { $const: 2.0 }, { $const: 3.0 } ], "$embedding" ],
score: true } } } }

View File

@ -463,7 +463,6 @@ public:
return BSON(kStageName << verbosity);
}
// TODO (SERVER-112395): Implement this function for testing.
std::unique_ptr<ExecAggStageBase> compile() const override {
return nullptr;
}

View File

@ -94,7 +94,9 @@ public:
static constexpr std::string kCounterField = "counter";
explicit MetricsExecAggStage(const std::string& algorithm)
: sdk::ExecAggStageTransform(kMetricsStageName), _algorithm(algorithm) {}
: sdk::ExecAggStageTransform(kMetricsStageName),
_algorithm(algorithm),
_latestStart(Date_t::min()) {}
mongo::extension::ExtensionGetNextResult getNext(
const sdk::QueryExecutionContextHandle& execCtx,
@ -102,9 +104,9 @@ public:
// Get metrics from the execution context (stored on OperationContext).
auto metrics = execCtx.getMetrics(execStage);
auto now = Date_t::now();
_latestStart = Date_t::now();
BSONObjBuilder updateBuilder;
updateBuilder.append("start", now);
updateBuilder.append("start", _latestStart);
updateBuilder.append("algorithm", _algorithm);
auto bson = updateBuilder.obj();
@ -121,7 +123,7 @@ public:
void close() override {}
BSONObj explain(::MongoExtensionExplainVerbosity verbosity) const override {
return BSONObj();
return BSON("latestStart" << _latestStart);
}
/**
@ -134,6 +136,7 @@ public:
private:
std::string _algorithm;
Date_t _latestStart;
};
class MetricsLogicalStage : public sdk::LogicalAggStage {
@ -142,11 +145,12 @@ public:
: sdk::LogicalAggStage(kMetricsStageName), _algorithm(algorithm) {}
BSONObj serialize() const override {
return BSON(kMetricsStageName << BSONObj());
// This is the serialization used for mongos.
return BSON(kMetricsStageName << BSON("metric" << _algorithm));
}
BSONObj explain(::MongoExtensionExplainVerbosity verbosity) const override {
return BSON(kMetricsStageName << BSONObj());
return BSON(kMetricsStageName << BSON("algorithm" << _algorithm));
}
std::unique_ptr<sdk::ExecAggStageBase> compile() const override {

View File

@ -37,8 +37,19 @@
namespace sdk = mongo::extension::sdk;
using namespace mongo;
class ExplainLogicalStage
: public sdk::TestLogicalStage<sdk::shared_test_stages::TransformExecAggStage> {
constexpr char ExplainStageName[] = "$explain";
/**
 * Execution stage for the test explain extension stage.
 *
 * It forwards construction to sdk::TestExecStage and overrides explain() to report a fixed
 * stage-specific field, giving tests a known value to look for in explain output.
 */
class ExplainExecStage : public sdk::TestExecStage {
public:
    ExplainExecStage(std::string_view stageName, const mongo::BSONObj& arguments)
        : sdk::TestExecStage(stageName, arguments) {}

    /**
     * Returns the fixed document {execMetricField: "execMetricValue"}; the requested
     * verbosity does not affect the result.
     */
    BSONObj explain(::MongoExtensionExplainVerbosity /*verbosity*/) const override {
        BSONObj execMetrics = BSON("execMetricField" << "execMetricValue");
        return execMetrics;
    }
};
class ExplainLogicalStage : public sdk::TestLogicalStage<ExplainExecStage> {
public:
ExplainLogicalStage(std::string_view stageName, const mongo::BSONObj& spec)
: TestLogicalStage(stageName, spec) {}
@ -91,7 +102,7 @@ public:
void validate(const mongo::BSONObj& arguments) const override {
sdk_uassert(
11239403,
(str::stream() << "input to " << kStageName << " must be a string " << arguments),
(str::stream() << "input to " << ExplainStageName << " must be a string " << arguments),
arguments["input"] && arguments["input"].type() == mongo::BSONType::string);
}
};