SERVER-73322 Add collationMatchesDefault to mongos to correctly optimize timeseries queries (#42468)

GitOrigin-RevId: 966f0d61dcb72be2d310be72ef7f87c2df62f1f1
This commit is contained in:
Gil Alon 2025-10-16 09:49:16 -04:00 committed by MongoDB Bot
parent a23bdfd1fe
commit f5a8b94655
5 changed files with 55 additions and 29 deletions

View File

@ -7,8 +7,6 @@
* Collection's collation might affect the computed control values.
*
* @tags: [
* # TODO (SERVER-73322): remove
* assumes_against_mongod_not_mongos,
* requires_non_retryable_writes,
* requires_pipeline_optimization,
* does_not_support_stepdowns,

View File

@ -111,7 +111,7 @@ auto makeExpressionContext(OperationContext* opCtx,
// necessary for mapReduce commands because we will always be merging on the _id field. As such,
// the collection default collation has no impact on the selection of fields to merge on.
const auto requiresCollationForParsingUnshardedAggregate = false;
auto collationObj =
auto [collationObj, collationMatchesDefault] =
cluster_aggregation_planner::getCollation(opCtx,
cri,
nss,
@ -157,6 +157,7 @@ auto makeExpressionContext(OperationContext* opCtx,
.explain(verbosity)
.runtimeConstants(runtimeConstants)
.inRouter(true)
.collationMatchesDefault(collationMatchesDefault)
.build();
if (!cri.hasRoutingTable() && collationObj.isEmpty()) {
expCtx->setIgnoreCollator();

View File

@ -184,7 +184,8 @@ boost::intrusive_ptr<ExpressionContext> makeExpressionContext(
boost::optional<UUID> uuid,
ResolvedNamespaceMap resolvedNamespaces,
bool hasChangeStream,
boost::optional<ExplainOptions::Verbosity> verbosity) {
boost::optional<ExplainOptions::Verbosity> verbosity,
ExpressionContextCollationMatchesDefault collationMatchesDefault) {
std::unique_ptr<CollatorInterface> collation;
if (!collationObj.isEmpty()) {
@ -208,6 +209,7 @@ boost::intrusive_ptr<ExpressionContext> makeExpressionContext(
.inRouter(true)
.collUUID(uuid)
.canBeRejected(canBeRejected)
.collationMatchesDefault(collationMatchesDefault)
.build();
if (!(cri && cri->hasRoutingTable()) && collationObj.isEmpty()) {
@ -416,8 +418,9 @@ std::unique_ptr<Pipeline> parsePipelineAndRegisterQueryStats(
// collation, and since collectionless aggregations generally run on the 'admin'
// database, the standard logic would attempt to resolve its non-existent UUID and
// collation by sending a specious 'listCollections' command to the config servers.
auto collationObj = hasChangeStream
? request.getCollation().value_or(BSONObj())
auto [collationObj, collationMatchesDefault] = hasChangeStream
? std::pair(request.getCollation().value_or(BSONObj()),
ExpressionContextCollationMatchesDefault::kYes)
: cluster_aggregation_planner::getCollation(opCtx,
cri,
nsStruct.executionNss,
@ -437,7 +440,8 @@ std::unique_ptr<Pipeline> parsePipelineAndRegisterQueryStats(
boost::none /* uuid */,
resolveInvolvedNamespaces(involvedNamespaces),
hasChangeStream,
verbosity);
verbosity,
collationMatchesDefault);
// If the routing table exists, then the collection is tracked in the router role and we can
// validate if it is timeseries. If the collection is untracked, this validation will happen in

View File

@ -962,33 +962,54 @@ Status dispatchPipelineAndMerge(OperationContext* opCtx,
requestQueryStatsFromRemotes);
}
BSONObj getCollation(OperationContext* opCtx,
const boost::optional<CollectionRoutingInfo>& cri,
const NamespaceString& nss,
const BSONObj& collation,
bool requiresCollationForParsingUnshardedAggregate) {
// If this is a collectionless aggregation or if the user specified an explicit collation,
// we immediately return the user-defined collation if one exists, or an empty BSONObj
// otherwise.
if (nss.isCollectionlessAggregateNS() || !collation.isEmpty() || !cri) {
return collation;
std::pair<BSONObj, ExpressionContextCollationMatchesDefault> getCollation(
OperationContext* opCtx,
const boost::optional<CollectionRoutingInfo>& cri,
const NamespaceString& nss,
const BSONObj& collation,
bool requiresCollationForParsingUnshardedAggregate) {
// If this is a collectionless aggregation, or no routing information is available, we
// immediately return the user-defined collation if one exists, or an empty BSONObj otherwise.
if (nss.isCollectionlessAggregateNS() || !cri) {
return {collation, ExpressionContextCollationMatchesDefault::kYes};
}
// If the target collection is untracked, we will contact the primary shard to discover this
// information if it is necessary for pipeline parsing. Otherwise, we infer the collation once
// the command is executed on the primary shard.
if (!cri->hasRoutingTable()) {
return requiresCollationForParsingUnshardedAggregate
? getUntrackedCollectionCollation(opCtx, *cri, nss)
: BSONObj();
if (!collation.isEmpty()) {
return {collation, ExpressionContextCollationMatchesDefault::kNo};
}
if (requiresCollationForParsingUnshardedAggregate) {
return {getUntrackedCollectionCollation(opCtx, *cri, nss),
ExpressionContextCollationMatchesDefault::kYes};
}
return {BSONObj(), ExpressionContextCollationMatchesDefault::kYes};
}
// Return the default collator if one exists, otherwise return the simple collation.
// If the collection is tracked and has a default collation, return the collection collation when
// the user-defined collation is empty. Otherwise return the user-defined collation and report
// whether it matches the collection collation.
if (auto defaultCollator = cri->getChunkManager().getDefaultCollator()) {
return defaultCollator->getSpec().toBSON();
if (collation.isEmpty()) {
return {defaultCollator->getSpec().toBSON(),
ExpressionContextCollationMatchesDefault::kYes};
}
const bool collationsMatch = CollatorInterface::collatorsMatch(
defaultCollator, getUserCollator(opCtx, collation).get());
return {collation,
collationsMatch ? ExpressionContextCollationMatchesDefault::kYes
: ExpressionContextCollationMatchesDefault::kNo};
}
return CollationSpec::kSimpleSpec;
// There is no collection default collation, so return the user-defined collation if one exists.
if (!collation.isEmpty()) {
return {collation, ExpressionContextCollationMatchesDefault::kNo};
}
return {CollationSpec::kSimpleSpec, ExpressionContextCollationMatchesDefault::kYes};
}
Status runPipelineOnSpecificShardOnly(const boost::intrusive_ptr<ExpressionContext>& expCtx,

View File

@ -75,7 +75,8 @@ ClusterClientCursorGuard buildClusterCursor(OperationContext* opCtx,
ClusterClientCursorParams&&);
/**
* Returns the collation for aggregation targeting 'nss' with the following semantics:
* Returns the collation, and whether it matches the collection's default collation, for an
* aggregation targeting 'nss' with the following semantics:
* - Return 'collation' if the aggregation is collectionless.
* - If 'nss' is tracked, we return 'collation' if it is non-empty. If it is empty, we return the
* collection default collation if there is one and the simple collation otherwise.
@ -88,11 +89,12 @@ ClusterClientCursorGuard buildClusterCursor(OperationContext* opCtx,
* unsharded collections are tracked in the sharding catalog as unsplittable along with their
* collation.
*/
BSONObj getCollation(OperationContext* opCtx,
const boost::optional<CollectionRoutingInfo>& cri,
const NamespaceString& nss,
const BSONObj& collation,
bool requiresCollationForParsingUnshardedAggregate);
std::pair<BSONObj, ExpressionContextCollationMatchesDefault> getCollation(
OperationContext* opCtx,
const boost::optional<CollectionRoutingInfo>& cri,
const NamespaceString& nss,
const BSONObj& collation,
bool requiresCollationForParsingUnshardedAggregate);
/**
* This structure contains information for targeting an aggregation pipeline in a sharded cluster.