mirror of https://github.com/mongodb/mongo
SERVER-73322 Add collationMatchesDefault to mongos to correctly optimize timeseries queries (#42468)
GitOrigin-RevId: 966f0d61dcb72be2d310be72ef7f87c2df62f1f1
This commit is contained in:
parent
a23bdfd1fe
commit
f5a8b94655
|
|
@ -7,8 +7,6 @@
|
|||
* Collection's collation might affect the computed control values.
|
||||
*
|
||||
* @tags: [
|
||||
* # TODO (SERVER-73322): remove
|
||||
* assumes_against_mongod_not_mongos,
|
||||
* requires_non_retryable_writes,
|
||||
* requires_pipeline_optimization,
|
||||
* does_not_support_stepdowns,
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ auto makeExpressionContext(OperationContext* opCtx,
|
|||
// necessary for mapReduce commands because we will always be merging on the _id field. As such,
|
||||
// the collection default collation has no impact on the selection of fields to merge on.
|
||||
const auto requiresCollationForParsingUnshardedAggregate = false;
|
||||
auto collationObj =
|
||||
auto [collationObj, collationMatchesDefault] =
|
||||
cluster_aggregation_planner::getCollation(opCtx,
|
||||
cri,
|
||||
nss,
|
||||
|
|
@ -157,6 +157,7 @@ auto makeExpressionContext(OperationContext* opCtx,
|
|||
.explain(verbosity)
|
||||
.runtimeConstants(runtimeConstants)
|
||||
.inRouter(true)
|
||||
.collationMatchesDefault(collationMatchesDefault)
|
||||
.build();
|
||||
if (!cri.hasRoutingTable() && collationObj.isEmpty()) {
|
||||
expCtx->setIgnoreCollator();
|
||||
|
|
|
|||
|
|
@ -184,7 +184,8 @@ boost::intrusive_ptr<ExpressionContext> makeExpressionContext(
|
|||
boost::optional<UUID> uuid,
|
||||
ResolvedNamespaceMap resolvedNamespaces,
|
||||
bool hasChangeStream,
|
||||
boost::optional<ExplainOptions::Verbosity> verbosity) {
|
||||
boost::optional<ExplainOptions::Verbosity> verbosity,
|
||||
ExpressionContextCollationMatchesDefault collationMatchesDefault) {
|
||||
|
||||
std::unique_ptr<CollatorInterface> collation;
|
||||
if (!collationObj.isEmpty()) {
|
||||
|
|
@ -208,6 +209,7 @@ boost::intrusive_ptr<ExpressionContext> makeExpressionContext(
|
|||
.inRouter(true)
|
||||
.collUUID(uuid)
|
||||
.canBeRejected(canBeRejected)
|
||||
.collationMatchesDefault(collationMatchesDefault)
|
||||
.build();
|
||||
|
||||
if (!(cri && cri->hasRoutingTable()) && collationObj.isEmpty()) {
|
||||
|
|
@ -416,8 +418,9 @@ std::unique_ptr<Pipeline> parsePipelineAndRegisterQueryStats(
|
|||
// collation, and since collectionless aggregations generally run on the 'admin'
|
||||
// database, the standard logic would attempt to resolve its non-existent UUID and
|
||||
// collation by sending a specious 'listCollections' command to the config servers.
|
||||
auto collationObj = hasChangeStream
|
||||
? request.getCollation().value_or(BSONObj())
|
||||
auto [collationObj, collationMatchesDefault] = hasChangeStream
|
||||
? std::pair(request.getCollation().value_or(BSONObj()),
|
||||
ExpressionContextCollationMatchesDefault::kYes)
|
||||
: cluster_aggregation_planner::getCollation(opCtx,
|
||||
cri,
|
||||
nsStruct.executionNss,
|
||||
|
|
@ -437,7 +440,8 @@ std::unique_ptr<Pipeline> parsePipelineAndRegisterQueryStats(
|
|||
boost::none /* uuid */,
|
||||
resolveInvolvedNamespaces(involvedNamespaces),
|
||||
hasChangeStream,
|
||||
verbosity);
|
||||
verbosity,
|
||||
collationMatchesDefault);
|
||||
|
||||
// If the routing table exists, then the collection is tracked in the router role and we can
|
||||
// validate if it is timeseries. If the collection is untracked, this validation will happen in
|
||||
|
|
|
|||
|
|
@ -962,33 +962,54 @@ Status dispatchPipelineAndMerge(OperationContext* opCtx,
|
|||
requestQueryStatsFromRemotes);
|
||||
}
|
||||
|
||||
BSONObj getCollation(OperationContext* opCtx,
|
||||
const boost::optional<CollectionRoutingInfo>& cri,
|
||||
const NamespaceString& nss,
|
||||
const BSONObj& collation,
|
||||
bool requiresCollationForParsingUnshardedAggregate) {
|
||||
// If this is a collectionless aggregation or if the user specified an explicit collation,
|
||||
// we immediately return the user-defined collation if one exists, or an empty BSONObj
|
||||
// otherwise.
|
||||
if (nss.isCollectionlessAggregateNS() || !collation.isEmpty() || !cri) {
|
||||
return collation;
|
||||
std::pair<BSONObj, ExpressionContextCollationMatchesDefault> getCollation(
|
||||
OperationContext* opCtx,
|
||||
const boost::optional<CollectionRoutingInfo>& cri,
|
||||
const NamespaceString& nss,
|
||||
const BSONObj& collation,
|
||||
bool requiresCollationForParsingUnshardedAggregate) {
|
||||
|
||||
// If this is a collectionless aggregation, we immediately return the user-defined collation if
|
||||
// one exists, or an empty BSONObj otherwise. The same applies when no routing info is available.
|
||||
if (nss.isCollectionlessAggregateNS() || !cri) {
|
||||
return {collation, ExpressionContextCollationMatchesDefault::kYes};
|
||||
}
|
||||
|
||||
// If the target collection is untracked, we will contact the primary shard to discover this
|
||||
// information if it is necessary for pipeline parsing. Otherwise, we infer the collation once
|
||||
// the command is executed on the primary shard.
|
||||
if (!cri->hasRoutingTable()) {
|
||||
return requiresCollationForParsingUnshardedAggregate
|
||||
? getUntrackedCollectionCollation(opCtx, *cri, nss)
|
||||
: BSONObj();
|
||||
if (!collation.isEmpty()) {
|
||||
return {collation, ExpressionContextCollationMatchesDefault::kNo};
|
||||
}
|
||||
if (requiresCollationForParsingUnshardedAggregate) {
|
||||
return {getUntrackedCollectionCollation(opCtx, *cri, nss),
|
||||
ExpressionContextCollationMatchesDefault::kYes};
|
||||
}
|
||||
return {BSONObj(), ExpressionContextCollationMatchesDefault::kYes};
|
||||
}
|
||||
|
||||
// Return the default collator if one exists, otherwise return the simple collation.
|
||||
// If the collection is tracked and has a collation, check if the user-defined and collection
|
||||
// collation match. Return the collection collation if the user-defined collation is empty.
|
||||
// Otherwise, return the user-defined collation and report whether it matches the default.
|
||||
if (auto defaultCollator = cri->getChunkManager().getDefaultCollator()) {
|
||||
return defaultCollator->getSpec().toBSON();
|
||||
if (collation.isEmpty()) {
|
||||
return {defaultCollator->getSpec().toBSON(),
|
||||
ExpressionContextCollationMatchesDefault::kYes};
|
||||
}
|
||||
const bool collationsMatch = CollatorInterface::collatorsMatch(
|
||||
defaultCollator, getUserCollator(opCtx, collation).get());
|
||||
return {collation,
|
||||
collationsMatch ? ExpressionContextCollationMatchesDefault::kYes
|
||||
: ExpressionContextCollationMatchesDefault::kNo};
|
||||
}
|
||||
|
||||
return CollationSpec::kSimpleSpec;
|
||||
// There is no collection collation, return the user-defined collation.
|
||||
if (!collation.isEmpty()) {
|
||||
return {collation, ExpressionContextCollationMatchesDefault::kNo};
|
||||
}
|
||||
|
||||
return {CollationSpec::kSimpleSpec, ExpressionContextCollationMatchesDefault::kYes};
|
||||
}
|
||||
|
||||
Status runPipelineOnSpecificShardOnly(const boost::intrusive_ptr<ExpressionContext>& expCtx,
|
||||
|
|
|
|||
|
|
@ -75,7 +75,8 @@ ClusterClientCursorGuard buildClusterCursor(OperationContext* opCtx,
|
|||
ClusterClientCursorParams&&);
|
||||
|
||||
/**
|
||||
* Returns the collation for aggregation targeting 'nss' with the following semantics:
|
||||
* Returns the collation, and whether it matches the collection's collation, for an aggregation
|
||||
* targeting 'nss' with the following semantics:
|
||||
* - Return 'collation' if the aggregation is collectionless.
|
||||
* - If 'nss' is tracked, we return 'collation' if it is non-empty. If it is empty, we return the
|
||||
* collection default collation if there is one and the simple collation otherwise.
|
||||
|
|
@ -88,11 +89,12 @@ ClusterClientCursorGuard buildClusterCursor(OperationContext* opCtx,
|
|||
* unsharded collections are tracked in the sharding catalog as unsplittable along with their
|
||||
* collation.
|
||||
*/
|
||||
BSONObj getCollation(OperationContext* opCtx,
|
||||
const boost::optional<CollectionRoutingInfo>& cri,
|
||||
const NamespaceString& nss,
|
||||
const BSONObj& collation,
|
||||
bool requiresCollationForParsingUnshardedAggregate);
|
||||
std::pair<BSONObj, ExpressionContextCollationMatchesDefault> getCollation(
|
||||
OperationContext* opCtx,
|
||||
const boost::optional<CollectionRoutingInfo>& cri,
|
||||
const NamespaceString& nss,
|
||||
const BSONObj& collation,
|
||||
bool requiresCollationForParsingUnshardedAggregate);
|
||||
|
||||
/**
|
||||
* This structure contains information for targeting an aggregation pipeline in a sharded cluster.
|
||||
|
|
|
|||
Loading…
Reference in New Issue