SERVER-95006 Remove ShardMerge code (#27812)

GitOrigin-RevId: cba7613ea436b56b5bfb24ec95ee18692ddcec2e
Jada Lilleboe 2024-10-10 14:01:35 -04:00 committed by MongoDB Bot
parent 855dfadef0
commit 35333ed376
36 changed files with 6 additions and 9248 deletions

(Binary image file deleted; 433 KiB. Preview not shown.)


@@ -155,12 +155,6 @@ overrides:
exec_timeout: 600 # 10 hours
- task: update_timeseries_fuzzer
exec_timeout: 600 # 10 hours
- task: shard_merge_jscore_passthrough
exec_timeout: 240 # 4 hours
- task: shard_merge_causally_consistent_jscore_passthrough
exec_timeout: 240 # 4 hours
- task: shard_merge_multi_stmt_txn_jscore_passthrough
exec_timeout: 240 # 4 hours
- task: read_concern_linearizable_passthrough
exec_timeout: 270 # 4.5 hours
- task: sharding


@@ -718,7 +718,6 @@ env.Library(
"repl/repl_coordinator_impl",
"repl/replication_recovery",
"repl/serveronly_repl",
"repl/shard_merge_recipient_service",
"repl/storage_interface_impl",
"repl/tenant_migration_donor_service",
"repl/tenant_migration_recipient_service",


@@ -238,7 +238,6 @@ env.Library(
"$BUILD_DIR/mongo/db/repl/repl_coordinator_interface",
"$BUILD_DIR/mongo/db/repl/repl_server_parameters",
"$BUILD_DIR/mongo/db/repl/replica_set_messages",
"$BUILD_DIR/mongo/db/repl/shard_merge_recipient_service",
"$BUILD_DIR/mongo/db/repl/tenant_migration_donor_service",
"$BUILD_DIR/mongo/db/repl/tenant_migration_recipient_service",
"$BUILD_DIR/mongo/db/rw_concern_d",


@@ -96,7 +96,6 @@
#include "mongo/db/repl/repl_set_config.h"
#include "mongo/db/repl/repl_settings.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/shard_merge_recipient_service.h"
#include "mongo/db/repl/tenant_migration_donor_service.h"
#include "mongo/db/repl/tenant_migration_recipient_service.h"
#include "mongo/db/s/config/configsvr_coordinator_service.h"
@@ -1641,12 +1640,6 @@ private:
->lookupServiceByName(
repl::TenantMigrationRecipientService::kTenantMigrationRecipientServiceName));
recipientService->abortAllMigrations(opCtx);
auto mergeRecipientService = checked_cast<repl::ShardMergeRecipientService*>(
repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
->lookupServiceByName(
repl::ShardMergeRecipientService::kShardMergeRecipientServiceName));
mergeRecipientService->abortAllMigrations(opCtx);
}
/**


@@ -52,7 +52,6 @@
#include "mongo/db/repl/repl_server_parameters_gen.h"
#include "mongo/db/repl/repl_settings.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/shard_merge_recipient_service.h"
#include "mongo/db/repl/tenant_migration_recipient_service.h"
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
#include "mongo/db/repl/tenant_migration_util.h"
@@ -134,8 +133,6 @@ public:
switch (migrationProtocol) {
case MigrationProtocolEnum::kMultitenantMigrations:
return _handleMTMRecipientSyncDataCmd(opCtx, cmd);
case MigrationProtocolEnum::kShardMerge:
return _handleShardMergeRecipientSyncDataCmd(opCtx, cmd);
default:
MONGO_UNREACHABLE;
}
@@ -167,29 +164,6 @@ public:
: Response(recipientInstance->waitUntilMigrationReachesConsistentState(opCtx));
}
Response _handleShardMergeRecipientSyncDataCmd(OperationContext* opCtx,
const Request& cmd) {
ShardMergeRecipientDocument stateDoc(cmd.getMigrationId(),
cmd.getDonorConnectionString().toString(),
*cmd.getTenantIds(),
cmd.getStartMigrationDonorTimestamp(),
cmd.getReadPreferenceSettings());
auto recipientService =
repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
->lookupServiceByName(
repl::ShardMergeRecipientService::kShardMergeRecipientServiceName);
auto recipientInstance = repl::ShardMergeRecipientService::Instance::getOrCreate(
opCtx, recipientService, stateDoc.toBSON());
auto returnAfterReachingDonorTs = cmd.getReturnAfterReachingDonorTimestamp();
return returnAfterReachingDonorTs
? Response(recipientInstance->waitUntilMigrationReachesReturnAfterReachingTimestamp(
opCtx, *returnAfterReachingDonorTs))
: Response(recipientInstance->waitUntilMigrationReachesConsistentState(opCtx));
}
void doCheckAuthorization(OperationContext* opCtx) const final {
uassert(ErrorCodes::Unauthorized,
"Unauthorized",
@@ -258,17 +232,6 @@ public:
"Received RecipientVoteImportedFiles request",
"migrationId"_attr = cmd.getMigrationId(),
"from"_attr = cmd.getFrom());
auto recipientService =
repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
->lookupServiceByName(
repl::ShardMergeRecipientService::kShardMergeRecipientServiceName);
auto [instance, _] = repl::ShardMergeRecipientService::Instance::lookup(
opCtx, recipientService, BSON("_id" << cmd.getMigrationId()));
uassert(ErrorCodes::NoSuchTenantMigration,
str::stream() << "Could not find tenant migration with id "
<< cmd.getMigrationId(),
instance);
(*instance)->onMemberImportedFiles(cmd.getFrom());
}
private:
@@ -344,8 +307,6 @@ public:
switch (migrationProtocol) {
case MigrationProtocolEnum::kMultitenantMigrations:
return _handleMTMRecipientForgetMigrationCmd(opCtx, cmd);
case MigrationProtocolEnum::kShardMerge:
return _handleShardMergeRecipientForgetMigrationCmd(opCtx, cmd);
default:
MONGO_UNREACHABLE;
}
@@ -380,32 +341,6 @@ public:
recipientInstance->getForgetMigrationDurableFuture().get(opCtx);
}
void _handleShardMergeRecipientForgetMigrationCmd(OperationContext* opCtx,
const Request& cmd) {
ShardMergeRecipientDocument stateDoc(cmd.getMigrationId(),
cmd.getDonorConnectionString().toString(),
*cmd.getTenantIds(),
kUnusedStartMigrationTimestamp,
cmd.getReadPreferenceSettings());
// Set 'startGarbageCollect' to true so that we do not unnecessarily start a migration
// (and install an access blocker or take the serverless lock) if this
// recipientForgetMigration command is received before a recipientSyncData command or
// after the state doc is garbage collected.
stateDoc.setStartGarbageCollect(true);
auto recipientService =
repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
->lookupServiceByName(
repl::ShardMergeRecipientService::kShardMergeRecipientServiceName);
auto recipientInstance = repl::ShardMergeRecipientService::Instance::getOrCreate(
opCtx, recipientService, stateDoc.toBSON(), false);
// Instruct the instance run() function to mark this migration garbage collectable.
recipientInstance->onReceiveRecipientForgetMigration(opCtx, *cmd.getDecision());
recipientInstance->getForgetMigrationDurableFuture().get(opCtx);
}
void doCheckAuthorization(OperationContext* opCtx) const final {
uassert(ErrorCodes::Unauthorized,
"Unauthorized",


@@ -161,8 +161,6 @@
#include "mongo/db/repl/replication_coordinator_impl_gen.h"
#include "mongo/db/repl/replication_process.h"
#include "mongo/db/repl/replication_recovery.h"
#include "mongo/db/repl/shard_merge_recipient_op_observer.h"
#include "mongo/db/repl/shard_merge_recipient_service.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/tenant_migration_access_blocker_registry.h"
@@ -445,7 +443,6 @@ void registerPrimaryOnlyServices(ServiceContext* serviceContext) {
if (getGlobalReplSettings().isServerless()) {
services.push_back(std::make_unique<TenantMigrationDonorService>(serviceContext));
services.push_back(std::make_unique<repl::TenantMigrationRecipientService>(serviceContext));
services.push_back(std::make_unique<repl::ShardMergeRecipientService>(serviceContext));
}
if (change_stream_serverless_helpers::canInitializeServices()) {
@@ -1482,8 +1479,6 @@ void setUpObservers(ServiceContext* serviceContext) {
std::make_unique<repl::TenantMigrationDonorOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::TenantMigrationRecipientOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::ShardMergeRecipientOpObserver>());
}
if (!gMultitenancySupport) {
opObserverRegistry->addObserver(
@@ -1511,8 +1506,6 @@ void setUpObservers(ServiceContext* serviceContext) {
std::make_unique<repl::TenantMigrationDonorOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::TenantMigrationRecipientOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::ShardMergeRecipientOpObserver>());
}
auto replCoord = repl::ReplicationCoordinator::get(serviceContext);


@@ -148,9 +148,6 @@ bool NamespaceString::isLegalClientSystemNS() const {
* Process updates to 'admin.system.version' individually as well so the secondary's FCV when
* processing each operation matches the primary's when committing that operation.
*
* Process updates to 'config.shardMergeRecipients' individually so they serialize after
* inserts into 'config.donatedFiles.<migrationId>'.
*
* Oplog entries on 'config.shards' should be processed one at a time, otherwise the in-memory state
* that is kept on the TopologyTimeTicker might be wrong.
*
@@ -162,7 +159,7 @@ bool NamespaceString::mustBeAppliedInOwnOplogBatch() const {
return isSystemDotViews() || isServerConfigurationCollection() || isPrivilegeCollection() ||
ns == kDonorReshardingOperationsNamespace.ns() ||
ns == kForceOplogBatchBoundaryNamespace.ns() ||
ns == kTenantMigrationDonorsNamespace.ns() || ns == kShardMergeRecipientsNamespace.ns() ||
ns == kTenantMigrationDonorsNamespace.ns() ||
ns == kTenantMigrationRecipientsNamespace.ns() || ns == kConfigsvrShardsNamespace.ns();
}
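The predicate above is consulted when batching oplog entries: an entry on any of these namespaces must be applied in a batch by itself. As a rough illustration of that rule, here is a minimal standalone C++ sketch of such a batcher; it is not MongoDB's actual oplog applier, and every name in it is an illustrative assumption.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Illustrative stand-in; not a MongoDB type.
struct OplogEntry {
    std::string ns;  // target namespace, e.g. "config.shards"
};

// Models NamespaceString::mustBeAppliedInOwnOplogBatch() for a few namespaces.
bool mustBeAppliedInOwnOplogBatch(const std::string& ns) {
    return ns == "admin.system.version" || ns == "config.shards" ||
        ns == "config.tenantMigrationDonors" || ns == "config.tenantMigrationRecipients";
}

// Groups entries into batches, isolating entries on the special namespaces.
std::vector<std::vector<OplogEntry>> makeBatches(const std::vector<OplogEntry>& entries,
                                                 std::size_t maxBatchSize) {
    std::vector<std::vector<OplogEntry>> batches;
    std::vector<OplogEntry> current;
    auto flush = [&] {
        if (!current.empty()) {
            batches.push_back(current);
            current.clear();
        }
    };
    for (const auto& entry : entries) {
        if (mustBeAppliedInOwnOplogBatch(entry.ns)) {
            flush();
            batches.push_back({entry});  // this entry gets a batch of its own
        } else {
            current.push_back(entry);
            if (current.size() == maxBatchSize)
                flush();
        }
    }
    flush();
    return batches;
}

int main() {
    std::vector<OplogEntry> entries{{"test.coll"}, {"config.shards"}, {"test.coll"}};
    std::cout << makeBatches(entries, 16).size() << " batches\n";  // prints "3 batches"
}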


@@ -92,9 +92,6 @@ NSS_CONSTANT(kTenantMigrationRecipientsNamespace,
DatabaseName::kConfig,
"tenantMigrationRecipients"_sd)
// Namespace for storing the persisted state of shard merge recipient service instances.
NSS_CONSTANT(kShardMergeRecipientsNamespace, DatabaseName::kConfig, "shardMergeRecipients"_sd)
// Namespace for view on local.oplog.rs for tenant migrations.
NSS_CONSTANT(kTenantMigrationOplogView, DatabaseName::kLocal, "system.tenantMigration.oplogView"_sd)


@@ -14,7 +14,6 @@ env.Benchmark(
"$BUILD_DIR/mongo/db/auth/authserver",
"$BUILD_DIR/mongo/db/repl/primary_only_service",
"$BUILD_DIR/mongo/db/repl/replmocks",
"$BUILD_DIR/mongo/db/repl/shard_merge_recipient_service",
"$BUILD_DIR/mongo/db/repl/tenant_migration_donor_service",
"$BUILD_DIR/mongo/db/repl/tenant_migration_recipient_service",
"$BUILD_DIR/mongo/db/s/sharding_runtime_d",


@@ -44,7 +44,6 @@
#include "mongo/db/op_observer/user_write_block_mode_op_observer.h"
#include "mongo/db/repl/primary_only_service_op_observer.h"
#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/db/repl/shard_merge_recipient_op_observer.h"
#include "mongo/db/repl/tenant_migration_donor_op_observer.h"
#include "mongo/db/repl/tenant_migration_recipient_op_observer.h"
#include "mongo/db/s/config_server_op_observer.h"
@@ -100,8 +99,6 @@ void setUpObservers(ServiceContext* serviceContext,
std::make_unique<repl::TenantMigrationDonorOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::TenantMigrationRecipientOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::ShardMergeRecipientOpObserver>());
}
if (!gMultitenancySupport) {
opObserverRegistry->addObserver(
@@ -129,8 +126,6 @@ void setUpObservers(ServiceContext* serviceContext,
std::make_unique<repl::TenantMigrationDonorOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::TenantMigrationRecipientOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::ShardMergeRecipientOpObserver>());
}
if (!gMultitenancySupport) { // && replCoord && replCoord->getSettings().isReplSet()) {
opObserverRegistry->addObserver(


@@ -1473,7 +1473,6 @@ mongo_cc_library(
name = "tenant_migration_utils",
srcs = [
"tenant_migration_recipient_entry_helpers.cpp",
"tenant_migration_shard_merge_util.cpp",
"tenant_migration_util.cpp",
],
hdrs = [


@@ -555,7 +555,6 @@ env.Library(
"tenant_database_cloner.cpp",
"tenant_base_cloner.cpp",
"tenant_file_cloner.cpp",
"tenant_file_importer_service.cpp",
],
LIBDEPS=[
"base_cloner",
@@ -700,52 +699,6 @@ env.Library(
],
)
env.Library(
target="shard_merge_recipient_service",
source=[
"shard_merge_recipient_op_observer.cpp",
"shard_merge_recipient_service.cpp",
],
LIBDEPS=[
"$BUILD_DIR/mongo/client/fetcher",
"$BUILD_DIR/mongo/client/read_preference",
"$BUILD_DIR/mongo/db/catalog/commit_quorum_options",
"$BUILD_DIR/mongo/db/vector_clock_mutable",
"tenant_migration_access_blocker",
"tenant_migration_statistics",
"tenant_migration_utils",
],
LIBDEPS_PRIVATE=[
"$BUILD_DIR/mongo/client/clientdriver_network",
"$BUILD_DIR/mongo/db/catalog/catalog_helpers",
"$BUILD_DIR/mongo/db/catalog/collection_crud",
"$BUILD_DIR/mongo/db/catalog/local_oplog_info",
"$BUILD_DIR/mongo/db/concurrency/exception_util",
"$BUILD_DIR/mongo/db/index_builds_coordinator_mongod",
"$BUILD_DIR/mongo/db/multitenancy",
"$BUILD_DIR/mongo/db/pipeline/process_interface/mongo_process_interface",
"$BUILD_DIR/mongo/db/query/write_ops/write_ops_exec",
"$BUILD_DIR/mongo/db/serverless/serverless_lock",
"$BUILD_DIR/mongo/db/session/session_catalog_mongod",
"$BUILD_DIR/mongo/db/storage/storage_options",
"$BUILD_DIR/mongo/db/transaction/transaction",
"cloner_utils",
"oplog",
"oplog_buffer_collection",
"oplog_entry",
"oplog_fetcher",
"oplog_interface_local",
"primary_only_service",
"repl_server_parameters",
"replica_set_aware_service",
"replication_auth",
"tenant_migration_cloners",
"tenant_migration_state_machine_idl",
"tenant_oplog_processing",
"timestamp_block",
],
)
env.Library(
target="tenant_migration_recipient_service",
source=[
@@ -1027,8 +980,6 @@ if wiredtiger:
"rollback_checker_test.cpp",
"rollback_impl_test.cpp",
"scatter_gather_test.cpp",
"shard_merge_recipient_op_observer_test.cpp",
"shard_merge_recipient_service_test.cpp",
"speculative_majority_read_info_test.cpp",
"split_horizon_test.cpp",
"split_prepare_session_manager_test.cpp",
@@ -1037,13 +988,11 @@ if wiredtiger:
"sync_source_resolver_test.cpp",
"task_runner_test.cpp",
"task_runner_test_fixture.cpp",
"tenant_file_importer_service_test.cpp",
"tenant_migration_access_blocker_registry_test.cpp",
"tenant_migration_access_blocker_util_test.cpp",
"tenant_migration_recipient_access_blocker_test.cpp",
"tenant_migration_recipient_entry_helpers_test.cpp",
"tenant_oplog_applier_test.cpp",
"tenant_oplog_applier_shard_merge_test.cpp",
"tenant_oplog_batcher_test.cpp",
"vote_requester_test.cpp",
"wait_for_majority_service_test.cpp",
@@ -1133,7 +1082,6 @@ if wiredtiger:
"rollback_impl",
"rollback_test_fixture",
"scatter_gather",
"shard_merge_recipient_service",
"speculative_majority_read_info",
"split_horizon",
"split_prepare_session_manager",
@@ -1350,7 +1298,6 @@ env.Benchmark(
"primary_only_service",
"replication_consistency_markers_impl",
"replmocks",
"shard_merge_recipient_service",
"storage_interface_impl",
"tenant_migration_donor_service",
"tenant_migration_recipient_service",


@@ -70,10 +70,6 @@ BSONObj ClonerUtils::buildMajorityWaitRequest(Timestamp operationTime) {
bool ClonerUtils::isDatabaseForTenant(const DatabaseName& db,
const boost::optional<TenantId>& tenant,
MigrationProtocolEnum protocol) {
if (!tenant) {
return protocol == MigrationProtocolEnum::kShardMerge;
}
if (auto tenantId = db.tenantId()) {
return tenantId == *tenant;
}


@@ -247,28 +247,6 @@ Status insertDocumentsForOplog(OperationContext* opCtx,
return Status::OK();
}
void assertInitialSyncCanContinueDuringShardMerge(OperationContext* opCtx,
const NamespaceString& nss,
const OplogEntry& op) {
// Running shard merge during initial sync can lead to potential data loss on this node.
// So we perform a safety check during oplog catchup and at the end of initial sync
// recovery. (See recoverShardMergeRecipientAccessBlockers() for a detailed comment about
// the problematic scenario that can cause data loss.)
if (nss == NamespaceString::kShardMergeRecipientsNamespace) {
if (auto replCoord = repl::ReplicationCoordinator::get(opCtx); replCoord &&
replCoord->getSettings().isReplSet() && replCoord->getMemberState().startup2()) {
BSONElement idField = op.getObject().getField("_id");
// If the 'o' field does not have an _id, then 'o2' should have it.
// Otherwise, the oplog entry is corrupted.
if (idField.eoo() && op.getObject2()) {
idField = op.getObject2()->getField("_id");
}
const auto& migrationId = uassertStatusOK(UUID::parse(idField));
tenant_migration_access_blocker::assertOnUnsafeInitialSync(migrationId);
}
}
}
} // namespace
ApplyImportCollectionFn applyImportCollection = applyImportCollectionDefault;
@@ -1491,8 +1469,6 @@ Status applyOperation_inlock(OperationContext* opCtx,
const CollectionPtr& collection = collectionAcquisition.getCollectionPtr();
assertInitialSyncCanContinueDuringShardMerge(opCtx, requestNss, op);
BSONObj o = op.getObject();
// The feature compatibility version in the server configuration collection must not change


@@ -857,13 +857,6 @@ server_parameters:
redact: false
feature_flags:
featureFlagShardMerge:
description: When enabled, multitenant migration uses the "shard merge" protocol.
cpp_varname: feature_flags::gShardMerge
default: true
version: 7.1
shouldBeFCVGated: true
featureFlagSecondaryIndexChecksInDbCheck:
description: When enabled, dbCheck runs document and secondary index consistency checks in addition to replica set data consistency checks.
cpp_varname: feature_flags::gSecondaryIndexChecksInDbCheck
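The removed featureFlagShardMerge entry was FCV-gated (shouldBeFCVGated: true), so default: true alone did not turn it on; the node's feature compatibility version also had to be at or above the flag's version (7.1). Below is a minimal standalone model of that gating rule, with simplified assumed types; it is not MongoDB's real FeatureFlag class.

#include <cassert>

// Illustrative stand-in for a server version such as FCV 7.1.
struct Version {
    int major;
    int minor;
    bool operator>=(const Version& other) const {
        return major > other.major || (major == other.major && minor >= other.minor);
    }
};

// Models an FCV-gated feature flag: the flag takes effect only if it is enabled
// AND the node's current FCV has reached the version that introduced the flag.
class FcvGatedFeatureFlag {
public:
    FcvGatedFeatureFlag(bool enabled, Version introducedIn)
        : _enabled(enabled), _introducedIn(introducedIn) {}

    bool isEnabled(Version currentFcv) const {
        return _enabled && currentFcv >= _introducedIn;
    }

private:
    bool _enabled;
    Version _introducedIn;
};

int main() {
    FcvGatedFeatureFlag shardMerge(/*enabled=*/true, /*introducedIn=*/{7, 1});
    assert(shardMerge.isEnabled({7, 1}));   // FCV 7.1: flag takes effect
    assert(!shardMerge.isEnabled({7, 0}));  // FCV 7.0: gated off despite default: true
}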


@@ -46,7 +46,6 @@
#include "mongo/db/repl/replication_consistency_markers_gen.h"
#include "mongo/db/repl/replication_consistency_markers_impl.h"
#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/db/repl/shard_merge_recipient_op_observer.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/storage_interface_mock.h"
@@ -170,8 +169,6 @@ void setUpObservers(ServiceContext* serviceContext, ClusterRole clusterRole, boo
std::make_unique<repl::TenantMigrationDonorOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::TenantMigrationRecipientOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::ShardMergeRecipientOpObserver>());
}
if (!gMultitenancySupport) {
opObserverRegistry->addObserver(
@@ -199,8 +196,6 @@ void setUpObservers(ServiceContext* serviceContext, ClusterRole clusterRole, boo
std::make_unique<repl::TenantMigrationDonorOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::TenantMigrationRecipientOpObserver>());
opObserverRegistry->addObserver(
std::make_unique<repl::ShardMergeRecipientOpObserver>());
}
if (!gMultitenancySupport) {
opObserverRegistry->addObserver(


@@ -1,559 +0,0 @@
/**
* Copyright (C) 2023-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/repl/shard_merge_recipient_op_observer.h"
#include <algorithm>
#include <fmt/format.h>
#include <iterator>
#include <memory>
#include <string>
#include <absl/container/node_hash_set.h>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/move/utility_core.hpp>
#include <boost/optional/optional.hpp>
// IWYU pragma: no_include "boost/system/detail/error_code.hpp"
#include "mongo/base/error_codes.h"
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/concurrency/lock_manager_defs.h"
#include "mongo/db/database_name.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/index_builds_coordinator.h"
#include "mongo/db/multitenancy_gen.h"
#include "mongo/db/repl/tenant_file_importer_service.h"
#include "mongo/db/repl/tenant_migration_access_blocker.h"
#include "mongo/db/repl/tenant_migration_access_blocker_registry.h"
#include "mongo/db/repl/tenant_migration_access_blocker_util.h"
#include "mongo/db/repl/tenant_migration_decoration.h"
#include "mongo/db/repl/tenant_migration_recipient_access_blocker.h"
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
#include "mongo/db/repl/tenant_migration_util.h"
#include "mongo/db/repl/timestamp_block.h"
#include "mongo/db/serverless/serverless_operation_lock_registry.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/kv/kv_engine.h"
#include "mongo/db/storage/recovery_unit.h"
#include "mongo/db/storage/storage_engine.h"
#include "mongo/db/tenant_id.h"
#include "mongo/db/transaction_resources.h"
#include "mongo/idl/idl_parser.h"
#include "mongo/logv2/log.h"
#include "mongo/logv2/log_attr.h"
#include "mongo/logv2/log_component.h"
#include "mongo/stdx/unordered_set.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/decorable.h"
#include "mongo/util/str.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kReplication
namespace mongo::repl {
using namespace fmt;
using namespace shard_merge_utils;
namespace {
bool markedGCAfterMigrationStart(const ShardMergeRecipientDocument& doc) {
return !doc.getStartGarbageCollect() && doc.getExpireAt();
}
template <typename Func>
void runOnAlternateClient(const std::string& name, Func func) {
auto parentClientUnkillableByStepDown = [&] {
return !cc().canKillSystemOperationInStepdown(WithLock::withoutLock());
}();
auto client = getGlobalServiceContext()->getService(ClusterRole::ShardServer)->makeClient(name);
AlternativeClientRegion acr(client);
if (parentClientUnkillableByStepDown) {
stdx::lock_guard<Client> lk(cc());
cc().setSystemOperationUnkillableByStepdown(lk);
}
auto opCtx = cc().makeOperationContext();
func(opCtx.get());
}
/**
* Note: Refer to deleteTenantDataWhenMergeAborts() comment for the AlternativeClientRegion
* requirement.
*/
void dropTempFilesAndCollsIfAny(OperationContext* opCtx, const UUID& migrationId) {
// Drop the import done marker collection.
runOnAlternateClient("dropShardMergeMarkerColl", [&migrationId](OperationContext* acrOpCtx) {
dropImportDoneMarkerLocalCollection(acrOpCtx, migrationId);
});
const auto tempWTDirectory = fileClonerTempDir(migrationId);
// Do an early exit if the temp dir is not present.
if (!boost::filesystem::exists(tempWTDirectory))
return;
// Remove idents unknown to both storage and mdb_catalog.
bool filesRemoved = false;
const auto movingIdents = readMovingFilesMarker(tempWTDirectory);
for (const auto& ident : movingIdents) {
// It's impossible for files to be known by mdb_catalog but not storage. Files known to
// storage but not mdb_catalog could occur if the node restarts during import. However,
// startup recovery removes such files. Therefore, we only need to handle files unknown to
// both mdb_catalog and storage. Thus, verifying that the file (ident) exists in storage
// is sufficient.
bool identKnown =
getGlobalServiceContext()->getStorageEngine()->getEngine()->hasIdent(opCtx, ident);
if (!identKnown) {
filesRemoved = true;
removeFile(constructDestinationPath(ident));
}
}
if (filesRemoved)
fsyncDataDirectory();
// Remove the temp directory.
fsyncRemoveDirectory(tempWTDirectory);
}
/**
* Note: Though opObserver drops tenant collections only after the importer service stops importing
* the collection, a collection might be imported after opObserver's storage txn has started (i.e.,
* import collection storage txnId > opObserver storage txnId), causing the collection to be
* invisible to the opObserver. To ensure visibility of all imported collections to the opObserver,
* drop the tenant collection in AlternativeClientRegion.
*/
void deleteTenantDataWhenMergeAborts(const ShardMergeRecipientDocument& doc) {
runOnAlternateClient("dropShardMergeDonorTenantColls", [&doc](OperationContext* opCtx) {
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
invariant(doc.getAbortOpTime());
const auto dropOpTime = *doc.getAbortOpTime();
TimestampBlock tsBlock(opCtx, dropOpTime.getTimestamp());
UnreplicatedWritesBlock writeBlock{opCtx};
writeConflictRetry(opCtx, "dropShardMergeDonorTenantColls", NamespaceString::kEmpty, [&] {
WriteUnitOfWork wuow(opCtx);
for (const auto& tenantId : doc.getTenantIds()) {
std::vector<DatabaseName> databases;
if (gMultitenancySupport) {
databases = storageEngine->listDatabases(tenantId);
} else {
auto allDatabases = storageEngine->listDatabases();
std::copy_if(allDatabases.begin(),
allDatabases.end(),
std::back_inserter(databases),
[tenant = tenantId.toString() + "_"](const DatabaseName& db) {
// In a non-multitenancy environment, check if the db has a
// matching tenant prefix.
return StringData{
DatabaseNameUtil::serialize(
db, SerializationContext::stateDefault())}
.startsWith(tenant);
});
}
for (const auto& database : databases) {
AutoGetDb autoDb{opCtx, database, MODE_X};
Database* db = autoDb.getDb();
if (!db) {
continue;
}
LOGV2(7221802,
"Dropping tenant database for shard merge garbage collection",
"tenant"_attr = tenantId,
"database"_attr = database,
"migrationId"_attr = doc.getId(),
"abortOpTime"_attr = dropOpTime);
IndexBuildsCoordinator::get(opCtx)->assertNoBgOpInProgForDb(db->name());
auto catalog = CollectionCatalog::get(opCtx);
for (auto&& collection : catalog->range(db->name())) {
if (!collection) {
break;
}
uassertStatusOK(
db->dropCollectionEvenIfSystem(opCtx, collection->ns(), dropOpTime));
}
auto databaseHolder = DatabaseHolder::get(opCtx);
databaseHolder->close(opCtx, db->name());
}
}
wuow.commit();
});
});
}
void onShardMergeRecipientsNssInsert(OperationContext* opCtx,
std::vector<InsertStatement>::const_iterator first,
std::vector<InsertStatement>::const_iterator last) {
if (tenant_migration_access_blocker::inRecoveryMode(opCtx))
return;
for (auto it = first; it != last; it++) {
auto recipientStateDoc =
ShardMergeRecipientDocument::parse(IDLParserContext("recipientStateDoc"), it->doc);
switch (recipientStateDoc.getState()) {
case ShardMergeRecipientStateEnum::kStarted: {
invariant(!recipientStateDoc.getStartGarbageCollect());
const auto migrationId = recipientStateDoc.getId();
ServerlessOperationLockRegistry::get(opCtx->getServiceContext())
.acquireLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
migrationId);
shard_role_details::getRecoveryUnit(opCtx)->onRollback(
[migrationId](OperationContext* opCtx) {
ServerlessOperationLockRegistry::get(opCtx->getServiceContext())
.releaseLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
migrationId);
});
auto& registry =
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext());
for (const auto& tenantId : recipientStateDoc.getTenantIds()) {
registry.add(tenantId,
std::make_shared<TenantMigrationRecipientAccessBlocker>(
opCtx->getServiceContext(), migrationId));
}
shard_role_details::getRecoveryUnit(opCtx)->onRollback(
[migrationId](OperationContext* opCtx) {
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.removeAccessBlockersForMigration(
migrationId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
});
const auto& startAtOpTimeOptional = recipientStateDoc.getStartAtOpTime();
invariant(startAtOpTimeOptional);
shard_role_details::getRecoveryUnit(opCtx)->onCommit(
[migrationId, startAtOpTime = *startAtOpTimeOptional](OperationContext* opCtx,
auto _) {
repl::TenantFileImporterService::get(opCtx)->startMigration(migrationId,
startAtOpTime);
});
} break;
case ShardMergeRecipientStateEnum::kCommitted:
case ShardMergeRecipientStateEnum::kAborted:
invariant(recipientStateDoc.getStartGarbageCollect());
break;
default:
MONGO_UNREACHABLE;
}
}
}
void onDonatedFilesCollNssInsert(OperationContext* opCtx,
std::vector<InsertStatement>::const_iterator first,
std::vector<InsertStatement>::const_iterator last) {
if (tenant_migration_access_blocker::inRecoveryMode(opCtx))
return;
for (auto it = first; it != last; it++) {
const auto& metadataDoc = it->doc;
const auto migrationId = uassertStatusOK(UUID::parse(metadataDoc[kMigrationIdFieldName]));
repl::TenantFileImporterService::get(opCtx)->learnedFilename(migrationId, metadataDoc);
}
}
void assertStateTransitionIsValid(ShardMergeRecipientStateEnum prevState,
ShardMergeRecipientStateEnum nextState) {
auto validPrevStates = [&]() -> stdx::unordered_set<ShardMergeRecipientStateEnum> {
switch (nextState) {
case ShardMergeRecipientStateEnum::kStarted:
return {ShardMergeRecipientStateEnum::kStarted};
case ShardMergeRecipientStateEnum::kLearnedFilenames:
return {ShardMergeRecipientStateEnum::kStarted,
ShardMergeRecipientStateEnum::kLearnedFilenames};
case ShardMergeRecipientStateEnum::kConsistent:
return {ShardMergeRecipientStateEnum::kLearnedFilenames,
ShardMergeRecipientStateEnum::kConsistent};
case ShardMergeRecipientStateEnum::kCommitted:
return {ShardMergeRecipientStateEnum::kConsistent,
ShardMergeRecipientStateEnum::kCommitted};
case ShardMergeRecipientStateEnum::kAborted:
return {ShardMergeRecipientStateEnum::kStarted,
ShardMergeRecipientStateEnum::kLearnedFilenames,
ShardMergeRecipientStateEnum::kConsistent,
ShardMergeRecipientStateEnum::kAborted};
default:
MONGO_UNREACHABLE;
}
}();
uassert(7339766, "Invalid state transition", validPrevStates.contains(prevState));
}
void onTransitioningToLearnedFilenames(OperationContext* opCtx,
const ShardMergeRecipientDocument& recipientStateDoc) {
shard_role_details::getRecoveryUnit(opCtx)->onCommit(
[migrationId = recipientStateDoc.getId()](OperationContext* opCtx, auto _) {
repl::TenantFileImporterService::get(opCtx)->learnedAllFilenames(migrationId);
});
}
void onTransitioningToConsistent(OperationContext* opCtx,
const ShardMergeRecipientDocument& recipientStateDoc) {
assertImportDoneMarkerLocalCollExistsOnMergeConsistent(opCtx, recipientStateDoc.getId());
if (recipientStateDoc.getRejectReadsBeforeTimestamp()) {
shard_role_details::getRecoveryUnit(opCtx)->onCommit(
[recipientStateDoc](OperationContext* opCtx, auto _) {
auto mtabVector =
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.getRecipientAccessBlockersForMigration(recipientStateDoc.getId());
invariant(!mtabVector.empty());
for (auto& mtab : mtabVector) {
invariant(mtab);
mtab->startRejectingReadsBefore(
recipientStateDoc.getRejectReadsBeforeTimestamp().get());
}
});
}
}
void onTransitioningToCommitted(OperationContext* opCtx,
const ShardMergeRecipientDocument& recipientStateDoc) {
auto migrationId = recipientStateDoc.getId();
// It's safe to interrupt outside of the onCommit hook, as neither the decision to forget a
// migration nor the migration decision is reversible.
repl::TenantFileImporterService::get(opCtx)->interruptMigration(migrationId);
if (markedGCAfterMigrationStart(recipientStateDoc)) {
shard_role_details::getRecoveryUnit(opCtx)->onCommit([migrationId](OperationContext* opCtx,
auto _) {
auto mtabVector = TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.getRecipientAccessBlockersForMigration(migrationId);
invariant(!mtabVector.empty());
for (auto& mtab : mtabVector) {
invariant(mtab);
// Once the migration is committed and the state doc is marked garbage collectable,
// the TTL deletions should be unblocked for the imported donor collections.
mtab->stopBlockingTTL();
}
ServerlessOperationLockRegistry::get(opCtx->getServiceContext())
.releaseLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
migrationId);
});
repl::TenantFileImporterService::get(opCtx)->resetMigration(migrationId);
dropTempFilesAndCollsIfAny(opCtx, migrationId);
}
}
void onTransitioningToAborted(OperationContext* opCtx,
const ShardMergeRecipientDocument& recipientStateDoc) {
auto migrationId = recipientStateDoc.getId();
if (!markedGCAfterMigrationStart(recipientStateDoc)) {
// It's safe to interrupt outside of the onCommit hook, as neither the decision to forget
// a migration nor the migration decision is reversible.
repl::TenantFileImporterService::get(opCtx)->interruptMigration(migrationId);
const auto& importCompletedFuture =
repl::TenantFileImporterService::get(opCtx)->getImportCompletedFuture(migrationId);
// Wait for the importer service to stop the collection import task before dropping
// imported collections.
if (importCompletedFuture) {
LOGV2(7458507, "Waiting for the importer service to finish importing task");
importCompletedFuture->wait(opCtx);
}
deleteTenantDataWhenMergeAborts(recipientStateDoc);
} else {
shard_role_details::getRecoveryUnit(opCtx)->onCommit(
[migrationId](OperationContext* opCtx, auto _) {
// Remove access blocker and release locks to allow faster migration retry.
// (Note: There is no need to unblock TTL deletions, as we would have already dropped all
// imported donor collections immediately on transitioning to `kAborted`.)
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.removeAccessBlockersForMigration(
migrationId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
ServerlessOperationLockRegistry::get(opCtx->getServiceContext())
.releaseLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
migrationId);
});
repl::TenantFileImporterService::get(opCtx)->resetMigration(migrationId);
dropTempFilesAndCollsIfAny(opCtx, migrationId);
}
}
void handleUpdateRecoveryMode(OperationContext* opCtx,
const ShardMergeRecipientDocument& recipientStateDoc) {
// Note that we do not expect this path to run during initial sync (inconsistent data), as we
// intentionally crash the server upon detecting the state document oplog entry for replay.
const auto migrationId = recipientStateDoc.getId();
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
invariant(!(replCoord->getSettings().isReplSet() &&
repl::TenantFileImporterService::get(opCtx)->hasActiveMigration(migrationId)));
if (markedGCAfterMigrationStart(recipientStateDoc)) {
dropTempFilesAndCollsIfAny(opCtx, migrationId);
} else if (recipientStateDoc.getState() == ShardMergeRecipientStateEnum::kAborted) {
deleteTenantDataWhenMergeAborts(recipientStateDoc);
}
}
} // namespace
void ShardMergeRecipientOpObserver::onInserts(OperationContext* opCtx,
const CollectionPtr& coll,
std::vector<InsertStatement>::const_iterator first,
std::vector<InsertStatement>::const_iterator last,
const std::vector<RecordId>& recordIds,
std::vector<bool> fromMigrate,
bool defaultFromMigrate,
OpStateAccumulator* opAccumulator) {
if (coll->ns() == NamespaceString::kShardMergeRecipientsNamespace) {
onShardMergeRecipientsNssInsert(opCtx, first, last);
return;
}
if (isDonatedFilesCollection(coll->ns())) {
onDonatedFilesCollNssInsert(opCtx, first, last);
return;
}
}
void ShardMergeRecipientOpObserver::onUpdate(OperationContext* opCtx,
const OplogUpdateEntryArgs& args,
OpStateAccumulator* opAccumulator) {
if (args.coll->ns() != NamespaceString::kShardMergeRecipientsNamespace) {
return;
}
auto recipientStateDoc = ShardMergeRecipientDocument::parse(
IDLParserContext("recipientStateDoc"), args.updateArgs->updatedDoc);
if (tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
handleUpdateRecoveryMode(opCtx, recipientStateDoc);
return;
}
auto nextState = recipientStateDoc.getState();
auto prevState = ShardMergeRecipientState_parse(
IDLParserContext("preImageRecipientStateDoc"),
args.updateArgs->preImageDoc[ShardMergeRecipientDocument::kStateFieldName]
.valueStringData());
assertStateTransitionIsValid(prevState, nextState);
switch (nextState) {
case ShardMergeRecipientStateEnum::kStarted:
break;
case ShardMergeRecipientStateEnum::kLearnedFilenames:
onTransitioningToLearnedFilenames(opCtx, recipientStateDoc);
break;
case ShardMergeRecipientStateEnum::kConsistent:
onTransitioningToConsistent(opCtx, recipientStateDoc);
break;
case ShardMergeRecipientStateEnum::kCommitted:
onTransitioningToCommitted(opCtx, recipientStateDoc);
break;
case ShardMergeRecipientStateEnum::kAborted:
onTransitioningToAborted(opCtx, recipientStateDoc);
break;
default:
MONGO_UNREACHABLE;
}
}
void ShardMergeRecipientOpObserver::onDelete(OperationContext* opCtx,
const CollectionPtr& coll,
StmtId stmtId,
const BSONObj& doc,
const DocumentKey& documentKey,
const OplogDeleteEntryArgs& args,
OpStateAccumulator* opAccumulator) {
if (coll->ns() != NamespaceString::kShardMergeRecipientsNamespace ||
tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
return;
}
auto recipientStateDoc =
ShardMergeRecipientDocument::parse(IDLParserContext("recipientStateDoc"), doc);
bool isDocMarkedGarbageCollectable = [&] {
auto state = recipientStateDoc.getState();
auto expireAtIsSet = recipientStateDoc.getExpireAt().has_value();
invariant(!expireAtIsSet || state == ShardMergeRecipientStateEnum::kCommitted ||
state == ShardMergeRecipientStateEnum::kAborted);
return expireAtIsSet;
}();
uassert(ErrorCodes::IllegalOperation,
str::stream() << "Cannot delete the recipient state document "
<< " since it has not been marked as garbage collectable: "
<< tenant_migration_util::redactStateDoc(recipientStateDoc.toBSON()),
isDocMarkedGarbageCollectable);
TenantMigrationInfo migrationInfo(recipientStateDoc.getId());
shard_role_details::getRecoveryUnit(opCtx)->onCommit([migrationId = migrationInfo.uuid](
OperationContext* opCtx, auto _) {
LOGV2_INFO(
7339765, "Removing expired recipient access blocker", "migrationId"_attr = migrationId);
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.removeAccessBlockersForMigration(
migrationId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
});
}
repl::OpTime ShardMergeRecipientOpObserver::onDropCollection(OperationContext* opCtx,
const NamespaceString& collectionName,
const UUID& uuid,
std::uint64_t numRecords,
const CollectionDropType dropType,
bool markFromMigrate) {
if (collectionName == NamespaceString::kShardMergeRecipientsNamespace &&
!tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
uassert(
ErrorCodes::IllegalOperation,
str::stream() << "Cannot drop "
<< NamespaceString::kShardMergeRecipientsNamespace.toStringForErrorMsg()
<< " collection as it is not empty",
!numRecords);
}
return OpTime();
}
} // namespace mongo::repl
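The op observer above enforces the recipient state machine via assertStateTransitionIsValid(). For reference, here is a compact standalone sketch of the same transition table; the enum and helper names are assumptions for illustration, not MongoDB code.

#include <cassert>
#include <set>
#include <utility>

enum class State { kStarted, kLearnedFilenames, kConsistent, kCommitted, kAborted };

// Mirrors the transition table in assertStateTransitionIsValid() above:
// each pair is (previous state, allowed next state).
bool isValidTransition(State prev, State next) {
    static const std::set<std::pair<State, State>> kAllowed{
        {State::kStarted, State::kStarted},
        {State::kStarted, State::kLearnedFilenames},
        {State::kLearnedFilenames, State::kLearnedFilenames},
        {State::kLearnedFilenames, State::kConsistent},
        {State::kConsistent, State::kConsistent},
        {State::kConsistent, State::kCommitted},
        {State::kCommitted, State::kCommitted},
        {State::kStarted, State::kAborted},
        {State::kLearnedFilenames, State::kAborted},
        {State::kConsistent, State::kAborted},
        {State::kAborted, State::kAborted},
    };
    return kAllowed.count({prev, next}) > 0;
}

int main() {
    assert(isValidTransition(State::kLearnedFilenames, State::kConsistent));
    assert(!isValidTransition(State::kCommitted, State::kAborted));  // committed is terminal
}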


@@ -1,93 +0,0 @@
/**
* Copyright (C) 2023-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include <cstdint>
#include <vector>
#include "mongo/bson/bsonobj.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/op_observer/op_observer.h"
#include "mongo/db/op_observer/op_observer_noop.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/oplog.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/session/logical_session_id.h"
#include "mongo/util/uuid.h"
namespace mongo::repl {
/**
* OpObserver for shard merge recipient.
*/
class ShardMergeRecipientOpObserver final : public OpObserverNoop {
ShardMergeRecipientOpObserver(const ShardMergeRecipientOpObserver&) = delete;
ShardMergeRecipientOpObserver& operator=(const ShardMergeRecipientOpObserver&) = delete;
public:
ShardMergeRecipientOpObserver() = default;
~ShardMergeRecipientOpObserver() override = default;
NamespaceFilters getNamespaceFilters() const final {
return {NamespaceFilter::kConfig, NamespaceFilter::kConfig};
}
void onInserts(OperationContext* opCtx,
const CollectionPtr& coll,
std::vector<InsertStatement>::const_iterator first,
std::vector<InsertStatement>::const_iterator last,
const std::vector<RecordId>& recordIds,
std::vector<bool> fromMigrate,
bool defaultFromMigrate,
OpStateAccumulator* opAccumulator = nullptr) final;
void onUpdate(OperationContext* opCtx,
const OplogUpdateEntryArgs& args,
OpStateAccumulator* opAccumulator = nullptr) final;
void onDelete(OperationContext* opCtx,
const CollectionPtr& coll,
StmtId stmtId,
const BSONObj& doc,
const DocumentKey& documentKey,
const OplogDeleteEntryArgs& args,
OpStateAccumulator* opAccumulator = nullptr) final;
repl::OpTime onDropCollection(OperationContext* opCtx,
const NamespaceString& collectionName,
const UUID& uuid,
std::uint64_t numRecords,
CollectionDropType dropType,
bool markFromMigrate) final;
};
} // namespace mongo::repl


@@ -1,835 +0,0 @@
/**
* Copyright (C) 2023-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include <boost/filesystem/fstream.hpp>
#include <memory>
#include <string>
#include <utility>
#include "mongo/base/string_data.h"
#include "mongo/bson/oid.h"
#include "mongo/bson/timestamp.h"
#include "mongo/client/read_preference.h"
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/commands/create_gen.h"
#include "mongo/db/concurrency/lock_manager_defs.h"
#include "mongo/db/repl/member_state.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/db/repl/shard_merge_recipient_op_observer.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/tenant_migration_access_blocker_registry.h"
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
#include "mongo/db/serverless/serverless_operation_lock_registry.h"
#include "mongo/db/service_context.h"
#include "mongo/db/service_context_d_test_fixture.h"
#include "mongo/db/storage/durable_catalog.h"
#include "mongo/db/storage/write_unit_of_work.h"
#include "mongo/db/tenant_id.h"
#include "mongo/unittest/assert.h"
#include "mongo/unittest/death_test.h"
#include "mongo/unittest/framework.h"
#include "mongo/unittest/log_test.h"
#include "mongo/util/decorable.h"
#include "mongo/util/str.h"
namespace mongo::repl {
using namespace shard_merge_utils;
namespace {
const Timestamp kDefaultStartMigrationTimestamp(1, 1);
static const std::string kDefaultDonorConnStr = "donor-rs/localhost:12345";
static const std::string kDefaultRecipientConnStr = "recipient-rs/localhost:56789";
static const UUID kMigrationId = UUID::gen();
} // namespace
class ShardMergeRecipientOpObserverTest : public ServiceContextMongoDTest {
public:
static bool collectionExists(OperationContext* opCtx, const NamespaceString& nss) {
return static_cast<bool>(AutoGetCollectionForRead(opCtx, nss).getCollection());
}
void setUp() override {
ServiceContextMongoDTest::setUp();
auto serviceContext = getServiceContext();
// Need real (non-mock) storage for testing dropping marker collection.
StorageInterface::set(serviceContext, std::make_unique<StorageInterfaceImpl>());
auto replCoord = std::make_unique<repl::ReplicationCoordinatorMock>(serviceContext);
ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_PRIMARY));
repl::ReplicationCoordinator::set(serviceContext, std::move(replCoord));
_opCtx = makeOperationContext();
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).startup();
repl::createOplog(opCtx());
ASSERT_OK(createCollection(opCtx(),
CreateCommand(NamespaceString::kShardMergeRecipientsNamespace)));
}
void tearDown() override {
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).shutDown();
}
OperationContext* opCtx() const {
return _opCtx.get();
}
protected:
void performUpdates(const BSONObj& UpdatedDoc, const BSONObj& preImageDoc) {
AutoGetCollection collection(
opCtx(), NamespaceString::kShardMergeRecipientsNamespace, MODE_IX);
if (!collection)
FAIL(str::stream()
<< "Collection "
<< NamespaceString::kShardMergeRecipientsNamespace.toStringForErrorMsg()
<< " doesn't exist");
CollectionUpdateArgs updateArgs{preImageDoc};
updateArgs.updatedDoc = UpdatedDoc;
OplogUpdateEntryArgs update(&updateArgs, *collection);
WriteUnitOfWork wuow(opCtx());
_observer.onUpdate(opCtx(), update);
wuow.commit();
}
int64_t countLogLinesWithId(int32_t id) {
return countBSONFormatLogLinesIsSubset(BSON("id" << id));
}
std::vector<TenantId> _tenantIds{TenantId{OID::gen()}, TenantId{OID::gen()}};
private:
unittest::MinimumLoggedSeverityGuard _tenantMigrationSeverityGuard{
logv2::LogComponent::kTenantMigration, logv2::LogSeverity::Debug(1)};
ShardMergeRecipientOpObserver _observer;
ServiceContext::UniqueOperationContext _opCtx;
};
TEST_F(ShardMergeRecipientOpObserverTest, TransitionToConsistentWithImportDoneMarkerCollection) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto updatedDoc = recipientDoc.toBSON();
// Create the import done marker collection.
ASSERT_OK(createCollection(
opCtx(), CreateCommand(shard_merge_utils::getImportDoneMarkerNs(kMigrationId))));
performUpdates(updatedDoc, preImageDoc);
}
DEATH_TEST_REGEX_F(ShardMergeRecipientOpObserverTest,
TransitionToConsistentWithoutImportDoneMarkerCollection,
"Fatal assertion.*7219902") {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto updatedDoc = recipientDoc.toBSON();
performUpdates(updatedDoc, preImageDoc);
}
TEST_F(ShardMergeRecipientOpObserverTest, TransitionToAbortedDropsImportedCollection) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
auto updatedDoc = recipientDoc.toBSON();
const NamespaceString importedDonorCollNss1 =
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
const NamespaceString importedDonorCollNss2 =
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
performUpdates(updatedDoc, preImageDoc);
ASSERT(!collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(!collectionExists(opCtx(), importedDonorCollNss2));
}
TEST_F(ShardMergeRecipientOpObserverTest, TransitionToCommmittedShouldNotDropImportedCollection) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
auto updatedDoc = recipientDoc.toBSON();
const NamespaceString importedDonorCollNss1 =
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
const NamespaceString importedDonorCollNss2 =
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
performUpdates(updatedDoc, preImageDoc);
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
}
TEST_F(ShardMergeRecipientOpObserverTest,
TransitionToAbortedGarbageCollectableShouldDropTempFilesAndMarkerCollection) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
ServerlessOperationLockRegistry::get(opCtx()->getServiceContext())
.acquireLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient, kMigrationId);
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
auto updatedDoc = recipientDoc.toBSON();
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
// Create idents unknown to storage.
const auto unknownIdent1 = "collection-70--88888";
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
unknownIdent1Writer << "Dummy stream1 \n";
unknownIdent1Writer.close();
const auto unknownIdent2 = "index-71--88888";
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
unknownIdent2Writer << "Dummy stream2 \n";
unknownIdent2Writer.close();
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
// GC shouldn't remove these known idents.
for (const auto& ident : knownIdentListBeforeGC) {
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
}
// Create the marker collection.
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
// Verify that temp files and the marker collection exist before GC.
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(boost::filesystem::exists(unknownIdentPath1));
ASSERT(boost::filesystem::exists(unknownIdentPath2));
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
startCapturingLogMessages();
performUpdates(updatedDoc, preImageDoc);
stopCapturingLogMessages();
// Verify that temp files and the marker collection are deleted after GC.
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
// Verify that GC didn't remove any known idents.
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
}
TEST_F(ShardMergeRecipientOpObserverTest,
TransitionToCommittedGarbageCollectableShouldDropTempFilesAndMarkerCollection) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
auto& registry = TenantMigrationAccessBlockerRegistry::get(getGlobalServiceContext());
for (const auto& tenantId : _tenantIds) {
registry.add(tenantId,
std::make_shared<TenantMigrationRecipientAccessBlocker>(
opCtx()->getServiceContext(), kMigrationId));
}
ServerlessOperationLockRegistry::get(opCtx()->getServiceContext())
.acquireLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient, kMigrationId);
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
auto updatedDoc = recipientDoc.toBSON();
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
// Create idents unknown to storage.
const auto unknownIdent1 = "collection-70--88888";
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
unknownIdent1Writer << "Dummy stream1 \n";
unknownIdent1Writer.close();
const auto unknownIdent2 = "index-71--88888";
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
unknownIdent2Writer << "Dummy stream2 \n";
unknownIdent2Writer.close();
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
// GC shouldn't remove these known idents.
for (const auto& ident : knownIdentListBeforeGC) {
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
}
// Create the marker collection.
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
// Verify that temp files and the marker collection exist before GC.
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(boost::filesystem::exists(unknownIdentPath1));
ASSERT(boost::filesystem::exists(unknownIdentPath2));
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
startCapturingLogMessages();
performUpdates(updatedDoc, preImageDoc);
stopCapturingLogMessages();
// Verify that temp files and the marker collection are deleted after GC.
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
// Verify that GC didn't remove any known idents.
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
}
TEST_F(ShardMergeRecipientOpObserverTest,
TransitionToAbortedDropsImportedCollectionInStartupRecovery) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
auto updatedDoc = recipientDoc.toBSON();
const NamespaceString importedDonorCollNss1 =
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
const NamespaceString importedDonorCollNss2 =
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
// Simulate the node is in startup repl state.
ASSERT_OK(
repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(repl::MemberState::RS_STARTUP));
performUpdates(updatedDoc, preImageDoc);
ASSERT(!collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(!collectionExists(opCtx(), importedDonorCollNss2));
}
TEST_F(ShardMergeRecipientOpObserverTest,
TransitionToCommittedShouldNotDropImportedCollectionInStartupRecovery) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
auto updatedDoc = recipientDoc.toBSON();
const NamespaceString importedDonorCollNss1 =
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
const NamespaceString importedDonorCollNss2 =
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
// Simulate the node is in startup repl state.
ASSERT_OK(
repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(repl::MemberState::RS_STARTUP));
performUpdates(updatedDoc, preImageDoc);
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
}
TEST_F(
ShardMergeRecipientOpObserverTest,
TransitionToAbortedGarbageCollectableShouldDropTempFilesAndMarkerCollectionInStartupRecovery) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
auto updatedDoc = recipientDoc.toBSON();
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
// Create idents unknown to storage.
const auto unknownIdent1 = "collection-70--88888";
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
unknownIdent1Writer << "Dummy stream1 \n";
unknownIdent1Writer.close();
const auto unknownIdent2 = "index-71--88888";
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
unknownIdent2Writer << "Dummy stream2 \n";
unknownIdent2Writer.close();
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
// GC shouldn't remove these known idents.
for (const auto& ident : knownIdentListBeforeGC) {
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
}
// Create the marker collection.
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
// Verify that temp files and the marker collection exist before GC.
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(boost::filesystem::exists(unknownIdentPath1));
ASSERT(boost::filesystem::exists(unknownIdentPath2));
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
startCapturingLogMessages();
// Simulate the node is in startup repl state.
ASSERT_OK(
repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(repl::MemberState::RS_STARTUP));
performUpdates(updatedDoc, preImageDoc);
stopCapturingLogMessages();
// Verify that temp files and the marker collection are deleted after GC.
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
// Verify that GC didn't remove any known idents.
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
}
TEST_F(
ShardMergeRecipientOpObserverTest,
TransitionToCommittedGarbageCollectableShouldDropTempFilesAndMarkerCollectionInStartupRecovery) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
auto updatedDoc = recipientDoc.toBSON();
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
// Create idents unknown to storage.
const auto unknownIdent1 = "collection-70--88888";
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
unknownIdent1Writer << "Dummy stream1 \n";
unknownIdent1Writer.close();
const auto unknownIdent2 = "index-71--88888";
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
unknownIdent2Writer << "Dummy stream2 \n";
unknownIdent2Writer.close();
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
// GC shouldn't remove these known idents.
for (const auto& ident : knownIdentListBeforeGC) {
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
}
// Create the marker collection.
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
// Verify that temp files and the marker collection exist before GC.
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(boost::filesystem::exists(unknownIdentPath1));
ASSERT(boost::filesystem::exists(unknownIdentPath2));
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
startCapturingLogMessages();
// Simulate the node is in startup repl state.
ASSERT_OK(
repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(repl::MemberState::RS_STARTUP));
performUpdates(updatedDoc, preImageDoc);
stopCapturingLogMessages();
// Verify that temp files and the marker collection are deleted after GC.
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
// Verify that GC didn't remove any known idents.
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
}
TEST_F(ShardMergeRecipientOpObserverTest,
TransitionToAbortedDropsImportedCollectionInRollbackRecovery) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
auto updatedDoc = recipientDoc.toBSON();
const NamespaceString importedDonorCollNss1 =
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
const NamespaceString importedDonorCollNss2 =
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
// Simulate the node is in rollback repl state.
ASSERT_OK(repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(
repl::MemberState::RS_ROLLBACK));
performUpdates(updatedDoc, preImageDoc);
ASSERT(!collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(!collectionExists(opCtx(), importedDonorCollNss2));
}
TEST_F(ShardMergeRecipientOpObserverTest,
TransitionToCommittedShouldNotDropImportedCollectionInRollbackRecovery) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
auto updatedDoc = recipientDoc.toBSON();
const NamespaceString importedDonorCollNss1 =
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
const NamespaceString importedDonorCollNss2 =
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
// Simulate the node is in rollback repl state.
ASSERT_OK(repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(
repl::MemberState::RS_ROLLBACK));
performUpdates(updatedDoc, preImageDoc);
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
}
TEST_F(
ShardMergeRecipientOpObserverTest,
TransitionToAbortedGarbageCollectableShouldDropTempFilesAndMarkerCollectionInRollbackRecovery) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
auto updatedDoc = recipientDoc.toBSON();
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
// Create idents unknown to storage.
const auto unknownIdent1 = "collection-70--88888";
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
unknownIdent1Writer << "Dummy stream1 \n";
unknownIdent1Writer.close();
const auto unknownIdent2 = "index-71--88888";
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
unknownIdent2Writer << "Dummy stream2 \n";
unknownIdent2Writer.close();
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
// GC shouldn't remove these known idents.
for (const auto& ident : knownIdentListBeforeGC) {
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
}
// Create the marker collection.
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
// Verify that temp files and the marker collection exist before GC.
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(boost::filesystem::exists(unknownIdentPath1));
ASSERT(boost::filesystem::exists(unknownIdentPath2));
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
startCapturingLogMessages();
// Simulate the node is in rollback repl state.
ASSERT_OK(repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(
repl::MemberState::RS_ROLLBACK));
performUpdates(updatedDoc, preImageDoc);
stopCapturingLogMessages();
// Verify that temp files and the marker collection are deleted after GC.
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
// Verify that GC didn't remove any known idents.
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
}
TEST_F(
ShardMergeRecipientOpObserverTest,
TransitionToCommittedGarbageCollectableShouldDropTempFilesAndMarkerCollectionInRollbackRecovery) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
auto preImageDoc = recipientDoc.toBSON();
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
auto updatedDoc = recipientDoc.toBSON();
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
// Create idents unknown to storage.
const auto unknownIdent1 = "collection-70--88888";
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
unknownIdent1Writer << "Dummy stream1 \n";
unknownIdent1Writer.close();
const auto unknownIdent2 = "index-71--88888";
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
unknownIdent2Writer << "Dummy stream2 \n";
unknownIdent2Writer.close();
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
// GC shouldn't remove these known idents.
for (const auto& ident : knownIdentListBeforeGC) {
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
}
// Create the marker collection.
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
// Verify that temp files and the marker collection exist before GC.
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(boost::filesystem::exists(unknownIdentPath1));
ASSERT(boost::filesystem::exists(unknownIdentPath2));
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
startCapturingLogMessages();
// Simulate the node is in rollback repl state.
ASSERT_OK(repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(
repl::MemberState::RS_ROLLBACK));
performUpdates(updatedDoc, preImageDoc);
stopCapturingLogMessages();
// Verify that temp files and the marker collection are deleted after GC.
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
// Verify that GC didn't remove any known idents.
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
}
} // namespace mongo::repl
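The garbage-collection tests above repeat the same temp-file setup verbatim. As a minimal, hypothetical sketch, the fixture could factor that setup out along these lines; the struct and helper names below are invented, while every call inside the body (constructDestinationPath, fileClonerTempDir, writeMovingFilesMarker, createImportDoneMarkerLocalCollection) is taken from the tests themselves:

struct TempMigrationFiles {
    boost::filesystem::path unknownIdentPath1;
    boost::filesystem::path unknownIdentPath2;
    boost::filesystem::path fileClonerTempDirPath;
};

TempMigrationFiles setUpTempFilesAndMarkerCollection(
    OperationContext* opCtx,
    const UUID& migrationId,
    const std::vector<std::string>& knownIdents) {
    TempMigrationFiles files;
    // Create idents unknown to storage.
    const auto unknownIdent1 = "collection-70--88888";
    files.unknownIdentPath1 = constructDestinationPath(unknownIdent1);
    boost::filesystem::ofstream(files.unknownIdentPath1) << "Dummy stream1 \n";
    const auto unknownIdent2 = "index-71--88888";
    files.unknownIdentPath2 = constructDestinationPath(unknownIdent2);
    boost::filesystem::ofstream(files.unknownIdentPath2) << "Dummy stream2 \n";
    // Set up the file cloner temp directory with moving-files markers.
    files.fileClonerTempDirPath = fileClonerTempDir(migrationId);
    ASSERT_TRUE(boost::filesystem::create_directory(files.fileClonerTempDirPath));
    writeMovingFilesMarker(files.fileClonerTempDirPath, unknownIdent1, true);
    writeMovingFilesMarker(files.fileClonerTempDirPath, unknownIdent2, false);
    // GC shouldn't remove known idents, so write markers for them as well.
    for (const auto& ident : knownIdents) {
        writeMovingFilesMarker(files.fileClonerTempDirPath, ident, false);
    }
    // Create the import-done marker collection.
    createImportDoneMarkerLocalCollection(opCtx, migrationId);
    return files;
}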

File diff suppressed because it is too large


@@ -1,639 +0,0 @@
/**
* Copyright (C) 2023-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include <boost/move/utility_core.hpp>
#include <boost/none.hpp>
#include <boost/optional.hpp>
#include <boost/optional/optional.hpp>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "mongo/base/status.h"
#include "mongo/base/status_with.h"
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/timestamp.h"
#include "mongo/client/dbclient_connection.h"
#include "mongo/client/dbclient_cursor.h"
#include "mongo/client/fetcher.h"
#include "mongo/client/mongo_uri.h"
#include "mongo/client/read_preference.h"
#include "mongo/db/commands/tenant_migration_donor_cmds_gen.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/pipeline/aggregate_command_gen.h"
#include "mongo/db/pipeline/process_interface/mongo_process_interface.h"
#include "mongo/db/repl/data_replicator_external_state.h"
#include "mongo/db/repl/oplog_buffer_collection.h"
#include "mongo/db/repl/oplog_fetcher.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/repl/tenant_migration_shared_data.h"
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
#include "mongo/db/repl/tenant_oplog_applier.h"
#include "mongo/db/serverless/serverless_types_gen.h"
#include "mongo/db/service_context.h"
#include "mongo/db/tenant_id.h"
#include "mongo/executor/scoped_task_executor.h"
#include "mongo/executor/task_executor.h"
#include "mongo/rpc/metadata/repl_set_metadata.h"
#include "mongo/stdx/condition_variable.h"
#include "mongo/stdx/mutex.h"
#include "mongo/stdx/unordered_set.h"
#include "mongo/util/cancellation.h"
#include "mongo/util/concurrency/thread_pool.h"
#include "mongo/util/concurrency/with_lock.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/future.h"
#include "mongo/util/future_impl.h"
#include "mongo/util/net/hostandport.h"
#include "mongo/util/net/ssl_options.h"
#include "mongo/util/time_support.h"
#include "mongo/util/uuid.h"
namespace mongo {
class DBClientConnection;
class OperationContext;
class ReplicaSetMonitor;
class ServiceContext;
namespace repl {
class OplogBufferCollection;
/**
* ShardMergeRecipientService is a primary only service which orchestrates the
* data migration on the recipient side for the shard merge protocol.
*/
class ShardMergeRecipientService final : public PrimaryOnlyService {
// Disallows copying.
ShardMergeRecipientService(const ShardMergeRecipientService&) = delete;
ShardMergeRecipientService& operator=(const ShardMergeRecipientService&) = delete;
public:
static constexpr StringData kShardMergeRecipientServiceName = "ShardMergeRecipientService"_sd;
explicit ShardMergeRecipientService(ServiceContext* serviceContext);
~ShardMergeRecipientService() override = default;
StringData getServiceName() const final;
NamespaceString getStateDocumentsNS() const final;
ThreadPool::Limits getThreadPoolLimits() const final;
void checkIfConflictsWithOtherInstances(
OperationContext* opCtx,
BSONObj initialStateDoc,
const std::vector<const PrimaryOnlyService::Instance*>& existingInstances) final;
std::shared_ptr<PrimaryOnlyService::Instance> constructInstance(BSONObj initialStateDoc) final;
/**
* Interrupts all shard merge recipient service instances.
*/
void abortAllMigrations(OperationContext* opCtx);
class Instance final : public PrimaryOnlyService::TypedInstance<Instance> {
public:
explicit Instance(ServiceContext* serviceContext,
const ShardMergeRecipientService* recipientService,
BSONObj stateDoc);
SemiFuture<void> run(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept final;
/**
* Unconditional migration interrupt called on node's stepdown/shutdown event.
* Makes the instance not wait for the `recipientForgetMigration` command.
*/
void interrupt(Status status) override;
/**
* Conditional migration interrupt called on FCV change or due to an oplog fetcher error.
* Makes the instance wait for the `recipientForgetMigration` command.
*/
void interruptConditionally(Status status);
/**
* Interrupts the migration for garbage collection.
*/
void onReceiveRecipientForgetMigration(OperationContext* opCtx,
const MigrationDecisionEnum& decision);
/**
* Returns a Future that will be resolved when migration is completed.
*/
SharedSemiFuture<void> getMigrationCompletionFuture() const {
return _migrationCompletionPromise.getFuture();
}
/**
* Returns a Future that will be resolved when the instance has been durably marked garbage
* collectable.
*/
SharedSemiFuture<void> getForgetMigrationDurableFuture() const {
return _forgetMigrationDurablePromise.getFuture();
}
/**
* Returns the instance id.
*/
const UUID& getMigrationUUID() const;
/**
* Returns the instance state document.
*/
ShardMergeRecipientDocument getStateDoc() const;
boost::optional<BSONObj> reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode connMode,
MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept final;
void checkIfOptionsConflict(const BSONObj& stateDoc) const final;
/**
* Blocks the thread until the migration reaches consistent state in an interruptible
* mode.
*
* Returns the donor OpTime at which the migration reached consistent state. Throws
* exception on error.
*/
OpTime waitUntilMigrationReachesConsistentState(OperationContext* opCtx) const;
/**
* Blocks the thread until the tenant oplog applier applied data past the
* 'returnAfterReachingTimestamp' in an interruptible mode. If the recipient's logical clock
* has not yet reached the 'returnAfterReachingTimestamp', advances the recipient's logical
* clock to 'returnAfterReachingTimestamp'. Finally, stores the
* 'returnAfterReachingTimestamp' as 'rejectReadsBeforeTimestamp' in the state
* document and waits for the write to be replicated to every node (i.e. wait for
* 'rejectReadsBeforeTimestamp' to be set on the TenantMigrationRecipientAccessBlocker of
* every node) to guarantee that no reads will be incorrectly accepted.
*/
OpTime waitUntilMigrationReachesReturnAfterReachingTimestamp(
OperationContext* opCtx, const Timestamp& returnAfterReachingTimestamp);
/**
* Called when a replica set member (self, or a secondary) finishes importing donated files.
*/
void onMemberImportedFiles(const HostAndPort& host);
/**
* Set the oplog creator functor, to allow use of a mock oplog fetcher.
*/
void setCreateOplogFetcherFn_forTest(
std::unique_ptr<OplogFetcherFactory>&& createOplogFetcherFn) {
_createOplogFetcherFn = std::move(createOplogFetcherFn);
}
/**
* Stops the oplog applier without going through recipientForgetMigration.
*/
void stopOplogApplier_forTest() {
stdx::lock_guard lk(_mutex);
_tenantOplogApplier->shutdown();
}
/**
* Suppresses selecting 'host' as the donor sync source, until 'until'.
*/
void excludeDonorHost_forTest(const HostAndPort& host, Date_t until) {
stdx::lock_guard lk(_mutex);
_excludeDonorHost(lk, host, until);
}
const auto& getExcludedDonorHosts_forTest() {
return _excludedDonorHosts;
}
private:
friend class ShardMergeRecipientServiceTest;
/**
* Only used for testing. Allows setting a custom task executor for backup cursor fetcher.
*/
void setBackupCursorFetcherExecutor_forTest(
std::shared_ptr<executor::TaskExecutor> taskExecutor) {
_backupCursorExecutor = std::move(taskExecutor);
}
const NamespaceString _stateDocumentsNS = NamespaceString::kShardMergeRecipientsNamespace;
using ConnectionPair =
std::pair<std::unique_ptr<DBClientConnection>, std::unique_ptr<DBClientConnection>>;
/**
* Transitions the instance state to 'kStarted' if the state is uninitialized.
*/
SemiFuture<void> _initializeAndDurablyPersistStateDoc();
/**
* Executes the steps necessary to start a migration, such as establishing the donor
* client connection, setting up internal state, and fetching the donor cluster keys.
*/
SemiFuture<void> _prepareForMigration(const CancellationToken& token);
/**
* Sets up internal state to begin migration.
*/
void _setup(ConnectionPair connectionPair);
/**
* Starts the migration only if the following FCV checks pass:
* a) The node is not in the middle of an FCV upgrade/downgrade.
* b) The donor and recipient FCVs match.
*/
SemiFuture<void> _startMigrationIfSafeToRunwithCurrentFCV(const CancellationToken& token);
/**
* Helper to run FCV sanity checks at the start of migration.
*/
void _assertIfMigrationIsSafeToRunWithCurrentFcv();
/**
* Waits for all data bearing nodes to complete import.
*/
SemiFuture<void> _waitForAllNodesToFinishImport();
/**
* Tells whether the migration is committed or aborted.
*/
bool _isCommitOrAbortState(WithLock) const;
/**
* Waits for the recipientForgetMigration command to learn the migration decision and then
* marks the external keys doc and instance state doc as garbage collectable.
*/
SemiFuture<void> _waitForForgetMigrationThenMarkMigrationGarbageCollectable(
const CancellationToken& token);
/**
* Durably persists the migration decision in the state doc.
*/
SemiFuture<void> _durablyPersistCommitAbortDecision(MigrationDecisionEnum decision);
/*
* Drops ephemeral collections used for migrations after the migration decision is durably
* persisted.
*/
void _dropTempCollections();
/**
* Sets the `expireAt` field in the state doc.
*/
SemiFuture<void> _markStateDocAsGarbageCollectable();
/**
* Deletes the state document. Does not return the opTime for the delete, since it's not
* necessary to wait for this delete to be majority committed (this is one of the last steps
* in the chain, and if the delete rolls back, the new primary will re-do the delete).
*/
SemiFuture<void> _removeStateDoc(const CancellationToken& token);
SemiFuture<void> _waitForGarbageCollectionDelayThenDeleteStateDoc(
const CancellationToken& token);
/**
* Creates a client, connects it to the donor and uses the default
* authentication mode (KeyFile Authentication). Throws a user assertion on failure.
*/
std::unique_ptr<DBClientConnection> _connectAndAuth(const HostAndPort& serverAddress,
StringData applicationName);
/**
* Creates and connects both the oplog fetcher client and the client used for other
* operations.
*/
SemiFuture<ConnectionPair> _createAndConnectClients();
/**
* Fetches all key documents from the donor's admin.system.keys collection, stores them in
* config.external_validation_keys, and refreshes the keys cache.
*/
void _fetchAndStoreDonorClusterTimeKeyDocs(const CancellationToken& token);
/**
* Opens a backup cursor on the donor primary and fetches the
* list of donor files to be cloned.
*/
SemiFuture<void> _openBackupCursor(const CancellationToken& token);
SemiFuture<void> _openBackupCursorWithRetry(const CancellationToken& token);
/**
* Keeps the donor backup cursor alive.
*/
void _keepBackupCursorAlive(const CancellationToken& token);
/**
* Kills the backup cursor opened on the donor, if any.
*/
void _killBackupCursor();
/**
* Gets the backup cursor metadata info.
*/
const BackupCursorInfo& _getDonorBackupCursorInfo(WithLock) const;
/**
* Gets the oldest active multi-statement transaction optime by reading the
* config.transactions collection at the given ReadTimestamp (i.e., equal to
* startApplyingDonorOpTime) snapshot.
*/
boost::optional<OpTime> _getOldestActiveTransactionAt(Timestamp ReadTimestamp);
/**
* Retrieves the start/fetch optimes from the donor and updates the in-memory/on-disk states
* accordingly.
*/
SemiFuture<void> _getStartOpTimesFromDonor();
/**
* Pushes documents from oplog fetcher to oplog buffer.
*
* Returns a status even though it always returns OK, to conform to the interface
* OplogFetcher expects for the EnqueueDocumentsFn.
*/
Status _enqueueDocuments(OplogFetcher::Documents::const_iterator begin,
OplogFetcher::Documents::const_iterator end,
const OplogFetcher::DocumentsInfo& info);
/**
* Creates the oplog buffer that will be populated by donor oplog entries from the retryable
* writes fetching stage and oplog fetching stage.
*/
void _createOplogBuffer(WithLock, OperationContext* opCtx);
/**
* Runs an aggregation that gets the entire oplog chain for every retryable write entry in
* `config.transactions`. Only returns oplog entries in the chain where
* `ts` < `startFetchingOpTime.ts` and adds them to the oplog buffer.
*/
SemiFuture<void> _fetchRetryableWritesOplogBeforeStartOpTime();
/**
* Migrates committed transactions entries into 'config.transactions'.
*/
SemiFuture<void> _fetchCommittedTransactionsBeforeStartOpTime();
/**
* Opens and returns a cursor for all entries with 'lastWriteOpTime' <=
* 'startApplyingDonorOpTime' and state 'committed'.
*/
std::unique_ptr<DBClientCursor> _openCommittedTransactionsFindCursor();
/**
* Creates an aggregation pipeline to fetch transaction entries with 'lastWriteOpTime' <
* 'startFetchingDonorOpTime' and 'state: committed'.
*/
AggregateCommandRequest _makeCommittedTransactionsAggregation() const;
/**
* Processes a committed transaction entry from the donor. Updates the recipient's
* 'config.transactions' collection with the entry and writes a no-op entry for the
* recipient secondaries to replicate the entry.
*/
void _processCommittedTransactionEntry(const BSONObj& entry);
/**
* Starts the oplog buffer only if the node is primary. Otherwise, throws an error.
*/
void _startOplogBuffer(OperationContext* opCtx);
/**
* Starts the tenant oplog fetcher.
*/
void _startOplogFetcher();
/**
* Called when the oplog fetcher finishes. Usually the oplog fetcher finishes only when
* cancelled or on error.
*/
void _oplogFetcherCallback(Status oplogFetcherStatus);
/**
* Starts the tenant oplog applier.
*/
void _startOplogApplier();
/**
* Waits for tenant oplog applier to stop.
*/
SemiFuture<TenantOplogApplier::OpTimePair> _waitForMigrationToComplete();
/**
* Advances the majority commit timestamp to be >= donor's backup cursor checkpoint
* timestamp (CkptTs) by:
* 1. Advancing the clusterTime to CkptTs.
* 2. Writing a no-op oplog entry with ts > CkptTs.
* 3. Waiting for the majority commit timestamp to be the time of the no-op write.
*
* Notes: This method should be called before transitioning the instance state to
* 'kLearnedFilenames', which causes donor collections to get imported. The current import
* rule is that the imported table's checkpoint timestamp can't be later than the
* recipient's stable timestamp. Because we have no mechanism to wait until a specific
* stable timestamp is reached on a given node (or set of nodes) in the replica set, and
* because the majority commit point and stable timestamp aren't updated atomically,
* advancing the majority commit point on the recipient before the collection import stage
* is a best-effort attempt to prevent import retries due to import timestamp rule
* violations.
*/
SemiFuture<void> _advanceMajorityCommitTsToBkpCursorCheckpointTs(
const CancellationToken& token);
/**
* Returns a future that will be fulfilled when the tenant migration reaches consistent
* state.
*/
SemiFuture<void> _getDataConsistentFuture();
/**
* Transitions the instance state to 'kLearnedFilenames' after learning all filenames to be
* imported.
*/
SemiFuture<void> _enterLearnedFilenamesState();
/**
* Durably persist that migration has reached consistent state and signal waiters.
*/
SemiFuture<void> _enterConsistentState();
SemiFuture<void> _durablyPersistConsistentState();
/**
* Gets the migration interrupt status. The answer may change after this call, as it reads
* the interrupt status without holding the mutex. It's the caller's responsibility to
* decide whether to hold the mutex before calling this method.
*/
Status _getInterruptStatus() const;
/**
* Cancels all remaining work in the migration.
*/
void _cancelRemainingWork(WithLock lk, Status status);
/**
* Performs some cleanup work on migration completion, like, shutting down the components or
* fulfilling any instance promises.
*/
void _cleanupOnMigrationCompletion(Status status);
/**
* Suppresses selecting 'host' as the donor sync source, until 'until'.
*/
void _excludeDonorHost(WithLock, const HostAndPort& host, Date_t until);
/**
* Returns a vector of currently excluded donor hosts. Also removes hosts from the list of
* excluded donor nodes, if the exclude duration has expired.
*/
std::vector<HostAndPort> _getExcludedDonorHosts(WithLock);
/**
* Makes the failpoint stop or hang the migration based on failpoint data "action" field.
* If "action" is "hang" and 'opCtx' is not null, the failpoint will be interruptible.
*/
void _stopOrHangOnFailPoint(FailPoint* fp, OperationContext* opCtx = nullptr);
/**
* Updates the shard merge recipient state doc and waits for that change to be
* propagated to a majority.
*/
SemiFuture<void> _updateStateDocForMajority(WithLock lk);
/**
* Updates the shard merge recipient state doc. Throws error if it fails to
* update.
*/
void _updateStateDoc(OperationContext* opCtx, const ShardMergeRecipientDocument& stateDoc);
/**
* Returns the majority OpTime on the donor node that 'client' is connected to.
*/
OpTime _getDonorMajorityOpTime(std::unique_ptr<mongo::DBClientConnection>& client);
mutable stdx::mutex _mutex;
// All member variables are labeled with one of the following codes indicating the
// synchronization rules for accessing them.
//
// (R) Read-only in concurrent operation; no synchronization required.
// (S) Self-synchronizing; access according to class's own rules.
// (M) Reads and writes guarded by _mutex.
// (W) Synchronization required only for writes.
ServiceContext* const _serviceContext;
const ShardMergeRecipientService* const _recipientService; // (R) (not owned)
std::shared_ptr<executor::ScopedTaskExecutor> _scopedExecutor; // (M)
std::shared_ptr<executor::TaskExecutor> _backupCursorExecutor; // (M)
ShardMergeRecipientDocument _stateDoc; // (M)
// This data is provided in the initial state doc and never changes. We keep copies to
// avoid having to obtain the mutex to access them.
const std::vector<TenantId> _tenantIds; // (R)
const UUID _migrationUuid; // (R)
const std::string _donorConnectionString; // (R)
const MongoURI _donorUri; // (R)
const ReadPreferenceSetting _readPreference; // (R)
std::shared_ptr<ReplicaSetMonitor> _donorReplicaSetMonitor; // (M)
// Members of the donor replica set that we have excluded as a potential sync source for
// some period of time.
std::vector<std::pair<HostAndPort, Date_t>> _excludedDonorHosts; // (M)
// The '_client' will be used for other operations such as fetching
// optimes while the '_oplogFetcherClient' will be reserved for the oplog fetcher only.
// Because the oplog fetcher uses exhaust, we need a dedicated connection for the oplog fetcher.
//
// Follow DBClientCursor synchronization rules.
std::unique_ptr<DBClientConnection> _client; // (S)
std::unique_ptr<DBClientConnection> _oplogFetcherClient; // (S)
std::unique_ptr<Fetcher> _donorFilenameBackupCursorFileFetcher; // (M)
CancellationSource _backupCursorKeepAliveCancellation = {}; // (X)
boost::optional<SemiFuture<void>> _backupCursorKeepAliveFuture; // (M)
std::unique_ptr<OplogFetcherFactory> _createOplogFetcherFn =
std::make_unique<CreateOplogFetcherFn>(); // (M)
std::unique_ptr<OplogBufferCollection> _donorOplogBuffer; // (M)
std::unique_ptr<DataReplicatorExternalState> _dataReplicatorExternalState; // (M)
std::unique_ptr<OplogFetcher> _donorOplogFetcher; // (M)
std::shared_ptr<TenantOplogApplier> _tenantOplogApplier; // (M)
// Writer pool for storage write operations. Used by the tenant collection cloner and by
// the tenant oplog applier.
std::unique_ptr<ThreadPool> _workerPool; // (M)
// Data shared by cloners. Follow TenantMigrationSharedData synchronization rules.
std::unique_ptr<TenantMigrationSharedData> _sharedData; // (S)
// Promise that is resolved when all voting data-bearing recipient nodes have successfully
// imported all donor files.
SharedPromise<void> _importQuorumPromise; // (W)
// Whether we are waiting for members to import donor files.
bool _waitingForMembersToImportFiles = true;
// Which members have imported all donor files.
stdx::unordered_set<HostAndPort> _membersWhoHaveImportedFiles;
// Promise that is resolved when the migration reached consistent point.
SharedPromise<OpTime> _dataConsistentPromise; // (W)
// Promise that is resolved when migration is completed.
SharedPromise<void> _migrationCompletionPromise; // (W)
// Promise that is resolved when the recipientForgetMigration command is received or on
// stepDown/shutDown with errors.
SharedPromise<MigrationDecisionEnum> _receivedRecipientForgetMigrationPromise; // (W)
// Promise that is resolved when the instance has been durably marked garbage collectable.
SharedPromise<void> _forgetMigrationDurablePromise; // (W)
// Promise that is resolved when the instance is interrupted, and holds the interrupt
// error status.
SharedPromise<void> _interruptPromise; // (M)
// Waiters are notified when 'tenantOplogApplier' is valid on restart.
stdx::condition_variable _restartOplogApplierCondVar; // (M)
// Waiters are notified when 'tenantOplogApplier' is ready to use.
stdx::condition_variable _oplogApplierReadyCondVar; // (M)
// Indicates whether 'tenantOplogApplier' is ready to use or not.
bool _oplogApplierReady = false; // (M)
};
private:
/**
* Creates the state document collection.
*/
ExecutorFuture<void> _rebuildService(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) override;
ServiceContext* const _serviceContext;
};
} // namespace repl
} // namespace mongo
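The exclusion-list members above are self-pruning on read, per the comment on _getExcludedDonorHosts. A minimal sketch of what that method could look like, assuming only the members declared in this header (_excludedDonorHosts as (host, until) pairs, and _serviceContext's fast clock source); the real implementation lived in the deleted .cpp and may differ:

std::vector<HostAndPort> ShardMergeRecipientService::Instance::_getExcludedDonorHosts(WithLock) {
    const auto now = _serviceContext->getFastClockSource()->now();
    std::vector<HostAndPort> stillExcluded;
    // Drop entries whose exclusion window has lapsed; report the rest.
    for (auto it = _excludedDonorHosts.begin(); it != _excludedDonorHosts.end();) {
        if (it->second <= now) {
            it = _excludedDonorHosts.erase(it);
        } else {
            stillExcluded.push_back(it->first);
            ++it;
        }
    }
    return stillExcluded;
}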

File diff suppressed because it is too large


@@ -1,946 +0,0 @@
/**
* Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/repl/tenant_file_importer_service.h"
#include <boost/none.hpp>
#include <boost/optional.hpp>
#include <boost/optional/optional.hpp>
#include <fmt/format.h>
#include <mutex>
#include <utility>
#include <boost/move/utility_core.hpp>
#include "mongo/base/error_codes.h"
#include "mongo/base/status.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/db/catalog/import_options.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/client.h"
#include "mongo/db/commands/tenant_migration_recipient_cmds_gen.h"
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/database_name.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/op_observer/op_observer.h"
#include "mongo/db/profile_settings.h"
#include "mongo/db/repl/oplog_applier.h"
#include "mongo/db/repl/repl_server_parameters_gen.h"
#include "mongo/db/repl/replication_auth.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
#include "mongo/db/repl/tenant_migration_shared_data.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/durable_catalog.h"
#include "mongo/db/storage/recovery_unit.h"
#include "mongo/db/storage/storage_file_util.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_import.h"
#include "mongo/db/transaction_resources.h"
#include "mongo/executor/task_executor.h"
#include "mongo/idl/cluster_parameter_synchronization_helpers.h"
#include "mongo/logv2/log.h"
#include "mongo/logv2/log_attr.h"
#include "mongo/logv2/log_component.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/util/decorable.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/net/hostandport.h"
#include "mongo/util/net/ssl_options.h"
#include "mongo/util/scopeguard.h"
#include "mongo/util/str.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTenantMigration
MONGO_FAIL_POINT_DEFINE(hangBeforeFileImporterThreadExit);
MONGO_FAIL_POINT_DEFINE(skipCloneFiles);
MONGO_FAIL_POINT_DEFINE(hangBeforeVoteImportedFiles);
MONGO_FAIL_POINT_DEFINE(skipImportFiles);
MONGO_FAIL_POINT_DEFINE(hangBeforeImportingFiles);
namespace mongo::repl {
using namespace fmt::literals;
using namespace shard_merge_utils;
namespace {
const auto _TenantFileImporterService =
ServiceContext::declareDecoration<TenantFileImporterService>();
const ReplicaSetAwareServiceRegistry::Registerer<TenantFileImporterService>
_TenantFileImporterServiceRegisterer("TenantFileImporterService");
template <class Promise>
void setPromiseOkifNotReady(WithLock lk, Promise& promise) {
if (promise.getFuture().isReady()) {
return;
}
promise.emplaceValue();
}
/**
* Connects to the donor source and uses the default authentication mode.
*/
void connectAndAuth(const HostAndPort& source, DBClientConnection* client) {
client->connect(source, "TenantFileImporterService", boost::none);
uassertStatusOK(replAuthenticate(client).withContext(
str::stream() << "TenantFileImporterService failed to authenticate to " << source));
}
void buildStorageMetadata(const WTimportArgs& importArgs, BSONObjBuilder& bob) {
bob << importArgs.ident
<< BSON("tableMetadata" << importArgs.tableMetadata << "fileMetadata"
<< importArgs.fileMetadata);
}
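// The storage metadata object assembled above holds one entry per donor ident.
// Sketched from the builder calls (the values here are placeholders), its shape
// is roughly:
//
//     {
//         "collection-70--88888": {
//             tableMetadata: <WT table metadata>,
//             fileMetadata: <WT file metadata>
//         },
//         ...
//     }
//
// The combined object is presumably what importCollectionAndItsIndexesInMainWTInstance
// below receives as 'storageMetaObj' and hands to DurableCatalog::importCollection.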
/**
* Generates a new ident and moves the file.
* Performs an fsync on the destination file and the parent directories of both 'srcFilePath' and
* 'destFilePath'.
*/
std::string fsyncMoveWithNewIdent(OperationContext* opCtx,
const boost::filesystem::path& tempWTDirectory,
const mongo::NamespaceString& metadataNS,
const std::string& oldIdent,
const char* kind,
std::vector<boost::filesystem::path>& movedFiles) {
auto srcFilePath = constructSourcePath(tempWTDirectory, oldIdent);
while (true) {
try {
auto newIdent = DurableCatalog::get(opCtx)->generateUniqueIdent(metadataNS, kind);
auto destFilePath = constructDestinationPath(newIdent);
LOGV2_DEBUG(6114304,
1,
"Moving file",
"from"_attr = srcFilePath.string(),
"to"_attr = destFilePath.string());
uassert(6114401,
"Destination file '{}' already exists"_format(destFilePath.string()),
!boost::filesystem::exists(destFilePath));
writeMovingFilesMarker(
tempWTDirectory, newIdent, strcmp(kind, "collection") == 0);
uassertStatusOK(fsyncRename(srcFilePath, destFilePath)
.withContext(str::stream()
<< "Failed to move file from: " << srcFilePath.string()
<< " to: " << destFilePath.string()));
// Record the files to be cleaned up in case importing the collection and its
// indexes fails.
movedFiles.emplace_back(std::move(destFilePath));
return newIdent;
} catch (const DBException& ex) {
// Retry move on "destination file already exists" error. This can happen due to
// ident collision between this import and another parallel import via
// importCollection command.
if (ex.code() == 6114401) {
LOGV2(7199801,
"Failed to move file from temp to active WT directory. Retrying "
"the move operation using another new unique ident.",
"error"_attr = redact(ex.toStatus()));
continue;
}
throw;
}
}
MONGO_UNREACHABLE;
}
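// Illustrative sketch, not part of the original file: how a caller in this
// translation unit might drive fsyncMoveWithNewIdent for a single collection
// file. The wrapper name and parameters are hypothetical; only
// fsyncMoveWithNewIdent itself comes from the code above. 'movedFiles'
// accumulates destination paths so a failed import can remove any files
// already moved into the active WT directory.
std::string moveDonorCollectionFile(OperationContext* opCtx,
                                    const boost::filesystem::path& tempWTDirectory,
                                    const NamespaceString& nss,
                                    const std::string& donorIdent,
                                    std::vector<boost::filesystem::path>& movedFiles) {
    // "collection" selects the collection ident kind; "index" would be passed
    // for index files (see the strcmp on 'kind' above).
    return fsyncMoveWithNewIdent(
        opCtx, tempWTDirectory, nss, donorIdent, "collection", movedFiles);
}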
/**
* Import the collection and its indexes into the main wiredTiger instance.
*/
void importCollectionAndItsIndexesInMainWTInstance(OperationContext* opCtx,
const CollectionImportMetadata& metadata,
const UUID& migrationId,
const BSONObj& storageMetaObj) {
const auto nss = metadata.ns;
writeConflictRetry(opCtx, "importCollection", nss, [&] {
LOGV2_DEBUG(6114303, 1, "Importing donor collection", "ns"_attr = nss);
AutoGetDb autoDb(opCtx, nss.dbName(), MODE_IX);
auto db = autoDb.ensureDbExists(opCtx);
invariant(db);
Lock::CollectionLock collLock(opCtx, nss, MODE_X);
auto& dbProfileSettings = DatabaseProfileSettings::get(opCtx->getServiceContext());
WriteUnitOfWork wunit(opCtx);
AutoStatsTracker statsTracker(opCtx,
nss,
Top::LockType::NotLocked,
AutoStatsTracker::LogMode::kUpdateTopAndCurOp,
dbProfileSettings.getDatabaseProfileLevel(nss.dbName()));
// If the collection creation rolls back, ensure that the Top entry created for the
// collection is deleted.
shard_role_details::getRecoveryUnit(opCtx)->onRollback(
[nss, serviceContext = opCtx->getServiceContext()](OperationContext*) {
Top::get(serviceContext).collectionDropped(nss);
});
uassert(ErrorCodes::NamespaceExists,
str::stream() << "Collection already exists. NS: " << nss.toStringForErrorMsg(),
!CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss));
// Create Collection object.
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
auto durableCatalog = storageEngine->getCatalog();
ImportOptions importOptions(ImportOptions::ImportCollectionUUIDOption::kKeepOld);
importOptions.importTimestampRule = ImportOptions::ImportTimestampRule::kStable;
// Since we are using an ident generated by this recipient node, ident collisions
// after the import are not possible, so it's OK to skip the ident collision check.
// Otherwise, we would unnecessarily generate a new rand after each collection
// import.
importOptions.skipIdentCollisionCheck = true;
auto importResult = uassertStatusOK(DurableCatalog::get(opCtx)->importCollection(
opCtx, nss, metadata.catalogObject, storageMetaObj, importOptions));
const auto catalogEntry =
durableCatalog->getParsedCatalogEntry(opCtx, importResult.catalogId);
const auto md = catalogEntry->metadata;
for (const auto& index : md->indexes) {
uassert(6114301, "Cannot import non-ready indexes", index.ready);
}
std::shared_ptr<Collection> ownedCollection = Collection::Factory::get(opCtx)->make(
opCtx, nss, importResult.catalogId, md, std::move(importResult.rs));
ownedCollection->init(opCtx);
historicalIDTrackerAllowsMixedModeWrites(ownedCollection->getSharedDecorations())
.store(true);
// Update the number of records and data size on commit.
shard_role_details::getRecoveryUnit(opCtx)->registerChange(
makeCountsChange(ownedCollection->getRecordStore(), metadata));
CollectionCatalog::get(opCtx)->onCreateCollection(opCtx, ownedCollection);
auto importedCatalogEntry =
storageEngine->getCatalog()->getCatalogEntry(opCtx, importResult.catalogId);
opCtx->getServiceContext()->getOpObserver()->onImportCollection(opCtx,
migrationId,
nss,
metadata.numRecords,
metadata.dataSize,
importedCatalogEntry,
storageMetaObj,
/*dryRun=*/false);
wunit.commit();
if (metadata.numRecords > 0 &&
nss == NamespaceString::makeClusterParametersNSS(nss.tenantId())) {
cluster_parameters::initializeAllTenantParametersFromCollection(opCtx,
*ownedCollection);
}
LOGV2(6114300,
"Imported donor collection",
"ns"_attr = nss,
"numRecordsApprox"_attr = metadata.numRecords,
"dataSizeApprox"_attr = metadata.dataSize);
});
}
} // namespace
TenantFileImporterService* TenantFileImporterService::get(ServiceContext* serviceContext) {
return &_TenantFileImporterService(serviceContext);
}
TenantFileImporterService* TenantFileImporterService::get(OperationContext* opCtx) {
return get(opCtx->getServiceContext());
}
TenantFileImporterService::TenantFileImporterService()
: _createConnectionFn(
[]() { return std::make_unique<DBClientConnection>(true /* autoReconnect */); }) {}
TenantFileImporterService::MigrationHandle::MigrationHandle(const UUID& migrationId,
const OpTime& startMigrationOpTime)
: migrationId(migrationId),
startMigrationOpTime(startMigrationOpTime),
eventQueue(std::make_unique<Queue>()),
workerPool(
makeReplWorkerPool(tenantApplierThreadCount, "TenantFileImporterServiceWriter"_sd)),
sharedData(std::make_unique<TenantMigrationSharedData>(
getGlobalServiceContext()->getFastClockSource(), migrationId)) {
stats.fileCopyStart = Date_t::now();
}
void TenantFileImporterService::_makeMigrationHandleIfNotPresent(
WithLock, const UUID& migrationId, const OpTime& startMigrationOpTime) {
if (_mh)
return;
_mh = std::make_unique<MigrationHandle>(migrationId, startMigrationOpTime);
}
void TenantFileImporterService::startMigration(const UUID& migrationId,
const OpTime& startMigrationOpTime) {
stdx::lock_guard lk(_mutex);
if (_isShuttingDown) {
LOGV2_DEBUG(6690701,
3,
"TenantFileImporterService:: Not starting migration due to shutdown",
"migrationId"_attr = migrationId);
return;
}
_makeMigrationHandleIfNotPresent(lk, migrationId, startMigrationOpTime);
auto prevState = _transitionToState(lk, migrationId, State::kStarted);
if (prevState == State::kStarted)
return;
_mh->workerThread = std::make_unique<stdx::thread>([this, migrationId, startMigrationOpTime] {
Client::initThread("TenantFileImporterService",
getGlobalServiceContext()->getService(ClusterRole::ShardServer));
LOGV2_INFO(6378904,
"TenantFileImporterService worker thread started",
"migrationId"_attr = migrationId,
"startMigrationOpTime"_attr = startMigrationOpTime);
{
stdx::lock_guard<Client> lk(cc());
cc().setSystemOperationUnkillableByStepdown(lk);
}
try {
_handleEvents(migrationId);
} catch (...) {
LOGV2_ERROR(6615001,
"TenantFileImporterService::_handleEvents encountered an error",
"migrationId"_attr = migrationId,
"error"_attr = redact(exceptionToStatus()));
}
LOGV2_INFO(7800203,
"TenantFileImporterService worker thread exiting",
"migrationId"_attr = migrationId);
hangBeforeFileImporterThreadExit.pauseWhileSet();
});
}
void TenantFileImporterService::learnedFilename(const UUID& migrationId,
const BSONObj& metadataDoc) {
stdx::lock_guard lk(_mutex);
// Migration handle can be empty only if the node restarts, rolls back, or resyncs while a
// shard merge is in progress.
if (!_mh) {
LOGV2_DEBUG(7800204,
3,
"TenantFileImporterService:: Skipping learned filename",
"migrationId"_attr = migrationId,
"filename"_attr = metadataDoc["filename"]);
return;
}
(void)_transitionToState(lk, migrationId, State::kLearnedFilename);
_mh->stats.totalDataSize += std::max(0ll, metadataDoc["fileSize"].safeNumberLong());
ImporterEvent event{ImporterEvent::Type::kLearnedFileName, migrationId};
event.metadataDoc = metadataDoc.getOwned();
auto success = _mh->eventQueue->tryPush(std::move(event));
uassert(
6378903,
"TenantFileImporterService failed to push '{}' event without blocking for migrationId :{}"_format(
stateToString(_mh->state), migrationId.toString()),
success);
}
void TenantFileImporterService::learnedAllFilenames(const UUID& migrationId) {
stdx::lock_guard lk(_mutex);
// Migration handle can be empty only if the node restarts, rolls back, or resyncs while a
// shard merge is in progress.
if (!_mh) {
LOGV2_DEBUG(7800205,
3,
"TenantFileImporterService:: Skipping learned all filenames",
"migrationId"_attr = migrationId);
return;
}
auto prevState = _transitionToState(lk, migrationId, State::kLearnedAllFilenames);
if (prevState == State::kLearnedAllFilenames)
return;
auto success =
_mh->eventQueue->tryPush({ImporterEvent::Type::kLearnedAllFilenames, migrationId});
uassert(
6378902,
"TenantFileImporterService failed to push '{}' event without blocking for migrationId :{}"_format(
stateToString(_mh->state), migrationId.toString()),
success);
}
void TenantFileImporterService::interruptMigration(const UUID& migrationId) {
stdx::lock_guard lk(_mutex);
// Migration handle can be empty only if the node restarts, rolls back, or resyncs while a
// shard merge is in progress.
if (!_mh) {
LOGV2_DEBUG(7800206,
3,
"TenantFileImporterService:: Skipping interrupting migration",
"migrationId"_attr = migrationId);
return;
}
_interrupt(lk, migrationId);
}
void TenantFileImporterService::resetMigration(const UUID& migrationId) {
_resetMigrationHandle(migrationId);
}
void TenantFileImporterService::interruptAll() {
stdx::lock_guard lk(_mutex);
if (!_mh) {
return;
}
_interrupt(lk, _mh->migrationId);
}
void TenantFileImporterService::_handleEvents(const UUID& migrationId) {
auto uniqueOpCtx = cc().makeOperationContext();
OperationContext* opCtx = uniqueOpCtx.get();
std::unique_ptr<DBClientConnection> donorConnection;
Queue* eventQueue;
ThreadPool* workerPool;
TenantMigrationSharedData* sharedData;
ON_BLOCK_EXIT([this, opId = opCtx->getOpID(), &migrationId] {
stdx::lock_guard lk(_mutex);
invariant(_mh && migrationId == _mh->migrationId);
_mh->stats.fileCopyEnd = Date_t::now();
_mh->opCtx = nullptr;
_mh->donorConnection = nullptr;
});
{
stdx::lock_guard lk(_mutex);
invariant(_mh && migrationId == _mh->migrationId);
uassert(ErrorCodes::Interrupted,
str::stream() << "TenantFileImporterService was interrupted for migrationId:\""
<< migrationId << "\"",
_mh->state < State::kInterrupted);
_mh->opCtx = opCtx;
eventQueue = _mh->eventQueue.get();
workerPool = _mh->workerPool.get();
sharedData = _mh->sharedData.get();
}
auto setUpDonorConnectionIfNeeded = [&](const BSONObj& metadataDoc) {
// Return early if we have already set up the donor connection.
if (donorConnection) {
return;
}
donorConnection = _createConnectionFn();
auto source = HostAndPort::parseThrowing(metadataDoc[kDonorHostNameFieldName].str());
connectAndAuth(source, donorConnection.get());
stdx::lock_guard lk(_mutex);
invariant(_mh && migrationId == _mh->migrationId);
uassert(ErrorCodes::Interrupted,
str::stream() << "TenantFileImporterService was interrupted for migrationId=\""
<< migrationId << "\"",
_mh->state < State::kInterrupted);
_mh->donorConnection = donorConnection.get();
};
using eventType = ImporterEvent::Type;
while (true) {
opCtx->checkForInterrupt();
auto event = eventQueue->pop(opCtx);
// Out-of-order events for a different migration are not permitted.
invariant(event.migrationId == migrationId);
switch (event.type) {
case eventType::kNone:
continue;
case eventType::kLearnedFileName: {
// We won't have valid donor metadata until the first
// 'TenantFileImporterService::learnedFilename' call, so we need to set up the
// connection for the first kLearnedFileName event.
setUpDonorConnectionIfNeeded(event.metadataDoc);
_cloneFile(opCtx,
migrationId,
donorConnection.get(),
workerPool,
sharedData,
event.metadataDoc);
continue;
}
case eventType::kLearnedAllFilenames: {
if (MONGO_unlikely(hangBeforeImportingFiles.shouldFail())) {
LOGV2(8101400, "'hangBeforeImportingFiles' failpoint enabled");
hangBeforeImportingFiles.pauseWhileSet();
}
// This step prevents accidental deletion of committed donor data during startup and
// rollback recovery.
//
// For example, if a migration was initially aborted and retried
// successfully, a node restart or rollback could risk deleting committed donor data
// during oplog replay if recovery/stable timestamp < failed migration's
// abortOpTime. To prevent this data corruption case, a barrier is created by
// checkpointing the startMigrationTimestamp before importing collections for the
// ongoing migration attempt. This prevents startup/rollback recovery from
// replaying oplog entries from various migration attempts.
//
// Note: Since StartMigrationTimestamp is majority committed (given that all
// recipient state document writes are majority committed by the recipient state
// machine), it's safe to await its checkpointing without requiring a no-op write.
_waitUntilStartMigrationTimestampIsCheckpointed(opCtx, migrationId);
_runRollbackAndThenImportFiles(opCtx, migrationId);
createImportDoneMarkerLocalCollection(opCtx, migrationId);
// Take a stable checkpoint to persist both the imported donor collections and the
// marker collection to disk.
opCtx->getServiceContext()->getStorageEngine()->waitUntilUnjournaledWritesDurable(
opCtx,
/*stableCheckpoint*/ true);
_voteImportedFiles(opCtx, migrationId);
return;
}
}
MONGO_UNREACHABLE;
}
}
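// Clones a single donor file described by 'metadataDoc' into the migration's temporary WT
// directory via a TenantFileCloner; bytes copied are folded into the migration stats on exit.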
void TenantFileImporterService::_cloneFile(OperationContext* opCtx,
const UUID& migrationId,
DBClientConnection* clientConnection,
ThreadPool* workerPool,
TenantMigrationSharedData* sharedData,
const BSONObj& metadataDoc) {
if (MONGO_unlikely(skipCloneFiles.shouldFail())) {
LOGV2(7800201,
"Skipping file cloning due to 'skipCloneFiles' failpoint enabled",
"migrationId"_attr = migrationId);
return;
}
const auto fileName = metadataDoc["filename"].str();
const auto backupId = UUID(uassertStatusOK(UUID::parse(metadataDoc[kBackupIdFieldName])));
const auto remoteDbpath = metadataDoc["remoteDbpath"].str();
const size_t fileSize = std::max(0ll, metadataDoc["fileSize"].safeNumberLong());
const auto relativePath =
boost::filesystem::relative(fileName, metadataDoc[kDonorDbPathFieldName].str()).string();
LOGV2_DEBUG(6113320,
1,
"Cloning file",
"migrationId"_attr = migrationId,
"metadata"_attr = metadataDoc,
"destinationRelativePath"_attr = relativePath);
invariant(!relativePath.empty());
auto currentTenantFileCloner =
std::make_unique<TenantFileCloner>(backupId,
migrationId,
fileName,
fileSize,
relativePath,
sharedData,
clientConnection->getServerHostAndPort(),
clientConnection,
repl::StorageInterface::get(cc().getServiceContext()),
workerPool);
ON_BLOCK_EXIT([this, &migrationId] {
stdx::lock_guard lk(_mutex);
invariant(_mh && migrationId == _mh->migrationId);
if (_mh->currentTenantFileCloner) {
_mh->stats.totalBytesCopied += _mh->currentTenantFileCloner->getStats().bytesCopied;
_mh->currentTenantFileCloner = nullptr;
}
});
{
stdx::lock_guard lk(_mutex);
invariant(_mh && migrationId == _mh->migrationId);
_mh->currentTenantFileCloner = currentTenantFileCloner.get();
}
auto cloneStatus = currentTenantFileCloner->run();
uassertStatusOK(cloneStatus.withContext(str::stream()
<< "Failed to clone file, migrationId: " << migrationId
<< ", fileName: " << fileName));
}
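// Polls the storage engine's last stable recovery timestamp, forcing a stable checkpoint on
// each iteration, until the checkpoint covers the majority-committed startMigrationTimestamp.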
void TenantFileImporterService::_waitUntilStartMigrationTimestampIsCheckpointed(
OperationContext* opCtx, const UUID& migrationId) {
const auto startMigrationTs = [&] {
stdx::lock_guard<stdx::mutex> lg(_mutex);
invariant(_mh && migrationId == _mh->migrationId);
return _mh->startMigrationOpTime.getTimestamp();
}();
bool firstWait = true;
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
while (true) {
const auto& recoveryTs = storageEngine->getLastStableRecoveryTimestamp();
if (recoveryTs && *recoveryTs >= startMigrationTs) {
break;
}
if (firstWait) {
LOGV2_DEBUG(7458500,
2,
"Wait for start migration timestamp to be checkpointed",
"startMigrationTimestamp"_attr = startMigrationTs,
"lastCheckpointTimestamp"_attr = recoveryTs);
firstWait = false;
}
// Sleep a bit so we do not keep hammering the system.
opCtx->sleepFor(Milliseconds(100));
opCtx->getServiceContext()->getStorageEngine()->waitUntilUnjournaledWritesDurable(
opCtx,
/*stableCheckpoint*/ true);
}
}
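// Runs WT rollback-to-stable on the cloned files in the temporary dbpath, then moves each
// collection/index file into the main dbpath under a freshly generated ident and imports it
// into the main WT instance with replication disabled.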
void TenantFileImporterService::_runRollbackAndThenImportFiles(OperationContext* opCtx,
const UUID& migrationId) {
if (MONGO_unlikely(skipImportFiles.shouldFail())) {
LOGV2(7800200,
"Skipping file import due to 'skipImportFiles' failpoint enabled",
"migrationId"_attr = migrationId);
return;
}
auto tempWTDirectory = fileClonerTempDir(migrationId);
uassert(6113315,
str::stream() << "Missing file cloner's temporary dbpath directory: "
<< tempWTDirectory.string(),
boost::filesystem::exists(tempWTDirectory));
ON_BLOCK_EXIT([&tempWTDirectory, &migrationId] {
LOGV2_INFO(6113324,
"Done importing files, removing the temporary WT dbpath",
"migrationId"_attr = migrationId,
"tempDbPath"_attr = tempWTDirectory.string());
fsyncRemoveDirectory(tempWTDirectory);
});
auto metadatas =
wiredTigerRollbackToStableAndGetMetadata(opCtx, tempWTDirectory.string(), migrationId);
{
stdx::lock_guard lk(_mutex);
invariant(_mh && migrationId == _mh->migrationId);
_mh->importStarted = true;
}
ON_BLOCK_EXIT([&] {
stdx::lock_guard lk(_mutex);
invariant(_mh && migrationId == _mh->migrationId);
setPromiseOkifNotReady(lk, _mh->importCompletedPromise);
});
// Disable replication because this logic is executed on all nodes during a Shard Merge.
repl::UnreplicatedWritesBlock uwb(opCtx);
for (auto&& metadata : metadatas) {
// Check for migration interrupt before importing the collection.
opCtx->checkForInterrupt();
std::vector<boost::filesystem::path> movedFiles;
ScopeGuard removeFilesGuard([&] {
for (const auto& filePath : movedFiles) {
removeFile(filePath);
}
if (!movedFiles.empty())
fsyncDataDirectory();
});
BSONObjBuilder catalogMetaBuilder;
BSONObjBuilder storageMetaBuilder;
// Move the collection file and its associated index files from the temp dir to the dbpath,
// and regenerate the metadata info with a new unique ident id.
auto newCollIdent = fsyncMoveWithNewIdent(opCtx,
tempWTDirectory,
metadata.ns,
metadata.collection.ident,
"collection",
movedFiles);
catalogMetaBuilder.append("ident", newCollIdent);
// Update the collection ident id.
metadata.collection.ident = std::move(newCollIdent);
buildStorageMetadata(metadata.collection, storageMetaBuilder);
BSONObjBuilder newIndexIdentMap;
for (auto&& index : metadata.indexes) {
auto newIndexIdent = fsyncMoveWithNewIdent(
opCtx, tempWTDirectory, metadata.ns, index.ident, "index", movedFiles);
newIndexIdentMap.append(index.indexName, newIndexIdent);
// Update the index ident id.
index.ident = std::move(newIndexIdent);
buildStorageMetadata(index, storageMetaBuilder);
}
catalogMetaBuilder.append("idxIdent", newIndexIdentMap.obj());
metadata.catalogObject = metadata.catalogObject.addFields(catalogMetaBuilder.obj());
const auto storageMetaObj = storageMetaBuilder.done();
importCollectionAndItsIndexesInMainWTInstance(opCtx, metadata, migrationId, storageMetaObj);
removeFilesGuard.dismiss();
}
}
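// Notifies the primary of import completion by running the 'recipientVoteImportedFiles'
// command, retrying network errors with exponential backoff (starting at one second).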
void TenantFileImporterService::_voteImportedFiles(OperationContext* opCtx,
const UUID& migrationId) {
if (MONGO_unlikely(hangBeforeVoteImportedFiles.shouldFail())) {
LOGV2(7675000, "'hangBeforeVoteImportedFiles' failpoint enabled");
hangBeforeVoteImportedFiles.pauseWhileSet();
}
// Build the command request.
auto replCoord = ReplicationCoordinator::get(getGlobalServiceContext());
RecipientVoteImportedFiles cmd(migrationId, replCoord->getMyHostAndPort());
Backoff exponentialBackoff(Seconds(1), Milliseconds::max());
while (true) {
opCtx->checkForInterrupt();
try {
auto voteResponse = replCoord->runCmdOnPrimaryAndAwaitResponse(
opCtx,
DatabaseName::kAdmin,
cmd.toBSON(),
[](executor::TaskExecutor::CallbackHandle handle) {},
[](executor::TaskExecutor::CallbackHandle handle) {});
uassertStatusOK(getStatusFromCommandResult(voteResponse));
} catch (DBException& ex) {
if (ErrorCodes::isNetworkError(ex)) {
LOGV2_INFO(7675001,
"Retrying 'recipientVoteImportedFiles' command",
"retryError"_attr = redact(ex));
// Don't hammer the network.
opCtx->sleepFor(exponentialBackoff.nextSleep());
continue;
}
ex.addContext("Failed to run 'recipientVoteImportedFiles' command");
throw;
}
break;
}
}
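// Transitions the migration to kInterrupted (a no-op if already interrupted), then tears
// down the donor connection, worker pool, shared-data status, event queue, and opCtx so
// that the worker thread unwinds promptly.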
void TenantFileImporterService::_interrupt(WithLock lk, const UUID& migrationId) {
auto prevState = _transitionToState(lk, migrationId, State::kInterrupted);
if (prevState == State::kInterrupted)
return;
if (_mh->donorConnection) {
_mh->donorConnection->shutdownAndDisallowReconnect();
}
if (_mh->workerPool) {
_mh->workerPool->shutdown();
}
if (_mh->sharedData) {
stdx::lock_guard<TenantMigrationSharedData> sharedDatalk(*_mh->sharedData);
// Prevent the TenantFileCloner from getting retried on retryable errors.
_mh->sharedData->setStatusIfOK(
sharedDatalk, Status{ErrorCodes::CallbackCanceled, "TenantFileCloner canceled"});
}
if (_mh->eventQueue) {
_mh->eventQueue->closeConsumerEnd();
}
if (_mh->opCtx) {
stdx::lock_guard<Client> clientLock(*_mh->opCtx->getClient());
_mh->opCtx->markKilled(ErrorCodes::Interrupted);
}
// _runRollbackAndThenImportFiles() will fulfill the promise if importStarted is true.
if (!_mh->importStarted) {
setPromiseOkifNotReady(lk, _mh->importCompletedPromise);
}
}
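// Concurrent resets are serialized via _resetCV/_resetInProgress; the mutex is dropped
// while joining the worker thread and pool so those workers can acquire it while shutting
// down.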
void TenantFileImporterService::_resetMigrationHandle(boost::optional<const UUID&> migrationId) {
stdx::unique_lock<stdx::mutex> lk(_mutex);
_resetCV.wait(lk, [this]() { return !_resetInProgress; });
if (!_mh) {
return;
}
if (!migrationId) {
migrationId = _mh->migrationId;
}
(void)_transitionToState(lk, migrationId.value(), State::kStopped, true /*dryRun*/);
_resetInProgress = true;
auto workerThread = _mh->workerThread.get();
auto workerPool = _mh->workerPool.get();
lk.unlock();
LOGV2(7800207,
"TenantFileImporterService::Waiting for worker threads to join",
"migrationId"_attr = migrationId);
if (workerThread && workerThread->joinable()) {
workerThread->join();
}
if (workerPool) {
workerPool->join();
}
lk.lock();
(void)_transitionToState(lk, migrationId.value(), State::kStopped);
_mh.reset();
_resetInProgress = false;
_resetCV.notify_all();
}
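// Valid forward path (same-state transitions are idempotent):
//   kUninitialized -> kStarted -> kLearnedFilename -> kLearnedAllFilenames
//     -> kInterrupted -> kStopped
// kInterrupted may be entered from any earlier state, and kStopped additionally from
// kUninitialized. Returns the state that was current before the transition.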
TenantFileImporterService::State TenantFileImporterService::_transitionToState(
WithLock, const UUID& migrationId, State targetState, const bool dryRun) {
const auto isValid = [&] {
if (!_mh || migrationId != _mh->migrationId)
return false;
switch (targetState) {
case State::kUninitialized:
return _mh->state == State::kUninitialized;
case State::kStarted:
return _mh->state <= State::kStarted;
case State::kLearnedFilename:
return _mh->state <= State::kLearnedFilename;
case State::kLearnedAllFilenames:
return _mh->state == State::kLearnedFilename ||
_mh->state == State::kLearnedAllFilenames;
case State::kInterrupted:
return _mh->state <= State::kInterrupted;
case State::kStopped:
return _mh->state == State::kUninitialized || _mh->state >= State::kInterrupted;
default:
MONGO_UNREACHABLE;
}
}();
std::stringstream errMsg;
errMsg << "Failed state transition check for migrationID: " << migrationId
<< ", state: " << stateToString(targetState);
if (_mh) {
errMsg << ", current migrationId: " << _mh->migrationId
<< ", current state: " << stateToString(_mh->state);
}
uassert(7800210, errMsg.str(), isValid);
if (dryRun)
return _mh->state;
if (targetState != _mh->state) {
LOGV2(7800208,
"TenantFileImporterService:: Transitioning state to",
"migrationId"_attr = migrationId,
"state"_attr = stateToString(targetState));
}
std::swap(_mh->state, targetState);
return targetState;
}
boost::optional<SharedSemiFuture<void>> TenantFileImporterService::getImportCompletedFuture(
const UUID& migrationId) {
stdx::lock_guard lk(_mutex);
return (_mh && _mh->migrationId == migrationId)
? boost::make_optional(_mh->importCompletedPromise.getFuture())
: boost::none;
}
bool TenantFileImporterService::hasActiveMigration(const UUID& migrationId) {
stdx::lock_guard lk(_mutex);
return _mh && _mh->migrationId == migrationId;
}
BSONObj TenantFileImporterService::getStats(boost::optional<const UUID&> migrationId) {
BSONObjBuilder bob;
getStats(bob, migrationId);
return bob.obj();
}
void TenantFileImporterService::getStats(BSONObjBuilder& bob,
boost::optional<const UUID&> migrationId) {
stdx::lock_guard lk(_mutex);
if (!_mh || (migrationId && migrationId.value() != _mh->migrationId))
return;
bob.append("approxTotalDataSize", static_cast<long long>(_mh->stats.totalDataSize));
auto approxTotalBytesCopied = _mh->stats.totalBytesCopied;
if (_mh->currentTenantFileCloner) {
approxTotalBytesCopied += _mh->currentTenantFileCloner->getStats().bytesCopied;
}
bob.append("approxTotalBytesCopied", static_cast<long long>(approxTotalBytesCopied));
auto fileCopyEnd = [&]() {
return _mh->stats.fileCopyEnd == Date_t() ? Date_t::now() : _mh->stats.fileCopyEnd;
}();
auto elapsedMillis =
duration_cast<Milliseconds>(fileCopyEnd - _mh->stats.fileCopyStart).count();
bob.append("totalReceiveElapsedMillis", static_cast<long long>(elapsedMillis));
if (approxTotalBytesCopied > _mh->stats.totalDataSize) {
LOGV2_ERROR(7800209,
"TenantFileImporterService::Bytes copied is greater than actual data size",
"migrationId"_attr = _mh->migrationId,
"totalDataSize"_attr = _mh->stats.totalDataSize,
"totalBytesCopied"_attr = _mh->stats.totalDataSize);
}
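// Estimate the remaining transfer time by linearly extrapolating the observed throughput;
// the '+ 1' in the denominator guards against division by zero before any bytes are copied.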
int64_t timeRemainingMillis =
((_mh->stats.totalDataSize - approxTotalBytesCopied) * elapsedMillis) /
(approxTotalBytesCopied + 1);
bob.append("remainingReceiveEstimatedMillis", static_cast<long long>(timeRemainingMillis));
}
} // namespace mongo::repl

View File

@ -1,367 +0,0 @@
/**
* Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include <boost/move/utility_core.hpp>
#include <boost/optional/optional.hpp>
#include <functional>
#include <memory>
#include <string>
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/client/dbclient_connection.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/replica_set_aware_service.h"
#include "mongo/db/repl/tenant_file_cloner.h"
#include "mongo/db/repl/tenant_migration_shared_data.h"
#include "mongo/db/service_context.h"
#include "mongo/stdx/mutex.h"
#include "mongo/stdx/thread.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/concurrency/thread_pool.h"
#include "mongo/util/concurrency/with_lock.h"
#include "mongo/util/producer_consumer_queue.h"
#include "mongo/util/string_map.h"
#include "mongo/util/uuid.h"
namespace mongo::repl {
/**
 * Replica set aware service that runs on both the primary and secondaries. It orchestrates
 * copying data files from the donor, importing those files, and notifying the primary when
 * the import succeeds.
*/
class TenantFileImporterService : public ReplicaSetAwareService<TenantFileImporterService> {
public:
static constexpr StringData kTenantFileImporterServiceName = "TenantFileImporterService"_sd;
static TenantFileImporterService* get(ServiceContext* serviceContext);
static TenantFileImporterService* get(OperationContext* opCtx);
TenantFileImporterService();
using CreateConnectionFn = std::function<std::unique_ptr<DBClientConnection>()>;
struct Stats {
Date_t fileCopyStart;
Date_t fileCopyEnd;
uint64_t totalDataSize{0};
uint64_t totalBytesCopied{0};
};
// Explicit State enum ordering defined here because we rely on comparison
// operators for state checking in various TenantFileImporterService methods.
enum class State {
kUninitialized = 0,
kStarted = 1,
kLearnedFilename = 2,
kLearnedAllFilenames = 3,
kInterrupted = 4,
kStopped = 5
};
static StringData stateToString(State state) {
switch (state) {
case State::kUninitialized:
return "uninitialized";
case State::kStarted:
return "started";
case State::kLearnedFilename:
return "learned filename";
case State::kLearnedAllFilenames:
return "learned all filenames";
case State::kInterrupted:
return "interrupted";
case State::kStopped:
return "stopped";
}
MONGO_UNREACHABLE;
}
/**
* Begins the process of copying and importing files for a given migration.
*/
void startMigration(const UUID& migrationId, const OpTime& startMigrationOpTime);
/**
* Called for each file to be copied for a given migration.
*/
void learnedFilename(const UUID& migrationId, const BSONObj& metadataDoc);
/**
* Called after all files have been copied for a given migration.
*/
void learnedAllFilenames(const UUID& migrationId);
/**
* Interrupts an in-progress migration with the provided migration id.
*/
void interruptMigration(const UUID& migrationId);
/**
* Resets the interrupted migration for the given migrationId by calling
* _resetMigrationHandle(). See _resetMigrationHandle() for detailed comments.
*
* Throws an exception if called before the migration is interrupted.
*/
void resetMigration(const UUID& migrationId);
/**
 * Causes any in-progress migration to be interrupted.
*/
void interruptAll();
/**
* Returns a Future that will be resolved when the collection import task completes for the
 * given migration id. Returns boost::none if no active migration matches the provided
 * migration id.
*/
boost::optional<SharedSemiFuture<void>> getImportCompletedFuture(const UUID& migrationId);
/**
* Checks if there is an active migration with the given migration ID.
*/
bool hasActiveMigration(const UUID& migrationId);
/**
* Returns the migration stats for the given migrationId.
* If no migrationId is provided, it returns the stats of an ongoing migration, if any.
*/
BSONObj getStats(boost::optional<const UUID&> migrationId = boost::none);
void getStats(BSONObjBuilder& bob, boost::optional<const UUID&> migrationId = boost::none);
void onConsistentDataAvailable(OperationContext*, bool, bool) final {}
void onShutdown() final {
{
stdx::lock_guard lk(_mutex);
// Prevents a new migration from starting up during or after shutdown.
_isShuttingDown = true;
}
interruptAll();
_resetMigrationHandle();
}
void onRollbackBegin() final {
interruptAll();
_resetMigrationHandle();
}
void onStartup(OperationContext*) final {}
void onSetCurrentConfig(OperationContext* opCtx) final {}
void onStepUpBegin(OperationContext*, long long) final {}
void onStepUpComplete(OperationContext*, long long) final {}
void onStepDown() final {}
void onBecomeArbiter() final {}
inline std::string getServiceName() const final {
return "TenantFileImporterService";
}
/**
* Set the function used to create a donor client connection. Used for testing.
*/
void setCreateConnectionFn_forTest(const CreateConnectionFn& fn) {
_createConnectionFn = fn;
};
/**
* Returns the migrationId.
*/
boost::optional<UUID> getMigrationId_forTest() {
return _mh ? boost::make_optional(_mh->migrationId) : boost::none;
}
/**
* Returns the migration state.
*/
boost::optional<TenantFileImporterService::State> getState_forTest() {
return _mh ? boost::make_optional(_mh->state) : boost::none;
}
private:
/**
* A worker function that waits for ImporterEvents and handles cloning and importing files.
*/
void _handleEvents(const UUID& migrationId);
/**
* Performs file copying from the donor for the specified filename in the given metadataDoc.
*/
void _cloneFile(OperationContext* opCtx,
const UUID& migrationId,
DBClientConnection* clientConnection,
ThreadPool* workerPool,
TenantMigrationSharedData* sharedData,
const BSONObj& metadataDoc);
/**
* Waits until the majority committed StartMigrationTimestamp is successfully checkpointed.
*
* Note: Refer to the calling site for more information on its significance.
*/
void _waitUntilStartMigrationTimestampIsCheckpointed(OperationContext* opCtx,
const UUID& migrationId);
/**
* Runs rollback to stable on the cloned files associated with the given migration id,
 * and then imports the stable cloned files into the main WT instance.
*/
void _runRollbackAndThenImportFiles(OperationContext* opCtx, const UUID& migrationId);
/**
* Called to inform the primary that we have finished copying and importing all files.
*/
void _voteImportedFiles(OperationContext* opCtx, const UUID& migrationId);
/**
 * Called internally by interruptMigration and interruptAll to interrupt running file
 * cloning and import operations.
*/
void _interrupt(WithLock lk, const UUID& migrationId);
/**
 * This blocking call waits for the worker threads to finish execution, and then releases
* the resources held by MigrationHandle for the given migrationId (if provided) or for the
* current ongoing migration.
*
* Throws an exception if called before the migration is interrupted.
*/
void _resetMigrationHandle(boost::optional<const UUID&> migrationId = boost::none);
/**
* Transitions the migration associated with the given migrationId to the specified target
* state. If dryRun is set to 'true', the function performs a dry run of the state transition
* without actually changing the state. Throws an exception for an invalid state transition.
*
* Returns the current migration state before the state transition.
*/
TenantFileImporterService::State _transitionToState(WithLock,
const UUID& migrationId,
State targetState,
bool dryRun = false);
void _makeMigrationHandleIfNotPresent(WithLock,
const UUID& migrationId,
const OpTime& startMigrationOpTime);
struct ImporterEvent {
enum class Type { kNone, kLearnedFileName, kLearnedAllFilenames };
Type type;
UUID migrationId;
BSONObj metadataDoc;
ImporterEvent(Type _type, const UUID& _migrationId)
: type(_type), migrationId(_migrationId) {}
};
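// Events are pushed by the learnedFilename()/learnedAllFilenames() callers and consumed
// only by the migration's single worker thread.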
using Queue =
MultiProducerSingleConsumerQueue<ImporterEvent,
producer_consumer_queue_detail::DefaultCostFunction>;
// Represents a handle for managing the migration process. It holds various resources and
// information required for cloning files and importing them.
struct MigrationHandle {
explicit MigrationHandle(const UUID& migrationId, const OpTime& startMigrationOpTime);
// Shard merge migration Id.
const UUID migrationId;
// Optime at which the recipient state machine document for this migration is initialized.
const OpTime startMigrationOpTime;
// Queue to process ImporterEvents.
const std::unique_ptr<Queue> eventQueue;
// ThreadPool used by TenantFileCloner to do storage write operations.
const std::unique_ptr<ThreadPool> workerPool;
// Shared between the importer service and TenantFileCloners
const std::unique_ptr<TenantMigrationSharedData> sharedData;
// Indicates if collection import for this migration has begun.
bool importStarted = false;
// Promise fulfilled upon completion of collection import for this migration.
SharedPromise<void> importCompletedPromise;
// Worker thread to orchestrate the cloning, importing and notifying the primary steps.
std::unique_ptr<stdx::thread> workerThread;
// State of the associated migration.
State state = State::kUninitialized;
// Tracks the Statistics of the associated migration.
Stats stats;
// Pointers below are not owned by this struct. The method that sets these
// pointers must manage their lifecycle and ensure proper pointer reset to prevent
// invalid memory access by other methods when reading the pointer value.
// Donor DBClientConnection for file cloning.
DBClientConnection* donorConnection = nullptr;
// OperationContext associated with the migration.
OperationContext* opCtx = nullptr;
// Pointer to the current TenantFileCloner of the associated migration; used for statistics
// purpose.
TenantFileCloner* currentTenantFileCloner = nullptr;
};
stdx::mutex _mutex;
// All member variables are labeled with one of the following codes indicating the
// synchronization rules for accessing them.
//
// (R) Read-only in concurrent operation; no synchronization required.
// (S) Self-synchronizing; access according to class's own rules.
// (M) Reads and writes guarded by _mutex.
// (W) Synchronization required only for writes.
// (I) Independently synchronized, see member variable comment.
// Set to true when the shutdown procedure is initiated.
bool _isShuttingDown = false; // (M)
std::unique_ptr<MigrationHandle> _mh; // (M)
// Used to create a new DBClientConnection to the donor.
CreateConnectionFn _createConnectionFn = {}; // (W)
// Condition variable to block concurrent reset operations.
stdx::condition_variable _resetCV; // (M)
// Flag indicating whether a reset is currently in progress.
bool _resetInProgress = false; // (M)
};
} // namespace mongo::repl

View File

@ -1,574 +0,0 @@
/**
* Copyright (C) 2023-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/optional/optional.hpp>
#include <utility>
#include <vector>
#include <boost/move/utility_core.hpp>
#include "mongo/bson/bsonelement.h"
#include "mongo/bson/bsontypes.h"
#include "mongo/bson/bsontypes_util.h"
#include "mongo/db/database_name.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/query/client_cursor/cursor_response.h"
#include "mongo/db/repl/member_state.h"
#include "mongo/db/repl/replica_set_aware_service.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/tenant_file_importer_service.h"
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
#include "mongo/db/service_context_d_test_fixture.h"
#include "mongo/db/storage/kv/kv_engine.h"
#include "mongo/dbtests/mock/mock_dbclient_connection.h"
#include "mongo/dbtests/mock/mock_remote_db_server.h"
#include "mongo/executor/task_executor_test_fixture.h"
#include "mongo/executor/thread_pool_task_executor_test_fixture.h"
#include "mongo/logv2/log_component.h"
#include "mongo/logv2/log_severity.h"
#include "mongo/unittest/assert.h"
#include "mongo/unittest/bson_test_util.h"
#include "mongo/unittest/death_test.h"
#include "mongo/unittest/framework.h"
#include "mongo/unittest/log_test.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/net/hostandport.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
namespace mongo {
namespace repl {
using namespace repl::shard_merge_utils;
namespace {
constexpr auto kDonorHostName = "localhost:12345"_sd;
constexpr auto kDonorDBPath = "/path/to/remoteDB/"_sd;
static const UUID kBackupId = UUID::gen();
const OpTime kStartMigrationOpTime(Timestamp(1, 1), 1);
} // namespace
class TenantFileImporterServiceTest : public ServiceContextMongoDTest {
public:
/**
 * Creates the file metadata document used to drive a
 * TenantFileImporterService::ImporterEvent::kLearnedFileName event.
*/
static BSONObj makefileMetaDoc(const UUID& migrationId,
const std::string& fileName,
uint64_t fileSize) {
return BSON("filename" << kDonorDBPath + "/" + fileName << "fileSize"
<< static_cast<int64_t>(fileSize) << kDonorHostNameFieldName
<< kDonorHostName << kMigrationIdFieldName << migrationId
<< kBackupIdFieldName << kBackupId << kDonorDbPathFieldName
<< kDonorDBPath);
}
/**
* Returns true if collection exists.
*/
static bool collectionExists(OperationContext* opCtx, const NamespaceString& nss) {
return static_cast<bool>(AutoGetCollectionForRead(opCtx, nss).getCollection());
}
void setUp() override {
ServiceContextMongoDTest::setUp();
auto serviceContext = getServiceContext();
auto replCoord = std::make_unique<ReplicationCoordinatorMock>(serviceContext);
replCoord->setRunCmdOnPrimaryAndAwaitResponseFunction([this](OperationContext* opCtx,
const DatabaseName& dbName,
const BSONObj& cmdObj,
ReplicationCoordinator::
OnRemoteCmdScheduledFn
onRemoteCmdScheduled,
ReplicationCoordinator::
OnRemoteCmdCompleteFn
onRemoteCmdComplete) {
runCmdOnPrimaryAndAwaitResponseFnCalls.push_back(RunCmdOnPrimaryCall{dbName, cmdObj});
return runCmdOnPrimaryAndAwaitResponseFnResponse;
});
ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_PRIMARY));
ReplicationCoordinator::set(serviceContext, std::move(replCoord));
StorageInterface::set(serviceContext, std::make_unique<StorageInterfaceImpl>());
_importerService = repl::TenantFileImporterService::get(serviceContext);
_mockDonorServer = std::make_unique<MockRemoteDBServer>(kDonorHostName.toString());
_importerService->setCreateConnectionFn_forTest([&]() {
return std::make_unique<MockDBClientConnection>(_mockDonorServer.get(),
true /* autoReconnect */);
});
globalFailPointRegistry().find("skipImportFiles")->setMode(FailPoint::alwaysOn);
// Set the stable timestamp to avoid hang in
// TenantFileImporterService::_waitUntilStartMigrationTimestampIsCheckpointed().
auto opCtx = cc().makeOperationContext();
auto engine = serviceContext->getStorageEngine()->getEngine();
engine->setStableTimestamp(Timestamp(1, 1), true);
}
void tearDown() override {
_importerService->onShutdown();
StorageInterface::set(getServiceContext(), {});
ReplicationCoordinator::set(getServiceContext(), {});
ServiceContextMongoDTest::tearDown();
}
struct RunCmdOnPrimaryCall {
DatabaseName dbName;
BSONObj cmdObj;
};
std::vector<RunCmdOnPrimaryCall> runCmdOnPrimaryAndAwaitResponseFnCalls;
BSONObj runCmdOnPrimaryAndAwaitResponseFnResponse = BSON("ok" << 1);
private:
unittest::MinimumLoggedSeverityGuard _replicationSeverityGuard{
logv2::LogComponent::kReplication, logv2::LogSeverity::Debug(1)};
unittest::MinimumLoggedSeverityGuard _tenantMigrationSeverityGuard{
logv2::LogComponent::kTenantMigration, logv2::LogSeverity::Debug(1)};
protected:
std::unique_ptr<MockRemoteDBServer> _mockDonorServer;
TenantFileImporterService* _importerService;
};
TEST_F(TenantFileImporterServiceTest, ConcurrentMigrationWithDifferentMigrationID) {
FailPointEnableBlock failPoint("skipCloneFiles");
auto migrationId = UUID::gen();
auto anotherMigrationId = UUID::gen();
auto verifyAllStateTransitionFailsForAnotherMigrationId = [&] {
ASSERT_THROWS_CODE(
_importerService->startMigration(anotherMigrationId, kStartMigrationOpTime),
DBException,
7800210);
ASSERT_THROWS_CODE(_importerService->learnedFilename(
anotherMigrationId, makefileMetaDoc(migrationId, "some-file.wt", 1)),
DBException,
7800210);
ASSERT_THROWS_CODE(
_importerService->learnedAllFilenames(anotherMigrationId), DBException, 7800210);
ASSERT_THROWS_CODE(
_importerService->interruptMigration(anotherMigrationId), DBException, 7800210);
ASSERT_THROWS_CODE(
_importerService->resetMigration(anotherMigrationId), DBException, 7800210);
};
_importerService->startMigration(migrationId, kStartMigrationOpTime);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
verifyAllStateTransitionFailsForAnotherMigrationId();
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedFilename);
verifyAllStateTransitionFailsForAnotherMigrationId();
_importerService->learnedAllFilenames(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedAllFilenames);
verifyAllStateTransitionFailsForAnotherMigrationId();
_importerService->interruptMigration(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
verifyAllStateTransitionFailsForAnotherMigrationId();
_importerService->resetMigration(migrationId);
ASSERT(!_importerService->getMigrationId_forTest());
{
// Starting a new migration with anotherMigrationId is now possible.
_importerService->startMigration(anotherMigrationId, kStartMigrationOpTime);
ASSERT_EQ(_importerService->getMigrationId_forTest(), anotherMigrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
}
}
TEST_F(TenantFileImporterServiceTest, StartConcurrentMigrationWithSameMigrationID) {
FailPointEnableBlock failPoint("skipCloneFiles");
auto migrationId = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
// startMigration calls with the same migrationId will be ignored.
_importerService->startMigration(migrationId, kStartMigrationOpTime);
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedFilename);
ASSERT_THROWS_CODE(
_importerService->startMigration(migrationId, kStartMigrationOpTime), DBException, 7800210);
_importerService->learnedAllFilenames(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedAllFilenames);
ASSERT_THROWS_CODE(
_importerService->startMigration(migrationId, kStartMigrationOpTime), DBException, 7800210);
_importerService->interruptMigration(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
_importerService->resetMigration(migrationId);
ASSERT(!_importerService->getMigrationId_forTest());
// Starting a new migration with the same migrationId is now possible.
_importerService->startMigration(migrationId, kStartMigrationOpTime);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
}
TEST_F(TenantFileImporterServiceTest, ShouldHaveLearntAtLeastOneFileName) {
auto migrationId = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
ASSERT_THROWS_CODE(_importerService->learnedAllFilenames(migrationId), DBException, 7800210);
}
TEST_F(TenantFileImporterServiceTest, learnedAllFilenamesFollowedByLearnedFileNameOutOfOrderEvent) {
FailPointEnableBlock failPoint("skipCloneFiles");
auto migrationId = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
_importerService->learnedAllFilenames(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedAllFilenames);
ASSERT_THROWS_CODE(
_importerService->learnedFilename(migrationId,
BSON("filename"
<< "some-file.wt" << kDonorHostNameFieldName
<< kDonorHostName << "fileSize" << 1)),
DBException,
7800210);
// Interrupt the migration to prevent running file cloning after exiting this block.
_importerService->interruptMigration(migrationId);
}
TEST_F(TenantFileImporterServiceTest, MigrationNotStartedYetShouldIgnoreAnyStateTransition) {
auto migrationId = UUID::gen();
ASSERT(!_importerService->getMigrationId_forTest());
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
ASSERT(!_importerService->getMigrationId_forTest());
_importerService->learnedAllFilenames(migrationId);
ASSERT(!_importerService->getMigrationId_forTest());
_importerService->interruptMigration(migrationId);
ASSERT(!_importerService->getMigrationId_forTest());
_importerService->resetMigration(migrationId);
ASSERT(!_importerService->getMigrationId_forTest());
}
TEST_F(TenantFileImporterServiceTest, CanInterruptMigrationAfterMigrationStart) {
auto migrationId = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
_importerService->interruptMigration(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
}
TEST_F(TenantFileImporterServiceTest, CanInterruptMigrationWhenLearnedFileName) {
FailPointEnableBlock failPoint("skipCloneFiles");
auto migrationId = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedFilename);
_importerService->interruptMigration(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
}
TEST_F(TenantFileImporterServiceTest, CanInterruptMigrationWhenLearnedAllFileNames) {
FailPointEnableBlock failPoint("skipCloneFiles");
auto migrationId = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
_importerService->learnedAllFilenames(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedAllFilenames);
_importerService->interruptMigration(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
}
TEST_F(TenantFileImporterServiceTest, CanInterruptAMigrationMoreThanOnce) {
auto migrationId = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
_importerService->interruptMigration(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
_importerService->interruptMigration(migrationId);
}
TEST_F(TenantFileImporterServiceTest, InterruptedMigrationCannotLearnNewFiles) {
auto migrationId = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
_importerService->interruptMigration(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
ASSERT_THROWS_CODE(_importerService->learnedFilename(migrationId,
BSON("filename"
<< "some-file.wt"
<< "fileSize" << 1)),
DBException,
7800210);
ASSERT_THROWS_CODE(_importerService->learnedAllFilenames(migrationId), DBException, 7800210);
}
TEST_F(TenantFileImporterServiceTest, resetMigration) {
FailPointEnableBlock failPoint("skipCloneFiles");
auto migrationId = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
ASSERT_THROWS_CODE(_importerService->resetMigration(migrationId), DBException, 7800210);
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedFilename);
ASSERT_THROWS_CODE(_importerService->resetMigration(migrationId), DBException, 7800210);
_importerService->learnedAllFilenames(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedAllFilenames);
ASSERT_THROWS_CODE(_importerService->resetMigration(migrationId), DBException, 7800210);
_importerService->interruptMigration(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
_importerService->resetMigration(migrationId);
ASSERT(!_importerService->getMigrationId_forTest());
// Resetting migration again shouldn't throw.
_importerService->resetMigration(migrationId);
}
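// End-to-end happy path: clone one file from the mock donor, then verify the cloned temp
// file, the import-done marker collection, and the 'recipientVoteImportedFiles' command
// sent to the primary (the actual file import is skipped via the 'skipImportFiles'
// failpoint).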
TEST_F(TenantFileImporterServiceTest, ImportsFilesWhenAllFilenamesLearned) {
FailPointEnableBlock hangBeforeFileImporterThreadExit("hangBeforeFileImporterThreadExit");
auto fpSkipImportFiles = globalFailPointRegistry().find("skipImportFiles");
const auto fpSkipImportFilesInitialTimesEntered =
fpSkipImportFiles->toBSON()["timesEntered"].safeNumberLong();
auto migrationId = UUID::gen();
const std::string fileName = "some-file.wt";
std::string fileData = "Here is the file data";
CursorResponse fileAggResponse(
NamespaceString::makeCollectionlessAggregateNSS(DatabaseName::kAdmin),
0 /* cursorId */,
{BSON("byteOffset" << 0 << "endOfFile" << true << "data"
<< BSONBinData(fileData.data(), fileData.size(), BinDataGeneral))});
_mockDonorServer->setCommandReply("aggregate", fileAggResponse.toBSONAsInitialResponse());
// Verify that the temp WT db path is empty before migration start.
auto tempWTDirectory = fileClonerTempDir(migrationId);
ASSERT(!boost::filesystem::exists(tempWTDirectory / fileName));
_importerService->startMigration(migrationId, kStartMigrationOpTime);
_importerService->learnedFilename(migrationId,
makefileMetaDoc(migrationId, fileName, fileData.size()));
_importerService->learnedAllFilenames(migrationId);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(),
TenantFileImporterService::State::kLearnedAllFilenames);
hangBeforeFileImporterThreadExit->waitForTimesEntered(
hangBeforeFileImporterThreadExit.initialTimesEntered() + 1);
// Verify that the files have been cloned successfully.
ASSERT(boost::filesystem::exists(tempWTDirectory / fileName));
ASSERT_EQ(fileData.size(), boost::filesystem::file_size(tempWTDirectory / fileName));
// Verify that the import files operation has been called.
fpSkipImportFiles->waitForTimesEntered(fpSkipImportFilesInitialTimesEntered + 1);
// Check if the import done marker collection exists.
ASSERT(collectionExists(makeOperationContext().get(), getImportDoneMarkerNs(migrationId)));
// Verify that the node has notified the primary about the import success.
ASSERT_EQ(runCmdOnPrimaryAndAwaitResponseFnCalls.size(), 1);
auto recipientVoteImportedFilesCmdCall = runCmdOnPrimaryAndAwaitResponseFnCalls.front();
ASSERT_EQ(recipientVoteImportedFilesCmdCall.dbName, DatabaseName::kAdmin);
ASSERT_BSONOBJ_EQ(recipientVoteImportedFilesCmdCall.cmdObj,
BSON("recipientVoteImportedFiles" << 1 << "migrationId" << migrationId
<< "from"
<< ":27017"));
}
TEST_F(TenantFileImporterServiceTest, statsForInvalidMigrationID) {
auto migrationId = UUID::gen();
auto invalidMigrationID = UUID::gen();
_importerService->startMigration(migrationId, kStartMigrationOpTime);
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
auto stats = _importerService->getStats(invalidMigrationID);
ASSERT_TRUE(stats.isEmpty());
}
TEST_F(TenantFileImporterServiceTest, statsForValidMigrationID) {
auto migrationId = UUID::gen();
const std::string file1Name = "some-file1.wt";
std::string file1Data = "Here is the file1 data";
CursorResponse file1AggResponse(
NamespaceString::makeCollectionlessAggregateNSS(DatabaseName::kAdmin),
0 /* cursorId */,
{BSON("byteOffset" << 0 << "endOfFile" << true << "data"
<< BSONBinData(file1Data.data(), file1Data.size(), BinDataGeneral))});
const std::string file2Name = "some-file2.wt";
std::string file2Data = "Here is the file2 data";
CursorResponse file2AggResponse(
NamespaceString::makeCollectionlessAggregateNSS(DatabaseName::kAdmin),
0 /* cursorId */,
{BSON("byteOffset" << 0 << "endOfFile" << true << "data"
<< BSONBinData(file2Data.data(), file2Data.size(), BinDataGeneral))});
_mockDonorServer->setCommandReply(
"aggregate",
{file1AggResponse.toBSONAsInitialResponse(), file2AggResponse.toBSONAsInitialResponse()});
const auto totalDataSize = file1Data.size() + file2Data.size();
FailPointEnableBlock hangBeforeFileImporterThreadExit("hangBeforeFileImporterThreadExit");
// Verify that the stats are empty before migration start.
auto stats = _importerService->getStats(migrationId);
ASSERT(stats.isEmpty());
_importerService->startMigration(migrationId, kStartMigrationOpTime);
// Sleep to prevent a race with the "totalReceiveElapsedMillis" field.
mongo::sleepmillis(1);
stats = _importerService->getStats(migrationId);
ASSERT(!stats.isEmpty());
ASSERT(stats.hasField("approxTotalDataSize"));
ASSERT(stats.hasField("approxTotalBytesCopied"));
ASSERT(stats.hasField("totalReceiveElapsedMillis"));
ASSERT(stats.hasField("remainingReceiveEstimatedMillis"));
ASSERT_EQ(stats["approxTotalDataSize"].safeNumberLong(), 0ll);
ASSERT_EQ(stats["approxTotalBytesCopied"].safeNumberLong(), 0ll);
ASSERT_GT(stats["totalReceiveElapsedMillis"].safeNumberLong(), 0ll);
ASSERT_EQ(stats["remainingReceiveEstimatedMillis"].safeNumberLong(), 0ll);
{
FailPointEnableBlock fpTenantFileClonerHangDuringFileCloneBackup(
"TenantFileClonerHangDuringFileCloneBackup");
_importerService->learnedFilename(
migrationId, makefileMetaDoc(migrationId, file1Name, file1Data.size()));
_importerService->learnedFilename(
migrationId, makefileMetaDoc(migrationId, file2Name, file2Data.size()));
fpTenantFileClonerHangDuringFileCloneBackup->waitForTimesEntered(
fpTenantFileClonerHangDuringFileCloneBackup.initialTimesEntered() + 1);
stats = _importerService->getStats(migrationId);
ASSERT(!stats.isEmpty());
ASSERT(stats.hasField("approxTotalDataSize"));
ASSERT(stats.hasField("approxTotalBytesCopied"));
ASSERT(stats.hasField("totalReceiveElapsedMillis"));
ASSERT(stats.hasField("remainingReceiveEstimatedMillis"));
ASSERT_EQ(stats["approxTotalDataSize"].safeNumberLong(), totalDataSize);
ASSERT_EQ(stats["approxTotalBytesCopied"].safeNumberLong(), file1Data.size());
ASSERT_GT(stats["totalReceiveElapsedMillis"].safeNumberLong(), 0ll);
ASSERT_GT(stats["remainingReceiveEstimatedMillis"].safeNumberLong(), 0ll);
}
_importerService->learnedAllFilenames(migrationId);
hangBeforeFileImporterThreadExit->waitForTimesEntered(
hangBeforeFileImporterThreadExit.initialTimesEntered() + 1);
stats = _importerService->getStats(migrationId);
ASSERT(!stats.isEmpty());
ASSERT(stats.hasField("approxTotalDataSize"));
ASSERT(stats.hasField("approxTotalBytesCopied"));
ASSERT(stats.hasField("totalReceiveElapsedMillis"));
ASSERT(stats.hasField("remainingReceiveEstimatedMillis"));
ASSERT_EQ(stats["approxTotalDataSize"].safeNumberLong(), totalDataSize);
ASSERT_EQ(stats["approxTotalBytesCopied"].safeNumberLong(), totalDataSize);
ASSERT_GT(stats["totalReceiveElapsedMillis"].safeNumberLong(), 0ll);
ASSERT_EQ(stats["remainingReceiveEstimatedMillis"].safeNumberLong(), 0ll);
}
} // namespace repl
} // namespace mongo

View File

@ -65,7 +65,6 @@
#include "mongo/db/repl/tenant_migration_decoration.h"
#include "mongo/db/repl/tenant_migration_donor_access_blocker.h"
#include "mongo/db/repl/tenant_migration_recipient_access_blocker.h"
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
#include "mongo/db/serverless/serverless_types_gen.h"
#include "mongo/db/service_context.h"
@ -123,11 +122,6 @@ bool recoverTenantMigrationRecipientAccessBlockers(OperationContext* opCtx,
doc.getId());
auto protocol = doc.getProtocol().value_or(MigrationProtocolEnum::kMultitenantMigrations);
switch (protocol) {
case MigrationProtocolEnum::kShardMerge:
invariant(doc.getTenantIds());
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.add(*doc.getTenantIds(), mtab);
break;
case MigrationProtocolEnum::kMultitenantMigrations: {
const auto tenantId = TenantId::parseFromString(doc.getTenantId());
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
@ -178,16 +172,6 @@ bool recoverTenantMigrationDonorAccessBlockers(OperationContext* opCtx,
const auto tenantId = TenantId::parseFromString(*doc.getTenantId());
registry.add(tenantId, mtabVector.back());
} break;
case MigrationProtocolEnum::kShardMerge:
invariant(doc.getTenantIds());
// Add global access blocker to avoid any tenant creation during shard merge.
registry.addGlobalDonorAccessBlocker(mtabVector.back());
for (const auto& tenantId : *doc.getTenantIds()) {
mtabVector.push_back(std::make_shared<TenantMigrationDonorAccessBlocker>(
opCtx->getServiceContext(), doc.getId()));
registry.add(tenantId, mtabVector.back());
}
break;
default:
MONGO_UNREACHABLE;
}
@ -227,63 +211,6 @@ bool recoverTenantMigrationDonorAccessBlockers(OperationContext* opCtx,
}
return true;
}
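// Recreates recipient access blockers during startup/rollback recovery from each persisted
// shard merge recipient state document, configured according to the recorded migration
// state.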
bool recoverShardMergeRecipientAccessBlockers(OperationContext* opCtx,
const ShardMergeRecipientDocument& doc) {
auto replCoord = repl::ReplicationCoordinator::get(getGlobalServiceContext());
invariant(replCoord && replCoord->getSettings().isReplSet());
// If the initial syncing node (both FCBIS and logical initial sync) syncs from a sync source
// that's in the middle of the file copy/import phase of a shard merge, it can cause the initial
// syncing node to have only partial donor data. And, if this node went into initial sync (i.e.,
// resync) after it sent `recipientVoteImportedFiles` to the recipient primary, the primary
// can commit the migration and cause permanent data loss on this node.
if (replCoord->getMemberState().startup2() && !doc.getExpireAt()) {
assertOnUnsafeInitialSync(doc.getId());
}
// Do not create an mtab for the following cases. Otherwise, we can get into a potential race
// causing the recovery procedure to fail with `ErrorCodes::ConflictingServerlessOperation`.
// 1) The migration was skipped.
if (doc.getStartGarbageCollect()) {
invariant(doc.getState() == ShardMergeRecipientStateEnum::kAborted ||
doc.getState() == ShardMergeRecipientStateEnum::kCommitted);
return true;
}
// 2) Aborted state doc marked as garbage collectable.
if (doc.getState() == ShardMergeRecipientStateEnum::kAborted && doc.getExpireAt()) {
return true;
}
auto mtab = std::make_shared<TenantMigrationRecipientAccessBlocker>(opCtx->getServiceContext(),
doc.getId());
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.add(doc.getTenantIds(), mtab);
switch (doc.getState()) {
case ShardMergeRecipientStateEnum::kStarted:
case ShardMergeRecipientStateEnum::kLearnedFilenames:
break;
case ShardMergeRecipientStateEnum::kConsistent:
repl::shard_merge_utils::assertImportDoneMarkerLocalCollExistsOnMergeConsistent(
opCtx, doc.getId());
FMT_FALLTHROUGH;
case ShardMergeRecipientStateEnum::kCommitted:
if (doc.getExpireAt()) {
mtab->stopBlockingTTL();
}
FMT_FALLTHROUGH;
case ShardMergeRecipientStateEnum::kAborted:
if (auto rejectTs = doc.getRejectReadsBeforeTimestamp()) {
mtab->startRejectingReadsBefore(*rejectTs);
}
break;
default:
MONGO_UNREACHABLE;
}
return true;
}
} // namespace
void assertOnUnsafeInitialSync(const UUID& migrationId) {
@ -608,13 +535,6 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) {
recipientStore.forEach(opCtx, {}, [&](const TenantMigrationRecipientDocument& doc) {
return recoverTenantMigrationRecipientAccessBlockers(opCtx, doc);
});
PersistentTaskStore<ShardMergeRecipientDocument> mergeRecipientStore(
NamespaceString::kShardMergeRecipientsNamespace);
mergeRecipientStore.forEach(opCtx, {}, [&](const ShardMergeRecipientDocument& doc) {
return recoverShardMergeRecipientAccessBlockers(opCtx, doc);
});
}
template <typename MigrationConflictInfoType>

View File

@ -121,16 +121,6 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationTrueWithDon
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
}
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeTrueWithDonor) {
auto donorMtab =
std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), UUID::gen());
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.addGlobalDonorAccessBlocker(donorMtab);
ASSERT_FALSE(
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kLocal));
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
}
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationTrueWithRecipient) {
auto recipientMtab =
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), UUID::gen());
@ -151,21 +141,6 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationTrueWithBot
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
}
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeTrueWithBoth) {
auto uuid = UUID::gen();
auto recipientMtab =
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), uuid);
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab);
auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), uuid);
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.addGlobalDonorAccessBlocker(donorMtab);
// Access blockers do not impact namespaces without tenants.
ASSERT_FALSE(
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kConfig));
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
}
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationDonorFalseForNoDbName) {
auto donorMtab =
std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), UUID::gen());
@ -175,23 +150,6 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationDonorFalseF
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kEmpty));
}
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeDonorFalseForNoDbName) {
auto donorMtab =
std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), UUID::gen());
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.addGlobalDonorAccessBlocker(donorMtab);
ASSERT_FALSE(
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kEmpty));
}
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeRecipientFalseForNoDbName) {
auto recipientMtab =
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), UUID::gen());
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab);
ASSERT_FALSE(
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kEmpty));
}
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationFalseForUnrelatedDb) {
auto recipientMtab =
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), UUID::gen());
@ -231,38 +189,6 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationFalseAfterR
ASSERT_FALSE(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
}
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeFalseAfterRemoveWithBoth) {
auto migrationId = UUID::gen();
auto recipientMtab =
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), migrationId);
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab);
auto donorMtab =
std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), migrationId);
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.addGlobalDonorAccessBlocker(donorMtab);
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
ASSERT_FALSE(
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kAdmin));
// Remove the donor; there should still be a migration for the tenants migrating to the recipient.
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.removeAccessBlockersForMigration(migrationId,
TenantMigrationAccessBlocker::BlockerType::kDonor);
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
ASSERT_FALSE(
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kAdmin));
// Remove the recipient; there should be no migration.
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.removeAccessBlockersForMigration(migrationId,
TenantMigrationAccessBlocker::BlockerType::kRecipient);
ASSERT_FALSE(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
ASSERT_FALSE(
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kAdmin));
}
TEST_F(TenantMigrationAccessBlockerUtilTest, TestValidateNssBeingMigrated) {
auto migrationId = UUID::gen();
auto recipientMtab =
@ -349,435 +275,4 @@ private:
const repl::ReplSettings _replSettings = repl::createServerlessReplSettings();
};
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientBlockerStarted) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kStarted);
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
ASSERT(mtab);
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFuture.isReady());
ASSERT_THROWS_CODE_AND_WHAT(
cmdFuture.get(),
DBException,
ErrorCodes::IllegalOperation,
"Tenant command 'dummyCmd' is not allowed before migration completes");
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientAbortedBeforeDataCopy) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
recipientDoc.setStartGarbageCollect(true);
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
ASSERT(!mtab);
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientAbortedAfterDataCopy) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
ASSERT(mtab);
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFuture.isReady());
ASSERT_THROWS_CODE_AND_WHAT(
cmdFuture.get(),
DBException,
ErrorCodes::IllegalOperation,
"Tenant command 'dummyCmd' is not allowed before migration completes");
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientCommittedWithoutDataCopy) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
recipientDoc.setStartGarbageCollect(true);
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
ASSERT(!mtab);
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientCommittedAfterDataCopy) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
ASSERT(mtab);
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFuture.isReady());
ASSERT_THROWS_CODE_AND_WHAT(
cmdFuture.get(),
DBException,
ErrorCodes::IllegalOperation,
"Tenant command 'dummyCmd' is not allowed before migration completes");
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientLearnedFiles) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
ASSERT(mtab);
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFuture.isReady());
ASSERT_THROWS_CODE_AND_WHAT(
cmdFuture.get(),
DBException,
ErrorCodes::IllegalOperation,
"Tenant command 'dummyCmd' is not allowed before migration completes");
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientConsistent) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
// Create the import done marker collection.
ASSERT_OK(createCollection(
opCtx(), CreateCommand(repl::shard_merge_utils::getImportDoneMarkerNs(kMigrationId))));
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
ASSERT(mtab);
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFuture.isReady());
ASSERT_THROWS_CODE_AND_WHAT(
cmdFuture.get(),
DBException,
ErrorCodes::IllegalOperation,
"Tenant command 'dummyCmd' is not allowed before migration completes");
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientRejectBeforeTimestamp) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
recipientDoc.setRejectReadsBeforeTimestamp(Timestamp{20, 1});
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
ASSERT(mtab);
repl::ReadConcernArgs::get(opCtx()) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kMajorityReadConcern);
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_OK(cmdFuture.getNoThrow());
repl::ReadConcernArgs::get(opCtx()) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{15, 1});
auto cmdFutureAtClusterTime = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFutureAtClusterTime.isReady());
ASSERT_THROWS_CODE_AND_WHAT(
cmdFutureAtClusterTime.get(),
DBException,
ErrorCodes::SnapshotTooOld,
"Tenant command 'dummyCmd' is not allowed before migration completes");
}
}
TEST_F(RecoverAccessBlockerTest, InitialSyncUsingSyncSourceRunningShardMergeImportAsserts) {
ShardMergeRecipientDocument recipientDoc(UUID::gen(),
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
// Simulate that the node is in initial sync.
ASSERT_OK(_replMock->setFollowerMode(repl::MemberState::RS_STARTUP2));
ASSERT_THROWS_CODE_AND_WHAT(
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx()),
DBException,
ErrorCodes::TenantMigrationInProgress,
"Illegal to run initial sync when shard merge is active");
}
TEST_F(RecoverAccessBlockerTest, SyncSourceCompletesShardMergeBeforeInitialSyncStart) {
ShardMergeRecipientDocument recipientDoc(kMigrationId,
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
// Simulate that the node is in initial sync.
ASSERT_OK(_replMock->setFollowerMode(repl::MemberState::RS_STARTUP2));
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
}
DEATH_TEST_REGEX_F(RecoverAccessBlockerTest,
ShardMergeRecipientConsistentStateWithoutImportDoneMarkerCollectionFasserts,
"Fatal assertion.*7219902") {
ShardMergeRecipientDocument recipientDoc(UUID::gen(),
kDefaultDonorConnStr,
_tenantIds,
kDefaultStartMigrationTimestamp,
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
}
TEST_F(RecoverAccessBlockerTest, ShardMergeDonorAbortingIndex) {
TenantMigrationDonorDocument donorDoc(
kMigrationId,
kDefaultRecipientConnStr,
mongo::ReadPreferenceSetting(ReadPreference::PrimaryOnly));
donorDoc.setProtocol(MigrationProtocolEnum::kShardMerge);
donorDoc.setTenantIds(_tenantIds);
donorDoc.setState(TenantMigrationDonorStateEnum::kAbortingIndexBuilds);
insertStateDocument(NamespaceString::kTenantMigrationDonorsNamespace, donorDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
ASSERT(mtab);
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFuture.isReady());
ASSERT_OK(cmdFuture.getNoThrow());
ASSERT_OK(mtab->checkIfCanWrite(Timestamp{10, 1}));
auto indexStatus = mtab->checkIfCanBuildIndex();
ASSERT_EQ(indexStatus.code(), ErrorCodes::TenantMigrationConflict);
auto migrationConflictInfo = indexStatus.extraInfo<TenantMigrationConflictInfo>();
ASSERT_EQ(migrationConflictInfo->getMigrationId(), kMigrationId);
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeDonorBlocking) {
TenantMigrationDonorDocument donorDoc(
kMigrationId,
kDefaultRecipientConnStr,
mongo::ReadPreferenceSetting(ReadPreference::PrimaryOnly));
donorDoc.setProtocol(MigrationProtocolEnum::kShardMerge);
donorDoc.setTenantIds(_tenantIds);
donorDoc.setState(TenantMigrationDonorStateEnum::kBlocking);
donorDoc.setBlockTimestamp(Timestamp{100, 1});
insertStateDocument(NamespaceString::kTenantMigrationDonorsNamespace, donorDoc.toBSON());
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
ASSERT(mtab);
repl::ReadConcernArgs::get(opCtx()) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kMajorityReadConcern);
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFuture.isReady());
ASSERT_OK(cmdFuture.getNoThrow());
repl::ReadConcernArgs::get(opCtx()) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{101, 1});
auto afterCmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_FALSE(afterCmdFuture.isReady());
ASSERT_EQ(mtab->checkIfCanWrite(Timestamp{101, 1}).code(),
ErrorCodes::TenantMigrationConflict);
auto indexStatus = mtab->checkIfCanBuildIndex();
ASSERT_EQ(indexStatus.code(), ErrorCodes::TenantMigrationConflict);
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeDonorCommitted) {
TenantMigrationDonorDocument donorDoc(
kMigrationId,
kDefaultRecipientConnStr,
mongo::ReadPreferenceSetting(ReadPreference::PrimaryOnly));
donorDoc.setProtocol(MigrationProtocolEnum::kShardMerge);
donorDoc.setTenantIds(_tenantIds);
donorDoc.setState(TenantMigrationDonorStateEnum::kCommitted);
donorDoc.setBlockTimestamp(Timestamp{100, 1});
donorDoc.setCommitOrAbortOpTime(repl::OpTime{Timestamp{101, 1}, 2});
insertStateDocument(NamespaceString::kTenantMigrationDonorsNamespace, donorDoc.toBSON());
_replMock->setCurrentCommittedSnapshotOpTime(repl::OpTime{Timestamp{101, 1}, 2});
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
ASSERT(mtab);
repl::ReadConcernArgs::get(opCtx()) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{90, 1});
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFuture.isReady());
ASSERT_OK(cmdFuture.getNoThrow());
repl::ReadConcernArgs::get(opCtx()) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{102, 1});
auto afterCmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(afterCmdFuture.isReady());
ASSERT_EQ(afterCmdFuture.getNoThrow().code(), ErrorCodes::TenantMigrationCommitted);
ASSERT_EQ(mtab->checkIfCanWrite(Timestamp{102, 1}).code(),
ErrorCodes::TenantMigrationCommitted);
auto indexStatus = mtab->checkIfCanBuildIndex();
ASSERT_EQ(indexStatus.code(), ErrorCodes::TenantMigrationCommitted);
}
}
TEST_F(RecoverAccessBlockerTest, ShardMergeDonorAborted) {
TenantMigrationDonorDocument donorDoc(
kMigrationId,
kDefaultRecipientConnStr,
mongo::ReadPreferenceSetting(ReadPreference::PrimaryOnly));
donorDoc.setProtocol(MigrationProtocolEnum::kShardMerge);
donorDoc.setTenantIds(_tenantIds);
donorDoc.setState(TenantMigrationDonorStateEnum::kAborted);
donorDoc.setBlockTimestamp(Timestamp{100, 1});
donorDoc.setCommitOrAbortOpTime(repl::OpTime{Timestamp{101, 1}, 2});
insertStateDocument(NamespaceString::kTenantMigrationDonorsNamespace, donorDoc.toBSON());
_replMock->setCurrentCommittedSnapshotOpTime(repl::OpTime{Timestamp{101, 1}, 2});
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
for (const auto& tenantId : _tenantIds) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
.getTenantMigrationAccessBlockerForTenantId(
tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
ASSERT(mtab);
repl::ReadConcernArgs::get(opCtx()) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{90, 1});
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(cmdFuture.isReady());
ASSERT_OK(cmdFuture.getNoThrow());
repl::ReadConcernArgs::get(opCtx()) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{102, 1});
auto afterCmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
ASSERT_TRUE(afterCmdFuture.isReady());
ASSERT_OK(afterCmdFuture.getNoThrow());
ASSERT_OK(mtab->checkIfCanWrite(Timestamp{102, 1}));
ASSERT_OK(mtab->checkIfCanBuildIndex());
}
}
} // namespace mongo

View File

@ -94,9 +94,6 @@ void onTransitionToAbortingIndexBuilds(OperationContext* opCtx,
const auto tenantId = TenantId::parseFromString(*donorStateDoc.getTenantId());
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()).add(tenantId, mtab);
} else {
tassert(6448702,
"Bad protocol",
donorStateDoc.getProtocol() == MigrationProtocolEnum::kShardMerge);
invariant(donorStateDoc.getTenantIds());
auto& registry = TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext());

View File

@ -196,7 +196,7 @@ public:
/** Returns true if we should retry sending SyncData given the error */
bool recordAndEvaluateRetry(Status status) override {
if (_protocol == MigrationProtocolEnum::kShardMerge || status.isOK()) {
if (status.isOK()) {
return false;
}
auto underlyingError = async_rpc::unpackRPCStatusIgnoringWriteConcernAndWriteErrors(status);
@ -270,7 +270,6 @@ void TenantMigrationDonorService::checkIfConflictsWithOtherInstances(
BSONObj initialState,
const std::vector<const repl::PrimaryOnlyService::Instance*>& existingInstances) {
auto stateDoc = tenant_migration_access_blocker::parseDonorStateDocument(initialState);
auto isNewShardMerge = stateDoc.getProtocol() == MigrationProtocolEnum::kShardMerge;
for (auto& instance : existingInstances) {
auto existingTypedInstance =
@ -282,13 +281,12 @@ void TenantMigrationDonorService::checkIfConflictsWithOtherInstances(
uassert(ErrorCodes::ConflictingOperationInProgress,
str::stream() << "Cannot start a shard merge with existing migrations in progress",
!isNewShardMerge || existingIsAborted);
existingIsAborted);
uassert(
ErrorCodes::ConflictingOperationInProgress,
str::stream() << "Cannot start a migration with an existing shard merge in progress",
existingTypedInstance->getProtocol() != MigrationProtocolEnum::kShardMerge ||
existingIsAborted);
existingIsAborted);
// Any existing migration for this tenant must be aborted and garbage-collectable.
if (stateDoc.getTenantId() &&
@ -492,12 +490,11 @@ void TenantMigrationDonorService::Instance::checkIfOptionsConflict(const BSONObj
auto tenantIdsMatch = [&] {
switch (_protocol) {
case MigrationProtocolEnum::kShardMerge:
invariant(stateDoc.getTenantIds());
return *stateDoc.getTenantIds() == _tenantIds;
case MigrationProtocolEnum::kMultitenantMigrations:
invariant(stateDoc.getTenantId());
return *stateDoc.getTenantId() == _tenantId;
default:
MONGO_UNREACHABLE;
}
MONGO_UNREACHABLE;
};
@ -921,11 +918,6 @@ ExecutorFuture<void> TenantMigrationDonorService::Instance::_sendRecipientForget
void TenantMigrationDonorService::Instance::validateTenantIdsForProtocol() {
switch (_protocol) {
case MigrationProtocolEnum::kShardMerge:
uassert(ErrorCodes::InvalidOptions,
"The field tenantIds must be set and not empty for protocol 'shard merge'",
!_tenantIds.empty());
break;
case MigrationProtocolEnum::kMultitenantMigrations:
uassert(ErrorCodes::InvalidOptions,
"The field tenantIds must not be set for protocol 'multitenant migration'",
@ -1314,10 +1306,6 @@ TenantMigrationDonorService::Instance::_waitUntilStartMigrationDonorTimestampIsC
const std::shared_ptr<executor::ScopedTaskExecutor>& executor,
const CancellationToken& abortToken) {
if (getProtocol() != MigrationProtocolEnum::kShardMerge) {
return ExecutorFuture(**executor);
}
auto opCtxHolder = cc().makeOperationContext();
auto opCtx = opCtxHolder.get();
auto startMigrationDonorTimestamp = [&] {

View File

@ -50,7 +50,6 @@ inline constexpr StringData kMigrationIdFieldName = "migrationId"_sd;
inline constexpr StringData kBackupIdFieldName = "backupId"_sd;
inline constexpr StringData kDonorHostNameFieldName = "donorHostName"_sd;
inline constexpr StringData kDonorDbPathFieldName = "dbpath"_sd;
inline constexpr StringData kMovingFilesMarker = ".shardMergeMovingFiles"_sd;
inline constexpr StringData kTableExtension = ".wt"_sd;
// Keep the backup cursor alive by pinging twice as often as the donor's default

View File

@ -181,13 +181,6 @@ inline Status validateProtocolFCVCompatibility(
if (!protocol)
return Status::OK();
if (*protocol == MigrationProtocolEnum::kShardMerge &&
!repl::feature_flags::gShardMerge.isEnabled(
serverGlobalParams.featureCompatibility.acquireFCVSnapshot())) {
return Status(ErrorCodes::IllegalOperation,
str::stream() << "protocol '" << MigrationProtocol_serializer(*protocol)
<< "' not supported");
}
return Status::OK();
}

View File

@ -162,28 +162,6 @@ void ServerlessOperationLockRegistry::recoverLocks(OperationContext* opCtx) {
return true;
});
PersistentTaskStore<ShardMergeRecipientDocument> mergeRecipientStore(
NamespaceString::kShardMergeRecipientsNamespace);
mergeRecipientStore.forEach(opCtx, {}, [&](const ShardMergeRecipientDocument& doc) {
// Do not acquire locks for the following cases. Otherwise, we can get into a potential race
// causing the recovery procedure to fail with `ErrorCodes::ConflictingServerlessOperation`.
// 1) The migration was skipped.
if (doc.getStartGarbageCollect()) {
invariant(doc.getState() == ShardMergeRecipientStateEnum::kAborted ||
doc.getState() == ShardMergeRecipientStateEnum::kCommitted);
return true;
}
// 2) State doc marked as garbage collectable.
if (doc.getExpireAt()) {
return true;
}
registry.acquireLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
doc.getId());
return true;
});
}
const std::string kOperationLockFieldName = "operationLock";

View File

@ -1,66 +0,0 @@
\* Config file to run the TLC model-checker on ShardMerge.tla.
\* See ShardMerge.tla for instructions.
CONSTANTS DonorStartMigrationRequest = DonorStartMigrationRequest
CONSTANTS DonorStartMigrationResponse = DonorStartMigrationResponse
CONSTANTS RecipientSyncDataReturnAfterPinningRequest = RecipientSyncDataReturnAfterPinningRequest
CONSTANTS RecipientSyncDataReturnAfterPinningResponse = RecipientSyncDataReturnAfterPinningResponse
CONSTANTS RecipientSyncDataRequest = RecipientSyncDataRequest
CONSTANTS RecipientSyncDataResponse = RecipientSyncDataResponse
CONSTANTS RecipientSyncDataReturnAfterReachingDonorTimestampRequest = RecipientSyncDataReturnAfterReachingDonorTimestampRequest
CONSTANTS RecipientSyncDataReturnAfterReachingDonorTimestampResponse = RecipientSyncDataReturnAfterReachingDonorTimestampResponse
CONSTANTS DonorForgetMigrationRequest = DonorForgetMigrationRequest
CONSTANTS DonorForgetMigrationResponse = DonorForgetMigrationResponse
CONSTANTS RecipientForgetMigrationRequest = RecipientForgetMigrationRequest
CONSTANTS RecipientForgetMigrationResponse = RecipientForgetMigrationResponse
CONSTANTS RecUninitialized = RecUninitialized
CONSTANTS RecPinned = RecPinned
CONSTANTS RecStarted = RecStarted
CONSTANTS RecConsistent = RecConsistent
CONSTANTS RecLagged = RecLagged
CONSTANTS RecReady = RecReady
CONSTANTS RecAborted = RecAborted
CONSTANTS RecDone = RecDone
CONSTANTS DonUninitialized = DonUninitialized
CONSTANTS DonAbortingIndexBuilds = DonAbortingIndexBuilds
CONSTANTS DonPinning = DonPinning
CONSTANTS DonDataSync = DonDataSync
CONSTANTS DonBlocking = DonBlocking
CONSTANTS DonCommitted = DonCommitted
CONSTANTS DonAborted = DonAborted
CONSTANTS DonDone = DonDone
CONSTANTS CloudUnknown = CloudUnknown
CONSTANTS CloudCommitted = CloudCommitted
CONSTANTS CloudAborted = CloudAborted
CONSTANTS CloudDone = CloudDone
CONSTANTS MigrationNone = MigrationNone
CONSTANTS MigrationCommitted = MigrationCommitted
CONSTANTS MigrationAborted = MigrationAborted
CONSTANTS SyncOK = SyncOK
CONSTANTS SyncAborted = SyncAborted
CONSTANT MaxRequests = 8
INVARIANT StateMachinesConsistent
PROPERTY MigrationEventuallyCompletes
PROPERTY MessageBagEventuallyEmpties
PROPERTY EachRequestHasAResponse
\* Not configurable.
CONSTRAINT StateConstraint
SPECIFICATION Spec
\* The spec can terminate without a deadlock. The liveness properties are present to ensure the
\* termination states are correct.
CHECK_DEADLOCK FALSE

View File

@ -1,15 +0,0 @@
---- MODULE MCShardMerge ----
\* This module defines MCShardMerge.tla constants/constraints for model-checking.
EXTENDS ShardMerge
CONSTANT MaxRequests
(**************************************************************************************************)
(* State Constraint. Used for model checking only. *)
(**************************************************************************************************)
StateConstraint ==
MaxRequests > totalRequests
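\* Bounding totalRequests keeps the reachable state space finite for TLC.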
=============================================================================

View File

@ -1,542 +0,0 @@
\* Copyright 2021-present MongoDB, Inc.
\*
\* This work is licensed under:
\* - Creative Commons Attribution-3.0 United States License
\* http://creativecommons.org/licenses/by/3.0/us/
----------------------------- MODULE ShardMerge -----------------------------
\*
\* A specification of serverless MongoDB's shard merge protocol.
\*
\* To run the model-checker, first edit the constants in MCShardMerge.cfg if desired,
\* then:
\* cd src/mongo/db/repl/tla_plus
\* ./model-check.sh ShardMerge
\*
EXTENDS Integers, FiniteSets, Sequences, TLC
\* Donor command requests and responses
CONSTANTS DonorStartMigrationRequest, DonorStartMigrationResponse
CONSTANTS DonorForgetMigrationRequest, DonorForgetMigrationResponse
\* recipientSyncData command with returnAfterPinningOldestTimestamp.
CONSTANTS RecipientSyncDataReturnAfterPinningRequest, RecipientSyncDataReturnAfterPinningResponse
\* recipientSyncData command with no special params.
CONSTANTS RecipientSyncDataRequest, RecipientSyncDataResponse
\* recipientSyncData command with returnAfterReachingDonorTimestamp.
CONSTANTS RecipientSyncDataReturnAfterReachingDonorTimestampRequest, RecipientSyncDataReturnAfterReachingDonorTimestampResponse
CONSTANTS RecipientForgetMigrationRequest, RecipientForgetMigrationResponse
\* Recipient states. The happy path is:
\* Uninitialized->Pinned->Started->Consistent->Lagged->Ready->Done.
CONSTANTS RecUninitialized, RecPinned, RecStarted, RecConsistent, RecLagged, RecReady, RecAborted, RecDone
\* Donor states. The happy path is:
\* Uninit->AbortingIndexBuilds->Pinning->DataSync->Blocking->Committed->Done.
CONSTANTS DonUninitialized, DonAbortingIndexBuilds, DonPinning, DonDataSync, DonBlocking, DonCommitted, DonAborted, DonDone
\* cloud state
CONSTANTS CloudUnknown, CloudCommitted, CloudAborted, CloudDone
\* Responses to DonorStartMigration request
CONSTANTS MigrationNone, MigrationCommitted, MigrationAborted
\* Responses to RecipientSyncData* requests
CONSTANTS SyncOK, SyncAborted
(**************************************************************************************************)
(* Global variables *)
(**************************************************************************************************)
VARIABLE messages
VARIABLE recipientState
VARIABLE donorState
VARIABLE cloudState
VARIABLE totalRequests
VARIABLE totalResponses
VARIABLE recipientAborted
donorVars == <<donorState>>
recipientVars == <<recipientState, recipientAborted>>
cloudVars == <<cloudState>>
messageVars == <<messages, totalRequests, totalResponses>>
vars == <<donorVars, recipientVars, cloudVars, messageVars>>
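\* vars is the tuple of all variables; it is used in [][Next]_vars and the WF_vars fairness
\* conditions below, so stuttering steps leave every variable unchanged.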
-------------------------------------------------------------------------------------------
(**************************************************************************************************)
(* Network Helpers, adapted from https://github.com/ongardie/raft.tla/blob/master/raft.tla *)
(**************************************************************************************************)
\* Helper for Send. Given a message m and bag of messages, return a new bag of messages with one
\* more m in it.
WithMessage(m, msgs) ==
IF m \in DOMAIN msgs THEN
[msgs EXCEPT ![m] = msgs[m] + 1]
ELSE
msgs @@ (m :> 1)
\* Helper for Discard and Reply. Given a message m and bag of messages, return a new bag of
\* messages with one less m in it.
WithoutMessage(m, msgs) ==
IF m \in DOMAIN msgs THEN
IF msgs[m] = 1 THEN
\* Remove message m from the bag.
[n \in DOMAIN msgs \ {m} |-> msgs[n]]
ELSE
[msgs EXCEPT ![m] = msgs[m] - 1]
ELSE
msgs
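\* For example, with a bag msgs = (m1 :> 2), WithMessage(m1, msgs) yields (m1 :> 3) and
\* WithoutMessage(m1, msgs) yields (m1 :> 1); removing the last copy of a message deletes
\* its key from the bag entirely.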
IsRequest(m) ==
m.mType \in {DonorStartMigrationRequest, RecipientSyncDataReturnAfterPinningRequest,
RecipientSyncDataRequest, RecipientSyncDataReturnAfterReachingDonorTimestampRequest,
DonorForgetMigrationRequest, RecipientForgetMigrationRequest}
IncTotalMessages(m) ==
IF IsRequest(m) THEN
/\ totalRequests' = totalRequests + 1
/\ UNCHANGED <<totalResponses>>
ELSE
/\ totalResponses' = totalResponses + 1
/\ UNCHANGED <<totalRequests>>
\* Add a message to the bag of messages.
Send(m) ==
/\ messages' = WithMessage(m, messages)
/\ IncTotalMessages(m)
\* Remove a message from the bag of messages. Used when a server is done processing a message.
Discard(m) ==
/\ messages' = WithoutMessage(m, messages)
/\ UNCHANGED <<totalRequests, totalResponses>>
\* Helper that both sends a message and discards a message.
SendAndDiscard(sendMessage, discardMessage) ==
/\ messages' = WithoutMessage(discardMessage, WithMessage(sendMessage, messages))
/\ IncTotalMessages(sendMessage)
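\* Request handlers below use SendAndDiscard to consume an incoming request and emit its
\* response in a single atomic step.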
(**************************************************************************************************)
(* Request and response handlers *)
(**************************************************************************************************)
\* Helper to create the donorStartMigration response based on the donor state.
DonorStartMigrationResponseGen ==
CASE donorState = DonAborted ->
[mType |-> DonorStartMigrationResponse,
mOutcome |-> MigrationAborted]
[] donorState = DonCommitted ->
[mType |-> DonorStartMigrationResponse,
mOutcome |-> MigrationCommitted]
[] donorState \in {DonUninitialized, DonAbortingIndexBuilds, DonPinning, DonDataSync,
DonBlocking, DonDone} ->
[mType |-> DonorStartMigrationResponse,
mOutcome |-> MigrationNone]
\* Donor
HandleDonorStartMigrationRequest(m) ==
/\ m.mType = DonorStartMigrationRequest
\* If the donor is unstarted, it starts; otherwise nothing happens. Either way, a response is
\* sent to cloud.
/\ CASE donorState = DonUninitialized ->
/\ donorState' = DonAbortingIndexBuilds
\* Send an immediate response to cloud.
/\ SendAndDiscard(DonorStartMigrationResponseGen, m)
[] donorState \in {DonAbortingIndexBuilds, DonPinning, DonDataSync, DonBlocking,
DonCommitted, DonAborted, DonDone} ->
/\ SendAndDiscard(DonorStartMigrationResponseGen, m)
/\ UNCHANGED <<donorVars>>
/\ UNCHANGED <<recipientVars, cloudVars, totalRequests>>
\* Cloud
HandleDonorStartMigrationResponse(m) ==
/\ m.mType = DonorStartMigrationResponse
\* Update the cloud state to whatever outcome the donor reports, if any.
/\ CASE m.mOutcome = MigrationNone ->
UNCHANGED <<cloudState>>
[] m.mOutcome = MigrationCommitted ->
cloudState' = CloudCommitted
[] m.mOutcome = MigrationAborted ->
cloudState' = CloudAborted
/\ Discard(m)
/\ UNCHANGED <<donorVars, recipientVars>>
\* Helper to generate the mSyncStatus field of a recipient response
RecipientSyncStatusGen == IF recipientAborted THEN SyncAborted ELSE SyncOK
\* Recipient
HandleRecipientSyncDataReturnAfterPinningRequest(m) ==
/\ m.mType = RecipientSyncDataReturnAfterPinningRequest
/\ CASE recipientState = RecUninitialized ->
recipientState' = RecPinned
[] recipientState \in {RecPinned, RecStarted, RecConsistent,
RecLagged, RecReady, RecAborted, RecDone} ->
UNCHANGED recipientState
/\ SendAndDiscard([mType |-> RecipientSyncDataReturnAfterPinningResponse,
mSyncStatus |-> RecipientSyncStatusGen], m)
/\ UNCHANGED <<recipientAborted, donorVars, cloudVars>>
\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataReturnAfterPinningResponse_SyncOK(m) ==
CASE donorState = DonPinning ->
\* Move the state machine to "data sync" and send RecipientSyncData
/\ donorState' = DonDataSync
/\ SendAndDiscard([mType |-> RecipientSyncDataRequest], m)
[] donorState \in {DonDataSync, DonBlocking, DonCommitted, DonAborted, DonDone} ->
\* Just ignore this message, since we're past this step in the protocol
\* and this is a delayed message.
/\ Discard(m)
/\ UNCHANGED <<donorState>>
\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataReturnAfterPinningResponse_SyncAborted(m) ==
/\ CASE donorState = DonPinning ->
\* The recipient failed the migration, so abort.
donorState' = DonAborted
[] donorState \in {DonDataSync, DonBlocking, DonAborted, DonDone} ->
\* Delayed response to an earlier message, ignore it.
UNCHANGED <<donorState>>
/\ Discard(m)
\* Donor
HandleRecipientSyncDataReturnAfterPinningResponse(m) ==
/\ m.mType = RecipientSyncDataReturnAfterPinningResponse
/\ CASE m.mSyncStatus = SyncOK ->
HandleRecipientSyncDataReturnAfterPinningResponse_SyncOK(m)
[] m.mSyncStatus = SyncAborted ->
HandleRecipientSyncDataReturnAfterPinningResponse_SyncAborted(m)
/\ UNCHANGED <<recipientVars, cloudVars>>
\* Recipient
HandleRecipientSyncDataRequest(m) ==
/\ m.mType = RecipientSyncDataRequest
\* Don't handle messages until we transition to consistent, or abort.
/\ recipientState # RecStarted
/\ Assert(recipientState # RecUninitialized,
"Received RecipientSyncData in state "
\o ToString(recipientState))
/\ CASE recipientState = RecPinned ->
\* Starts the migration. The recipient does not respond to the donor until it is
\* consistent.
/\ recipientState' = RecStarted
/\ Discard(m)
/\ UNCHANGED <<recipientAborted>>
[] recipientState # RecPinned ->
/\ SendAndDiscard([mType |-> RecipientSyncDataResponse,
mSyncStatus |-> RecipientSyncStatusGen], m)
/\ UNCHANGED <<recipientVars>>
/\ UNCHANGED <<donorVars, cloudVars>>
\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataResponse_SyncOK(m) ==
/\ CASE donorState = DonDataSync ->
\* Move the state machine to "blocking" and send RecipientSyncDataReturnAfterReachingDonorTimestamp.
/\ donorState' = DonBlocking
/\ SendAndDiscard([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampRequest], m)
[] donorState \in {DonBlocking, DonCommitted, DonAborted, DonDone} ->
\* Just ignore this message, since we're past this step in the protocol
\* and this is a delayed message.
/\ Discard(m)
/\ UNCHANGED <<donorState>>
\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataResponse_SyncAborted(m) ==
/\ CASE donorState \in {DonDataSync, DonBlocking} ->
\* The recipient failed the migration, so abort.
\* We can get this response in Blocking when there are two
\* RecipientSyncData responses and the "OK" one is processed first.
donorState' = DonAborted
[] donorState \in {DonCommitted, DonAborted, DonDone} ->
\* The migration is already finished, do nothing.
UNCHANGED <<donorState>>
/\ Discard(m)
\* Donor
HandleRecipientSyncDataResponse(m) ==
/\ m.mType = RecipientSyncDataResponse
/\ Assert(donorState \notin {DonUninitialized, DonPinning},
"Received RecipientSyncDataResponse in state "
\o ToString(donorState))
/\ CASE m.mSyncStatus = SyncOK ->
HandleRecipientSyncDataResponse_SyncOK(m)
[] m.mSyncStatus = SyncAborted ->
HandleRecipientSyncDataResponse_SyncAborted(m)
/\ UNCHANGED <<recipientVars, cloudVars>>
\* Recipient
HandleRecipientSyncDataReturnAfterReachingDonorTimestampRequest(m) ==
/\ m.mType = RecipientSyncDataReturnAfterReachingDonorTimestampRequest
\* We don't want to handle this request being processed while lagged, since that would
\* require modeling request joining behavior, which is unnecessary complexity for the
\* purposes of this model. A RecipientSyncDataReturnAfterReachingDonorTimestamp request being
\* processed while in RecLagged must be a duplicate message.
/\ recipientState \notin {RecLagged}
/\ CASE recipientState = RecConsistent ->
\* Move the state machine to "lagged", since the recipient now knows the ending
\* timestamp. The recipient does not respond to the donor until it has caught up.
/\ recipientState' = RecLagged
/\ Discard(m)
/\ UNCHANGED <<recipientAborted>>
[] recipientState # RecConsistent ->
/\ SendAndDiscard([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampResponse,
mSyncStatus |-> RecipientSyncStatusGen], m)
/\ UNCHANGED <<recipientVars>>
/\ UNCHANGED <<donorVars, cloudVars>>
\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse_SyncOK ==
CASE donorState = DonBlocking ->
\* The recipient is done!
donorState' = DonCommitted
[] donorState \in {DonCommitted, DonAborted, DonDone} ->
\* Just ignore this message, since we're past this step in the protocol
\* and this is a delayed message.
UNCHANGED <<donorState>>
\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse_SyncAborted ==
CASE donorState = DonBlocking ->
\* The recipient failed the migration, so abort.
donorState' = DonAborted
[] donorState \in {DonAborted, DonDone} ->
\* If the migration is already aborted or finished, do nothing.
UNCHANGED <<donorState>>
\* Donor
HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse(m) ==
/\ m.mType = RecipientSyncDataReturnAfterReachingDonorTimestampResponse
/\ CASE m.mSyncStatus = SyncOK ->
HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse_SyncOK
[] m.mSyncStatus = SyncAborted ->
HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse_SyncAborted
/\ Discard(m)
/\ UNCHANGED <<recipientVars, cloudVars>>
\* Donor
HandleDonorForgetMigrationRequest(m) ==
/\ m.mType = DonorForgetMigrationRequest
\* Don't mark the donor finished until the recipient is.
/\ SendAndDiscard([mType |-> RecipientForgetMigrationRequest], m)
/\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
\* Cloud
HandleDonorForgetMigrationResponse(m) ==
/\ m.mType = DonorForgetMigrationResponse
\* The donor and recipient unconditionally finish the migration, so cloud can too.
/\ cloudState' = CloudDone
/\ Discard(m)
/\ UNCHANGED <<donorVars, recipientVars>>
\* Recipient
HandleRecipientForgetMigrationRequest(m) ==
/\ m.mType = RecipientForgetMigrationRequest
\* Finish the migration no matter what, and tell the donor.
/\ recipientState' = RecDone
/\ SendAndDiscard([mType |-> RecipientForgetMigrationResponse], m)
/\ UNCHANGED <<donorVars, cloudVars, recipientAborted>>
\* Donor
HandleRecipientForgetMigrationResponse(m) ==
/\ m.mType = RecipientForgetMigrationResponse
\* The recipient has finished the migration, so the donor can now finish as well and report
\* completion to cloud.
/\ donorState' = DonDone
/\ SendAndDiscard([mType |-> DonorForgetMigrationResponse], m)
/\ UNCHANGED <<recipientVars, cloudVars>>
(******************************************************************************)
(* [ACTION] *)
(******************************************************************************)
DonorAbortsIndexBuilds ==
/\ donorState = DonAbortingIndexBuilds
/\ donorState' = DonPinning
\* Call recipientSyncData with returnAfterPinningOldestTimestamp.
/\ Send([mType |-> RecipientSyncDataReturnAfterPinningRequest])
/\ UNCHANGED <<totalResponses, recipientVars, cloudVars>>
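\* (Send already updates messages and totalRequests, and donorState is set above, so only the
\* remaining variables are pinned as unchanged.)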
\* Models a retry of recipientSyncData with returnAfterPinningOldestTimestamp.
DonorSendsRecipientSyncDataReturnAfterPinningRequest ==
/\ donorState = DonPinning
/\ Send([mType |-> RecipientSyncDataReturnAfterPinningRequest])
/\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
\* Models the first try or a retry of recipientSyncData.
DonorSendsRecipientSyncDataRequest ==
/\ donorState = DonDataSync
/\ Send([mType |-> RecipientSyncDataRequest])
/\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
\* Models a retry of RecipientSyncDataReturnAfterReachingDonorTimestamp.
DonorSendsRecipientSyncDataReturnAfterReachingDonorTimestampRequest ==
/\ donorState = DonBlocking
/\ Send([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampRequest])
/\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
CloudSendsDonorStartMigrationRequest ==
/\ cloudState = CloudUnknown
/\ Send([mType |-> DonorStartMigrationRequest])
/\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
CloudSendsDonorForgetMigrationRequest ==
/\ cloudState \in {CloudAborted, CloudCommitted}
/\ Send([mType |-> DonorForgetMigrationRequest])
/\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
RecipientBecomesConsistent ==
/\ recipientState = RecStarted
/\ recipientState' = RecConsistent
/\ Send([mType |-> RecipientSyncDataResponse,
mSyncStatus |-> RecipientSyncStatusGen])
/\ UNCHANGED <<donorVars, cloudVars, recipientAborted>>
RecipientCatchesUp ==
/\ recipientState = RecLagged
/\ recipientState' = RecReady
/\ Send([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampResponse,
mSyncStatus |-> RecipientSyncStatusGen])
/\ UNCHANGED <<donorVars, cloudVars, recipientAborted>>
RecipientFailsMigration ==
\* Recipient can't fail after it's ready, finished, or already aborted.
/\ recipientState \notin {RecUninitialized, RecReady, RecAborted, RecDone}
/\ recipientState' = RecAborted
/\ recipientAborted' = TRUE
/\ CASE recipientState = RecStarted ->
\* The recipient has an active RecipientSyncData request.
Send([mType |-> RecipientSyncDataResponse,
mSyncStatus |-> SyncAborted])
[] recipientState = RecLagged ->
\* When "lagged" the recipient has an active RecipientSyncDataReturnAfterReachingDonorTimestamp request.
Send([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampResponse,
mSyncStatus |-> SyncAborted])
[] recipientState \in {RecUninitialized, RecPinned, RecConsistent} ->
\* No active donor request.
UNCHANGED <<messageVars>>
/\ UNCHANGED <<cloudVars, donorVars>>
(**************************************************************************************************)
(* Correctness Properties *)
(**************************************************************************************************)
StateMachinesInconsistent ==
\/ /\ cloudState = CloudCommitted
/\ \/ recipientState \notin {RecReady, RecDone}
\/ recipientAborted = TRUE
\/ donorState \notin {DonCommitted, DonDone}
\/ /\ donorState = DonCommitted
/\ \/ recipientState \notin {RecReady, RecDone}
\/ recipientAborted = TRUE
StateMachinesConsistent == ~StateMachinesInconsistent
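\* That is: whenever cloud or the donor considers the migration committed, the recipient must
\* have reached RecReady or RecDone without aborting, and a cloud commit additionally requires
\* the donor to be in DonCommitted or DonDone.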
(**************************************************************************************************)
(* Liveness properties *)
(**************************************************************************************************)
\* Checks that the state machines eventually converge on terminating states.
MigrationEventuallyCompletes ==
<> /\ recipientState = RecDone
/\ donorState = DonDone
/\ cloudState = CloudDone
\* Checks that if the bag fills up, it eventually empties.
MessageBagEventuallyEmpties ==
Cardinality(DOMAIN messages) > 0 ~> Cardinality(DOMAIN messages) = 0
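\* (~> is "leads to": whenever the left-hand side holds, the right-hand side eventually holds.)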
\* Checks that the number of totalRequests eventually equals the number of totalResponses,
\* and stays that way. This will always be true right before termination.
EachRequestHasAResponse ==
<>[] (totalRequests = totalResponses)
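\* (<>[] is "eventually always": the equality eventually becomes true and remains true.)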
(**************************************************************************************************)
(* Spec definition *)
(**************************************************************************************************)
Init ==
/\ messages = [m \in {} |-> 0]
/\ donorState = DonUninitialized
/\ recipientState = RecUninitialized
/\ cloudState = CloudUnknown
/\ totalRequests = 0
/\ totalResponses = 0
/\ recipientAborted = FALSE
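\* Init models a fresh migration: no messages in flight, all three state machines in their
\* initial states, and no requests or responses counted yet.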
RecipientBecomesConsistentAction == RecipientBecomesConsistent
RecipientCatchesUpAction == RecipientCatchesUp
RecipientFailsMigrationAction == RecipientFailsMigration
CloudSendsDonorStartMigrationRequestAction == CloudSendsDonorStartMigrationRequest
CloudSendsDonorForgetMigrationRequestAction == CloudSendsDonorForgetMigrationRequest
DonorAbortsIndexBuildsAction == DonorAbortsIndexBuilds
DonorSendsRecipientSyncDataReturnAfterPinningRequestAction == DonorSendsRecipientSyncDataReturnAfterPinningRequest
DonorSendsRecipientSyncDataRequestAction == DonorSendsRecipientSyncDataRequest
DonorSendsRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction == DonorSendsRecipientSyncDataReturnAfterReachingDonorTimestampRequest
ReceiveDonorStartMigrationRequestAction == \E m \in DOMAIN messages :
HandleDonorStartMigrationRequest(m)
ReceiveDonorStartMigrationResponseAction == \E m \in DOMAIN messages :
HandleDonorStartMigrationResponse(m)
ReceiveRecipientSyncDataReturnAfterPinningRequestAction == \E m \in DOMAIN messages :
HandleRecipientSyncDataReturnAfterPinningRequest(m)
ReceiveRecipientSyncDataReturnAfterPinningResponseAction == \E m \in DOMAIN messages :
HandleRecipientSyncDataReturnAfterPinningResponse(m)
ReceiveRecipientSyncDataRequestAction == \E m \in DOMAIN messages :
HandleRecipientSyncDataRequest(m)
ReceiveRecipientSyncDataResponseAction == \E m \in DOMAIN messages :
HandleRecipientSyncDataResponse(m)
ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction == \E m \in DOMAIN messages :
HandleRecipientSyncDataReturnAfterReachingDonorTimestampRequest(m)
ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampResponseAction == \E m \in DOMAIN messages :
HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse(m)
ReceiveDonorForgetMigrationRequestAction == \E m \in DOMAIN messages :
HandleDonorForgetMigrationRequest(m)
ReceiveDonorForgetMigrationResponseAction == \E m \in DOMAIN messages :
HandleDonorForgetMigrationResponse(m)
ReceiveRecipientForgetMigrationRequestAction == \E m \in DOMAIN messages :
HandleRecipientForgetMigrationRequest(m)
ReceiveRecipientForgetMigrationResponseAction == \E m \in DOMAIN messages :
HandleRecipientForgetMigrationResponse(m)
Next ==
\/ RecipientBecomesConsistentAction
\/ RecipientCatchesUpAction
\/ RecipientFailsMigrationAction
\/ CloudSendsDonorStartMigrationRequestAction
\/ CloudSendsDonorForgetMigrationRequestAction
\/ DonorAbortsIndexBuildsAction
\/ DonorSendsRecipientSyncDataReturnAfterPinningRequestAction
\/ DonorSendsRecipientSyncDataRequestAction
\/ DonorSendsRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction
\/ ReceiveRecipientSyncDataReturnAfterPinningRequestAction
\/ ReceiveRecipientSyncDataReturnAfterPinningResponseAction
\/ ReceiveDonorStartMigrationRequestAction
\/ ReceiveDonorStartMigrationResponseAction
\/ ReceiveRecipientSyncDataRequestAction
\/ ReceiveRecipientSyncDataResponseAction
\/ ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction
\/ ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampResponseAction
\/ ReceiveDonorForgetMigrationRequestAction
\/ ReceiveDonorForgetMigrationResponseAction
\/ ReceiveRecipientForgetMigrationRequestAction
\/ ReceiveRecipientForgetMigrationResponseAction
\* Add fairness constraints so the above liveness properties are met.
Liveness ==
/\ WF_vars(ReceiveDonorStartMigrationRequestAction)
/\ WF_vars(ReceiveDonorStartMigrationResponseAction)
/\ WF_vars(ReceiveRecipientSyncDataReturnAfterPinningRequestAction)
/\ WF_vars(ReceiveRecipientSyncDataReturnAfterPinningResponseAction)
/\ WF_vars(ReceiveRecipientSyncDataRequestAction)
/\ WF_vars(ReceiveRecipientSyncDataResponseAction)
/\ WF_vars(ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction)
/\ WF_vars(ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampResponseAction)
/\ WF_vars(ReceiveDonorForgetMigrationRequestAction)
/\ WF_vars(ReceiveDonorForgetMigrationResponseAction)
/\ WF_vars(ReceiveRecipientForgetMigrationRequestAction)
/\ WF_vars(ReceiveRecipientForgetMigrationResponseAction)
/\ WF_vars(CloudSendsDonorStartMigrationRequestAction)
/\ WF_vars(CloudSendsDonorForgetMigrationRequestAction)
Spec == Init /\ [][Next]_vars /\ Liveness
=============================================================================