mirror of https://github.com/mongodb/mongo
SERVER-95006 Remove ShardMerge code (#27812)
GitOrigin-RevId: cba7613ea436b56b5bfb24ec95ee18692ddcec2e
parent 855dfadef0
commit 35333ed376
Binary file not shown. (Before: 433 KiB)
@@ -155,12 +155,6 @@ overrides:
    exec_timeout: 600 # 10 hours
  - task: update_timeseries_fuzzer
    exec_timeout: 600 # 10 hours
  - task: shard_merge_jscore_passthrough
    exec_timeout: 240 # 4 hours
  - task: shard_merge_causally_consistent_jscore_passthrough
    exec_timeout: 240 # 4 hours
  - task: shard_merge_multi_stmt_txn_jscore_passthrough
    exec_timeout: 240 # 4 hours
  - task: read_concern_linearizable_passthrough
    exec_timeout: 270 # 4.5 hours
  - task: sharding

@@ -718,7 +718,6 @@ env.Library(
        "repl/repl_coordinator_impl",
        "repl/replication_recovery",
        "repl/serveronly_repl",
        "repl/shard_merge_recipient_service",
        "repl/storage_interface_impl",
        "repl/tenant_migration_donor_service",
        "repl/tenant_migration_recipient_service",

@@ -238,7 +238,6 @@ env.Library(
        "$BUILD_DIR/mongo/db/repl/repl_coordinator_interface",
        "$BUILD_DIR/mongo/db/repl/repl_server_parameters",
        "$BUILD_DIR/mongo/db/repl/replica_set_messages",
        "$BUILD_DIR/mongo/db/repl/shard_merge_recipient_service",
        "$BUILD_DIR/mongo/db/repl/tenant_migration_donor_service",
        "$BUILD_DIR/mongo/db/repl/tenant_migration_recipient_service",
        "$BUILD_DIR/mongo/db/rw_concern_d",

@@ -96,7 +96,6 @@
#include "mongo/db/repl/repl_set_config.h"
#include "mongo/db/repl/repl_settings.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/shard_merge_recipient_service.h"
#include "mongo/db/repl/tenant_migration_donor_service.h"
#include "mongo/db/repl/tenant_migration_recipient_service.h"
#include "mongo/db/s/config/configsvr_coordinator_service.h"

@@ -1641,12 +1640,6 @@ private:
                ->lookupServiceByName(
                    repl::TenantMigrationRecipientService::kTenantMigrationRecipientServiceName));
        recipientService->abortAllMigrations(opCtx);

        auto mergeRecipientService = checked_cast<repl::ShardMergeRecipientService*>(
            repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
                ->lookupServiceByName(
                    repl::ShardMergeRecipientService::kShardMergeRecipientServiceName));
        mergeRecipientService->abortAllMigrations(opCtx);
    }

    /**

@@ -52,7 +52,6 @@
#include "mongo/db/repl/repl_server_parameters_gen.h"
#include "mongo/db/repl/repl_settings.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/shard_merge_recipient_service.h"
#include "mongo/db/repl/tenant_migration_recipient_service.h"
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
#include "mongo/db/repl/tenant_migration_util.h"

@@ -134,8 +133,6 @@ public:
        switch (migrationProtocol) {
            case MigrationProtocolEnum::kMultitenantMigrations:
                return _handleMTMRecipientSyncDataCmd(opCtx, cmd);
            case MigrationProtocolEnum::kShardMerge:
                return _handleShardMergeRecipientSyncDataCmd(opCtx, cmd);
            default:
                MONGO_UNREACHABLE;
        }

@@ -167,29 +164,6 @@ public:
            : Response(recipientInstance->waitUntilMigrationReachesConsistentState(opCtx));
    }

    Response _handleShardMergeRecipientSyncDataCmd(OperationContext* opCtx,
                                                   const Request& cmd) {
        ShardMergeRecipientDocument stateDoc(cmd.getMigrationId(),
                                             cmd.getDonorConnectionString().toString(),
                                             *cmd.getTenantIds(),
                                             cmd.getStartMigrationDonorTimestamp(),
                                             cmd.getReadPreferenceSettings());

        auto recipientService =
            repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
                ->lookupServiceByName(
                    repl::ShardMergeRecipientService::kShardMergeRecipientServiceName);
        auto recipientInstance = repl::ShardMergeRecipientService::Instance::getOrCreate(
            opCtx, recipientService, stateDoc.toBSON());

        auto returnAfterReachingDonorTs = cmd.getReturnAfterReachingDonorTimestamp();

        return returnAfterReachingDonorTs
            ? Response(recipientInstance->waitUntilMigrationReachesReturnAfterReachingTimestamp(
                  opCtx, *returnAfterReachingDonorTs))
            : Response(recipientInstance->waitUntilMigrationReachesConsistentState(opCtx));
    }

    void doCheckAuthorization(OperationContext* opCtx) const final {
        uassert(ErrorCodes::Unauthorized,
                "Unauthorized",

@@ -258,17 +232,6 @@ public:
              "Received RecipientVoteImportedFiles request",
              "migrationId"_attr = cmd.getMigrationId(),
              "from"_attr = cmd.getFrom());
        auto recipientService =
            repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
                ->lookupServiceByName(
                    repl::ShardMergeRecipientService::kShardMergeRecipientServiceName);
        auto [instance, _] = repl::ShardMergeRecipientService::Instance::lookup(
            opCtx, recipientService, BSON("_id" << cmd.getMigrationId()));
        uassert(ErrorCodes::NoSuchTenantMigration,
                str::stream() << "Could not find tenant migration with id "
                              << cmd.getMigrationId(),
                instance);
        (*instance)->onMemberImportedFiles(cmd.getFrom());
    }

private:

@@ -344,8 +307,6 @@ public:
        switch (migrationProtocol) {
            case MigrationProtocolEnum::kMultitenantMigrations:
                return _handleMTMRecipientForgetMigrationCmd(opCtx, cmd);
            case MigrationProtocolEnum::kShardMerge:
                return _handleShardMergeRecipientForgetMigrationCmd(opCtx, cmd);
            default:
                MONGO_UNREACHABLE;
        }

@@ -380,32 +341,6 @@ public:
        recipientInstance->getForgetMigrationDurableFuture().get(opCtx);
    }

    void _handleShardMergeRecipientForgetMigrationCmd(OperationContext* opCtx,
                                                      const Request& cmd) {
        ShardMergeRecipientDocument stateDoc(cmd.getMigrationId(),
                                             cmd.getDonorConnectionString().toString(),
                                             *cmd.getTenantIds(),
                                             kUnusedStartMigrationTimestamp,
                                             cmd.getReadPreferenceSettings());

        // Set 'startGarbageCollect' true to not start a migration (and install access blocker
        // or get serverless lock) unnecessarily if this recipientForgetMigration command is
        // received before a recipientSyncData command or after the state doc is garbage
        // collected.
        stateDoc.setStartGarbageCollect(true);

        auto recipientService =
            repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
                ->lookupServiceByName(
                    repl::ShardMergeRecipientService::kShardMergeRecipientServiceName);
        auto recipientInstance = repl::ShardMergeRecipientService::Instance::getOrCreate(
            opCtx, recipientService, stateDoc.toBSON(), false);

        // Instruct the instance run() function to mark this migration garbage collectable.
        recipientInstance->onReceiveRecipientForgetMigration(opCtx, *cmd.getDecision());
        recipientInstance->getForgetMigrationDurableFuture().get(opCtx);
    }

    void doCheckAuthorization(OperationContext* opCtx) const final {
        uassert(ErrorCodes::Unauthorized,
                "Unauthorized",

@@ -161,8 +161,6 @@
#include "mongo/db/repl/replication_coordinator_impl_gen.h"
#include "mongo/db/repl/replication_process.h"
#include "mongo/db/repl/replication_recovery.h"
#include "mongo/db/repl/shard_merge_recipient_op_observer.h"
#include "mongo/db/repl/shard_merge_recipient_service.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/tenant_migration_access_blocker_registry.h"

@@ -445,7 +443,6 @@ void registerPrimaryOnlyServices(ServiceContext* serviceContext) {
    if (getGlobalReplSettings().isServerless()) {
        services.push_back(std::make_unique<TenantMigrationDonorService>(serviceContext));
        services.push_back(std::make_unique<repl::TenantMigrationRecipientService>(serviceContext));
        services.push_back(std::make_unique<repl::ShardMergeRecipientService>(serviceContext));
    }

    if (change_stream_serverless_helpers::canInitializeServices()) {

@@ -1482,8 +1479,6 @@ void setUpObservers(ServiceContext* serviceContext) {
            std::make_unique<repl::TenantMigrationDonorOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::TenantMigrationRecipientOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::ShardMergeRecipientOpObserver>());
    }
    if (!gMultitenancySupport) {
        opObserverRegistry->addObserver(

@@ -1511,8 +1506,6 @@ void setUpObservers(ServiceContext* serviceContext) {
            std::make_unique<repl::TenantMigrationDonorOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::TenantMigrationRecipientOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::ShardMergeRecipientOpObserver>());
    }

    auto replCoord = repl::ReplicationCoordinator::get(serviceContext);

@@ -148,9 +148,6 @@ bool NamespaceString::isLegalClientSystemNS() const {
 * Process updates to 'admin.system.version' individually as well so the secondary's FCV when
 * processing each operation matches the primary's when committing that operation.
 *
 * Process updates to 'config.shardMergeRecipients' individually so they serialize after
 * inserts into 'config.donatedFiles.<migrationId>'.
 *
 * Oplog entries on 'config.shards' should be processed one at a time, otherwise the in-memory state
 * that is kept on the TopologyTimeTicker might be wrong.
 *

@@ -162,7 +159,7 @@ bool NamespaceString::mustBeAppliedInOwnOplogBatch() const {
    return isSystemDotViews() || isServerConfigurationCollection() || isPrivilegeCollection() ||
        ns == kDonorReshardingOperationsNamespace.ns() ||
        ns == kForceOplogBatchBoundaryNamespace.ns() ||
        ns == kTenantMigrationDonorsNamespace.ns() || ns == kShardMergeRecipientsNamespace.ns() ||
        ns == kTenantMigrationDonorsNamespace.ns() ||
        ns == kTenantMigrationRecipientsNamespace.ns() || ns == kConfigsvrShardsNamespace.ns();
}

@@ -92,9 +92,6 @@ NSS_CONSTANT(kTenantMigrationRecipientsNamespace,
             DatabaseName::kConfig,
             "tenantMigrationRecipients"_sd)

// Namespace for storing the persisted state of shard merge recipient service instances.
NSS_CONSTANT(kShardMergeRecipientsNamespace, DatabaseName::kConfig, "shardMergeRecipients"_sd)

// Namespace for view on local.oplog.rs for tenant migrations.
NSS_CONSTANT(kTenantMigrationOplogView, DatabaseName::kLocal, "system.tenantMigration.oplogView"_sd)

@@ -14,7 +14,6 @@ env.Benchmark(
        "$BUILD_DIR/mongo/db/auth/authserver",
        "$BUILD_DIR/mongo/db/repl/primary_only_service",
        "$BUILD_DIR/mongo/db/repl/replmocks",
        "$BUILD_DIR/mongo/db/repl/shard_merge_recipient_service",
        "$BUILD_DIR/mongo/db/repl/tenant_migration_donor_service",
        "$BUILD_DIR/mongo/db/repl/tenant_migration_recipient_service",
        "$BUILD_DIR/mongo/db/s/sharding_runtime_d",

@@ -44,7 +44,6 @@
#include "mongo/db/op_observer/user_write_block_mode_op_observer.h"
#include "mongo/db/repl/primary_only_service_op_observer.h"
#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/db/repl/shard_merge_recipient_op_observer.h"
#include "mongo/db/repl/tenant_migration_donor_op_observer.h"
#include "mongo/db/repl/tenant_migration_recipient_op_observer.h"
#include "mongo/db/s/config_server_op_observer.h"

@@ -100,8 +99,6 @@ void setUpObservers(ServiceContext* serviceContext,
            std::make_unique<repl::TenantMigrationDonorOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::TenantMigrationRecipientOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::ShardMergeRecipientOpObserver>());
    }
    if (!gMultitenancySupport) {
        opObserverRegistry->addObserver(

@@ -129,8 +126,6 @@ void setUpObservers(ServiceContext* serviceContext,
            std::make_unique<repl::TenantMigrationDonorOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::TenantMigrationRecipientOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::ShardMergeRecipientOpObserver>());
    }
    if (!gMultitenancySupport) { // && replCoord && replCoord->getSettings().isReplSet()) {
        opObserverRegistry->addObserver(

@@ -1473,7 +1473,6 @@ mongo_cc_library(
    name = "tenant_migration_utils",
    srcs = [
        "tenant_migration_recipient_entry_helpers.cpp",
        "tenant_migration_shard_merge_util.cpp",
        "tenant_migration_util.cpp",
    ],
    hdrs = [

@@ -555,7 +555,6 @@ env.Library(
        "tenant_database_cloner.cpp",
        "tenant_base_cloner.cpp",
        "tenant_file_cloner.cpp",
        "tenant_file_importer_service.cpp",
    ],
    LIBDEPS=[
        "base_cloner",

@@ -700,52 +699,6 @@ env.Library(
    ],
)

env.Library(
    target="shard_merge_recipient_service",
    source=[
        "shard_merge_recipient_op_observer.cpp",
        "shard_merge_recipient_service.cpp",
    ],
    LIBDEPS=[
        "$BUILD_DIR/mongo/client/fetcher",
        "$BUILD_DIR/mongo/client/read_preference",
        "$BUILD_DIR/mongo/db/catalog/commit_quorum_options",
        "$BUILD_DIR/mongo/db/vector_clock_mutable",
        "tenant_migration_access_blocker",
        "tenant_migration_statistics",
        "tenant_migration_utils",
    ],
    LIBDEPS_PRIVATE=[
        "$BUILD_DIR/mongo/client/clientdriver_network",
        "$BUILD_DIR/mongo/db/catalog/catalog_helpers",
        "$BUILD_DIR/mongo/db/catalog/collection_crud",
        "$BUILD_DIR/mongo/db/catalog/local_oplog_info",
        "$BUILD_DIR/mongo/db/concurrency/exception_util",
        "$BUILD_DIR/mongo/db/index_builds_coordinator_mongod",
        "$BUILD_DIR/mongo/db/multitenancy",
        "$BUILD_DIR/mongo/db/pipeline/process_interface/mongo_process_interface",
        "$BUILD_DIR/mongo/db/query/write_ops/write_ops_exec",
        "$BUILD_DIR/mongo/db/serverless/serverless_lock",
        "$BUILD_DIR/mongo/db/session/session_catalog_mongod",
        "$BUILD_DIR/mongo/db/storage/storage_options",
        "$BUILD_DIR/mongo/db/transaction/transaction",
        "cloner_utils",
        "oplog",
        "oplog_buffer_collection",
        "oplog_entry",
        "oplog_fetcher",
        "oplog_interface_local",
        "primary_only_service",
        "repl_server_parameters",
        "replica_set_aware_service",
        "replication_auth",
        "tenant_migration_cloners",
        "tenant_migration_state_machine_idl",
        "tenant_oplog_processing",
        "timestamp_block",
    ],
)

env.Library(
    target="tenant_migration_recipient_service",
    source=[

@@ -1027,8 +980,6 @@ if wiredtiger:
            "rollback_checker_test.cpp",
            "rollback_impl_test.cpp",
            "scatter_gather_test.cpp",
            "shard_merge_recipient_op_observer_test.cpp",
            "shard_merge_recipient_service_test.cpp",
            "speculative_majority_read_info_test.cpp",
            "split_horizon_test.cpp",
            "split_prepare_session_manager_test.cpp",

@@ -1037,13 +988,11 @@ if wiredtiger:
            "sync_source_resolver_test.cpp",
            "task_runner_test.cpp",
            "task_runner_test_fixture.cpp",
            "tenant_file_importer_service_test.cpp",
            "tenant_migration_access_blocker_registry_test.cpp",
            "tenant_migration_access_blocker_util_test.cpp",
            "tenant_migration_recipient_access_blocker_test.cpp",
            "tenant_migration_recipient_entry_helpers_test.cpp",
            "tenant_oplog_applier_test.cpp",
            "tenant_oplog_applier_shard_merge_test.cpp",
            "tenant_oplog_batcher_test.cpp",
            "vote_requester_test.cpp",
            "wait_for_majority_service_test.cpp",

@@ -1133,7 +1082,6 @@ if wiredtiger:
            "rollback_impl",
            "rollback_test_fixture",
            "scatter_gather",
            "shard_merge_recipient_service",
            "speculative_majority_read_info",
            "split_horizon",
            "split_prepare_session_manager",

@@ -1350,7 +1298,6 @@ env.Benchmark(
        "primary_only_service",
        "replication_consistency_markers_impl",
        "replmocks",
        "shard_merge_recipient_service",
        "storage_interface_impl",
        "tenant_migration_donor_service",
        "tenant_migration_recipient_service",

@@ -70,10 +70,6 @@ BSONObj ClonerUtils::buildMajorityWaitRequest(Timestamp operationTime) {
bool ClonerUtils::isDatabaseForTenant(const DatabaseName& db,
                                      const boost::optional<TenantId>& tenant,
                                      MigrationProtocolEnum protocol) {
    if (!tenant) {
        return protocol == MigrationProtocolEnum::kShardMerge;
    }

    if (auto tenantId = db.tenantId()) {
        return tenantId == *tenant;
    }

@@ -247,28 +247,6 @@ Status insertDocumentsForOplog(OperationContext* opCtx,
    return Status::OK();
}

void assertInitialSyncCanContinueDuringShardMerge(OperationContext* opCtx,
                                                  const NamespaceString& nss,
                                                  const OplogEntry& op) {
    // Running shard merge during initial sync can lead to potential data loss on this node.
    // So, we perform a safety check during oplog catchup and at the end of initial sync
    // recovery. (See recoverShardMergeRecipientAccessBlockers() for the detailed comment about the
    // problematic scenario that can cause data loss.)
    if (nss == NamespaceString::kShardMergeRecipientsNamespace) {
        if (auto replCoord = repl::ReplicationCoordinator::get(opCtx); replCoord &&
            replCoord->getSettings().isReplSet() && replCoord->getMemberState().startup2()) {
            BSONElement idField = op.getObject().getField("_id");
            // If the 'o' field does not have an _id, then 'o2' should have it.
            // Otherwise, the oplog entry is corrupted.
            if (idField.eoo() && op.getObject2()) {
                idField = op.getObject2()->getField("_id");
            }
            const auto& migrationId = uassertStatusOK(UUID::parse(idField));
            tenant_migration_access_blocker::assertOnUnsafeInitialSync(migrationId);
        }
    }
}

} // namespace

ApplyImportCollectionFn applyImportCollection = applyImportCollectionDefault;

@@ -1491,8 +1469,6 @@ Status applyOperation_inlock(OperationContext* opCtx,

    const CollectionPtr& collection = collectionAcquisition.getCollectionPtr();

    assertInitialSyncCanContinueDuringShardMerge(opCtx, requestNss, op);

    BSONObj o = op.getObject();

    // The feature compatibility version in the server configuration collection must not change

@@ -857,13 +857,6 @@ server_parameters:
        redact: false

feature_flags:
    featureFlagShardMerge:
        description: When enabled, multitenant migration uses the "shard merge" protocol.
        cpp_varname: feature_flags::gShardMerge
        default: true
        version: 7.1
        shouldBeFCVGated: true

    featureFlagSecondaryIndexChecksInDbCheck:
        description: When enabled, dbCheck runs document and secondary index consistency checks in addition to replica set data consistency checks.
        cpp_varname: feature_flags::gSecondaryIndexChecksInDbCheck

@@ -46,7 +46,6 @@
#include "mongo/db/repl/replication_consistency_markers_gen.h"
#include "mongo/db/repl/replication_consistency_markers_impl.h"
#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/db/repl/shard_merge_recipient_op_observer.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/storage_interface_mock.h"

@@ -170,8 +169,6 @@ void setUpObservers(ServiceContext* serviceContext, ClusterRole clusterRole, boo
            std::make_unique<repl::TenantMigrationDonorOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::TenantMigrationRecipientOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::ShardMergeRecipientOpObserver>());
    }
    if (!gMultitenancySupport) {
        opObserverRegistry->addObserver(

@@ -199,8 +196,6 @@ void setUpObservers(ServiceContext* serviceContext, ClusterRole clusterRole, boo
            std::make_unique<repl::TenantMigrationDonorOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::TenantMigrationRecipientOpObserver>());
        opObserverRegistry->addObserver(
            std::make_unique<repl::ShardMergeRecipientOpObserver>());
    }
    if (!gMultitenancySupport) {
        opObserverRegistry->addObserver(

@@ -1,559 +0,0 @@
/**
 * Copyright (C) 2023-present MongoDB, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the Server Side Public License in all respects for
 * all of the code used other than as permitted herein. If you modify file(s)
 * with this exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do so,
 * delete this exception statement from your version. If you delete this
 * exception statement from all source files in the program, then also delete
 * it in the license file.
 */

#include "mongo/db/repl/shard_merge_recipient_op_observer.h"

#include <algorithm>
#include <fmt/format.h>
#include <iterator>
#include <memory>
#include <string>

#include <absl/container/node_hash_set.h>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/move/utility_core.hpp>
#include <boost/optional/optional.hpp>
// IWYU pragma: no_include "boost/system/detail/error_code.hpp"

#include "mongo/base/error_codes.h"
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/concurrency/lock_manager_defs.h"
#include "mongo/db/database_name.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/index_builds_coordinator.h"
#include "mongo/db/multitenancy_gen.h"
#include "mongo/db/repl/tenant_file_importer_service.h"
#include "mongo/db/repl/tenant_migration_access_blocker.h"
#include "mongo/db/repl/tenant_migration_access_blocker_registry.h"
#include "mongo/db/repl/tenant_migration_access_blocker_util.h"
#include "mongo/db/repl/tenant_migration_decoration.h"
#include "mongo/db/repl/tenant_migration_recipient_access_blocker.h"
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
#include "mongo/db/repl/tenant_migration_util.h"
#include "mongo/db/repl/timestamp_block.h"
#include "mongo/db/serverless/serverless_operation_lock_registry.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/kv/kv_engine.h"
#include "mongo/db/storage/recovery_unit.h"
#include "mongo/db/storage/storage_engine.h"
#include "mongo/db/tenant_id.h"
#include "mongo/db/transaction_resources.h"
#include "mongo/idl/idl_parser.h"
#include "mongo/logv2/log.h"
#include "mongo/logv2/log_attr.h"
#include "mongo/logv2/log_component.h"
#include "mongo/stdx/unordered_set.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/decorable.h"
#include "mongo/util/str.h"

#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kReplication

namespace mongo::repl {
using namespace fmt;
using namespace shard_merge_utils;
namespace {
bool markedGCAfterMigrationStart(const ShardMergeRecipientDocument& doc) {
    return !doc.getStartGarbageCollect() && doc.getExpireAt();
}

template <typename Func>
void runOnAlternateClient(const std::string& name, Func func) {
    auto parentClientUnkillableByStepDown = [&] {
        return !cc().canKillSystemOperationInStepdown(WithLock::withoutLock());
    }();

    auto client = getGlobalServiceContext()->getService(ClusterRole::ShardServer)->makeClient(name);
    AlternativeClientRegion acr(client);

    if (parentClientUnkillableByStepDown) {
        stdx::lock_guard<Client> lk(cc());
        cc().setSystemOperationUnkillableByStepdown(lk);
    }

    auto opCtx = cc().makeOperationContext();

    func(opCtx.get());
}

/**
 * Note: Refer to deleteTenantDataWhenMergeAborts() comment for the AlternativeClientRegion
 * requirement.
 */
void dropTempFilesAndCollsIfAny(OperationContext* opCtx, const UUID& migrationId) {
    // Drop the import done marker collection.
    runOnAlternateClient("dropShardMergeMarkerColl", [&migrationId](OperationContext* acrOpCtx) {
        dropImportDoneMarkerLocalCollection(acrOpCtx, migrationId);
    });

    const auto tempWTDirectory = fileClonerTempDir(migrationId);
    // Do an early exit if the temp dir is not present.
    if (!boost::filesystem::exists(tempWTDirectory))
        return;

    // Remove idents unknown to both storage and mdb_catalog.
    bool filesRemoved = false;
    const auto movingIdents = readMovingFilesMarker(tempWTDirectory);
    for (const auto& ident : movingIdents) {
        // It's impossible for files to be known by mdb_catalog but not storage. Files known to
        // storage but not mdb_catalog could occur if node restarts during import. However, startup
        // recovery removes such files. Therefore, we only need to handle files unknown to both
        // mdb_catalog and storage. Thus, verifying the file (ident) existence in storage is
        // sufficient.
        bool identKnown =
            getGlobalServiceContext()->getStorageEngine()->getEngine()->hasIdent(opCtx, ident);
        if (!identKnown) {
            filesRemoved = true;
            removeFile(constructDestinationPath(ident));
        }
    }
    if (filesRemoved)
        fsyncDataDirectory();

    // Remove the temp directory.
    fsyncRemoveDirectory(tempWTDirectory);
}

/**
 * Note: Though opObserver drops tenant collections only after the importer service stops importing
 * the collection, a collection might be imported after opObserver's storage txn has started (i.e.,
 * import collection storage txnId > opObserver storage txnId), causing the collection to be
 * invisible to the opObserver. To ensure visibility of all imported collections to the opObserver,
 * drop the tenant collection in AlternativeClientRegion.
 */
void deleteTenantDataWhenMergeAborts(const ShardMergeRecipientDocument& doc) {
    runOnAlternateClient("dropShardMergeDonorTenantColls", [&doc](OperationContext* opCtx) {
        auto storageEngine = opCtx->getServiceContext()->getStorageEngine();

        invariant(doc.getAbortOpTime());
        const auto dropOpTime = *doc.getAbortOpTime();
        TimestampBlock tsBlock(opCtx, dropOpTime.getTimestamp());

        UnreplicatedWritesBlock writeBlock{opCtx};

        writeConflictRetry(opCtx, "dropShardMergeDonorTenantColls", NamespaceString::kEmpty, [&] {
            WriteUnitOfWork wuow(opCtx);

            for (const auto& tenantId : doc.getTenantIds()) {
                std::vector<DatabaseName> databases;
                if (gMultitenancySupport) {
                    databases = storageEngine->listDatabases(tenantId);
                } else {
                    auto allDatabases = storageEngine->listDatabases();
                    std::copy_if(allDatabases.begin(),
                                 allDatabases.end(),
                                 std::back_inserter(databases),
                                 [tenant = tenantId.toString() + "_"](const DatabaseName& db) {
                                     // In a non-multitenancy environment, check if the db has a
                                     // matching tenant prefix.
                                     return StringData{
                                         DatabaseNameUtil::serialize(
                                             db, SerializationContext::stateDefault())}
                                         .startsWith(tenant);
                                 });
                }

                for (const auto& database : databases) {
                    AutoGetDb autoDb{opCtx, database, MODE_X};
                    Database* db = autoDb.getDb();
                    if (!db) {
                        continue;
                    }

                    LOGV2(7221802,
                          "Dropping tenant database for shard merge garbage collection",
                          "tenant"_attr = tenantId,
                          "database"_attr = database,
                          "migrationId"_attr = doc.getId(),
                          "abortOpTime"_attr = dropOpTime);

                    IndexBuildsCoordinator::get(opCtx)->assertNoBgOpInProgForDb(db->name());

                    auto catalog = CollectionCatalog::get(opCtx);
                    for (auto&& collection : catalog->range(db->name())) {
                        if (!collection) {
                            break;
                        }

                        uassertStatusOK(
                            db->dropCollectionEvenIfSystem(opCtx, collection->ns(), dropOpTime));
                    }

                    auto databaseHolder = DatabaseHolder::get(opCtx);
                    databaseHolder->close(opCtx, db->name());
                }
            }

            wuow.commit();
        });
    });
}

void onShardMergeRecipientsNssInsert(OperationContext* opCtx,
                                     std::vector<InsertStatement>::const_iterator first,
                                     std::vector<InsertStatement>::const_iterator last) {
    if (tenant_migration_access_blocker::inRecoveryMode(opCtx))
        return;

    for (auto it = first; it != last; it++) {
        auto recipientStateDoc =
            ShardMergeRecipientDocument::parse(IDLParserContext("recipientStateDoc"), it->doc);
        switch (recipientStateDoc.getState()) {
            case ShardMergeRecipientStateEnum::kStarted: {
                invariant(!recipientStateDoc.getStartGarbageCollect());

                const auto migrationId = recipientStateDoc.getId();
                ServerlessOperationLockRegistry::get(opCtx->getServiceContext())
                    .acquireLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
                                 migrationId);
                shard_role_details::getRecoveryUnit(opCtx)->onRollback(
                    [migrationId](OperationContext* opCtx) {
                        ServerlessOperationLockRegistry::get(opCtx->getServiceContext())
                            .releaseLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
                                         migrationId);
                    });

                auto& registry =
                    TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext());
                for (const auto& tenantId : recipientStateDoc.getTenantIds()) {
                    registry.add(tenantId,
                                 std::make_shared<TenantMigrationRecipientAccessBlocker>(
                                     opCtx->getServiceContext(), migrationId));
                }
                shard_role_details::getRecoveryUnit(opCtx)->onRollback(
                    [migrationId](OperationContext* opCtx) {
                        TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
                            .removeAccessBlockersForMigration(
                                migrationId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
                    });

                const auto& startAtOpTimeOptional = recipientStateDoc.getStartAtOpTime();
                invariant(startAtOpTimeOptional);
                shard_role_details::getRecoveryUnit(opCtx)->onCommit(
                    [migrationId, startAtOpTime = *startAtOpTimeOptional](OperationContext* opCtx,
                                                                          auto _) {
                        repl::TenantFileImporterService::get(opCtx)->startMigration(migrationId,
                                                                                    startAtOpTime);
                    });
            } break;
            case ShardMergeRecipientStateEnum::kCommitted:
            case ShardMergeRecipientStateEnum::kAborted:
                invariant(recipientStateDoc.getStartGarbageCollect());
                break;
            default:
                MONGO_UNREACHABLE;
        }
    }
}

void onDonatedFilesCollNssInsert(OperationContext* opCtx,
                                 std::vector<InsertStatement>::const_iterator first,
                                 std::vector<InsertStatement>::const_iterator last) {
    if (tenant_migration_access_blocker::inRecoveryMode(opCtx))
        return;

    for (auto it = first; it != last; it++) {
        const auto& metadataDoc = it->doc;
        const auto migrationId = uassertStatusOK(UUID::parse(metadataDoc[kMigrationIdFieldName]));
        repl::TenantFileImporterService::get(opCtx)->learnedFilename(migrationId, metadataDoc);
    }
}

void assertStateTransitionIsValid(ShardMergeRecipientStateEnum prevState,
                                  ShardMergeRecipientStateEnum nextState) {

    auto validPrevStates = [&]() -> stdx::unordered_set<ShardMergeRecipientStateEnum> {
        switch (nextState) {
            case ShardMergeRecipientStateEnum::kStarted:
                return {ShardMergeRecipientStateEnum::kStarted};
            case ShardMergeRecipientStateEnum::kLearnedFilenames:
                return {ShardMergeRecipientStateEnum::kStarted,
                        ShardMergeRecipientStateEnum::kLearnedFilenames};
            case ShardMergeRecipientStateEnum::kConsistent:
                return {ShardMergeRecipientStateEnum::kLearnedFilenames,
                        ShardMergeRecipientStateEnum::kConsistent};
            case ShardMergeRecipientStateEnum::kCommitted:
                return {ShardMergeRecipientStateEnum::kConsistent,
                        ShardMergeRecipientStateEnum::kCommitted};
            case ShardMergeRecipientStateEnum::kAborted:
                return {ShardMergeRecipientStateEnum::kStarted,
                        ShardMergeRecipientStateEnum::kLearnedFilenames,
                        ShardMergeRecipientStateEnum::kConsistent,
                        ShardMergeRecipientStateEnum::kAborted};
            default:
                MONGO_UNREACHABLE;
        }
    }();

    uassert(7339766, "Invalid state transition", validPrevStates.contains(prevState));
}

void onTransitioningToLearnedFilenames(OperationContext* opCtx,
                                       const ShardMergeRecipientDocument& recipientStateDoc) {
    shard_role_details::getRecoveryUnit(opCtx)->onCommit(
        [migrationId = recipientStateDoc.getId()](OperationContext* opCtx, auto _) {
            repl::TenantFileImporterService::get(opCtx)->learnedAllFilenames(migrationId);
        });
}

void onTransitioningToConsistent(OperationContext* opCtx,
                                 const ShardMergeRecipientDocument& recipientStateDoc) {
    assertImportDoneMarkerLocalCollExistsOnMergeConsistent(opCtx, recipientStateDoc.getId());
    if (recipientStateDoc.getRejectReadsBeforeTimestamp()) {
        shard_role_details::getRecoveryUnit(opCtx)->onCommit(
            [recipientStateDoc](OperationContext* opCtx, auto _) {
                auto mtabVector =
                    TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
                        .getRecipientAccessBlockersForMigration(recipientStateDoc.getId());
                invariant(!mtabVector.empty());
                for (auto& mtab : mtabVector) {
                    invariant(mtab);
                    mtab->startRejectingReadsBefore(
                        recipientStateDoc.getRejectReadsBeforeTimestamp().get());
                }
            });
    }
}

void onTransitioningToCommitted(OperationContext* opCtx,
                                const ShardMergeRecipientDocument& recipientStateDoc) {
    auto migrationId = recipientStateDoc.getId();
    // It's safe to do the interrupt outside of the onCommit hook as the decision to forget a
    // migration or the migration decision is not reversible.
    repl::TenantFileImporterService::get(opCtx)->interruptMigration(migrationId);

    if (markedGCAfterMigrationStart(recipientStateDoc)) {
        shard_role_details::getRecoveryUnit(opCtx)->onCommit([migrationId](OperationContext* opCtx,
                                                                           auto _) {
            auto mtabVector = TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
                                  .getRecipientAccessBlockersForMigration(migrationId);
            invariant(!mtabVector.empty());
            for (auto& mtab : mtabVector) {
                invariant(mtab);
                // Once the migration is committed and state doc is marked garbage collectable,
                // the TTL deletions should be unblocked for the imported donor collections.
                mtab->stopBlockingTTL();
            }

            ServerlessOperationLockRegistry::get(opCtx->getServiceContext())
                .releaseLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
                             migrationId);
        });

        repl::TenantFileImporterService::get(opCtx)->resetMigration(migrationId);
        dropTempFilesAndCollsIfAny(opCtx, migrationId);
    }
}

void onTransitioningToAborted(OperationContext* opCtx,
                              const ShardMergeRecipientDocument& recipientStateDoc) {
    auto migrationId = recipientStateDoc.getId();
    if (!markedGCAfterMigrationStart(recipientStateDoc)) {
        // It's safe to do the interrupt outside of the onCommit hook as the decision to forget a
        // migration or the migration decision is not reversible.
        repl::TenantFileImporterService::get(opCtx)->interruptMigration(migrationId);

        const auto& importCompletedFuture =
            repl::TenantFileImporterService::get(opCtx)->getImportCompletedFuture(migrationId);
        // Wait for the importer service to stop the collection import task before dropping
        // imported collections.
        if (importCompletedFuture) {
            LOGV2(7458507, "Waiting for the importer service to finish importing task");
            importCompletedFuture->wait(opCtx);
        }
        deleteTenantDataWhenMergeAborts(recipientStateDoc);
    } else {
        shard_role_details::getRecoveryUnit(opCtx)->onCommit(
            [migrationId](OperationContext* opCtx, auto _) {
                // Remove access blocker and release locks to allow faster migration retry.
                // (Note: Not needed to unblock TTL deletions as we would have already dropped all
                // imported donor collections immediately on transitioning to `kAborted`).
                TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
                    .removeAccessBlockersForMigration(
                        migrationId, TenantMigrationAccessBlocker::BlockerType::kRecipient);

                ServerlessOperationLockRegistry::get(opCtx->getServiceContext())
                    .releaseLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
                                 migrationId);
            });

        repl::TenantFileImporterService::get(opCtx)->resetMigration(migrationId);
        dropTempFilesAndCollsIfAny(opCtx, migrationId);
    }
}

void handleUpdateRecoveryMode(OperationContext* opCtx,
                              const ShardMergeRecipientDocument& recipientStateDoc) {
    // Note that we expect this path not to run during initial sync (inconsistent data), as we
    // intentionally crash the server upon detecting the state document oplog entry for replay.
    const auto migrationId = recipientStateDoc.getId();

    auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    invariant(!(replCoord->getSettings().isReplSet() &&
                repl::TenantFileImporterService::get(opCtx)->hasActiveMigration(migrationId)));

    if (markedGCAfterMigrationStart(recipientStateDoc)) {
        dropTempFilesAndCollsIfAny(opCtx, migrationId);
    } else if (recipientStateDoc.getState() == ShardMergeRecipientStateEnum::kAborted) {
        deleteTenantDataWhenMergeAborts(recipientStateDoc);
    }
}

} // namespace

void ShardMergeRecipientOpObserver::onInserts(OperationContext* opCtx,
                                              const CollectionPtr& coll,
                                              std::vector<InsertStatement>::const_iterator first,
                                              std::vector<InsertStatement>::const_iterator last,
                                              const std::vector<RecordId>& recordIds,
                                              std::vector<bool> fromMigrate,
                                              bool defaultFromMigrate,
                                              OpStateAccumulator* opAccumulator) {
    if (coll->ns() == NamespaceString::kShardMergeRecipientsNamespace) {
        onShardMergeRecipientsNssInsert(opCtx, first, last);
        return;
    }

    if (isDonatedFilesCollection(coll->ns())) {
        onDonatedFilesCollNssInsert(opCtx, first, last);
        return;
    }
}

void ShardMergeRecipientOpObserver::onUpdate(OperationContext* opCtx,
                                             const OplogUpdateEntryArgs& args,
                                             OpStateAccumulator* opAccumulator) {
    if (args.coll->ns() != NamespaceString::kShardMergeRecipientsNamespace) {
        return;
    }

    auto recipientStateDoc = ShardMergeRecipientDocument::parse(
        IDLParserContext("recipientStateDoc"), args.updateArgs->updatedDoc);
    if (tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
        handleUpdateRecoveryMode(opCtx, recipientStateDoc);
        return;
    }

    auto nextState = recipientStateDoc.getState();
    auto prevState = ShardMergeRecipientState_parse(
        IDLParserContext("preImageRecipientStateDoc"),
        args.updateArgs->preImageDoc[ShardMergeRecipientDocument::kStateFieldName]
            .valueStringData());
    assertStateTransitionIsValid(prevState, nextState);

    switch (nextState) {
        case ShardMergeRecipientStateEnum::kStarted:
            break;
        case ShardMergeRecipientStateEnum::kLearnedFilenames:
            onTransitioningToLearnedFilenames(opCtx, recipientStateDoc);
            break;
        case ShardMergeRecipientStateEnum::kConsistent:
            onTransitioningToConsistent(opCtx, recipientStateDoc);
            break;
        case ShardMergeRecipientStateEnum::kCommitted:
            onTransitioningToCommitted(opCtx, recipientStateDoc);
            break;
        case ShardMergeRecipientStateEnum::kAborted:
            onTransitioningToAborted(opCtx, recipientStateDoc);
            break;
        default:
            MONGO_UNREACHABLE;
    }
}

void ShardMergeRecipientOpObserver::onDelete(OperationContext* opCtx,
                                             const CollectionPtr& coll,
                                             StmtId stmtId,
                                             const BSONObj& doc,
                                             const DocumentKey& documentKey,
                                             const OplogDeleteEntryArgs& args,
                                             OpStateAccumulator* opAccumulator) {
    if (coll->ns() != NamespaceString::kShardMergeRecipientsNamespace ||
        tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
        return;
    }

    auto recipientStateDoc =
        ShardMergeRecipientDocument::parse(IDLParserContext("recipientStateDoc"), doc);

    bool isDocMarkedGarbageCollectable = [&] {
        auto state = recipientStateDoc.getState();
        auto expireAtIsSet = recipientStateDoc.getExpireAt().has_value();
        invariant(!expireAtIsSet || state == ShardMergeRecipientStateEnum::kCommitted ||
                  state == ShardMergeRecipientStateEnum::kAborted);
        return expireAtIsSet;
    }();

    uassert(ErrorCodes::IllegalOperation,
            str::stream() << "Cannot delete the recipient state document "
                          << " since it has not been marked as garbage collectable: "
                          << tenant_migration_util::redactStateDoc(recipientStateDoc.toBSON()),
            isDocMarkedGarbageCollectable);

    TenantMigrationInfo migrationInfo(recipientStateDoc.getId());

    shard_role_details::getRecoveryUnit(opCtx)->onCommit([migrationId = migrationInfo.uuid](
                                                             OperationContext* opCtx, auto _) {
        LOGV2_INFO(
            7339765, "Removing expired recipient access blocker", "migrationId"_attr = migrationId);
        TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
            .removeAccessBlockersForMigration(
                migrationId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
    });
}

repl::OpTime ShardMergeRecipientOpObserver::onDropCollection(OperationContext* opCtx,
                                                             const NamespaceString& collectionName,
                                                             const UUID& uuid,
                                                             std::uint64_t numRecords,
                                                             const CollectionDropType dropType,
                                                             bool markFromMigrate) {
    if (collectionName == NamespaceString::kShardMergeRecipientsNamespace &&
        !tenant_migration_access_blocker::inRecoveryMode(opCtx)) {

        uassert(
            ErrorCodes::IllegalOperation,
            str::stream() << "Cannot drop "
                          << NamespaceString::kShardMergeRecipientsNamespace.toStringForErrorMsg()
                          << " collection as it is not empty",
            !numRecords);
    }
    return OpTime();
}

} // namespace mongo::repl

@@ -1,93 +0,0 @@
/**
 * Copyright (C) 2023-present MongoDB, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the Server Side Public License in all respects for
 * all of the code used other than as permitted herein. If you modify file(s)
 * with this exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do so,
 * delete this exception statement from your version. If you delete this
 * exception statement from all source files in the program, then also delete
 * it in the license file.
 */

#pragma once

#include <cstdint>
#include <vector>

#include "mongo/bson/bsonobj.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/op_observer/op_observer.h"
#include "mongo/db/op_observer/op_observer_noop.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/oplog.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/session/logical_session_id.h"
#include "mongo/util/uuid.h"

namespace mongo::repl {

/**
 * OpObserver for shard merge recipient.
 */
class ShardMergeRecipientOpObserver final : public OpObserverNoop {
    ShardMergeRecipientOpObserver(const ShardMergeRecipientOpObserver&) = delete;
    ShardMergeRecipientOpObserver& operator=(const ShardMergeRecipientOpObserver&) = delete;

public:
    ShardMergeRecipientOpObserver() = default;
    ~ShardMergeRecipientOpObserver() override = default;

    NamespaceFilters getNamespaceFilters() const final {
        return {NamespaceFilter::kConfig, NamespaceFilter::kConfig};
    }

    void onInserts(OperationContext* opCtx,
                   const CollectionPtr& coll,
                   std::vector<InsertStatement>::const_iterator first,
                   std::vector<InsertStatement>::const_iterator last,
                   const std::vector<RecordId>& recordIds,
                   std::vector<bool> fromMigrate,
                   bool defaultFromMigrate,
                   OpStateAccumulator* opAccumulator = nullptr) final;

    void onUpdate(OperationContext* opCtx,
                  const OplogUpdateEntryArgs& args,
                  OpStateAccumulator* opAccumulator = nullptr) final;

    void onDelete(OperationContext* opCtx,
                  const CollectionPtr& coll,
                  StmtId stmtId,
                  const BSONObj& doc,
                  const DocumentKey& documentKey,
                  const OplogDeleteEntryArgs& args,
                  OpStateAccumulator* opAccumulator = nullptr) final;

    repl::OpTime onDropCollection(OperationContext* opCtx,
                                  const NamespaceString& collectionName,
                                  const UUID& uuid,
                                  std::uint64_t numRecords,
                                  CollectionDropType dropType,
                                  bool markFromMigrate) final;
};

} // namespace mongo::repl

@ -1,835 +0,0 @@
|
|||
/**
|
||||
* Copyright (C) 2023-present MongoDB, Inc.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Server Side Public License, version 1,
|
||||
* as published by MongoDB, Inc.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* Server Side Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the Server Side Public License
|
||||
* along with this program. If not, see
|
||||
* <http://www.mongodb.com/licensing/server-side-public-license>.
|
||||
*
|
||||
* As a special exception, the copyright holders give permission to link the
|
||||
* code of portions of this program with the OpenSSL library under certain
|
||||
* conditions as described in each individual source file and distribute
|
||||
* linked combinations including the program with the OpenSSL library. You
|
||||
* must comply with the Server Side Public License in all respects for
|
||||
* all of the code used other than as permitted herein. If you modify file(s)
|
||||
* with this exception, you may extend this exception to your version of the
|
||||
* file(s), but you are not obligated to do so. If you do not wish to do so,
|
||||
* delete this exception statement from your version. If you delete this
|
||||
* exception statement from all source files in the program, then also delete
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#include <boost/filesystem/fstream.hpp>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "mongo/base/string_data.h"
|
||||
#include "mongo/bson/oid.h"
|
||||
#include "mongo/bson/timestamp.h"
|
||||
#include "mongo/client/read_preference.h"
|
||||
#include "mongo/db/catalog/create_collection.h"
|
||||
#include "mongo/db/catalog_raii.h"
|
||||
#include "mongo/db/commands/create_gen.h"
|
||||
#include "mongo/db/concurrency/lock_manager_defs.h"
|
||||
#include "mongo/db/repl/member_state.h"
|
||||
#include "mongo/db/repl/replication_coordinator.h"
|
||||
#include "mongo/db/repl/replication_coordinator_mock.h"
|
||||
#include "mongo/db/repl/shard_merge_recipient_op_observer.h"
|
||||
#include "mongo/db/repl/storage_interface.h"
|
||||
#include "mongo/db/repl/storage_interface_impl.h"
|
||||
#include "mongo/db/repl/tenant_migration_access_blocker_registry.h"
|
||||
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
|
||||
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
|
||||
#include "mongo/db/serverless/serverless_operation_lock_registry.h"
|
||||
#include "mongo/db/service_context.h"
|
||||
#include "mongo/db/service_context_d_test_fixture.h"
|
||||
#include "mongo/db/storage/durable_catalog.h"
|
||||
#include "mongo/db/storage/write_unit_of_work.h"
|
||||
#include "mongo/db/tenant_id.h"
|
||||
#include "mongo/unittest/assert.h"
|
||||
#include "mongo/unittest/death_test.h"
|
||||
#include "mongo/unittest/framework.h"
|
||||
#include "mongo/unittest/log_test.h"
|
||||
#include "mongo/util/decorable.h"
|
||||
#include "mongo/util/str.h"
|
||||
|
||||
namespace mongo::repl {
|
||||
|
||||
using namespace shard_merge_utils;
|
||||
|
||||
namespace {
|
||||
const Timestamp kDefaultStartMigrationTimestamp(1, 1);
|
||||
static const std::string kDefaultDonorConnStr = "donor-rs/localhost:12345";
|
||||
static const std::string kDefaultRecipientConnStr = "recipient-rs/localhost:56789";
|
||||
static const UUID kMigrationId = UUID::gen();
|
||||
|
||||
} // namespace
|
||||
|
||||
class ShardMergeRecipientOpObserverTest : public ServiceContextMongoDTest {
|
||||
public:
|
||||
static bool collectionExists(OperationContext* opCtx, const NamespaceString& nss) {
|
||||
return static_cast<bool>(AutoGetCollectionForRead(opCtx, nss).getCollection());
|
||||
}
|
||||
|
||||
void setUp() override {
|
||||
ServiceContextMongoDTest::setUp();
|
||||
|
||||
auto serviceContext = getServiceContext();
|
||||
|
||||
// Need real (non-mock) storage for testing dropping marker collection.
|
||||
StorageInterface::set(serviceContext, std::make_unique<StorageInterfaceImpl>());
|
||||
|
||||
auto replCoord = std::make_unique<repl::ReplicationCoordinatorMock>(serviceContext);
|
||||
ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_PRIMARY));
|
||||
repl::ReplicationCoordinator::set(serviceContext, std::move(replCoord));
|
||||
|
||||
_opCtx = makeOperationContext();
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).startup();
|
||||
|
||||
repl::createOplog(opCtx());
|
||||
ASSERT_OK(createCollection(opCtx(),
|
||||
CreateCommand(NamespaceString::kShardMergeRecipientsNamespace)));
|
||||
}
|
||||
|
||||
void tearDown() override {
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).shutDown();
|
||||
}
|
||||
|
||||
OperationContext* opCtx() const {
|
||||
return _opCtx.get();
|
||||
}
|
||||
|
||||
protected:
|
||||
void performUpdates(const BSONObj& UpdatedDoc, const BSONObj& preImageDoc) {
|
||||
AutoGetCollection collection(
|
||||
opCtx(), NamespaceString::kShardMergeRecipientsNamespace, MODE_IX);
|
||||
if (!collection)
|
||||
FAIL(str::stream()
|
||||
<< "Collection "
|
||||
<< NamespaceString::kShardMergeRecipientsNamespace.toStringForErrorMsg()
|
||||
<< " doesn't exist");
|
||||
|
||||
CollectionUpdateArgs updateArgs{preImageDoc};
|
||||
updateArgs.updatedDoc = UpdatedDoc;
|
||||
|
||||
OplogUpdateEntryArgs update(&updateArgs, *collection);
|
||||
|
||||
WriteUnitOfWork wuow(opCtx());
|
||||
_observer.onUpdate(opCtx(), update);
|
||||
wuow.commit();
|
||||
}
|
||||
|
||||
int64_t countLogLinesWithId(int32_t id) {
|
||||
return countBSONFormatLogLinesIsSubset(BSON("id" << id));
|
||||
}
|
||||
|
||||
std::vector<TenantId> _tenantIds{TenantId{OID::gen()}, TenantId{OID::gen()}};
|
||||
|
||||
private:
|
||||
unittest::MinimumLoggedSeverityGuard _tenantMigrationSeverityGuard{
|
||||
logv2::LogComponent::kTenantMigration, logv2::LogSeverity::Debug(1)};
|
||||
|
||||
ShardMergeRecipientOpObserver _observer;
|
||||
ServiceContext::UniqueOperationContext _opCtx;
|
||||
};
|
||||
|
||||
TEST_F(ShardMergeRecipientOpObserverTest, TransitionToConsistentWithImportDoneMarkerCollection) {
    ShardMergeRecipientDocument recipientDoc(kMigrationId,
                                             kDefaultDonorConnStr,
                                             _tenantIds,
                                             kDefaultStartMigrationTimestamp,
                                             ReadPreferenceSetting(ReadPreference::PrimaryOnly));

    recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
    auto preImageDoc = recipientDoc.toBSON();

    recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
    auto updatedDoc = recipientDoc.toBSON();

    // Create the import done marker collection.
    ASSERT_OK(createCollection(
        opCtx(), CreateCommand(shard_merge_utils::getImportDoneMarkerNs(kMigrationId))));

    performUpdates(updatedDoc, preImageDoc);
}

DEATH_TEST_REGEX_F(ShardMergeRecipientOpObserverTest,
                   TransitionToConsistentWithoutImportDoneMarkerCollection,
                   "Fatal assertion.*7219902") {
    ShardMergeRecipientDocument recipientDoc(kMigrationId,
                                             kDefaultDonorConnStr,
                                             _tenantIds,
                                             kDefaultStartMigrationTimestamp,
                                             ReadPreferenceSetting(ReadPreference::PrimaryOnly));

    recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
    auto preImageDoc = recipientDoc.toBSON();

    recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
    auto updatedDoc = recipientDoc.toBSON();

    performUpdates(updatedDoc, preImageDoc);
}

TEST_F(ShardMergeRecipientOpObserverTest, TransitionToAbortedDropsImportedCollection) {
    ShardMergeRecipientDocument recipientDoc(kMigrationId,
                                             kDefaultDonorConnStr,
                                             _tenantIds,
                                             kDefaultStartMigrationTimestamp,
                                             ReadPreferenceSetting(ReadPreference::PrimaryOnly));

    recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
    auto preImageDoc = recipientDoc.toBSON();

    recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
    recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
    auto updatedDoc = recipientDoc.toBSON();

    const NamespaceString importedDonorCollNss1 =
        NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
    ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));

    const NamespaceString importedDonorCollNss2 =
        NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
    ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));

    ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
    ASSERT(collectionExists(opCtx(), importedDonorCollNss2));

    performUpdates(updatedDoc, preImageDoc);

    ASSERT(!collectionExists(opCtx(), importedDonorCollNss1));
    ASSERT(!collectionExists(opCtx(), importedDonorCollNss2));
}

TEST_F(ShardMergeRecipientOpObserverTest, TransitionToCommittedShouldNotDropImportedCollection) {
    ShardMergeRecipientDocument recipientDoc(kMigrationId,
                                             kDefaultDonorConnStr,
                                             _tenantIds,
                                             kDefaultStartMigrationTimestamp,
                                             ReadPreferenceSetting(ReadPreference::PrimaryOnly));

    recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
    auto preImageDoc = recipientDoc.toBSON();

    recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
    auto updatedDoc = recipientDoc.toBSON();

    const NamespaceString importedDonorCollNss1 =
        NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
    ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));

    const NamespaceString importedDonorCollNss2 =
        NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
    ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));

    ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
    ASSERT(collectionExists(opCtx(), importedDonorCollNss2));

    performUpdates(updatedDoc, preImageDoc);

    ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
    ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
}

TEST_F(ShardMergeRecipientOpObserverTest,
       TransitionToAbortedGarbageCollectableShouldDropTempFilesAndMarkerCollection) {
    ShardMergeRecipientDocument recipientDoc(kMigrationId,
                                             kDefaultDonorConnStr,
                                             _tenantIds,
                                             kDefaultStartMigrationTimestamp,
                                             ReadPreferenceSetting(ReadPreference::PrimaryOnly));

    ServerlessOperationLockRegistry::get(opCtx()->getServiceContext())
        .acquireLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient, kMigrationId);

    recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
    auto preImageDoc = recipientDoc.toBSON();

    recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
    recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
    recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
    auto updatedDoc = recipientDoc.toBSON();

    auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());

    // Create idents unknown to storage.
    const auto unknownIdent1 = "collection-70--88888";
    const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
    boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
    unknownIdent1Writer << "Dummy stream1 \n";
    unknownIdent1Writer.close();

    const auto unknownIdent2 = "index-71--88888";
    const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
    boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
    unknownIdent2Writer << "Dummy stream2 \n";
    unknownIdent2Writer.close();

    const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
    ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));

    writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
    writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
    // GC shouldn't remove these known idents.
    for (const auto& ident : knownIdentListBeforeGC) {
        writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
    }

    // Create the marker collection.
    createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);

    // Verify that temp files and the marker collection exist before GC.
    ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
    ASSERT(boost::filesystem::exists(unknownIdentPath1));
    ASSERT(boost::filesystem::exists(unknownIdentPath2));
    ASSERT(boost::filesystem::exists(fileClonerTempDirPath));

    startCapturingLogMessages();

    performUpdates(updatedDoc, preImageDoc);

    stopCapturingLogMessages();

    // Verify that temp files and the marker collection are deleted after GC.
    ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));

    ASSERT(!boost::filesystem::exists(unknownIdentPath1));
    ASSERT(!boost::filesystem::exists(unknownIdentPath2));
    ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));

    ASSERT_EQUALS(2, countLogLinesWithId(7458501));
    ASSERT_EQUALS(1, countLogLinesWithId(7458503));

    // Verify that GC didn't remove any known idents.
    const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
    ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
}
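
As the assertions above suggest, the garbage-collection step exercised here amounts to a set difference: of the idents recorded in the file cloner's moving-files markers, only those unknown to the durable catalog have their data files removed, while known idents are left alone. A minimal standalone sketch of that selection logic, using only the C++ standard library (the function and parameter names here are illustrative stand-ins, not the helpers from shard_merge_utils):

#include <set>
#include <string>
#include <vector>

// Given the idents recorded in moving-files markers and the idents the durable
// catalog still knows about, return the idents whose files are safe to delete.
std::vector<std::string> identsToDelete(const std::vector<std::string>& markedIdents,
                                        const std::set<std::string>& knownIdents) {
    std::vector<std::string> toDelete;
    for (const auto& ident : markedIdents) {
        if (knownIdents.count(ident) == 0) {
            toDelete.push_back(ident);  // e.g. "collection-70--88888", "index-71--88888"
        }
    }
    return toDelete;
}
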
TEST_F(ShardMergeRecipientOpObserverTest,
       TransitionToCommittedGarbageCollectableShouldDropTempFilesAndMarkerCollection) {
    ShardMergeRecipientDocument recipientDoc(kMigrationId,
                                             kDefaultDonorConnStr,
                                             _tenantIds,
                                             kDefaultStartMigrationTimestamp,
                                             ReadPreferenceSetting(ReadPreference::PrimaryOnly));

    auto& registry = TenantMigrationAccessBlockerRegistry::get(getGlobalServiceContext());
    for (const auto& tenantId : _tenantIds) {
        registry.add(tenantId,
                     std::make_shared<TenantMigrationRecipientAccessBlocker>(
                         opCtx()->getServiceContext(), kMigrationId));
    }

    ServerlessOperationLockRegistry::get(opCtx()->getServiceContext())
        .acquireLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient, kMigrationId);

    recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
    auto preImageDoc = recipientDoc.toBSON();

    recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
    recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
    auto updatedDoc = recipientDoc.toBSON();

    auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());

    // Create idents unknown to storage.
    const auto unknownIdent1 = "collection-70--88888";
    const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
    boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
    unknownIdent1Writer << "Dummy stream1 \n";
    unknownIdent1Writer.close();

    const auto unknownIdent2 = "index-71--88888";
    const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
    boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
    unknownIdent2Writer << "Dummy stream2 \n";
    unknownIdent2Writer.close();

    const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
    ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));

    writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
    writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
    // GC shouldn't remove these known idents.
    for (const auto& ident : knownIdentListBeforeGC) {
        writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
    }

    // Create the marker collection.
    createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);

    // Verify that temp files and the marker collection exist before GC.
    ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
    ASSERT(boost::filesystem::exists(unknownIdentPath1));
    ASSERT(boost::filesystem::exists(unknownIdentPath2));
    ASSERT(boost::filesystem::exists(fileClonerTempDirPath));

    startCapturingLogMessages();

    performUpdates(updatedDoc, preImageDoc);

    stopCapturingLogMessages();

    // Verify that temp files and the marker collection are deleted after GC.
    ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));

    ASSERT(!boost::filesystem::exists(unknownIdentPath1));
    ASSERT(!boost::filesystem::exists(unknownIdentPath2));
    ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));

    ASSERT_EQUALS(2, countLogLinesWithId(7458501));
    ASSERT_EQUALS(1, countLogLinesWithId(7458503));

    // Verify that GC didn't remove any known idents.
    const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
    ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
}
|
||||
|
||||
TEST_F(ShardMergeRecipientOpObserverTest,
|
||||
TransitionToAbortedDropsImportedCollectionInStartupRecovery) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
|
||||
auto preImageDoc = recipientDoc.toBSON();
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
|
||||
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
|
||||
auto updatedDoc = recipientDoc.toBSON();
|
||||
|
||||
const NamespaceString importedDonorCollNss1 =
|
||||
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
|
||||
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
|
||||
|
||||
const NamespaceString importedDonorCollNss2 =
|
||||
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
|
||||
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
|
||||
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
|
||||
|
||||
// Simulate the node is in startup repl state.
|
||||
ASSERT_OK(
|
||||
repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(repl::MemberState::RS_STARTUP));
|
||||
|
||||
performUpdates(updatedDoc, preImageDoc);
|
||||
|
||||
ASSERT(!collectionExists(opCtx(), importedDonorCollNss1));
|
||||
ASSERT(!collectionExists(opCtx(), importedDonorCollNss2));
|
||||
}
|
||||
|
||||
TEST_F(ShardMergeRecipientOpObserverTest,
|
||||
TransitionToCommittedShouldNotDropImportedCollectionInStartupRecovery) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
|
||||
auto preImageDoc = recipientDoc.toBSON();
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
|
||||
auto updatedDoc = recipientDoc.toBSON();
|
||||
|
||||
const NamespaceString importedDonorCollNss1 =
|
||||
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
|
||||
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
|
||||
|
||||
const NamespaceString importedDonorCollNss2 =
|
||||
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
|
||||
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
|
||||
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
|
||||
|
||||
// Simulate the node is in startup repl state.
|
||||
ASSERT_OK(
|
||||
repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(repl::MemberState::RS_STARTUP));
|
||||
performUpdates(updatedDoc, preImageDoc);
|
||||
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
|
||||
}
|
||||
|
||||
TEST_F(
|
||||
ShardMergeRecipientOpObserverTest,
|
||||
TransitionToAbortedGarbageCollectableShouldDropTempFilesAndMarkerCollectionInStartupRecovery) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
|
||||
auto preImageDoc = recipientDoc.toBSON();
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
|
||||
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
|
||||
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
|
||||
auto updatedDoc = recipientDoc.toBSON();
|
||||
|
||||
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
|
||||
|
||||
// Create idents unknown to storage.
|
||||
const auto unknownIdent1 = "collection-70--88888";
|
||||
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
|
||||
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
|
||||
unknownIdent1Writer << "Dummy stream1 \n";
|
||||
unknownIdent1Writer.close();
|
||||
|
||||
const auto unknownIdent2 = "index-71--88888";
|
||||
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
|
||||
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
|
||||
unknownIdent2Writer << "Dummy stream2 \n";
|
||||
unknownIdent2Writer.close();
|
||||
|
||||
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
|
||||
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
|
||||
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
|
||||
// GC shouldn't remove these known idents.
|
||||
for (const auto& ident : knownIdentListBeforeGC) {
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
|
||||
}
|
||||
|
||||
// Create the marker collection.
|
||||
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
|
||||
|
||||
// Verify that temp files and the marker collection exist before GC.
|
||||
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
|
||||
ASSERT(boost::filesystem::exists(unknownIdentPath1));
|
||||
ASSERT(boost::filesystem::exists(unknownIdentPath2));
|
||||
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
|
||||
|
||||
startCapturingLogMessages();
|
||||
|
||||
// Simulate the node is in startup repl state.
|
||||
ASSERT_OK(
|
||||
repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(repl::MemberState::RS_STARTUP));
|
||||
performUpdates(updatedDoc, preImageDoc);
|
||||
|
||||
stopCapturingLogMessages();
|
||||
|
||||
// Verify that temp files and the marker collection are deleted after GC.
|
||||
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
|
||||
|
||||
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
|
||||
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
|
||||
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
|
||||
|
||||
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
|
||||
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
|
||||
|
||||
// Verify that GC didn't remove any known idents.
|
||||
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
|
||||
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
|
||||
}
|
||||
|
||||
TEST_F(
|
||||
ShardMergeRecipientOpObserverTest,
|
||||
TransitionToCommittedGarbageCollectableShouldDropTempFilesAndMarkerCollectionInStartupRecovery) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
|
||||
auto preImageDoc = recipientDoc.toBSON();
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
|
||||
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
|
||||
auto updatedDoc = recipientDoc.toBSON();
|
||||
|
||||
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
|
||||
|
||||
// Create idents unknown to storage.
|
||||
const auto unknownIdent1 = "collection-70--88888";
|
||||
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
|
||||
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
|
||||
unknownIdent1Writer << "Dummy stream1 \n";
|
||||
unknownIdent1Writer.close();
|
||||
|
||||
const auto unknownIdent2 = "index-71--88888";
|
||||
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
|
||||
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
|
||||
unknownIdent2Writer << "Dummy stream2 \n";
|
||||
unknownIdent2Writer.close();
|
||||
|
||||
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
|
||||
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
|
||||
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
|
||||
// GC shouldn't remove these known idents.
|
||||
for (const auto& ident : knownIdentListBeforeGC) {
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
|
||||
}
|
||||
|
||||
// Create the marker collection.
|
||||
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
|
||||
|
||||
// Verify that temp files and the marker collection exist before GC.
|
||||
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
|
||||
ASSERT(boost::filesystem::exists(unknownIdentPath1));
|
||||
ASSERT(boost::filesystem::exists(unknownIdentPath2));
|
||||
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
|
||||
|
||||
startCapturingLogMessages();
|
||||
|
||||
// Simulate the node is in startup repl state.
|
||||
ASSERT_OK(
|
||||
repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(repl::MemberState::RS_STARTUP));
|
||||
performUpdates(updatedDoc, preImageDoc);
|
||||
|
||||
stopCapturingLogMessages();
|
||||
|
||||
// Verify that temp files and the marker collection are deleted after GC.
|
||||
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
|
||||
|
||||
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
|
||||
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
|
||||
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
|
||||
|
||||
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
|
||||
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
|
||||
|
||||
// Verify that GC didn't remove any known idents.
|
||||
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
|
||||
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
|
||||
}
|
||||
|
||||
TEST_F(ShardMergeRecipientOpObserverTest,
|
||||
TransitionToAbortedDropsImportedCollectionInRollbackRecovery) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
|
||||
auto preImageDoc = recipientDoc.toBSON();
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
|
||||
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
|
||||
auto updatedDoc = recipientDoc.toBSON();
|
||||
|
||||
const NamespaceString importedDonorCollNss1 =
|
||||
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
|
||||
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
|
||||
|
||||
const NamespaceString importedDonorCollNss2 =
|
||||
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
|
||||
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
|
||||
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
|
||||
|
||||
// Simulate the node is in rollback repl state.
|
||||
ASSERT_OK(repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(
|
||||
repl::MemberState::RS_ROLLBACK));
|
||||
|
||||
performUpdates(updatedDoc, preImageDoc);
|
||||
|
||||
ASSERT(!collectionExists(opCtx(), importedDonorCollNss1));
|
||||
ASSERT(!collectionExists(opCtx(), importedDonorCollNss2));
|
||||
}
|
||||
|
||||
TEST_F(ShardMergeRecipientOpObserverTest,
|
||||
TransitionToCommittedShouldNotDropImportedCollectionInRollbackRecovery) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
|
||||
auto preImageDoc = recipientDoc.toBSON();
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
|
||||
auto updatedDoc = recipientDoc.toBSON();
|
||||
|
||||
const NamespaceString importedDonorCollNss1 =
|
||||
NamespaceString::createNamespaceString_forTest(_tenantIds[0].toString() + "_test.coll1");
|
||||
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss1)));
|
||||
|
||||
const NamespaceString importedDonorCollNss2 =
|
||||
NamespaceString::createNamespaceString_forTest(_tenantIds[1].toString() + "_test.coll2");
|
||||
ASSERT_OK(createCollection(opCtx(), CreateCommand(importedDonorCollNss2)));
|
||||
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
|
||||
|
||||
// Simulate the node is in rollback repl state.
|
||||
ASSERT_OK(repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(
|
||||
repl::MemberState::RS_ROLLBACK));
|
||||
performUpdates(updatedDoc, preImageDoc);
|
||||
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss1));
|
||||
ASSERT(collectionExists(opCtx(), importedDonorCollNss2));
|
||||
}
|
||||
|
||||
TEST_F(
|
||||
ShardMergeRecipientOpObserverTest,
|
||||
TransitionToAbortedGarbageCollectableShouldDropTempFilesAndMarkerCollectionInRollbackRecovery) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
|
||||
auto preImageDoc = recipientDoc.toBSON();
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
|
||||
recipientDoc.setAbortOpTime(OpTime(Timestamp::max(), 1));
|
||||
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
|
||||
auto updatedDoc = recipientDoc.toBSON();
|
||||
|
||||
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
|
||||
|
||||
// Create idents unknown to storage.
|
||||
const auto unknownIdent1 = "collection-70--88888";
|
||||
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
|
||||
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
|
||||
unknownIdent1Writer << "Dummy stream1 \n";
|
||||
unknownIdent1Writer.close();
|
||||
|
||||
const auto unknownIdent2 = "index-71--88888";
|
||||
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
|
||||
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
|
||||
unknownIdent2Writer << "Dummy stream2 \n";
|
||||
unknownIdent2Writer.close();
|
||||
|
||||
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
|
||||
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
|
||||
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
|
||||
// GC shouldn't remove these known idents.
|
||||
for (const auto& ident : knownIdentListBeforeGC) {
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
|
||||
}
|
||||
|
||||
// Create the marker collection.
|
||||
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
|
||||
|
||||
// Verify that temp files and the marker collection exist before GC.
|
||||
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
|
||||
ASSERT(boost::filesystem::exists(unknownIdentPath1));
|
||||
ASSERT(boost::filesystem::exists(unknownIdentPath2));
|
||||
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
|
||||
|
||||
startCapturingLogMessages();
|
||||
|
||||
// Simulate the node is in rollback repl state.
|
||||
ASSERT_OK(repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(
|
||||
repl::MemberState::RS_ROLLBACK));
|
||||
performUpdates(updatedDoc, preImageDoc);
|
||||
|
||||
stopCapturingLogMessages();
|
||||
|
||||
// Verify that temp files and the marker collection are deleted after GC.
|
||||
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
|
||||
|
||||
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
|
||||
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
|
||||
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
|
||||
|
||||
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
|
||||
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
|
||||
|
||||
// Verify that GC didn't remove any known idents.
|
||||
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
|
||||
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
|
||||
}
|
||||
|
||||
TEST_F(
|
||||
ShardMergeRecipientOpObserverTest,
|
||||
TransitionToCommittedGarbageCollectableShouldDropTempFilesAndMarkerCollectionInRollbackRecovery) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
|
||||
auto preImageDoc = recipientDoc.toBSON();
|
||||
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
|
||||
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
|
||||
auto updatedDoc = recipientDoc.toBSON();
|
||||
|
||||
auto knownIdentListBeforeGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
|
||||
|
||||
// Create idents unknown to storage.
|
||||
const auto unknownIdent1 = "collection-70--88888";
|
||||
const auto unknownIdentPath1 = constructDestinationPath(unknownIdent1);
|
||||
boost::filesystem::ofstream unknownIdent1Writer(unknownIdentPath1);
|
||||
unknownIdent1Writer << "Dummy stream1 \n";
|
||||
unknownIdent1Writer.close();
|
||||
|
||||
const auto unknownIdent2 = "index-71--88888";
|
||||
const auto unknownIdentPath2 = constructDestinationPath(unknownIdent2);
|
||||
boost::filesystem::ofstream unknownIdent2Writer(unknownIdentPath2);
|
||||
unknownIdent2Writer << "Dummy stream2 \n";
|
||||
unknownIdent2Writer.close();
|
||||
|
||||
const auto fileClonerTempDirPath = fileClonerTempDir(kMigrationId);
|
||||
ASSERT_TRUE(boost::filesystem::create_directory(fileClonerTempDirPath));
|
||||
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent1, true);
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, unknownIdent2, false);
|
||||
// GC shouldn't remove these known idents.
|
||||
for (const auto& ident : knownIdentListBeforeGC) {
|
||||
writeMovingFilesMarker(fileClonerTempDirPath, ident, false);
|
||||
}
|
||||
|
||||
// Create the marker collection.
|
||||
createImportDoneMarkerLocalCollection(opCtx(), kMigrationId);
|
||||
|
||||
// Verify that temp files and the marker collection exist before GC.
|
||||
ASSERT(collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
|
||||
ASSERT(boost::filesystem::exists(unknownIdentPath1));
|
||||
ASSERT(boost::filesystem::exists(unknownIdentPath2));
|
||||
ASSERT(boost::filesystem::exists(fileClonerTempDirPath));
|
||||
|
||||
startCapturingLogMessages();
|
||||
|
||||
// Simulate the node is in rollback repl state.
|
||||
ASSERT_OK(repl::ReplicationCoordinator::get(opCtx())->setFollowerMode(
|
||||
repl::MemberState::RS_ROLLBACK));
|
||||
performUpdates(updatedDoc, preImageDoc);
|
||||
|
||||
stopCapturingLogMessages();
|
||||
|
||||
// Verify that temp files and the marker collection are deleted after GC.
|
||||
ASSERT(!collectionExists(opCtx(), getImportDoneMarkerNs(kMigrationId)));
|
||||
|
||||
ASSERT(!boost::filesystem::exists(unknownIdentPath1));
|
||||
ASSERT(!boost::filesystem::exists(unknownIdentPath2));
|
||||
ASSERT(!boost::filesystem::exists(fileClonerTempDirPath));
|
||||
|
||||
ASSERT_EQUALS(2, countLogLinesWithId(7458501));
|
||||
ASSERT_EQUALS(1, countLogLinesWithId(7458503));
|
||||
|
||||
// Verify that GC didn't remove any known idents.
|
||||
const auto knownIdentListAfterGC = DurableCatalog::get(opCtx())->getAllIdents(opCtx());
|
||||
ASSERT(knownIdentListBeforeGC == knownIdentListAfterGC);
|
||||
}
|
||||
} // namespace mongo::repl
File diff suppressed because it is too large
@ -1,639 +0,0 @@
/**
|
||||
* Copyright (C) 2023-present MongoDB, Inc.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Server Side Public License, version 1,
|
||||
* as published by MongoDB, Inc.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* Server Side Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the Server Side Public License
|
||||
* along with this program. If not, see
|
||||
* <http://www.mongodb.com/licensing/server-side-public-license>.
|
||||
*
|
||||
* As a special exception, the copyright holders give permission to link the
|
||||
* code of portions of this program with the OpenSSL library under certain
|
||||
* conditions as described in each individual source file and distribute
|
||||
* linked combinations including the program with the OpenSSL library. You
|
||||
* must comply with the Server Side Public License in all respects for
|
||||
* all of the code used other than as permitted herein. If you modify file(s)
|
||||
* with this exception, you may extend this exception to your version of the
|
||||
* file(s), but you are not obligated to do so. If you do not wish to do so,
|
||||
* delete this exception statement from your version. If you delete this
|
||||
* exception statement from all source files in the program, then also delete
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <boost/move/utility_core.hpp>
|
||||
#include <boost/none.hpp>
|
||||
#include <boost/optional.hpp>
|
||||
#include <boost/optional/optional.hpp>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "mongo/base/status.h"
|
||||
#include "mongo/base/status_with.h"
|
||||
#include "mongo/base/string_data.h"
|
||||
#include "mongo/bson/bsonobj.h"
|
||||
#include "mongo/bson/timestamp.h"
|
||||
#include "mongo/client/dbclient_connection.h"
|
||||
#include "mongo/client/dbclient_cursor.h"
|
||||
#include "mongo/client/fetcher.h"
|
||||
#include "mongo/client/mongo_uri.h"
|
||||
#include "mongo/client/read_preference.h"
|
||||
#include "mongo/db/commands/tenant_migration_donor_cmds_gen.h"
|
||||
#include "mongo/db/namespace_string.h"
|
||||
#include "mongo/db/operation_context.h"
|
||||
#include "mongo/db/pipeline/aggregate_command_gen.h"
|
||||
#include "mongo/db/pipeline/process_interface/mongo_process_interface.h"
|
||||
#include "mongo/db/repl/data_replicator_external_state.h"
|
||||
#include "mongo/db/repl/oplog_buffer_collection.h"
|
||||
#include "mongo/db/repl/oplog_fetcher.h"
|
||||
#include "mongo/db/repl/optime.h"
|
||||
#include "mongo/db/repl/primary_only_service.h"
|
||||
#include "mongo/db/repl/tenant_migration_shared_data.h"
|
||||
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
|
||||
#include "mongo/db/repl/tenant_oplog_applier.h"
|
||||
#include "mongo/db/serverless/serverless_types_gen.h"
|
||||
#include "mongo/db/service_context.h"
|
||||
#include "mongo/db/tenant_id.h"
|
||||
#include "mongo/executor/scoped_task_executor.h"
|
||||
#include "mongo/executor/task_executor.h"
|
||||
#include "mongo/rpc/metadata/repl_set_metadata.h"
|
||||
#include "mongo/stdx/condition_variable.h"
|
||||
#include "mongo/stdx/mutex.h"
|
||||
#include "mongo/stdx/unordered_set.h"
|
||||
#include "mongo/util/cancellation.h"
|
||||
#include "mongo/util/concurrency/thread_pool.h"
|
||||
#include "mongo/util/concurrency/with_lock.h"
|
||||
#include "mongo/util/fail_point.h"
|
||||
#include "mongo/util/future.h"
|
||||
#include "mongo/util/future_impl.h"
|
||||
#include "mongo/util/net/hostandport.h"
|
||||
#include "mongo/util/net/ssl_options.h"
|
||||
#include "mongo/util/time_support.h"
|
||||
#include "mongo/util/uuid.h"
|
||||
|
||||
namespace mongo {
|
||||
|
||||
class DBClientConnection;
|
||||
class OperationContext;
|
||||
class ReplicaSetMonitor;
|
||||
class ServiceContext;
|
||||
|
||||
namespace repl {
|
||||
class OplogBufferCollection;
|
||||
|
||||
/**
|
||||
* ShardMergeRecipientService is a primary only service which orchestrates the
|
||||
* data migration on the recipient side for shard merge protocol.
|
||||
*/
|
||||
class ShardMergeRecipientService final : public PrimaryOnlyService {
|
||||
// Disallows copying.
|
||||
ShardMergeRecipientService(const ShardMergeRecipientService&) = delete;
|
||||
ShardMergeRecipientService& operator=(const ShardMergeRecipientService&) = delete;
|
||||
|
||||
public:
|
||||
static constexpr StringData kShardMergeRecipientServiceName = "ShardMergeRecipientService"_sd;
|
||||
|
||||
explicit ShardMergeRecipientService(ServiceContext* serviceContext);
|
||||
~ShardMergeRecipientService() override = default;
|
||||
|
||||
StringData getServiceName() const final;
|
||||
|
||||
NamespaceString getStateDocumentsNS() const final;
|
||||
|
||||
ThreadPool::Limits getThreadPoolLimits() const final;
|
||||
|
||||
void checkIfConflictsWithOtherInstances(
|
||||
OperationContext* opCtx,
|
||||
BSONObj initialStateDoc,
|
||||
const std::vector<const PrimaryOnlyService::Instance*>& existingInstances) final;
|
||||
|
||||
std::shared_ptr<PrimaryOnlyService::Instance> constructInstance(BSONObj initialStateDoc) final;
|
||||
|
||||
/**
|
||||
* Interrupts all shard merge recipient service instances.
|
||||
*/
|
||||
void abortAllMigrations(OperationContext* opCtx);
|
||||
|
||||
class Instance final : public PrimaryOnlyService::TypedInstance<Instance> {
|
||||
public:
|
||||
explicit Instance(ServiceContext* serviceContext,
|
||||
const ShardMergeRecipientService* recipientService,
|
||||
BSONObj stateDoc);
|
||||
|
||||
SemiFuture<void> run(std::shared_ptr<executor::ScopedTaskExecutor> executor,
|
||||
const CancellationToken& token) noexcept final;
|
||||
|
||||
        /**
         * Unconditional migration interrupt, called on the node's stepdown/shutdown event.
         * Makes the instance not wait for the `recipientForgetMigration` command.
         */
        void interrupt(Status status) override;

        /**
         * Conditional migration interrupt, called on FCV change or due to an oplog fetcher
         * error. Makes the instance wait for the `recipientForgetMigration` command.
         */
        void interruptConditionally(Status status);
|
||||
|
||||
/**
|
||||
* Interrupts the migration for garbage collection.
|
||||
*/
|
||||
void onReceiveRecipientForgetMigration(OperationContext* opCtx,
|
||||
const MigrationDecisionEnum& decision);
|
||||
|
||||
/**
|
||||
* Returns a Future that will be resolved when migration is completed.
|
||||
*/
|
||||
SharedSemiFuture<void> getMigrationCompletionFuture() const {
|
||||
return _migrationCompletionPromise.getFuture();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Future that will be resolved when the instance has been durably marked garbage
|
||||
* collectable.
|
||||
*/
|
||||
SharedSemiFuture<void> getForgetMigrationDurableFuture() const {
|
||||
return _forgetMigrationDurablePromise.getFuture();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the instance id.
|
||||
*/
|
||||
const UUID& getMigrationUUID() const;
|
||||
|
||||
/**
|
||||
* Returns the instance state document.
|
||||
*/
|
||||
ShardMergeRecipientDocument getStateDoc() const;
|
||||
|
||||
boost::optional<BSONObj> reportForCurrentOp(
|
||||
MongoProcessInterface::CurrentOpConnectionsMode connMode,
|
||||
MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept final;
|
||||
|
||||
void checkIfOptionsConflict(const BSONObj& stateDoc) const final;
|
||||
|
||||
/**
|
||||
* Blocks the thread until the migration reaches consistent state in an interruptible
|
||||
* mode.
|
||||
*
|
||||
* Returns the donor OpTime at which the migration reached consistent state. Throws
|
||||
* exception on error.
|
||||
*/
|
||||
OpTime waitUntilMigrationReachesConsistentState(OperationContext* opCtx) const;
|
||||
|
||||
        /**
         * Blocks the thread until the tenant oplog applier has applied data past the
         * 'returnAfterReachingTimestamp' in an interruptible mode. If the recipient's logical
         * clock has not yet reached the 'returnAfterReachingTimestamp', advances the recipient's
         * logical clock to 'returnAfterReachingTimestamp'. Finally, stores the
         * 'returnAfterReachingTimestamp' as 'rejectReadsBeforeTimestamp' in the state document
         * and waits for the write to be replicated to every node (i.e., waits for
         * 'rejectReadsBeforeTimestamp' to be set on the TenantMigrationRecipientAccessBlocker of
         * every node) to guarantee that no reads will be incorrectly accepted.
         */
        OpTime waitUntilMigrationReachesReturnAfterReachingTimestamp(
            OperationContext* opCtx, const Timestamp& returnAfterReachingTimestamp);
|
||||
|
||||
/**
|
||||
* Called when a replica set member (self, or a secondary) finishes importing donated files.
|
||||
*/
|
||||
void onMemberImportedFiles(const HostAndPort& host);
|
||||
|
||||
/**
|
||||
* Set the oplog creator functor, to allow use of a mock oplog fetcher.
|
||||
*/
|
||||
void setCreateOplogFetcherFn_forTest(
|
||||
std::unique_ptr<OplogFetcherFactory>&& createOplogFetcherFn) {
|
||||
_createOplogFetcherFn = std::move(createOplogFetcherFn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stops the oplog applier without going through recipientForgetMigration.
|
||||
*/
|
||||
void stopOplogApplier_forTest() {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
_tenantOplogApplier->shutdown();
|
||||
}
|
||||
|
||||
/**
|
||||
* Suppresses selecting 'host' as the donor sync source, until 'until'.
|
||||
*/
|
||||
void excludeDonorHost_forTest(const HostAndPort& host, Date_t until) {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
_excludeDonorHost(lk, host, until);
|
||||
}
|
||||
|
||||
const auto& getExcludedDonorHosts_forTest() {
|
||||
return _excludedDonorHosts;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class ShardMergeRecipientServiceTest;
|
||||
|
||||
/**
|
||||
* Only used for testing. Allows setting a custom task executor for backup cursor fetcher.
|
||||
*/
|
||||
void setBackupCursorFetcherExecutor_forTest(
|
||||
std::shared_ptr<executor::TaskExecutor> taskExecutor) {
|
||||
_backupCursorExecutor = std::move(taskExecutor);
|
||||
}
|
||||
|
||||
const NamespaceString _stateDocumentsNS = NamespaceString::kShardMergeRecipientsNamespace;
|
||||
|
||||
using ConnectionPair =
|
||||
std::pair<std::unique_ptr<DBClientConnection>, std::unique_ptr<DBClientConnection>>;
|
||||
|
||||
/**
|
||||
* Transitions the instance state to 'kStarted' if the state is uninitialized.
|
||||
*/
|
||||
SemiFuture<void> _initializeAndDurablyPersistStateDoc();
|
||||
|
||||
/**
|
||||
* Execute steps which are necessary to start a migration, such as, establishing donor
|
||||
* client connection, setting up internal state, get donor cluster keys, etc.
|
||||
*/
|
||||
SemiFuture<void> _prepareForMigration(const CancellationToken& token);
|
||||
|
||||
/**
|
||||
* Sets up internal state to begin migration.
|
||||
*/
|
||||
void _setup(ConnectionPair connectionPair);
|
||||
|
||||
        /**
         * Starts the migration only if the following FCV checks pass:
         * a) We are not in the middle of an FCV upgrade/downgrade.
         * b) The donor and recipient FCVs match.
         */
        SemiFuture<void> _startMigrationIfSafeToRunwithCurrentFCV(const CancellationToken& token);
|
||||
|
||||
/**
|
||||
* Helper to run FCV sanity checks at the start of migration.
|
||||
*/
|
||||
void _assertIfMigrationIsSafeToRunWithCurrentFcv();
|
||||
|
||||
/**
|
||||
* Waits for all data bearing nodes to complete import.
|
||||
*/
|
||||
SemiFuture<void> _waitForAllNodesToFinishImport();
|
||||
|
||||
/**
|
||||
* Tells whether the migration is committed or aborted.
|
||||
*/
|
||||
bool _isCommitOrAbortState(WithLock) const;
|
||||
|
||||
        /**
         * Waits for the recipientForgetMigration command to provide the migration decision and
         * then marks the external keys doc and the instance state doc as garbage collectable.
         */
        SemiFuture<void> _waitForForgetMigrationThenMarkMigrationGarbageCollectable(
            const CancellationToken& token);
|
||||
|
||||
/**
|
||||
* Durably persists the migration decision in the state doc.
|
||||
*/
|
||||
SemiFuture<void> _durablyPersistCommitAbortDecision(MigrationDecisionEnum decision);
|
||||
|
||||
/*
|
||||
* Drops ephemeral collections used for migrations after migration decision is durably
|
||||
* persisted.
|
||||
*/
|
||||
void _dropTempCollections();
|
||||
|
||||
/**
|
||||
* Sets the `expireAt` field at the state doc.
|
||||
*/
|
||||
SemiFuture<void> _markStateDocAsGarbageCollectable();
|
||||
|
||||
/**
|
||||
* Deletes the state document. Does not return the opTime for the delete, since it's not
|
||||
* necessary to wait for this delete to be majority committed (this is one of the last steps
|
||||
* in the chain, and if the delete rolls back, the new primary will re-do the delete).
|
||||
*/
|
||||
SemiFuture<void> _removeStateDoc(const CancellationToken& token);
|
||||
|
||||
SemiFuture<void> _waitForGarbageCollectionDelayThenDeleteStateDoc(
|
||||
const CancellationToken& token);
|
||||
|
||||
/**
|
||||
* Creates a client, connects it to the donor and uses the default
|
||||
* authentication mode (KeyFile Authentication). Throws a user assertion on failure.
|
||||
*/
|
||||
std::unique_ptr<DBClientConnection> _connectAndAuth(const HostAndPort& serverAddress,
|
||||
StringData applicationName);
|
||||
|
||||
/**
|
||||
* Creates and connects both the oplog fetcher client and the client used for other
|
||||
* operations.
|
||||
*/
|
||||
SemiFuture<ConnectionPair> _createAndConnectClients();
|
||||
|
||||
/**
|
||||
* Fetches all key documents from the donor's admin.system.keys collection, stores them in
|
||||
* config.external_validation_keys, and refreshes the keys cache.
|
||||
*/
|
||||
void _fetchAndStoreDonorClusterTimeKeyDocs(const CancellationToken& token);
|
||||
|
||||
/**
|
||||
* Opens a backup cursor on the donor primary and fetches the
|
||||
* list of donor files to be cloned.
|
||||
*/
|
||||
SemiFuture<void> _openBackupCursor(const CancellationToken& token);
|
||||
SemiFuture<void> _openBackupCursorWithRetry(const CancellationToken& token);
|
||||
|
||||
/**
|
||||
* Keeps the donor backup cursor alive.
|
||||
*/
|
||||
void _keepBackupCursorAlive(const CancellationToken& token);
|
||||
|
||||
/**
|
||||
* Kills the backup cursor opened on donor, if any.
|
||||
*
|
||||
*/
|
||||
void _killBackupCursor();
|
||||
|
||||
/**
|
||||
* Gets the backup cursor metadata info.
|
||||
*/
|
||||
const BackupCursorInfo& _getDonorBackupCursorInfo(WithLock) const;
|
||||
|
||||
        /**
         * Gets the oldest active multi-statement transaction optime by reading the
         * config.transactions collection at the given read timestamp (i.e., equal to
         * startApplyingDonorOpTime) snapshot.
         */
        boost::optional<OpTime> _getOldestActiveTransactionAt(Timestamp readTimestamp);
|
||||
|
||||
/**
|
||||
* Retrieves the start/fetch optimes from the donor and updates the in-memory/on-disk states
|
||||
* accordingly.
|
||||
*/
|
||||
SemiFuture<void> _getStartOpTimesFromDonor();
|
||||
|
||||
/**
|
||||
* Pushes documents from oplog fetcher to oplog buffer.
|
||||
*
|
||||
* Returns a status even though it always returns OK, to conform the interface OplogFetcher
|
||||
* expects for the EnqueueDocumentsFn.
|
||||
*/
|
||||
Status _enqueueDocuments(OplogFetcher::Documents::const_iterator begin,
|
||||
OplogFetcher::Documents::const_iterator end,
|
||||
const OplogFetcher::DocumentsInfo& info);
|
||||
|
||||
/**
|
||||
* Creates the oplog buffer that will be populated by donor oplog entries from the retryable
|
||||
* writes fetching stage and oplog fetching stage.
|
||||
*/
|
||||
void _createOplogBuffer(WithLock, OperationContext* opCtx);
|
||||
|
||||
/**
|
||||
* Runs an aggregation that gets the entire oplog chain for every retryable write entry in
|
||||
* `config.transactions`. Only returns oplog entries in the chain where
|
||||
* `ts` < `startFetchingOpTime.ts` and adds them to the oplog buffer.
|
||||
*/
|
||||
SemiFuture<void> _fetchRetryableWritesOplogBeforeStartOpTime();
|
||||
|
||||
/**
|
||||
* Migrates committed transactions entries into 'config.transactions'.
|
||||
*/
|
||||
SemiFuture<void> _fetchCommittedTransactionsBeforeStartOpTime();
|
||||
|
||||
/**
|
||||
* Opens and returns a cursor for all entries with 'lastWriteOpTime' <=
|
||||
* 'startApplyingDonorOpTime' and state 'committed'.
|
||||
*/
|
||||
std::unique_ptr<DBClientCursor> _openCommittedTransactionsFindCursor();
|
||||
|
||||
/**
|
||||
* Creates an aggregation pipeline to fetch transaction entries with 'lastWriteOpTime' <
|
||||
* 'startFetchingDonorOpTime' and 'state: committed'.
|
||||
*/
|
||||
AggregateCommandRequest _makeCommittedTransactionsAggregation() const;
|
||||
|
||||
/**
|
||||
* Processes a committed transaction entry from the donor. Updates the recipient's
|
||||
* 'config.transactions' collection with the entry and writes a no-op entry for the
|
||||
* recipient secondaries to replicate the entry.
|
||||
*/
|
||||
void _processCommittedTransactionEntry(const BSONObj& entry);
|
||||
|
||||
        /**
         * Starts the oplog buffer only if the node is primary. Otherwise, throws an error.
         */
        void _startOplogBuffer(OperationContext* opCtx);
|
||||
|
||||
/**
|
||||
* Starts the tenant oplog fetcher.
|
||||
*/
|
||||
void _startOplogFetcher();
|
||||
|
||||
/**
|
||||
* Called when the oplog fetcher finishes. Usually the oplog fetcher finishes only when
|
||||
* cancelled or on error.
|
||||
*/
|
||||
void _oplogFetcherCallback(Status oplogFetcherStatus);
|
||||
|
||||
/**
|
||||
* Starts the tenant oplog applier.
|
||||
*/
|
||||
void _startOplogApplier();
|
||||
|
||||
/**
|
||||
* Waits for tenant oplog applier to stop.
|
||||
*/
|
||||
SemiFuture<TenantOplogApplier::OpTimePair> _waitForMigrationToComplete();
|
||||
|
||||
        /**
         * Advances the majority commit timestamp to be >= the donor's backup cursor checkpoint
         * timestamp (CkptTs) by:
         * 1. Advancing the clusterTime to CkptTs.
         * 2. Writing a no-op oplog entry with ts > CkptTs.
         * 3. Waiting for the majority commit timestamp to reach the time of the no-op write.
         *
         * Notes: This method should be called before transitioning the instance state to
         * 'kLearnedFilenames', which causes donor collections to get imported. The current
         * import rule is that the imported table's checkpoint timestamp can't be later than the
         * recipient's stable timestamp. Because we have no mechanism to wait for a specific
         * stable timestamp on a given node (or set of nodes) in the replica set, and the
         * majority commit point and stable timestamp aren't updated atomically, advancing the
         * majority commit point on the recipient before the collection import stage is a
         * best-effort attempt to prevent import retries caused by import timestamp rule
         * violations.
         */
        SemiFuture<void> _advanceMajorityCommitTsToBkpCursorCheckpointTs(
            const CancellationToken& token);
|
||||
|
||||
/**
|
||||
* Returns a future that will be fulfilled when the tenant migration reaches consistent
|
||||
* state.
|
||||
*/
|
||||
SemiFuture<void> _getDataConsistentFuture();
|
||||
|
||||
/**
|
||||
* Transitions the instance state to 'kLearnedFilenames' after learning all filenames to be
|
||||
* imported.
|
||||
*/
|
||||
SemiFuture<void> _enterLearnedFilenamesState();
|
||||
|
||||
/**
|
||||
* Durably persist that migration has reached consistent state and signal waiters.
|
||||
*/
|
||||
SemiFuture<void> _enterConsistentState();
|
||||
SemiFuture<void> _durablyPersistConsistentState();
|
||||
|
||||
/**
|
||||
* Gets the migration interrupt status. Answers may change after this call as it reads the
|
||||
* interrupt status without holding mutex lock. It's the caller's responsibility to decide
|
||||
* if they need to hold mutex lock or not before calling the method.
|
||||
*/
|
||||
Status _getInterruptStatus() const;
|
||||
|
||||
/**
|
||||
* Cancels all remaining work in the migration.
|
||||
*/
|
||||
void _cancelRemainingWork(WithLock lk, Status status);
|
||||
|
||||
/**
|
||||
* Performs some cleanup work on migration completion, like, shutting down the components or
|
||||
* fulfilling any instance promises.
|
||||
*/
|
||||
void _cleanupOnMigrationCompletion(Status status);
|
||||
|
||||
/**
|
||||
* Suppresses selecting 'host' as the donor sync source, until 'until'.
|
||||
*/
|
||||
void _excludeDonorHost(WithLock, const HostAndPort& host, Date_t until);
|
||||
|
||||
        /**
         * Returns a vector of currently excluded donor hosts. Also removes hosts from the list
         * of excluded donor nodes if their exclude duration has expired.
         */
        std::vector<HostAndPort> _getExcludedDonorHosts(WithLock);
|
||||
|
||||
/**
|
||||
* Makes the failpoint stop or hang the migration based on failpoint data "action" field.
|
||||
* If "action" is "hang" and 'opCtx' is not null, the failpoint will be interruptible.
|
||||
*/
|
||||
void _stopOrHangOnFailPoint(FailPoint* fp, OperationContext* opCtx = nullptr);
|
||||
|
||||
/**
|
||||
* Updates the shard merge recipient state doc and waits for that change to be
|
||||
* propagated to a majority.
|
||||
*/
|
||||
SemiFuture<void> _updateStateDocForMajority(WithLock lk);
|
||||
|
||||
/**
|
||||
* Updates the shard merge recipient state doc. Throws error if it fails to
|
||||
* update.
|
||||
*/
|
||||
void _updateStateDoc(OperationContext* opCtx, const ShardMergeRecipientDocument& stateDoc);
|
||||
|
||||
/**
|
||||
* Returns the majority OpTime on the donor node that 'client' is connected to.
|
||||
*/
|
||||
OpTime _getDonorMajorityOpTime(std::unique_ptr<mongo::DBClientConnection>& client);
|
||||
|
||||
mutable stdx::mutex _mutex;
|
||||
|
||||
// All member variables are labeled with one of the following codes indicating the
|
||||
// synchronization rules for accessing them.
|
||||
//
|
||||
// (R) Read-only in concurrent operation; no synchronization required.
|
||||
// (S) Self-synchronizing; access according to class's own rules.
|
||||
// (M) Reads and writes guarded by _mutex.
|
||||
// (W) Synchronization required only for writes.
|
||||
|
||||
ServiceContext* const _serviceContext;
|
||||
const ShardMergeRecipientService* const _recipientService; // (R) (not owned)
|
||||
std::shared_ptr<executor::ScopedTaskExecutor> _scopedExecutor; // (M)
|
||||
std::shared_ptr<executor::TaskExecutor> _backupCursorExecutor; // (M)
|
||||
ShardMergeRecipientDocument _stateDoc; // (M)
|
||||
|
||||
// This data is provided in the initial state doc and never changes. We keep copies to
|
||||
// avoid having to obtain the mutex to access them.
|
||||
const std::vector<TenantId> _tenantIds; // (R)
|
||||
const UUID _migrationUuid; // (R)
|
||||
const std::string _donorConnectionString; // (R)
|
||||
const MongoURI _donorUri; // (R)
|
||||
const ReadPreferenceSetting _readPreference; // (R)
|
||||
|
||||
std::shared_ptr<ReplicaSetMonitor> _donorReplicaSetMonitor; // (M)
|
||||
|
||||
// Members of the donor replica set that we have excluded as a potential sync source for
|
||||
// some period of time.
|
||||
std::vector<std::pair<HostAndPort, Date_t>> _excludedDonorHosts; // (M)
|
||||
|
||||
// The '_client' will be used for other operations such as fetching
|
||||
// optimes while the '_oplogFetcherClient' will be reserved for the oplog fetcher only.
|
||||
// Because the oplog fetcher uses exhaust, we need a dedicated connection for oplog fetcher.
|
||||
//
|
||||
// Follow DBClientCursor synchronization rules.
|
||||
std::unique_ptr<DBClientConnection> _client; // (S)
|
||||
std::unique_ptr<DBClientConnection> _oplogFetcherClient; // (S)
|
||||
|
||||
std::unique_ptr<Fetcher> _donorFilenameBackupCursorFileFetcher; // (M)
|
||||
CancellationSource _backupCursorKeepAliveCancellation = {}; // (X)
|
||||
boost::optional<SemiFuture<void>> _backupCursorKeepAliveFuture; // (M)
|
||||
|
||||
std::unique_ptr<OplogFetcherFactory> _createOplogFetcherFn =
|
||||
std::make_unique<CreateOplogFetcherFn>(); // (M)
|
||||
std::unique_ptr<OplogBufferCollection> _donorOplogBuffer; // (M)
|
||||
std::unique_ptr<DataReplicatorExternalState> _dataReplicatorExternalState; // (M)
|
||||
std::unique_ptr<OplogFetcher> _donorOplogFetcher; // (M)
|
||||
std::shared_ptr<TenantOplogApplier> _tenantOplogApplier; // (M)
|
||||
|
||||
// Writer pool to do storage write operation. Used by tenant collection cloner and by
|
||||
// tenant oplog applier.
|
||||
std::unique_ptr<ThreadPool> _workerPool; //(M)
|
||||
// Data shared by cloners. Follow TenantMigrationSharedData synchronization rules.
|
||||
std::unique_ptr<TenantMigrationSharedData> _sharedData; // (S)
|
||||
|
||||
// Promise that is resolved when all voting data-bearing recipient nodes have successfully
|
||||
// imported all donor files.
|
||||
SharedPromise<void> _importQuorumPromise; // (W)
|
||||
// Whether we are waiting for members to import donor files.
|
||||
bool _waitingForMembersToImportFiles = true;
|
||||
// Which members have imported all donor files.
|
||||
stdx::unordered_set<HostAndPort> _membersWhoHaveImportedFiles;
|
||||
|
||||
// Promise that is resolved when the migration reached consistent point.
|
||||
SharedPromise<OpTime> _dataConsistentPromise; // (W)
|
||||
// Promise that is resolved when migration is completed.
|
||||
SharedPromise<void> _migrationCompletionPromise; // (W)
|
||||
// Promise that is resolved when the recipientForgetMigration command is received or on
|
||||
// stepDown/shutDown with errors.
|
||||
SharedPromise<MigrationDecisionEnum> _receivedRecipientForgetMigrationPromise; // (W)
|
||||
// Promise that is resolved when the instance has been durably marked garbage collectable.
|
||||
SharedPromise<void> _forgetMigrationDurablePromise; // (W)
|
||||
// Promise that is resolved with when the instance is interrupted, and holds interrupt error
|
||||
// status.
|
||||
SharedPromise<void> _interruptPromise; // (M)
|
||||
|
||||
// Waiters are notified when 'tenantOplogApplier' is valid on restart.
|
||||
stdx::condition_variable _restartOplogApplierCondVar; // (M)
|
||||
// Waiters are notified when 'tenantOplogApplier' is ready to use.
|
||||
stdx::condition_variable _oplogApplierReadyCondVar; // (M)
|
||||
// Indicates whether 'tenantOplogApplier' is ready to use or not.
|
||||
bool _oplogApplierReady = false; // (M)
|
||||
};
|
||||
|
||||
private:
|
||||
/**
|
||||
* Creates the state document collection.
|
||||
*/
|
||||
ExecutorFuture<void> _rebuildService(std::shared_ptr<executor::ScopedTaskExecutor> executor,
|
||||
const CancellationToken& token) override;
|
||||
|
||||
ServiceContext* const _serviceContext;
|
||||
};
|
||||
} // namespace repl
|
||||
} // namespace mongo
|
||||
File diff suppressed because it is too large
@@ -1,946 +0,0 @@
/**
|
||||
* Copyright (C) 2022-present MongoDB, Inc.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Server Side Public License, version 1,
|
||||
* as published by MongoDB, Inc.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* Server Side Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the Server Side Public License
|
||||
* along with this program. If not, see
|
||||
* <http://www.mongodb.com/licensing/server-side-public-license>.
|
||||
*
|
||||
* As a special exception, the copyright holders give permission to link the
|
||||
* code of portions of this program with the OpenSSL library under certain
|
||||
* conditions as described in each individual source file and distribute
|
||||
* linked combinations including the program with the OpenSSL library. You
|
||||
* must comply with the Server Side Public License in all respects for
|
||||
* all of the code used other than as permitted herein. If you modify file(s)
|
||||
* with this exception, you may extend this exception to your version of the
|
||||
* file(s), but you are not obligated to do so. If you do not wish to do so,
|
||||
* delete this exception statement from your version. If you delete this
|
||||
* exception statement from all source files in the program, then also delete
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
|
||||
#include "mongo/db/repl/tenant_file_importer_service.h"
|
||||
|
||||
#include <boost/none.hpp>
|
||||
#include <boost/optional.hpp>
|
||||
#include <boost/optional/optional.hpp>
|
||||
#include <fmt/format.h>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
|
||||
#include <boost/move/utility_core.hpp>
|
||||
|
||||
#include "mongo/base/error_codes.h"
|
||||
#include "mongo/base/status.h"
|
||||
#include "mongo/bson/bsonelement.h"
|
||||
#include "mongo/db/catalog/import_options.h"
|
||||
#include "mongo/db/catalog_raii.h"
|
||||
#include "mongo/db/client.h"
|
||||
#include "mongo/db/commands/tenant_migration_recipient_cmds_gen.h"
|
||||
#include "mongo/db/concurrency/exception_util.h"
|
||||
#include "mongo/db/database_name.h"
|
||||
#include "mongo/db/db_raii.h"
|
||||
#include "mongo/db/op_observer/op_observer.h"
|
||||
#include "mongo/db/profile_settings.h"
|
||||
#include "mongo/db/repl/oplog_applier.h"
|
||||
#include "mongo/db/repl/repl_server_parameters_gen.h"
|
||||
#include "mongo/db/repl/replication_auth.h"
|
||||
#include "mongo/db/repl/replication_coordinator.h"
|
||||
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
|
||||
#include "mongo/db/repl/tenant_migration_shared_data.h"
|
||||
#include "mongo/db/service_context.h"
|
||||
#include "mongo/db/storage/durable_catalog.h"
|
||||
#include "mongo/db/storage/recovery_unit.h"
|
||||
#include "mongo/db/storage/storage_file_util.h"
|
||||
#include "mongo/db/storage/wiredtiger/wiredtiger_import.h"
|
||||
#include "mongo/db/transaction_resources.h"
|
||||
#include "mongo/executor/task_executor.h"
|
||||
#include "mongo/idl/cluster_parameter_synchronization_helpers.h"
|
||||
#include "mongo/logv2/log.h"
|
||||
#include "mongo/logv2/log_attr.h"
|
||||
#include "mongo/logv2/log_component.h"
|
||||
#include "mongo/rpc/get_status_from_command_result.h"
|
||||
#include "mongo/util/decorable.h"
|
||||
#include "mongo/util/fail_point.h"
|
||||
#include "mongo/util/net/hostandport.h"
|
||||
#include "mongo/util/net/ssl_options.h"
|
||||
#include "mongo/util/scopeguard.h"
|
||||
#include "mongo/util/str.h"
|
||||
|
||||
|
||||
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTenantMigration
|
||||
|
||||
MONGO_FAIL_POINT_DEFINE(hangBeforeFileImporterThreadExit);
|
||||
MONGO_FAIL_POINT_DEFINE(skipCloneFiles);
|
||||
MONGO_FAIL_POINT_DEFINE(hangBeforeVoteImportedFiles);
|
||||
MONGO_FAIL_POINT_DEFINE(skipImportFiles);
|
||||
MONGO_FAIL_POINT_DEFINE(hangBeforeImportingFiles);
|
||||
|
||||
namespace mongo::repl {
|
||||
|
||||
using namespace fmt::literals;
|
||||
using namespace shard_merge_utils;
|
||||
|
||||
namespace {
|
||||
const auto _TenantFileImporterService =
|
||||
ServiceContext::declareDecoration<TenantFileImporterService>();
|
||||
|
||||
const ReplicaSetAwareServiceRegistry::Registerer<TenantFileImporterService>
|
||||
_TenantFileImporterServiceRegisterer("TenantFileImporterService");
|
||||
|
||||
template <class Promise>
|
||||
void setPromiseOkifNotReady(WithLock lk, Promise& promise) {
|
||||
if (promise.getFuture().isReady()) {
|
||||
return;
|
||||
}
|
||||
|
||||
promise.emplaceValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* Connects to the donor source using the default authentication mode.
|
||||
*/
|
||||
void connectAndAuth(const HostAndPort& source, DBClientConnection* client) {
|
||||
client->connect(source, "TenantFileImporterService", boost::none);
|
||||
uassertStatusOK(replAuthenticate(client).withContext(
|
||||
str::stream() << "TenantFileImporterService failed to authenticate to " << source));
|
||||
}
|
||||
|
||||
void buildStorageMetadata(const WTimportArgs& importArgs, BSONObjBuilder& bob) {
|
||||
bob << importArgs.ident
|
||||
<< BSON("tableMetadata" << importArgs.tableMetadata << "fileMetadata"
|
||||
<< importArgs.fileMetadata);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a new ident and move the file.
|
||||
* Performs an fsync on the destination file and the parent directories of both 'srcFilePath' and
|
||||
* 'destFilePath'.
|
||||
*/
|
||||
std::string fsyncMoveWithNewIdent(OperationContext* opCtx,
|
||||
const boost::filesystem::path& tempWTDirectory,
|
||||
const mongo::NamespaceString& metadataNS,
|
||||
const std::string& oldIdent,
|
||||
const char* kind,
|
||||
std::vector<boost::filesystem::path>& movedFiles) {
|
||||
auto srcFilePath = constructSourcePath(tempWTDirectory, oldIdent);
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
auto newIdent = DurableCatalog::get(opCtx)->generateUniqueIdent(metadataNS, kind);
|
||||
auto destFilePath = constructDestinationPath(newIdent);
|
||||
|
||||
LOGV2_DEBUG(6114304,
|
||||
1,
|
||||
"Moving file",
|
||||
"from"_attr = srcFilePath.string(),
|
||||
"to"_attr = destFilePath.string());
|
||||
|
||||
uassert(6114401,
|
||||
"Destination file '{}' already exists"_format(destFilePath.string()),
|
||||
!boost::filesystem::exists(destFilePath));
|
||||
|
||||
writeMovingFilesMarker(
|
||||
tempWTDirectory, newIdent, (strcmp(kind, "collection") == 0 ? true : false));
|
||||
|
||||
uassertStatusOK(fsyncRename(srcFilePath, destFilePath)
|
||||
.withContext(str::stream()
|
||||
<< "Failed to move file from: " << srcFilePath.string()
|
||||
<< " to: " << destFilePath.string()));
|
||||
|
||||
// Note the list of files to be cleaned up in case of a failure to import the collection and its
// indexes.
|
||||
movedFiles.emplace_back(std::move(destFilePath));
|
||||
|
||||
return newIdent;
|
||||
} catch (const DBException& ex) {
|
||||
// Retry move on "destination file already exists" error. This can happen due to
|
||||
// ident collision between this import and another parallel import via
|
||||
// importCollection command.
|
||||
if (ex.code() == 6114401) {
|
||||
LOGV2(7199801,
|
||||
"Failed to move file from temp to active WT directory. Retrying "
|
||||
"the move operation using another new unique ident.",
|
||||
"error"_attr = redact(ex.toStatus()));
|
||||
continue;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
MONGO_UNREACHABLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Import the collection and its indexes into the main wiredTiger instance.
|
||||
*/
|
||||
void importCollectionAndItsIndexesInMainWTInstance(OperationContext* opCtx,
|
||||
const CollectionImportMetadata& metadata,
|
||||
const UUID& migrationId,
|
||||
const BSONObj& storageMetaObj) {
|
||||
const auto nss = metadata.ns;
|
||||
writeConflictRetry(opCtx, "importCollection", nss, [&] {
|
||||
LOGV2_DEBUG(6114303, 1, "Importing donor collection", "ns"_attr = nss);
|
||||
AutoGetDb autoDb(opCtx, nss.dbName(), MODE_IX);
|
||||
auto db = autoDb.ensureDbExists(opCtx);
|
||||
invariant(db);
|
||||
Lock::CollectionLock collLock(opCtx, nss, MODE_X);
|
||||
auto& dbProfileSettings = DatabaseProfileSettings::get(opCtx->getServiceContext());
|
||||
WriteUnitOfWork wunit(opCtx);
|
||||
AutoStatsTracker statsTracker(opCtx,
|
||||
nss,
|
||||
Top::LockType::NotLocked,
|
||||
AutoStatsTracker::LogMode::kUpdateTopAndCurOp,
|
||||
dbProfileSettings.getDatabaseProfileLevel(nss.dbName()));
|
||||
|
||||
// If the collection creation rolls back, ensure that the Top entry created for the
|
||||
// collection is deleted.
|
||||
shard_role_details::getRecoveryUnit(opCtx)->onRollback(
|
||||
[nss, serviceContext = opCtx->getServiceContext()](OperationContext*) {
|
||||
Top::get(serviceContext).collectionDropped(nss);
|
||||
});
|
||||
|
||||
uassert(ErrorCodes::NamespaceExists,
|
||||
str::stream() << "Collection already exists. NS: " << nss.toStringForErrorMsg(),
|
||||
!CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss));
|
||||
|
||||
// Create Collection object.
|
||||
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
|
||||
auto durableCatalog = storageEngine->getCatalog();
|
||||
ImportOptions importOptions(ImportOptions::ImportCollectionUUIDOption::kKeepOld);
|
||||
importOptions.importTimestampRule = ImportOptions::ImportTimestampRule::kStable;
|
||||
// Since we are using the ident id generated by this recipient node, ident collisions in the
// future after import are not possible. So, it's OK to skip the ident collision check.
// Otherwise, we would unnecessarily generate a new rand after each collection import.
|
||||
importOptions.skipIdentCollisionCheck = true;
|
||||
|
||||
auto importResult = uassertStatusOK(DurableCatalog::get(opCtx)->importCollection(
|
||||
opCtx, nss, metadata.catalogObject, storageMetaObj, importOptions));
|
||||
|
||||
const auto catalogEntry =
|
||||
durableCatalog->getParsedCatalogEntry(opCtx, importResult.catalogId);
|
||||
const auto md = catalogEntry->metadata;
|
||||
for (const auto& index : md->indexes) {
|
||||
uassert(6114301, "Cannot import non-ready indexes", index.ready);
|
||||
}
|
||||
|
||||
std::shared_ptr<Collection> ownedCollection = Collection::Factory::get(opCtx)->make(
|
||||
opCtx, nss, importResult.catalogId, md, std::move(importResult.rs));
|
||||
ownedCollection->init(opCtx);
|
||||
historicalIDTrackerAllowsMixedModeWrites(ownedCollection->getSharedDecorations())
|
||||
.store(true);
|
||||
|
||||
// Update the number of records and data size on commit.
|
||||
shard_role_details::getRecoveryUnit(opCtx)->registerChange(
|
||||
makeCountsChange(ownedCollection->getRecordStore(), metadata));
|
||||
|
||||
CollectionCatalog::get(opCtx)->onCreateCollection(opCtx, ownedCollection);
|
||||
|
||||
auto importedCatalogEntry =
|
||||
storageEngine->getCatalog()->getCatalogEntry(opCtx, importResult.catalogId);
|
||||
opCtx->getServiceContext()->getOpObserver()->onImportCollection(opCtx,
|
||||
migrationId,
|
||||
nss,
|
||||
metadata.numRecords,
|
||||
metadata.dataSize,
|
||||
importedCatalogEntry,
|
||||
storageMetaObj,
|
||||
/*dryRun=*/false);
|
||||
|
||||
wunit.commit();
|
||||
|
||||
if (metadata.numRecords > 0 &&
|
||||
nss == NamespaceString::makeClusterParametersNSS(nss.tenantId())) {
|
||||
cluster_parameters::initializeAllTenantParametersFromCollection(opCtx,
|
||||
*ownedCollection);
|
||||
}
|
||||
|
||||
LOGV2(6114300,
|
||||
"Imported donor collection",
|
||||
"ns"_attr = nss,
|
||||
"numRecordsApprox"_attr = metadata.numRecords,
|
||||
"dataSizeApprox"_attr = metadata.dataSize);
|
||||
});
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TenantFileImporterService* TenantFileImporterService::get(ServiceContext* serviceContext) {
|
||||
return &_TenantFileImporterService(serviceContext);
|
||||
}
|
||||
|
||||
TenantFileImporterService* TenantFileImporterService::get(OperationContext* opCtx) {
|
||||
return get(opCtx->getServiceContext());
|
||||
}
|
||||
|
||||
TenantFileImporterService::TenantFileImporterService()
|
||||
: _createConnectionFn(
|
||||
[]() { return std::make_unique<DBClientConnection>(true /* autoReconnect */); }) {}
|
||||
|
||||
TenantFileImporterService::MigrationHandle::MigrationHandle(const UUID& migrationId,
|
||||
const OpTime& startMigrationOpTime)
|
||||
: migrationId(migrationId),
|
||||
startMigrationOpTime(startMigrationOpTime),
|
||||
eventQueue(std::make_unique<Queue>()),
|
||||
workerPool(
|
||||
makeReplWorkerPool(tenantApplierThreadCount, "TenantFileImporterServiceWriter"_sd)),
|
||||
sharedData(std::make_unique<TenantMigrationSharedData>(
|
||||
getGlobalServiceContext()->getFastClockSource(), migrationId)) {
|
||||
stats.fileCopyStart = Date_t::now();
|
||||
}
|
||||
|
||||
void TenantFileImporterService::_makeMigrationHandleIfNotPresent(
|
||||
WithLock, const UUID& migrationId, const OpTime& startMigrationOpTime) {
|
||||
if (_mh)
|
||||
return;
|
||||
_mh = std::make_unique<MigrationHandle>(migrationId, startMigrationOpTime);
|
||||
}
|
||||
|
||||
void TenantFileImporterService::startMigration(const UUID& migrationId,
|
||||
const OpTime& startMigrationOpTime) {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
if (_isShuttingDown) {
|
||||
LOGV2_DEBUG(6690701,
|
||||
3,
|
||||
"TenantFileImporterService:: Not starting migration due to shutdown",
|
||||
"migrationId"_attr = migrationId);
|
||||
return;
|
||||
}
|
||||
|
||||
_makeMigrationHandleIfNotPresent(lk, migrationId, startMigrationOpTime);
|
||||
auto prevState = _transitionToState(lk, migrationId, State::kStarted);
|
||||
if (prevState == State::kStarted)
|
||||
return;
|
||||
|
||||
_mh->workerThread = std::make_unique<stdx::thread>([this, migrationId, startMigrationOpTime] {
|
||||
Client::initThread("TenantFileImporterService",
|
||||
getGlobalServiceContext()->getService(ClusterRole::ShardServer));
|
||||
LOGV2_INFO(6378904,
|
||||
"TenantFileImporterService worker thread started",
|
||||
"migrationId"_attr = migrationId,
|
||||
"startMigrationOpTime"_attr = startMigrationOpTime);
|
||||
|
||||
{
|
||||
stdx::lock_guard<Client> lk(cc());
|
||||
cc().setSystemOperationUnkillableByStepdown(lk);
|
||||
}
|
||||
|
||||
try {
|
||||
_handleEvents(migrationId);
|
||||
} catch (...) {
|
||||
LOGV2_ERROR(6615001,
|
||||
"TenantFileImporterService::_handleEvents encountered an error",
|
||||
"migrationId"_attr = migrationId,
|
||||
"error"_attr = redact(exceptionToStatus()));
|
||||
}
|
||||
|
||||
LOGV2_INFO(7800203,
|
||||
"TenantFileImporterService worker thread exiting",
|
||||
"migrationId"_attr = migrationId);
|
||||
hangBeforeFileImporterThreadExit.pauseWhileSet();
|
||||
});
|
||||
}
|
||||
|
||||
void TenantFileImporterService::learnedFilename(const UUID& migrationId,
|
||||
const BSONObj& metadataDoc) {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
// Migration handle can be empty only if the node restarts, rolls back, or resyncs while a shard
// merge is in progress.
|
||||
if (!_mh) {
|
||||
LOGV2_DEBUG(7800204,
|
||||
3,
|
||||
"TenantFileImporterService:: Skipping learned filename",
|
||||
"migrationId"_attr = migrationId,
|
||||
"filename"_attr = metadataDoc["filename"]);
|
||||
return;
|
||||
}
|
||||
|
||||
(void)_transitionToState(lk, migrationId, State::kLearnedFilename);
|
||||
_mh->stats.totalDataSize += std::max(0ll, metadataDoc["fileSize"].safeNumberLong());
|
||||
|
||||
ImporterEvent event{ImporterEvent::Type::kLearnedFileName, migrationId};
|
||||
event.metadataDoc = metadataDoc.getOwned();
|
||||
auto success = _mh->eventQueue->tryPush(std::move(event));
|
||||
|
||||
uassert(
|
||||
6378903,
|
||||
"TenantFileImporterService failed to push '{}' event without blocking for migrationId :{}"_format(
|
||||
stateToString(_mh->state), migrationId.toString()),
|
||||
success);
|
||||
}
|
||||
|
||||
void TenantFileImporterService::learnedAllFilenames(const UUID& migrationId) {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
// Migration handle can be empty only if the node restarts, rolls back, or resyncs while a shard
// merge is in progress.
|
||||
if (!_mh) {
|
||||
LOGV2_DEBUG(7800205,
|
||||
3,
|
||||
"TenantFileImporterService:: Skipping learned all filenames",
|
||||
"migrationId"_attr = migrationId);
|
||||
return;
|
||||
}
|
||||
|
||||
auto prevState = _transitionToState(lk, migrationId, State::kLearnedAllFilenames);
|
||||
if (prevState == State::kLearnedAllFilenames)
|
||||
return;
|
||||
|
||||
auto success =
|
||||
_mh->eventQueue->tryPush({ImporterEvent::Type::kLearnedAllFilenames, migrationId});
|
||||
uassert(
|
||||
6378902,
|
||||
"TenantFileImporterService failed to push '{}' event without blocking for migrationId :{}"_format(
|
||||
stateToString(_mh->state), migrationId.toString()),
|
||||
success);
|
||||
}
|
||||
|
||||
void TenantFileImporterService::interruptMigration(const UUID& migrationId) {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
// Migration handle can be empty only if the node restarts, rolls back, or resyncs while a shard
// merge is in progress.
|
||||
if (!_mh) {
|
||||
LOGV2_DEBUG(7800206,
|
||||
3,
|
||||
"TenantFileImporterService:: Skipping interrupting migration",
|
||||
"migrationId"_attr = migrationId);
|
||||
return;
|
||||
}
|
||||
_interrupt(lk, migrationId);
|
||||
}
|
||||
|
||||
void TenantFileImporterService::resetMigration(const UUID& migrationId) {
|
||||
_resetMigrationHandle(migrationId);
|
||||
}
|
||||
|
||||
void TenantFileImporterService::interruptAll() {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
if (!_mh) {
|
||||
return;
|
||||
}
|
||||
_interrupt(lk, _mh->migrationId);
|
||||
}
|
||||
|
||||
void TenantFileImporterService::_handleEvents(const UUID& migrationId) {
|
||||
auto uniqueOpCtx = cc().makeOperationContext();
|
||||
OperationContext* opCtx = uniqueOpCtx.get();
|
||||
|
||||
std::unique_ptr<DBClientConnection> donorConnection;
|
||||
Queue* eventQueue;
|
||||
ThreadPool* workerPool;
|
||||
TenantMigrationSharedData* sharedData;
|
||||
|
||||
ON_BLOCK_EXIT([this, opId = opCtx->getOpID(), &migrationId] {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
invariant(_mh && migrationId == _mh->migrationId);
|
||||
|
||||
_mh->stats.fileCopyEnd = Date_t::now();
|
||||
|
||||
_mh->opCtx = nullptr;
|
||||
_mh->donorConnection = nullptr;
|
||||
});
|
||||
|
||||
{
|
||||
stdx::lock_guard lk(_mutex);
|
||||
invariant(_mh && migrationId == _mh->migrationId);
|
||||
uassert(ErrorCodes::Interrupted,
|
||||
str::stream() << "TenantFileImporterService was interrupted for migrationId:\""
|
||||
<< migrationId << "\"",
|
||||
_mh->state < State::kInterrupted);
|
||||
_mh->opCtx = opCtx;
|
||||
|
||||
eventQueue = _mh->eventQueue.get();
|
||||
workerPool = _mh->workerPool.get();
|
||||
sharedData = _mh->sharedData.get();
|
||||
}
|
||||
|
||||
auto setUpDonorConnectionIfNeeded = [&](const BSONObj& metadataDoc) {
|
||||
// Return early if we have already set up the donor connection.
|
||||
if (donorConnection) {
|
||||
return;
|
||||
}
|
||||
|
||||
donorConnection = _createConnectionFn();
|
||||
auto source = HostAndPort::parseThrowing(metadataDoc[kDonorHostNameFieldName].str());
|
||||
connectAndAuth(source, donorConnection.get());
|
||||
|
||||
stdx::lock_guard lk(_mutex);
|
||||
invariant(_mh && migrationId == _mh->migrationId);
|
||||
uassert(ErrorCodes::Interrupted,
|
||||
str::stream() << "TenantFileImporterService was interrupted for migrationId=\""
|
||||
<< migrationId << "\"",
|
||||
_mh->state < State::kInterrupted);
|
||||
_mh->donorConnection = donorConnection.get();
|
||||
};
|
||||
|
||||
using eventType = ImporterEvent::Type;
|
||||
while (true) {
|
||||
opCtx->checkForInterrupt();
|
||||
|
||||
auto event = eventQueue->pop(opCtx);
|
||||
|
||||
// Out-of-order events for a different migration are not permitted.
|
||||
invariant(event.migrationId == migrationId);
|
||||
|
||||
switch (event.type) {
|
||||
case eventType::kNone:
|
||||
continue;
|
||||
case eventType::kLearnedFileName: {
|
||||
// We won't have valid donor metadata until the first
|
||||
// 'TenantFileImporterService::learnedFilename' call, so we need to set up the
|
||||
// connection for the first kLearnedFileName event.
|
||||
setUpDonorConnectionIfNeeded(event.metadataDoc);
|
||||
|
||||
_cloneFile(opCtx,
|
||||
migrationId,
|
||||
donorConnection.get(),
|
||||
workerPool,
|
||||
sharedData,
|
||||
event.metadataDoc);
|
||||
continue;
|
||||
}
|
||||
case eventType::kLearnedAllFilenames: {
|
||||
if (MONGO_unlikely(hangBeforeImportingFiles.shouldFail())) {
|
||||
LOGV2(8101400, "'hangBeforeImportingFiles' failpoint enabled");
|
||||
hangBeforeImportingFiles.pauseWhileSet();
|
||||
}
|
||||
|
||||
// This step prevents accidental deletion of committed donor data during startup and
// rollback recovery.
//
// For example, if a migration was initially aborted and retried successfully, a node restart
// or rollback could risk deleting committed donor data during oplog replay if the
// recovery/stable timestamp < the failed migration's abortOpTime. To prevent this data
// corruption case, a barrier is created by checkpointing the startMigrationTimestamp before
// importing collections for the ongoing migration attempt. This prevents startup/rollback
// recovery from replaying oplog entries from various migration attempts.
|
||||
//
|
||||
// Note: Since StartMigrationTimestamp is majority committed (given that all
|
||||
// recipient state document writes are majority committed by the recipient state
|
||||
// machine), it's safe to await its checkpointing without requiring a no-op write.
|
||||
_waitUntilStartMigrationTimestampIsCheckpointed(opCtx, migrationId);
|
||||
|
||||
_runRollbackAndThenImportFiles(opCtx, migrationId);
|
||||
createImportDoneMarkerLocalCollection(opCtx, migrationId);
|
||||
// Take a stable checkpoint to persist both the imported donor collections and the
|
||||
// marker collection to disk.
|
||||
opCtx->getServiceContext()->getStorageEngine()->waitUntilUnjournaledWritesDurable(
|
||||
opCtx,
|
||||
/*stableCheckpoint*/ true);
|
||||
_voteImportedFiles(opCtx, migrationId);
|
||||
return;
|
||||
}
|
||||
}
|
||||
MONGO_UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
void TenantFileImporterService::_cloneFile(OperationContext* opCtx,
|
||||
const UUID& migrationId,
|
||||
DBClientConnection* clientConnection,
|
||||
ThreadPool* workerPool,
|
||||
TenantMigrationSharedData* sharedData,
|
||||
const BSONObj& metadataDoc) {
|
||||
if (MONGO_unlikely(skipCloneFiles.shouldFail())) {
|
||||
LOGV2(7800201,
|
||||
"Skipping file cloning due to 'skipCloneFiles' failpoint enabled",
|
||||
"migrationId"_attr = migrationId);
|
||||
return;
|
||||
}
|
||||
|
||||
const auto fileName = metadataDoc["filename"].str();
|
||||
const auto backupId = UUID(uassertStatusOK(UUID::parse(metadataDoc[kBackupIdFieldName])));
|
||||
const auto remoteDbpath = metadataDoc["remoteDbpath"].str();
|
||||
const size_t fileSize = std::max(0ll, metadataDoc["fileSize"].safeNumberLong());
|
||||
const auto relativePath =
|
||||
boost::filesystem::relative(fileName, metadataDoc[kDonorDbPathFieldName].str()).string();
|
||||
LOGV2_DEBUG(6113320,
|
||||
1,
|
||||
"Cloning file",
|
||||
"migrationId"_attr = migrationId,
|
||||
"metadata"_attr = metadataDoc,
|
||||
"destinationRelativePath"_attr = relativePath);
|
||||
invariant(!relativePath.empty());
|
||||
|
||||
auto currentTenantFileCloner =
|
||||
std::make_unique<TenantFileCloner>(backupId,
|
||||
migrationId,
|
||||
fileName,
|
||||
fileSize,
|
||||
relativePath,
|
||||
sharedData,
|
||||
clientConnection->getServerHostAndPort(),
|
||||
clientConnection,
|
||||
repl::StorageInterface::get(cc().getServiceContext()),
|
||||
workerPool);
|
||||
|
||||
ON_BLOCK_EXIT([this, &migrationId] {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
invariant(_mh && migrationId == _mh->migrationId);
|
||||
if (_mh->currentTenantFileCloner) {
|
||||
_mh->stats.totalBytesCopied += _mh->currentTenantFileCloner->getStats().bytesCopied;
|
||||
_mh->currentTenantFileCloner = nullptr;
|
||||
}
|
||||
});
|
||||
|
||||
{
|
||||
stdx::lock_guard lk(_mutex);
|
||||
invariant(_mh && migrationId == _mh->migrationId);
|
||||
_mh->currentTenantFileCloner = currentTenantFileCloner.get();
|
||||
}
|
||||
|
||||
auto cloneStatus = currentTenantFileCloner->run();
|
||||
uassertStatusOK(cloneStatus.withContext(str::stream()
|
||||
<< "Failed to clone file, migrationId: " << migrationId
|
||||
<< ", fileName: " << fileName));
|
||||
}
|
||||
|
||||
|
||||
void TenantFileImporterService::_waitUntilStartMigrationTimestampIsCheckpointed(
|
||||
OperationContext* opCtx, const UUID& migrationId) {
|
||||
const auto startMigrationTs = [&] {
|
||||
stdx::lock_guard<stdx::mutex> lg(_mutex);
|
||||
invariant(_mh && migrationId == _mh->migrationId);
|
||||
return _mh->startMigrationOpTime.getTimestamp();
|
||||
}();
|
||||
|
||||
bool firstWait = true;
|
||||
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
|
||||
while (true) {
|
||||
const auto& recoveryTs = storageEngine->getLastStableRecoveryTimestamp();
|
||||
if (recoveryTs && *recoveryTs >= startMigrationTs) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (firstWait) {
|
||||
LOGV2_DEBUG(7458500,
|
||||
2,
|
||||
"Wait for start migration timestamp to be checkpointed",
|
||||
"startMigrationTimestamp"_attr = startMigrationTs,
|
||||
"lastCheckpointTimestamp"_attr = recoveryTs);
|
||||
firstWait = false;
|
||||
}
|
||||
|
||||
// Sleep a bit so we do not keep hammering the system.
|
||||
opCtx->sleepFor(Milliseconds(100));
|
||||
opCtx->getServiceContext()->getStorageEngine()->waitUntilUnjournaledWritesDurable(
|
||||
opCtx,
|
||||
/*stableCheckpoint*/ true);
|
||||
}
|
||||
}
|
||||
|
||||
void TenantFileImporterService::_runRollbackAndThenImportFiles(OperationContext* opCtx,
|
||||
const UUID& migrationId) {
|
||||
if (MONGO_unlikely(skipImportFiles.shouldFail())) {
|
||||
LOGV2(7800200,
|
||||
"Skipping file import due to 'skipImportFiles' failpoint enabled",
|
||||
"migrationId"_attr = migrationId);
|
||||
return;
|
||||
}
|
||||
auto tempWTDirectory = fileClonerTempDir(migrationId);
|
||||
uassert(6113315,
|
||||
str::stream() << "Missing file cloner's temporary dbpath directory: "
|
||||
<< tempWTDirectory.string(),
|
||||
boost::filesystem::exists(tempWTDirectory));
|
||||
|
||||
ON_BLOCK_EXIT([&tempWTDirectory, &migrationId] {
|
||||
LOGV2_INFO(6113324,
|
||||
"Done importing files, removing the temporary WT dbpath",
|
||||
"migrationId"_attr = migrationId,
|
||||
"tempDbPath"_attr = tempWTDirectory.string());
|
||||
fsyncRemoveDirectory(tempWTDirectory);
|
||||
});
|
||||
|
||||
auto metadatas =
|
||||
wiredTigerRollbackToStableAndGetMetadata(opCtx, tempWTDirectory.string(), migrationId);
|
||||
|
||||
{
|
||||
stdx::lock_guard lk(_mutex);
|
||||
invariant(_mh && migrationId == _mh->migrationId);
|
||||
_mh->importStarted = true;
|
||||
}
|
||||
|
||||
ON_BLOCK_EXIT([&] {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
invariant(_mh && migrationId == _mh->migrationId);
|
||||
setPromiseOkifNotReady(lk, _mh->importCompletedPromise);
|
||||
});
|
||||
|
||||
// Disable replication because this logic is executed on all nodes during a Shard Merge.
|
||||
repl::UnreplicatedWritesBlock uwb(opCtx);
|
||||
|
||||
for (auto&& metadata : metadatas) {
|
||||
|
||||
// Check for migration interrupt before importing the collection.
|
||||
opCtx->checkForInterrupt();
|
||||
|
||||
std::vector<boost::filesystem::path> movedFiles;
|
||||
ScopeGuard removeFilesGuard([&] {
|
||||
for (const auto& filePath : movedFiles) {
|
||||
removeFile(filePath);
|
||||
}
|
||||
if (!movedFiles.empty())
|
||||
fsyncDataDirectory();
|
||||
});
|
||||
|
||||
BSONObjBuilder catalogMetaBuilder;
|
||||
BSONObjBuilder storageMetaBuilder;
|
||||
|
||||
// Moves the collection file and its associated index files from the temp dir to the dbpath,
// and regenerates the metadata info with a new unique ident id.
|
||||
auto newCollIdent = fsyncMoveWithNewIdent(opCtx,
|
||||
tempWTDirectory,
|
||||
metadata.ns,
|
||||
metadata.collection.ident,
|
||||
"collection",
|
||||
movedFiles);
|
||||
|
||||
catalogMetaBuilder.append("ident", newCollIdent);
|
||||
// Update the collection ident id.
|
||||
metadata.collection.ident = std::move(newCollIdent);
|
||||
buildStorageMetadata(metadata.collection, storageMetaBuilder);
|
||||
|
||||
BSONObjBuilder newIndexIdentMap;
|
||||
for (auto&& index : metadata.indexes) {
|
||||
auto newIndexIdent = fsyncMoveWithNewIdent(
|
||||
opCtx, tempWTDirectory, metadata.ns, index.ident, "index", movedFiles);
|
||||
newIndexIdentMap.append(index.indexName, newIndexIdent);
|
||||
// Update the index ident id.
|
||||
index.ident = std::move(newIndexIdent);
|
||||
buildStorageMetadata(index, storageMetaBuilder);
|
||||
}
|
||||
|
||||
catalogMetaBuilder.append("idxIdent", newIndexIdentMap.obj());
|
||||
metadata.catalogObject = metadata.catalogObject.addFields(catalogMetaBuilder.obj());
|
||||
const auto storageMetaObj = storageMetaBuilder.done();
|
||||
|
||||
importCollectionAndItsIndexesInMainWTInstance(opCtx, metadata, migrationId, storageMetaObj);
|
||||
|
||||
removeFilesGuard.dismiss();
|
||||
}
|
||||
}
|
||||
|
||||
void TenantFileImporterService::_voteImportedFiles(OperationContext* opCtx,
|
||||
const UUID& migrationId) {
|
||||
if (MONGO_unlikely(hangBeforeVoteImportedFiles.shouldFail())) {
|
||||
LOGV2(7675000, "'hangBeforeVoteImportedFiles' failpoint enabled");
|
||||
hangBeforeVoteImportedFiles.pauseWhileSet();
|
||||
}
|
||||
|
||||
// Build the command request.
|
||||
auto replCoord = ReplicationCoordinator::get(getGlobalServiceContext());
|
||||
RecipientVoteImportedFiles cmd(migrationId, replCoord->getMyHostAndPort());
|
||||
|
||||
Backoff exponentialBackoff(Seconds(1), Milliseconds::max());
|
||||
|
||||
while (true) {
|
||||
|
||||
opCtx->checkForInterrupt();
|
||||
|
||||
try {
|
||||
auto voteResponse = replCoord->runCmdOnPrimaryAndAwaitResponse(
|
||||
opCtx,
|
||||
DatabaseName::kAdmin,
|
||||
cmd.toBSON(),
|
||||
[](executor::TaskExecutor::CallbackHandle handle) {},
|
||||
[](executor::TaskExecutor::CallbackHandle handle) {});
|
||||
|
||||
uassertStatusOK(getStatusFromCommandResult(voteResponse));
|
||||
} catch (DBException& ex) {
|
||||
if (ErrorCodes::isNetworkError(ex)) {
|
||||
LOGV2_INFO(7675001,
|
||||
"Retrying 'recipientVoteImportedFiles' command",
|
||||
"retryError"_attr = redact(ex));
|
||||
|
||||
// Don't hammer the network.
|
||||
opCtx->sleepFor(exponentialBackoff.nextSleep());
|
||||
continue;
|
||||
}
|
||||
|
||||
ex.addContext("Failed to run 'recipientVoteImportedFiles' command");
|
||||
throw;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void TenantFileImporterService::_interrupt(WithLock lk, const UUID& migrationId) {
|
||||
auto prevState = _transitionToState(lk, migrationId, State::kInterrupted);
|
||||
if (prevState == State::kInterrupted)
|
||||
return;
|
||||
|
||||
if (_mh->donorConnection) {
|
||||
_mh->donorConnection->shutdownAndDisallowReconnect();
|
||||
}
|
||||
|
||||
if (_mh->workerPool) {
|
||||
_mh->workerPool->shutdown();
|
||||
}
|
||||
|
||||
if (_mh->sharedData) {
|
||||
stdx::lock_guard<TenantMigrationSharedData> sharedDatalk(*_mh->sharedData);
|
||||
// Prevent the TenantFileCloner from getting retried on retryable errors.
|
||||
_mh->sharedData->setStatusIfOK(
|
||||
sharedDatalk, Status{ErrorCodes::CallbackCanceled, "TenantFileCloner canceled"});
|
||||
}
|
||||
|
||||
if (_mh->eventQueue) {
|
||||
_mh->eventQueue->closeConsumerEnd();
|
||||
}
|
||||
|
||||
if (_mh->opCtx) {
|
||||
stdx::lock_guard<Client> lk(*_mh->opCtx->getClient());
|
||||
_mh->opCtx->markKilled(ErrorCodes::Interrupted);
|
||||
}
|
||||
|
||||
// _runRollbackAndThenImportFiles() will fulfill the promise if importStarted is true.
|
||||
if (!_mh->importStarted) {
|
||||
setPromiseOkifNotReady(lk, _mh->importCompletedPromise);
|
||||
}
|
||||
}
|
||||
|
||||
void TenantFileImporterService::_resetMigrationHandle(boost::optional<const UUID&> migrationId) {
|
||||
stdx::unique_lock<stdx::mutex> lk(_mutex);
|
||||
_resetCV.wait(lk, [this]() { return _resetInProgress == false; });
|
||||
if (!_mh) {
|
||||
return;
|
||||
}
|
||||
if (!migrationId) {
|
||||
migrationId = _mh->migrationId;
|
||||
}
|
||||
|
||||
(void)_transitionToState(lk, migrationId.value(), State::kStopped, true /*dryRun*/);
|
||||
_resetInProgress = true;
|
||||
|
||||
auto workerThread = _mh->workerThread.get();
|
||||
auto workerPool = _mh->workerPool.get();
|
||||
lk.unlock();
|
||||
|
||||
LOGV2(7800207,
|
||||
"TenantFileImporterService::Waiting for worker threads to join",
|
||||
"migrationId"_attr = migrationId);
|
||||
if (workerThread && workerThread->joinable()) {
|
||||
workerThread->join();
|
||||
}
|
||||
|
||||
if (workerPool) {
|
||||
workerPool->join();
|
||||
}
|
||||
|
||||
lk.lock();
|
||||
(void)_transitionToState(lk, migrationId.value(), State::kStopped);
|
||||
_mh.reset();
|
||||
|
||||
_resetInProgress = false;
|
||||
_resetCV.notify_all();
|
||||
}
|
||||
|
||||
TenantFileImporterService::State TenantFileImporterService::_transitionToState(
|
||||
WithLock, const UUID& migrationId, State targetState, const bool dryRun) {
|
||||
const auto isValid = [&] {
|
||||
if (!_mh || migrationId != _mh->migrationId)
|
||||
return false;
|
||||
|
||||
switch (targetState) {
|
||||
case State::kUninitialized:
|
||||
return _mh->state == State::kUninitialized;
|
||||
case State::kStarted:
|
||||
return _mh->state <= State::kStarted;
|
||||
case State::kLearnedFilename:
|
||||
return _mh->state <= State::kLearnedFilename;
|
||||
case State::kLearnedAllFilenames:
|
||||
return _mh->state == State::kLearnedFilename ||
|
||||
_mh->state == State::kLearnedAllFilenames;
|
||||
case State::kInterrupted:
|
||||
return _mh->state <= State::kInterrupted;
|
||||
case State::kStopped:
|
||||
return _mh->state == State::kUninitialized || _mh->state >= State::kInterrupted;
|
||||
default:
|
||||
MONGO_UNREACHABLE;
|
||||
}
|
||||
}();
|
||||
|
||||
std::stringstream errMsg;
|
||||
errMsg << "Failed state transition check for migrationID: " << migrationId
|
||||
<< ", state: " << stateToString(targetState);
|
||||
if (_mh) {
|
||||
errMsg << ", current migrationId: " << _mh->migrationId
|
||||
<< ", current state: " << stateToString(_mh->state);
|
||||
}
|
||||
uassert(7800210, errMsg.str(), isValid);
|
||||
|
||||
if (dryRun)
|
||||
return _mh->state;
|
||||
if (targetState != _mh->state) {
|
||||
LOGV2(7800208,
|
||||
"TenantFileImporterService:: Transitioning state to",
|
||||
"migrationId"_attr = migrationId,
|
||||
"state"_attr = stateToString(targetState));
|
||||
}
|
||||
std::swap(_mh->state, targetState);
|
||||
return targetState;
|
||||
}
|
||||
|
||||
boost::optional<SharedSemiFuture<void>> TenantFileImporterService::getImportCompletedFuture(
|
||||
const UUID& migrationId) {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
return (_mh && _mh->migrationId == migrationId)
|
||||
? boost::make_optional(_mh->importCompletedPromise.getFuture())
|
||||
: boost::none;
|
||||
}
|
||||
|
||||
bool TenantFileImporterService::hasActiveMigration(const UUID& migrationId) {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
return _mh && _mh->migrationId == migrationId;
|
||||
}
|
||||
|
||||
BSONObj TenantFileImporterService::getStats(boost::optional<const UUID&> migrationId) {
|
||||
BSONObjBuilder bob;
|
||||
getStats(bob, migrationId);
|
||||
return bob.obj();
|
||||
}
|
||||
|
||||
void TenantFileImporterService::getStats(BSONObjBuilder& bob,
|
||||
boost::optional<const UUID&> migrationId) {
|
||||
stdx::lock_guard lk(_mutex);
|
||||
if (!_mh || (migrationId && migrationId.value() != _mh->migrationId))
|
||||
return;
|
||||
|
||||
bob.append("approxTotalDataSize", static_cast<long long>(_mh->stats.totalDataSize));
|
||||
|
||||
auto approxTotalBytesCopied = _mh->stats.totalBytesCopied;
|
||||
if (_mh->currentTenantFileCloner) {
|
||||
approxTotalBytesCopied += _mh->currentTenantFileCloner->getStats().bytesCopied;
|
||||
}
|
||||
bob.append("approxTotalBytesCopied", static_cast<long long>(approxTotalBytesCopied));
|
||||
|
||||
auto fileCopyEnd = [&]() {
|
||||
return _mh->stats.fileCopyEnd == Date_t() ? Date_t::now() : _mh->stats.fileCopyEnd;
|
||||
}();
|
||||
auto elapsedMillis =
|
||||
duration_cast<Milliseconds>(fileCopyEnd - _mh->stats.fileCopyStart).count();
|
||||
bob.append("totalReceiveElapsedMillis", static_cast<long long>(elapsedMillis));
|
||||
|
||||
|
||||
if (approxTotalBytesCopied > _mh->stats.totalDataSize) {
LOGV2_ERROR(7800209,
"TenantFileImporterService::Bytes copied is greater than actual data size",
"migrationId"_attr = _mh->migrationId,
"totalDataSize"_attr = _mh->stats.totalDataSize,
"totalBytesCopied"_attr = approxTotalBytesCopied);
}
int64_t timeRemainingMillis =
((_mh->stats.totalDataSize - approxTotalBytesCopied) * elapsedMillis) /
(approxTotalBytesCopied + 1);
bob.append("remainingReceiveEstimatedMillis", static_cast<long long>(timeRemainingMillis));
}
|
||||
|
||||
} // namespace mongo::repl
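The "remainingReceiveEstimatedMillis" value computed at the end of getStats() above is a plain linear extrapolation: remaining ≈ (totalDataSize - bytesCopied) * elapsedMillis / (bytesCopied + 1), where the "+ 1" guards against division by zero before any bytes have been copied. A minimal standalone sketch of that arithmetic, using hypothetical example values that are not part of the original commit:

#include <cstdint>
#include <iostream>

// Same formula as TenantFileImporterService::getStats(): extrapolate the remaining copy time
// from the average throughput observed so far.
int64_t estimateRemainingMillis(int64_t totalDataSize, int64_t bytesCopied, int64_t elapsedMillis) {
    return ((totalDataSize - bytesCopied) * elapsedMillis) / (bytesCopied + 1);
}

int main() {
    // Hypothetical example: 100 MB total, 25 MB copied in 30 seconds -> roughly 90 seconds left.
    std::cout << estimateRemainingMillis(100LL * 1024 * 1024, 25LL * 1024 * 1024, 30000) << " ms\n";
    return 0;
}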
@@ -1,367 +0,0 @@
/**
|
||||
* Copyright (C) 2022-present MongoDB, Inc.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Server Side Public License, version 1,
|
||||
* as published by MongoDB, Inc.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* Server Side Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the Server Side Public License
|
||||
* along with this program. If not, see
|
||||
* <http://www.mongodb.com/licensing/server-side-public-license>.
|
||||
*
|
||||
* As a special exception, the copyright holders give permission to link the
|
||||
* code of portions of this program with the OpenSSL library under certain
|
||||
* conditions as described in each individual source file and distribute
|
||||
* linked combinations including the program with the OpenSSL library. You
|
||||
* must comply with the Server Side Public License in all respects for
|
||||
* all of the code used other than as permitted herein. If you modify file(s)
|
||||
* with this exception, you may extend this exception to your version of the
|
||||
* file(s), but you are not obligated to do so. If you do not wish to do so,
|
||||
* delete this exception statement from your version. If you delete this
|
||||
* exception statement from all source files in the program, then also delete
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <boost/move/utility_core.hpp>
|
||||
#include <boost/optional/optional.hpp>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "mongo/base/string_data.h"
|
||||
#include "mongo/bson/bsonmisc.h"
|
||||
#include "mongo/bson/bsonobj.h"
|
||||
#include "mongo/bson/bsonobjbuilder.h"
|
||||
#include "mongo/client/dbclient_connection.h"
|
||||
#include "mongo/db/operation_context.h"
|
||||
#include "mongo/db/repl/replica_set_aware_service.h"
|
||||
#include "mongo/db/repl/tenant_file_cloner.h"
|
||||
#include "mongo/db/repl/tenant_migration_shared_data.h"
|
||||
#include "mongo/db/service_context.h"
|
||||
#include "mongo/stdx/mutex.h"
|
||||
#include "mongo/stdx/thread.h"
|
||||
#include "mongo/util/assert_util.h"
|
||||
#include "mongo/util/concurrency/thread_pool.h"
|
||||
#include "mongo/util/concurrency/with_lock.h"
|
||||
#include "mongo/util/producer_consumer_queue.h"
|
||||
#include "mongo/util/string_map.h"
|
||||
#include "mongo/util/uuid.h"
|
||||
|
||||
namespace mongo::repl {
|
||||
/**
|
||||
* Replica set aware service that runs on both the primary and secondaries. It orchestrates
* copying data files from the donor, importing those files, and notifying the primary when the
* import is successful.
|
||||
*/
|
||||
class TenantFileImporterService : public ReplicaSetAwareService<TenantFileImporterService> {
|
||||
public:
|
||||
static constexpr StringData kTenantFileImporterServiceName = "TenantFileImporterService"_sd;
|
||||
static TenantFileImporterService* get(ServiceContext* serviceContext);
|
||||
static TenantFileImporterService* get(OperationContext* opCtx);
|
||||
TenantFileImporterService();
|
||||
|
||||
using CreateConnectionFn = std::function<std::unique_ptr<DBClientConnection>()>;
|
||||
|
||||
struct Stats {
|
||||
Date_t fileCopyStart;
|
||||
Date_t fileCopyEnd;
|
||||
uint64_t totalDataSize{0};
|
||||
uint64_t totalBytesCopied{0};
|
||||
};
|
||||
|
||||
// Explicit State enum ordering defined here because we rely on comparison
|
||||
// operators for state checking in various TenantFileImporterService methods.
|
||||
enum class State {
|
||||
kUninitialized = 0,
|
||||
kStarted = 1,
|
||||
kLearnedFilename = 2,
|
||||
kLearnedAllFilenames = 3,
|
||||
kInterrupted = 4,
|
||||
kStopped = 5
|
||||
};
|
||||
|
||||
static StringData stateToString(State state) {
|
||||
switch (state) {
|
||||
case State::kUninitialized:
|
||||
return "uninitialized";
|
||||
case State::kStarted:
|
||||
return "started";
|
||||
case State::kLearnedFilename:
|
||||
return "learned filename";
|
||||
case State::kLearnedAllFilenames:
|
||||
return "learned all filenames";
|
||||
case State::kInterrupted:
|
||||
return "interrupted";
|
||||
case State::kStopped:
|
||||
return "stopped";
|
||||
}
|
||||
MONGO_UNREACHABLE;
|
||||
}
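The explicit numeric values in the State enum above are what let the service express "has not progressed past X" checks with ordinary comparison operators, as _transitionToState and _handleEvents do. A small self-contained sketch of that pattern, with made-up values rather than anything taken from the original header:

#include <cassert>

enum class State { kUninitialized = 0, kStarted = 1, kLearnedFilename = 2,
                   kLearnedAllFilenames = 3, kInterrupted = 4, kStopped = 5 };

int main() {
    State current = State::kStarted;
    // "Not yet interrupted" becomes a single ordered comparison instead of a switch.
    assert(current < State::kInterrupted);
    // Re-entering kStarted is allowed because kStarted <= kStarted.
    assert(current <= State::kStarted);
    return 0;
}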
|
||||
|
||||
/**
|
||||
* Begins the process of copying and importing files for a given migration.
|
||||
*/
|
||||
void startMigration(const UUID& migrationId, const OpTime& startMigrationOpTime);
|
||||
|
||||
/**
|
||||
* Called for each file to be copied for a given migration.
|
||||
*/
|
||||
void learnedFilename(const UUID& migrationId, const BSONObj& metadataDoc);
|
||||
|
||||
/**
|
||||
* Called after all files have been copied for a given migration.
|
||||
*/
|
||||
void learnedAllFilenames(const UUID& migrationId);
|
||||
|
||||
/**
|
||||
* Interrupts an in-progress migration with the provided migration id.
|
||||
*/
|
||||
void interruptMigration(const UUID& migrationId);
|
||||
|
||||
/**
|
||||
* Resets the interrupted migration for the given migrationId by calling
|
||||
* _resetMigrationHandle(). See _resetMigrationHandle() for detailed comments.
|
||||
*
|
||||
* Throws an exception if called before the migration is interrupted.
|
||||
*/
|
||||
void resetMigration(const UUID& migrationId);
|
||||
|
||||
/**
|
||||
* Causes any in-progress migration to be interrupted.
|
||||
*/
|
||||
void interruptAll();
|
||||
|
||||
/**
|
||||
* Returns a Future that will be resolved when the collection import task completes for the
* given migration id. Returns boost::none if no active migration matches the provided
* migration id.
|
||||
*/
|
||||
boost::optional<SharedSemiFuture<void>> getImportCompletedFuture(const UUID& migrationId);
|
||||
|
||||
/**
|
||||
* Checks if there is an active migration with the given migration ID.
|
||||
*/
|
||||
bool hasActiveMigration(const UUID& migrationId);
|
||||
|
||||
/**
|
||||
* Returns the migration stats for the given migrationId.
|
||||
* If no migrationId is provided, it returns the stats of an ongoing migration, if any.
|
||||
*/
|
||||
BSONObj getStats(boost::optional<const UUID&> migrationId = boost::none);
|
||||
void getStats(BSONObjBuilder& bob, boost::optional<const UUID&> migrationId = boost::none);
|
||||
|
||||
void onConsistentDataAvailable(OperationContext*, bool, bool) final {}
|
||||
|
||||
void onShutdown() final {
|
||||
{
|
||||
stdx::lock_guard lk(_mutex);
|
||||
// Prevents a new migration from starting up during or after shutdown.
|
||||
_isShuttingDown = true;
|
||||
}
|
||||
interruptAll();
|
||||
_resetMigrationHandle();
|
||||
}
|
||||
|
||||
void onRollbackBegin() final {
|
||||
interruptAll();
|
||||
_resetMigrationHandle();
|
||||
}
|
||||
|
||||
void onStartup(OperationContext*) final {}
|
||||
|
||||
void onSetCurrentConfig(OperationContext* opCtx) final {}
|
||||
|
||||
void onStepUpBegin(OperationContext*, long long) final {}
|
||||
|
||||
void onStepUpComplete(OperationContext*, long long) final {}
|
||||
|
||||
void onStepDown() final {}
|
||||
|
||||
void onBecomeArbiter() final {}
|
||||
|
||||
inline std::string getServiceName() const final {
|
||||
return "TenantFileImporterService";
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the function used to create a donor client connection. Used for testing.
|
||||
*/
|
||||
void setCreateConnectionFn_forTest(const CreateConnectionFn& fn) {
|
||||
_createConnectionFn = fn;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the migrationId.
|
||||
*/
|
||||
boost::optional<UUID> getMigrationId_forTest() {
|
||||
return _mh ? boost::make_optional(_mh->migrationId) : boost::none;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the migration state.
|
||||
*/
|
||||
boost::optional<TenantFileImporterService::State> getState_forTest() {
|
||||
return _mh ? boost::make_optional(_mh->state) : boost::none;
|
||||
}
|
||||
|
||||
private:
|
||||
/**
|
||||
* A worker function that waits for ImporterEvents and handles cloning and importing files.
|
||||
*/
|
||||
void _handleEvents(const UUID& migrationId);
|
||||
|
||||
/**
|
||||
* Performs file copying from the donor for the specified filename in the given metadataDoc.
|
||||
*/
|
||||
void _cloneFile(OperationContext* opCtx,
|
||||
const UUID& migrationId,
|
||||
DBClientConnection* clientConnection,
|
||||
ThreadPool* workerPool,
|
||||
TenantMigrationSharedData* sharedData,
|
||||
const BSONObj& metadataDoc);
|
||||
|
||||
/**
|
||||
* Waits until the majority committed StartMigrationTimestamp is successfully checkpointed.
|
||||
*
|
||||
* Note: Refer to the calling site for more information on its significance.
|
||||
*/
|
||||
void _waitUntilStartMigrationTimestampIsCheckpointed(OperationContext* opCtx,
|
||||
const UUID& migrationId);
|
||||
/**
|
||||
* Runs rollback to stable on the cloned files associated with the given migration id,
|
||||
* and then import the stable cloned files into the main WT instance.
|
||||
*/
|
||||
void _runRollbackAndThenImportFiles(OperationContext* opCtx, const UUID& migrationId);
|
||||
|
||||
/**
|
||||
* Called to inform the primary that we have finished copying and importing all files.
|
||||
*/
|
||||
void _voteImportedFiles(OperationContext* opCtx, const UUID& migrationId);
|
||||
|
||||
/**
|
||||
* Called internally by interruptMigration() and interruptAll() to interrupt running file
* cloning and import operations.
|
||||
*/
|
||||
void _interrupt(WithLock lk, const UUID& migrationId);
|
||||
|
||||
/**
|
||||
* This blocking call waits for the worker threads to finish execution, and then releases
|
||||
* the resources held by MigrationHandle for the given migrationId (if provided) or for the
|
||||
* current ongoing migration.
|
||||
*
|
||||
* Throws an exception if called before the migration is interrupted.
|
||||
*/
|
||||
void _resetMigrationHandle(boost::optional<const UUID&> migrationId = boost::none);
|
||||
|
||||
/*
|
||||
* Transitions the migration associated with the given migrationId to the specified target
|
||||
* state. If dryRun is set to 'true', the function performs a dry run of the state transition
|
||||
* without actually changing the state. Throws an exception for an invalid state transition.
|
||||
*
|
||||
* Returns the current migration state before the state transition.
|
||||
*/
|
||||
TenantFileImporterService::State _transitionToState(WithLock,
|
||||
const UUID& migrationId,
|
||||
State targetState,
|
||||
bool dryRun = false);
|
||||
|
||||
void _makeMigrationHandleIfNotPresent(WithLock,
|
||||
const UUID& migrationId,
|
||||
const OpTime& startMigrationOpTime);
|
||||
|
||||
struct ImporterEvent {
|
||||
enum class Type { kNone, kLearnedFileName, kLearnedAllFilenames };
|
||||
Type type;
|
||||
UUID migrationId;
|
||||
BSONObj metadataDoc;
|
||||
|
||||
ImporterEvent(Type _type, const UUID& _migrationId)
|
||||
: type(_type), migrationId(_migrationId) {}
|
||||
};
|
||||
|
||||
using Queue =
|
||||
MultiProducerSingleConsumerQueue<ImporterEvent,
|
||||
producer_consumer_queue_detail::DefaultCostFunction>;
|
||||
|
||||
// Represents a handle for managing the migration process. It holds various resources and
|
||||
// information required for cloning files and importing them.
|
||||
struct MigrationHandle {
|
||||
explicit MigrationHandle(const UUID& migrationId, const OpTime& startMigrationOpTime);
|
||||
|
||||
// Shard merge migration Id.
|
||||
const UUID migrationId;
|
||||
|
||||
// Optime at which the recipient state machine document for this migration is initialized.
|
||||
const OpTime startMigrationOpTime;
|
||||
|
||||
// Queue to process ImporterEvents.
|
||||
const std::unique_ptr<Queue> eventQueue;
|
||||
|
||||
// ThreadPool used by TenantFileCloner to do storage write operations.
|
||||
const std::unique_ptr<ThreadPool> workerPool;
|
||||
|
||||
// Shared between the importer service and TenantFileCloners
|
||||
const std::unique_ptr<TenantMigrationSharedData> sharedData;
|
||||
|
||||
// Indicates if collection import for this migration has begun.
|
||||
bool importStarted = false;
|
||||
|
||||
// Promise fulfilled upon completion of collection import for this migration.
|
||||
SharedPromise<void> importCompletedPromise;
|
||||
|
||||
// Worker thread to orchestrate the cloning, importing and notifying the primary steps.
|
||||
std::unique_ptr<stdx::thread> workerThread;
|
||||
|
||||
// State of the associated migration.
|
||||
State state = State::kUninitialized;
|
||||
|
||||
// Tracks the Statistics of the associated migration.
|
||||
Stats stats;
|
||||
|
||||
// Pointers below are not owned by this struct. The method that sets these
|
||||
// pointers must manage their lifecycle and ensure proper pointer reset to prevent
|
||||
// invalid memory access by other methods when reading the pointer value.
|
||||
|
||||
// Donor DBClientConnection for file cloning.
|
||||
DBClientConnection* donorConnection = nullptr;
|
||||
|
||||
// OperationContext associated with the migration.
|
||||
OperationContext* opCtx = nullptr;
|
||||
|
||||
// Pointer to the current TenantFileCloner of the associated migration; used for statistics
|
||||
// purpose.
|
||||
TenantFileCloner* currentTenantFileCloner = nullptr;
|
||||
};
|
||||
|
||||
stdx::mutex _mutex;
|
||||
|
||||
// All member variables are labeled with one of the following codes indicating the
|
||||
// synchronization rules for accessing them.
|
||||
//
|
||||
// (R) Read-only in concurrent operation; no synchronization required.
|
||||
// (S) Self-synchronizing; access according to class's own rules.
|
||||
// (M) Reads and writes guarded by _mutex.
|
||||
// (W) Synchronization required only for writes.
|
||||
// (I) Independently synchronized, see member variable comment.
|
||||
|
||||
// Set to true when the shutdown procedure is initiated.
|
||||
bool _isShuttingDown = false; // (M)
|
||||
|
||||
std::unique_ptr<MigrationHandle> _mh; // (M)
|
||||
|
||||
// Used to create a new DBClientConnection to the donor.
|
||||
CreateConnectionFn _createConnectionFn = {}; // (W)
|
||||
|
||||
// Condition variable to block concurrent reset operations.
|
||||
stdx::condition_variable _resetCV; // (M)
|
||||
// Flag indicating whether a reset is currently in progress.
|
||||
bool _resetInProgress = false; // (M)
|
||||
};
|
||||
} // namespace mongo::repl
@@ -1,574 +0,0 @@
/**
|
||||
* Copyright (C) 2023-present MongoDB, Inc.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Server Side Public License, version 1,
|
||||
* as published by MongoDB, Inc.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* Server Side Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the Server Side Public License
|
||||
* along with this program. If not, see
|
||||
* <http://www.mongodb.com/licensing/server-side-public-license>.
|
||||
*
|
||||
* As a special exception, the copyright holders give permission to link the
|
||||
* code of portions of this program with the OpenSSL library under certain
|
||||
* conditions as described in each individual source file and distribute
|
||||
* linked combinations including the program with the OpenSSL library. You
|
||||
* must comply with the Server Side Public License in all respects for
|
||||
* all of the code used other than as permitted herein. If you modify file(s)
|
||||
* with this exception, you may extend this exception to your version of the
|
||||
* file(s), but you are not obligated to do so. If you do not wish to do so,
|
||||
* delete this exception statement from your version. If you delete this
|
||||
* exception statement from all source files in the program, then also delete
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#include <boost/filesystem/operations.hpp>
|
||||
#include <boost/filesystem/path.hpp>
|
||||
#include <boost/optional/optional.hpp>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/move/utility_core.hpp>
|
||||
|
||||
#include "mongo/bson/bsonelement.h"
|
||||
#include "mongo/bson/bsontypes.h"
|
||||
#include "mongo/bson/bsontypes_util.h"
|
||||
#include "mongo/db/database_name.h"
|
||||
#include "mongo/db/db_raii.h"
|
||||
#include "mongo/db/namespace_string.h"
|
||||
#include "mongo/db/query/client_cursor/cursor_response.h"
|
||||
#include "mongo/db/repl/member_state.h"
|
||||
#include "mongo/db/repl/replica_set_aware_service.h"
|
||||
#include "mongo/db/repl/replication_coordinator.h"
|
||||
#include "mongo/db/repl/replication_coordinator_mock.h"
|
||||
#include "mongo/db/repl/storage_interface.h"
|
||||
#include "mongo/db/repl/storage_interface_impl.h"
|
||||
#include "mongo/db/repl/tenant_file_importer_service.h"
|
||||
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
|
||||
#include "mongo/db/service_context_d_test_fixture.h"
|
||||
#include "mongo/db/storage/kv/kv_engine.h"
|
||||
#include "mongo/dbtests/mock/mock_dbclient_connection.h"
|
||||
#include "mongo/dbtests/mock/mock_remote_db_server.h"
|
||||
#include "mongo/executor/task_executor_test_fixture.h"
|
||||
#include "mongo/executor/thread_pool_task_executor_test_fixture.h"
|
||||
#include "mongo/logv2/log_component.h"
|
||||
#include "mongo/logv2/log_severity.h"
|
||||
#include "mongo/unittest/assert.h"
|
||||
#include "mongo/unittest/bson_test_util.h"
|
||||
#include "mongo/unittest/death_test.h"
|
||||
#include "mongo/unittest/framework.h"
|
||||
#include "mongo/unittest/log_test.h"
|
||||
#include "mongo/util/fail_point.h"
|
||||
#include "mongo/util/net/hostandport.h"
|
||||
|
||||
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
|
||||
|
||||
namespace mongo {
|
||||
|
||||
|
||||
namespace repl {
|
||||
|
||||
using namespace repl::shard_merge_utils;
|
||||
|
||||
namespace {
|
||||
constexpr auto kDonorHostName = "localhost:12345"_sd;
|
||||
constexpr auto kDonorDBPath = "/path/to/remoteDB/"_sd;
|
||||
static const UUID kBackupId = UUID::gen();
|
||||
const OpTime kStartMigrationOpTime(Timestamp(1, 1), 1);
|
||||
|
||||
} // namespace
|
||||
class TenantFileImporterServiceTest : public ServiceContextMongoDTest {
|
||||
public:
|
||||
/**
|
||||
* Create TenantFileImporterService::ImporterEvent::kLearnedFileName event.
|
||||
*/
|
||||
static BSONObj makefileMetaDoc(const UUID& migrationId,
|
||||
const std::string& fileName,
|
||||
uint64_t fileSize) {
|
||||
return BSON("filename" << kDonorDBPath + "/" + fileName << "fileSize"
|
||||
<< static_cast<int64_t>(fileSize) << kDonorHostNameFieldName
|
||||
<< kDonorHostName << kMigrationIdFieldName << migrationId
|
||||
<< kBackupIdFieldName << kBackupId << kDonorDbPathFieldName
|
||||
<< kDonorDBPath);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if collection exists.
|
||||
*/
|
||||
static bool collectionExists(OperationContext* opCtx, const NamespaceString& nss) {
|
||||
return static_cast<bool>(AutoGetCollectionForRead(opCtx, nss).getCollection());
|
||||
}
|
||||
|
||||
|
||||
void setUp() override {
|
||||
ServiceContextMongoDTest::setUp();
|
||||
auto serviceContext = getServiceContext();
|
||||
auto replCoord = std::make_unique<ReplicationCoordinatorMock>(serviceContext);
|
||||
replCoord->setRunCmdOnPrimaryAndAwaitResponseFunction([this](OperationContext* opCtx,
|
||||
const DatabaseName& dbName,
|
||||
const BSONObj& cmdObj,
|
||||
ReplicationCoordinator::
|
||||
OnRemoteCmdScheduledFn
|
||||
onRemoteCmdScheduled,
|
||||
ReplicationCoordinator::
|
||||
OnRemoteCmdCompleteFn
|
||||
onRemoteCmdComplete) {
|
||||
runCmdOnPrimaryAndAwaitResponseFnCalls.push_back(RunCmdOnPrimaryCall{dbName, cmdObj});
|
||||
return runCmdOnPrimaryAndAwaitResponseFnResponse;
|
||||
});
|
||||
ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_PRIMARY));
|
||||
ReplicationCoordinator::set(serviceContext, std::move(replCoord));
|
||||
StorageInterface::set(serviceContext, std::make_unique<StorageInterfaceImpl>());
|
||||
|
||||
_importerService = repl::TenantFileImporterService::get(serviceContext);
|
||||
|
||||
_mockDonorServer = std::make_unique<MockRemoteDBServer>(kDonorHostName.toString());
|
||||
_importerService->setCreateConnectionFn_forTest([&]() {
|
||||
return std::make_unique<MockDBClientConnection>(_mockDonorServer.get(),
|
||||
true /* autoReconnect */);
|
||||
});
|
||||
|
||||
globalFailPointRegistry().find("skipImportFiles")->setMode(FailPoint::alwaysOn);
|
||||
|
||||
// Set the stable timestamp to avoid hang in
|
||||
// TenantFileImporterService::_waitUntilStartMigrationTimestampIsCheckpointed().
|
||||
auto opCtx = cc().makeOperationContext();
|
||||
auto engine = serviceContext->getStorageEngine()->getEngine();
|
||||
engine->setStableTimestamp(Timestamp(1, 1), true);
|
||||
}
|
||||
|
||||
void tearDown() override {
|
||||
_importerService->onShutdown();
|
||||
StorageInterface::set(getServiceContext(), {});
|
||||
ReplicationCoordinator::set(getServiceContext(), {});
|
||||
ServiceContextMongoDTest::tearDown();
|
||||
}
|
||||
|
||||
struct RunCmdOnPrimaryCall {
|
||||
DatabaseName dbName;
|
||||
BSONObj cmdObj;
|
||||
};
|
||||
std::vector<RunCmdOnPrimaryCall> runCmdOnPrimaryAndAwaitResponseFnCalls;
|
||||
BSONObj runCmdOnPrimaryAndAwaitResponseFnResponse = BSON("ok" << 1);
|
||||
|
||||
private:
|
||||
unittest::MinimumLoggedSeverityGuard _replicationSeverityGuard{
|
||||
logv2::LogComponent::kReplication, logv2::LogSeverity::Debug(1)};
|
||||
unittest::MinimumLoggedSeverityGuard _tenantMigrationSeverityGuard{
|
||||
logv2::LogComponent::kTenantMigration, logv2::LogSeverity::Debug(1)};
|
||||
|
||||
protected:
|
||||
std::unique_ptr<MockRemoteDBServer> _mockDonorServer;
|
||||
TenantFileImporterService* _importerService;
|
||||
};
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, ConcurrentMigrationWithDifferentMigrationID) {
|
||||
FailPointEnableBlock failPoint("skipCloneFiles");
|
||||
auto migrationId = UUID::gen();
|
||||
auto anotherMigrationId = UUID::gen();
|
||||
|
||||
auto verifyAllStateTransitionFailsForAnotherMigrationId = [&] {
|
||||
ASSERT_THROWS_CODE(
|
||||
_importerService->startMigration(anotherMigrationId, kStartMigrationOpTime),
|
||||
DBException,
|
||||
7800210);
|
||||
ASSERT_THROWS_CODE(_importerService->learnedFilename(
|
||||
anotherMigrationId, makefileMetaDoc(migrationId, "some-file.wt", 1)),
|
||||
DBException,
|
||||
7800210);
|
||||
ASSERT_THROWS_CODE(
|
||||
_importerService->learnedAllFilenames(anotherMigrationId), DBException, 7800210);
|
||||
ASSERT_THROWS_CODE(
|
||||
_importerService->interruptMigration(anotherMigrationId), DBException, 7800210);
|
||||
ASSERT_THROWS_CODE(
|
||||
_importerService->resetMigration(anotherMigrationId), DBException, 7800210);
|
||||
};
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
|
||||
|
||||
verifyAllStateTransitionFailsForAnotherMigrationId();
|
||||
|
||||
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedFilename);
|
||||
|
||||
verifyAllStateTransitionFailsForAnotherMigrationId();
|
||||
|
||||
_importerService->learnedAllFilenames(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedAllFilenames);
|
||||
|
||||
verifyAllStateTransitionFailsForAnotherMigrationId();
|
||||
|
||||
_importerService->interruptMigration(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
|
||||
|
||||
verifyAllStateTransitionFailsForAnotherMigrationId();
|
||||
|
||||
_importerService->resetMigration(migrationId);
|
||||
ASSERT(!_importerService->getMigrationId_forTest());
|
||||
|
||||
{
|
||||
// Starting a new migration with anotherMigrationId is now possible.
|
||||
_importerService->startMigration(anotherMigrationId, kStartMigrationOpTime);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), anotherMigrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, StartConcurrentMigrationWithSameMigrationID) {
|
||||
FailPointEnableBlock failPoint("skipCloneFiles");
|
||||
auto migrationId = UUID::gen();
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
|
||||
|
||||
// startMigration calls with the same migrationId will be ignored.
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
|
||||
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedFilename);
|
||||
|
||||
ASSERT_THROWS_CODE(
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime), DBException, 7800210);
|
||||
|
||||
_importerService->learnedAllFilenames(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedAllFilenames);
|
||||
|
||||
ASSERT_THROWS_CODE(
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime), DBException, 7800210);
|
||||
|
||||
_importerService->interruptMigration(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
|
||||
|
||||
_importerService->resetMigration(migrationId);
|
||||
ASSERT(!_importerService->getMigrationId_forTest());
|
||||
|
||||
// Starting a new migration with same migrationId is now possible.
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, ShouldHaveLearntAtLeastOneFileName) {
    auto migrationId = UUID::gen();

    _importerService->startMigration(migrationId, kStartMigrationOpTime);
    ASSERT_THROWS_CODE(_importerService->learnedAllFilenames(migrationId), DBException, 7800210);
}
|
||||
TEST_F(TenantFileImporterServiceTest, learnedAllFilenamesFollowedByLearnedFileNameOutOfOrderEvent) {
|
||||
FailPointEnableBlock failPoint("skipCloneFiles");
|
||||
auto migrationId = UUID::gen();
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
|
||||
_importerService->learnedAllFilenames(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedAllFilenames);
|
||||
|
||||
ASSERT_THROWS_CODE(
|
||||
_importerService->learnedFilename(migrationId,
|
||||
BSON("filename"
|
||||
<< "some-file.wt" << kDonorHostNameFieldName
|
||||
<< kDonorHostName << "fileSize" << 1)),
|
||||
DBException,
|
||||
7800210);
|
||||
|
||||
// Interrupt the migration to prevent running file cloning after exiting this block.
|
||||
_importerService->interruptMigration(migrationId);
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, MigrationNotStartedYetShouldIgnoreAnyStateTransition) {
    auto migrationId = UUID::gen();

    ASSERT(!_importerService->getMigrationId_forTest());

    _importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
    ASSERT(!_importerService->getMigrationId_forTest());

    _importerService->learnedAllFilenames(migrationId);
    ASSERT(!_importerService->getMigrationId_forTest());

    _importerService->interruptMigration(migrationId);
    ASSERT(!_importerService->getMigrationId_forTest());

    _importerService->resetMigration(migrationId);
    ASSERT(!_importerService->getMigrationId_forTest());
}
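Taken together, the tests above pin down two behaviors of the event API: events for a migration that was never started are ignored, while events whose migrationId does not match the active migration are rejected with error code 7800210. A hypothetical helper capturing that contract (the function name is invented; this is not the removed implementation) could look like:

bool shouldProcessImporterEvent(const boost::optional<UUID>& activeMigrationId,
                                const UUID& eventMigrationId) {
    if (!activeMigrationId) {
        // No migration has been started yet: the event is silently ignored.
        return false;
    }
    uassert(7800210,
            "Migration ID mismatch with the active migration",
            *activeMigrationId == eventMigrationId);
    return true;
}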
|
||||
TEST_F(TenantFileImporterServiceTest, CanInterruptMigrationAfterMigrationStart) {
|
||||
auto migrationId = UUID::gen();
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
|
||||
|
||||
_importerService->interruptMigration(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, CanInterruptMigrationWhenLearnedFileName) {
|
||||
FailPointEnableBlock failPoint("skipCloneFiles");
|
||||
auto migrationId = UUID::gen();
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedFilename);
|
||||
|
||||
_importerService->interruptMigration(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, CanInterruptMigrationWhenLearnedAllFileNames) {
|
||||
FailPointEnableBlock failPoint("skipCloneFiles");
|
||||
auto migrationId = UUID::gen();
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
|
||||
_importerService->learnedAllFilenames(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedAllFilenames);
|
||||
|
||||
_importerService->interruptMigration(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, CanInterruptAMigrationMoreThanOnce) {
|
||||
auto migrationId = UUID::gen();
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
_importerService->interruptMigration(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
|
||||
|
||||
_importerService->interruptMigration(migrationId);
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, InterruptedMigrationCannotLearnNewFiles) {
|
||||
auto migrationId = UUID::gen();
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
_importerService->interruptMigration(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
|
||||
|
||||
ASSERT_THROWS_CODE(_importerService->learnedFilename(migrationId,
|
||||
BSON("filename"
|
||||
<< "some-file.wt"
|
||||
<< "fileSize" << 1)),
|
||||
DBException,
|
||||
7800210);
|
||||
ASSERT_THROWS_CODE(_importerService->learnedAllFilenames(migrationId), DBException, 7800210);
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, resetMigration) {
|
||||
FailPointEnableBlock failPoint("skipCloneFiles");
|
||||
auto migrationId = UUID::gen();
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
|
||||
|
||||
ASSERT_THROWS_CODE(_importerService->resetMigration(migrationId), DBException, 7800210);
|
||||
|
||||
_importerService->learnedFilename(migrationId, makefileMetaDoc(migrationId, "some-file.wt", 1));
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedFilename);
|
||||
|
||||
ASSERT_THROWS_CODE(_importerService->resetMigration(migrationId), DBException, 7800210);
|
||||
|
||||
_importerService->learnedAllFilenames(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedAllFilenames);
|
||||
|
||||
ASSERT_THROWS_CODE(_importerService->resetMigration(migrationId), DBException, 7800210);
|
||||
|
||||
_importerService->interruptMigration(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kInterrupted);
|
||||
|
||||
_importerService->resetMigration(migrationId);
|
||||
ASSERT(!_importerService->getMigrationId_forTest());
|
||||
|
||||
// Resetting migration again shouldn't throw.
|
||||
_importerService->resetMigration(migrationId);
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, ImportsFilesWhenAllFilenamesLearned) {
|
||||
FailPointEnableBlock hangBeforeFileImporterThreadExit("hangBeforeFileImporterThreadExit");
|
||||
|
||||
auto fpSkipImportFiles = globalFailPointRegistry().find("skipImportFiles");
|
||||
const auto fpSkipImportFilesInitialTimesEntered =
|
||||
fpSkipImportFiles->toBSON()["timesEntered"].safeNumberLong();
|
||||
auto migrationId = UUID::gen();
|
||||
|
||||
const std::string fileName = "some-file.wt";
|
||||
std::string fileData = "Here is the file data";
|
||||
CursorResponse fileAggResponse(
|
||||
NamespaceString::makeCollectionlessAggregateNSS(DatabaseName::kAdmin),
|
||||
0 /* cursorId */,
|
||||
{BSON("byteOffset" << 0 << "endOfFile" << true << "data"
|
||||
<< BSONBinData(fileData.data(), fileData.size(), BinDataGeneral))});
|
||||
|
||||
_mockDonorServer->setCommandReply("aggregate", fileAggResponse.toBSONAsInitialResponse());
|
||||
|
||||
// Verify that the temp WT db path is empty before migration start.
|
||||
auto tempWTDirectory = fileClonerTempDir(migrationId);
|
||||
ASSERT(!boost::filesystem::exists(tempWTDirectory / fileName));
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
_importerService->learnedFilename(migrationId,
|
||||
makefileMetaDoc(migrationId, fileName, fileData.size()));
|
||||
_importerService->learnedAllFilenames(migrationId);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(),
|
||||
TenantFileImporterService::State::kLearnedAllFilenames);
|
||||
|
||||
hangBeforeFileImporterThreadExit->waitForTimesEntered(
|
||||
hangBeforeFileImporterThreadExit.initialTimesEntered() + 1);
|
||||
|
||||
// Verify that the files have been cloned successfully.
|
||||
ASSERT(boost::filesystem::exists(tempWTDirectory / fileName));
|
||||
ASSERT_EQ(fileData.size(), boost::filesystem::file_size(tempWTDirectory / fileName));
|
||||
|
||||
// Verify if the import files operation has been called.
|
||||
fpSkipImportFiles->waitForTimesEntered(fpSkipImportFilesInitialTimesEntered + 1);
|
||||
|
||||
// Check if the import done marker collection exists.
|
||||
ASSERT(collectionExists(makeOperationContext().get(), getImportDoneMarkerNs(migrationId)));
|
||||
|
||||
// Verify whether the node has notified the primary about the import success.
|
||||
ASSERT_EQ(runCmdOnPrimaryAndAwaitResponseFnCalls.size(), 1);
|
||||
auto recipientVoteImportedFilesCmdCall = runCmdOnPrimaryAndAwaitResponseFnCalls.front();
|
||||
ASSERT_EQ(recipientVoteImportedFilesCmdCall.dbName, DatabaseName::kAdmin);
|
||||
ASSERT_BSONOBJ_EQ(recipientVoteImportedFilesCmdCall.cmdObj,
|
||||
BSON("recipientVoteImportedFiles" << 1 << "migrationId" << migrationId
|
||||
<< "from"
|
||||
<< ":27017"));
|
||||
}
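The test above relies on the fail point handshake used throughout this file: a FailPointEnableBlock turns the fail point on for the scope, and waitForTimesEntered blocks until the code under test has actually reached it, so the assertions that follow observe a stable state. In isolation the pattern is roughly:

FailPointEnableBlock hangFp("hangBeforeFileImporterThreadExit");
// ... kick off the work that will eventually hit the fail point ...
hangFp->waitForTimesEntered(hangFp.initialTimesEntered() + 1);
// The importer thread is now parked at the fail point; assertions on the
// cloned files and marker collections are race-free here.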
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, statsForInvalidMigrationID) {
|
||||
auto migrationId = UUID::gen();
|
||||
auto invalidMigrationID = UUID::gen();
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
ASSERT_EQ(_importerService->getMigrationId_forTest(), migrationId);
|
||||
ASSERT_EQ(_importerService->getState_forTest(), TenantFileImporterService::State::kStarted);
|
||||
|
||||
auto stats = _importerService->getStats(invalidMigrationID);
|
||||
ASSERT_TRUE(stats.isEmpty());
|
||||
}
|
||||
|
||||
TEST_F(TenantFileImporterServiceTest, statsForValidMigrationID) {
|
||||
auto migrationId = UUID::gen();
|
||||
|
||||
const std::string file1Name = "some-file1.wt";
|
||||
std::string file1Data = "Here is the file1 data";
|
||||
CursorResponse file1AggResponse(
|
||||
NamespaceString::makeCollectionlessAggregateNSS(DatabaseName::kAdmin),
|
||||
0 /* cursorId */,
|
||||
{BSON("byteOffset" << 0 << "endOfFile" << true << "data"
|
||||
<< BSONBinData(file1Data.data(), file1Data.size(), BinDataGeneral))});
|
||||
|
||||
const std::string file2Name = "some-file2.wt";
|
||||
std::string file2Data = "Here is the file2 data";
|
||||
CursorResponse file2AggResponse(
|
||||
NamespaceString::makeCollectionlessAggregateNSS(DatabaseName::kAdmin),
|
||||
0 /* cursorId */,
|
||||
{BSON("byteOffset" << 0 << "endOfFile" << true << "data"
|
||||
<< BSONBinData(file2Data.data(), file2Data.size(), BinDataGeneral))});
|
||||
|
||||
_mockDonorServer->setCommandReply(
|
||||
"aggregate",
|
||||
{file1AggResponse.toBSONAsInitialResponse(), file2AggResponse.toBSONAsInitialResponse()});
|
||||
|
||||
const auto totalDataSize = file1Data.size() + file2Data.size();
|
||||
FailPointEnableBlock hangBeforeFileImporterThreadExit("hangBeforeFileImporterThreadExit");
|
||||
|
||||
// Verify that the stat is empty before migration start.
|
||||
auto stats = _importerService->getStats(migrationId);
|
||||
ASSERT(stats.isEmpty());
|
||||
|
||||
_importerService->startMigration(migrationId, kStartMigrationOpTime);
|
||||
// Sleep to prevent the race with "totalReceiveElapsedMillis" field.
|
||||
mongo::sleepmillis(1);
|
||||
|
||||
stats = _importerService->getStats(migrationId);
|
||||
ASSERT(!stats.isEmpty());
|
||||
ASSERT(stats.hasField("approxTotalDataSize"));
|
||||
ASSERT(stats.hasField("approxTotalBytesCopied"));
|
||||
ASSERT(stats.hasField("totalReceiveElapsedMillis"));
|
||||
ASSERT(stats.hasField("remainingReceiveEstimatedMillis"));
|
||||
ASSERT_EQ(stats["approxTotalDataSize"].safeNumberLong(), 0ll);
|
||||
ASSERT_EQ(stats["approxTotalBytesCopied"].safeNumberLong(), 0ll);
|
||||
ASSERT_GT(stats["totalReceiveElapsedMillis"].safeNumberLong(), 0ll);
|
||||
ASSERT_EQ(stats["remainingReceiveEstimatedMillis"].safeNumberLong(), 0ll);
|
||||
|
||||
{
|
||||
FailPointEnableBlock fpTenantFileClonerHangDuringFileCloneBackup(
|
||||
"TenantFileClonerHangDuringFileCloneBackup");
|
||||
|
||||
_importerService->learnedFilename(
|
||||
migrationId, makefileMetaDoc(migrationId, file1Name, file1Data.size()));
|
||||
_importerService->learnedFilename(
|
||||
migrationId, makefileMetaDoc(migrationId, file2Name, file2Data.size()));
|
||||
|
||||
fpTenantFileClonerHangDuringFileCloneBackup->waitForTimesEntered(
|
||||
fpTenantFileClonerHangDuringFileCloneBackup.initialTimesEntered() + 1);
|
||||
stats = _importerService->getStats(migrationId);
|
||||
ASSERT(!stats.isEmpty());
|
||||
ASSERT(stats.hasField("approxTotalDataSize"));
|
||||
ASSERT(stats.hasField("approxTotalBytesCopied"));
|
||||
ASSERT(stats.hasField("totalReceiveElapsedMillis"));
|
||||
ASSERT(stats.hasField("remainingReceiveEstimatedMillis"));
|
||||
ASSERT_EQ(stats["approxTotalDataSize"].safeNumberLong(), totalDataSize);
|
||||
ASSERT_EQ(stats["approxTotalBytesCopied"].safeNumberLong(), file1Data.size());
|
||||
ASSERT_GT(stats["totalReceiveElapsedMillis"].safeNumberLong(), 0ll);
|
||||
ASSERT_GT(stats["remainingReceiveEstimatedMillis"].safeNumberLong(), 0ll);
|
||||
}
|
||||
|
||||
|
||||
_importerService->learnedAllFilenames(migrationId);
|
||||
|
||||
hangBeforeFileImporterThreadExit->waitForTimesEntered(
|
||||
hangBeforeFileImporterThreadExit.initialTimesEntered() + 1);
|
||||
stats = _importerService->getStats(migrationId);
|
||||
ASSERT(!stats.isEmpty());
|
||||
ASSERT(stats.hasField("approxTotalDataSize"));
|
||||
ASSERT(stats.hasField("approxTotalBytesCopied"));
|
||||
ASSERT(stats.hasField("totalReceiveElapsedMillis"));
|
||||
ASSERT(stats.hasField("remainingReceiveEstimatedMillis"));
|
||||
ASSERT_EQ(stats["approxTotalDataSize"].safeNumberLong(), totalDataSize);
|
||||
ASSERT_EQ(stats["approxTotalBytesCopied"].safeNumberLong(), totalDataSize);
|
||||
ASSERT_GT(stats["totalReceiveElapsedMillis"].safeNumberLong(), 0ll);
|
||||
ASSERT_EQ(stats["remainingReceiveEstimatedMillis"].safeNumberLong(), 0ll);
|
||||
}
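The assertions above only constrain the four progress fields from the outside. The arithmetic below is an assumption, not the removed implementation, but it illustrates one way a progress document with these field names and the observed behavior (zero estimate before and after copying, positive estimate mid-copy) could be produced, using a simple linear extrapolation:

BSONObj makeImporterProgressDoc(long long approxTotalDataSize,
                                long long approxTotalBytesCopied,
                                Milliseconds totalReceiveElapsed) {
    long long remainingEstimatedMillis = 0;
    if (approxTotalBytesCopied > 0) {
        // Assumed estimate: scale the elapsed time by the fraction still to copy.
        remainingEstimatedMillis = durationCount<Milliseconds>(totalReceiveElapsed) *
            (approxTotalDataSize - approxTotalBytesCopied) / approxTotalBytesCopied;
    }
    BSONObjBuilder bob;
    bob.append("approxTotalDataSize", approxTotalDataSize);
    bob.append("approxTotalBytesCopied", approxTotalBytesCopied);
    bob.append("totalReceiveElapsedMillis", durationCount<Milliseconds>(totalReceiveElapsed));
    bob.append("remainingReceiveEstimatedMillis", remainingEstimatedMillis);
    return bob.obj();
}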

} // namespace repl
} // namespace mongo
@@ -65,7 +65,6 @@
#include "mongo/db/repl/tenant_migration_decoration.h"
#include "mongo/db/repl/tenant_migration_donor_access_blocker.h"
#include "mongo/db/repl/tenant_migration_recipient_access_blocker.h"
#include "mongo/db/repl/tenant_migration_shard_merge_util.h"
#include "mongo/db/repl/tenant_migration_state_machine_gen.h"
#include "mongo/db/serverless/serverless_types_gen.h"
#include "mongo/db/service_context.h"
@@ -123,11 +122,6 @@ bool recoverTenantMigrationRecipientAccessBlockers(OperationContext* opCtx,
                                                                     doc.getId());
    auto protocol = doc.getProtocol().value_or(MigrationProtocolEnum::kMultitenantMigrations);
    switch (protocol) {
        case MigrationProtocolEnum::kShardMerge:
            invariant(doc.getTenantIds());
            TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
                .add(*doc.getTenantIds(), mtab);
            break;
        case MigrationProtocolEnum::kMultitenantMigrations: {
            const auto tenantId = TenantId::parseFromString(doc.getTenantId());
            TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
@@ -178,16 +172,6 @@ bool recoverTenantMigrationDonorAccessBlockers(OperationContext* opCtx,
            const auto tenantId = TenantId::parseFromString(*doc.getTenantId());
            registry.add(tenantId, mtabVector.back());
        } break;
        case MigrationProtocolEnum::kShardMerge:
            invariant(doc.getTenantIds());
            // Add global access blocker to avoid any tenant creation during shard merge.
            registry.addGlobalDonorAccessBlocker(mtabVector.back());
            for (const auto& tenantId : *doc.getTenantIds()) {
                mtabVector.push_back(std::make_shared<TenantMigrationDonorAccessBlocker>(
                    opCtx->getServiceContext(), doc.getId()));
                registry.add(tenantId, mtabVector.back());
            }
            break;
        default:
            MONGO_UNREACHABLE;
    }
@@ -227,63 +211,6 @@ bool recoverTenantMigrationDonorAccessBlockers(OperationContext* opCtx,
    }
    return true;
}

bool recoverShardMergeRecipientAccessBlockers(OperationContext* opCtx,
                                              const ShardMergeRecipientDocument& doc) {
    auto replCoord = repl::ReplicationCoordinator::get(getGlobalServiceContext());
    invariant(replCoord && replCoord->getSettings().isReplSet());

    // If the initial syncing node (both FCBIS and logical initial sync) syncs from a sync source
    // that's in the middle of file copy/import phase of shard merge, it can cause the initial
    // syncing node to have only partial donor data. And, if this node went into initial sync (i.e,
    // resync) after it sent `recipientVoteImportedFiles` to the recipient primary, the primary
    // can commit the migration and cause permanent data loss on this node.
    if (replCoord->getMemberState().startup2() && !doc.getExpireAt()) {
        assertOnUnsafeInitialSync(doc.getId());
    }

    // Do not create mtab for following cases. Otherwise, we can get into potential race
    // causing recovery procedure to fail with `ErrorCodes::ConflictingServerlessOperation`.
    // 1) The migration was skipped.
    if (doc.getStartGarbageCollect()) {
        invariant(doc.getState() == ShardMergeRecipientStateEnum::kAborted ||
                  doc.getState() == ShardMergeRecipientStateEnum::kCommitted);
        return true;
    }
    // 2) Aborted state doc marked as garbage collectable.
    if (doc.getState() == ShardMergeRecipientStateEnum::kAborted && doc.getExpireAt()) {
        return true;
    }

    auto mtab = std::make_shared<TenantMigrationRecipientAccessBlocker>(opCtx->getServiceContext(),
                                                                        doc.getId());
    TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
        .add(doc.getTenantIds(), mtab);

    switch (doc.getState()) {
        case ShardMergeRecipientStateEnum::kStarted:
        case ShardMergeRecipientStateEnum::kLearnedFilenames:
            break;
        case ShardMergeRecipientStateEnum::kConsistent:
            repl::shard_merge_utils::assertImportDoneMarkerLocalCollExistsOnMergeConsistent(
                opCtx, doc.getId());
            FMT_FALLTHROUGH;
        case ShardMergeRecipientStateEnum::kCommitted:
            if (doc.getExpireAt()) {
                mtab->stopBlockingTTL();
            }
            FMT_FALLTHROUGH;
        case ShardMergeRecipientStateEnum::kAborted:
            if (auto rejectTs = doc.getRejectReadsBeforeTimestamp()) {
                mtab->startRejectingReadsBefore(*rejectTs);
            }
            break;
        default:
            MONGO_UNREACHABLE;
    }

    return true;
}
} // namespace

void assertOnUnsafeInitialSync(const UUID& migrationId) {
@@ -608,13 +535,6 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) {
    recipientStore.forEach(opCtx, {}, [&](const TenantMigrationRecipientDocument& doc) {
        return recoverTenantMigrationRecipientAccessBlockers(opCtx, doc);
    });

    PersistentTaskStore<ShardMergeRecipientDocument> mergeRecipientStore(
        NamespaceString::kShardMergeRecipientsNamespace);

    mergeRecipientStore.forEach(opCtx, {}, [&](const ShardMergeRecipientDocument& doc) {
        return recoverShardMergeRecipientAccessBlockers(opCtx, doc);
    });
}

template <typename MigrationConflictInfoType>
|
|
|
|||
|
|
@ -121,16 +121,6 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationTrueWithDon
|
|||
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
|
||||
}
|
||||
|
||||
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeTrueWithDonor) {
|
||||
auto donorMtab =
|
||||
std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), UUID::gen());
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.addGlobalDonorAccessBlocker(donorMtab);
|
||||
ASSERT_FALSE(
|
||||
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kLocal));
|
||||
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
|
||||
}
|
||||
|
||||
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationTrueWithRecipient) {
|
||||
auto recipientMtab =
|
||||
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), UUID::gen());
|
||||
|
|
@ -151,21 +141,6 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationTrueWithBot
|
|||
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
|
||||
}
|
||||
|
||||
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeTrueWithBoth) {
|
||||
auto uuid = UUID::gen();
|
||||
auto recipientMtab =
|
||||
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), uuid);
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab);
|
||||
|
||||
auto donorMtab = std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), uuid);
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.addGlobalDonorAccessBlocker(donorMtab);
|
||||
// Access blocker do not impact ns without tenants.
|
||||
ASSERT_FALSE(
|
||||
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kConfig));
|
||||
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
|
||||
}
|
||||
|
||||
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationDonorFalseForNoDbName) {
|
||||
auto donorMtab =
|
||||
std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), UUID::gen());
|
||||
|
|
@ -175,23 +150,6 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationDonorFalseF
|
|||
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kEmpty));
|
||||
}
|
||||
|
||||
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeDonorFalseForNoDbName) {
|
||||
auto donorMtab =
|
||||
std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), UUID::gen());
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.addGlobalDonorAccessBlocker(donorMtab);
|
||||
ASSERT_FALSE(
|
||||
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kEmpty));
|
||||
}
|
||||
|
||||
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeRecipientFalseForNoDbName) {
|
||||
auto recipientMtab =
|
||||
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), UUID::gen());
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab);
|
||||
ASSERT_FALSE(
|
||||
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kEmpty));
|
||||
}
|
||||
|
||||
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationFalseForUnrelatedDb) {
|
||||
auto recipientMtab =
|
||||
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), UUID::gen());
|
||||
|
|
@ -231,38 +189,6 @@ TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveTenantMigrationFalseAfterR
|
|||
ASSERT_FALSE(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
|
||||
}
|
||||
|
||||
TEST_F(TenantMigrationAccessBlockerUtilTest, HasActiveShardMergeFalseAfterRemoveWithBoth) {
|
||||
auto migrationId = UUID::gen();
|
||||
auto recipientMtab =
|
||||
std::make_shared<TenantMigrationRecipientAccessBlocker>(getServiceContext(), migrationId);
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext()).add(kTenantId, recipientMtab);
|
||||
|
||||
auto donorMtab =
|
||||
std::make_shared<TenantMigrationDonorAccessBlocker>(getServiceContext(), migrationId);
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.addGlobalDonorAccessBlocker(donorMtab);
|
||||
|
||||
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
|
||||
ASSERT_FALSE(
|
||||
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kAdmin));
|
||||
|
||||
// Remove donor, should still be a migration for the tenants migrating to the recipient.
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.removeAccessBlockersForMigration(migrationId,
|
||||
TenantMigrationAccessBlocker::BlockerType::kDonor);
|
||||
ASSERT(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
|
||||
ASSERT_FALSE(
|
||||
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kAdmin));
|
||||
|
||||
// Remove recipient, there should be no migration.
|
||||
TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.removeAccessBlockersForMigration(migrationId,
|
||||
TenantMigrationAccessBlocker::BlockerType::kRecipient);
|
||||
ASSERT_FALSE(tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), kTenantDB));
|
||||
ASSERT_FALSE(
|
||||
tenant_migration_access_blocker::hasActiveTenantMigration(opCtx(), DatabaseName::kAdmin));
|
||||
}
|
||||
|
||||
TEST_F(TenantMigrationAccessBlockerUtilTest, TestValidateNssBeingMigrated) {
|
||||
auto migrationId = UUID::gen();
|
||||
auto recipientMtab =
|
||||
|
|
@ -349,435 +275,4 @@ private:
|
|||
const repl::ReplSettings _replSettings = repl::createServerlessReplSettings();
|
||||
};
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientBlockerStarted) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kStarted);
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
|
||||
ASSERT(mtab);
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFuture.isReady());
|
||||
ASSERT_THROWS_CODE_AND_WHAT(
|
||||
cmdFuture.get(),
|
||||
DBException,
|
||||
ErrorCodes::IllegalOperation,
|
||||
"Tenant command 'dummyCmd' is not allowed before migration completes");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientAbortedBeforeDataCopy) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
|
||||
recipientDoc.setStartGarbageCollect(true);
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
|
||||
ASSERT(!mtab);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientAbortedAfterDataCopy) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kAborted);
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
|
||||
ASSERT(mtab);
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFuture.isReady());
|
||||
ASSERT_THROWS_CODE_AND_WHAT(
|
||||
cmdFuture.get(),
|
||||
DBException,
|
||||
ErrorCodes::IllegalOperation,
|
||||
"Tenant command 'dummyCmd' is not allowed before migration completes");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientCommittedWithoutDataCopy) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
|
||||
recipientDoc.setStartGarbageCollect(true);
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
|
||||
ASSERT(!mtab);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientCommittedAfterDataCopy) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
|
||||
ASSERT(mtab);
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFuture.isReady());
|
||||
ASSERT_THROWS_CODE_AND_WHAT(
|
||||
cmdFuture.get(),
|
||||
DBException,
|
||||
ErrorCodes::IllegalOperation,
|
||||
"Tenant command 'dummyCmd' is not allowed before migration completes");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientLearnedFiles) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
|
||||
ASSERT(mtab);
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFuture.isReady());
|
||||
ASSERT_THROWS_CODE_AND_WHAT(
|
||||
cmdFuture.get(),
|
||||
DBException,
|
||||
ErrorCodes::IllegalOperation,
|
||||
"Tenant command 'dummyCmd' is not allowed before migration completes");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientConsistent) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
|
||||
// Create the import done marker collection.
|
||||
ASSERT_OK(createCollection(
|
||||
opCtx(), CreateCommand(repl::shard_merge_utils::getImportDoneMarkerNs(kMigrationId))));
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
|
||||
ASSERT(mtab);
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFuture.isReady());
|
||||
ASSERT_THROWS_CODE_AND_WHAT(
|
||||
cmdFuture.get(),
|
||||
DBException,
|
||||
ErrorCodes::IllegalOperation,
|
||||
"Tenant command 'dummyCmd' is not allowed before migration completes");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeRecipientRejectBeforeTimestamp) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
|
||||
recipientDoc.setRejectReadsBeforeTimestamp(Timestamp{20, 1});
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
|
||||
ASSERT(mtab);
|
||||
|
||||
repl::ReadConcernArgs::get(opCtx()) =
|
||||
repl::ReadConcernArgs(repl::ReadConcernLevel::kMajorityReadConcern);
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_OK(cmdFuture.getNoThrow());
|
||||
|
||||
repl::ReadConcernArgs::get(opCtx()) =
|
||||
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
|
||||
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{15, 1});
|
||||
auto cmdFutureAtClusterTime = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFutureAtClusterTime.isReady());
|
||||
ASSERT_THROWS_CODE_AND_WHAT(
|
||||
cmdFutureAtClusterTime.get(),
|
||||
DBException,
|
||||
ErrorCodes::SnapshotTooOld,
|
||||
"Tenant command 'dummyCmd' is not allowed before migration completes");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, InitialSyncUsingSyncSourceRunningShardMergeImportAsserts) {
|
||||
ShardMergeRecipientDocument recipientDoc(UUID::gen(),
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kLearnedFilenames);
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
// Simulate the node is in initial sync.
|
||||
ASSERT_OK(_replMock->setFollowerMode(repl::MemberState::RS_STARTUP2));
|
||||
|
||||
ASSERT_THROWS_CODE_AND_WHAT(
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx()),
|
||||
DBException,
|
||||
ErrorCodes::TenantMigrationInProgress,
|
||||
"Illegal to run initial sync when shard merge is active");
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, SyncSourceCompletesShardMergeBeforeInitialSyncStart) {
|
||||
ShardMergeRecipientDocument recipientDoc(kMigrationId,
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kCommitted);
|
||||
recipientDoc.setExpireAt(opCtx()->getServiceContext()->getFastClockSource()->now());
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
// Simulate the node is in initial sync.
|
||||
ASSERT_OK(_replMock->setFollowerMode(repl::MemberState::RS_STARTUP2));
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
}
|
||||
|
||||
DEATH_TEST_REGEX_F(RecoverAccessBlockerTest,
|
||||
ShardMergeRecipientConsistentStateWithoutImportDoneMarkerCollectionFasserts,
|
||||
"Fatal assertion.*7219902") {
|
||||
ShardMergeRecipientDocument recipientDoc(UUID::gen(),
|
||||
kDefaultDonorConnStr,
|
||||
_tenantIds,
|
||||
kDefaultStartMigrationTimestamp,
|
||||
ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
recipientDoc.setState(ShardMergeRecipientStateEnum::kConsistent);
|
||||
|
||||
insertStateDocument(NamespaceString::kShardMergeRecipientsNamespace, recipientDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeDonorAbortingIndex) {
|
||||
TenantMigrationDonorDocument donorDoc(
|
||||
kMigrationId,
|
||||
kDefaultRecipientConnStr,
|
||||
mongo::ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
donorDoc.setProtocol(MigrationProtocolEnum::kShardMerge);
|
||||
donorDoc.setTenantIds(_tenantIds);
|
||||
donorDoc.setState(TenantMigrationDonorStateEnum::kAbortingIndexBuilds);
|
||||
|
||||
insertStateDocument(NamespaceString::kTenantMigrationDonorsNamespace, donorDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
|
||||
ASSERT(mtab);
|
||||
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFuture.isReady());
|
||||
ASSERT_OK(cmdFuture.getNoThrow());
|
||||
|
||||
ASSERT_OK(mtab->checkIfCanWrite(Timestamp{10, 1}));
|
||||
|
||||
auto indexStatus = mtab->checkIfCanBuildIndex();
|
||||
ASSERT_EQ(indexStatus.code(), ErrorCodes::TenantMigrationConflict);
|
||||
auto migrationConflictInfo = indexStatus.extraInfo<TenantMigrationConflictInfo>();
|
||||
ASSERT_EQ(migrationConflictInfo->getMigrationId(), kMigrationId);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeDonorBlocking) {
|
||||
TenantMigrationDonorDocument donorDoc(
|
||||
kMigrationId,
|
||||
kDefaultRecipientConnStr,
|
||||
mongo::ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
donorDoc.setProtocol(MigrationProtocolEnum::kShardMerge);
|
||||
donorDoc.setTenantIds(_tenantIds);
|
||||
donorDoc.setState(TenantMigrationDonorStateEnum::kBlocking);
|
||||
donorDoc.setBlockTimestamp(Timestamp{100, 1});
|
||||
|
||||
insertStateDocument(NamespaceString::kTenantMigrationDonorsNamespace, donorDoc.toBSON());
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
|
||||
ASSERT(mtab);
|
||||
|
||||
repl::ReadConcernArgs::get(opCtx()) =
|
||||
repl::ReadConcernArgs(repl::ReadConcernLevel::kMajorityReadConcern);
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFuture.isReady());
|
||||
ASSERT_OK(cmdFuture.getNoThrow());
|
||||
|
||||
repl::ReadConcernArgs::get(opCtx()) =
|
||||
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
|
||||
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{101, 1});
|
||||
auto afterCmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_FALSE(afterCmdFuture.isReady());
|
||||
|
||||
ASSERT_EQ(mtab->checkIfCanWrite(Timestamp{101, 1}).code(),
|
||||
ErrorCodes::TenantMigrationConflict);
|
||||
|
||||
auto indexStatus = mtab->checkIfCanBuildIndex();
|
||||
ASSERT_EQ(indexStatus.code(), ErrorCodes::TenantMigrationConflict);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeDonorCommitted) {
|
||||
TenantMigrationDonorDocument donorDoc(
|
||||
kMigrationId,
|
||||
kDefaultRecipientConnStr,
|
||||
mongo::ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
donorDoc.setProtocol(MigrationProtocolEnum::kShardMerge);
|
||||
donorDoc.setTenantIds(_tenantIds);
|
||||
donorDoc.setState(TenantMigrationDonorStateEnum::kCommitted);
|
||||
donorDoc.setBlockTimestamp(Timestamp{100, 1});
|
||||
donorDoc.setCommitOrAbortOpTime(repl::OpTime{Timestamp{101, 1}, 2});
|
||||
|
||||
insertStateDocument(NamespaceString::kTenantMigrationDonorsNamespace, donorDoc.toBSON());
|
||||
_replMock->setCurrentCommittedSnapshotOpTime(repl::OpTime{Timestamp{101, 1}, 2});
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
|
||||
ASSERT(mtab);
|
||||
|
||||
repl::ReadConcernArgs::get(opCtx()) =
|
||||
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
|
||||
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{90, 1});
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFuture.isReady());
|
||||
ASSERT_OK(cmdFuture.getNoThrow());
|
||||
|
||||
repl::ReadConcernArgs::get(opCtx()) =
|
||||
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
|
||||
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{102, 1});
|
||||
auto afterCmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(afterCmdFuture.isReady());
|
||||
ASSERT_EQ(afterCmdFuture.getNoThrow().code(), ErrorCodes::TenantMigrationCommitted);
|
||||
|
||||
ASSERT_EQ(mtab->checkIfCanWrite(Timestamp{102, 1}).code(),
|
||||
ErrorCodes::TenantMigrationCommitted);
|
||||
|
||||
auto indexStatus = mtab->checkIfCanBuildIndex();
|
||||
ASSERT_EQ(indexStatus.code(), ErrorCodes::TenantMigrationCommitted);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RecoverAccessBlockerTest, ShardMergeDonorAborted) {
|
||||
TenantMigrationDonorDocument donorDoc(
|
||||
kMigrationId,
|
||||
kDefaultRecipientConnStr,
|
||||
mongo::ReadPreferenceSetting(ReadPreference::PrimaryOnly));
|
||||
|
||||
donorDoc.setProtocol(MigrationProtocolEnum::kShardMerge);
|
||||
donorDoc.setTenantIds(_tenantIds);
|
||||
donorDoc.setState(TenantMigrationDonorStateEnum::kAborted);
|
||||
donorDoc.setBlockTimestamp(Timestamp{100, 1});
|
||||
donorDoc.setCommitOrAbortOpTime(repl::OpTime{Timestamp{101, 1}, 2});
|
||||
|
||||
insertStateDocument(NamespaceString::kTenantMigrationDonorsNamespace, donorDoc.toBSON());
|
||||
_replMock->setCurrentCommittedSnapshotOpTime(repl::OpTime{Timestamp{101, 1}, 2});
|
||||
|
||||
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx());
|
||||
|
||||
for (const auto& tenantId : _tenantIds) {
|
||||
auto mtab = TenantMigrationAccessBlockerRegistry::get(getServiceContext())
|
||||
.getTenantMigrationAccessBlockerForTenantId(
|
||||
tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
|
||||
ASSERT(mtab);
|
||||
|
||||
repl::ReadConcernArgs::get(opCtx()) =
|
||||
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
|
||||
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{90, 1});
|
||||
auto cmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(cmdFuture.isReady());
|
||||
ASSERT_OK(cmdFuture.getNoThrow());
|
||||
|
||||
repl::ReadConcernArgs::get(opCtx()) =
|
||||
repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern);
|
||||
repl::ReadConcernArgs::get(opCtx()).setArgsAtClusterTimeForSnapshot(Timestamp{102, 1});
|
||||
auto afterCmdFuture = mtab->getCanRunCommandFuture(opCtx(), "dummyCmd");
|
||||
ASSERT_TRUE(afterCmdFuture.isReady());
|
||||
ASSERT_OK(afterCmdFuture.getNoThrow());
|
||||
|
||||
ASSERT_OK(mtab->checkIfCanWrite(Timestamp{102, 1}));
|
||||
|
||||
ASSERT_OK(mtab->checkIfCanBuildIndex());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace mongo
|
||||
|
|
|
|||
|
|
@@ -94,9 +94,6 @@ void onTransitionToAbortingIndexBuilds(OperationContext* opCtx,
        const auto tenantId = TenantId::parseFromString(*donorStateDoc.getTenantId());
        TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()).add(tenantId, mtab);
    } else {
        tassert(6448702,
                "Bad protocol",
                donorStateDoc.getProtocol() == MigrationProtocolEnum::kShardMerge);
        invariant(donorStateDoc.getTenantIds());

        auto& registry = TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext());
@@ -196,7 +196,7 @@ public:

    /** Returns true if we should retry sending SyncData given the error */
    bool recordAndEvaluateRetry(Status status) override {
        if (_protocol == MigrationProtocolEnum::kShardMerge || status.isOK()) {
        if (status.isOK()) {
            return false;
        }
        auto underlyingError = async_rpc::unpackRPCStatusIgnoringWriteConcernAndWriteErrors(status);
@@ -270,7 +270,6 @@ void TenantMigrationDonorService::checkIfConflictsWithOtherInstances(
    BSONObj initialState,
    const std::vector<const repl::PrimaryOnlyService::Instance*>& existingInstances) {
    auto stateDoc = tenant_migration_access_blocker::parseDonorStateDocument(initialState);
    auto isNewShardMerge = stateDoc.getProtocol() == MigrationProtocolEnum::kShardMerge;

    for (auto& instance : existingInstances) {
        auto existingTypedInstance =

@@ -282,12 +281,11 @@ void TenantMigrationDonorService::checkIfConflictsWithOtherInstances(

        uassert(ErrorCodes::ConflictingOperationInProgress,
                str::stream() << "Cannot start a shard merge with existing migrations in progress",
                !isNewShardMerge || existingIsAborted);
                existingIsAborted);

        uassert(
            ErrorCodes::ConflictingOperationInProgress,
            str::stream() << "Cannot start a migration with an existing shard merge in progress",
            existingTypedInstance->getProtocol() != MigrationProtocolEnum::kShardMerge ||
                existingIsAborted);

        // Any existing migration for this tenant must be aborted and garbage-collectable.

@@ -492,12 +490,11 @@ void TenantMigrationDonorService::Instance::checkIfOptionsConflict(const BSONObj

    auto tenantIdsMatch = [&] {
        switch (_protocol) {
            case MigrationProtocolEnum::kShardMerge:
                invariant(stateDoc.getTenantIds());
                return *stateDoc.getTenantIds() == _tenantIds;
            case MigrationProtocolEnum::kMultitenantMigrations:
                invariant(stateDoc.getTenantId());
                return *stateDoc.getTenantId() == _tenantId;
            default:
                MONGO_UNREACHABLE;
        }
        MONGO_UNREACHABLE;
    };

@@ -921,11 +918,6 @@ ExecutorFuture<void> TenantMigrationDonorService::Instance::_sendRecipientForget

void TenantMigrationDonorService::Instance::validateTenantIdsForProtocol() {
    switch (_protocol) {
        case MigrationProtocolEnum::kShardMerge:
            uassert(ErrorCodes::InvalidOptions,
                    "The field tenantIds must be set and not empty for protocol 'shard merge'",
                    !_tenantIds.empty());
            break;
        case MigrationProtocolEnum::kMultitenantMigrations:
            uassert(ErrorCodes::InvalidOptions,
                    "The field tenantIds must not be set for protocol 'multitenant migration'",

@@ -1314,10 +1306,6 @@ TenantMigrationDonorService::Instance::_waitUntilStartMigrationDonorTimestampIsC
    const std::shared_ptr<executor::ScopedTaskExecutor>& executor,
    const CancellationToken& abortToken) {

    if (getProtocol() != MigrationProtocolEnum::kShardMerge) {
        return ExecutorFuture(**executor);
    }

    auto opCtxHolder = cc().makeOperationContext();
    auto opCtx = opCtxHolder.get();
    auto startMigrationDonorTimestamp = [&] {

@@ -50,7 +50,6 @@ inline constexpr StringData kMigrationIdFieldName = "migrationId"_sd;
inline constexpr StringData kBackupIdFieldName = "backupId"_sd;
inline constexpr StringData kDonorHostNameFieldName = "donorHostName"_sd;
inline constexpr StringData kDonorDbPathFieldName = "dbpath"_sd;
inline constexpr StringData kMovingFilesMarker = ".shardMergeMovingFiles"_sd;
inline constexpr StringData kTableExtension = ".wt"_sd;

// Keep the backup cursor alive by pinging twice as often as the donor's default

@@ -181,13 +181,6 @@ inline Status validateProtocolFCVCompatibility(
    if (!protocol)
        return Status::OK();

    if (*protocol == MigrationProtocolEnum::kShardMerge &&
        !repl::feature_flags::gShardMerge.isEnabled(
            serverGlobalParams.featureCompatibility.acquireFCVSnapshot())) {
        return Status(ErrorCodes::IllegalOperation,
                      str::stream() << "protocol '" << MigrationProtocol_serializer(*protocol)
                                    << "' not supported");
    }
    return Status::OK();
}

@@ -162,28 +162,6 @@ void ServerlessOperationLockRegistry::recoverLocks(OperationContext* opCtx) {

        return true;
    });

    PersistentTaskStore<ShardMergeRecipientDocument> mergeRecipientStore(
        NamespaceString::kShardMergeRecipientsNamespace);
    mergeRecipientStore.forEach(opCtx, {}, [&](const ShardMergeRecipientDocument& doc) {
        // Do not acquire locks in the following cases. Otherwise, we can get into a potential
        // race that causes the recovery procedure to fail with
        // `ErrorCodes::ConflictingServerlessOperation`.
        // 1) The migration was skipped.
        if (doc.getStartGarbageCollect()) {
            invariant(doc.getState() == ShardMergeRecipientStateEnum::kAborted ||
                      doc.getState() == ShardMergeRecipientStateEnum::kCommitted);
            return true;
        }
        // 2) State doc marked as garbage collectable.
        if (doc.getExpireAt()) {
            return true;
        }

        registry.acquireLock(ServerlessOperationLockRegistry::LockType::kMergeRecipient,
                             doc.getId());

        return true;
    });
}

const std::string kOperationLockFieldName = "operationLock";

@@ -1,66 +0,0 @@
\* Config file to run the TLC model-checker on ShardMerge.tla.
\* See ShardMerge.tla for instructions.

CONSTANTS DonorStartMigrationRequest = DonorStartMigrationRequest
CONSTANTS DonorStartMigrationResponse = DonorStartMigrationResponse

CONSTANTS RecipientSyncDataReturnAfterPinningRequest = RecipientSyncDataReturnAfterPinningRequest
CONSTANTS RecipientSyncDataReturnAfterPinningResponse = RecipientSyncDataReturnAfterPinningResponse

CONSTANTS RecipientSyncDataRequest = RecipientSyncDataRequest
CONSTANTS RecipientSyncDataResponse = RecipientSyncDataResponse

CONSTANTS RecipientSyncDataReturnAfterReachingDonorTimestampRequest = RecipientSyncDataReturnAfterReachingDonorTimestampRequest
CONSTANTS RecipientSyncDataReturnAfterReachingDonorTimestampResponse = RecipientSyncDataReturnAfterReachingDonorTimestampResponse

CONSTANTS DonorForgetMigrationRequest = DonorForgetMigrationRequest
CONSTANTS DonorForgetMigrationResponse = DonorForgetMigrationResponse

CONSTANTS RecipientForgetMigrationRequest = RecipientForgetMigrationRequest
CONSTANTS RecipientForgetMigrationResponse = RecipientForgetMigrationResponse

CONSTANTS RecUninitialized = RecUninitialized
CONSTANTS RecPinned = RecPinned
CONSTANTS RecStarted = RecStarted
CONSTANTS RecConsistent = RecConsistent
CONSTANTS RecLagged = RecLagged
CONSTANTS RecReady = RecReady
CONSTANTS RecAborted = RecAborted
CONSTANTS RecDone = RecDone

CONSTANTS DonUninitialized = DonUninitialized
CONSTANTS DonAbortingIndexBuilds = DonAbortingIndexBuilds
CONSTANTS DonPinning = DonPinning
CONSTANTS DonDataSync = DonDataSync
CONSTANTS DonBlocking = DonBlocking
CONSTANTS DonCommitted = DonCommitted
CONSTANTS DonAborted = DonAborted
CONSTANTS DonDone = DonDone

CONSTANTS CloudUnknown = CloudUnknown
CONSTANTS CloudCommitted = CloudCommitted
CONSTANTS CloudAborted = CloudAborted
CONSTANTS CloudDone = CloudDone

CONSTANTS MigrationNone = MigrationNone
CONSTANTS MigrationCommitted = MigrationCommitted
CONSTANTS MigrationAborted = MigrationAborted

CONSTANTS SyncOK = SyncOK
CONSTANTS SyncAborted = SyncAborted

CONSTANT MaxRequests = 8

INVARIANT StateMachinesConsistent

PROPERTY MigrationEventuallyCompletes
PROPERTY MessageBagEventuallyEmpties
PROPERTY EachRequestHasAResponse

\* Not configurable.
CONSTRAINT StateConstraint
SPECIFICATION Spec

\* The spec can terminate without a deadlock. The liveness properties are present to ensure the
\* termination states are correct.
CHECK_DEADLOCK FALSE

@@ -1,15 +0,0 @@
---- MODULE MCShardMerge ----
\* This module defines MCShardMerge.tla constants/constraints for model-checking.

EXTENDS ShardMerge

CONSTANT MaxRequests

(**************************************************************************************************)
(* State Constraint. Used for model checking only. *)
(**************************************************************************************************)

StateConstraint ==
    MaxRequests > totalRequests

=============================================================================

@@ -1,542 +0,0 @@
\* Copyright 2021-present MongoDB, Inc.
\*
\* This work is licensed under:
\* - Creative Commons Attribution-3.0 United States License
\* http://creativecommons.org/licenses/by/3.0/us/

----------------------------- MODULE ShardMerge -----------------------------
\*
\* A specification of serverless MongoDB's shard merge protocol.
\*
\* To run the model-checker, first edit the constants in MCShardMerge.cfg if desired,
\* then:
\* cd src/mongo/db/repl/tla_plus
\* ./model-check.sh ShardMerge
\*

EXTENDS Integers, FiniteSets, Sequences, TLC

\* Donor command requests and responses
CONSTANTS DonorStartMigrationRequest, DonorStartMigrationResponse
CONSTANTS DonorForgetMigrationRequest, DonorForgetMigrationResponse

\* recipientSyncData command with returnAfterPinningOldestTimestamp.
CONSTANTS RecipientSyncDataReturnAfterPinningRequest, RecipientSyncDataReturnAfterPinningResponse
\* recipientSyncData command with no special params.
CONSTANTS RecipientSyncDataRequest, RecipientSyncDataResponse
\* recipientSyncData command with returnAfterReachingDonorTimestamp.
CONSTANTS RecipientSyncDataReturnAfterReachingDonorTimestampRequest, RecipientSyncDataReturnAfterReachingDonorTimestampResponse
CONSTANTS RecipientForgetMigrationRequest, RecipientForgetMigrationResponse

\* Recipient states. The happy path is:
\* Uninitialized->Pinned->Started->Consistent->Lagged->Ready->Done.
CONSTANTS RecUninitialized, RecPinned, RecStarted, RecConsistent, RecLagged, RecReady, RecAborted, RecDone

\* Donor states. The happy path is:
\* Uninit->AbortingIndexBuilds->Pinning->DataSync->Blocking->Committed->Done.
CONSTANTS DonUninitialized, DonAbortingIndexBuilds, DonPinning, DonDataSync, DonBlocking, DonCommitted, DonAborted, DonDone

\* cloud state
CONSTANTS CloudUnknown, CloudCommitted, CloudAborted, CloudDone

\* Responses to DonorStartMigration request
CONSTANTS MigrationNone, MigrationCommitted, MigrationAborted

\* Responses to RecipientSyncData* requests
CONSTANTS SyncOK, SyncAborted

(**************************************************************************************************)
(* Global variables *)
(**************************************************************************************************)

VARIABLE messages
VARIABLE recipientState
VARIABLE donorState
VARIABLE cloudState
VARIABLE totalRequests
VARIABLE totalResponses
VARIABLE recipientAborted

donorVars == <<donorState>>
recipientVars == <<recipientState, recipientAborted>>
cloudVars == <<cloudState>>
messageVars == <<messages, totalRequests, totalResponses>>
vars == <<donorVars, recipientVars, cloudVars, messageVars>>

-------------------------------------------------------------------------------------------

(**************************************************************************************************)
(* Network Helpers, adapted from https://github.com/ongardie/raft.tla/blob/master/raft.tla *)
(**************************************************************************************************)

\* Helper for Send. Given a message m and bag of messages, return a new bag of messages with one
\* more m in it.
WithMessage(m, msgs) ==
    IF m \in DOMAIN msgs THEN
        [msgs EXCEPT ![m] = msgs[m] + 1]
    ELSE
        msgs @@ (m :> 1)

\* Helper for Discard and Reply. Given a message m and bag of messages, return a new bag of
\* messages with one less m in it.
WithoutMessage(m, msgs) ==
    IF m \in DOMAIN msgs THEN
        IF msgs[m] = 1 THEN
            \* Remove message m from the bag.
            [n \in DOMAIN msgs \ {m} |-> msgs[n]]
        ELSE
            [msgs EXCEPT ![m] = msgs[m] - 1]
    ELSE
        msgs
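\* Worked example (added comment, not part of the original spec): starting from the empty bag,
\* adding the same message twice and then removing it once leaves a count of one:
\*     LET b == WithMessage("m", WithMessage("m", [x \in {} |-> 0]))
\*     IN  WithoutMessage("m", b)["m"] = 1
\* The per-message counts are what let retried (duplicate) requests coexist in the bag.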

IsRequest(m) ==
    m.mType \in {DonorStartMigrationRequest, RecipientSyncDataReturnAfterPinningRequest,
                 RecipientSyncDataRequest, RecipientSyncDataReturnAfterReachingDonorTimestampRequest,
                 DonorForgetMigrationRequest, RecipientForgetMigrationRequest}

IncTotalMessages(m) ==
    IF IsRequest(m) THEN
        /\ totalRequests' = totalRequests + 1
        /\ UNCHANGED <<totalResponses>>
    ELSE
        /\ totalResponses' = totalResponses + 1
        /\ UNCHANGED <<totalRequests>>

\* Add a message to the bag of messages.
Send(m) ==
    /\ messages' = WithMessage(m, messages)
    /\ IncTotalMessages(m)

\* Remove a message from the bag of messages. Used when a server is done processing a message.
Discard(m) ==
    /\ messages' = WithoutMessage(m, messages)
    /\ UNCHANGED <<totalRequests, totalResponses>>

\* Helper that both sends a message and discards a message.
SendAndDiscard(sendMessage, discardMessage) ==
    /\ messages' = WithoutMessage(discardMessage, WithMessage(sendMessage, messages))
    /\ IncTotalMessages(sendMessage)

(**************************************************************************************************)
(* Request and response handlers *)
(**************************************************************************************************)

\* Helper to create the donorStartMigration response based on the donor state.
DonorStartMigrationResponseGen ==
    CASE donorState = DonAborted ->
            [mType |-> DonorStartMigrationResponse,
             mOutcome |-> MigrationAborted]
      [] donorState = DonCommitted ->
            [mType |-> DonorStartMigrationResponse,
             mOutcome |-> MigrationCommitted]
      [] donorState \in {DonUninitialized, DonAbortingIndexBuilds, DonPinning, DonDataSync,
                         DonBlocking, DonDone} ->
            [mType |-> DonorStartMigrationResponse,
             mOutcome |-> MigrationNone]

\* Donor
HandleDonorStartMigrationRequest(m) ==
    /\ m.mType = DonorStartMigrationRequest
    \* If the donor is unstarted, it starts, otherwise nothing happens. Either way sends a response
    \* to cloud.
    /\ CASE donorState = DonUninitialized ->
            /\ donorState' = DonAbortingIndexBuilds
            \* Send an immediate response to cloud.
            /\ SendAndDiscard(DonorStartMigrationResponseGen, m)
      [] donorState \in {DonAbortingIndexBuilds, DonPinning, DonDataSync, DonBlocking,
                         DonCommitted, DonAborted, DonDone} ->
            /\ SendAndDiscard(DonorStartMigrationResponseGen, m)
            /\ UNCHANGED <<donorVars>>
    /\ UNCHANGED <<recipientVars, cloudVars, totalRequests>>

\* Cloud
HandleDonorStartMigrationResponse(m) ==
    /\ m.mType = DonorStartMigrationResponse
    \* Updates the cloud state to whatever the donor specifies, if specified.
    /\ CASE m.mOutcome = MigrationNone ->
            UNCHANGED <<cloudState>>
      [] m.mOutcome = MigrationCommitted ->
            cloudState' = CloudCommitted
      [] m.mOutcome = MigrationAborted ->
            cloudState' = CloudAborted
    /\ Discard(m)
    /\ UNCHANGED <<donorVars, recipientVars>>

\* Helper to generate the mSyncStatus field of a recipient response
RecipientSyncStatusGen == IF recipientAborted THEN SyncAborted ELSE SyncOK

\* Recipient
HandleRecipientSyncDataReturnAfterPinningRequest(m) ==
    /\ m.mType = RecipientSyncDataReturnAfterPinningRequest
    /\ CASE recipientState = RecUninitialized ->
            recipientState' = RecPinned
      [] recipientState \in {RecPinned, RecStarted, RecConsistent,
                             RecLagged, RecReady, RecAborted, RecDone} ->
            UNCHANGED recipientState
    /\ SendAndDiscard([mType |-> RecipientSyncDataReturnAfterPinningResponse,
                       mSyncStatus |-> RecipientSyncStatusGen], m)
    /\ UNCHANGED <<recipientAborted, donorVars, cloudVars>>

\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataReturnAfterPinningResponse_SyncOK(m) ==
    CASE donorState = DonPinning ->
            \* Move the state machine to "data sync" and send RecipientSyncData
            /\ donorState' = DonDataSync
            /\ SendAndDiscard([mType |-> RecipientSyncDataRequest], m)
      [] donorState \in {DonDataSync, DonBlocking, DonCommitted, DonAborted, DonDone} ->
            \* Just ignore this message, since we're past this step in the protocol
            \* and this is a delayed message.
            /\ Discard(m)
            /\ UNCHANGED <<donorState>>

\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataReturnAfterPinningResponse_SyncAborted(m) ==
    /\ CASE donorState = DonPinning ->
            \* The recipient failed the migration, so abort.
            donorState' = DonAborted
      [] donorState \in {DonDataSync, DonBlocking, DonAborted, DonDone} ->
            \* Delayed response to an earlier message, ignore it.
            UNCHANGED <<donorState>>
    /\ Discard(m)

\* Donor
HandleRecipientSyncDataReturnAfterPinningResponse(m) ==
    /\ m.mType = RecipientSyncDataReturnAfterPinningResponse
    /\ CASE m.mSyncStatus = SyncOK ->
            HandleRecipientSyncDataReturnAfterPinningResponse_SyncOK(m)
      [] m.mSyncStatus = SyncAborted ->
            HandleRecipientSyncDataReturnAfterPinningResponse_SyncAborted(m)
    /\ UNCHANGED <<recipientVars, cloudVars>>

\* Recipient
HandleRecipientSyncDataRequest(m) ==
    /\ m.mType = RecipientSyncDataRequest
    \* Don't handle messages until we transition to consistent, or abort.
    /\ recipientState # RecStarted
    /\ Assert(recipientState # RecUninitialized,
              "Received RecipientSyncData in state "
              \o ToString(recipientState))
    /\ CASE recipientState = RecPinned ->
            \* Starts the migration. The recipient does not respond to the donor until it is
            \* consistent.
            /\ recipientState' = RecStarted
            /\ Discard(m)
            /\ UNCHANGED <<recipientAborted>>
      [] recipientState # RecPinned ->
            /\ SendAndDiscard([mType |-> RecipientSyncDataResponse,
                               mSyncStatus |-> RecipientSyncStatusGen], m)
            /\ UNCHANGED <<recipientVars>>
    /\ UNCHANGED <<donorVars, cloudVars>>

\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataResponse_SyncOK(m) ==
    /\ CASE donorState = DonDataSync ->
            \* Move the state machine to "blocking" and send RecipientSyncDataReturnAfterReachingDonorTimestamp.
            /\ donorState' = DonBlocking
            /\ SendAndDiscard([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampRequest], m)
      [] donorState \in {DonBlocking, DonCommitted, DonAborted, DonDone} ->
            \* Just ignore this message, since we're past this step in the protocol
            \* and this is a delayed message.
            /\ Discard(m)
            /\ UNCHANGED <<donorState>>

\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataResponse_SyncAborted(m) ==
    /\ CASE donorState \in {DonDataSync, DonBlocking} ->
            \* The recipient failed the migration, so abort.
            \* We can get this response in Blocking when there are two
            \* RecipientSyncData responses and the "OK" one is processed first.
            donorState' = DonAborted
      [] donorState \in {DonCommitted, DonAborted, DonDone} ->
            \* The migration is already finished, do nothing.
            UNCHANGED <<donorState>>
    /\ Discard(m)

\* Donor
HandleRecipientSyncDataResponse(m) ==
    /\ m.mType = RecipientSyncDataResponse
    /\ Assert(donorState \notin {DonUninitialized, DonPinning},
              "Received RecipientSyncDataResponse in state "
              \o ToString(donorState))
    /\ CASE m.mSyncStatus = SyncOK ->
            HandleRecipientSyncDataResponse_SyncOK(m)
      [] m.mSyncStatus = SyncAborted ->
            HandleRecipientSyncDataResponse_SyncAborted(m)
    /\ UNCHANGED <<recipientVars, cloudVars>>

\* Recipient
HandleRecipientSyncDataReturnAfterReachingDonorTimestampRequest(m) ==
    /\ m.mType = RecipientSyncDataReturnAfterReachingDonorTimestampRequest
    \* We don't want to handle this request being processed while lagged, since that would
    \* require modeling request joining behavior, which is unnecessary complexity for the
    \* purposes of this model. A RecipientSyncDataReturnAfterReachingDonorTimestamp request being
    \* processed while in RecLagged must be a duplicate message.
    /\ recipientState \notin {RecLagged}
    /\ CASE recipientState = RecConsistent ->
            \* Move the state machine to "lagged", since the recipient now knows the ending
            \* timestamp. The recipient does not respond to the donor until it has caught up.
            /\ recipientState' = RecLagged
            /\ Discard(m)
            /\ UNCHANGED <<recipientAborted>>
      [] recipientState # RecConsistent ->
            /\ SendAndDiscard([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampResponse,
                               mSyncStatus |-> RecipientSyncStatusGen], m)
            /\ UNCHANGED <<recipientVars>>
    /\ UNCHANGED <<donorVars, cloudVars>>

\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse_SyncOK ==
    CASE donorState = DonBlocking ->
            \* The recipient is done!
            donorState' = DonCommitted
      [] donorState \in {DonCommitted, DonAborted, DonDone} ->
            \* Just ignore this message, since we're past this step in the protocol
            \* and this is a delayed message.
            UNCHANGED <<donorState>>

\* Factored out of below to make nested Case statements clearer.
HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse_SyncAborted ==
    CASE donorState = DonBlocking ->
            \* The recipient failed the migration, so abort.
            donorState' = DonAborted
      [] donorState \in {DonAborted, DonDone} ->
            \* If the migration is already aborted or finished, do nothing.
            UNCHANGED <<donorState>>
\* Donor
HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse(m) ==
    /\ m.mType = RecipientSyncDataReturnAfterReachingDonorTimestampResponse
    /\ CASE m.mSyncStatus = SyncOK ->
            HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse_SyncOK
      [] m.mSyncStatus = SyncAborted ->
            HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse_SyncAborted
    /\ Discard(m)
    /\ UNCHANGED <<recipientVars, cloudVars>>

\* Donor
HandleDonorForgetMigrationRequest(m) ==
    /\ m.mType = DonorForgetMigrationRequest
    \* Don't mark donor finished until recipient is.
    /\ SendAndDiscard([mType |-> RecipientForgetMigrationRequest], m)
    /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>

\* Cloud
HandleDonorForgetMigrationResponse(m) ==
    /\ m.mType = DonorForgetMigrationResponse
    \* The donor and recipient unconditionally finish the migration, so cloud can too.
    /\ cloudState' = CloudDone
    /\ Discard(m)
    /\ UNCHANGED <<donorVars, recipientVars>>

\* Recipient
HandleRecipientForgetMigrationRequest(m) ==
    /\ m.mType = RecipientForgetMigrationRequest
    \* Finish the migration no matter what, and tell the donor.
    /\ recipientState' = RecDone
    /\ SendAndDiscard([mType |-> RecipientForgetMigrationResponse], m)
    /\ UNCHANGED <<donorVars, cloudVars, recipientAborted>>

\* Donor
HandleRecipientForgetMigrationResponse(m) ==
    /\ m.mType = RecipientForgetMigrationResponse
    \* The recipient has finished the migration, so now the donor can finish the migration and
    \* respond to cloud that it has finished the migration.
    /\ donorState' = DonDone
    /\ SendAndDiscard([mType |-> DonorForgetMigrationResponse], m)
    /\ UNCHANGED <<recipientVars, cloudVars>>


(******************************************************************************)
(* [ACTION] *)
(******************************************************************************)

DonorAbortsIndexBuilds ==
    /\ donorState = DonAbortingIndexBuilds
    /\ donorState' = DonPinning
    \* Call recipientSyncData with returnAfterPinningOldestTimestamp.
    /\ Send([mType |-> RecipientSyncDataReturnAfterPinningRequest])
    /\ UNCHANGED <<totalResponses, recipientVars, cloudVars>>

\* Models a retry of recipientSyncData with returnAfterPinningOldestTimestamp.
DonorSendsRecipientSyncDataReturnAfterPinningRequest ==
    /\ donorState = DonPinning
    /\ Send([mType |-> RecipientSyncDataReturnAfterPinningRequest])
    /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>

\* Models the first try or a retry of recipientSyncData.
DonorSendsRecipientSyncDataRequest ==
    /\ donorState = DonDataSync
    /\ Send([mType |-> RecipientSyncDataRequest])
    /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>

\* Models a retry of RecipientSyncDataReturnAfterReachingDonorTimestamp.
DonorSendsRecipientSyncDataReturnAfterReachingDonorTimestampRequest ==
    /\ donorState = DonBlocking
    /\ Send([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampRequest])
    /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>

CloudSendsDonorStartMigrationRequest ==
    /\ cloudState = CloudUnknown
    /\ Send([mType |-> DonorStartMigrationRequest])
    /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>

CloudSendsDonorForgetMigrationRequest ==
    /\ cloudState \in {CloudAborted, CloudCommitted}
    /\ Send([mType |-> DonorForgetMigrationRequest])
    /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>

RecipientBecomesConsistent ==
    /\ recipientState = RecStarted
    /\ recipientState' = RecConsistent
    /\ Send([mType |-> RecipientSyncDataResponse,
             mSyncStatus |-> RecipientSyncStatusGen])
    /\ UNCHANGED <<donorVars, cloudVars, recipientAborted>>

RecipientCatchesUp ==
    /\ recipientState = RecLagged
    /\ recipientState' = RecReady
    /\ Send([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampResponse,
             mSyncStatus |-> RecipientSyncStatusGen])
    /\ UNCHANGED <<donorVars, cloudVars, recipientAborted>>

RecipientFailsMigration ==
    \* Recipient can't fail after it's ready, finished, or already aborted.
    /\ recipientState \notin {RecUninitialized, RecReady, RecAborted, RecDone}
    /\ recipientState' = RecAborted
    /\ recipientAborted' = TRUE
    /\ CASE recipientState = RecStarted ->
            \* The recipient has an active RecipientSyncData request.
            Send([mType |-> RecipientSyncDataResponse,
                  mSyncStatus |-> SyncAborted])
      [] recipientState = RecLagged ->
            \* When "lagged" the recipient has an active RecipientSyncDataReturnAfterReachingDonorTimestamp request.
            Send([mType |-> RecipientSyncDataReturnAfterReachingDonorTimestampResponse,
                  mSyncStatus |-> SyncAborted])
      [] recipientState \in {RecUninitialized, RecPinned, RecConsistent} ->
            \* No active donor request.
            UNCHANGED <<messageVars>>
    /\ UNCHANGED <<cloudVars, donorVars>>

(**************************************************************************************************)
(* Correctness Properties *)
(**************************************************************************************************)

StateMachinesInconsistent ==
    \/ /\ cloudState = CloudCommitted
       /\ \/ recipientState \notin {RecReady, RecDone}
          \/ recipientAborted = TRUE
          \/ donorState \notin {DonCommitted, DonDone}
    \/ /\ donorState = DonCommitted
       /\ \/ recipientState \notin {RecReady, RecDone}
          \/ recipientAborted = TRUE

StateMachinesConsistent == ~StateMachinesInconsistent
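\* Illustrative reading (added comment, not part of the original spec): the invariant rejects,
\* for example, any state where cloudState = CloudCommitted while recipientAborted = TRUE, or
\* where donorState = DonCommitted but the recipient never reached RecReady or RecDone.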

(**************************************************************************************************)
(* Liveness properties *)
(**************************************************************************************************)

\* Checks that the state machines eventually converge on terminating states.
MigrationEventuallyCompletes ==
    <> /\ recipientState = RecDone
       /\ donorState = DonDone
       /\ cloudState = CloudDone

\* Checks that if the bag fills up, it eventually empties.
MessageBagEventuallyEmpties ==
    Cardinality(DOMAIN messages) > 0 ~> Cardinality(DOMAIN messages) = 0

\* Checks that the number of totalRequests eventually equals the number of totalResponses,
\* and stays that way. This will always be true right before termination.
EachRequestHasAResponse ==
    <>[] (totalRequests = totalResponses)
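\* Added note (not part of the original spec): "<>[]" reads "eventually always", so the equality
\* must hold from some point onward rather than at just a single instant.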

(**************************************************************************************************)
(* Spec definition *)
(**************************************************************************************************)
Init ==
    /\ messages = [m \in {} |-> 0]
    /\ donorState = DonUninitialized
    /\ recipientState = RecUninitialized
    /\ cloudState = CloudUnknown
    /\ totalRequests = 0
    /\ totalResponses = 0
    /\ recipientAborted = FALSE

RecipientBecomesConsistentAction == RecipientBecomesConsistent
RecipientCatchesUpAction == RecipientCatchesUp
RecipientFailsMigrationAction == RecipientFailsMigration
CloudSendsDonorStartMigrationRequestAction == CloudSendsDonorStartMigrationRequest
CloudSendsDonorForgetMigrationRequestAction == CloudSendsDonorForgetMigrationRequest
DonorAbortsIndexBuildsAction == DonorAbortsIndexBuilds
DonorSendsRecipientSyncDataReturnAfterPinningRequestAction == DonorSendsRecipientSyncDataReturnAfterPinningRequest
DonorSendsRecipientSyncDataRequestAction == DonorSendsRecipientSyncDataRequest
DonorSendsRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction == DonorSendsRecipientSyncDataReturnAfterReachingDonorTimestampRequest

ReceiveDonorStartMigrationRequestAction == \E m \in DOMAIN messages :
    HandleDonorStartMigrationRequest(m)
ReceiveDonorStartMigrationResponseAction == \E m \in DOMAIN messages :
    HandleDonorStartMigrationResponse(m)
ReceiveRecipientSyncDataReturnAfterPinningRequestAction == \E m \in DOMAIN messages :
    HandleRecipientSyncDataReturnAfterPinningRequest(m)
ReceiveRecipientSyncDataReturnAfterPinningResponseAction == \E m \in DOMAIN messages :
    HandleRecipientSyncDataReturnAfterPinningResponse(m)
ReceiveRecipientSyncDataRequestAction == \E m \in DOMAIN messages :
    HandleRecipientSyncDataRequest(m)
ReceiveRecipientSyncDataResponseAction == \E m \in DOMAIN messages :
    HandleRecipientSyncDataResponse(m)
ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction == \E m \in DOMAIN messages :
    HandleRecipientSyncDataReturnAfterReachingDonorTimestampRequest(m)
ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampResponseAction == \E m \in DOMAIN messages :
    HandleRecipientSyncDataReturnAfterReachingDonorTimestampResponse(m)
ReceiveDonorForgetMigrationRequestAction == \E m \in DOMAIN messages :
    HandleDonorForgetMigrationRequest(m)
ReceiveDonorForgetMigrationResponseAction == \E m \in DOMAIN messages :
    HandleDonorForgetMigrationResponse(m)
ReceiveRecipientForgetMigrationRequestAction == \E m \in DOMAIN messages :
    HandleRecipientForgetMigrationRequest(m)
ReceiveRecipientForgetMigrationResponseAction == \E m \in DOMAIN messages :
    HandleRecipientForgetMigrationResponse(m)

Next ==
    \/ RecipientBecomesConsistentAction
    \/ RecipientCatchesUpAction
    \/ RecipientFailsMigrationAction
    \/ CloudSendsDonorStartMigrationRequestAction
    \/ CloudSendsDonorForgetMigrationRequestAction
    \/ DonorAbortsIndexBuildsAction
    \/ DonorSendsRecipientSyncDataReturnAfterPinningRequestAction
    \/ DonorSendsRecipientSyncDataRequestAction
    \/ DonorSendsRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction
    \/ ReceiveRecipientSyncDataReturnAfterPinningRequestAction
    \/ ReceiveRecipientSyncDataReturnAfterPinningResponseAction
    \/ ReceiveDonorStartMigrationRequestAction
    \/ ReceiveDonorStartMigrationResponseAction
    \/ ReceiveRecipientSyncDataRequestAction
    \/ ReceiveRecipientSyncDataResponseAction
    \/ ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction
    \/ ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampResponseAction
    \/ ReceiveDonorForgetMigrationRequestAction
    \/ ReceiveDonorForgetMigrationResponseAction
    \/ ReceiveRecipientForgetMigrationRequestAction
    \/ ReceiveRecipientForgetMigrationResponseAction

\* Add fairness constraints so the above liveness properties are met.
Liveness ==
    /\ WF_vars(ReceiveDonorStartMigrationRequestAction)
    /\ WF_vars(ReceiveDonorStartMigrationResponseAction)
    /\ WF_vars(ReceiveRecipientSyncDataReturnAfterPinningRequestAction)
    /\ WF_vars(ReceiveRecipientSyncDataReturnAfterPinningResponseAction)
    /\ WF_vars(ReceiveRecipientSyncDataRequestAction)
    /\ WF_vars(ReceiveRecipientSyncDataResponseAction)
    /\ WF_vars(ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampRequestAction)
    /\ WF_vars(ReceiveRecipientSyncDataReturnAfterReachingDonorTimestampResponseAction)
    /\ WF_vars(ReceiveDonorForgetMigrationRequestAction)
    /\ WF_vars(ReceiveDonorForgetMigrationResponseAction)
    /\ WF_vars(ReceiveRecipientForgetMigrationRequestAction)
    /\ WF_vars(ReceiveRecipientForgetMigrationResponseAction)
    /\ WF_vars(CloudSendsDonorStartMigrationRequestAction)
    /\ WF_vars(CloudSendsDonorForgetMigrationRequestAction)

Spec == Init /\ [][Next]_vars /\ Liveness

=============================================================================