SERVER-108646 Introduce new passthrough suites for testing that server-based clients properly retry load-shedding errors (#43235)

GitOrigin-RevId: 491e1061ab4fe5f96e88541a426be7b9463f8f80
Guillaume Racicot 2025-11-24 13:17:05 -05:00 committed by MongoDB Bot
parent 3e7ced3b9e
commit ec7f11e6dd
9 changed files with 669 additions and 2 deletions

.github/CODEOWNERS vendored
View File

@@ -250,6 +250,7 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
/buildscripts/resmokeconfig/suites/**/concurrency_simultaneous_replication.yml @10gen/server-replication-reviewers @svc-auto-approve-bot
/buildscripts/resmokeconfig/suites/**/change_streams* @10gen/query-execution-change-streams @svc-auto-approve-bot
/buildscripts/resmokeconfig/suites/**/sharding_change_streams_v2.yml @10gen/query-execution-change-streams @svc-auto-approve-bot
/buildscripts/resmokeconfig/suites/**/*rate_limited.yml @10gen/server-workload-resilience @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/resmokelib/OWNERS.yml
/buildscripts/resmokelib/ @10gen/devprod-correctness @svc-auto-approve-bot

View File

@@ -198,3 +198,6 @@ filters:
- "sharding_change_streams_v2.yml":
approvers:
- 10gen/query-execution-change-streams
- "*rate_limited.yml":
approvers:
- 10gen/server-workload-resilience

View File

@@ -0,0 +1,342 @@
# Based on multi_shard_multi_stmt_txn_jscore_passthrough. This suite runs the jsCore tests with
# commands wrapped in multi-statement transactions and with a simulated rate limiter enabled to
# reject random requests. The purpose of this passthrough is to add coverage of the retryability
# support in various process-internal clients. With a high maximum retry count, no errors should
# bubble up to the driver, even with a balancer present (see the backoff sketch after this file).
test_kind: multi_stmt_txn_passthrough
selector:
roots:
- jstests/core/**/*.js
exclude_files:
# These tests run in the jscore_txn passthrough suites.
- jstests/core/txns/**/*.js
# The following tests fail because a certain command or functionality is not supported by
# mongos. This command or functionality is placed in a comment next to the failing test.
- jstests/core/**/apitest_db.js # serverStatus output doesn't have storageEngine.
- jstests/core/**/awaitdata_getmore_cmd.js # capped collections.
- jstests/core/**/bypass_doc_validation.js # sharded $out output not permitted
- jstests/core/**/check_shard_index.js # checkShardingIndex.
- jstests/core/**/compact_keeps_indexes.js # compact.
- jstests/core/**/currentop.js # uses fsync.
- jstests/core/**/dbhash.js # dbhash.
- jstests/core/**/fsync.js # uses fsync.
- jstests/core/**/geo_s2cursorlimitskip.js # profiling.
- jstests/core/**/geo_update_btree2.js # notablescan.
- jstests/core/**/queryoptimizera.js # "local" database.
- jstests/core/**/startup_log.js # "local" database.
- jstests/core/**/tailable_cursor_invalidation.js # capped collections.
- jstests/core/**/tailable_getmore_batch_size.js # capped collections.
- jstests/core/**/tailable_skip_limit.js # capped collections.
- jstests/core/**/query/top/top.js # top.
# The following tests fail because mongos behaves differently from mongod when testing certain
# functionality. The differences are in a comment next to the failing test.
- jstests/core/**/geo_2d_explain.js # executionSuccess in different spot in explain().
- jstests/core/**/geo_s2explain.js # inputStage in different spot in explain().
- jstests/core/**/geo_s2sparse.js # keysPerIndex in different spot in validate().
- jstests/core/**/operation_latency_histogram.js # Stats are counted differently on mongos, SERVER-24880.
# TODO: Remove after fixing SERVER-103278. executionStats.nReturned is incorrect for sharded distinct commands.
- jstests/core/**/distinct_index1.js
# TODO SERVER-32311: These tests use plan stage helpers which can't handle sharded explain output.
- jstests/core/**/expr_index_use.js
- jstests/core/**/index_multikey.js
- jstests/core/**/query/explain/optimized_match_explain.js
- jstests/core/**/sort_array.js
##
## Limitations with the way the runner file injects transactions.
##
# These tests expect some statements to error, which will cause txns to abort entirely.
- jstests/core/**/bulk_api_ordered.js
- jstests/core/**/bulk_api_unordered.js
- jstests/core/**/commands_with_uuid.js
- jstests/core/**/explain_execution_error.js
- jstests/core/**/expr.js
- jstests/core/**/find9.js
- jstests/core/**/find_getmore_bsonsize.js
- jstests/core/**/find_getmore_cmd.js
- jstests/core/**/geo_allowedcomparisons.js
- jstests/core/**/geo_big_polygon2.js
- jstests/core/**/geonear_key.js
- jstests/core/**/in.js
- jstests/core/**/index8.js # No explicit check for failed command.
- jstests/core/**/index_decimal.js
- jstests/core/**/index_large_and_small_dates.js
- jstests/core/**/index_multiple_compatibility.js
- jstests/core/**/index_partial_write_ops.js
- jstests/core/**/indexa.js # No explicit check for failed command.
- jstests/core/**/indexes_multiple_commands.js
- jstests/core/**/js2.js
- jstests/core/query/json_schema/json_schema.js
- jstests/core/**/mr_bigobject.js
- jstests/core/**/not2.js
- jstests/core/**/null_query_semantics.js
- jstests/core/**/or1.js
- jstests/core/**/or2.js
- jstests/core/**/or3.js
- jstests/core/**/ord.js
- jstests/core/**/orj.js
- jstests/core/**/ref.js
- jstests/core/**/ref4.js
- jstests/core/**/regex_limit.js
- jstests/core/**/remove_undefined.js
- jstests/core/**/set7.js
- jstests/core/**/sortb.js
- jstests/core/**/sortf.js
- jstests/core/**/sortg.js
- jstests/core/**/sortj.js
- jstests/core/**/sort_with_meta_operator.js
- jstests/core/**/tailable_skip_limit.js
- jstests/core/**/type_array.js
- jstests/core/**/uniqueness.js
- jstests/core/**/unset2.js
- jstests/core/**/update_addToSet.js
- jstests/core/**/update_array_offset_positional.js
- jstests/core/**/update_arrayFilters.js
- jstests/core/**/update_find_and_modify_id.js
- jstests/core/**/update_modifier_pop.js
- jstests/core/**/update_dollar_fields.js
- jstests/core/**/update_fail_halts_modifications.js
# TODO: SERVER-38207 Cannot insert document with MaxKey shard key.
- jstests/core/**/type8.js
# Reads from system.views.
- jstests/core/catalog/views/views_drop.js
##
## Some aggregation stages don't support snapshot readconcern.
##
# explain (requires read concern local)
- jstests/core/**/agg_hint.js
- jstests/core/**/and.js
- jstests/core/**/query/collation/collation.js
- jstests/core/**/explain_shell_helpers.js
- jstests/core/**/index_partial_read_ops.js
- jstests/core/**/explain_server_params.js
- jstests/core/**/query/explain/optimized_match_explain.js
- jstests/core/**/sort_array.js
- jstests/core/views/views_collation.js
- jstests/core/**/wildcard_index_count.js
# $listSessions
- jstests/core/**/list_all_local_sessions.js
- jstests/core/**/list_all_sessions.js
- jstests/core/**/list_sessions.js
# $collStats
- jstests/core/**/operation_latency_histogram.js
- jstests/core/catalog/collstats/views_coll_stats.js
- jstests/core/catalog/views/views_stats.js
# Errors are expected to happen in these tests, which can cause transactions to get aborted.
# So when the test tries to inspect the documents, they can be out of sync (relative
# to a test run without multi-statement transactions).
- jstests/core/**/bulk_api_ordered.js
- jstests/core/**/bulk_api_unordered.js
- jstests/core/**/doc_validation.js
- jstests/core/**/doc_validation_options.js
- jstests/core/**/query/field_name_validation.js
- jstests/core/**/insert_illegal_doc.js
- jstests/core/**/push_sort.js
- jstests/core/**/update_arrayFilters.js
- jstests/core/**/update_dbref.js
- jstests/core/**/update_positional_no_array_elem.js
- jstests/core/**/write_result.js
- jstests/core/**/query/project/positional_projection.js
# Multiple writes in a txn, some of which fail because the collection doesn't exist.
# We create the collection and retry the last write, but previous writes would have
# still failed.
- jstests/core/**/dbref1.js
- jstests/core/**/dbref2.js
- jstests/core/**/ref3.js
- jstests/core/**/update_mod_dotted.js
##
## Error: Unable to acquire lock within a max lock request timeout of '0ms' milliseconds
##
# Collection drops done through applyOps are not converted to w:majority
- jstests/core/catalog/views/invalid_system_views.js
##
## Misc. reasons.
##
# SERVER-34868 Cannot run a legacy query on a session.
- jstests/core/**/query/exhaust.js
# SERVER-34772 Tailable Cursors are not allowed with snapshot readconcern.
- jstests/core/**/awaitdata_getmore_cmd.js
- jstests/core/**/getmore_cmd_maxtimems.js
- jstests/core/**/tailable_cursor_invalidation.js
- jstests/core/**/tailable_getmore_batch_size.js
# Wrong count for top info (WriteLock)
- jstests/core/**/query/top/top.js
# Expects collection to not have been created
- jstests/core/**/insert_id_undefined.js
# Creates sessions explicitly, resulting in txns being run through different sessions
# using a single txnNumber.
- jstests/core/query/json_schema/misc_validation.js
- jstests/core/catalog/views/views_all_commands.js
# Committing a transaction when the server is fsync locked fails.
- jstests/core/**/fsync.js
# Expects legacy errors ($err).
- jstests/core/**/constructors.js
# txn interrupted by command outside of txn before getMore runs.
- jstests/core/**/commands_namespace_parsing.js
- jstests/core/ddl/drop_collection_cursors.js
- jstests/core/**/geo_s2cursorlimitskip.js
- jstests/core/**/getmore_invalidated_cursors.js
- jstests/core/**/getmore_invalidated_documents.js
- jstests/core/**/query/kill_cursors.js
- jstests/core/**/list_indexes.js
- jstests/core/**/oro.js
- jstests/core/**/sort_with_update_between_getmores.js
# Parallel Shell - we do not signal the override to end a txn when a parallel shell closes.
- jstests/core/**/awaitdata_getmore_cmd.js
- jstests/core/**/compact_keeps_indexes.js
- jstests/core/**/count10.js
- jstests/core/**/count_plan_summary.js
- jstests/core/**/coveredIndex3.js
- jstests/core/**/currentop.js
- jstests/core/**/distinct3.js
- jstests/core/**/find_and_modify_concurrent_update.js
- jstests/core/**/fsync.js
- jstests/core/**/geo_update_btree.js
- jstests/core/**/loadserverscripts.js
- jstests/core/**/mr_killop.js
- jstests/core/**/remove_concurrent_inserts.js
- jstests/core/**/remove_adjacent_index_keys.js
- jstests/core/**/shellstartparallel.js
- jstests/core/**/update_namespace_details.js
# Command expects to see result from parallel operation.
# E.g. Suppose the following sequence of events: op1, join() op2 in parallel shell, op3.
# op3 will still be using the snapshot from op1, and not see op2 at all.
- jstests/core/**/bench_test1.js
- jstests/core/**/benchrun_pipeline_updates.js
- jstests/core/**/cursora.js
# Does not support tojson of command objects.
- jstests/core/**/query/function_prototype_bson_type.js
exclude_with_any_tags:
# "Cowardly refusing to override read concern of command: ..."
- assumes_read_concern_unchanged
# "writeConcern is not allowed within a multi-statement transaction"
- assumes_write_concern_unchanged
- assumes_against_mongod_not_mongos
- assumes_standalone_mongod
# This passthrough implicitly shards the accessed collections. Do not run tests where collections
# can't be created on `getCollection` call.
- assumes_no_implicit_collection_creation_on_get_collection
# Tests tagged with the following will fail because they assume collections are not sharded.
- assumes_no_implicit_collection_creation_after_drop
- assumes_no_implicit_index_creation
- assumes_unsharded_collection
- cannot_create_unique_index_when_using_hashed_shard_key
# Cannot retry a getMore command if a transient transaction or network error occurs during
# it, since we won't know whether the cursor was advanced or not.
- requires_getmore
# Snapshot reads in transactions are banned on capped collections.
- requires_capped
# system.profile collection doesn't exist on mongos.
- requires_profiling
# Retrying a query can change whether a plan cache entry is active.
- inspects_whether_plan_cache_entry_is_active
- does_not_support_transactions
# Transaction-continuing commands must use the same API parameters as the first command, so tests
# that use API parameters cannot be run with transactions.
- uses_api_parameters
# "Cowardly refusing to run test with transaction override enabled when it uses
# startParallelShell()"
- uses_parallel_shell
- does_not_support_causal_consistency
- requires_timeseries # Transactions not supported
executor:
archive:
hooks:
- CheckReplDBHashInBackground
- CheckReplDBHash
- CheckMetadataConsistencyInBackground
- ValidateCollections
config:
shell_options:
eval: >-
globalThis.testingReplication = true;
await import('jstests/libs/override_methods/implicitly_shard_accessed_collections.js');
await import('jstests/libs/override_methods/enable_sessions.js');
await import('jstests/libs/override_methods/txn_passthrough_cmd_massage.js');
await import('jstests/libs/override_methods/network_error_and_txn_override.js');
global_vars:
TestData:
defaultReadConcernLevel: "majority"
defaultTransactionReadConcernLevel: "snapshot"
defaultTransactionWriteConcernW: "majority"
defaultWriteConcernW: "majority"
networkErrorAndTxnOverrideConfig:
wrapCRUDinTransactions: true
sessionOptions:
# Read-your-own-writes is not guaranteed without causal consistency since the
# coordinateCommit command returns as soon as the decision is made durable, and
# prepare conflicts are explicitly ignored for non-causal reads.
causalConsistency: true
hooks:
- class: CheckReplDBHashInBackground
# The CheckReplDBHash hook waits until all operations have replicated to and have been applied
# on the secondaries, so we run the ValidateCollections hook after it to ensure we're
# validating the entire contents of the collection.
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground
- class: ValidateCollections
- class: CheckOrphansDeleted
- class: CleanEveryN
n: 20
fixture:
class: ShardedClusterFixture
mongos_options:
set_parameters:
enableTestCommands: 1
defaultClientMaxRetryAttempts: 16
defaultClientBaseBackoffMillis: 10
defaultClientMaxBackoffMillis: 1000
mongod_options:
set_parameters:
defaultClientMaxRetryAttempts: 16
defaultClientBaseBackoffMillis: 10
defaultClientMaxBackoffMillis: 1000
logComponentVerbosity:
verbosity: 0
command: 2
replication:
election: 4
heartbeats: 2
initialSync: 2
rollback: 2
storage:
recovery: 2
transaction: 4
enableTestCommands: 1
numInitialSyncConnectAttempts: 16
numInitialSyncAttempts: 16
numInitialSyncOplogFindAttempts: 16
coordinateCommitReturnImmediatelyAfterPersistingDecision: true
failpoint.failRateLimiting:
mode:
activationProbability: 0.25
num_rs_nodes_per_shard: 2
num_shards: 2
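The defaultClientMaxRetryAttempts, defaultClientBaseBackoffMillis, and defaultClientMaxBackoffMillis parameters used above (16 attempts, 10 ms base, 1000 ms cap) suggest a capped exponential backoff between retries of a load-shedding rejection. Here is a minimal sketch of such a retry loop, assuming a simple doubling-with-cap policy; the server's actual growth and jitter behavior is not shown in this change:

```cpp
// Hypothetical model of a retrying process-internal client; not the server's
// implementation. Parameter names mirror the suite configuration above, and the
// doubling-with-cap backoff policy is an assumption.
#include <algorithm>
#include <chrono>
#include <functional>
#include <thread>

struct RetrySettings {
    int maxRetryAttempts = 16;    // defaultClientMaxRetryAttempts
    int baseBackoffMillis = 10;   // defaultClientBaseBackoffMillis
    int maxBackoffMillis = 1000;  // defaultClientMaxBackoffMillis
};

// Runs `op` until it succeeds or the retry budget is exhausted. `op` returns
// true on success and false on a retryable load-shedding rejection.
bool runWithRetries(const RetrySettings& settings, const std::function<bool()>& op) {
    int backoffMillis = settings.baseBackoffMillis;
    for (int attempt = 0; attempt <= settings.maxRetryAttempts; ++attempt) {
        if (op()) {
            return true;  // Success: nothing bubbles up to the driver.
        }
        if (attempt == settings.maxRetryAttempts) {
            break;  // Retry budget exhausted: the error surfaces to the caller.
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(backoffMillis));
        backoffMillis = std::min(backoffMillis * 2, settings.maxBackoffMillis);
    }
    return false;
}
```

With the fail point rejecting 25% of eligible requests, even 16 independent attempts all being rejected has probability 0.25^16 (roughly 2e-10), which is why these suites expect no load-shedding errors to reach the driver.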

View File

@@ -0,0 +1,107 @@
# This suite tests the core collection functionality with a simulated rate limiter enabled to
# reject random requests. The purpose of this passthrough is to add coverage of the retryability
# support in various process-internal clients. With a high maximum retry count, no errors should
# bubble up to the driver.
test_kind: js_test
selector:
roots:
- jstests/core/**/*.js
- jstests/core_sharding/**/*.js
- jstests/fle2/**/*.js
- src/mongo/db/modules/*/jstests/fle2/**/*.js
exclude_files:
# These tests run in the jscore_txn passthrough suites.
- jstests/core/txns/**/*.js
# The following tests fail because a certain command or functionality is not supported by
# mongos. This command or functionality is placed in a comment next to the failing test.
- jstests/core/**/apitest_db.js # serverStatus output doesn't have storageEngine.
- jstests/core/**/awaitdata_getmore_cmd.js # capped collections.
- jstests/core/**/bypass_doc_validation.js # sharded $out output not permitted
- jstests/core/**/check_shard_index.js # checkShardingIndex.
- jstests/core/**/compact_keeps_indexes.js # compact.
- jstests/core/**/currentop.js # uses fsync.
- jstests/core/**/dbhash.js # dbhash.
- jstests/core/**/fsync.js # uses fsync.
- jstests/core/**/geo_s2cursorlimitskip.js # profiling.
- jstests/core/**/geo_update_btree2.js # notablescan.
- jstests/core/**/queryoptimizera.js # "local" database.
- jstests/core/**/startup_log.js # "local" database.
- jstests/core/**/tailable_cursor_invalidation.js # capped collections.
- jstests/core/**/tailable_getmore_batch_size.js # capped collections.
- jstests/core/**/tailable_skip_limit.js # capped collections.
- jstests/core/**/query/top/top.js # top.
# The following tests fail because mongos behaves differently from mongod when testing certain
# functionality. The differences are in a comment next to the failing test.
- jstests/core/**/geo_2d_explain.js # executionSuccess in different spot in explain().
- jstests/core/**/geo_s2explain.js # inputStage in different spot in explain().
- jstests/core/**/geo_s2sparse.js # keysPerIndex in different spot in validate().
- jstests/core/**/operation_latency_histogram.js # Stats are counted differently on mongos, SERVER-24880.
# The following tests fail because they count indexes. These counts do not take into account the
# additional hashed shard key indexes that are automatically added by this passthrough.
- jstests/core/**/apitest_dbcollection.js
- jstests/core/**/bad_index_plugin.js
- jstests/core/**/create_indexes.js
- jstests/core/**/list_indexes_non_existent_ns.js
- jstests/core/**/mr_preserve_indexes.js
# TODO: Remove after fixing SERVER-103278. executionStats.nReturned is incorrect for sharded distinct commands.
- jstests/core/**/distinct_index1.js
# TODO SERVER-32311: These tests use plan stage helpers which can't handle sharded explain output.
- jstests/core/**/expr_index_use.js
- jstests/core/**/index_multikey.js
- jstests/core/**/query/explain/optimized_match_explain.js
- jstests/core/**/sort_array.js
exclude_with_any_tags:
- assumes_standalone_mongod
- assumes_against_mongod_not_mongos
# This passthrough implicitly shards the accessed collections. Do not run tests where collections
# can't be created on `getCollection` call.
- assumes_no_implicit_collection_creation_on_get_collection
# Tests tagged with the following will fail because they assume collections are not sharded.
- assumes_no_implicit_collection_creation_after_drop
- assumes_no_implicit_index_creation
- assumes_unsharded_collection
- cannot_create_unique_index_when_using_hashed_shard_key
# system.profile collection doesn't exist on mongos.
- requires_profiling
# Capped collections cannot be sharded
- requires_capped
executor:
archive:
hooks:
- CheckReplDBHash
- CheckMetadataConsistencyInBackground
- ValidateCollections
config:
shell_options:
eval: await import("jstests/libs/override_methods/implicitly_shard_accessed_collections.js");
hooks:
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground
- class: ValidateCollections
- class: CheckOrphansDeleted
- class: CleanEveryN
n: 20
fixture:
class: ShardedClusterFixture
num_shards: 2
enable_balancer: false
mongos_options:
set_parameters:
enableTestCommands: 1
defaultClientMaxRetryAttempts: 16
defaultClientBaseBackoffMillis: 10
defaultClientMaxBackoffMillis: 1000
mongod_options:
set_parameters:
enableTestCommands: 1
defaultClientMaxRetryAttempts: 16
defaultClientBaseBackoffMillis: 10
defaultClientMaxBackoffMillis: 1000
failpoint.failRateLimiting:
mode:
activationProbability: 0.25
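The failpoint.failRateLimiting startup parameter above enables the fail point in activationProbability mode, so roughly one in four eligible requests is rejected as if the ingress rate limiter had shed it. A self-contained sketch of that behavior, modeling only the probabilistic semantics of the mode rather than the server's FailPoint machinery:

```cpp
// Simulated rate limiter: rejects each eligible request with a fixed
// probability, mirroring the fail point's activationProbability: 0.25 mode.
#include <random>

class SimulatedRateLimiter {
public:
    explicit SimulatedRateLimiter(double activationProbability)
        : _reject(activationProbability) {}

    // Returns true when the incoming request should be rejected (and later
    // retried by the internal client), false when it is admitted normally.
    bool shouldReject() {
        return _reject(_gen);
    }

private:
    std::bernoulli_distribution _reject;
    std::mt19937 _gen{std::random_device{}()};
};

// Usage sketch: SimulatedRateLimiter limiter(0.25);
// if (limiter.shouldReject()) { /* fail with a retryable overload error */ }
```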

View File

@@ -0,0 +1,118 @@
# Based on sharding_jscore_passthrough. This suite runs the jsCore tests with a balancer and with a
# simulated rate limiter enabled to reject random requests. The purpose of this passthrough is to
# add coverage of the retryability support in various process-internal clients. With a high maximum
# retry count, no errors should bubble up to the driver, even with a balancer present.
test_kind: js_test
selector:
roots:
- jstests/core/**/*.js
- jstests/core_sharding/**/*.js
- jstests/fle2/**/*.js
- src/mongo/db/modules/*/jstests/fle2/**/*.js
exclude_files:
# These tests use chunk migration, and they expect the balancer to be disabled.
- jstests/core_sharding/chunk_migration/*.js
# These tests are run in sharded_jscore_txns.
- jstests/core/txns/**/*.js
# TODO SERVER-87108 re-enable all map reduce tests
- jstests/core/query/map_reduce/*.js
# The following tests fail because a certain command or functionality is not supported on
# mongos. This command or functionality is placed in a comment next to the failing test.
- jstests/core/**/apitest_db.js # serverStatus output doesn't have storageEngine.
- jstests/core/**/check_shard_index.js # checkShardingIndex.
- jstests/core/**/compact_keeps_indexes.js # compact.
- jstests/core/**/currentop.js # uses fsync.
- jstests/core/**/dbhash.js # dbhash.
- jstests/core/**/fsync.js # uses fsync.
- jstests/core/**/geo_s2cursorlimitskip.js # profiling.
- jstests/core/**/geo_update_btree2.js # notablescan.
- jstests/core/**/queryoptimizera.js # "local" database.
- jstests/core/**/startup_log.js # "local" database.
- jstests/core/**/query/top/top.js # top.
# The following tests fail because mongos behaves differently from mongod when testing certain
# functionality. The differences are in a comment next to the failing test.
- jstests/core/**/geo_2d_explain.js # executionSuccess in different spot in explain().
- jstests/core/**/geo_s2explain.js # inputStage in different spot in explain().
- jstests/core/**/geo_s2sparse.js # keysPerIndex in different spot in validate().
- jstests/core/**/operation_latency_histogram.js # Stats are counted differently on mongos, SERVER-24880.
# The override library is not able to intercept createIndex commands executed inside benchRun.
- jstests/core/**/bench_test1.js
# moveCollection will drop indexes, which resets the index statistics this test asserts on.
- jstests/core/index/index_stats.js
# moveCollection and the many deleteMany commands in this test block each other and can easily cause the test to time out.
# This is caused by the pauseMigrationsDuringMultiUpdates cluster parameter and the random migrations failpoint both being enabled for this suite.
# TODO SERVER-91655: re-enable this test in this suite or potentially in a noPassthrough suite.
- jstests/core/timeseries/query/timeseries_predicates.js
# moveCollection and the many multi updates in this test block each other and can easily cause the test to time out.
# This is caused by the pauseMigrationsDuringMultiUpdates cluster parameter and the random migrations failpoint both being enabled for this suite.
# TODO SERVER-91722: re-enable this test in this suite or potentially in a noPassthrough suite.
- jstests/core/index/geo/geo_update_btree.js
# TODO SERVER-88275: A moveCollection command changes the UUID of the targeted collection.
# Any query using an auto yielding policy will likely return a QueryPlanKilled error if a moveCollection commit happens during the query execution.
- jstests/core/timeseries/geo/timeseries_geonear_random_measurements.js
- jstests/core/timeseries/query/timeseries_homogeneous_top_bottom.js
exclude_with_any_tags:
- assumes_standalone_mongod
- assumes_against_mongod_not_mongos
# system.profile collection doesn't exist on mongos.
- requires_profiling
- assumes_balancer_off
# fsync lock is not compatible with migrations since it
# can't be executed while the DDL lock is being held.
- requires_fsync
# This suite performs balancing of unsharded collections in the background
# using moveCollection, which changes collection UUIDs
- assumes_stable_collection_uuid
# implicitly_retry_on_migration_in_progress.js alters find/aggregate commands
# so that the whole result set is returned through a single batch
- assumes_no_implicit_cursor_exhaustion
executor:
archive:
hooks:
- CheckReplDBHash
- CheckMetadataConsistencyInBackground
- ValidateCollections
config:
shell_options:
global_vars:
TestData:
runningWithBalancer: true
eval: await import("jstests/libs/override_methods/implicitly_retry_on_migration_in_progress.js");
hooks:
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground
- class: ValidateCollections
- class: CheckOrphansDeleted
- class: CleanEveryN
n: 20
fixture:
class: ShardedClusterFixture
mongos_options:
set_parameters:
enableTestCommands: 1
defaultClientMaxRetryAttempts: 16
defaultClientBaseBackoffMillis: 10
defaultClientMaxBackoffMillis: 1000
mongod_options:
set_parameters:
enableTestCommands: 1
reshardingMinimumOperationDurationMillis: 0
useBatchedDeletesForRangeDeletion: true
defaultClientMaxRetryAttempts: 16
defaultClientBaseBackoffMillis: 10
defaultClientMaxBackoffMillis: 1000
failpoint.failRateLimiting:
mode:
activationProbability: 0.25
num_rs_nodes_per_shard: 1
num_shards: 2
enable_balancer: true
random_migrations: true
set_cluster_parameter:
parameter: pauseMigrationsDuringMultiUpdates
value:
enabled: True

View File

@@ -1305,6 +1305,22 @@ tasks:
use_large_distro: "true"
resmoke_jobs_max: 0 # No cap on number of jobs.
- <<: *gen_task_template
name: multi_shard_multi_stmt_txn_jscore_passthrough_rate_limited_gen
tags:
[
"assigned_to_jira_team_server_workload_scheduling",
"default",
"multi_shard",
"multi_stmt",
"common",
]
commands:
- func: "generate resmoke tasks"
vars:
use_large_distro: "true"
resmoke_jobs_max: 0 # No cap on number of jobs.
- <<: *gen_task_template
name: multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough_gen
tags:
@@ -1563,6 +1579,19 @@ tasks:
commands:
- func: "generate resmoke tasks"
- <<: *gen_task_template
name: sharded_collections_jscore_passthrough_rate_limited_gen
tags:
[
"assigned_to_jira_team_server_workload_scheduling",
"release_critical",
"incompatible_mac",
"sharding",
"jscore",
]
commands:
- func: "generate resmoke tasks"
- <<: *gen_task_template
name: sharded_collections_jscore_passthrough_with_config_shard_gen
tags:
@@ -2222,6 +2251,19 @@ tasks:
commands:
- func: "generate resmoke tasks"
- <<: *gen_task_template
name: sharding_jscore_passthrough_with_balancer_rate_limited_gen
tags:
[
"assigned_to_jira_team_server_workload_scheduling",
"release_critical",
"sharding",
"jscore",
"common",
]
commands:
- func: "generate resmoke tasks"
- <<: *gen_task_template
name: concurrency_sharded_with_balancer_and_config_shard_gen
tags:

View File

@@ -2416,6 +2416,7 @@ mongo_cc_library(
"validate_api_parameters",
"write_block_bypass",
"//src/mongo:base",
"//src/mongo/db:commands",
"//src/mongo/db/admission:ingress_admission_control",
"//src/mongo/db/admission:ingress_request_rate_limiter",
"//src/mongo/db/auth",
@@ -2440,6 +2441,7 @@ mongo_cc_library(
"//src/mongo/db/storage:storage_engine_metadata",
"//src/mongo/db/storage:storage_options",
"//src/mongo/db/transaction",
"//src/mongo/rpc:client_metadata",
"//src/mongo/s:query_analysis_sampler",
"//src/mongo/s:sharding_router_api",
"//src/mongo/s:startup_initialization",

View File

@@ -45,6 +45,9 @@ mongo_cc_library(
"ingress_admission_controller.cpp",
":ingress_admission_control_gen",
],
hdrs = [
"ingress_admission_controller.h",
],
deps = [
":ingress_admission_context",
":rate_limiter",

View File

@@ -182,6 +182,7 @@ MONGO_FAIL_POINT_DEFINE(hangAfterSessionCheckOut);
MONGO_FAIL_POINT_DEFINE(hangBeforeSettingTxnInterruptFlag);
MONGO_FAIL_POINT_DEFINE(hangAfterCheckingWritabilityForMultiDocumentTransactions);
MONGO_FAIL_POINT_DEFINE(failWithErrorCodeAfterSessionCheckOut);
MONGO_FAIL_POINT_DEFINE(failIngressRequestRateLimiting);
// Tracks the number of times a legacy unacknowledged write failed due to a
// 'not primary' error and resulted in a network disconnection.
@@ -1750,7 +1751,56 @@ void ExecCommandDatabase::_initiateCommand() {
}
}
// TODO(SERVER-114130): Move these conditions inside the gIngressAdmissionControlEnabled scope.
const auto isProcessInternalCommand = isProcessInternalClient(*opCtx->getClient());
const auto isExemptFromAdmissionControl = isProcessInternalCommand ||
!_invocation->isSubjectToIngressAdmissionControl() ||
IngressAdmissionContext::get(opCtx).isHoldingTicket();
failIngressRequestRateLimiting.executeIf(
[&](const BSONObj& data) {
// TODO(SERVER-114130): Remove error label override when moving to the ingress
// request rate limiter.
BSONArrayBuilder arrayBuilder;
arrayBuilder.append(ErrorLabel::kSystemOverloadedError);
arrayBuilder.append(ErrorLabel::kRetryableError);
auto& errorLabels = errorLabelsOverride(opCtx);
invariant(!errorLabels);
errorLabels.emplace(arrayBuilder.arr());
// We simulate a request being rejected by the rate limiter.
uasserted(ErrorCodes::IngressRequestRateLimitExceeded,
"Rejection from the 'failIngressRequestRateLimiting' fail point");
},
[&](const BSONObj& data) {
// Because we don't have a maintenance port yet, we must only simulate the rate limiter
// on non-critical operations.
// TODO(SERVER-114130): Move this fail point to the ingress request rate limiter and
// remove this condition.
if (isExemptFromAdmissionControl) {
return false;
}
// Because we don't have a maintenance port yet, we must only simulate the rate limiter
// on requests coming directly from mongod and mongos. Once the maintenance port is
// implemented, background checks and heartbeats won't interfere with rate-limiting
// behavior.
// TODO(SERVER-114130): Move this fail point to the ingress request rate limiter and
// remove this condition.
auto clientMetadata = ClientMetadata::get(opCtx->getClient());
if (clientMetadata) {
auto document = clientMetadata->getDocument();
auto clientName = clientMetadata->getApplicationName();
auto isFromMongoExecutable =
clientName.ends_with("mongos") || clientName.ends_with("mongod");
if (!isFromMongoExecutable) {
return false;
}
}
return true;
});
if (gIngressAdmissionControlEnabled.load()) {
// The way ingress admission works, one ticket should cover all the work for the operation.
@@ -1758,8 +1808,7 @@ void ExecCommandDatabase::_initiateCommand() {
// of the subsequent admissions of the same operation (e.g. via DBDirectClient) should be
// exempt from ingress admission control.
boost::optional<ScopedAdmissionPriority<IngressAdmissionContext>> admissionPriority;
if (isProcessInternalCommand || !_invocation->isSubjectToIngressAdmissionControl() ||
IngressAdmissionContext::get(opCtx).isHoldingTicket()) {
if (isExemptFromAdmissionControl) {
admissionPriority.emplace(opCtx, AdmissionContext::Priority::kExempt);
}
auto& admissionController = IngressAdmissionController::get(opCtx);
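The fail point attaches both the kSystemOverloadedError and kRetryableError labels to the simulated rejection so that callers can recognize it as a retryable load-shedding error. Below is a minimal sketch of the retry predicate a server-based client might apply to a reply; the on-the-wire label strings and the requirement that both labels be present are illustrative assumptions, not taken from this change:

```cpp
// Simplified view of a command reply, standing in for the real response BSON.
#include <algorithm>
#include <string>
#include <vector>

struct CommandResponse {
    int errorCode = 0;                     // 0 means success.
    std::vector<std::string> errorLabels;  // e.g. {"SystemOverloadedError", "RetryableError"}
};

// Hypothetical retry predicate for a process-internal client: retry only when
// the failed reply is labeled both retryable and overload-related.
bool shouldRetryLoadShedError(const CommandResponse& response) {
    if (response.errorCode == 0) {
        return false;  // Nothing to retry.
    }
    auto hasLabel = [&](const std::string& label) {
        return std::find(response.errorLabels.begin(), response.errorLabels.end(), label) !=
            response.errorLabels.end();
    };
    return hasLabel("SystemOverloadedError") && hasLabel("RetryableError");
}
```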