diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 704e066d844..eb44d2a81be 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -2,8 +2,10 @@
 * @10gen/server-release
 
 # Exclude some test files and READMEs from the backport approvals
-/jstests/**/*
 /etc/backports_required_for_multiversion_tests.yml
 /etc/*.suppressions
 /README.md
 **/README.md
+
+# Exclude all the tests under "jstests" directories from backport approvals
+**/jstests/**/*
diff --git a/SConstruct b/SConstruct
index 1288667f97e..dbe1391c7d9 100644
--- a/SConstruct
+++ b/SConstruct
@@ -1541,26 +1541,6 @@ if env.get('ENABLE_OOM_RETRY'):
     else:
         env['OOM_RETRY_ATTEMPTS'] = 10
         env['OOM_RETRY_MAX_DELAY_SECONDS'] = 120
-
-    if env.ToolchainIs('clang', 'gcc'):
-        env['OOM_RETRY_MESSAGES'] = [
-            ': out of memory',
-            'virtual memory exhausted: Cannot allocate memory',
-            ': fatal error: Killed signal terminated program cc1',
-            # TODO: SERVER-77322 remove this non memory related ICE.
-            r'during IPA pass: cp.+g\+\+: internal compiler error',
-            'ld terminated with signal 9',
-        ]
-    elif env.ToolchainIs('msvc'):
-        env['OOM_RETRY_MESSAGES'] = [
-            'LNK1102: out of memory',
-            'C1060: compiler is out of heap space',
-            'c1xx : fatal error C1063: INTERNAL COMPILER ERROR',
-            r'LNK1171: unable to load mspdbcore\.dll',
-            "LNK1201: error writing to program database ''",
-        ]
-        env['OOM_RETRY_RETURNCODES'] = [1102]
-
     env.Tool('oom_auto_retry')
 
 if env['TARGET_ARCH']:
diff --git a/buildscripts/resmokeconfig/suites/search_pinned_connections_auth.yml b/buildscripts/resmokeconfig/suites/search_pinned_connections_auth.yml
index 6b6b0e07bdc..fc3cecf4018 100644
--- a/buildscripts/resmokeconfig/suites/search_pinned_connections_auth.yml
+++ b/buildscripts/resmokeconfig/suites/search_pinned_connections_auth.yml
@@ -11,6 +11,11 @@ selector:
   exclude_files:
   # Skip any tests that run with auth explicitly.
   - src/mongo/db/modules/*/jstests/search/auth_list_search_indexes_agg.js
+  # This test creates a race condition with the network in pinned connections mode: if mongod
+  # is still waiting on a response from mongot following the getMore, mongod must close the
+  # connection because it cannot send the killCursor command to mongot while the getMore
+  # command is ongoing.
+  - src/mongo/db/modules/enterprise/jstests/mongot/mongot_kill_cursors.js
 
 executor:
   config:
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
index 095ca918119..b48d73bf3f2 100644
--- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
@@ -235,6 +235,9 @@ selector:
   # SERVER-51805 splitChunk op is not idempotent
   - jstests/sharding/mongos_get_shard_version.js
 
+  # Expects reshardCollection to execute without a config server stepdown
+  - jstests/sharding/shard_encrypted_collection.js
+
   # Test will fail if it is unable to lock the config server primary successfully.
- jstests/sharding/fsync_lock_unlock.js - jstests/sharding/fsync_deadlock.js diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index da0f807ff61..e8dee56220c 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -371,6 +371,8 @@ last-continuous: ticket: SERVER-86674 - test_file: jstests/core/wildcard_index_validindex.js ticket: SERVER-93105 + - test_file: jstests/sharding/refresh_sessions.js + ticket: SERVER-94635 suites: null last-lts: all: @@ -790,4 +792,6 @@ last-lts: ticket: SERVER-86674 - test_file: jstests/core/wildcard_index_validindex.js ticket: SERVER-93105 + - test_file: jstests/sharding/refresh_sessions.js + ticket: SERVER-94635 suites: null diff --git a/etc/evergreen.yml b/etc/evergreen.yml index 120d17c883a..0d87fb6a44e 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -302,6 +302,7 @@ buildvariants: expansions: &linux-64-required-duroff-expansions compile_flags: -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --enable-http-client=off --link-model=dynamic --use-diagnostic-latches=on --modules= multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: targeted # Running WiredTiger with --nojournal in a replica set is no longer supported, so this variant # does not include replica set tests. Since transactions are only supported on replica sets, we @@ -464,6 +465,7 @@ buildvariants: compile_flags: --dbg=on --gcov --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on large_distro_name: rhel80-medium multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.5 # Avoid starting too many mongod's # The gcov instrumentation saves the path the .gcno files were created in as the default path @@ -545,6 +547,7 @@ buildvariants: compile_flags: --dbg=on --gcov --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --link-model=dynamic --use-diagnostic-latches=on large_distro_name: rhel80-medium multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.5 # Avoid starting too many mongod's # The gcov instrumentation saves the path the .gcno files were created in as the default path @@ -576,6 +579,7 @@ buildvariants: compile_flags: --dbg=on --gcov --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v4_gcc.vars --link-model=dynamic --use-diagnostic-latches=on large_distro_name: rhel80-medium multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.5 # Avoid starting too many mongod's # The gcov instrumentation saves the path the .gcno files were created in as the default path @@ -604,6 +608,7 @@ buildvariants: compile_flags: --dbg=on --gcov --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v4_clang.vars --link-model=dynamic --use-diagnostic-latches=on large_distro_name: rhel80-medium multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.5 # Avoid starting too many mongod's # The gcov 
instrumentation saves the path the .gcno files were created in as the default path @@ -628,6 +633,7 @@ buildvariants: additional_package_targets: archive-mongocryptd archive-mongocryptd-debug archive-mh archive-mh-debug compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise has_packages: false scons_cache_scope: shared @@ -769,6 +775,7 @@ buildvariants: - rhel80-small expansions: multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise stepback: false tasks: @@ -1097,7 +1104,7 @@ buildvariants: display_name: "* macOS DEBUG" cron: "0 */4 * * *" # From the ${project_required_suggested_cron} parameter run_on: - - macos-1100 + - macos-11 expansions: &macos-debug-expansions test_flags: --excludeWithAnyTags=incompatible_with_macos --enableEnterpriseTests=off resmoke_jobs_max: 6 @@ -1125,7 +1132,7 @@ buildvariants: display_name: "Enterprise macOS Via Rosetta 2" cron: "0 4 * * *" # From the ${project_nightly_cron} parameter. run_on: - - macos-1100-arm64 + - macos-11-arm64 expansions: test_flags: --excludeWithAnyTags=incompatible_with_macos,requires_gcm compile_env: DEVELOPER_DIR=/Applications/Xcode13.app @@ -1151,7 +1158,7 @@ buildvariants: display_name: "Enterprise macOS C++20 DEBUG" cron: "0 4 * * *" # From the ${project_nightly_cron} parameter. run_on: - - macos-1100 + - macos-11 expansions: test_flags: --excludeWithAnyTags=incompatible_with_macos,requires_gcm compile_env: DEVELOPER_DIR=/Applications/Xcode13.app @@ -1175,7 +1182,7 @@ buildvariants: - name: enterprise-macos-arm64 display_name: "~ Enterprise macOS arm64" run_on: - - macos-1100-arm64 + - macos-11-arm64 expansions: test_flags: --excludeWithAnyTags=incompatible_with_macos,requires_gcm compile_env: DEVELOPER_DIR=/Applications/Xcode13.app @@ -1206,7 +1213,7 @@ buildvariants: - name: embedded-sdk-macos display_name: "Embedded SDK - macOS" run_on: - - macos-1100 + - macos-11 cron: "0 4 * * 0" # From the ${project_weekly_cron} parameter expansions: test_flags: --excludeWithAnyTags=uses_transactions,incompatible_with_macos --enableEnterpriseTests=off @@ -1269,6 +1276,7 @@ buildvariants: compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on crypt_task_compile_flags: SHLINKFLAGS_EXTRA="-Wl,-Bsymbolic -Wl,--no-gnu-unique" CCFLAGS="-fno-gnu-unique" multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise has_packages: false scons_cache_scope: shared @@ -1417,6 +1425,7 @@ buildvariants: compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on crypt_task_compile_flags: SHLINKFLAGS_EXTRA="-Wl,-Bsymbolic -Wl,--no-gnu-unique" CCFLAGS="-fno-gnu-unique" multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise has_packages: false scons_cache_scope: shared @@ -1560,6 +1569,7 @@ buildvariants: additional_package_targets: archive-mongocryptd archive-mongocryptd-debug archive-mh archive-mh-debug compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) 
--variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise has_packages: false scons_cache_scope: shared @@ -1686,6 +1696,7 @@ buildvariants: --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise repo_edition: enterprise scons_cache_mode: all @@ -1767,6 +1778,7 @@ buildvariants: --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise repo_edition: enterprise scons_cache_scope: shared @@ -1829,6 +1841,7 @@ buildvariants: --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise repo_edition: enterprise scons_cache_scope: shared @@ -1870,6 +1883,7 @@ buildvariants: # MONGO_DISTMOD=rhel80 # --link-model=dynamic # multiversion_platform: rhel80 +# multiversion_platform_50_or_later: rhel8 # multiversion_edition: enterprise # scons_cache_scope: shared # scons_cache_mode: all @@ -1898,6 +1912,7 @@ buildvariants: additional_package_targets: archive-mongocryptd archive-mongocryptd-debug archive-mh archive-mh-debug compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise has_packages: true packager_script: packager_enterprise.py @@ -2162,59 +2177,6 @@ buildvariants: - name: compile_benchmarks - name: .benchmarks -- name: linux-64-ephemeralForTest - display_name: Linux (ephemeralForTest) - cron: "0 4 * * *" # From the ${project_nightly_cron} parameter. - run_on: - - rhel80-medium - expansions: - # Transactions are not explicitly supported on the ephemeralForTest storage engine. - # Speculative majority reads are currently only allowed for change streams, which are only supported on WiredTiger. - # We also relax oplog constraints so that applying oplog entries after a rollbackViaRefetch works correctly. - # TODO (SERVER-47022): Re-enable oplog constraint enforcement once we set the application mode - # correctly after rollbackViaRefetch. 
- test_flags: >- - --storageEngine=ephemeralForTest - --excludeWithAnyTags=requires_persistence,requires_fsync,requires_journaling,requires_wiredtiger,uses_transactions,uses_speculative_majority,requires_snapshot_read,requires_majority_read_concern,uses_change_streams,requires_sharding,incompatible_with_eft - --mongodSetParameters="{oplogApplicationEnforcesSteadyStateConstraints: false}" - --enableEnterpriseTests=off - compile_flags: -j$(grep -c ^processor /proc/cpuinfo) --dbg=off --opt=on --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --modules= --use-diagnostic-latches=on - multiversion_platform: rhel80 - multiversion_edition: targeted - scons_cache_mode: all - scons_cache_scope: shared - num_scons_link_jobs_available: 0.99 - tasks: - - name: compile_test_and_package_parallel_core_stream_TG - - name: compile_test_and_package_parallel_unittest_stream_TG - - name: compile_test_and_package_parallel_dbtest_stream_TG - # TODO (SERVER-58125): Re-enable the timeseries fuzzer for EFT - - name: .aggfuzzer .common !.timeseries - - name: aggregation - - name: .auth !.multiversion !.audit !.sharding - # SERVER-58597: Re-enable parallel - - name: .misc_js !.parallel - # SERVER-50296: Investigate concurrency test failures. - # - name: concurrency_gen - # - name: concurrency_replication_gen - # - name: concurrency_replication_causal_consistency_gen - # - name: concurrency_simultaneous_gen - # - name: concurrency_simultaneous_replication - - name: .integration !.audit - - name: .jscore .common !.txns !.decimal - # SERVER-50295: Investigate jstestfuzz time outs. - # - name: .jstestfuzz .common - # SERVER-59095: Investigate the EFT failure and re-enable the failing jstest suite on evergreen. - # - name: .logical_session_cache .one_sec - - name: logical_session_cache_sharding_1sec_refresh_jscore_passthrough_gen - - name: logical_session_cache_standalone_1sec_refresh_jscore_passthrough_gen - - name: .read_write_concern .linearize - - name: replica_sets_gen - - name: .replica_sets .common - # SERVER-49428: Disabled due to writeConcernMajorityJournalDefault is not off - # rollback_fuzzer_gen - - name: .updatefuzzer - ########################################### # Experimental buildvariants # ########################################### @@ -2233,6 +2195,7 @@ buildvariants: san_options: LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer" ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer" compile_flags: --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --dbg=on --opt=on --allocator=system --sanitize=address --ssl --ocsp-stapling=off -j$(grep -c ^processor /proc/cpuinfo) --modules= --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.3 # Avoid starting too many mongod's under ASAN build. hang_analyzer_dump_core: false @@ -2315,6 +2278,7 @@ buildvariants: --excludeWithAnyTags=requires_fast_memory,requires_ocsp_stapling --enableEnterpriseTests=off multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.3 # Avoid starting too many mongod's under ASAN build. 
hang_analyzer_dump_core: false @@ -2389,6 +2353,7 @@ buildvariants: --additionalFeatureFlagsFile all_feature_flags.txt --enableEnterpriseTests=off multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.3 # Avoid starting too many mongod's under UBSAN build. scons_cache_scope: shared @@ -2457,6 +2422,7 @@ buildvariants: --excludeWithAnyTags=requires_ocsp_stapling,requires_increased_memlock_limits --enableEnterpriseTests=off multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.3 # Avoid starting too many mongod's under UBSAN build. scons_cache_scope: shared @@ -2638,6 +2604,7 @@ buildvariants: num_scons_link_jobs_available: 0.99 compile_flags: MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise test_flags: >- --mongosSetParameters="{initialServiceExecutorThreadingModel: borrowed}" @@ -2676,6 +2643,7 @@ buildvariants: num_scons_link_jobs_available: 0.99 compile_flags: MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise test_flags: >- --mongosSetParameters="{replicaSetMonitorProtocol: sdam}" @@ -2713,6 +2681,7 @@ buildvariants: num_scons_link_jobs_available: 0.99 compile_flags: MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise test_flags: |- # Set the taskExecutorPoolSize for all tests --mongosSetParameters="taskExecutorPoolSize: 4" @@ -2748,6 +2717,7 @@ buildvariants: num_scons_link_jobs_available: 0.99 compile_flags: MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise test_flags: >- --mongosSetParameters="ShardingTaskExecutorPoolReplicaSetMatching: \"matchPrimaryNode\"" @@ -2784,6 +2754,7 @@ buildvariants: num_scons_link_jobs_available: 0.99 compile_flags: MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise test_flags: >- --mongosSetParameters="ShardingTaskExecutorPoolReplicaSetMatching: \"matchBusiestNode\"" @@ -2820,6 +2791,7 @@ buildvariants: num_scons_link_jobs_available: 0.99 compile_flags: MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise test_flags: >- --mongosSetParameters="ShardingTaskExecutorPoolReplicaSetMatching: \"disabled\"" @@ -2854,6 +2826,7 @@ buildvariants: scons_cache_scope: shared compile_flags: MONGO_DISTMOD=rhel80 -j$(grep -c 
^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise test_flags: >- --mongosSetParameters="joinIngressSessionsOnShutdown: \"true\"" diff --git a/etc/evergreen_nightly.yml b/etc/evergreen_nightly.yml index 7688719bd4c..99eb07df15a 100644 --- a/etc/evergreen_nightly.yml +++ b/etc/evergreen_nightly.yml @@ -173,6 +173,7 @@ buildvariants: compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic crypt_task_compile_flags: SHLINKFLAGS_EXTRA="-Wl,-Bsymbolic -Wl,--no-gnu-unique" CCFLAGS="-fno-gnu-unique" multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise has_packages: false scons_cache_scope: shared @@ -422,6 +423,7 @@ buildvariants: --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise repo_edition: enterprise scons_cache_mode: all @@ -490,6 +492,7 @@ buildvariants: additional_package_targets: archive-mongocryptd archive-mongocryptd-debug archive-mh archive-mh-debug compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_stable_gcc.vars --link-model=dynamic multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise has_packages: false scons_cache_scope: shared diff --git a/etc/evergreen_yml_components/variants/in_memory.yml b/etc/evergreen_yml_components/variants/in_memory.yml index 9f3797c8e7b..a55e4cd7979 100644 --- a/etc/evergreen_yml_components/variants/in_memory.yml +++ b/etc/evergreen_yml_components/variants/in_memory.yml @@ -11,6 +11,7 @@ buildvariants: test_flags: --storageEngine=inMemory --excludeWithAnyTags=requires_persistence,requires_journaling compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise scons_cache_scope: shared large_distro_name: rhel80-large diff --git a/etc/evergreen_yml_components/variants/misc_release.yml b/etc/evergreen_yml_components/variants/misc_release.yml index ce385594026..717b47ebed0 100644 --- a/etc/evergreen_yml_components/variants/misc_release.yml +++ b/etc/evergreen_yml_components/variants/misc_release.yml @@ -786,6 +786,7 @@ buildvariants: compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --modules= test_flags: --enableEnterpriseTests=off --excludeWithAnyTags=requires_latch_analyzer multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: targeted has_packages: true packager_script: packager.py @@ -838,6 +839,7 @@ buildvariants: compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars crypt_task_compile_flags: SHLINKFLAGS_EXTRA="-Wl,-Bsymbolic -Wl,--no-gnu-unique" CCFLAGS="-fno-gnu-unique" multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise has_packages: true packager_script: packager_enterprise.py @@ -2215,7 +2217,7 @@ buildvariants: 
display_name: macOS cron: "0 4 * * *" # From the ${project_nightly_cron} parameter. run_on: - - macos-1100 + - macos-11 expansions: test_flags: --excludeWithAnyTags=incompatible_with_macos,requires_latch_analyzer --enableEnterpriseTests=off push_path: osx @@ -2245,7 +2247,7 @@ buildvariants: display_name: macOS arm64 cron: "0 4 * * *" # From the ${project_nightly_cron} parameter. run_on: - - macos-1100-arm64 + - macos-11-arm64 expansions: test_flags: --excludeWithAnyTags=incompatible_with_macos,requires_latch_analyzer --enableEnterpriseTests=off push_path: osx @@ -2275,7 +2277,7 @@ buildvariants: display_name: Enterprise macOS cron: "0 4 * * *" # From the ${project_nightly_cron} parameter. run_on: - - macos-1100 + - macos-11 expansions: test_flags: --excludeWithAnyTags=incompatible_with_macos,requires_gcm,requires_latch_analyzer additional_package_targets: archive-mongocryptd archive-mongocryptd-debug archive-mh archive-mh-debug @@ -2304,7 +2306,7 @@ buildvariants: display_name: Enterprise macOS arm64 cron: "0 4 * * *" # From the ${project_nightly_cron} parameter. run_on: - - macos-1100-arm64 + - macos-11-arm64 expansions: test_flags: --excludeWithAnyTags=incompatible_with_macos,requires_gcm,requires_latch_analyzer additional_package_targets: archive-mongocryptd archive-mongocryptd-debug archive-mh archive-mh-debug diff --git a/etc/evergreen_yml_components/variants/ninja.yml b/etc/evergreen_yml_components/variants/ninja.yml index 07d5be46125..4dbe6fb576f 100644 --- a/etc/evergreen_yml_components/variants/ninja.yml +++ b/etc/evergreen_yml_components/variants/ninja.yml @@ -17,7 +17,7 @@ buildvariants: display_name: "Ninja Build: macOS Enterprise" cron: "0 4 * * 0" # Run once a week to ensure no failures introduced to ninja builds run_on: - - macos-1100 + - macos-11 expansions: compile_env: DEVELOPER_DIR=/Applications/Xcode13.app compile_flags: --ssl -j$(sysctl -n hw.logicalcpu) --libc++ --variables-files=etc/scons/xcode_macosx.vars diff --git a/etc/evergreen_yml_components/variants/sanitizer.yml b/etc/evergreen_yml_components/variants/sanitizer.yml index bdc478003e8..14a982998ff 100644 --- a/etc/evergreen_yml_components/variants/sanitizer.yml +++ b/etc/evergreen_yml_components/variants/sanitizer.yml @@ -14,6 +14,7 @@ buildvariants: san_options: LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer" ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer" compile_flags: --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --opt=on --allocator=system --sanitize=address --ssl --ocsp-stapling=off -j$(grep -c ^processor /proc/cpuinfo) --use-diagnostic-latches=on multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.3 # Avoid starting too many mongod's under ASAN build. 
hang_analyzer_dump_core: false @@ -41,6 +42,7 @@ buildvariants: compile_flags: --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --dbg=on --opt=on --allocator=system --sanitize=address --ssl --ocsp-stapling=off -j$(grep -c ^processor /proc/cpuinfo) test_flags: --excludeWithAnyTags=requires_fast_memory,requires_ocsp_stapling,requires_increased_memlock_limits,requires_latch_analyzer multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.3 # Avoid starting too many mongod's under ASAN build. hang_analyzer_dump_core: false @@ -113,6 +115,7 @@ buildvariants: compile_flags: --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --dbg=on --opt=on --sanitize=undefined --ssl --ocsp-stapling=off -j$(grep -c ^processor /proc/cpuinfo) --use-diagnostic-latches=on test_flags: --excludeWithAnyTags=requires_ocsp_stapling,requires_increased_memlock_limits multiversion_platform: rhel80 + multiversion_platform_50_or_later: rhel8 multiversion_edition: enterprise resmoke_jobs_factor: 0.3 # Avoid starting too many mongod's under UBSAN build. scons_cache_scope: shared diff --git a/etc/evergreen_yml_components/variants/task_generation.yml b/etc/evergreen_yml_components/variants/task_generation.yml index fa010996291..1f7a0294a28 100644 --- a/etc/evergreen_yml_components/variants/task_generation.yml +++ b/etc/evergreen_yml_components/variants/task_generation.yml @@ -1,8 +1,14 @@ # Build variant to generate tasks for evergreen versions. +# +# Updates to this file may also need to appear in etc/system_perf_yml_components/variants/task_generation.yml, +# which is the same but excludes resmoke task generation tasks. +# buildvariants: - name: generate-tasks-for-version display_name: "Generate tasks for evergreen version" + batchtime: 0 + activate: true run_on: - rhel80-medium tasks: diff --git a/etc/system_perf.yml b/etc/system_perf.yml index 7664bf74e89..752891122c8 100755 --- a/etc/system_perf.yml +++ b/etc/system_perf.yml @@ -4,8 +4,12 @@ exec_timeout_secs: &exec_timeout_secs 21600 timeout_secs: &timeout_secs 7200 include: - - filename: etc/system_perf_yml_components/tasks.yml - - filename: etc/system_perf_yml_components/variants.yml + - filename: evergreen/system_perf/6.0/variants.yml + module: dsi + - filename: evergreen/system_perf/shared_tasks.yml + module: dsi + - filename: evergreen/system_perf/6.0/genny_tasks.yml + module: genny ## Parameters for parameterized builds (see https://github.com/evergreen-ci/evergreen/wiki/Parameterized-Builds) parameters: @@ -90,7 +94,7 @@ modules: owner: mongodb-labs repo: YCSB prefix: ${workdir}/src - branch: production + branch: main - name: py-tpcc owner: mongodb-labs repo: py-tpcc @@ -128,61 +132,6 @@ timeout: functions: - ### - # Same in every DSI project - f_dsi_pre_run: - - command: manifest.load - f_dsi_post_run: - - command: shell.exec - params: - script: ./src/dsi/run-dsi post_run - - command: perf.send - params: - file: ./build/CedarReports/cedar_report.json - aws_key: ${terraform_key} - aws_secret: ${terraform_secret} - bucket: genny-metrics - region: us-east-1 - prefix: ${task_id}_${execution} - - command: attach.results - params: - file_location: ./build/EvergreenResultsJson/results.json - - command: s3.put - params: - aws_key: ${aws_key} - aws_secret: ${aws_secret} - local_file: ./build/Artifacts/DSIArtifacts.tgz - remote_file: ${project_dir}/${build_variant}/${revision}/${task_id}/${version_id}/logs/dsi-artifacts-${task_name}-${build_id}-${execution}.tgz - bucket: mciuploads - 
permissions: public-read - content_type: application/x-gzip - display_name: DSI Artifacts - Execution ${execution} - - command: s3.put - params: - aws_key: ${aws_key} - aws_secret: ${aws_secret} - local_file: ./build/Documentation/index.html - remote_file: ${project_dir}/${build_variant}/${revision}/${task_id}/${version_id}/logs/${task_name}-${build_id}-index.html - bucket: mciuploads - permissions: public-read - content_type: text/html - display_name: Documentation - - command: s3.put - params: - aws_key: ${aws_key} - aws_secret: ${aws_secret} - local_file: bootstrap.yml - remote_file: ${project_dir}/${build_variant}/${revision}/${task_id}/${version_id}/bootstrap-${task_name}-${build_id}-${execution}.yml - bucket: mciuploads - permissions: public-read - content_type: text/plain - display_name: Task Bootstrap Config - f_dsi_timeout: - - command: shell.exec - params: - script: ./src/dsi/run-dsi on_timeout - ### - f_other_post_ops: - command: shell.exec params: @@ -408,14 +357,6 @@ tasks: - func: "compile mongodb" buildvariants: - - name: task_generation - display_name: Task Generation - modules: *modules - run_on: - - amazon2-build - tasks: - - name: schedule_global_auto_tasks - - &compile-amazon2 name: compile-amazon2 display_name: Compile diff --git a/etc/system_perf_yml_components/tasks.yml b/etc/system_perf_yml_components/tasks.yml deleted file mode 100644 index be0aa910373..00000000000 --- a/etc/system_perf_yml_components/tasks.yml +++ /dev/null @@ -1,927 +0,0 @@ -functions: - f_dsi_run_workload: &dsi_run_func # this function replaces f_run_dsi_workload - - command: timeout.update - params: - exec_timeout_secs: ${exec_timeout_secs_override} - timeout_secs: ${timeout_secs_override} - - command: git.get_project - params: - directory: src/mongo - clone_depth: 1000 - revisions: - dsi: ${dsi_rev} - genny: ${genny_rev} - linkbench: ${linkbench_rev} - linkbench2: ${linkbench2_rev} - tsbs: ${tsbs_rev} - workloads: ${workloads_rev} - YCSB: ${YCSB_rev} - flamegraph: ${flamegraph_rev} - PrivateWorkloads: ${PrivateWorkloads_rev} - - command: expansions.write - params: - file: ./expansions.yml - redacted: true - - command: shell.exec - params: - script: ./src/dsi/run-dsi run_workload - - command: shell.exec - type: system - params: - script: ./src/dsi/run-dsi determine_failure -m SYSTEM - - command: shell.exec - type: setup - params: - script: ./src/dsi/run-dsi determine_failure -m SETUP - - command: shell.exec - type: test - params: - script: ./src/dsi/run-dsi determine_failure -m TEST - - f_run_dsi_workload: *dsi_run_func # Do not use this function. It is deprecated. 
- - ## Schedule Tasks ## - f_schedule_tasks: - - command: git.get_project - params: - directory: src/mongo - clone_depth: 1000 - revisions: - dsi: ${dsi_rev} - genny: ${genny_rev} - linkbench: ${linkbench_rev} - linkbench2: ${linkbench2_rev} - tsbs: ${tsbs_rev} - workloads: ${workloads_rev} - mongo-perf: ${mongo-perf_rev} - YCSB: ${YCSB_rev} - py-tpcc: ${py-tpcc_rev} - PrivateWorkloads: ${PrivateWorkloads_rev} - - command: expansions.write - params: - file: ./expansions.yml - - command: shell.exec - params: - script: ./src/dsi/run-dsi schedule_tasks --tasks=${tasks} - - command: generate.tasks - params: - files: - - build/TaskJSON/Tasks.json - -tasks: - ### - # Same in every DSI project - - name: schedule_global_auto_tasks - priority: 5 - commands: - - func: f_schedule_tasks - vars: - tasks: all_tasks - - name: schedule_variant_auto_tasks - priority: 5 - commands: - - func: f_schedule_tasks - vars: - tasks: variant_tasks - - name: schedule_patch_auto_tasks - priority: 5 - commands: - - func: f_schedule_tasks - vars: - tasks: patch_tasks - - name: smoke_test - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: short - - name: canaries_only - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: canaries - - name: smoke_test_ssl - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: short - mongodb_setup: replica-ssl - infrastructure_provisioning: replica - - name: smoke_test_standalone_auth - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: short - mongodb_setup: standalone-auth - infrastructure_provisioning: single - - name: smoke_test_replset_auth - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: short - mongodb_setup: replica-auth - infrastructure_provisioning: replica - - name: smoke_test_shard_lite_auth - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: short - mongodb_setup: shard-lite-auth - infrastructure_provisioning: shard-lite - ### - - - name: linkbench - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "linkbench" - - - name: linkbench_stepdowns - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "linkbench_stepdowns" - - - name: linkbench_rolling_restarts - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "linkbench_rolling_restarts" - - - name: linkbench_non_retryable_writes_stepdowns - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "linkbench_non_retryable_writes_stepdowns" - - - name: linkbench_non_retryable_writes_rolling_restarts - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "linkbench_non_retryable_writes_rolling_restarts" - - - name: linkbench2 - priority: 5 - exec_timeout_secs: 43200 # 12 hours - commands: - - func: f_dsi_run_workload - vars: - test_control: "linkbench2" - additional_tfvars: "tags: {expire-on-delta: 12}" - - - name: tsbs_load - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "tsbs_load" - - - name: tsbs_query - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "tsbs_query" - - - name: tsbs_query_manual_bucketing - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "tsbs_query_manual_bucketing" - - - name: tpcc - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "tpcc" - - - name: tpcc_majority - priority: 5 - commands: - - func: f_dsi_run_workload 
- vars: - test_control: "tpcc_majority" - - - name: industry_benchmarks - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb" - - - name: ycsb_60GB - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb-60GB" - - - name: industry_benchmarks_secondary_reads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb-secondary-reads" - - - name: industry_benchmarks_wmajority - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb-wmajority" - - - name: industry_benchmarks_stepdowns - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb_stepdowns" - - - name: industry_benchmarks_rolling_restarts - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb_rolling_restarts" - - - name: industry_benchmarks_non_retryable_writes_stepdowns - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb_non_retryable_writes_stepdowns" - - - name: industry_benchmarks_non_retryable_writes_rolling_restarts - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb_non_retryable_writes_rolling_restarts" - - - name: ycsb.2023-09 - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb.2023-09" - - - name: ycsb_w1.2023-09 - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb-w1.2023-09" - - - name: ycsb_60GB.2023-09 - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb-60GB.2023-09" - - - name: ycsb_60GB.long.2023-09 - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb-60GB.long.2023-09" - - - name: ycsb_secondary_reads.2023-09 - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb-secondary-reads.2023-09" - - - name: ycsb.load - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "ycsb.load" - - - name: crud_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "crud_workloads" - - - name: crud_workloads_majority - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "crud_workloads_majority" - - - name: crud_workloads_w1 - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "crud_workloads_w1.2023-02" - - - name: cursor_manager - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "cursor_manager" - - - name: mixed_workloads_genny_stepdowns - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "mixed_workloads_genny_stepdowns" - - - name: mixed_workloads_genny_rolling_restarts - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "mixed_workloads_genny_rolling_restarts" - - - name: big_update_10k - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "BigUpdate10k" - - - name: misc_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "misc_workloads" - - - - name: map_reduce_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "map_reduce_workloads" - - - name: genny_canaries - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "genny_canaries" - - - name: retryable_writes_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "retryable_writes" - - - name: snapshot_reads - priority: 5 - commands: - - func: 
f_dsi_run_workload - vars: - test_control: "snapshot_reads" - - - name: secondary_reads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "secondary_reads" - - - name: bestbuy_agg - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "bestbuy_agg" - - - name: bestbuy_agg_merge_same_db - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "bestbuy_agg_merge_same_db" - - - name: bestbuy_agg_merge_different_db - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "bestbuy_agg_merge_different_db" - - - name: bestbuy_agg_merge_target_hashed - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "bestbuy_agg_merge_target_hashed" - - - name: bestbuy_agg_merge_wordcount - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "bestbuy_agg_merge_wordcount" - - - name: bestbuy_query - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "bestbuy_query" - - - name: tpch_1_normalized - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "tpch" - test_control_params: | - {scale: 1, - schema: normalized} - - - name: tpch_1_denormalized - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "tpch" - test_control_params: | - {scale: 1, - schema: denormalized} - - - name: tpch_10_normalized - priority: 5 - exec_timeout_secs: 43200 # 12 hours - commands: - - func: f_dsi_run_workload - vars: - test_control: "tpch" - test_control_params: | - {scale: 10, - schema: normalized} - - - name: tpch_10_denormalized - priority: 5 - exec_timeout_secs: 43200 # 12 hours - commands: - - func: f_dsi_run_workload - vars: - test_control: "tpch" - test_control_params: | - {scale: 10, - schema: denormalized} - - - name: non_sharded_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "non_sharded" - - - name: mongos_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "mongos" - - - name: mongos_large_catalog_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "mongos_large_catalog" - - - name: move_chunk_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "move_chunk" - - - name: move_chunk_waiting_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "move_chunk_waiting" - - - name: move_chunk_large_chunk_map_workloads - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "move_chunk_large_chunk_map" - - - name: refine_shard_key_transaction_stress - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "refine_shard_key_transaction_stress" - - - name: secondary_performance - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - # Unfortunately the dash/underscore style is different for mongodb_setup and test_control - test_control: "secondary_performance" - mongodb_setup: "secondary-performance" - - - name: initialsync - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "initialsync" - - - name: initialsync-fcbis - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "initialsync" - mongodb_setup: "replica-2node-fcbis" - - - name: initialsync-logkeeper - priority: 5 - exec_timeout_secs: 43200 # 12 hours - commands: - - func: f_dsi_run_workload - timeout_secs: 43200 # 12 hours - vars: - test_control: "initialsync-logkeeper" 
- - - name: initialsync-logkeeper-fcbis - priority: 5 - exec_timeout_secs: 43200 # 12 hours - commands: - - func: f_dsi_run_workload - timeout_secs: 43200 # 12 hours - vars: - test_control: "initialsync-logkeeper" - mongodb_setup: "initialsync-logkeeper-fcbis" - - # The following two initial sync logkeeper automation tasks are only used in the commented-out - # "Linux ReplSet Initial Sync LogKeeper Snapshot Update" variant below and are only intended to be - # run in patch builds to update FCV for logkeeper datasets. - - - name: initialsync-logkeeper-snapshot-update - priority: 5 - exec_timeout_secs: 216000 # 2.5 days - commands: - - func: f_dsi_run_workload - vars: - test_control: "initialsync-logkeeper-snapshot-update" - - - name: initialsync-large - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "initialsync-large" - - - name: initialsync-large-fcbis - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "initialsync-large" - mongodb_setup: "replica-2node-fcbis" - - - name: change_streams_latency - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "change_streams_latency" - - - name: change_streams_preimage_throughput - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "change_streams_preimage_throughput" - - - name: change_streams_preimage_latency - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "change_streams_preimage_latency" - - - name: change_streams_listen_throughput - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "change_streams_listen_throughput" - - - name: change_streams_multi_mongos - priority: 5 - commands: - - func: f_dsi_run_workload - vars: - test_control: "change_streams_multi_mongos" - - - name: genny_execution_UserAcquisition - commands: - - func: f_dsi_run_workload - vars: - test_control: auto_genny_workload - auto_workload_path: ./src/genny/dist/etc/genny/workloads/execution/UserAcquisition.yml - - name: genny_scale_InsertRemove - commands: - - func: f_dsi_run_workload - vars: - test_control: auto_genny_workload - auto_workload_path: ./src/genny/dist/etc/genny/workloads/scale/InsertRemove.yml - - name: query_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: query, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: query_read_commands_large_dataset - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: query_large_dataset, - include_filter_2: regression, - exclude_filter: none, - threads: "1 4", - read_cmd: 'true', - share_dataset: 'true'} - - name: big_collection - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: query, - include_filter_2: getmore, - exclude_filter: none, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: views-query - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: query_identityview, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: views-aggregation - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: aggregation_identityview, - 
include_filter_2: regression, - exclude_filter: none, - threads: "1", - read_cmd: 'true'} - - name: where_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: where, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: update_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: update, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: insert_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: insert, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - # - name: compound-wildcard-index-insert - # commands: - # - func: f_dsi_run_workload - # vars: - # test_control: mongo-perf.2023-02 - # test_control_params: | - # {include_filter_1: compound-wildcard-insert, - # include_filter_2: core regression, - # exclude_filter: single_threaded, - # threads: "1 2 4 8", - # read_cmd: 'true'} - - name: wildcard-index-read_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: wildcard_read, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: wildcard-index-write_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: wildcard_write, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: geo_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: geo, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: misc_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: command multi remove mixed, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: misc_custom_filter_default_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - mongodb_setup: mongo-perf-standalone-custom-filter-default.2023-02 - test_control_params: | - {include_filter_1: command multi remove mixed, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: misc_custom_filter_slow_or_sample_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - mongodb_setup: mongo-perf-standalone-custom-filter-slow-or-sample.2023-02 - test_control_params: | - {include_filter_1: command multi remove mixed, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: misc_custom_filter_complex_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - mongodb_setup: mongo-perf-standalone-custom-filter-complex.2023-02 - test_control_params: | - {include_filter_1: command multi remove mixed, - include_filter_2: 
core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: misc_custom_filter_whole_doc_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - mongodb_setup: mongo-perf-standalone-custom-filter-whole-doc.2023-02 - test_control_params: | - {include_filter_1: command multi remove mixed, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: misc_slowms_everything_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - mongodb_setup: mongo-perf-standalone-slowms-everything.2023-02 - test_control_params: | - {include_filter_1: command multi remove mixed, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: singleThreaded_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: single_threaded, - include_filter_2: core regression, - exclude_filter: none, - threads: "1", - read_cmd: 'true'} - - name: aggregation_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: aggregation, - include_filter_2: regression, - exclude_filter: js, - threads: "1", - read_cmd: 'true'} - - name: aggregation_read_commands_large_dataset - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: aggregation_large_dataset, - include_filter_2: regression, - exclude_filter: js, - threads: "1 4", - read_cmd: 'true', - share_dataset: 'true'} - - name: agg-query-comparison_read_commands - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: agg_query_comparison, - include_filter_2: core regression, - exclude_filter: single_threaded, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: pipeline-updates - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: pipeline-updates, - include_filter_2: regression, - exclude_filter: none, - threads: "1 2 4 8", - read_cmd: 'true'} - - name: javascript - commands: - - func: f_dsi_run_workload - vars: - test_control: mongo-perf.2023-02 - test_control_params: | - {include_filter_1: js, - include_filter_2: aggregation, - exclude_filter: none, - threads: "1 2 4 8", - read_cmd: 'true'} diff --git a/etc/system_perf_yml_components/variants.yml b/etc/system_perf_yml_components/variants.yml deleted file mode 100644 index c2a5a78b149..00000000000 --- a/etc/system_perf_yml_components/variants.yml +++ /dev/null @@ -1,272 +0,0 @@ -variables: - _modules: &modules - - mongo-tools - - dsi - - genny - - workloads - - linkbench - - linkbench2 - - tsbs - - mongo-perf - - YCSB - - py-tpcc - - PrivateWorkloads - - flamegraph - _project_dir: &project_dir dsi - _compile_amazon2: &_compile_amazon2 - - name: compile - variant: compile-amazon2 - - name: schedule_global_auto_tasks - variant: task_generation - _compile_amazon_linux2_arm64: &_compile_amazon_linux2_arm64 - - name: compile - variant: compile-amazon-linux2-arm64 - - name: schedule_global_auto_tasks - variant: task_generation - -buildvariants: - - name: perf-atlas-M60-real.arm.aws.2023-11 - display_name: PERF M60-Atlas ReplSet ARM AWS 2023-11 - modules: *modules - expansions: - 
mongodb_setup_release: 2022-11 - mongodb_setup: atlas - canaries: none - atlas_setup: M60-repl - use_custom_build: true - infrastructure_provisioning: workload_client_arm.2023-04 - infrastructure_provisioning_release: 2023-09 - workload_setup: 2022-11 - platform: linux - project_dir: *project_dir - storageEngine: wiredTiger - compile_variant: "-arm64" - run_on: - - "rhel70-perf-atlas-large" - depends_on: - - name: compile - variant: compile-amazon2 - - name: schedule_global_auto_tasks - variant: task_generation - - name: compile - variant: compile-amazon-linux2-arm64 - - name: schedule_global_auto_tasks - variant: task_generation - tasks: - - name: schedule_patch_auto_tasks - - name: schedule_variant_auto_tasks - - name: ycsb.2023-09 - - name: ycsb_60GB.2023-09 - - name: tpcc - - name: tpcc_majority - - name: linkbench - - name: linkbench2 - - - name: perf-atlas-M60-real.intel.azure.2023-11 - display_name: PERF M60-Atlas ReplSet Intel Azure 2023-11 - modules: *modules - expansions: - mongodb_setup_release: 2022-11 - mongodb_setup: atlas - canaries: none - atlas_setup: M60-repl-azure - use_custom_build_azure: true - infrastructure_provisioning: workload_client_intel.2023-11 - infrastructure_provisioning_release: 2023-09 - workload_setup: 2022-11 - platform: linux - project_dir: *project_dir - storageEngine: wiredTiger - run_on: - - "rhel70-perf-atlas-large" - depends_on: *_compile_amazon2 - tasks: # Cannot use *3nodetasks because secondary_performance uses a special mongodb setup - - name: schedule_patch_auto_tasks - - name: schedule_variant_auto_tasks - - name: ycsb.2023-09 - - name: ycsb_60GB.2023-09 - - name: tpcc - - name: tpcc_majority - - name: linkbench - - name: linkbench2 - - - name: perf-3-shard.arm.aws.2023-11 - display_name: PERF 3-Shard Cluster ARM AWS 2023-11 - modules: *modules - expansions: - mongodb_setup_release: 2022-11 - mongodb_setup: shard - infrastructure_provisioning_release: 2023-09 - infrastructure_provisioning: shard - workload_setup: 2022-11 - platform: linux - project_dir: *project_dir - authentication: enabled - storageEngine: wiredTiger - compile_variant: "-arm64" - run_on: - - "rhel70-perf-shard" - depends_on: *_compile_amazon_linux2_arm64 - tasks: - - name: schedule_patch_auto_tasks - - name: schedule_variant_auto_tasks - - name: ycsb.2023-09 - - name: ycsb_w1.2023-09 - - name: crud_workloads_majority - - name: crud_workloads_w1 - - name: misc_workloads - - name: map_reduce_workloads - - name: smoke_test - - name: mongos_workloads - - name: mongos_large_catalog_workloads - - name: change_streams_latency - - name: change_streams_multi_mongos - - - name: perf-3-node-replSet.arm.aws.2023-11 - display_name: PERF 3-Node ReplSet ARM AWS 2023-11 - modules: *modules - expansions: - mongodb_setup_release: 2022-11 - mongodb_setup: replica - infrastructure_provisioning_release: 2023-09 - infrastructure_provisioning: replica - workload_setup: 2022-11 - platform: linux - project_dir: *project_dir - authentication: enabled - storageEngine: wiredTiger - compile_variant: "-arm64" - run_on: - - "rhel70-perf-replset" - depends_on: *_compile_amazon_linux2_arm64 - tasks: &3nodetasks - - name: schedule_patch_auto_tasks - - name: schedule_variant_auto_tasks - - name: ycsb.2023-09 - - name: ycsb_w1.2023-09 - - name: ycsb_60GB.2023-09 - - name: ycsb.load - - name: ycsb_60GB.long.2023-09 - - name: ycsb_secondary_reads.2023-09 - - name: crud_workloads_majority - - name: crud_workloads_w1 - - name: misc_workloads - - name: map_reduce_workloads - - name: 
refine_shard_key_transaction_stress - - name: smoke_test - - name: secondary_performance # Uses a special 2 node mongodb setup - - name: non_sharded_workloads - - name: bestbuy_agg - - name: bestbuy_agg_merge_different_db - - name: bestbuy_agg_merge_same_db - - name: bestbuy_agg_merge_wordcount - - name: bestbuy_query - - name: change_streams_latency - - name: snapshot_reads - - name: secondary_reads - - name: tpcc - - name: linkbench - - name: linkbench2 - - - name: perf-3-node-replSet-intel.intel.aws.2023-11 - display_name: PERF 3-Node ReplSet Intel AWS 2023-11 - modules: *modules - expansions: - mongodb_setup_release: 2022-11 - mongodb_setup: replica - infrastructure_provisioning_release: 2023-09 - infrastructure_provisioning: replica-intel.2023-11 - workload_setup: 2022-11 - platform: linux - project_dir: *project_dir - authentication: enabled - storageEngine: wiredTiger - run_on: - - "rhel70-perf-replset" - depends_on: *_compile_amazon2 - tasks: &3nodetasks - - name: schedule_patch_auto_tasks - - name: schedule_variant_auto_tasks - - name: industry_benchmarks - - name: ycsb.2023-09 - - name: ycsb_60GB.2023-09 - - name: ycsb_60GB.long.2023-09 - - name: crud_workloads_majority - - name: smoke_test - - name: linkbench - - name: linkbench2 - - - - name: perf-2-node-replSet-initialsync.arm.aws.2023-11 - display_name: PERF 2-Node ReplSet Initial Sync ARM AWS 2023-11 - modules: *modules - expansions: - mongodb_setup_release: 2022-11 - mongodb_setup: replica-2node - infrastructure_provisioning_release: 2023-09 - infrastructure_provisioning: replica-2node - workload_setup: 2022-11 - platform: linux - authentication: disabled - storageEngine: wiredTiger - compile_variant: "-arm64" - project_dir: *project_dir - depends_on: *_compile_amazon_linux2_arm64 - run_on: - - "rhel70-perf-replset" - tasks: - - name: schedule_patch_auto_tasks - - name: schedule_variant_auto_tasks - - name: initialsync-large - - - &linux-microbenchmarks-standalone-arm - name: perf-mongo-perf-standalone.arm.aws.2023-11 - display_name: PERF Monogo-Perf Standalone inMemory ARM AWS 2023-11 - cron: "0 0 * * 0" # Weekly, Sundays at 12 AM - modules: *modules - expansions: &standalone-arm-expansions - mongodb_setup_release: 2022-11 - mongodb_setup: mongo-perf-standalone.2023-02 - infrastructure_provisioning_release: 2023-09 - infrastructure_provisioning: workload_client_mongod_combined.2023-01 - workload_setup: 2022-11 - use_scons_cache: true - platform: linux - canaries: none - storageEngine: inMemory - project_dir: *project_dir - compile_variant: "-arm64" - run_on: - - "rhel70-perf-microbenchmarks" - depends_on: *_compile_amazon_linux2_arm64 - tasks: - - name: genny_scale_InsertRemove - - name: genny_execution_UserAcquisition - - name: aggregation_read_commands - - name: agg-query-comparison_read_commands - - name: query_read_commands - - name: views-aggregation - - name: views-query - - name: where_read_commands - - name: update_read_commands - - name: insert_read_commands - - name: wildcard-index-read_read_commands - - name: wildcard-index-write_read_commands - - name: geo_read_commands - - name: misc_read_commands - - name: singleThreaded_read_commands - - name: pipeline-updates - - name: javascript - - - &linux-microbenchmarks-standalone-intel - <<: *linux-microbenchmarks-standalone-arm - name: perf-mongo-perf-standalone.intel.aws.2023-11 - display_name: PERF Mongo-Perf Standalone inMemory Intel AWS 2023-11 - cron: "0 0 * * 0" # Weekly, Sundays at 12 AM - expansions: &standalone-intel-expansions - <<: 
*standalone-arm-expansions - infrastructure_provisioning: workload_client_mongod_combined_intel.2023-01 - compile_variant: "" - run_on: - - "rhel70-perf-microbenchmarks" - depends_on: *_compile_amazon2 diff --git a/evergreen/compiled_binaries_get.sh b/evergreen/compiled_binaries_get.sh index 42372bb93dd..121cb7c8b34 100755 --- a/evergreen/compiled_binaries_get.sh +++ b/evergreen/compiled_binaries_get.sh @@ -36,6 +36,10 @@ if [ ! -z "${multiversion_architecture_44_or_later}" ]; then architecture="${multiversion_architecture_44_or_later}" fi +if [ ! -z "${multiversion_platform_50_or_later}" ]; then + platform="${multiversion_platform_50_or_later}" +fi + version=${project#mongodb-mongo-} version=${version#v} diff --git a/evergreen/do_jepsen_setup/install_jepsen.sh b/evergreen/do_jepsen_setup/install_jepsen.sh index e92b684935e..3c2a6c0f08a 100755 --- a/evergreen/do_jepsen_setup/install_jepsen.sh +++ b/evergreen/do_jepsen_setup/install_jepsen.sh @@ -1,7 +1,7 @@ set -o errexit cd src -git clone --branch=v0.2.0-jepsen-mongodb-master --depth=1 git@github.com:10gen/jepsen.git jepsen-mongodb +git clone --branch=v0.3.0-jepsen-mongodb-master --depth=1 git@github.com:10gen/jepsen.git jepsen-mongodb cd jepsen-mongodb lein install diff --git a/evergreen/multiversion_setup.sh b/evergreen/multiversion_setup.sh index f20257297ff..b3db41d90b6 100644 --- a/evergreen/multiversion_setup.sh +++ b/evergreen/multiversion_setup.sh @@ -110,6 +110,10 @@ if [ ! -z "${multiversion_architecture_last_lts}" ]; then architecture="${multiversion_architecture_last_lts}" fi +if [ ! -z "${multiversion_platform_50_or_later}" ]; then + platform="${multiversion_platform_50_or_later}" +fi + db-contrib-tool setup-repro-env \ --installDir /data/install \ --linkDir /data/multiversion \ diff --git a/jstests/auth/speculative-auth-replset.js b/jstests/auth/speculative-auth-replset.js index c0b8ef6c19a..fa6eabc61b1 100644 --- a/jstests/auth/speculative-auth-replset.js +++ b/jstests/auth/speculative-auth-replset.js @@ -27,8 +27,9 @@ function countAuthInLog(conn) { } } else if (entry.id === kAuthenticationFailedLogId) { // Authentication can fail legitimately because the secondary abandons the connection - // during shutdown. - assert.eq(entry.attr.error.code, ErrorCodes.AuthenticationAbandoned); + // during shutdown. If we do encounter an authentication failure in the log, make sure + // that it is only of this type and fail on anything else. + assert.eq(entry.attr.result, ErrorCodes.AuthenticationAbandoned); } else { // Irrelevant. return; diff --git a/jstests/concurrency/fsm_workloads/random_moveChunk_refine_collection_shard_key.js b/jstests/concurrency/fsm_workloads/random_moveChunk_refine_collection_shard_key.js index d74d2454176..18fa771a945 100644 --- a/jstests/concurrency/fsm_workloads/random_moveChunk_refine_collection_shard_key.js +++ b/jstests/concurrency/fsm_workloads/random_moveChunk_refine_collection_shard_key.js @@ -57,7 +57,17 @@ var $config = extendWorkload($config, function($config, $super) { // migrated back in. The particular error code is replaced with a more generic one, so this // is identified by the failed migration's error message.
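// Illustrative sketch (a hypothetical helper, not part of this patch): FSM states that call
// moveChunk consult the predicate assigned below and rethrow any failure that is not
// allow-listed. 'skey' stands in for the workload's shard key field.
function runMoveChunkTolerantly(db, ns, toShard, isAcceptable) {
    const res = db.adminCommand({moveChunk: ns, find: {skey: 0}, to: toShard});
    if (!res.ok) {
        // Swallow only failures that the workload explicitly allow-lists.
        assert(isAcceptable({code: res.code, message: res.errmsg}), tojson(res));
    }
}
// The allow-list predicate consumed by such a helper is defined next.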
$config.data.isMoveChunkErrorAcceptable = (err) => { - const codes = [ErrorCodes.LockBusy, ErrorCodes.ShardKeyNotFound, ErrorCodes.LockTimeout]; + const codes = [ + // TODO SERVER-68551: Remove the LockBusy error since the balancer will no longer acquire + // the DDL lock for migrations + ErrorCodes.LockBusy, + ErrorCodes.ShardKeyNotFound, + ErrorCodes.LockTimeout, + // The refineCollectionCoordinator interrupts all migrations by setting `allowMigration` + // to false + ErrorCodes.Interrupted, + ErrorCodes.OrphanedRangeCleanUpFailed, + ]; return (err.code && codes.includes(err.code)) || (err.message && (err.message.includes("CommandFailed") || diff --git a/jstests/core/currentop_shell.js b/jstests/core/currentop_shell.js index 5dbbe958dc7..31b084bf132 100644 --- a/jstests/core/currentop_shell.js +++ b/jstests/core/currentop_shell.js @@ -2,7 +2,16 @@ * Tests that the shell helper db.currentOpCursor isn't constrained by the legacy currentOp server * command - ie. the result set isn't limited to 16MB and long operations aren't truncated. * + * Note: On newer branches, this test contains additional cases for the currentOp command (without a + * shell helper) and the $currentOp pipeline stage, which are not included here. Those cases would + * behave unreliably because of SERVER-92284. + * * @tags: [ + * # The collection may be completely moved to another shard, which results in currentOp not + * # returning the expected command. + * assumes_balancer_off, + * # The test runs commands that are not allowed with security token: getLog. + * not_allowed_with_signed_security_token, * uses_parallel_shell, * # This test uses currentOp to check whether an aggregate command is running. In replica set * # environments, because currentOp is run against the admin database it is routed to the @@ -20,31 +29,45 @@ "use strict"; load("jstests/libs/fixture_helpers.js"); // for FixtureHelpers +load('jstests/libs/parallel_shell_helpers.js'); const coll = db.currentOp_cursor; coll.drop(); -for (let i = 0; i < 3; i++) { +for (let i = 0; i < 100; i++) { assert.commandWorked(coll.insert({val: 1})); } +// // Test that db.currentOpCursor() returns an iterable cursor. -let res = db.currentOpCursor(); -assert(res.hasNext()); -assert(res.next()); +// +const cursorFromCurrentOp = db.currentOpCursor(); +assert(cursorFromCurrentOp.hasNext()); +assert(cursorFromCurrentOp.next()); -// Test that db.currentOp() interface does not change. -res = db.currentOp(); -assert("inprog" in res, "Result contains 'inprog' field"); -assert("ok" in res, "Result contains 'ok' field"); +// +// Test that db.currentOp() returns an object in the expected format. +// +const currentOpRes = db.currentOp(); +assert("inprog" in currentOpRes, "Result contains 'inprog' field"); +assert("ok" in currentOpRes, "Result contains 'ok' field"); -// Attempting to access the fsyncLock field from the results throws with an error message. -let error = assert.throws(() => res.fsyncLock); +// +// Test that attempting to access the fsyncLock field from the results throws with an error message.
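// (Aside, grounded only in the error text asserted below: the supported way to inspect the
// fsync lock state is the raw command form, e.g.
//     const raw = db.runCommand({currentOp: 1});
//     // 'raw.fsyncLock' is reported there when the server is fsync-locked.
// The shell helper deliberately throws instead, as the next assertion verifies.)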
+// +const error = assert.throws(() => currentOpRes.fsyncLock); assert( /fsyncLock is no longer included in the currentOp shell helper, run db\.runCommand\({currentOp: 1}\) instead/ .test(error)); -function shellOp() { +// +// Start a pipeline with a large command object in a parallel shell and then test three different +// methods of executing "currentOp" queries to ensure that they all observe the operation and that +// they do or do not truncate its command object (according to each one's specification). +// + +// Starts the query. Intended to be called from a parallel shell. +function startLongRunningAggregation(collName, comment) { function createLargeDoc() { let doc = {}; for (let i = 0; i < 100; i++) { @@ -54,90 +77,96 @@ function shellOp() { } assert.commandFailedWithCode(db.runCommand({ - aggregate: "currentOp_cursor", + aggregate: collName, pipeline: [{ $addFields: { newVal: {$function: {args: [], body: "sleep(1000000)", lang: "js"}}, bigDoc: createLargeDoc() } }], - comment: TestData.comment, + comment: comment, cursor: {} }), ErrorCodes.Interrupted); } -function startShellWithOp(comment) { - TestData.comment = comment; - const awaitShell = startParallelShell(shellOp); - - // Confirm that the operation has started in the parallel shell. +// Repeatedly executes 'getOperationsFunction()' until it returns exactly one operation for each +// shard in a sharded collection or exactly one operation for an unsharded collection. +function awaitOperations(getOperationsFunction) { + let operations; assert.soon( function() { - let aggRes = - db.getSiblingDB("admin") - .aggregate([ - {$currentOp: {}}, - {$match: {ns: "test.currentOp_cursor", "command.comment": TestData.comment}} - ]) - .toArray(); - return aggRes.length >= 1; + const numShards = FixtureHelpers.numberOfShardsForCollection(coll); + operations = getOperationsFunction(); + + // No shard should have more than one operation matching the query comment. First check + // that the total number of operations is no greater than the total number of shards. + assert.lte(operations.length, numShards, operations); + + // Also explicitly check that each shard appears no more than once in the list of + // operations. + const distinctShardNames = new Set(operations.map(op => "shard" in op ? op.shard : "")); + assert.eq(operations.length, distinctShardNames.size, {operations, numShards}); + + if (operations.length < numShards) { + print(`Found ${operations.length} operation(s); waiting until there are ${ numShards} operation(s)`); + return false; + } else if (operations.some(op => op.op !== "getmore" && "cursor" in op && + op.cursor.batchSize === 0)) { + print(`Found command with empty 'batchSize' value; waiting for getmore: ${ tojson(operations)}`); + return false; + } + + return true; }, function() { return "Failed to find parallel shell operation in $currentOp output: " + tojson(db.currentOp()); }); - return awaitShell; + + return operations; } -// Test that the currentOp server command truncates long operations with a warning logged.
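// Illustrative note on the polling contract used by awaitOperations() above: assert.soon()
// re-invokes its predicate until it returns true or the timeout elapses, and on failure reports
// the second argument (a string or a zero-argument function). A minimal sketch:
//
//     assert.soon(() => db.currentOp().inprog.length > 0,
//                 () => "no operations observed: " + tojson(db.currentOp()));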
-const serverCommandTest = startShellWithOp("currentOp_server"); -res = db.adminCommand({ - currentOp: true, - $and: [{"ns": "test.currentOp_cursor"}, {"command.comment": "currentOp_server"}] +function getCommandFromCurrentOpEntry(entry) { + if (entry.op === "command" && "command" in entry) { + return entry.command; + } else if (entry.op === "getmore" && "cursor" in entry && + "originatingCommand" in entry.cursor) { + return entry.cursor.originatingCommand; + } else { + assert(false, entry); + } +} + +const comment = "long_running_aggregation"; +const awaitShell = + startParallelShell(funWithArgs(startLongRunningAggregation, coll.getName(), comment)); + +const filter = { + ns: coll.getFullName(), + "command.comment": comment, + + // On the replica set endpoint, currentOp reports both router and shard operations. So filter + // out one of them. + role: TestData.testingReplicaSetEndpoint ? "ClusterRole{router}" : {$exists: false} +}; + +// The 'currentOp' shell helper should _not_ truncate the command. +const operationsViaCurrentOpShellHelper = awaitOperations(function() { + return db.currentOp(filter).inprog; }); +assert(operationsViaCurrentOpShellHelper.every(op => { + const command = getCommandFromCurrentOpEntry(op); + return !("$truncated" in command) && command.aggregate == coll.getName(); +}), + operationsViaCurrentOpShellHelper); -if (FixtureHelpers.isMongos(db) && FixtureHelpers.isSharded(coll)) { - // Assert currentOp truncation behavior for each shard in the cluster. - assert(res.inprog.length >= 1, res); - res.inprog.forEach((result) => { - assert.eq(result.op, "getmore", result); - assert(result.cursor.originatingCommand.hasOwnProperty("$truncated"), result); - }); -} else { - // Assert currentOp truncation behavior for unsharded collections. - assert.eq(res.inprog.length, 1, res); - assert.eq(res.inprog[0].op, "command", res); - assert(res.inprog[0].command.hasOwnProperty("$truncated"), res); -} - -const log = FixtureHelpers.getPrimaryForNodeHostingDatabase(db).adminCommand({getLog: "global"}); -assert(/will be truncated/.test(log.log)); - -res.inprog.forEach((op) => { +// Finish the test by killing the long-running aggregation pipeline and joining the parallel shell +// that launched it. +for (let op of operationsViaCurrentOpShellHelper) { assert.commandWorked(db.killOp(op.opid)); -}); - -serverCommandTest(); - -// Test that the db.currentOp() shell helper does not truncate ops. -const shellHelperTest = startShellWithOp("currentOp_shell"); -res = db.currentOp({"ns": "test.currentOp_cursor", "command.comment": "currentOp_shell"}); - -if (FixtureHelpers.isMongos(db) && FixtureHelpers.isSharded(coll)) { - assert(res.inprog.length >= 1, res); - res.inprog.forEach((result) => { - assert.eq(result.op, "getmore", result); - assert(!result.cursor.originatingCommand.hasOwnProperty("$truncated"), result); - }); -} else { - assert.eq(res.inprog.length, 1, res); - assert(!res.inprog[0].command.hasOwnProperty("$truncated"), res); } - -res.inprog.forEach((op) => { - assert.commandWorked(db.killOp(op.opid)); -}); - -shellHelperTest(); +awaitShell(); })(); diff --git a/jstests/noPassthrough/or_pushdown_disable_optimization.js b/jstests/noPassthrough/or_pushdown_disable_optimization.js new file mode 100644 index 00000000000..58c4ea1f8d2 --- /dev/null +++ b/jstests/noPassthrough/or_pushdown_disable_optimization.js @@ -0,0 +1,39 @@ +/** + * Test that queries eligible for OR-pushdown optimization do not crash the server when the + * 'disableMatchExpressionOptimization' failpoint is enabled. 
+ * + * Originally designed to reproduce SERVER-70597. + */ +(function() { +"use strict"; + +const conn = MongoRunner.runMongod(); +const db = conn.getDB("test"); + +assert.commandWorked( + db.adminCommand({configureFailPoint: "disableMatchExpressionOptimization", mode: "alwaysOn"})); + +const coll = db.getCollection(jsTestName()); +coll.drop(); +assert.commandWorked(coll.createIndex({a: 1, b: 1})); + +let docs = []; +for (let a = 1; a <= 3; ++a) { + for (let b = 1; b <= 3; ++b) { + docs.push({a, b}); + } +} +assert.commandWorked(coll.insert(docs)); + +// This query has a nested $and, and a one-argument contained $or. Normally we canonicalize this +// predicate by flattening the $and and unwrapping the $or. The OR-pushdown optimization assumes the +// predicate has been canonicalized, but this assumption is broken by the failpoint. +const results = coll.aggregate([ + {$match: {$and: [{$and: [{a: 2}]}, {$or: [{b: 3}]}]}}, + {$unset: "_id"}, + ]) .toArray(); +assert.eq(results, [{a: 2, b: 3}]); + +MongoRunner.stopMongod(conn); +})(); diff --git a/jstests/noPassthrough/read_preference_metrics.js b/jstests/noPassthrough/read_preference_metrics.js index b6290a23f47..2f47a0c4797 100644 --- a/jstests/noPassthrough/read_preference_metrics.js +++ b/jstests/noPassthrough/read_preference_metrics.js @@ -33,9 +33,24 @@ function verifyMetricIncrement(conn, readPref, executedOn, tagged) { const expectedCount = preMetrics[executedOn][readPref].external + 1; const count = postMetrics[executedOn][readPref].external; - assert(expectedCount == count, - `Actual count ${count} did not equal expected count ${ expectedCount} for readPreference ${readPref}.`); + // Replica set nodes run a periodic job to refresh keys for HMAC computation. Although this + // job is internal, it is classified as "external" because it uses a client flagged as such. + // The job performs two find operations on system collections using read preference 'nearest', + // which asynchronously increments the external 'nearest' read preference counter twice. This + // can race with retrieving the pre- and post-metrics, and so we must include a special case. + // The incorrect external classification is fixed in future versions. + if (readPref === "nearest") { + // Ensure the actual count is greater than or equal to the expected count. In the case + // we've hit the race condition, ensure the count is no greater than two more increments + // beyond the expected count. + assert(count >= expectedCount && count <= expectedCount + 2, + `Actual count ${count} is not within the expected range [${expectedCount}, ${expectedCount + 2}] for readPreference ${readPref}.`); + } else { + assert(expectedCount == count, + `Actual count ${count} did not equal expected count ${ expectedCount} for readPreference ${readPref}.`); + } if (tagged) { const expectedTaggedCount = preMetrics[executedOn].tagged.external + 1; @@ -78,38 +93,22 @@ let serverStatus = assert.commandWorked(standalone.getDB("admin").runCommand({se assert(!serverStatus.hasOwnProperty("readPreferenceCounters"), tojson(serverStatus)); MongoRunner.stopMongod(standalone); -// Test that replica set nodes tracks metrics around read preference. The assert.soon() that -// checks for a periodic job to complete below assumes the replica set will have two nodes, so -// we should keep that consistent. +// Test that replica set nodes track metrics around read preference.
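// Illustrative alternative for the 'nearest' special case above (assert.between is the shell's
// interval assertion; the final argument selects inclusive bounds):
//     assert.between(expectedCount, count, expectedCount + 2,
//                    "count outside tolerated range for readPreference nearest", true);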
const rst = new ReplSetTest({nodes: 2}); rst.startSet(); rst.initiateWithHighElectionTimeout(); -// On startup, the replica set nodes will run a periodic job to refresh keys for HMAC computation. -// This job will perform two find operations on system collections, and this will increment the -// external 'nearest' read preference counter twice. We should wait for this periodic job to -// complete on both nodes, so the counters aren't incremented during the test. -assert.soon(() => { - return getReadPreferenceMetrics(rst.getPrimary()).executedOnPrimary.nearest.external >= 2 && - getReadPreferenceMetrics(rst.getSecondary()).executedOnSecondary.nearest.external >= 2; -}); jsTestLog("Testing against replica set"); runTest(rst); rst.stopSet(); // Test that mongos omits metrics around read preference, and shard servers include them. -// The assert.soon() below assumes two shard server nodes, similar to the replica set case above. const st = new ShardingTest({shards: 1, rs: {nodes: 2}}); serverStatus = assert.commandWorked(st.s.getDB("admin").runCommand({serverStatus: 1})); assert(serverStatus.process.startsWith("mongos"), tojson(serverStatus)); assert(!serverStatus.hasOwnProperty("readPreferenceCounters"), tojson(serverStatus)); -// The newly started shard servers will also run the same periodic job mentioned above. -assert.soon(() => { - return getReadPreferenceMetrics(st.rs0.getPrimary()).executedOnPrimary.nearest.external >= 2 && - getReadPreferenceMetrics(st.rs0.getSecondary()).executedOnSecondary.nearest.external >= 2; -}); jsTestLog("Testing against sharded cluster"); runTest(st.rs0); diff --git a/jstests/noPassthroughWithMongod/cursor_server_status_metrics_lifespan_histogram.js b/jstests/noPassthroughWithMongod/cursor_server_status_metrics_lifespan_histogram.js index b80ce4dcc0f..ce6988a7489 100644 --- a/jstests/noPassthroughWithMongod/cursor_server_status_metrics_lifespan_histogram.js +++ b/jstests/noPassthroughWithMongod/cursor_server_status_metrics_lifespan_histogram.js @@ -22,14 +22,6 @@ function getNumCursorsLessThan30Seconds() { return db.serverStatus().metrics.cursor.lifespan.lessThan30Seconds; } -function getNumCursorsLessThan1Minute() { - return db.serverStatus().metrics.cursor.lifespan.lessThan1Minute; -} - -function getNumCursorsLessThan10Minutes() { - return db.serverStatus().metrics.cursor.lifespan.lessThan10Minutes; -} - for (let i = 0; i < 40; i++) { coll.insert({a: i, b: "field b"}); } @@ -38,8 +30,6 @@ const initialNumCursorsLt1s = getNumCursorsLessThan1Second(); const initialNumCursorsLt5s = getNumCursorsLessThan5Seconds(); const initialNumCursorsLt15s = getNumCursorsLessThan15Seconds(); const initialNumCursorsLt30s = getNumCursorsLessThan30Seconds(); -const initialNumCursorsLt1m = getNumCursorsLessThan1Minute(); -const initialNumCursorsLt10m = getNumCursorsLessThan10Minutes(); // Since we aren't guaranteed perfect timings, the checks in this test have been relaxed to window // sizes of 30s. For example, a cursor that is expected to die in under 5s may actually take longer @@ -65,21 +55,4 @@ for (let i = 0; i < 3; i++) { } assert.eq(cursorsDeadSinceStartLt30Seconds(), 4); - -const cursorLt1Minute = coll.find().batchSize(2); -const cursorLt10Minutes = coll.aggregate([], {cursor: {batchSize: 2}}); -cursorLt1Minute.next(); -cursorLt10Minutes.next(); - -sleep(31000); // Sleep for 31 s. 
-while (cursorLt1Minute.hasNext()) { - cursorLt1Minute.next(); -} -assert.eq(getNumCursorsLessThan1Minute() - initialNumCursorsLt1m, 1); - -sleep(30000); // Sleep another 30s, so the total should be greater than 1m and less than 10m. -while (cursorLt10Minutes.hasNext()) { - cursorLt10Minutes.next(); -} -assert.eq(getNumCursorsLessThan10Minutes() - initialNumCursorsLt10m, 1); }()); \ No newline at end of file diff --git a/jstests/sharding/range_deletions_setFCV.js b/jstests/sharding/range_deletions_setFCV.js index a664e20ab54..6a0091a1c7c 100644 --- a/jstests/sharding/range_deletions_setFCV.js +++ b/jstests/sharding/range_deletions_setFCV.js @@ -13,6 +13,8 @@ load("jstests/libs/fail_point_util.js"); load('jstests/libs/parallel_shell_helpers.js'); +load("jstests/sharding/libs/chunk_bounds_util.js"); +load("jstests/sharding/libs/find_chunks_util.js"); const rangeDeleterBatchSize = 128; @@ -23,75 +25,127 @@ const st = new ShardingTest({ } }); -// Setup database and collection for test const dbName = 'db'; +const numDocsInColl = 1000; const db = st.getDB(dbName); +const configDB = st.getDB('config'); +const primaryShard = st.shard0; assert.commandWorked( - st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName})); -const coll = db['test']; -const nss = coll.getFullName(); -assert.commandWorked(st.s.adminCommand({shardCollection: nss, key: {_id: 1}})); + st.s.adminCommand({enableSharding: dbName, primaryShard: primaryShard.shardName})); -assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: lastContinuousFCV})); - -function assertOrphanCountIsCorrectOrMissing(conn, ns, numOrphans) { - let fcv = - assert.commandWorked(conn.adminCommand({getParameter: 1, featureCompatibilityVersion: 1})); +function checkNumOrphansOnRangeDeletionTask(conn, ns, numOrphans) { const rangeDeletionDoc = conn.getDB("config").getCollection("rangeDeletions").findOne({nss: ns}); - if (fcv.featureCompatibilityVersion.version === "6.0") { - assert.neq( - null, - rangeDeletionDoc, - "did not find document for namespace " + ns + - ", contents of config.rangeDeletions on " + conn + ": " + - tojson(conn.getDB("config").getCollection("rangeDeletions").find().toArray())); - assert.eq(numOrphans, - rangeDeletionDoc.numOrphanDocs, - "Incorrect count of orphaned documents in config.rangeDeletions on " + conn + - ": expected " + numOrphans + - " orphaned documents but found range deletion document " + - tojson(rangeDeletionDoc)); + assert.neq(null, + rangeDeletionDoc, + "did not find document for namespace " + ns + + ", contents of config.rangeDeletions on " + conn + ": " + + tojson(conn.getDB("config").getCollection("rangeDeletions").find().toArray())); + assert.eq(numOrphans, + rangeDeletionDoc.numOrphanDocs, + "Incorrect count of orphaned documents in config.rangeDeletions on " + conn + + ": expected " + numOrphans + + " orphaned documents but found range deletion document " + + tojson(rangeDeletionDoc)); +} + +function moveChunkAndCheckRangeDeletionTasksUponFCVUpgrade(nss, donorShard, moveChunkCmd) { + // Ensure that no outstanding range deletion task will actually be executed (so that their + // recovery docs may be inspected). 
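// A note on the failpoint handle API this test relies on (provided by
// jstests/libs/fail_point_util.js, loaded above): configureFailPoint() enables a failpoint and
// returns a handle; handle.wait() blocks until a server thread parks on it; handle.off()
// disables it and releases the parked thread. Stepping the range deleter one batch at a time,
// as the code below does, is an alternation of those calls:
//
//     const fp = configureFailPoint(conn, "hangBeforeDoingDeletion");
//     fp.wait();  // the range deleter is parked before deleting the next batch
//     fp.off();   // release exactly one batch (the companion failpoint catches it after)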
+ let beforeDeletionFailpoint = configureFailPoint(donorShard, "hangBeforeDoingDeletion"); + let afterDeletionFailpoint = configureFailPoint(donorShard, "hangAfterDoingDeletion"); + + // Downgrade the cluster + assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: lastContinuousFCV})); + + // Upgrade the cluster - pausing the process before the "drain outstanding migrations" step + let pauseBeforeDrainingMigrations = + configureFailPoint(donorShard, "hangBeforeDrainingMigrations"); + const joinFCVUpgrade = startParallelShell( + funWithArgs(function(fcv) { + assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: fcv})); + }, latestFCV), st.s.port); + pauseBeforeDrainingMigrations.wait(); + // Complete a migration and check the pending range deletion tasks in the donor + assert.commandWorked(db.adminCommand(moveChunkCmd)); + + pauseBeforeDrainingMigrations.off(); + joinFCVUpgrade(); + // Check the batches are deleted correctly + const numBatches = numDocsInColl / rangeDeleterBatchSize; + assert(numBatches > 0); + for (let i = 0; i < numBatches; i++) { + // Wait for failpoint and check num orphans + beforeDeletionFailpoint.wait(); + checkNumOrphansOnRangeDeletionTask( + donorShard, nss, numDocsInColl - rangeDeleterBatchSize * i); + // Unset and reset failpoint without allowing any batches deleted in the meantime + afterDeletionFailpoint = configureFailPoint(donorShard, "hangAfterDoingDeletion"); + beforeDeletionFailpoint.off(); + afterDeletionFailpoint.wait(); + beforeDeletionFailpoint = configureFailPoint(donorShard, "hangBeforeDoingDeletion"); + afterDeletionFailpoint.off(); } -} - -// Insert some docs into the collection. -const numDocs = 1000; -let bulk = coll.initializeUnorderedBulkOp(); -for (let i = 0; i < numDocs; i++) { - bulk.insert({_id: i}); -} -assert.commandWorked(bulk.execute()); - -// Pause before first range deletion task -let beforeDeletionFailpoint = configureFailPoint(st.shard0, "hangBeforeDoingDeletion"); -let afterDeletionFailpoint = configureFailPoint(st.shard0, "hangAfterDoingDeletion"); - -// Upgrade FCV to 6.0 -let pauseBeforeDrainingMigrations = configureFailPoint(st.shard0, "hangBeforeDrainingMigrations"); -const FCVUpgrade = startParallelShell( - funWithArgs(function(fcv) { - assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: fcv})); - }, latestFCV), st.s.port); -pauseBeforeDrainingMigrations.wait(); -assert.commandWorked(db.adminCommand({moveChunk: nss, find: {_id: 0}, to: st.shard1.shardName})); - -pauseBeforeDrainingMigrations.off(); -// Check the batches are deleted correctly -const numBatches = numDocs / rangeDeleterBatchSize; -for (let i = 0; i < numBatches; i++) { - // Wait for failpoint and check num orphans - beforeDeletionFailpoint.wait(); - assertOrphanCountIsCorrectOrMissing(st.shard0, nss, numDocs - rangeDeleterBatchSize * i); - // Unset and reset failpoint without allowing any batches deleted in the meantime - afterDeletionFailpoint = configureFailPoint(st.shard0, "hangAfterDoingDeletion"); beforeDeletionFailpoint.off(); - afterDeletionFailpoint.wait(); - beforeDeletionFailpoint = configureFailPoint(st.shard0, "hangBeforeDoingDeletion"); - afterDeletionFailpoint.off(); } -beforeDeletionFailpoint.off(); -FCVUpgrade(); +function testAgainstCollectionWithRangeShardKey() { + // Fill up a sharded collection + const coll = db['collWithRangeShardKey']; + const nss = coll.getFullName(); + assert.commandWorked(st.s.adminCommand({shardCollection: nss, key: {_id: 1}})); + + // Insert some docs into the 
collection. + let bulk = coll.initializeUnorderedBulkOp(); + for (let i = 0; i < numDocsInColl; i++) { + bulk.insert({_id: i}); + } + assert.commandWorked(bulk.execute()); + + const moveChunkCmd = {moveChunk: nss, find: {_id: 0}, to: st.shard1.shardName}; + + moveChunkAndCheckRangeDeletionTasksUponFCVUpgrade(nss, primaryShard, moveChunkCmd); +} + +function testAgainstCollectionWithHashedShardKey() { + const collName = 'collWithHashedShardKey'; + const hotKeyValue = 'hotKeyValue'; + const hashedKeyValue = convertShardKeyToHashed(hotKeyValue); + const docWithHotShardKey = {k: hotKeyValue}; + const coll = db[collName]; + const nss = coll.getFullName(); + assert.commandWorked( + st.s.adminCommand({shardCollection: nss, key: {k: 'hashed'}, numInitialChunks: 1})); + + // Insert some docs into the collection. + let bulk = coll.initializeUnorderedBulkOp(); + for (let i = 0; i < numDocsInColl; i++) { + bulk.insert(docWithHotShardKey); + } + assert.commandWorked(bulk.execute()); + + // All the documents are supposed to be stored within a single shard + const allCollChunks = findChunksUtil.findChunksByNs(configDB, nss).toArray(); + const chunksWithDoc = allCollChunks.filter((chunk) => { + return chunkBoundsUtil.containsKey({k: hashedKeyValue}, chunk.min, chunk.max); + }); + assert.eq(1, chunksWithDoc.length); + const shardHoldingData = chunksWithDoc[0].shard === st.shard0.shardName ? st.shard0 : st.shard1; + const shardWithoutData = + shardHoldingData.shardName === st.shard0.shardName ? st.shard1 : st.shard0; + + const moveChunkCmd = { + moveChunk: nss, + bounds: [chunksWithDoc[0].min, chunksWithDoc[0].max], + to: shardWithoutData.shardName + }; + + moveChunkAndCheckRangeDeletionTasksUponFCVUpgrade(nss, shardHoldingData, moveChunkCmd); +} + +// Test Cases +testAgainstCollectionWithRangeShardKey(); +testAgainstCollectionWithHashedShardKey(); + st.stop(); })(); diff --git a/jstests/sharding/refresh_sessions.js b/jstests/sharding/refresh_sessions.js index c6d229707ca..dec07890b38 100644 --- a/jstests/sharding/refresh_sessions.js +++ b/jstests/sharding/refresh_sessions.js @@ -8,8 +8,15 @@ var sessionsDb = "config"; var refresh = {refreshLogicalSessionCacheNow: 1}; var startSession = {startSession: 1}; -// Create a cluster with 1 shard. -var cluster = new ShardingTest({shards: 2}); +var cluster = new ShardingTest({ + mongos: [{setParameter: {sessionWriteConcernTimeoutSystemMillis: 0, sessionMaxBatchSize: 500}}], + shards: 2, + rs: {setParameter: {sessionWriteConcernTimeoutSystemMillis: 0, sessionMaxBatchSize: 500}}, + other: { + configOptions: + {setParameter: {sessionWriteConcernTimeoutSystemMillis: 0, sessionMaxBatchSize: 500}} + } +}); // Test that we can refresh without any sessions, as a sanity check. { diff --git a/jstests/sharding/resharding_coordinator_recovers_abort_decision.js b/jstests/sharding/resharding_coordinator_recovers_abort_decision.js index 124e575208b..623f1adda62 100644 --- a/jstests/sharding/resharding_coordinator_recovers_abort_decision.js +++ b/jstests/sharding/resharding_coordinator_recovers_abort_decision.js @@ -94,10 +94,11 @@ reshardingTest.withReshardingInBackground( assert.commandWorked(mongos.getDB("admin").killOp(ops[0].opid)); // Step down the config shard's primary. 
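// Illustrative context for the change below: replSetStepDown with force:true makes the primary
// step down even when no secondary is caught up, so the config replica set may briefly have no
// primary. Waiting before proceeding, e.g.
//     configRS.waitForPrimary();
// keeps the retried _configsvrReshardCollection command from racing with the election.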
- let replSet = reshardingTest.getReplSetForShard(reshardingTest.configShardName); - let primary = replSet.getPrimary(); + let configRS = reshardingTest.getReplSetForShard(reshardingTest.configShardName); + let primary = configRS.getPrimary(); assert.commandWorked( primary.getDB("admin").runCommand({replSetStepDown: 60, force: true})); + configRS.waitForPrimary(); // After a stepdown, the _configsvrReshardCollection command will be retried by the // primary shard. We use the reshardCollectionJoinedExistingOperation failpoint to @@ -116,7 +117,6 @@ reshardingTest.withReshardingInBackground( // Wait for secondaries to recover and catchup with primary before turning off the // failpoints as a replication roll back can disconnect the test client. - const configRS = reshardingTest.getReplSetForShard(reshardingTest.configShardName); configRS.awaitSecondaryNodes(); configRS.awaitReplication(); reshardCollectionJoinedFailPointsList.forEach( diff --git a/jstests/sharding/ttl_deletes_not_targeting_orphaned_documents.js b/jstests/sharding/ttl_deletes_not_targeting_orphaned_documents.js index 10ec9c91e31..7590dce0a73 100644 --- a/jstests/sharding/ttl_deletes_not_targeting_orphaned_documents.js +++ b/jstests/sharding/ttl_deletes_not_targeting_orphaned_documents.js @@ -21,14 +21,12 @@ const testDB = st.s.getDB('test'); const coll = testDB[jsTest.name()]; const collName = coll.getFullName(); +// Shard a collection on _id:1 so that the initial chunk will reside on the primary shard (shard0) assert.commandWorked( st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName})); assert.commandWorked(st.s.adminCommand({shardCollection: collName, key: {_id: 1}})); -// Initialize TTL index: delete documents with field `a: ` after 20 seconds -assert.commandWorked(coll.createIndex({a: 1}, {expireAfterSeconds: 20})); - -// Insert documents that are going to be deleted in 20 seconds +// Insert documents that are going to be deleted by the TTL index created later on const currTime = new Date(); var bulk = coll.initializeUnorderedBulkOp(); const nDocs = 100; @@ -37,16 +35,20 @@ for (let i = 0; i < nDocs; i++) { } assert.commandWorked(bulk.execute()); -// Move all documents on other shards +// Move all documents to the other shard (shard1) but keep a chunk on shard0 to create the TTL index +assert.commandWorked(st.s.adminCommand({split: collName, middle: {_id: -1}})); assert.commandWorked( st.s.adminCommand({moveChunk: collName, find: {_id: 0}, to: st.shard1.shardName})); -// Verify that TTL index worked properly on owned documents +// Initialize TTL index: delete documents with field `a: ` older than 1 second +assert.commandWorked(coll.createIndex({a: 1}, {expireAfterSeconds: 1})); + +// Verify that TTL index worked properly on owned documents on shard1 assert.soon(function() { return coll.countDocuments({}) == 0; }, "Failed to move all documents", 60000 /* 60 seconds */, 5000 /* 5 seconds */); -// Verify that TTL index did not delete orphaned documents +// Verify that TTL index did not delete orphaned documents on shard0 assert.eq(nDocs, st.rs0.getPrimary().getCollection(collName).countDocuments({})); st.stop(); diff --git a/jstests/sharding/verify_sessions_expiration_sharded.js b/jstests/sharding/verify_sessions_expiration_sharded.js index bc3cf254755..ea225b96d04 100644 --- a/jstests/sharding/verify_sessions_expiration_sharded.js +++ b/jstests/sharding/verify_sessions_expiration_sharded.js @@ -123,6 +123,10 @@ for (let i = 0; i < 3; i++) { lastUseValues[j] = sessionsCollectionArray[j].lastUse; } } 
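// Illustrative note for the sleep added below: 'lastUse' is a BSON date, which stores whole
// milliseconds, so two refreshes within the same millisecond would record equal timestamps:
//     const a = new Date(), b = new Date();
//     a.getTime() === b.getTime();  // frequently true without an intervening sleep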
+ + // Date_t has the granularity of milliseconds, so we have to make sure we don't run this loop + // faster than that. + sleep(10); } // 3. Verify that letting sessions expire (simulated by manual deletion) will kill their diff --git a/site_scons/site_tools/oom_auto_retry.py b/site_scons/site_tools/oom_auto_retry.py index 7ff457d2798..d051dfc7383 100644 --- a/site_scons/site_tools/oom_auto_retry.py +++ b/site_scons/site_tools/oom_auto_retry.py @@ -32,6 +32,9 @@ import re from typing import Callable, List, Dict +# Note: The auto-retry settings are prefixed w/ "OOM", but since it's an unconditional retry, +# it's not really OOM-specific. We're keeping the OOM prefix to make the code change simpler. +# (This custom retry logic will go away once the build is fully Bazelified). def command_spawn_func(sh: str, escape: Callable[[str], str], cmd: str, args: List, env: Dict, target: List, source: List): @@ -39,11 +42,6 @@ def command_spawn_func(sh: str, escape: Callable[[str], str], cmd: str, args: Li success = False build_env = target[0].get_build_env() - oom_messages = [ - re.compile(msg, re.MULTILINE | re.DOTALL) - for msg in build_env.get('OOM_RETRY_MESSAGES', []) - ] - oom_returncodes = [int(returncode) for returncode in build_env.get('OOM_RETRY_RETURNCODES', [])] max_retries = build_env.get('OOM_RETRY_ATTEMPTS', 10) oom_max_retry_delay = build_env.get('OOM_RETRY_MAX_DELAY_SECONDS', 120) @@ -63,16 +61,14 @@ def command_spawn_func(sh: str, escape: Callable[[str], str], cmd: str, args: Li except subprocess.CalledProcessError as exc: print(f"{os.path.basename(__file__)} captured error:") print(exc.stdout) - if any([re.findall(oom_message, exc.stdout) for oom_message in oom_messages]) or any( - [oom_returncode == exc.returncode for oom_returncode in oom_returncodes]): - retries += 1 - retry_delay = int((time.time() - start_time) + - oom_max_retry_delay * random.random()) - print(f"Ran out of memory while trying to build {target[0]}", ) - if retries <= max_retries: - print(f"trying again in {retry_delay} seconds with retry attempt {retries}") - time.sleep(retry_delay) - continue + retries += 1 + retry_delay = int((time.time() - start_time) + + oom_max_retry_delay * random.random()) + print(f"Failed while trying to build {target[0]}", ) + if retries <= max_retries: + print(f"trying again in {retry_delay} seconds with retry attempt {retries}") + time.sleep(retry_delay) + continue # There was no OOM error or no more OOM retries left return exc.returncode diff --git a/src/mongo/client/cyrus_sasl_client_session.cpp b/src/mongo/client/cyrus_sasl_client_session.cpp index 3f920ffff67..2bca7e1b1de 100644 --- a/src/mongo/client/cyrus_sasl_client_session.cpp +++ b/src/mongo/client/cyrus_sasl_client_session.cpp @@ -47,7 +47,8 @@ void saslSetError(sasl_conn_t* conn, const std::string& msg) { } SaslClientSession* createCyrusSaslClientSession(const std::string& mech) { - if ((mech == "SCRAM-SHA-1") || (mech == "SCRAM-SHA-256") || mech == "MONGODB-AWS") { + if ((mech == "SCRAM-SHA-1") || (mech == "SCRAM-SHA-256") || (mech == "PLAIN") || + mech == "MONGODB-AWS") { return new NativeSaslClientSession(); } return new CyrusSaslClientSession(); diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index f25da06afbe..c7e4b6ff146 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -1713,6 +1713,7 @@ env.Library( target='sessions_collection', source=[ 'sessions_collection.cpp', + 'sessions_server_parameters.idl' ], LIBDEPS=[ '$BUILD_DIR/mongo/base', diff --git 
a/src/mongo/db/query/plan_enumerator.cpp b/src/mongo/db/query/plan_enumerator.cpp index 9e30afa0049..9ad6f6b81aa 100644 --- a/src/mongo/db/query/plan_enumerator.cpp +++ b/src/mongo/db/query/plan_enumerator.cpp @@ -262,7 +262,8 @@ PlanEnumerator::PlanEnumerator(const PlanEnumeratorParams& params) _ixisect(params.intersect), _enumerateOrChildrenLockstep(params.enumerateOrChildrenLockstep), _orLimit(params.maxSolutionsPerOr), - _intersectLimit(params.maxIntersectPerAnd) {} + _intersectLimit(params.maxIntersectPerAnd), + _disableOrPushdown(params.disableOrPushdown) {} PlanEnumerator::~PlanEnumerator() { typedef stdx::unordered_map MemoMap; @@ -529,10 +530,14 @@ bool PlanEnumerator::prepMemo(MatchExpression* node, PrepMemoContext context) { // preds to 'indexedPreds'. Adding the mandatory preds directly to 'indexedPreds' would lead // to problems such as pulling a predicate beneath an OR into a set joined by an AND. getIndexedPreds(node, childContext, &indexedPreds); - // Pass in the indexed predicates as outside predicates when prepping the subnodes. + // Pass in the indexed predicates as outside predicates when prepping the subnodes. But if + // match expression optimization is disabled, skip this part: we don't want to do + // OR-pushdown because it relies on the expression being canonicalized. auto childContextCopy = childContext; - for (auto pred : indexedPreds) { - childContextCopy.outsidePreds[pred] = OutsidePredRoute{}; + if (MONGO_likely(!_disableOrPushdown)) { + for (auto pred : indexedPreds) { + childContextCopy.outsidePreds[pred] = OutsidePredRoute{}; + } } if (!prepSubNodes(node, childContextCopy, &subnodes, &mandatorySubnodes)) { return false; @@ -836,6 +841,13 @@ void PlanEnumerator::assignPredicate( MatchExpression* pred, size_t position, OneIndexAssignment* indexAssignment) { + if (MONGO_unlikely(_disableOrPushdown)) { + // If match expression optimization is disabled, we also disable OR-pushdown, + // so we should never get 'outsidePreds' here. + tassert(7059700, + "Tried to do OR-pushdown despite disableMatchExpressionOptimization", + outsidePreds.empty()); + } if (outsidePreds.find(pred) != outsidePreds.end()) { OrPushdownTag::Destination dest; dest.route = outsidePreds.at(pred).route; diff --git a/src/mongo/db/query/plan_enumerator.h b/src/mongo/db/query/plan_enumerator.h index b82b738c57b..9eabd5b09b9 100644 --- a/src/mongo/db/query/plan_enumerator.h +++ b/src/mongo/db/query/plan_enumerator.h @@ -44,7 +44,8 @@ namespace mongo { struct PlanEnumeratorParams { PlanEnumeratorParams() : maxSolutionsPerOr(internalQueryEnumerationMaxOrSolutions.load()), - maxIntersectPerAnd(internalQueryEnumerationMaxIntersectPerAnd.load()) {} + maxIntersectPerAnd(internalQueryEnumerationMaxIntersectPerAnd.load()), + disableOrPushdown(disableMatchExpressionOptimization.shouldFail()) {} // Do we provide solutions that use more indices than the minimum required to provide // an indexed solution? @@ -69,6 +70,11 @@ struct PlanEnumeratorParams { // all-pairs approach, we could wind up creating a lot of enumeration possibilities for // certain inputs. size_t maxIntersectPerAnd; + + // Whether to disable OR-pushdown optimization. OR-pushdown assumes that the expression has been + // simplified: for example, that single-child $or nodes are unwrapped. To avoid this, when + // the 'disableMatchExpressionOptimization' failpoint is set, we also disable OR-pushdown. + bool disableOrPushdown; }; /** @@ -594,6 +600,9 @@ private: // How many things do we want from each AND? 
size_t _intersectLimit; + + // Whether we should disable OR-pushdown optimization. + const bool _disableOrPushdown; }; } // namespace mongo diff --git a/src/mongo/db/query/query_planner_tree_test.cpp b/src/mongo/db/query/query_planner_tree_test.cpp index 5b7055dd720..7ea6d55dfb3 100644 --- a/src/mongo/db/query/query_planner_tree_test.cpp +++ b/src/mongo/db/query/query_planner_tree_test.cpp @@ -434,6 +434,39 @@ TEST_F(QueryPlannerTest, RootedOrOfAndDontCollapseDifferentBounds) { "bounds: {c: [[3,3,true,true]], d: [[4,4,true,true]]}}}]}}}}"); } +TEST_F(QueryPlannerTest, DontCrashTryingToPushToSingleChildIndexedOr1) { + FailPointEnableBlock failPoint("disableMatchExpressionOptimization"); + addIndex(BSON("indexed" << 1)); + runQuery( + fromjson("{ $and : [\n" + " { $and : [ { indexed : { $gt : 5 } },\n" + " { unindexed : 42 } ] },\n" + " { $or : [ { indexed: { $lt : 100 } } ] }\n" + " ] }")); + + assertNumSolutions(3U); +} + +TEST_F(QueryPlannerTest, DontCrashTryingToPushToSingleChildIndexedOr2) { + // Test that queries with single-child $and, $or do not crash when match-expression optimization + // is disabled. Normally these single-child nodes are eliminated, so when they are left in place + // it can confuse OR-pushdown optimization. + // + // Originally designed to reproduce SERVER-70597, which would only happen when the + // INDEX_INTERSECTION option is enabled. + FailPointEnableBlock failPoint("disableMatchExpressionOptimization"); + addIndex(BSON("a" << 1 << "b" << 1)); + + params.options |= QueryPlannerParams::INDEX_INTERSECTION; + runQuery( + fromjson("{ $and : [\n" + " { $and : [ { a : 2 } ] },\n" + " { $or : [ { b : 3 } ] }\n" + " ] }")); + + assertNumSolutions(2U); +} + // SERVER-13960: properly handle $or with a mix of exact and inexact predicates. TEST_F(QueryPlannerTest, OrInexactWithExact) { addIndex(BSON("name" << 1)); diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 9b750af05ed..3e79a96362e 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -1906,8 +1906,15 @@ Status applyOperation_inlock(OperationContext* opCtx, // It is legal for a delete operation on the pre-images collection to delete zero // documents - pre-image collections are not guaranteed to contain the same set of // documents at all times. + // + // It is also legal for a delete operation on the config.image_collection (used for + // find-and-modify retries) to delete zero documents. Since we do not write updates + // to this collection which are in the same batch as later deletes, a rollback to + // the middle of a batch with both an update and a delete may result in a missing + // document, which may be later deleted. if (result.nDeleted == 0 && mode == OplogApplication::Mode::kSecondary && - !requestNss.isChangeStreamPreImagesCollection()) { + !requestNss.isChangeStreamPreImagesCollection() && + !requestNss.isConfigImagesCollection()) { // In FCV 4.4, each node is responsible for deleting the excess documents in // capped collections. This implies that capped deletes may not be synchronized // between nodes at times. 
When upgraded to FCV 5.0, the primary will generate diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp index 9ca0a8f2518..54944dcb0bd 100644 --- a/src/mongo/db/s/range_deletion_util.cpp +++ b/src/mongo/db/s/range_deletion_util.cpp @@ -52,6 +52,8 @@ #include "mongo/db/repl/repl_client_info.h" #include "mongo/db/repl/wait_for_majority_service.h" #include "mongo/db/s/migration_util.h" +#include "mongo/db/s/operation_sharding_state.h" +#include "mongo/db/s/shard_filtering_metadata_refresh.h" #include "mongo/db/s/shard_key_index_util.h" #include "mongo/db/s/sharding_runtime_d_params_gen.h" #include "mongo/db/s/sharding_statistics.h" @@ -668,45 +670,59 @@ void setOrphanCountersOnRangeDeletionTasks(OperationContext* opCtx) { opCtx, BSONObj(), [opCtx, &store, &setNumOrphansOnTask](const RangeDeletionTask& deletionTask) { - AutoGetCollection collection(opCtx, deletionTask.getNss(), MODE_IX); - if (!collection || collection->uuid() != deletionTask.getCollectionUuid()) { - // The deletion task is referring to a collection that has been dropped - setNumOrphansOnTask(deletionTask, 0); - return true; + // The operation context is not bound to any specific namespace; acquire the shard role + // to ensure that the collection key pattern may be retrieved through the + // AutoGetCollection object. + ScopedSetShardRole scopedRole( + opCtx, deletionTask.getNss(), ChunkVersion::IGNORED(), boost::none); + while (true) { + try { + AutoGetCollection collection(opCtx, deletionTask.getNss(), MODE_IX); + if (!collection || collection->uuid() != deletionTask.getCollectionUuid()) { + // The deletion task is referring to a collection that has been dropped + setNumOrphansOnTask(deletionTask, 0); + return true; + } + + + const auto keyPattern = collection.getCollection().getShardKeyPattern(); + auto shardKeyIdx = findShardKeyPrefixedIndex(opCtx, + *collection, + collection->getIndexCatalog(), + keyPattern, + /*requireSingleKey=*/false); + + uassert(ErrorCodes::IndexNotFound, + str::stream() << "couldn't find index over shard key " << keyPattern + << " for collection " << deletionTask.getNss() + << " (uuid: " << deletionTask.getCollectionUuid() << ")", + shardKeyIdx); + + const auto& range = deletionTask.getRange(); + auto forwardIdxScanner = + InternalPlanner::shardKeyIndexScan(opCtx, + &(*collection), + *shardKeyIdx, + range.getMin(), + range.getMax(), + BoundInclusion::kIncludeStartKeyOnly, + PlanYieldPolicy::YieldPolicy::YIELD_AUTO, + InternalPlanner::FORWARD); + int64_t numOrphansInRange = 0; + BSONObj indexEntry; + while (forwardIdxScanner->getNext(&indexEntry, nullptr) != + PlanExecutor::IS_EOF) { + ++numOrphansInRange; + } + + setNumOrphansOnTask(deletionTask, numOrphansInRange); + return true; + + } catch (const ExceptionFor<ErrorCodes::StaleConfig>& e) { + onShardVersionMismatchNoExcept(opCtx, e->getNss(), e->getVersionReceived()) + .ignore(); + } } - - KeyPattern keyPattern; - uassertStatusOK(deletionTask.getRange().extractKeyPattern(&keyPattern)); - auto shardKeyIdx = findShardKeyPrefixedIndex(opCtx, - *collection, - collection->getIndexCatalog(), - keyPattern.toBSON(), - /*requireSingleKey=*/false); - - uassert(ErrorCodes::IndexNotFound, - str::stream() << "couldn't find index over shard key " << keyPattern.toBSON() - << " for collection " << deletionTask.getNss() - << " (uuid: " << deletionTask.getCollectionUuid() << ")", - shardKeyIdx); - - const auto& range = deletionTask.getRange(); - auto forwardIdxScanner = - InternalPlanner::shardKeyIndexScan(opCtx, - &(*collection), - *shardKeyIdx, -
range.getMin(), - range.getMax(), - BoundInclusion::kIncludeStartKeyOnly, - PlanYieldPolicy::YieldPolicy::YIELD_AUTO, - InternalPlanner::FORWARD); - int64_t numOrphansInRange = 0; - BSONObj indexEntry; - while (forwardIdxScanner->getNext(&indexEntry, nullptr) != PlanExecutor::IS_EOF) { - ++numOrphansInRange; - } - - setNumOrphansOnTask(deletionTask, numOrphansInRange); - return true; }); } diff --git a/src/mongo/db/s/range_deletion_util_test.cpp b/src/mongo/db/s/range_deletion_util_test.cpp index 1a8154dffd8..c45eeb53e82 100644 --- a/src/mongo/db/s/range_deletion_util_test.cpp +++ b/src/mongo/db/s/range_deletion_util_test.cpp @@ -32,6 +32,7 @@ #include "mongo/db/catalog/create_collection.h" #include "mongo/db/db_raii.h" #include "mongo/db/dbdirectclient.h" +#include "mongo/db/hasher.h" #include "mongo/db/persistent_task_store.h" #include "mongo/db/repl/wait_for_majority_service.h" #include "mongo/db/s/collection_sharding_runtime.h" @@ -51,7 +52,7 @@ namespace { const NamespaceString kNss = NamespaceString("foo", "bar"); const std::string kShardKey = "_id"; -const BSONObj kShardKeyPattern = BSON(kShardKey << 1); +const BSONObj kRangeBasedShardKeyPattern = BSON(kShardKey << 1); class RangeDeleterTest : public ShardServerTestFixture { public: @@ -96,13 +97,14 @@ public: ShardServerTestFixture::tearDown(); } - void setFilteringMetadataWithUUID(const UUID& uuid) { + void setFilteringMetadataWithUUID(const UUID& uuid, + const BSONObj& shardKeyPattern = kRangeBasedShardKeyPattern) { const OID epoch = OID::gen(); auto rt = RoutingTableHistory::makeNew( kNss, uuid, - kShardKeyPattern, + shardKeyPattern, nullptr, false, epoch, @@ -228,7 +230,7 @@ TEST_F(RangeDeleterTest, std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, task.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -257,7 +259,7 @@ TEST_F(RangeDeleterTest, std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, task.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -280,7 +282,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeInsertsDocumentToNotifySecondarie std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, task.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -313,7 +315,7 @@ TEST_F( std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, task.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -346,7 +348,7 @@ TEST_F(RangeDeleterTest, std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, task.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -378,7 +380,7 @@ TEST_F(RangeDeleterTest, std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, task.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -407,7 +409,7 @@ TEST_F(RangeDeleterTest, kNss, // Use a different UUID from the collection UUID. 
UUID::gen(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, task.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -429,7 +431,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeThrowsErrorWhenCollectionDoesNotE std::move(queriesComplete), NamespaceString("someFake", "namespace"), UUID::gen(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, ChunkRange(BSON(kShardKey << 0), BSON(kShardKey << 10)), task.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -472,7 +474,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeLeavesDocumentsWhenTaskDocumentDo std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, UUID::gen(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete */); @@ -522,7 +524,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeWaitsForReplicationAfterDeletingS std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, t.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -572,7 +574,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeWaitsForReplicationOnlyOnceAfterS std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, t.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete */); @@ -618,7 +620,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeDoesNotWaitForReplicationIfErrorD std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, t.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete*/); @@ -648,7 +650,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeRetriesOnWriteConflictException) std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, t.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete */); @@ -679,7 +681,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeRetriesOnUnexpectedError) { std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, t.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete */); @@ -720,7 +722,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeRespectsDelayInBetweenBatches) { std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, task.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete */); @@ -752,7 +754,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeRespectsOrphanCleanupDelay) { std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, task.getId(), orphanCleanupDelay); @@ -790,7 +792,7 @@ TEST_F(RangeDeleterTest, RemoveDocumentsInRangeRemovesRangeDeletionTaskOnSuccess std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, t.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete */); @@ -820,7 +822,7 @@ TEST_F(RangeDeleterTest, std::move(queriesComplete), kNss, fakeUuid, - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, t.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete */); @@ -857,7 +859,7 @@ TEST_F(RangeDeleterTest, std::move(queriesComplete), kNss, uuid(), - kShardKeyPattern, + kRangeBasedShardKeyPattern, range, t.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete */); @@ -888,7 +890,7 @@ DEATH_TEST_F(RangeDeleterTest, RemoveDocumentsInRangeCrashesIfInputFutureHasErro std::move((queriesCompletePf.future)).semi(), kNss, uuid(), - 
kShardKeyPattern, + kRangeBasedShardKeyPattern, range, t.getId(), Seconds(0) /* delayForActiveQueriesOnSecondariesToComplete */); @@ -1025,8 +1027,9 @@ TEST_F(RenameRangeDeletionsTest, IdempotentRenameRangeDeletionsTest) { ASSERT_EQ(0, forRenameStore.count(_opCtx, BSONObj())); } -TEST_F(RangeDeleterTest, - setOrphanCountersOnRangeDeletionTasksUpdatesTaskWithExpectedNumberOfOrphans) { +TEST_F( + RangeDeleterTest, + setOrphanCountersOnRangeDeletionTasksUpdatesTaskForCollectionWithRangeShardKeyWithExpectedNumberOfOrphans) { const auto numOrphansInRange = 5; setFilteringMetadataWithUUID(uuid()); @@ -1046,6 +1049,44 @@ TEST_F(RangeDeleterTest, 1); } +TEST_F( + RangeDeleterTest, + setOrphanCountersOnRangeDeletionTasksUpdatesTaskForCollectionWithHashedShardKeyWithExpectedNumberOfOrphans) { + const BSONObj kHashedShardKeyPattern = BSON(kShardKey << "hashed"); + + DBDirectClient dbClient(_opCtx); + dbClient.createIndex(kNss.ns(), + BSON("_id" + << "hashed")); + + setFilteringMetadataWithUUID(uuid(), kHashedShardKeyPattern); + + const auto orphanedRangeLowerBound = std::numeric_limits<long long>::max() / 2; + const ChunkRange orphansRange(BSON(kShardKey << orphanedRangeLowerBound), + BSON(kShardKey << MAXKEY)); + + auto t = insertRangeDeletionTask(_opCtx, uuid(), orphansRange); + const auto numDocsInserted = 10; + auto numOrphansInRange = 0; + for (auto i = 0; i < numDocsInserted; ++i) { + dbClient.insert(kNss.toString(), BSON(kShardKey << i)); + const auto hashedDocId = BSONElementHasher::hash64(BSON("_id" << i).firstElement(), + BSONElementHasher::DEFAULT_HASH_SEED); + if (hashedDocId >= orphanedRangeLowerBound) { + ++numOrphansInRange; + } + } + + ASSERT(numOrphansInRange > 0); + + setOrphanCountersOnRangeDeletionTasks(_opCtx); + + PersistentTaskStore<RangeDeletionTask> store(NamespaceString::kRangeDeletionNamespace); + ASSERT_EQ( + store.count(_opCtx, BSON(RangeDeletionTask::kNumOrphanDocsFieldName << numOrphansInRange)), + 1); +} + TEST_F(RangeDeleterTest, setOrphanCountersOnRangeDeletionTasksAddsZeroValueWhenNamespaceNotFound) { NamespaceString unexistentCollection("foo", "iDontExist"); auto collUuid = UUID::gen();
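The new hashed-shard-key test derives its expected orphan count by re-hashing each inserted _id and checking whether the hash lands in the deleted range. Below is a minimal standalone sketch of that counting logic; std::hash stands in for BSONElementHasher::hash64 (which the real test calls with the index's default hash seed), so it compiles without the server tree:

```cpp
// Sketch only: a stand-in hash models BSONElementHasher::hash64.
#include <cstdint>
#include <functional>
#include <iostream>
#include <limits>

int64_t standInHash64(int64_t id) {
    // Placeholder for BSONElementHasher::hash64(BSON("_id" << id).firstElement(), seed).
    return static_cast<int64_t>(std::hash<int64_t>{}(id));
}

int main() {
    // The orphaned range is [max/2, MaxKey); a document counts as an expected
    // orphan when the hash of its _id falls at or above the lower bound.
    const int64_t lowerBound = std::numeric_limits<int64_t>::max() / 2;
    int numOrphansInRange = 0;
    for (int64_t id = 0; id < 10; ++id) {
        if (standInHash64(id) >= lowerBound) {
            ++numOrphansInRange;
        }
    }
    std::cout << numOrphansInRange << " of 10 documents hash into the range\n";
}
```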
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp index 5700f0326ae..c87cb3667dd 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp @@ -91,17 +91,19 @@ CoordinatorCommitMonitor::CoordinatorCommitMonitor( std::vector<ShardId> recipientShards, CoordinatorCommitMonitor::TaskExecutorPtr executor, CancellationToken cancelToken, + int delayBeforeInitialQueryMillis, Milliseconds maxDelayBetweenQueries) : _ns(std::move(ns)), _recipientShards(std::move(recipientShards)), _executor(std::move(executor)), _cancelToken(std::move(cancelToken)), _threshold(Milliseconds(gRemainingReshardingOperationTimeThresholdMillis.load())), + _delayBeforeInitialQueryMillis(Milliseconds(delayBeforeInitialQueryMillis)), _maxDelayBetweenQueries(maxDelayBetweenQueries) {} SemiFuture<void> CoordinatorCommitMonitor::waitUntilRecipientsAreWithinCommitThreshold() const { - return _makeFuture() + return _makeFuture(_delayBeforeInitialQueryMillis) .onError([](Status status) { if (ErrorCodes::isCancellationError(status.code()) || ErrorCodes::isInterruption(status.code())) { @@ -195,9 +197,16 @@ CoordinatorCommitMonitor::queryRemainingOperationTimeForRecipients() const { return {minRemainingTime, maxRemainingTime}; } -ExecutorFuture<void> CoordinatorCommitMonitor::_makeFuture() const { +ExecutorFuture<void> CoordinatorCommitMonitor::_makeFuture(Milliseconds delayBetweenQueries) const { return ExecutorFuture(_executor) - .then([this] { return queryRemainingOperationTimeForRecipients(); }) + // Start by waiting so that we have more time to calculate a more realistic remaining-time + // estimate. + .then([this, anchor = shared_from_this(), delayBetweenQueries] { + return _executor->sleepFor(delayBetweenQueries, _cancelToken) + .then([this, anchor = std::move(anchor)] { + return queryRemainingOperationTimeForRecipients(); + }); + }) .onError([this](Status status) { if (_cancelToken.isCanceled()) { // Do not retry on cancellation errors. @@ -233,12 +242,10 @@ ExecutorFuture<void> CoordinatorCommitMonitor::_makeFuture() const { // The following ensures that the monitor would never sleep for more than a predefined // maximum delay between querying recipient shards. Thus, it can handle very large, // and potentially inaccurate estimates of the remaining operation time. - auto sleepTime = std::min(remainingTimes.max - _threshold, _maxDelayBetweenQueries); - return _executor->sleepFor(sleepTime, _cancelToken) - .then([this, anchor = std::move(anchor)] { - // We are not canceled yet, so schedule new queries against recipient shards. - return _makeFuture(); - }); + auto delayBetweenQueries = + std::min(remainingTimes.max - _threshold, _maxDelayBetweenQueries); + + return _makeFuture(delayBetweenQueries); }); } diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h index 64544981ae5..be722fb11bd 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h +++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h @@ -72,6 +72,7 @@ public: std::vector<ShardId> recipientShards, TaskExecutorPtr executor, CancellationToken cancelToken, + int delayBeforeInitialQueryMillis, Milliseconds maxDelayBetweenQueries = kMaxDelayBetweenQueries); SemiFuture<void> waitUntilRecipientsAreWithinCommitThreshold() const; @@ -88,7 +89,7 @@ public: RemainingOperationTimes queryRemainingOperationTimeForRecipients() const; private: - ExecutorFuture<void> _makeFuture() const; + ExecutorFuture<void> _makeFuture(Milliseconds delayBetweenQueries) const; static constexpr auto kDiagnosticLogLevel = 0; static constexpr auto kMaxDelayBetweenQueries = Seconds(30); @@ -98,6 +99,8 @@ private: const TaskExecutorPtr _executor; const CancellationToken _cancelToken; const Milliseconds _threshold; + + const Milliseconds _delayBeforeInitialQueryMillis; const Milliseconds _maxDelayBetweenQueries; TaskExecutorPtr _networkExecutor; diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp index 2fe3075f1fc..0804565201c 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp @@ -152,7 +152,7 @@ void CoordinatorCommitMonitorTest::setUp() { _cancellationSource = std::make_unique<CancellationSource>(); _commitMonitor = std::make_shared<CoordinatorCommitMonitor>( - _ns, _recipientShards, _futureExecutor, _cancellationSource->token(), Milliseconds(0)); + _ns, _recipientShards, _futureExecutor, _cancellationSource->token(), 0, Milliseconds(0)); _commitMonitor->setNetworkExecutorForTest(executor()); }
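The refactor threads the sleep through _makeFuture() so the very first remaining-time query can also be delayed, while later iterations keep the same capped back-off. Reduced to a plain loop, with a fake estimate source standing in for queryRemainingOperationTimeForRecipients() (a sketch, not the future-based server code), the monitor's polling logic looks like this:

```cpp
#include <algorithm>
#include <chrono>
#include <iostream>
#include <thread>

using Millis = std::chrono::milliseconds;

// Stand-in for queryRemainingOperationTimeForRecipients(): pretend the
// recipients' max remaining-time estimate shrinks by 40ms per poll.
Millis queryMaxRemainingTime() {
    static Millis remaining{200};
    remaining = std::max(Millis{0}, remaining - Millis{40});
    return remaining;
}

void runMonitor(Millis initialDelay, Millis threshold, Millis maxDelay) {
    Millis delay = initialDelay;  // previously the first query always ran immediately
    while (true) {
        std::this_thread::sleep_for(delay);
        const Millis remaining = queryMaxRemainingTime();
        if (remaining <= threshold) {
            return;  // recipients are within the commit threshold
        }
        // Never sleep longer than maxDelay, even for huge or inaccurate estimates.
        delay = std::min(remaining - threshold, maxDelay);
    }
}

int main() {
    runMonitor(Millis{50}, Millis{60}, Millis{100});
    std::cout << "recipients within commit threshold\n";
}
```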
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp index ea3451f1c4a..81b12b97108 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp @@ -1714,7 +1714,8 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_startCommitMonitor( _coordinatorDoc.getSourceNss(), extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards()), **executor, - _ctHolder->getCommitMonitorToken()); + _ctHolder->getCommitMonitorToken(), + resharding::gReshardingDelayBeforeRemainingOperationTimeQueryMillis.load()); _commitMonitorQuiesced = _commitMonitor->waitUntilRecipientsAreWithinCommitThreshold() .thenRunOn(**executor) diff --git a/src/mongo/db/s/resharding/resharding_server_parameters.idl b/src/mongo/db/s/resharding/resharding_server_parameters.idl index daaedfc9ea4..7508d30c772 100644 --- a/src/mongo/db/s/resharding/resharding_server_parameters.idl +++ b/src/mongo/db/s/resharding/resharding_server_parameters.idl @@ -151,6 +151,21 @@ server_parameters: validator: gte: 0 + reshardingDelayBeforeRemainingOperationTimeQueryMillis: + description: >- + Initial delay before querying recipient shards for the remaining operation time. + The delay allows more oplog entries to be applied before the remaining time is + calculated, giving a more accurate estimate. + Note that this delay is incurred again after every failover. + set_at: [startup, runtime] + cpp_vartype: AtomicWord<int> + cpp_varname: gReshardingDelayBeforeRemainingOperationTimeQueryMillis + default: + expr: 0 + validator: + gte: 0 + redact: false + reshardingCriticalSectionTimeoutMillis: description: >- The upper limit on how long to wait to hear back from recipient shards reaching strict
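For context on how the new IDL parameter behaves at runtime: `set_at: [startup, runtime]` together with `cpp_vartype: AtomicWord<int>` generates an atomic global that call sites read with `.load()`, so a runtime setParameter takes effect without a restart. A minimal sketch of that pattern, with std::atomic<int> standing in for the generated AtomicWord and hypothetical names:

```cpp
#include <atomic>
#include <chrono>
#include <iostream>

// Stand-in for the IDL-generated global gReshardingDelayBeforeRemainingOperationTimeQueryMillis.
std::atomic<int> gDelayBeforeInitialQueryMillis{0};

std::chrono::milliseconds delayForNextMonitor() {
    // Loaded when each commit monitor is constructed, so a runtime change
    // affects the next resharding operation (or the retry after a failover).
    return std::chrono::milliseconds(gDelayBeforeInitialQueryMillis.load());
}

int main() {
    std::cout << delayForNextMonitor().count() << "ms\n";  // 0, the default
    gDelayBeforeInitialQueryMillis.store(5000);            // what a setParameter would do
    std::cout << delayForNextMonitor().count() << "ms\n";  // 5000
}
```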
diff --git a/src/mongo/db/sessions_collection.cpp b/src/mongo/db/sessions_collection.cpp index a5595f21dbf..6deccfe9ff3 100644 --- a/src/mongo/db/sessions_collection.cpp +++ b/src/mongo/db/sessions_collection.cpp @@ -42,26 +42,13 @@ #include "mongo/db/logical_session_id_helpers.h" #include "mongo/db/ops/write_ops.h" #include "mongo/db/repl/read_concern_args.h" +#include "mongo/db/sessions_server_parameters_gen.h" #include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/util/duration.h" namespace mongo { namespace { -// This batch size is chosen to ensure that we don't form requests larger than the 16mb limit. -// Especially for refreshes, the updates we send include the full user name (user@db), and user -// names can be quite large (we enforce a max 10k limit for usernames used with sessions). -// -// At 1000 elements, a 16mb payload gives us a budget of 16000 bytes per user, which we should -// comfortably be able to stay under, even with 10k user names. -constexpr size_t kMaxBatchSize = 1000; - -// Used to refresh or remove items from the session collection with write -// concern majority -const WriteConcernOptions kMajorityWriteConcern{WriteConcernOptions::kMajority, - WriteConcernOptions::SyncMode::UNSET, - WriteConcernOptions::kWriteConcernTimeoutSystem}; - - BSONObj lsidQuery(const LogicalSessionId& lsid) { return BSON(LogicalSessionRecord::kIdFieldName << lsid.toBSON()); } @@ -104,7 +91,7 @@ void runBulkGeneric(TFactory makeT, AddLineFn addLine, SendFn sendBatch, const C for (const auto& item : items) { addLine(*thing, item); - if (++i >= kMaxBatchSize) { + if (++i >= std::size_t(mongo::gSessionMaxBatchSize.load())) { sendLocalBatch(); setupBatch(); @@ -192,7 +179,14 @@ SessionsCollection::FindBatchFn SessionsCollection::makeFindFnForCommand(const N void SessionsCollection::_doRefresh(const NamespaceString& ns, const std::vector<LogicalSessionRecord>& sessions, SendBatchFn send) { - auto init = [ns](BSONObjBuilder* batch) { + // Used to refresh items from the session collection with write + // concern majority + const WriteConcernOptions kMajorityWriteConcern{ + WriteConcernOptions::kMajority, + WriteConcernOptions::SyncMode::UNSET, + Milliseconds(mongo::gSessionWriteConcernTimeoutSystemMillis.load())}; + + auto init = [ns, kMajorityWriteConcern](BSONObjBuilder* batch) { batch->append("update", ns.coll()); batch->append("ordered", false); batch->append(WriteConcernOptions::kWriteConcernField, kMajorityWriteConcern.toBSON()); @@ -202,14 +196,20 @@ void SessionsCollection::_doRefresh(const NamespaceString& ns, entries->append( BSON("q" << lsidQuery(record) << "u" << updateQuery(record) << "upsert" << true)); }; - runBulkCmd("updates", init, add, send, sessions); } void SessionsCollection::_doRemove(const NamespaceString& ns, const std::vector<LogicalSessionId>& sessions, SendBatchFn send) { - auto init = [ns](BSONObjBuilder* batch) { + // Used to remove items from the session collection with write + // concern majority + const WriteConcernOptions kMajorityWriteConcern{ + WriteConcernOptions::kMajority, + WriteConcernOptions::SyncMode::UNSET, + Milliseconds(mongo::gSessionWriteConcernTimeoutSystemMillis.load())}; + + auto init = [ns, kMajorityWriteConcern](BSONObjBuilder* batch) { batch->append("delete", ns.coll()); batch->append("ordered", false); batch->append(WriteConcernOptions::kWriteConcernField, kMajorityWriteConcern.toBSON()); diff --git a/src/mongo/db/sessions_server_parameters.idl b/src/mongo/db/sessions_server_parameters.idl new file mode 100644 index 00000000000..c8676b03073 --- /dev/null +++ b/src/mongo/db/sessions_server_parameters.idl @@ -0,0 +1,63 @@ +# Copyright (C) 2024-present MongoDB, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the Server Side Public License, version 1, +# as published by MongoDB, Inc. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Server Side Public License for more details. +# +# You should have received a copy of the Server Side Public License +# along with this program. If not, see +# <http://www.mongodb.com/licensing/server-side-public-license>. +# +# As a special exception, the copyright holders give permission to link the +# code of portions of this program with the OpenSSL library under certain +# conditions as described in each individual source file and distribute +# linked combinations including the program with the OpenSSL library.
You +# must comply with the Server Side Public License in all respects for +# all of the code used other than as permitted herein. If you modify file(s) +# with this exception, you may extend this exception to your version of the +# file(s), but you are not obligated to do so. If you do not wish to do so, +# delete this exception statement from your version. If you delete this +# exception statement from all source files in the program, then also delete +# it in the license file. +# + +# Server parameters for configuring the refresh of the session collection. + +global: + cpp_namespace: "mongo" + +imports: + - "mongo/idl/basic_types.idl" + +server_parameters: + sessionWriteConcernTimeoutSystemMillis: + description: Controls the write concern timeout (in milliseconds) for the refresh or removal of items from the session collection. + set_at: [startup, runtime] + cpp_vartype: AtomicWord<int> + cpp_varname: gSessionWriteConcernTimeoutSystemMillis + default: 60000 + validator: + gte: 0 + redact: false + + sessionMaxBatchSize: + description: >- + Controls the maximum batch size (number of elements) for the sessions' refresh. + This batch size is chosen to ensure that we don't form requests larger than the 16mb limit. + Especially for refreshes, the updates we send include the full user name (user@db), and user + names can be quite large (we enforce a max 10k limit for usernames used with sessions). + At the default of 1000 elements, a 16mb payload gives us a budget of 16000 bytes per user, + which we should comfortably be able to stay under, even with 10k user names. + set_at: [startup, runtime] + cpp_vartype: AtomicWord<int> + cpp_varname: gSessionMaxBatchSize + default: 1000 + validator: + gte: 100 + lte: 10000 + redact: false
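The batching behavior that sessionMaxBatchSize now controls can be pictured with a small standalone sketch: runBulkGeneric's flush-on-boundary logic reduced to a helper, with std::atomic<int> standing in for the generated gSessionMaxBatchSize (names and the simplified send callback are illustrative, not the server's API):

```cpp
#include <atomic>
#include <cstddef>
#include <iostream>
#include <vector>

// Stand-in for the IDL-generated parameter; tunable at runtime.
std::atomic<int> gSessionMaxBatchSize{1000};

template <typename T, typename SendFn>
void sendInBatches(const std::vector<T>& items, SendFn sendBatch) {
    std::vector<T> batch;
    for (const auto& item : items) {
        batch.push_back(item);
        if (batch.size() >= static_cast<std::size_t>(gSessionMaxBatchSize.load())) {
            sendBatch(batch);  // flush so each request stays well under the 16mb limit
            batch.clear();
        }
    }
    if (!batch.empty()) {
        sendBatch(batch);  // flush the remainder
    }
}

int main() {
    gSessionMaxBatchSize.store(2);  // what a runtime setParameter would do
    std::vector<int> sessions{1, 2, 3, 4, 5};
    sendInBatches(sessions, [](const std::vector<int>& b) {
        std::cout << "sending batch of " << b.size() << "\n";  // 2, 2, 1
    });
}
```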