SERVER-109844 Basic support for disaggregated storage clusters (#40566)

Co-authored-by: Benety Goh <benety@mongodb.com>
Co-authored-by: Mathias Stearn <mathias@mongodb.com>
Co-authored-by: Kaitlin Mahar <kaitlin.mahar@mongodb.com>
Co-authored-by: Brandon Stoll <bstoll@users.noreply.github.com>
Co-authored-by: Vanessa Noia <54818020+nessnoia@users.noreply.github.com>
Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com>
Co-authored-by: Vishnu K <vishnu.kaushik@mongodb.com>
Co-authored-by: Sunil Narasimhamurthy <suniltheta@gmail.com>
Co-authored-by: Jiawei Yang <youngyang0820@gmail.com>
Co-authored-by: Will Korteland <korteland@users.noreply.github.com>
Co-authored-by: Saman Memaripour <amirsaman.memaripour@mongodb.com>
Co-authored-by: huayu-ouyang <huayu.ouyang@mongodb.com>
Co-authored-by: Suganthi Mani <38441312+smani87@users.noreply.github.com>
Co-authored-by: Thomas Goyne <thomas.goyne@mongodb.com>
Co-authored-by: Haley Connelly <haley.connelly@mongodb.com>
Co-authored-by: Billy Donahue <BillyDonahue@users.noreply.github.com>
Co-authored-by: Kirollos Morkos <kiro.morkos@mongodb.com>
Co-authored-by: Lingzhi Deng <lingzhi.deng@mongodb.com>
Co-authored-by: Hartek Sabharwal <hartek.sabharwal@mongodb.com>
Co-authored-by: Aaron Himelman <aaron.himelman@mongodb.com>
Co-authored-by: Moustafa Maher <m.maher@mongodb.com>
Co-authored-by: prathmesh-kallurkar <prathmesh.kallurkar@mongodb.com>
Co-authored-by: Dan Larkin-York <13419935+dhly-etc@users.noreply.github.com>
Co-authored-by: Shreyas Kalyan <35750327+shreyaskalyan@users.noreply.github.com>
Co-authored-by: Shreyas Kalyan <shreyas.kalyan@mongodb.com>
Co-authored-by: Jonathan Reams <jbreams@mongodb.com>
Co-authored-by: adriangzz <adriangonzalezmontemayor@gmail.com>
Co-authored-by: Eric Milkie <milkie@users.noreply.github.com>
Co-authored-by: Aaron B <aaron.balsara@mongodb.com>
Co-authored-by: Ali Mir <ali.mir@mongodb.com>
Co-authored-by: Alex Blekhman <alexander.blekhman@mongodb.com>
Co-authored-by: mpobrien <mpobrien005@gmail.com>
Co-authored-by: Mark Benvenuto <mark.benvenuto@mongodb.com>
Co-authored-by: Ruby Chen <ruby.chen@mongodb.com>
Co-authored-by: Jagadish Nallapaneni <146780625+jagadishmdb@users.noreply.github.com>
Co-authored-by: Jonas Bergler <jonas.bergler@mongodb.com>
Co-authored-by: Peter Macko <peter.macko@mongodb.com>
Co-authored-by: Nic <nic.hollingum@mongodb.com>
Co-authored-by: Jiawei Yang <jiawei.yang@mongodb.com>
Co-authored-by: Jordi Serra Torrens <jordist@users.noreply.github.com>
Co-authored-by: Sunil Narasimhamurthy <sunil.narasimhamurthy@mongodb.com>
GitOrigin-RevId: a1c6609c820052137e2aa759711e86c337ae6f9f
Author: Matthew Russotto, 2025-08-29 17:49:01 -04:00 (committed by MongoDB Bot)
Parent: 7f3bd6ca62
Commit: 8d12269eec
182 changed files with 44273 additions and 689 deletions

.github/CODEOWNERS (vendored, 21 changed lines)

@@ -81,6 +81,9 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
 /buildscripts/idl/**/idl_compatibility_errors.py @10gen/query-optimization @svc-auto-approve-bot
 /buildscripts/idl/**/test_compatibility.py @10gen/query-optimization @svc-auto-approve-bot
+# The following patterns are parsed from ./buildscripts/modules/atlas/OWNERS.yml
+/buildscripts/modules/atlas/ @10gen/server-disagg-storage @svc-auto-approve-bot
 # The following patterns are parsed from ./buildscripts/monitor_build_status/OWNERS.yml
 /buildscripts/monitor_build_status/ @10gen/devprod-correctness @svc-auto-approve-bot
@@ -888,6 +891,8 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
 /jstests/libs/**/catalog_list_operations_consistency_validator.js @10gen/server-catalog-and-routing @svc-auto-approve-bot
 /jstests/libs/**/raw_operation_utils.js @10gen/server-collection-write-path @svc-auto-approve-bot
 /jstests/libs/**/json_utils.js @10gen/query-integration-extensions @svc-auto-approve-bot
+/jstests/libs/**/replicated_ident_utils.js @10gen/server-storage-engine-integration @svc-auto-approve-bot
+/jstests/libs/**/replicated_record_ids_utils.js @10gen/server-storage-engine-integration @svc-auto-approve-bot
 # The following patterns are parsed from ./jstests/libs/clustered_collections/OWNERS.yml
 /jstests/libs/clustered_collections/**/* @10gen/server-collection-write-path @svc-auto-approve-bot
@@ -1831,9 +1836,6 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
 /src/mongo/db/commands/query_cmd/**/release_memory_cmd.* @10gen/query-execution @svc-auto-approve-bot
 /src/mongo/db/commands/query_cmd/**/update_metrics.* @10gen/query-execution @svc-auto-approve-bot
-# The following patterns are parsed from ./src/mongo/db/disagg_storage/OWNERS.yml
-/src/mongo/db/disagg_storage/**/* @10gen/server-disagg-storage @svc-auto-approve-bot
 # The following patterns are parsed from ./src/mongo/db/exec/OWNERS.yml
 /src/mongo/db/exec/**/* @10gen/query-execution-classic @svc-auto-approve-bot
 /src/mongo/db/exec/**/OWNERS.yml @10gen/query-execution-staff-leads @10gen/query-integration-staff-leads @10gen/query-optimization-staff-leads @svc-auto-approve-bot
@@ -2043,6 +2045,15 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
 # The following patterns are parsed from ./src/mongo/db/modules/atlas/OWNERS.yml
 /src/mongo/db/modules/atlas/**/* @10gen/server-disagg-storage @svc-auto-approve-bot
+# The following patterns are parsed from ./src/mongo/db/modules/atlas/jstests/disagg_storage/OWNERS.yml
+/src/mongo/db/modules/atlas/jstests/disagg_storage/**/* @10gen/server-disagg-storage @svc-auto-approve-bot
+# The following patterns are parsed from ./src/mongo/db/modules/atlas/src/disagg_storage/OWNERS.yml
+/src/mongo/db/modules/atlas/src/disagg_storage/**/* @10gen/server-disagg-storage @svc-auto-approve-bot
+# The following patterns are parsed from ./src/mongo/db/modules/atlas/src/disagg_storage/encryption/OWNERS.yml
+/src/mongo/db/modules/atlas/src/disagg_storage/encryption/**/* @10gen/server-security @svc-auto-approve-bot
 # The following patterns are parsed from ./src/mongo/db/modules/enterprise/OWNERS.yml
 /src/mongo/db/modules/enterprise/BUILD.bazel @10gen/devprod-build @svc-auto-approve-bot
 /src/mongo/db/modules/enterprise/README.md @10gen/server-release @svc-auto-approve-bot
@@ -2589,6 +2600,9 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
 # The following patterns are parsed from ./src/mongo/db/repl/split_horizon/OWNERS.yml
 /src/mongo/db/repl/split_horizon/**/* @10gen/server-split-horizon @svc-auto-approve-bot
+# The following patterns are parsed from ./src/mongo/db/rss/OWNERS.yml
+/src/mongo/db/rss/**/* @10gen/server-replication @10gen/server-storage-execution @svc-auto-approve-bot
 # The following patterns are parsed from ./src/mongo/db/s/OWNERS.yml
 /src/mongo/db/s/**/* @10gen/server-cluster-scalability @svc-auto-approve-bot
 /src/mongo/db/s/**/*transaction* @10gen/server-transactions @svc-auto-approve-bot
@@ -3068,6 +3082,7 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
 /src/third_party/**/croaring @10gen/query-execution @svc-auto-approve-bot
 /src/third_party/**/fmt @10gen/server-programmability @svc-auto-approve-bot
 /src/third_party/**/folly @10gen/server-workload-scheduling @svc-auto-approve-bot
+/src/third_party/**/googletest_restricted_for_disagg_only @10gen/server-disagg-storage @svc-auto-approve-bot
 /src/third_party/**/gperftools @10gen/server-workload-scheduling @svc-auto-approve-bot
 /src/third_party/**/grpc @10gen/server-networking-and-observability @svc-auto-approve-bot
 /src/third_party/**/icu4c* @10gen/query-execution @svc-auto-approve-bot


@@ -66,6 +66,7 @@ a notice will be included in
 | [pyiso8601] | MIT | 2.1.0 | unknown | |
 | [RoaringBitmap/CRoaring] | Unknown License | v3.0.1 | | ✗ |
 | [SchemaStore/schemastore] | Apache-2.0 | Unknown | | |
+| [sls-proto] | Unknown License | 1.0 | unknown | ✗ |
 | [smhasher] | Unknown License | Unknown | unknown | ✗ |
 | [Snowball Stemming Algorithms] | BSD-3-Clause | 7b264ffa0f767c579d052fd8142558dc8264d795 | ✗ | ✗ |
 | [subunit] | BSD-3-Clause, Apache-2.0 | 1.4.4 | unknown | |
@@ -122,6 +123,7 @@ a notice will be included in
 [opentelemetry-cpp]: https://github.com/open-telemetry/opentelemetry-cpp/
 [opentelemetry-proto]: https://github.com/open-telemetry/opentelemetry-proto
 [pyiso8601]: https://pypi.org/project/iso8601/
+[sls-proto]: https://github.com/10gen/sls
 [smhasher]: https://github.com/aappleby/smhasher/blob/a6bd3ce/
 [subunit]: https://github.com/testing-cabal/subunit
 [tcmalloc]: https://github.com/google/tcmalloc


@@ -1,12 +1,12 @@
 enterprise:
   jstest_dirs:
     - src/mongo/db/modules/enterprise/jstests
-# atlas:
-#   fixture_dirs:
-#     - buildscripts/modules/atlas/fixtures
-#   hook_dirs:
-#     - buildscripts/modules/atlas/hooks
-#   suite_dirs:
-#     - buildscripts/modules/atlas/suites
-#   jstest_dirs:
-#     - buildscripts/modules/atlas/jstests
+atlas:
+  fixture_dirs:
+    - buildscripts/modules/atlas/fixtures
+  hook_dirs:
+    - buildscripts/modules/atlas/hooks
+  suite_dirs:
+    - buildscripts/modules/atlas/suites
+  jstest_dirs:
+    - src/mongo/db/modules/atlas/jstests


@@ -1,5 +1,6 @@
 """Utilities for constructing fixtures that may span multiple versions."""
+import json
 import logging
 import threading
 from abc import ABC, abstractmethod
@@ -223,6 +224,33 @@ class ReplSetBuilder(FixtureBuilder):
             )
             replset.install_mongod(node)
+        if replset.disagg_base_config:
+            members = []
+            for idx, node in enumerate(replset.nodes):
+                member = {
+                    "_id": idx,
+                    "host": node.get_internal_connection_string(),
+                    "priority": 1
+                }
+                members.append(member)
+            disagg_base_config = {
+                **replset.disagg_base_config,
+                "replSetConfig": {
+                    "_id": replset.replset_name,
+                    "version": 1,
+                    "term": 1,
+                    "members": members,
+                }
+            }
+            for node in replset.nodes:
+                opts = node.get_mongod_options()
+                opts["set_parameters"]["disaggregatedStorageConfig"] = json.dumps(disagg_base_config)
+                opts["set_parameters"]["disaggregatedStorageEnabled"] = True
+                opts["set_parameters"]["logComponentVerbosity"] = json.dumps({"disaggregatedStorage": 5})
+                node.set_mongod_options(opts)
         if replset.start_initial_sync_node:
             if not replset.initial_sync_node:
                 replset.initial_sync_node_idx = replset.num_nodes


@@ -74,6 +74,7 @@ class ReplicaSetFixture(interface.ReplFixture, interface._DockerComposeInterface):
         launch_mongot=False,
         load_all_extensions=False,
         router_endpoint_for_mongot: Optional[int] = None,
+        disagg_base_config=None,
     ):
         """Initialize ReplicaSetFixture."""
@@ -139,6 +140,8 @@ class ReplicaSetFixture(interface.ReplFixture, interface._DockerComposeInterface):
         # Set the default oplogSize to 511MB.
         self.mongod_options.setdefault("oplogSize", 511)
+        self.disagg_base_config = disagg_base_config
         # The dbpath in mongod_options is used as the dbpath prefix for replica set members and
         # takes precedence over other settings. The ShardedClusterFixture uses this parameter to
         # create replica sets and assign their dbpath structure explicitly.
@@ -462,12 +465,14 @@ class ReplicaSetFixture(interface.ReplFixture, interface._DockerComposeInterface):
         primary = self.nodes[0]
         client = primary.mongo_client()
         while True:
-            self.logger.info("Waiting for primary on port %d to be elected.", primary.port)
-            is_master = client.admin.command("isMaster")["ismaster"]
-            if is_master:
+            self.logger.info(
+                "Waiting for primary on port %d to be elected.", primary.port)
+            cmd_result = client.admin.command("isMaster")
+            if cmd_result["ismaster"]:
                 break
             time.sleep(0.1)  # Wait a little bit before trying again.
-        self.logger.info("Primary on port %d successfully elected.", primary.port)
+        self.logger.info(
+            "Primary on port %d successfully elected.", primary.port)

     def _await_secondaries(self):
         # Wait for the secondaries to become available.

@@ -188,6 +188,12 @@ class MongoDFixture(interface.Fixture, interface._DockerComposeInterface):
             self.logger.debug("Mongod not running when gathering standalone fixture pid.")
         return out

+    def get_mongod_options(self):
+        return self.mongod_options
+
+    def set_mongod_options(self, options):
+        self.mongod_options = options
+
     def _handle_await_ready_retry(self, deadline):
         remaining = deadline - time.time()
         if remaining <= 0.0:


@@ -50,6 +50,10 @@ class FixtureSetupTestCase(FixtureTestCase):
             self.fixture.await_ready()
             if (
                 not isinstance(self.fixture, (fixture_interface.NoOpFixture, ExternalFixture))
+                # TODO(SERVER-109851): Remove this.
+                # disagg mongod does not yet support "refreshLogicalSessionCacheNow" because it requires
+                # wtimeout support.
+                and self.fixture.__class__.__name__ != "DisaggFixture"
                 # Replica set with --configsvr cannot run refresh unless it is part of a sharded cluster.
                 and not (
                     isinstance(self.fixture, ReplicaSetFixture)


@@ -3,7 +3,9 @@
 import copy
 import os
 import os.path
+import random
 import shutil
+import string
 import sys
 import threading
 import uuid
@@ -67,6 +69,9 @@ class _SingleJSTestCase(interface.ProcessTestCase):
         global_vars["MongoRunner.dataPath"] = data_path
         test_data = global_vars.get("TestData", {}).copy()
+        test_run_id = "".join(random.choices(string.ascii_letters + string.digits, k=10))
+        self.fixture.test_run_id = test_run_id
+        test_data["test_run_id"] = test_run_id
         test_data["minPort"] = core.network.PortAllocator.min_test_port(self.fixture.job_num)
         test_data["maxPort"] = core.network.PortAllocator.max_test_port(self.fixture.job_num)
         test_data["peerPids"] = self.fixture.pids()


@@ -1 +0,0 @@
-"""Empty."""


@@ -1,51 +0,0 @@
import os
import subprocess
import sys
import unittest
import yaml
import buildscripts.burn_in_tests as under_test
class TestBurnInTestsEnd2End(unittest.TestCase):
@unittest.skip(
"Disabled since this test has behavior dependent on currently modified jstests. Re-enable with SERVER-108783."
)
@classmethod
def setUpClass(cls):
subprocess.run(
[
sys.executable,
"buildscripts/burn_in_tests.py",
"generate-test-membership-map-file-for-ci",
]
)
@classmethod
def tearDownClass(cls):
if os.path.exists(under_test.BURN_IN_TEST_MEMBERSHIP_FILE):
os.remove(under_test.BURN_IN_TEST_MEMBERSHIP_FILE)
def test_valid_yaml_output(self):
process = subprocess.run(
[
sys.executable,
"buildscripts/burn_in_tests.py",
"run",
"--yaml",
],
text=True,
capture_output=True,
)
self.assertEqual(
0,
process.returncode,
process.stderr,
)
output = process.stdout
try:
yaml.safe_load(output)
except Exception:
self.fail(msg="burn_in_tests.py does not output valid yaml.")


@@ -93,8 +93,6 @@ include:
   - filename: etc/evergreen_yml_components/variants/codecoverage/test_dev.yml
-  - filename: src/mongo/db/modules/atlas/atlas_dev.yml

 parameters:
   - key: evergreen_config_file_path
     value: "etc/evergreen.yml"


@@ -79,6 +79,7 @@ rules:
       - assigned_to_jira_team_server_workload_scheduling
       - assigned_to_jira_team_server_networking_and_observability
      - assigned_to_jira_team_server_integration
+      - assigned_to_jira_team_server_disagg
       # https://github.com/10gen/mothra/blob/main/mothra/teams/rnd_dev_prod.yaml
       - assigned_to_jira_team_devprod_build
@@ -130,6 +131,7 @@ rules:
       - incompatible_inmemory
       - incompatible_all_feature_flags
       - incompatible_development_variant
+      - incompatible_disaggregated_storage
       - requires_compile_variant
       - requires_large_host
       - requires_large_host_tsan


@@ -1236,6 +1236,19 @@ functions:
       args:
         - "./src/evergreen/resmoke_tests_execute_bazel.sh"

+  "assume ECR role": &assume_ecr_role
+    command: ec2.assume_role
+    params:
+      role_arn: "${disagg_storage_ecr_arn}"
+
+  "fetch module images": &fetch_module_images
+    command: subprocess.exec
+    params:
+      binary: bash
+      add_expansions_to_env: true # needed to get the AWS secrets from ec2.assume_role
+      args:
+        - "./src/evergreen/fetch_module_images.sh"
+
   "retrieve generated test configuration":
     &retrieve_generated_test_configuration
     command: s3.get
@@ -1472,6 +1485,8 @@ functions:
     - *f_expansions_write
     - *sign_macos_dev_binaries
     - *multiversion_exclude_tags_generate
+    - *assume_ecr_role
+    - *fetch_module_images
     - *execute_resmoke_tests
     # The existence of the "run_tests_infrastructure_failure" file indicates this failure isn't
     # directly actionable. We use type=setup rather than type=system or type=test for this command
@@ -1521,6 +1536,8 @@ functions:
     - *configure_evergreen_api_credentials
     - *sign_macos_dev_binaries
     - *multiversion_exclude_tags_generate
+    - *assume_ecr_role
+    - *fetch_module_images
     - *execute_resmoke_tests
     # The existence of the "run_tests_infrastructure_failure" file indicates this failure isn't
     # directly actionable. We use type=setup rather than type=system or type=test for this command
@@ -3294,3 +3311,11 @@ functions:
       [AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN]
       args:
         - "./src/evergreen/container_registry_login.sh"
+
+  "build and push module images": &build_and_push_module_images
+    command: subprocess.exec
+    params:
+      binary: bash
+      add_expansions_to_env: true # needed to get the AWS secrets from ec2.assume_role
+      args:
+        - "./src/evergreen/build_and_push_module_images.sh"


@@ -212,6 +212,8 @@ tasks:
         - "src/src/**.yml"
         - "src/src/mongo/client/sdam/json_tests/sdam_tests/**"
         - "src/src/mongo/client/sdam/json_tests/server_selection_tests/**"
+        - "src/src/mongo/db/modules/atlas/evergreen/**"
+        - "src/src/mongo/db/modules/atlas/jstests/**"
         - "src/src/mongo/db/modules/enterprise/docs/**"
         - "src/src/mongo/db/modules/enterprise/jstests/**"
         - "src/src/mongo/db/modules/subscription/jstests/**"


@@ -431,7 +431,13 @@ tasks:
 - <<: *run_jepsen_template
   name: jepsen_config_fuzzer_list-append
-  tags: ["assigned_to_jira_team_server_repl", "experimental", "jepsen_docker"]
+  tags:
+    [
+      "assigned_to_jira_team_server_repl",
+      "experimental",
+      "jepsen_docker",
+      "uses_docker",
+    ]
   commands:
     - func: "do setup"
     - func: "do jepsen docker setup"
@@ -514,7 +520,13 @@ tasks:
 - <<: *run_jepsen_template
   name: jepsen_list-append
-  tags: ["assigned_to_jira_team_server_repl", "experimental", "jepsen_docker"]
+  tags:
+    [
+      "assigned_to_jira_team_server_repl",
+      "experimental",
+      "jepsen_docker",
+      "uses_docker",
+    ]
   commands:
     - func: "do setup"
     - func: "do jepsen docker setup"
@@ -689,11 +701,7 @@ tasks:
 # Check that the mutational fuzzer can parse JS files modified in a patch build.
 - name: lint_fuzzer_sanity_patch
-  tags:
-    [
-      "assigned_to_jira_team_devprod_correctness",
-      "development_critical_single_variant",
-    ]
+  tags: ["assigned_to_jira_team_devprod_correctness", "experimental"]
   patch_only: true
   commands:
     - command: manifest.load
@@ -1517,6 +1525,22 @@ tasks:
   commands:
     - func: "generate smoke test tasks"

+- name: push_mongod_to_ecr
+  tags: ["assigned_to_jira_team_disag_mongod"]
+  depends_on:
+    - name: package
+  commands:
+    - command: manifest.load
+    - func: "git get project and add git tag"
+    - func: "f_expansions_write"
+    - func: "set up venv"
+    - func: "fetch dist tarball"
+    - func: "extract binaries"
+    - command: ec2.assume_role
+      params:
+        role_arn: "${disagg_storage_ecr_arn}"
+    - func: "build and push module images"
+
 - name: selinux_rhel8_enterprise
   tags: ["assigned_to_jira_team_server_security", "experimental"]
   depends_on:


@@ -1962,14 +1962,15 @@ tasks:
 - <<: *jstestfuzz_template
   name: resharding_timeseries_fuzzer_gen
-  tags:
-    [
+  tags: [
       "assigned_to_jira_team_server_cluster_scalability",
       "default",
       "feature_flag_guarded",
       "random_name",
       "require_npm",
       "requires_all_feature_flags",
+      # TODO SERVER-109849: Remove this tag.
+      "incompatible_disaggregated_storage",
     ]
   commands:
     - func: "generate resmoke tasks"


@@ -667,12 +667,13 @@ tasks:
 - <<: *jstestfuzz_template
   name: initial_sync_fuzzer_sanity_patch_gen
   patch_only: true
-  tags:
-    [
+  tags: [
      "assigned_to_jira_team_server_repl",
      "default",
      "require_npm",
      "random_name",
+      # TODO SERVER-109849: Remove this tag.
+      "incompatible_disaggregated_storage",
     ]
   commands:
     - func: "generate resmoke tasks"
@@ -1551,13 +1552,14 @@ tasks:
 - <<: *jstestfuzz_template
   name: rollback_fuzzer_sanity_patch_gen
   patch_only: true
-  tags:
-    [
+  tags: [
      "assigned_to_jira_team_server_repl",
      "default",
      "rollbackfuzzer",
      "require_npm",
      "random_name",
+      # TODO SERVER-109849: Remove this tag.
+      "incompatible_disaggregated_storage",
     ]
   commands:
     - func: "generate resmoke tasks"
@@ -2341,3 +2343,30 @@ tasks:
     - func: "generate resmoke tasks"
       vars:
         suite: v1index_jscore_passthrough

+################################################
+# Disagg Storage tasks                         #
+################################################
+- <<: *gen_task_template
+  name: disagg_storage_gen
+  tags:
+    [
+      "assigned_to_jira_team_server_disagg",
+      "default",
+      "large",
+      "clustered_collections",
+      "uses_docker",
+    ]
+  commands:
+    - func: "generate resmoke tasks"
+      vars:
+        suite: disagg_storage
+        use_large_distro: "true"
+
+- <<: *task_template
+  name: disagg_repl_jscore_passthrough
+  tags: ["assigned_to_jira_team_server_disagg", "default", "uses_docker"]
+  commands:
+    - func: "do setup"
+    - func: "run tests"


@@ -85,7 +85,7 @@ buildvariants:
 - <<: *linux-arm64-dynamic-compile-params
   name: &amazon-linux2023-arm64-static-compile amazon-linux2023-arm64-static-compile
-  display_name: "! Amazon Linux 2023 arm64 Enterprise Compile"
+  display_name: "! Amazon Linux 2023 arm64 Atlas Compile"
   tags: ["required", "bazel_check", "forbid_tasks_tagged_with_experimental"]
   expansions:
     <<: *linux-arm64-static-enterprise-compile-expansions
@@ -104,7 +104,6 @@ buildvariants:
     # since it's running on a c6g.16xlarge
     bazel_compile_flags: >-
       --define=MONGO_DISTMOD=amazon2023
-      --//bazel/config:build_otel=True
       --remote_execution_priority=3
       --jobs=1600
       --build_atlas=True
@@ -169,8 +168,8 @@ buildvariants:
    - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
      distros:
        - amazon2023-arm64-atlas-latest-large
-    - name: .default !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
+    - name: .default !.requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
-    - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
+    - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
      distros:
        - amazon2023-arm64-atlas-latest-large
    - name: .fuzzer_deterministic


@@ -168,10 +168,16 @@ buildvariants:
    - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only
      distros:
        - amazon2023-arm64-atlas-latest-large
-    - name: .default !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only
+    - name: .default !.requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only
-    - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only
+    - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only
      distros:
        - amazon2023-arm64-atlas-latest-large
+    - name: .default !.requires_large_host .uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only
+      distros:
+        - amazon2023-arm64-latest-small
+    - name: .default .requires_large_host .uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only
+      distros:
+        - amazon2023-arm64-latest-large
    - name: .fuzzer_deterministic
 - <<: *enterprise-amazon-linux2023-arm64-all-feature-flags-template
@@ -193,8 +199,8 @@ buildvariants:
    - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.requires_all_feature_flags
      distros:
        - amazon2023-arm64-latest-large
-    - name: .default !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.requires_all_feature_flags
+    - name: .default !.requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.requires_all_feature_flags
-    - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.requires_all_feature_flags
+    - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.requires_all_feature_flags
      distros:
        - amazon2023-arm64-latest-large
    - name: .fuzzer_deterministic
@@ -219,8 +225,8 @@ buildvariants:
    - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags !.multiversion !.suggested_excluding_required__for_devprod_mitigation_only
      distros:
        - amazon2023-arm64-atlas-latest-large
-    - name: .default !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags !.multiversion !.suggested_excluding_required__for_devprod_mitigation_only
+    - name: .default !.requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags !.multiversion !.suggested_excluding_required__for_devprod_mitigation_only
-    - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags !.multiversion !.suggested_excluding_required__for_devprod_mitigation_only
+    - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags !.multiversion !.suggested_excluding_required__for_devprod_mitigation_only
      distros:
        - amazon2023-arm64-atlas-latest-large
    - name: .fuzzer_deterministic
@@ -524,8 +530,8 @@ buildvariants:
    - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.multiversion
      distros:
        - amazon2023-arm64-latest-large
-    - name: .default !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.multiversion
+    - name: .default !.requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.multiversion
-    - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.multiversion
+    - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.suggested_excluding_required__for_devprod_mitigation_only !.multiversion
      distros:
        - amazon2023-arm64-latest-large
    - name: .fuzzer_deterministic !.multiversion
@@ -548,8 +554,8 @@ buildvariants:
    # - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.multiversion !.serverless !.exclude_when_record_ids_replicated
    #   distros:
    #     - amazon2023-arm64-atlas-latest-large
-   # - name: .default !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.multiversion !.serverless !.exclude_when_record_ids_replicated
+   # - name: .default !.requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.multiversion !.serverless !.exclude_when_record_ids_replicated
-   # - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.multiversion !.serverless !.exclude_when_record_ids_replicated
+   # - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags !.multiversion !.serverless !.exclude_when_record_ids_replicated
    #   distros:
    #     - amazon2023-arm64-atlas-latest-large
    # expansions:


@@ -129,8 +129,8 @@ buildvariants:
    - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
      distros:
        - rhel8.8-medium
-    - name: .default !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
+    - name: .default !.requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
-    - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
+    - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.requires_all_feature_flags
      distros:
        - rhel8.8-medium


@@ -303,8 +303,8 @@ buildvariants:
    - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags
      distros:
        - rhel8.8-medium
-    - name: .default !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags
+    - name: .default !.requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags
-    - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags
+    - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_all_feature_flags
      distros:
        - rhel8.8-medium


@@ -226,11 +226,11 @@ buildvariants:
    - name: .release_critical .requires_large_host_debug_mode !.incompatible_development_variant !.incompatible_debug_mode !.incompatible_system_allocator !.requires_all_feature_flags
      distros:
        - *enterprise-rhel-8-64-bit-dynamic-debug-mode-large-distro-name
-    - name: .default !.requires_large_host !.requires_large_host_debug_mode !.incompatible_development_variant !.incompatible_debug_mode !.incompatible_system_allocator !.requires_all_feature_flags
+    - name: .default !.requires_large_host !.requires_large_host_debug_mode !.uses_docker !.incompatible_development_variant !.incompatible_debug_mode !.incompatible_system_allocator !.requires_all_feature_flags
-    - name: .default .requires_large_host !.incompatible_development_variant !.incompatible_debug_mode !.incompatible_system_allocator !.requires_all_feature_flags
+    - name: .default .requires_large_host !.uses_docker !.incompatible_development_variant !.incompatible_debug_mode !.incompatible_system_allocator !.requires_all_feature_flags
      distros:
        - *enterprise-rhel-8-64-bit-dynamic-debug-mode-large-distro-name
-    - name: .default .requires_large_host_debug_mode !.incompatible_development_variant !.incompatible_debug_mode !.incompatible_system_allocator !.requires_all_feature_flags
+    - name: .default .requires_large_host_debug_mode !.uses_docker !.incompatible_development_variant !.incompatible_debug_mode !.incompatible_system_allocator !.requires_all_feature_flags
      distros:
        - *enterprise-rhel-8-64-bit-dynamic-debug-mode-large-distro-name
    - name: .non_deterministic !.requires_large_host !.requires_large_host_debug_mode !.incompatible_development_variant !.incompatible_debug_mode !.incompatible_system_allocator !.requires_all_feature_flags
@@ -480,8 +480,8 @@ buildvariants:
    - name: .release_critical .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_aubsan !.incompatible_system_allocator !.incompatible_all_feature_flags
      distros:
        - rhel8.8-xlarge
-    - name: .default !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_aubsan !.incompatible_system_allocator !.incompatible_all_feature_flags
+    - name: .default !.requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_aubsan !.incompatible_system_allocator !.incompatible_all_feature_flags
-    - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_aubsan !.incompatible_system_allocator !.incompatible_all_feature_flags
+    - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_aubsan !.incompatible_system_allocator !.incompatible_all_feature_flags
      distros:
        - rhel8.8-xlarge
    - name: .non_deterministic !.requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_aubsan !.incompatible_system_allocator !.incompatible_all_feature_flags
@@ -583,11 +583,11 @@ buildvariants:
    - name: .release_critical .requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags
      distros:
        - *enterprise-rhel8-debug-tsan-large-distro-name
-    - name: .default !.requires_large_host !.requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags
+    - name: .default !.requires_large_host !.requires_large_host_tsan !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags
-    - name: .default .requires_large_host !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags
+    - name: .default .requires_large_host !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags
      distros:
        - *enterprise-rhel8-debug-tsan-large-distro-name
-    - name: .default .requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags
+    - name: .default .requires_large_host_tsan !.uses_docker !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags
      distros:
        - *enterprise-rhel8-debug-tsan-large-distro-name
    - name: .non_deterministic !.requires_large_host !.requires_large_host_tsan !.requires_compile_variant !.incompatible_development_variant !.incompatible_tsan !.incompatible_system_allocator !.incompatible_all_feature_flags


@@ -0,0 +1,7 @@
set -e
for dir in ./src/src/mongo/db/modules/*; do
if test -f $dir/evergreen/build_and_push_images.sh; then
bash $dir/evergreen/build_and_push_images.sh
fi
done


@@ -0,0 +1,7 @@
set -e
for dir in ./src/src/mongo/db/modules/*; do
if test -f $dir/evergreen/fetch_images.sh; then
bash $dir/evergreen/fetch_images.sh
fi
done


@@ -87,3 +87,9 @@ filters:
   - "json_utils.js":
       approvers:
         - 10gen/query-integration-extensions
+  - "replicated_ident_utils.js":
+      approvers:
+        - 10gen/server-storage-engine-integration
+  - "replicated_record_ids_utils.js":
+      approvers:
+        - 10gen/server-storage-engine-integration


@@ -2,7 +2,7 @@
 // documents, otherwise.
 import {ReplSetTest} from "jstests/libs/replsettest.js";

-function getShowRecordIdsCursor(node, dbName, replicatedCollName) {
+export function getShowRecordIdsCursor(node, dbName, replicatedCollName) {
     return node
         .getDB(dbName)
         [replicatedCollName].aggregate([{"$project": {"recordId": {"$meta": "recordId"}, "document": "$$ROOT"}}]);
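A minimal sketch of how the newly exported helper might be consumed from another test. The import path, the collection names, and the assumption that record IDs are replicated for the collection are all hypothetical here; `rst` stands for an already-running ReplSetTest:

// Hypothetical usage: compare recordId/document pairs across the nodes of a replica set `rst`
// whose collection "coll" in database "test" has replicated record IDs (assumed).
import {getShowRecordIdsCursor} from "jstests/libs/replicated_record_ids_utils.js";

const primaryDocs = getShowRecordIdsCursor(rst.getPrimary(), "test", "coll").toArray();
const secondaryDocs = getShowRecordIdsCursor(rst.getSecondary(), "test", "coll").toArray();
assert.sameMembers(primaryDocs, secondaryDocs, "recordIds should match across nodes");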


@@ -0,0 +1,86 @@
/*
* Helpers for basic testing of replicated idents.
*/
function getOplog(node) {
return node.getDB("local").oplog.rs;
}
export function getSortedCatalogEntries(node, sortField = "ident") {
const adminDB = node.getDB("admin");
const isSystemProfile = {"name": "system.profile"};
const isLocal = {"db": "local"};
const match = {$nor: [isSystemProfile, isLocal]};
return adminDB.aggregate([{$listCatalog: {}}, {$match: match}, {$sort: {[sortField]: 1}}]).toArray();
}
/**
* Given catalog entries for 2 nodes, where catalog entries for both nodes must be sorted by the
* same field, validates that each entry has a matching 'ident'.
*/
export function assertMatchingCatalogIdents(node0CatalogIdents, node1CatalogIdents) {
jsTest.log(
`Asserting catalog entries for node0 ${tojson(node0CatalogIdents)} with node1 ${tojson(node1CatalogIdents)}`,
);
assert.eq(
node0CatalogIdents.length,
node1CatalogIdents.length,
`Expected nodes to have same number of entries. Entries for node0 ${tojson(
node0CatalogIdents,
)}, entries for node1 ${node1CatalogIdents}`,
);
const numCatalogEntries = node0CatalogIdents.length;
const entriesThatDontMatch = [];
for (let i = 0; i < numCatalogEntries; i++) {
const entryNode0 = node0CatalogIdents[i];
const entryNode1 = node1CatalogIdents[i];
if (bsonWoCompare(entryNode0, entryNode1) !== 0) {
// For visibility, collect all mismatched entries before failing.
entriesThatDontMatch.push([entryNode0, entryNode1]);
jsTest.log(
`Expected both nodes to have same entries. Node0 has ${tojson(
entryNode0,
)}, Node1 has ${tojson(entryNode1)}`,
);
}
}
assert.eq(
0,
entriesThatDontMatch.length,
`Catalog entries were expected to match, but don't. Entries that don't match ${tojson(
entriesThatDontMatch,
)}`,
);
}
// Validates that all 'create' collection oplog entries contain collection idents.
export function assertCreateOplogEntriesContainIdents(node) {
const createOps = getOplog(node)
.find({"op": "c", "o.create": {$exists: true}})
.toArray();
jsTest.log("Create oplog entries on node " + node.port + " " + tojson(createOps));
assert.lt(0, createOps.length);
for (let op of createOps) {
assert(
op.hasOwnProperty("o2"),
`Expected to have 'o2' field present in ${tojson(
op,
)}. Dumping all create oplog entries ${tojson(createOps)}`,
);
const o2 = op["o2"];
assert(
o2.hasOwnProperty("ident"),
`Expected to find 'ident' property in 'o2' field of ${tojson(
op,
)}. Dumping all create oplog entries ${tojson(createOps)}`,
);
assert(
o2.hasOwnProperty("idIndexIdent"),
`Expected to find 'idIndexIdent' property in 'o2' field of ${tojson(
op,
)}. Dumping all create oplog entries ${tojson(createOps)}`,
);
}
}
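A minimal usage sketch for these helpers, assuming the file lives at jstests/libs/replicated_ident_utils.js and that the replica set is running in a configuration where idents are replicated (otherwise per-node idents would legitimately differ):

import {ReplSetTest} from "jstests/libs/replsettest.js";
import {
    assertCreateOplogEntriesContainIdents,
    assertMatchingCatalogIdents,
    getSortedCatalogEntries,
} from "jstests/libs/replicated_ident_utils.js";

const rst = new ReplSetTest({nodes: 2});
rst.startSet();
rst.initiate();
assert.commandWorked(rst.getPrimary().getDB("test").coll.insert({x: 1}));
rst.awaitReplication();

// Catalog entries (including idents) should line up across nodes, and every 'create'
// oplog entry should carry the collection and _id index idents.
assertMatchingCatalogIdents(getSortedCatalogEntries(rst.nodes[0]), getSortedCatalogEntries(rst.nodes[1]));
assertCreateOplogEntriesContainIdents(rst.getPrimary());
rst.stopSet();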


@@ -1768,6 +1768,28 @@ export class ReplSetTest {
         });
     }

+    /**
+     * Runs replSetInitiate on the first node of the replica set.
+     *
+     * TODO (SERVER-109841): replSetInitiate is currently a no-op command for disagg. Determine the
+     * next steps for this function if additional functionality is to be incorporated.
+     */
+    initiateForDisagg(cfg, initCmd) {
+        const startTime = new Date(); // Measure the execution time of this function.
+
+        // Blocks until there is a primary. We use a faster retry interval here since we expect the
+        // primary to be ready very soon. We also turn the failpoint off once we have a primary.
+        this.getPrimary(this.kDefaultTimeoutMS, 25 /* retryIntervalMS */);
+
+        jsTest.log(
+            "ReplSetTest initiateForDisagg took " +
+                (new Date() - startTime) +
+                "ms for " +
+                this.nodes.length +
+                " nodes.",
+        );
+    }
+
     /**
      * Steps up 'node' as primary and by default it waits for the stepped up node to become a
      * writable primary and waits for all nodes to reach the same optime before sending the
@@ -3589,7 +3611,7 @@ function _constructStartNewInstances(rst, opts) {
         rst._unbridgedPorts = Array.from({length: numNodes}, rst._allocatePortForNode);
         rst._unbridgedNodes = [];
     } else {
-        rst.ports = Array.from({length: numNodes}, rst._allocatePortForNode);
+        rst.ports = opts.ports || Array.from({length: numNodes}, rst._allocatePortForNode);
     }

     for (let i = 0; i < numNodes; i++) {
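A hedged sketch of how a test might drive the new entry point. It assumes the mongods were started with disaggregated storage enabled (for example via the disaggregatedStorageEnabled/disaggregatedStorageConfig set parameters wired into the resmoke fixture above); on a conventional replica set no primary would ever be elected without a real replSetInitiate:

import {ReplSetTest} from "jstests/libs/replsettest.js";

// Illustration only: with disaggregated storage, replSetInitiate is a no-op, so this helper
// simply waits until one of the nodes reports itself as primary.
const rst = new ReplSetTest({nodes: 3, nodeOptions: {/* disagg-enabled options assumed */}});
rst.startSet();
rst.initiateForDisagg();
jsTest.log("Primary elected on port " + rst.getPrimary().port);
rst.stopSet();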


@@ -530,6 +530,7 @@ encrypted_storage_engine:
     slack: server-security
     jira: Server Security
   files:
+    - src/mongo/db/modules/atlas/src/disagg_storage/encryption
     - src/mongo/db/modules/enterprise/src/encryptdb

 security:
@@ -854,8 +855,7 @@ disagg_storage:
     slack: disaggregated-storage-mongod
     jira: RSSD
   files:
-    - src/mongo/db/modules/atlas
-    - src/mongo/db/disagg_storage
+    - src/mongo/db/modules/atlas/src/disagg_storage

 storage_engine_api:
   meta:
@@ -992,6 +992,13 @@ installer:
   files:
     - src/mongo/installer/

+replicated_storage_service:
+  meta:
+    slack: server-replication
+    jira: Server Replication
+  files:
+    - src/mongo/db/rss
+
 replication:
   meta:
     slack: server-replication


@@ -914,6 +914,49 @@
      },
      "scope": "required"
    },
{
"supplier": {
"name": "Organization: github"
},
"name": "googletest",
"version": "1.17.0",
"licenses": [
{
"license": {
"id": "BSD-3-Clause"
}
}
],
"purl": "pkg:github/googletest/googletest@v1.17.0",
"properties": [
{
"name": "internal:team_responsible",
"value": "Disaggregated Storage"
},
{
"name": "emits_persisted_data",
"value": "false"
},
{
"name": "info_link",
"value": "https://github.com/google/googletest"
},
{
"name": "import_script_path",
"value": "src/third_party/googletest_restricted_for_disagg_only/scripts/import.sh"
}
],
"type": "library",
"bom-ref": "e57f94bd-b0b1-4e47-912e-c690a01e4f95",
"evidence": {
"occurrences": [
{
"location": "src/third_party/googletest_restricted_for_disagg_only"
}
]
},
"scope": "required"
},
    {
      "type": "library",
      "bom-ref": "pkg:github/gperftools/gperftools@gperftools-2.9.1",


@@ -21,6 +21,11 @@ generate_config_header(
         "MONGO_CONFIG_OTEL": "1",
     },
     "//conditions:default": {},
+}) | select({
+    "//bazel/config:build_atlas_enabled": {
+        "MONGO_CONFIG_DISAGG_STORAGE": "1",
+    },
+    "//conditions:default": {},
 }) | select({
     "//bazel/config:mutex_observation_enabled": {
         "MONGO_CONFIG_MUTEX_OBSERVATION": "1",


@@ -125,5 +125,8 @@
 // Defined if the build includes OpenTelemetry
 @mongo_config_otel@

+// Defined if the build includes disaggregated storage
+@mongo_config_disagg_storage@
+
 // Defined if the build includes mutex observation
 @mongo_config_mutex_observation@


@@ -3320,6 +3320,8 @@ mongo_cc_library(
         "//src/mongo/db/repl:serveronly_repl",
         "//src/mongo/db/repl:storage_interface_impl",
         "//src/mongo/db/repl:topology_coordinator",
+        "//src/mongo/db/rss:persistence_provider_impl",
+        "//src/mongo/db/rss:service_lifecycle_impl",
         "rw_concern_d",
         "//src/mongo/db/session:kill_sessions_local",
         "//src/mongo/db/session:service_liaison_mongod",
@@ -3433,6 +3435,11 @@ mongo_cc_library(
             "//src/mongo/db/modules/enterprise/src/kmip:kmip_configuration",
         ],
         "//conditions:default": [],
+    }) + select({
+        "//bazel/config:build_atlas_enabled": [
+            "//src/mongo/db/modules/atlas/src/disagg_storage/encryption:sls_log_encryption_manager",
+        ],
+        "//conditions:default": [],
     }),
 )
@@ -3530,6 +3537,7 @@ mongo_cc_library(
         "//src/mongo/db/repl:storage_interface_impl",
         "//src/mongo/db/repl:topology_coordinator",
         "//src/mongo/db/repl:wait_for_majority_service",
+        "//src/mongo/db/rss:replicated_storage_service",
         "//src/mongo/db/s:query_analysis_writer",
         "//src/mongo/db/s:sessions_collection_config_server",
         "//src/mongo/db/s:sharding_commands_d",
@@ -3563,11 +3571,6 @@ mongo_cc_library(
            "//src/mongo/util/tracing_profiler",
        ],
        "//conditions:default": [],
-    }) + select({
-        "//bazel/config:build_atlas_required_settings": [
-            "//src/mongo/db/modules/atlas:atlas_only",
-        ],
-        "//conditions:default": [],
     }),
 )
@@ -3697,6 +3700,7 @@ mongo_cc_library(
         "//src/mongo/db/local_catalog:catalog_impl",
         "//src/mongo/db/op_observer",
         "//src/mongo/db/repl:replmocks",
+        "//src/mongo/db/rss:persistence_provider_impl",
         "//src/mongo/db/s:sharding_runtime_d",
         "//src/mongo/db/storage:storage_control",
         "//src/mongo/db/storage:storage_options",
@@ -4138,6 +4142,7 @@ mongo_cc_library(
         "//src/mongo/db/local_catalog:database_holder",
         "//src/mongo/db/op_observer",
         "//src/mongo/db/repl:replmocks",
+        "//src/mongo/db/rss:persistence_provider_impl",
         "//src/mongo/db/s:sharding_runtime_d",
         "//src/mongo/db/stats:top",
         "//src/mongo/db/storage:storage_control",
@@ -4369,6 +4374,7 @@ mongo_cc_benchmark(
         "//src/mongo/db/repl:repl_coordinator_impl",
         "//src/mongo/db/repl:serveronly_repl",
         "//src/mongo/db/repl:storage_interface_impl",
+        "//src/mongo/db/rss:persistence_provider_impl",
         "//src/mongo/db/s:sharding_runtime_d",
         "//src/mongo/db/storage:storage_control",
         "//src/mongo/db/storage/wiredtiger:storage_wiredtiger",


@@ -127,24 +127,62 @@ Timestamp getMedianAppliedTimestamp(const std::vector<repl::MemberData>& sortedMemberData) {
     const int sustainerIdx = sortedMemberData.size() / 2;
     return sortedMemberData[sustainerIdx].getLastAppliedOpTime().getTimestamp();
 }
} // namespace
namespace flow_control_details {
ReplicationTimestampProvider::ReplicationTimestampProvider(repl::ReplicationCoordinator* replCoord)
: _replCoord(replCoord) {}
Timestamp ReplicationTimestampProvider::getCurrSustainerTimestamp() const {
return getMedianAppliedTimestamp(_currMemberData);
}
Timestamp ReplicationTimestampProvider::getPrevSustainerTimestamp() const {
return getMedianAppliedTimestamp(_prevMemberData);
}
repl::TimestampAndWallTime ReplicationTimestampProvider::getTargetTimestampAndWallTime() const {
auto time = _replCoord->getLastCommittedOpTimeAndWallTime();
return {.timestamp = time.opTime.getTimestamp(), .wallTime = time.wallTime};
}
repl::TimestampAndWallTime ReplicationTimestampProvider::getLastWriteTimestampAndWallTime() const {
auto time = _replCoord->getMyLastAppliedOpTimeAndWallTime();
return {.timestamp = time.opTime.getTimestamp(), .wallTime = time.wallTime};
}
void ReplicationTimestampProvider::update() {
_prevMemberData = _currMemberData;
_currMemberData = _replCoord->getMemberData();
// Sort MemberData with the 0th index being the node with the lowest applied optime.
std::sort(_currMemberData.begin(),
_currMemberData.end(),
[](const repl::MemberData& left, const repl::MemberData& right) -> bool {
return left.getLastAppliedOpTime() < right.getLastAppliedOpTime();
});
}
bool ReplicationTimestampProvider::flowControlUsable() const {
return _replCoord->canAcceptNonLocalWrites();
}
 /**
  * Sanity checks whether the successive queries of topology data are comparable for doing a flow
  * control calculation. In particular, the number of members must be the same and the median
  * applier's timestamp must not go backwards.
  */
-bool sustainerAdvanced(const std::vector<repl::MemberData>& prevMemberData,
-                       const std::vector<repl::MemberData>& currMemberData) {
-    if (currMemberData.size() == 0 || currMemberData.size() != prevMemberData.size()) {
+bool ReplicationTimestampProvider::sustainerAdvanced() const {
+    if (_currMemberData.size() == 0 || _currMemberData.size() != _prevMemberData.size()) {
         LOGV2_WARNING(22223,
                       "Flow control detected a change in topology",
-                      "prevSize"_attr = prevMemberData.size(),
-                      "currSize"_attr = currMemberData.size());
+                      "prevSize"_attr = _prevMemberData.size(),
+                      "currSize"_attr = _currMemberData.size());
         return false;
     }

-    auto currSustainerAppliedTs = getMedianAppliedTimestamp(currMemberData);
-    auto prevSustainerAppliedTs = getMedianAppliedTimestamp(prevMemberData);
+    auto currSustainerAppliedTs = getMedianAppliedTimestamp(_currMemberData);
+    auto prevSustainerAppliedTs = getMedianAppliedTimestamp(_prevMemberData);

     if (currSustainerAppliedTs < prevSustainerAppliedTs) {
         LOGV2_WARNING(22224,
@@ -156,13 +194,42 @@ bool sustainerAdvanced(const std::vector<repl::MemberData>& prevMemberData,
     return true;
 }

-}  // namespace
void ReplicationTimestampProvider::setCurrMemberData_forTest(
const std::vector<repl::MemberData>& memberData) {
_currMemberData = memberData;
std::sort(_currMemberData.begin(),
_currMemberData.end(),
[](const repl::MemberData& left, const repl::MemberData& right) -> bool {
return left.getLastAppliedOpTime() < right.getLastAppliedOpTime();
});
}
void ReplicationTimestampProvider::setPrevMemberData_forTest(
const std::vector<repl::MemberData>& memberData) {
_prevMemberData = memberData;
std::sort(_prevMemberData.begin(),
_prevMemberData.end(),
[](const repl::MemberData& left, const repl::MemberData& right) -> bool {
return left.getLastAppliedOpTime() < right.getLastAppliedOpTime();
});
}
} // namespace flow_control_details
 FlowControl::FlowControl(repl::ReplicationCoordinator* replCoord)
-    : _replCoord(replCoord), _lastTimeSustainerAdvanced(Date_t::now()) {}
+    : _timestampProvider(
+          std::make_unique<flow_control_details::ReplicationTimestampProvider>(replCoord)),
+      _lastTimeSustainerAdvanced(Date_t::now()) {}

 FlowControl::FlowControl(ServiceContext* service, repl::ReplicationCoordinator* replCoord)
-    : _replCoord(replCoord), _lastTimeSustainerAdvanced(Date_t::now()) {
+    : FlowControl(service,
+                  std::make_unique<flow_control_details::ReplicationTimestampProvider>(replCoord)) {
+}
+
+FlowControl::FlowControl(ServiceContext* service,
+                         std::unique_ptr<TimestampProvider> timestampProvider)
+    : _timestampProvider(std::move(timestampProvider)), _lastTimeSustainerAdvanced(Date_t::now()) {
     // Initialize _lastTargetTicketsPermitted to maximum tickets to make sure flow control doesn't
     // cause a slow start on start up.
     FlowControlTicketholder::set(service, std::make_unique<FlowControlTicketholder>(kMaxTickets));
@@ -254,44 +321,26 @@ void FlowControl::disableUntil(Date_t deadline) {
     _disableUntil.store(deadline);
 }

-/**
- * Advance the `_*MemberData` fields and sort the new data by the element's last applied optime.
- */
-void FlowControl::_updateTopologyData() {
-    _prevMemberData = _currMemberData;
-    _currMemberData = _replCoord->getMemberData();
-
-    // Sort MemberData with the 0th index being the node with the lowest applied optime.
-    std::sort(_currMemberData.begin(),
-              _currMemberData.end(),
-              [](const repl::MemberData& left, const repl::MemberData& right) -> bool {
-                  return left.getLastAppliedOpTime() < right.getLastAppliedOpTime();
-              });
-}
-
-int FlowControl::_calculateNewTicketsForLag(const std::vector<repl::MemberData>& prevMemberData,
-                                            const std::vector<repl::MemberData>& currMemberData,
+int FlowControl::_calculateNewTicketsForLag(const Timestamp& prevSustainerTimestamp,
+                                            const Timestamp& currSustainerTimestamp,
                                             std::int64_t locksUsedLastPeriod,
                                             double locksPerOp,
                                             std::uint64_t lagMillis,
                                             std::uint64_t thresholdLagMillis) {
+    invariant(prevSustainerTimestamp <= currSustainerTimestamp,
+              fmt::format("PrevSustainer: {} CurrSustainer: {}",
+                          prevSustainerTimestamp.toString(),
+                          currSustainerTimestamp.toString()));
     invariant(lagMillis >= thresholdLagMillis);
-    const auto currSustainerAppliedTs = getMedianAppliedTimestamp(currMemberData);
-    const auto prevSustainerAppliedTs = getMedianAppliedTimestamp(prevMemberData);
-    invariant(prevSustainerAppliedTs <= currSustainerAppliedTs,
-              fmt::format("PrevSustainer: {} CurrSustainer: {}",
-                          prevSustainerAppliedTs.toString(),
-                          currSustainerAppliedTs.toString()));

     const std::int64_t sustainerAppliedCount =
-        _approximateOpsBetween(prevSustainerAppliedTs, currSustainerAppliedTs);
+        _approximateOpsBetween(prevSustainerTimestamp, currSustainerTimestamp);
     LOGV2_DEBUG(22218,
                 DEBUG_LOG_LEVEL,
-                " PrevApplied: {prevSustainerAppliedTs} CurrApplied: {currSustainerAppliedTs} "
+                " PrevApplied: {prevSustainerTimestamp} CurrApplied: {currSustainerTimestamp} "
                 "NumSustainerApplied: {sustainerAppliedCount}",
-                "prevSustainerAppliedTs"_attr = prevSustainerAppliedTs,
-                "currSustainerAppliedTs"_attr = currSustainerAppliedTs,
+                "prevSustainerTimestamp"_attr = prevSustainerTimestamp,
+                "currSustainerTimestamp"_attr = currSustainerTimestamp,
                 "sustainerAppliedCount"_attr = sustainerAppliedCount);
     if (sustainerAppliedCount > 0) {
         _lastTimeSustainerAdvanced = Date_t::now();
@@ -359,35 +408,35 @@ int FlowControl::getNumTickets(Date_t now) {
     }

     // Flow Control is only enabled on nodes that can accept writes.
-    const bool canAcceptWrites = _replCoord->canAcceptNonLocalWrites();
+    const bool flowControlUsable = _timestampProvider->flowControlUsable();

     if (auto sfp = flowControlTicketOverride.scoped(); MONGO_unlikely(sfp.isActive())) {
         int numTickets = sfp.getData().getIntField("numTickets");
-        if (numTickets > 0 && canAcceptWrites) {
+        if (numTickets > 0 && flowControlUsable) {
             return numTickets;
         }
     }

     // It's important to update the topology on each iteration.
-    _updateTopologyData();
-    const repl::OpTimeAndWallTime myLastApplied = _replCoord->getMyLastAppliedOpTimeAndWallTime();
-    const repl::OpTimeAndWallTime lastCommitted = _replCoord->getLastCommittedOpTimeAndWallTime();
+    _timestampProvider->update();
+    const auto lastWriteTime = _timestampProvider->getLastWriteTimestampAndWallTime();
+    const auto lastTargetTime = _timestampProvider->getTargetTimestampAndWallTime();

     const double locksPerOp = _getLocksPerOp();
     const std::int64_t locksUsedLastPeriod = _getLocksUsedLastPeriod();
-    if (gFlowControlEnabled.load() == false || canAcceptWrites == false || locksPerOp < 0.0) {
-        _trimSamples(std::min(lastCommitted.opTime.getTimestamp(),
-                              getMedianAppliedTimestamp(_prevMemberData)));
+    if (gFlowControlEnabled.load() == false || flowControlUsable == false || locksPerOp < 0.0) {
+        _trimSamples(
+            std::min(lastTargetTime.timestamp, _timestampProvider->getPrevSustainerTimestamp()));
         return kMaxTickets;
     }

     int ret = 0;
     const auto thresholdLagMillis = getThresholdLagMillis();

-    // Successive lastCommitted and lastApplied wall clock time recordings are not guaranteed to be
+    // Successive lastTargetTime and lastApplied wall clock time recordings are not guaranteed to be
     // monotonically increasing. Recordings that satisfy the following check result in a negative
     // value for lag, so ignore them.
-    const bool ignoreWallTimes = lastCommitted.wallTime > myLastApplied.wallTime;
+    const bool ignoreWallTimes = lastTargetTime.wallTime > lastWriteTime.wallTime;

     // _approximateOpsBetween will return -1 if the input timestamps are in the same "bucket".
     // This is an indication that there are very few ops between the two timestamps.
@@ -395,9 +444,8 @@ int FlowControl::getNumTickets(Date_t now) {
     // Don't let the no-op writer on idle systems fool the sophisticated "is the replica set
     // lagged" classifier.
     const bool isHealthy = !ignoreWallTimes &&
-        (getLagMillis(myLastApplied.wallTime, lastCommitted.wallTime) < thresholdLagMillis ||
-         _approximateOpsBetween(lastCommitted.opTime.getTimestamp(),
-                                myLastApplied.opTime.getTimestamp()) == -1);
+        (getLagMillis(lastWriteTime.wallTime, lastTargetTime.wallTime) < thresholdLagMillis ||
+         _approximateOpsBetween(lastTargetTime.timestamp, lastWriteTime.timestamp) == -1);

     if (isHealthy) {
         // The add/multiply technique is used to ensure ticket allocation can ramp up quickly,
@@ -412,16 +460,16 @@ int FlowControl::getNumTickets(Date_t now) {
             auto waitTime = curTimeMicros64() - _startWaitTime;
             _isLaggedTimeMicros.fetchAndAddRelaxed(waitTime);
         }
-    } else if (!ignoreWallTimes && sustainerAdvanced(_prevMemberData, _currMemberData)) {
+    } else if (!ignoreWallTimes && _timestampProvider->sustainerAdvanced()) {
         // Expected case where flow control has meaningful data from the last period to make a new
         // calculation.
-        ret =
-            _calculateNewTicketsForLag(_prevMemberData,
-                                       _currMemberData,
+        ret = _calculateNewTicketsForLag(
+            _timestampProvider->getPrevSustainerTimestamp(),
+            _timestampProvider->getCurrSustainerTimestamp(),
             locksUsedLastPeriod,
             locksPerOp,
-            getLagMillis(myLastApplied.wallTime, lastCommitted.wallTime),
+            getLagMillis(lastWriteTime.wallTime, lastTargetTime.wallTime),
             thresholdLagMillis);
         if (!_isLagged.load()) {
             _isLagged.store(true);
             _isLaggedCount.fetchAndAddRelaxed(1);
@@ -443,9 +491,10 @@ int FlowControl::getNumTickets(Date_t now) {
                 DEBUG_LOG_LEVEL,
                 "FlowControl debug.",
                 "isLagged"_attr = (_isLagged.load() ? "true" : "false"),
-                "currlagMillis"_attr = getLagMillis(myLastApplied.wallTime, lastCommitted.wallTime),
-                "opsLagged"_attr = _approximateOpsBetween(lastCommitted.opTime.getTimestamp(),
-                                                          myLastApplied.opTime.getTimestamp()),
+                "currlagMillis"_attr =
+                    getLagMillis(lastWriteTime.wallTime, lastTargetTime.wallTime),
+                "opsLagged"_attr =
+                    _approximateOpsBetween(lastTargetTime.timestamp, lastWriteTime.timestamp),
                 "granting"_attr = ret,
                 "lastGranted"_attr = _lastTargetTicketsPermitted.load(),
                 "lastSustainerApplied"_attr = _lastSustainerAppliedCount.load(),
@@ -457,7 +506,7 @@ int FlowControl::getNumTickets(Date_t now) {
     _lastTargetTicketsPermitted.store(ret);

     _trimSamples(
-        std::min(lastCommitted.opTime.getTimestamp(), getMedianAppliedTimestamp(_prevMemberData)));
+        std::min(lastTargetTime.timestamp, _timestampProvider->getPrevSustainerTimestamp()));

     return ret;
 }


@@ -34,6 +34,7 @@
 #include "mongo/bson/timestamp.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/repl/member_data.h"
+#include "mongo/db/repl/optime.h"
 #include "mongo/db/repl/replication_coordinator.h"
 #include "mongo/db/repl/replication_coordinator_fwd.h"
 #include "mongo/db/service_context.h"
@@ -62,12 +63,66 @@ namespace mongo {
  */
 class FlowControl {
 public:
class TimestampProvider {
public:
virtual ~TimestampProvider() = default;
/**
* The sustainer timestamp is the timestamp which, if moved forward, will cause an
* advance in the target timestamp. For replication, it is the median applied timestamp
* on all the relevant nodes. We need to know this timestamp both for the current iteration
* and the previous iteration.
*/
virtual Timestamp getCurrSustainerTimestamp() const = 0;
virtual Timestamp getPrevSustainerTimestamp() const = 0;
/**
* The target time is the time we are trying to throttle to. For replication, it is the
* last committed time (majority snapshot time).
*/
virtual repl::TimestampAndWallTime getTargetTimestampAndWallTime() const = 0;
/**
* The last write time is what we are trying to control. For replication, it is
* the last applied time.
*/
virtual repl::TimestampAndWallTime getLastWriteTimestampAndWallTime() const = 0;
/**
* Is flow control possible with this timestamp provider? For replication,
* true if this is a primary and majority read concern is enabled.
*/
virtual bool flowControlUsable() const = 0;
/**
* Are the previous and current updates compatible? For replication,
* makes sure number of nodes is the same and the median node timestamp (the sustainer)
* has not gone backwards.
*/
virtual bool sustainerAdvanced() const = 0;
/**
* Advance the `_*MemberData` fields and sort the new data by the element's last applied
* optime.
*/
virtual void update() = 0;
};
     static constexpr int kMaxTickets = 1000 * 1000 * 1000;

+    /**
+     * Construct a flow control object based on a custom timestamp provider.
+     * Takes ownership of the timestamp provider.
+     */
+    FlowControl(ServiceContext* service, std::unique_ptr<TimestampProvider> timestampProvider);
+
+    /**
+     * Construct a replication-based flow control object.
+     */
     FlowControl(ServiceContext* service, repl::ReplicationCoordinator* replCoord);

     /**
-     * Construct a flow control object without adding a periodic job runner for testing.
+     * Construct a replication-based flow control object without adding a periodic job runner for
+     * testing.
      */
     FlowControl(repl::ReplicationCoordinator* replCoord);
@@ -122,13 +177,13 @@ public:
     std::int64_t _approximateOpsBetween(Timestamp prevTs, Timestamp currTs);

-    void _updateTopologyData();
-    int _calculateNewTicketsForLag(const std::vector<repl::MemberData>& prevMemberData,
-                                   const std::vector<repl::MemberData>& currMemberData,
+    int _calculateNewTicketsForLag(const Timestamp& prevSustainerTimestamp,
+                                   const Timestamp& currSustainerTimestamp,
                                    std::int64_t locksUsedLastPeriod,
                                    double locksPerOp,
                                    std::uint64_t lagMillis,
                                    std::uint64_t thresholdLagMillis);
     void _trimSamples(Timestamp trimSamplesTo);

     // Sample of (timestamp, ops, lock acquisitions) where ops and lock acquisitions are
@@ -139,7 +194,7 @@ public:
     }

 private:
-    repl::ReplicationCoordinator* _replCoord;
+    std::unique_ptr<TimestampProvider> _timestampProvider;

     // These values are updated with each flow control computation and are also surfaced in server
     // status.
@@ -161,9 +216,6 @@ private:
     std::int64_t _lastPollLockAcquisitions = 0;

-    std::vector<repl::MemberData> _currMemberData;
-    std::vector<repl::MemberData> _prevMemberData;
-
     Date_t _lastTimeSustainerAdvanced;

     // This value is used for calculating server status metrics.
@@ -172,4 +224,27 @@ private:
     PeriodicJobAnchor _jobAnchor;
 };
namespace flow_control_details {
class ReplicationTimestampProvider final : public FlowControl::TimestampProvider {
public:
explicit ReplicationTimestampProvider(repl::ReplicationCoordinator* replCoord);
Timestamp getCurrSustainerTimestamp() const final;
Timestamp getPrevSustainerTimestamp() const final;
repl::TimestampAndWallTime getTargetTimestampAndWallTime() const final;
repl::TimestampAndWallTime getLastWriteTimestampAndWallTime() const final;
bool flowControlUsable() const final;
bool sustainerAdvanced() const final;
void update() final;
void setCurrMemberData_forTest(const std::vector<repl::MemberData>& memberData);
void setPrevMemberData_forTest(const std::vector<repl::MemberData>& memberData);
private:
repl::ReplicationCoordinator* _replCoord;
std::vector<repl::MemberData> _currMemberData;
std::vector<repl::MemberData> _prevMemberData;
};
} // namespace flow_control_details
 }  // namespace mongo
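The point of the new seam: FlowControl no longer talks to ReplicationCoordinator directly, so a disaggregated-storage (or test) build can throttle against any notion of "target" and "sustainer" time. A minimal sketch of a custom provider, assuming the TimestampProvider interface above and mongo's Timestamp/Date_t types; the class and its catch-up policy are hypothetical, not part of this commit:

#include "mongo/db/storage/flow_control.h"  // header from this diff; path assumed

namespace mongo {
// Hypothetical provider: the "sustainer" simply catches up to the last write
// each period, so throttling only engages between update() calls.
class FixedLagTimestampProvider final : public FlowControl::TimestampProvider {
public:
    Timestamp getCurrSustainerTimestamp() const final {
        return _curr;
    }
    Timestamp getPrevSustainerTimestamp() const final {
        return _prev;
    }
    repl::TimestampAndWallTime getTargetTimestampAndWallTime() const final {
        return {.timestamp = _curr, .wallTime = Date_t::now()};
    }
    repl::TimestampAndWallTime getLastWriteTimestampAndWallTime() const final {
        return {.timestamp = _lastWrite, .wallTime = Date_t::now()};
    }
    bool flowControlUsable() const final {
        return true;  // no primary-ness or majority-read-concern requirement in this toy model
    }
    bool sustainerAdvanced() const final {
        return _prev <= _curr;  // member counts never change here
    }
    void update() final {
        _prev = _curr;
        _curr = _lastWrite;
    }
    void noteWrite(Timestamp ts) {
        _lastWrite = ts;
    }

private:
    Timestamp _prev, _curr, _lastWrite;
};
}  // namespace mongo

// Wiring it up (hypothetical usage):
//   FlowControl::set(service, std::make_unique<FlowControl>(
//       service, std::make_unique<FixedLagTimestampProvider>()));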


@@ -241,6 +241,15 @@ TEST_F(FlowControlTest, CalculatingTickets) {
     currMemberData.emplace_back(constructMemberData(Timestamp(2000)));
     currMemberData.emplace_back(constructMemberData(Timestamp(3000)));

+    flow_control_details::ReplicationTimestampProvider timestampProvider(replCoordMock);
+    timestampProvider.setPrevMemberData_forTest(prevMemberData);
+    timestampProvider.setCurrMemberData_forTest(currMemberData);
+
+    auto prevSustainerTimestamp = timestampProvider.getPrevSustainerTimestamp();
+    auto currSustainerTimestamp = timestampProvider.getCurrSustainerTimestamp();
+    ASSERT_EQ(Timestamp(1000), prevSustainerTimestamp);
+    ASSERT_EQ(Timestamp(2000), currSustainerTimestamp);
+
     // Construct samples where Timestamp X maps to operation number X.
     for (int ts = 1; ts <= 3000; ++ts) {
         flowControl->sample(Timestamp(ts), 1);
@@ -251,8 +260,8 @@ TEST_F(FlowControlTest, CalculatingTickets) {
     const std::uint64_t thresholdLag = 1;
     const std::uint64_t currLag = thresholdLag;
     ASSERT_EQ(1900,
-              flowControl->_calculateNewTicketsForLag(prevMemberData,
-                                                      currMemberData,
+              flowControl->_calculateNewTicketsForLag(prevSustainerTimestamp,
+                                                      currSustainerTimestamp,
                                                       locksUsedLastPeriod,
                                                       locksPerOp,
                                                       currLag,


@@ -39,6 +39,7 @@
 #include "mongo/db/op_observer/op_observer_util.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/repl/oplog_entry.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/util/assert_util.h"
 #include "mongo/util/decorable.h"
 #include "mongo/util/namespace_string_util.h"
@@ -123,7 +124,9 @@ void AuthOpObserver::onCreateCollection(
     BSONObj o2;
     if (createCollCatalogIdentifier.has_value() &&
-        shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(opCtx))) {
+        shouldReplicateLocalCatalogIdentifers(
+            rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider(),
+            VersionContext::getDecoration(opCtx))) {
         o2 = repl::MutableOplogEntry::makeCreateCollObject2(
             createCollCatalogIdentifier->catalogId,
             createCollCatalogIdentifier->ident,


@@ -1471,6 +1471,7 @@ mongo_cc_library(
         "//src/mongo/db/repl:repl_server_parameters",
         "//src/mongo/db/repl:replica_set_messages",
         "//src/mongo/db/repl/dbcheck",
+        "//src/mongo/db/rss:replicated_storage_service",
         "//src/mongo/db/s:sharding_catalog_manager",
         "//src/mongo/db/s:sharding_commands_d",
         "//src/mongo/db/s:transaction_coordinator",


@@ -35,6 +35,7 @@
 #include "mongo/db/index_builds/index_builds_coordinator.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/s/transaction_coordinator_service.h"
 #include "mongo/logv2/log.h"
 #include "mongo/platform/compiler.h"
@@ -71,7 +72,10 @@ Status stepDownForShutdown(OperationContext* opCtx,
         // Specify a high freeze time, so that if there is a stall during shut down, the node
         // does not run for election.
-        replCoord->stepDown(opCtx, false /* force */, waitTime, Days(1));
+        auto& rss = rss::ReplicatedStorageService::get(opCtx);
+        if (rss.getPersistenceProvider().shouldStepDownForShutdown()) {
+            replCoord->stepDown(opCtx, false /* force */, waitTime, Days(1));
+        }

         if (MONGO_unlikely(hangInShutdownAfterStepdown.shouldFail())) {
             LOGV2(4695100, "hangInShutdownAfterStepdown failpoint enabled");
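For reference, the PersistenceProvider policy hooks this commit consults are scattered across several files. A consolidated sketch of the seam, with method names taken from the call sites in this diff; the exact signatures and the local-disk defaults shown are assumptions, not the real interface in src/mongo/db/rss:

// Hedged sketch only.
class PersistenceProvider {
public:
    virtual ~PersistenceProvider() = default;
    virtual StringData name() const = 0;
    // stepDownForShutdown() skips the replCoord stepdown when this is false.
    virtual bool shouldStepDownForShutdown() const { return true; }
    // acquireCatalogId()/createIndex() replicate catalog idents when this is true.
    virtual bool shouldUseReplicatedCatalogIdentifiers() const { return false; }
    // DatabaseImpl::_createCollection() forces recordIdsReplicated:true when this is true.
    virtual bool shouldUseReplicatedRecordIds() const { return false; }
    // LocalOplogInfo::getNextOpTimes() skips FlowControl sampling when this is false.
    virtual bool shouldUseOplogWritesForFlowControlSampling() const { return true; }
    // _initAndListen() starts the engine without opening the catalog when this is true.
    virtual bool shouldDelayDataAccessDuringStartup() const { return false; }
    // Gates the local startup-log write in _initAndListen().
    virtual bool supportsLocalCollections() const { return true; }
};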


@@ -1,31 +0,0 @@
load("//bazel:mongo_src_rules.bzl", "idl_generator", "mongo_cc_benchmark", "mongo_cc_library", "mongo_cc_unit_test")
package(default_visibility = ["//visibility:public"])
exports_files(
glob([
"*.h",
"*.cpp",
]),
)
idl_generator(
name = "server_parameters_gen",
src = "server_parameters.idl",
deps = [
"//src/mongo/db:basic_types_gen",
],
)
mongo_cc_library(
name = "server_parameters",
srcs = [
"server_parameters_gen",
],
hdrs = [
],
deps = [
"//src/mongo/db:server_base",
"//src/mongo/idl:idl_parser",
],
)


@@ -1,5 +0,0 @@
version: 1.0.0
filters:
- "*":
approvers:
- 10gen/server-disagg-storage


@@ -1,45 +0,0 @@
# Copyright (C) 2025-present MongoDB, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the Server Side Public License, version 1,
# as published by MongoDB, Inc.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Server Side Public License for more details.
#
# You should have received a copy of the Server Side Public License
# along with this program. If not, see
# <http://www.mongodb.com/licensing/server-side-public-license>.
#
# As a special exception, the copyright holders give permission to link the
# code of portions of this program with the OpenSSL library under certain
# conditions as described in each individual source file and distribute
# linked combinations including the program with the OpenSSL library. You
# must comply with the Server Side Public License in all respects for
# all of the code used other than as permitted herein. If you modify file(s)
# with this exception, you may extend this exception to your version of the
# file(s), but you are not obligated to do so. If you do not wish to do so,
# delete this exception statement from your version. If you delete this
# exception statement from all source files in the program, then also delete
# it in the license file.
#
# server setParameters for disaggregated storage
global:
cpp_namespace: "mongo::disagg"
imports:
- "mongo/db/basic_types.idl"
server_parameters:
disaggregatedStorageEnabled:
description: >-
Set this to run the server as a compute node in a disaggregated storage cluster.
set_at: startup
cpp_vartype: bool
cpp_varname: gDisaggregatedStorageEnabled
default: false
redact: false


@@ -158,7 +158,7 @@ private:
     X(abortAllTransactions)                  \
     X(joinLogicalSessionCache)               \
     X(shutDownCursorManager)                 \
-    X(shutDownSLSStateMachine)               \
+    X(shutDownStateRequiredForStorageAccess) \
     /* For magic restore: */                 \
     X(magicRestoreToolTotal)                 \
     X(readMagicRestoreConfig)                \


@@ -33,20 +33,31 @@
 #include "mongo/db/ftdc/collector.h"
 #include "mongo/db/ftdc/controller.h"
 #include "mongo/db/ftdc/ftdc_system_stats.h"
+#include "mongo/logv2/log.h"
 #include "mongo/util/errno_util.h"
 #include "mongo/util/functional.h"
 #include "mongo/util/processinfo.h"
 #include "mongo/util/procparser.h"

 #include <cstdint>
+#include <iostream>
 #include <memory>
+#include <set>
 #include <string>
 #include <utility>
 #include <vector>

+#include <ifaddrs.h>
+#include <linux/ethtool.h>
+#include <linux/if.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
 #include <sys/prctl.h>
 #include <sys/resource.h>

+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kFTDC
+
 namespace mongo {

 namespace {
@@ -102,6 +113,188 @@
     {"TCP"_sd, {"inuse"_sd, "orphan"_sd, "tw"_sd, "alloc"_sd}},
 };
/**
* Class to gather NIC stats by emulating ethtool -S functionality by using the ioctl SIOCETHTOOL.
*/
class EthTool {
public:
static std::unique_ptr<EthTool> create(StringData interface) {
int fd = socket(AF_INET, SOCK_DGRAM, 0);
if (fd == -1) {
auto ec = lastPosixError();
LOGV2_WARNING(
10985539, "Ethtool socket allocation failed", "error"_attr = errorMessage(ec));
return nullptr;
}
auto ethtool = std::unique_ptr<EthTool>(new EthTool(interface, fd));
auto drvinfo = ethtool->get_info();
// Some Linux interfaces cannot be found by ethtool IOCTL.
// Some Linux interfaces have no stats (i.e. the "bridge" driver used by containers).
if (!drvinfo.has_value() || drvinfo->n_stats == 0) {
LOGV2_WARNING(10985540,
"Skipping Ethtool stats collection for interface",
"interface"_attr = interface);
return nullptr;
}
return ethtool;
}
~EthTool() {
free(_gstrings);
close(_fd);
}
// Get a list of all non-loopback interfaces for the machine
static std::vector<std::string> interface_names() {
struct ifaddrs* ifaddr;
if (getifaddrs(&ifaddr) == -1) {
auto ec = lastPosixError();
uasserted(10985538, fmt::format("getifaddrs failed: {}", errorMessage(ec)));
}
ON_BLOCK_EXIT([&] { freeifaddrs(ifaddr); });
std::set<std::string> names;
for (ifaddrs* ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
if (ifa->ifa_addr == NULL) {
continue;
}
if ((ifa->ifa_flags & IFF_LOOPBACK) == IFF_LOOPBACK) {
continue;
}
names.insert(ifa->ifa_name);
}
std::vector<std::string> vec;
std::copy(names.begin(), names.end(), std::back_inserter(vec));
return vec;
}
// Get a list of stats names for a given interface
std::vector<StringData>& get_strings() {
if (!_names.has_value()) {
auto drvinfo = get_info();
_get_strings(drvinfo->n_stats);
}
return _names.get();
}
// Get a list of stats for a given interface
std::vector<uint64_t> get_stats() {
if (!_names.has_value()) {
return std::vector<uint64_t>();
}
return _get_stats(_names->size());
}
// Get some basic information about the interface
boost::optional<ethtool_drvinfo> get_info() {
ethtool_drvinfo drvinfo;
memset(&drvinfo, 0, sizeof(drvinfo));
drvinfo.cmd = ETHTOOL_GDRVINFO;
if (_ioctlNoThrow("drvinfo", &drvinfo)) {
return boost::none;
}
return boost::optional<ethtool_drvinfo>(drvinfo);
}
// Name of the interface this class monitors
StringData name() const {
return _interface;
}
private:
explicit EthTool(StringData interface, int fd) : _fd(fd), _interface(std::string(interface)) {}
void _get_strings(size_t count) {
_gstrings = static_cast<ethtool_gstrings*>(
calloc(1, sizeof(ethtool_gstrings) + count * ETH_GSTRING_LEN));
_gstrings->cmd = ETHTOOL_GSTRINGS;
_gstrings->string_set = ETH_SS_STATS;
_gstrings->len = count;
_names.emplace(std::vector<StringData>());
if (_ioctlNoThrow("get_strings", _gstrings)) {
return;
}
char* ptr = reinterpret_cast<char*>(_gstrings) + sizeof(ethtool_gstrings);
for (size_t i = 0; i < count; i++) {
auto s = StringData(ptr);
_names->push_back(s);
ptr += ETH_GSTRING_LEN;
}
}
std::vector<uint64_t> _get_stats(size_t count) {
std::vector<char> stats_buf(sizeof(ethtool_stats) + count * 8,
                            0); /* each stat value is 8 bytes (a u64), as specified in ethtool.h */
ethtool_stats* stats = reinterpret_cast<ethtool_stats*>(stats_buf.data());
stats->cmd = ETHTOOL_GSTATS;
stats->n_stats = count;
if (_ioctlNoThrow("get_stats", stats)) {
return std::vector<uint64_t>();
}
// Each stat is a 64-bit counter, so reinterpret the payload as uint64_t values
// rather than widening the raw bytes one char at a time.
const uint64_t* values =
    reinterpret_cast<const uint64_t*>(reinterpret_cast<char*>(stats) + sizeof(ethtool_stats));
std::vector<uint64_t> stats_vec(values, values + count);
return stats_vec;
}
// Returns non-zero on error
int _ioctlNoThrow(StringData name, void* cmd) {
ifreq ifr;
strcpy(ifr.ifr_name, _interface.c_str());
ifr.ifr_data = cmd;
auto ret = ioctl(_fd, SIOCETHTOOL, &ifr);
if (MONGO_unlikely(ret) && !_warningLogged) {
auto ec = lastPosixError();
_warningLogged = true;
LOGV2_WARNING(10985553,
"Failed to get strings for ethtool",
"interface"_attr = _interface,
"name"_attr = name,
"error"_attr = errorMessage(ec));
}
return ret;
}
private:
int _fd;
ethtool_gstrings* _gstrings{nullptr};
boost::optional<std::vector<StringData>> _names;
std::string _interface;
bool _warningLogged{false};
};
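A hedged usage sketch for the helper above (not part of the commit): enumerate every non-loopback NIC and dump its counters, the same data the FTDC collector below serializes into BSON. It assumes Linux, the <iostream> include added in this file, and that mongo's StringData streams to std::ostream:

void dumpNicCounters() {
    for (const auto& ifname : EthTool::interface_names()) {
        auto nic = EthTool::create(ifname);
        if (!nic) {
            continue;  // interface not visible to SIOCETHTOOL, or it has no stats
        }
        auto& keys = nic->get_strings();
        auto vals = nic->get_stats();
        for (size_t i = 0; i < keys.size() && i < vals.size(); ++i) {
            std::cout << nic->name() << "." << keys[i] << " = " << vals[i] << "\n";
        }
    }
}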
 /**
  * Collect metrics from the Linux /proc file system.
  */
@@ -111,6 +304,16 @@ public:
         for (const auto& disk : _disks) {
             _disksStringData.emplace_back(disk);
         }
+
+        auto interfaces = EthTool::interface_names();
+        _ethtools.reserve(interfaces.size());
+        for (const auto& ifn : interfaces) {
+            auto nic = EthTool::create(ifn);
+            if (nic) {
+                _ethtools.push_back(std::move(nic));
+            }
+        }
     }

     void collect(OperationContext* opCtx, BSONObjBuilder& builder) override {
@@ -219,6 +422,29 @@
                                           &subObjBuilder);
             subObjBuilder.doneFast();
         }
+
+        {
+            BSONObjBuilder subObjBuilder(builder.subobjStart("ethtool"_sd));
+
+            for (auto& tool : _ethtools) {
+                BSONObjBuilder subNICBuilder(subObjBuilder.subobjStart(tool->name()));
+
+                auto names = tool->get_strings();
+                if (names.empty()) {
+                    continue;
+                }
+
+                auto stats = tool->get_stats();
+                if (stats.empty()) {
+                    continue;
+                }
+
+                invariant(stats.size() >= names.size());
+                for (size_t i = 0; i < names.size(); i++) {
+                    subNICBuilder.append(names[i], static_cast<long long>(stats[i]));
+                }
+            }
+        }
     }

 private:
@@ -227,6 +453,8 @@ private:
     // List of physical disks to collect stats from as StringData to pass to parseProcDiskStatsFile.
     std::vector<StringData> _disksStringData;

+    std::vector<std::unique_ptr<EthTool>> _ethtools;
 };

 class SimpleFunctionCollector final : public FTDCCollectorInterface {


@@ -106,6 +106,7 @@ mongo_cc_library(
         "//src/mongo/db/repl:oplog_visibility_manager",
         "//src/mongo/db/repl:optime",
         "//src/mongo/db/repl:repl_coordinator_interface",  # TODO(SERVER-93876): Remove.
+        "//src/mongo/db/rss:replicated_storage_service",
         "//src/mongo/db/storage:oplog_truncate_markers",
         "//src/mongo/db/storage:record_store_base",
     ],
@@ -124,6 +125,7 @@ mongo_cc_library(
         ":durable_catalog_entry_metadata",
         "//src/mongo/db:server_base",
         "//src/mongo/db/op_observer:op_observer_util",
+        "//src/mongo/db/rss:replicated_storage_service",
         "//src/mongo/db/storage:feature_document_util",
         "//src/mongo/db/storage:ident",
         "//src/mongo/db/storage:mdb_catalog",
@@ -646,7 +648,6 @@ mongo_cc_library(
         "//src/mongo/db:vector_clock",
         "//src/mongo/db/collection_crud",
         "//src/mongo/db/commands:server_status_core",
-        "//src/mongo/db/disagg_storage:server_parameters",
         "//src/mongo/db/index:index_access_method",
         "//src/mongo/db/index:preallocated_container_pool",
         "//src/mongo/db/matcher/doc_validation",
@@ -656,6 +657,7 @@ mongo_cc_library(
         "//src/mongo/db/repl:oplog",
         "//src/mongo/db/repl:repl_server_parameters",
         "//src/mongo/db/repl:repl_settings",
+        "//src/mongo/db/rss:replicated_storage_service",
         "//src/mongo/db/stats:top",
         "//src/mongo/db/storage:mdb_catalog",
         "//src/mongo/db/storage:record_store_base",


@@ -176,6 +176,8 @@ struct CollectionOptions {
     boost::optional<EncryptedFieldConfig> encryptedFieldConfig;

     // When 'true', will use the same recordIds across all nodes in the replica set.
+    // When using disaggregated storage, will be enabled implicitly when the collection
+    // is created.
     bool recordIdsReplicated = false;
 };


@@ -35,7 +35,6 @@
 #include "mongo/bson/bsonobjbuilder.h"
 #include "mongo/db/audit.h"
 #include "mongo/db/basic_types_gen.h"
-#include "mongo/db/disagg_storage/server_parameters_gen.h"
 #include "mongo/db/index_builds/index_build_block.h"
 #include "mongo/db/index_builds/index_builds_common.h"
 #include "mongo/db/local_catalog/catalog_raii.h"
@@ -65,6 +64,7 @@
 #include "mongo/db/record_id.h"
 #include "mongo/db/repl/oplog.h"
 #include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/server_feature_flags_gen.h"
 #include "mongo/db/server_options.h"
 #include "mongo/db/server_parameter.h"
@@ -158,10 +158,9 @@ RecordId acquireCatalogId(
     OperationContext* opCtx,
     const boost::optional<CreateCollCatalogIdentifier>& createCollCatalogIdentifier,
     MDBCatalog* mdbCatalog) {
-    if (disagg::gDisaggregatedStorageEnabled && createCollCatalogIdentifier.has_value()) {
-        // Replicated catalogIds aren't compatible with standard architecture, as a node may create
-        // local collection whose catalogId collides with that of a replicated collection created on
-        // another node.
+    auto& rss = rss::ReplicatedStorageService::get(opCtx);
+    if (rss.getPersistenceProvider().shouldUseReplicatedCatalogIdentifiers() &&
+        createCollCatalogIdentifier.has_value()) {
         return createCollCatalogIdentifier->catalogId;
     }
     return mdbCatalog->reserveCatalogId(opCtx);
@@ -770,8 +769,10 @@ Collection* DatabaseImpl::_createCollection(
     // Additionally, we do not set the recordIdsReplicated:true option on timeseries and
     // clustered collections because in those cases the recordId is the _id, or on capped
     // collections which utilizes a separate mechanism for ensuring uniform recordIds.
-    if (generatedUUID && !nss.isOnInternalDb() && !optionsWithUUID.timeseries &&
-        !optionsWithUUID.clusteredIndex && !optionsWithUUID.capped &&
+    const bool collectionTypeSupportsReplicatedRecordIds =
+        !optionsWithUUID.timeseries && !optionsWithUUID.clusteredIndex && !optionsWithUUID.capped;
+    const auto& provider = rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider();
+    if (generatedUUID && !nss.isOnInternalDb() && collectionTypeSupportsReplicatedRecordIds &&
         gFeatureFlagRecordIdsReplicated.isEnabledUseLastLTSFCVWhenUninitialized(
             VersionContext::getDecoration(opCtx),
             serverGlobalParams.featureCompatibility.acquireFCVSnapshot()) &&
@@ -781,6 +782,19 @@ Collection* DatabaseImpl::_createCollection(
                     "Collection will use recordIdsReplicated:true.",
                     "oldValue"_attr = optionsWithUUID.recordIdsReplicated);
         optionsWithUUID.recordIdsReplicated = true;
+    } else if (provider.shouldUseReplicatedRecordIds() && nss.isReplicated() &&
+               !nss.isImplicitlyReplicated() && collectionTypeSupportsReplicatedRecordIds) {
+        tassert(10985561,
+                str::stream() << "Replicated record IDs must be enabled with " << provider.name(),
+                gFeatureFlagRecordIdsReplicated.isEnabledUseLatestFCVWhenUninitialized(
+                    VersionContext::getDecoration(opCtx),
+                    serverGlobalParams.featureCompatibility.acquireFCVSnapshot()));
+        LOGV2_DEBUG(10985560,
+                    2,
+                    "Collection will use recordIdsReplicated:true",
+                    "provider"_attr = provider.name(),
+                    "oldValue"_attr = optionsWithUUID.recordIdsReplicated);
+        optionsWithUUID.recordIdsReplicated = true;
     }

     uassert(ErrorCodes::CommandNotSupported,


@@ -39,6 +39,7 @@
 #include "mongo/db/local_catalog/shard_role_api/transaction_resources.h"
 #include "mongo/db/op_observer/op_observer_util.h"
 #include "mongo/db/operation_context.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/storage/feature_document_util.h"
 #include "mongo/db/storage/kv/kv_engine.h"
 #include "mongo/db/storage/mdb_catalog.h"
@@ -331,10 +332,13 @@ Status createIndex(OperationContext* opCtx,
     auto& ru = *shard_role_details::getRecoveryUnit(opCtx);
     auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
     auto kvEngine = storageEngine->getEngine();
+    auto& provider = rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider();

     invariant(collectionOptions.uuid);
-    bool replicateLocalCatalogIdentifiers =
-        shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(opCtx));
+    bool replicateLocalCatalogIdentifiers = shouldReplicateLocalCatalogIdentifers(
+        rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider(),
+        VersionContext::getDecoration(opCtx));
     if (replicateLocalCatalogIdentifiers) {
         // If a previous attempt at creating this index was rolled back, the ident may still be drop
         // pending. Complete that drop before creating the index if so.
@@ -349,6 +353,7 @@ Status createIndex(OperationContext* opCtx,
     }

     Status status = kvEngine->createSortedDataInterface(
+        provider,
         ru,
         nss,
         *collectionOptions.uuid,
@@ -466,8 +471,10 @@ Status dropAndRecreateIndexIdentForResume(OperationContext* opCtx,
         return status;

     invariant(collectionOptions.uuid);
+    auto& provider = rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider();
     status =
-        engine->createSortedDataInterface(*shard_role_details::getRecoveryUnit(opCtx),
+        engine->createSortedDataInterface(provider,
+                                          *shard_role_details::getRecoveryUnit(opCtx),
                                           nss,
                                           *collectionOptions.uuid,
                                           ident,


@@ -39,6 +39,7 @@
 #include "mongo/db/repl/oplog.h"
 #include "mongo/db/repl/optime.h"
 #include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/storage/record_store.h"
 #include "mongo/db/storage/recovery_unit.h"
 #include "mongo/db/storage/storage_options.h"
@@ -146,6 +147,10 @@ std::vector<OplogSlot> LocalOplogInfo::getNextOpTimes(OperationContext* opCtx, s
     Timestamp ts;
     // Provide a sample to FlowControl after the `oplogInfo.newOpMutex` is released.
     ON_BLOCK_EXIT([opCtx, &ts, count] {
+        auto& rss = rss::ReplicatedStorageService::get(opCtx);
+        if (!rss.getPersistenceProvider().shouldUseOplogWritesForFlowControlSampling())
+            return;
+
         auto flowControl = FlowControl::get(opCtx);
         if (flowControl) {
             flowControl->sample(ts, count);


@@ -154,17 +154,14 @@
 #include "mongo/db/repl/repl_settings.h"
 #include "mongo/db/repl/replication_consistency_markers_impl.h"
 #include "mongo/db/repl/replication_coordinator.h"
-#include "mongo/db/repl/replication_coordinator_external_state_impl.h"
-#include "mongo/db/repl/replication_coordinator_impl.h"
 #include "mongo/db/repl/replication_coordinator_impl_gen.h"
 #include "mongo/db/repl/replication_process.h"
 #include "mongo/db/repl/replication_recovery.h"
-#include "mongo/db/repl/storage_interface.h"
 #include "mongo/db/repl/storage_interface_impl.h"
-#include "mongo/db/repl/topology_coordinator.h"
 #include "mongo/db/repl/wait_for_majority_service.h"
 #include "mongo/db/replication_state_transition_lock_guard.h"
 #include "mongo/db/request_execution_context.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/s/migration_blocking_operation/multi_update_coordinator.h"
 #include "mongo/db/s/migration_chunk_cloner_source_op_observer.h"
 #include "mongo/db/s/query_analysis_op_observer_configsvr.h"
@@ -606,9 +603,9 @@ ExitCode _initAndListen(ServiceContext* serviceContext) {
         ec != ExitCode::clean)
         return ec;

-    FlowControl::set(serviceContext,
-                     std::make_unique<FlowControl>(
-                         serviceContext, repl::ReplicationCoordinator::get(serviceContext)));
+    auto& rss = rss::ReplicatedStorageService::get(serviceContext);
+    auto& serviceLifecycle = rss.getServiceLifecycle();
+    serviceLifecycle.initializeFlowControl(serviceContext);

     // If a crash occurred during file-copy based initial sync, we may need to finish or clean up.
     {
@@ -620,8 +617,20 @@ ExitCode _initAndListen(ServiceContext* serviceContext) {
     admission::initializeExecutionControl(serviceContext);

-    auto lastShutdownState = catalog::startUpStorageEngineAndCollectionCatalog(
-        serviceContext, &cc(), StorageEngineInitFlags{}, &startupTimeElapsedBuilder);
+    serviceLifecycle.initializeStorageEngineExtensions(serviceContext);
+
+    auto lastShutdownState = [&]() {
+        if (rss.getPersistenceProvider().shouldDelayDataAccessDuringStartup()) {
+            // If data isn't ready yet, we shouldn't try to read it.
+            auto initializeStorageEngineOpCtx = serviceContext->makeOperationContext(&cc());
+            return catalog::startUpStorageEngine(initializeStorageEngineOpCtx.get(),
+                                                 StorageEngineInitFlags{},
+                                                 &startupTimeElapsedBuilder);
+        } else {
+            return catalog::startUpStorageEngineAndCollectionCatalog(
+                serviceContext, &cc(), StorageEngineInitFlags{}, &startupTimeElapsedBuilder);
+        }
+    }();
     StorageControl::startStorageControls(serviceContext);

     auto logStartupStats = std::make_unique<ScopeGuard<std::function<void()>>>([&] {
@@ -898,7 +907,8 @@ ExitCode _initAndListen(ServiceContext* serviceContext) {
                               &startupTimeElapsedBuilder);
         replCoord->startup(startupOpCtx.get(), lastShutdownState);
     } else {
-        if (storageEngine->supportsCappedCollections()) {
+        if (rss.getPersistenceProvider().supportsLocalCollections() &&
+            storageEngine->supportsCappedCollections()) {
             logStartup(startupOpCtx.get());
         }
@ -1368,30 +1378,16 @@ auto makeReplicaSetNodeExecutor(ServiceContext* serviceContext) {
"ReplNodeDbWorkerNetwork", nullptr, makeShardingEgressHooksList(serviceContext))); "ReplNodeDbWorkerNetwork", nullptr, makeShardingEgressHooksList(serviceContext)));
} }
auto makeReplicationExecutor(ServiceContext* serviceContext) {
ThreadPool::Options tpOptions;
tpOptions.threadNamePrefix = "ReplCoord-";
tpOptions.poolName = "ReplCoordThreadPool";
tpOptions.maxThreads = 50;
tpOptions.onCreateThread = [serviceContext](const std::string& threadName) {
Client::initThread(threadName,
serviceContext->getService(ClusterRole::ShardServer),
Client::noSession(),
ClientOperationKillableByStepdown{false});
};
auto hookList = std::make_unique<rpc::EgressMetadataHookList>();
hookList->addHook(std::make_unique<rpc::VectorClockMetadataHook>(serviceContext));
return executor::ThreadPoolTaskExecutor::create(
std::make_unique<ThreadPool>(tpOptions),
executor::makeNetworkInterface("ReplNetwork", nullptr, std::move(hookList)));
}
void setUpReplicaSetDDLHooks(ServiceContext* serviceContext) { void setUpReplicaSetDDLHooks(ServiceContext* serviceContext) {
ReplicaSetDDLTracker::create(serviceContext); ReplicaSetDDLTracker::create(serviceContext);
DirectConnectionDDLHook::create(serviceContext); DirectConnectionDDLHook::create(serviceContext);
} }
void setUpReplication(ServiceContext* serviceContext) { void setUpReplication(ServiceContext* serviceContext) {
auto& serviceLifecycle =
rss::ReplicatedStorageService::get(serviceContext).getServiceLifecycle();
serviceLifecycle.initializeStateRequiredForStorageAccess(serviceContext);
repl::StorageInterface::set(serviceContext, std::make_unique<repl::StorageInterfaceImpl>()); repl::StorageInterface::set(serviceContext, std::make_unique<repl::StorageInterfaceImpl>());
auto storageInterface = repl::StorageInterface::get(serviceContext); auto storageInterface = repl::StorageInterface::get(serviceContext);
@ -1403,22 +1399,10 @@ void setUpReplication(ServiceContext* serviceContext) {
serviceContext, serviceContext,
std::make_unique<repl::ReplicationProcess>( std::make_unique<repl::ReplicationProcess>(
storageInterface, std::move(consistencyMarkers), std::move(recovery))); storageInterface, std::move(consistencyMarkers), std::move(recovery)));
auto replicationProcess = repl::ReplicationProcess::get(serviceContext);
repl::TopologyCoordinator::Options topoCoordOptions;
topoCoordOptions.maxSyncSourceLagSecs = Seconds(repl::maxSyncSourceLagSecs);
topoCoordOptions.clusterRole = serverGlobalParams.clusterRole;
auto replCoord = std::make_unique<repl::ReplicationCoordinatorImpl>(
serviceContext,
getGlobalReplSettings(),
std::make_unique<repl::ReplicationCoordinatorExternalStateImpl>(
serviceContext, storageInterface, replicationProcess),
makeReplicationExecutor(serviceContext),
std::make_unique<repl::TopologyCoordinator>(topoCoordOptions),
replicationProcess,
storageInterface,
SecureRandom().nextInt64());
std::unique_ptr<repl::ReplicationCoordinator> replCoord =
serviceLifecycle.initializeReplicationCoordinator(serviceContext);
// Only create a ReplicaSetNodeExecutor if sharding is disabled and replication is enabled.
// Note that sharding sets up its own executors for scheduling work to remote nodes.
if (serverGlobalParams.clusterRole.has(ClusterRole::None) &&
@ -1840,8 +1824,12 @@ void shutdownTask(const ShutdownTaskArgs& shutdownArgs) {
SectionScopedTimer scopedTimer(serviceContext->getFastClockSource(),
TimedSectionId::killAllOperations,
&shutdownTimeElapsedBuilder);
serviceContext->setKillAllOperations(
[](const StringData t) { return t == kFTDCThreadName; });
auto& serviceLifecycle =
rss::ReplicatedStorageService::get(serviceContext).getServiceLifecycle();
serviceContext->setKillAllOperations([&serviceLifecycle](const StringData t) {
return t == kFTDCThreadName ||
serviceLifecycle.shouldKeepThreadAliveUntilStorageEngineHasShutDown(t);
});
if (MONGO_unlikely(pauseWhileKillingOperationsAtShutdown.shouldFail())) {
LOGV2_OPTIONS(4701700,
@ -1987,6 +1975,13 @@ void shutdownTask(const ShutdownTaskArgs& shutdownArgs) {
true /* memLeakAllowed */);
}
// Depending on the underlying implementation, there may be some state that needs to be shut
// down after the replication subsystem and the storage engine.
auto& serviceLifecycle =
rss::ReplicatedStorageService::get(serviceContext).getServiceLifecycle();
serviceLifecycle.shutdownStateRequiredForStorageAccess(serviceContext,
&shutdownTimeElapsedBuilder);
// We drop the scope cache because leak sanitizer can't see across the
// thread we use for proxying MozJS requests. Dropping the cache cleans up
// the memory and makes leak sanitizer happy.

View File

@ -27,7 +27,6 @@
* it in the license file.
*/
#include "mongo/db/op_msg_fuzzer_router_fixture.h"
#include "mongo/base/string_data.h"

View File

@ -141,8 +141,8 @@ mongo_cc_library(
"//src/mongo:base", "//src/mongo:base",
"//src/mongo/bson/dotted_path:dotted_path_support", # TODO(SERVER-93876): Remove. "//src/mongo/bson/dotted_path:dotted_path_support", # TODO(SERVER-93876): Remove.
"//src/mongo/db:shard_role_api", "//src/mongo/db:shard_role_api",
"//src/mongo/db/disagg_storage:server_parameters",
"//src/mongo/db/local_catalog:collection_options", "//src/mongo/db/local_catalog:collection_options",
"//src/mongo/db/rss:replicated_storage_service",
], ],
) )

View File

@ -66,6 +66,7 @@
#include "mongo/db/repl/oplog_entry_gen.h" #include "mongo/db/repl/oplog_entry_gen.h"
#include "mongo/db/repl/read_concern_args.h" #include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/server_options.h" #include "mongo/db/server_options.h"
#include "mongo/db/session/logical_session_id_helpers.h" #include "mongo/db/session/logical_session_id_helpers.h"
#include "mongo/db/session/session_txn_record_gen.h" #include "mongo/db/session/session_txn_record_gen.h"
@ -330,8 +331,9 @@ void OpObserverImpl::onCreateIndex(OperationContext* opCtx,
return;
}
bool replicateLocalCatalogIdentifiers =
shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(opCtx));
bool replicateLocalCatalogIdentifiers = shouldReplicateLocalCatalogIdentifers(
rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider(),
VersionContext::getDecoration(opCtx));
BSONObjBuilder builder;
// Note that despite using this constant, we are not building a CreateIndexCommand here
@ -417,7 +419,9 @@ void OpObserverImpl::onStartIndexBuild(OperationContext* opCtx,
oplogEntry.setNss(nss.getCommandNS());
oplogEntry.setUuid(collUUID);
oplogEntry.setObject(oplogEntryBuilder.done());
if (shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(opCtx))) {
if (shouldReplicateLocalCatalogIdentifers(
rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider(),
VersionContext::getDecoration(opCtx))) {
// TODO (SERVER-109824): Move 'directoryPerDB' and 'directoryForIndexes' to the function
// parameters.
oplogEntry.setObject2(BSON("indexes" << o2IndexesArr.arr() << "directoryPerDB"
@ -1190,7 +1194,9 @@ void OpObserverImpl::onCreateCollection(
oplogEntry.setNss(collectionName.getCommandNS());
oplogEntry.setUuid(options.uuid);
oplogEntry.setObject(MutableOplogEntry::makeCreateCollObject(collectionName, options, idIndex));
if (shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(opCtx))) {
if (shouldReplicateLocalCatalogIdentifers(
rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider(),
VersionContext::getDecoration(opCtx))) {
invariant(createCollCatalogIdentifier.has_value(),
"Missing catalog identifier required to log replicated "
"collection");

View File

@ -33,8 +33,8 @@
#include "mongo/bson/bsonelement.h" #include "mongo/bson/bsonelement.h"
#include "mongo/bson/bsonobjbuilder.h" #include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/dotted_path/dotted_path_support.h" #include "mongo/bson/dotted_path/dotted_path_support.h"
#include "mongo/db/disagg_storage/server_parameters_gen.h"
#include "mongo/db/global_catalog/shard_key_pattern.h" #include "mongo/db/global_catalog/shard_key_pattern.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/storage/storage_parameters_gen.h" #include "mongo/db/storage/storage_parameters_gen.h"
#include "mongo/util/duration.h" #include "mongo/util/duration.h"
#include "mongo/util/fail_point.h" #include "mongo/util/fail_point.h"
@ -53,10 +53,9 @@ const OpStateAccumulator::Decoration<std::unique_ptr<ShardingWriteRouter>>
MONGO_FAIL_POINT_DEFINE(addDestinedRecipient);
MONGO_FAIL_POINT_DEFINE(sleepBetweenInsertOpTimeGenerationAndLogOp);
bool shouldReplicateLocalCatalogIdentifers(const VersionContext& vCtx) {
if (disagg::gDisaggregatedStorageEnabled) {
// Disaggregated storage relies on consistent catalog storage. Safe-guard if FCV is not yet
// initialized despite the feature being enabled.
bool shouldReplicateLocalCatalogIdentifers(const rss::PersistenceProvider& provider,
const VersionContext& vCtx) {
if (provider.shouldUseReplicatedCatalogIdentifiers()) {
return true;
}
const auto fcvSnapshot = serverGlobalParams.featureCompatibility.acquireFCVSnapshot();

View File

@ -35,6 +35,7 @@
#include "mongo/db/local_catalog/collection_options.h" #include "mongo/db/local_catalog/collection_options.h"
#include "mongo/db/namespace_string.h" #include "mongo/db/namespace_string.h"
#include "mongo/db/op_observer/op_observer.h" #include "mongo/db/op_observer/op_observer.h"
#include "mongo/db/rss/persistence_provider.h"
#include "mongo/util/assert_util.h" #include "mongo/util/assert_util.h"
#include "mongo/util/decorable.h" #include "mongo/util/decorable.h"
#include "mongo/util/fail_point.h" #include "mongo/util/fail_point.h"
@ -54,7 +55,8 @@ extern FailPoint sleepBetweenInsertOpTimeGenerationAndLogOp;
/**
* Returns true when local catalog identifiers should be replicated through the oplog.
*/
bool shouldReplicateLocalCatalogIdentifers(const VersionContext& vCtx);
bool shouldReplicateLocalCatalogIdentifers(const rss::PersistenceProvider&,
const VersionContext& vCtx);
/**
* Returns true if gFeatureFlagPrimaryDrivenIndexBuilds is enabled.

View File

@ -972,6 +972,7 @@ mongo_cc_library(
"//src/mongo/db/pipeline:change_stream_preimage", "//src/mongo/db/pipeline:change_stream_preimage",
"//src/mongo/db/query/write_ops", "//src/mongo/db/query/write_ops",
"//src/mongo/db/repl/dbcheck", "//src/mongo/db/repl/dbcheck",
"//src/mongo/db/rss:replicated_storage_service",
"//src/mongo/db/session:session_catalog_mongod", "//src/mongo/db/session:session_catalog_mongod",
"//src/mongo/db/stats:counters", "//src/mongo/db/stats:counters",
"//src/mongo/db/stats:server_read_concern_write_concern_metrics", "//src/mongo/db/stats:server_read_concern_write_concern_metrics",

View File

@ -105,6 +105,7 @@
#include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/timestamp_block.h" #include "mongo/db/repl/timestamp_block.h"
#include "mongo/db/repl/transaction_oplog_application.h" #include "mongo/db/repl/transaction_oplog_application.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/service_context.h" #include "mongo/db/service_context.h"
#include "mongo/db/session/logical_session_id_gen.h" #include "mongo/db/session/logical_session_id_gen.h"
#include "mongo/db/sharding_environment/shard_id.h" #include "mongo/db/sharding_environment/shard_id.h"
@ -211,7 +212,10 @@ StringData getInvalidatingReason(const OplogApplication::Mode mode, const bool i
boost::optional<CreateCollCatalogIdentifier> extractReplicatedCatalogIdentifier(
OperationContext* opCtx, const OplogEntry& oplogEntry) {
auto& o2 = oplogEntry.getObject2();
if (!o2 || !shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(opCtx))) {
if (!o2 ||
!shouldReplicateLocalCatalogIdentifers(
rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider(),
VersionContext::getDecoration(opCtx))) {
// Either no catalog identifier information was provided, or replicated local catalog
// identifiers are not supported.
return boost::none;
@ -365,7 +369,9 @@ void createIndexForApplyOps(OperationContext* opCtx,
IndexBuildInfo indexBuildInfo = [&] {
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
if (!indexMetadata ||
!shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(opCtx))) {
!shouldReplicateLocalCatalogIdentifers(
rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider(),
VersionContext::getDecoration(opCtx))) {
return IndexBuildInfo(indexSpec, *storageEngine, indexCollection->ns().dbName());
}
@ -733,8 +739,13 @@ void createOplog(OperationContext* opCtx,
uow.commit();
});
/* sync here so we don't get any surprising lag later when we try to sync */
service->getStorageEngine()->flushAllFiles(opCtx, /*callerHoldsReadLock*/ false);
// We cannot guarantee that we have a stable timestamp at this point, but if the persistence
// provider supports unstable checkpoints, we can take a checkpoint now to avoid any surprising
// lag later when we try to sync.
auto& rss = rss::ReplicatedStorageService::get(service);
if (rss.getPersistenceProvider().supportsUnstableCheckpoints()) {
service->getStorageEngine()->flushAllFiles(opCtx, /*callerHoldsReadLock*/ false);
}
}
void createOplog(OperationContext* opCtx) {
@ -961,7 +972,9 @@ const StringMap<ApplyOpMetadata> kOpsMap = {
auto swOplogEntry = IndexBuildOplogEntry::parse(
opCtx,
entry,
shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(opCtx)));
shouldReplicateLocalCatalogIdentifers(
rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider(),
VersionContext::getDecoration(opCtx)));
if (!swOplogEntry.isOK()) {
return swOplogEntry.getStatus().withContext(
"Error parsing 'startIndexBuild' oplog entry");

View File

@ -291,7 +291,7 @@ public:
}
// Should be called after all oplog entries have been processed to handle the deletes that
// were not superseded by a later write.
void handleLatestDeletes(std::function<void(OplogEntry*)> handler) {
std::for_each(_retryImageWrites.begin(),
_retryImageWrites.end(),

View File

@ -103,7 +103,6 @@
#include "mongo/db/session/session_txn_record_gen.h" #include "mongo/db/session/session_txn_record_gen.h"
#include "mongo/db/sharding_environment/shard_id.h" #include "mongo/db/sharding_environment/shard_id.h"
#include "mongo/db/stats/counters.h" #include "mongo/db/stats/counters.h"
#include "mongo/db/storage/mdb_catalog.h"
#include "mongo/db/storage/write_unit_of_work.h" #include "mongo/db/storage/write_unit_of_work.h"
#include "mongo/db/tenant_id.h" #include "mongo/db/tenant_id.h"
#include "mongo/db/timeseries/timeseries_gen.h" #include "mongo/db/timeseries/timeseries_gen.h"
@ -141,21 +140,6 @@
namespace mongo { namespace mongo {
namespace repl { namespace repl {
namespace { namespace {
CreateCollCatalogIdentifier newCatalogIdentifier(OperationContext* opCtx,
const DatabaseName& dbName,
bool includeIdIndexIdent) {
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
auto mdbCatalog = storageEngine->getMDBCatalog();
invariant(mdbCatalog);
CreateCollCatalogIdentifier catalogIdentifier;
catalogIdentifier.catalogId = mdbCatalog->reserveCatalogId(opCtx);
catalogIdentifier.ident = storageEngine->generateNewCollectionIdent(dbName);
if (includeIdIndexIdent) {
catalogIdentifier.idIndexIdent = storageEngine->generateNewIndexIdent(dbName);
}
return catalogIdentifier;
}
auto parseFromOplogEntryArray(const BSONObj& obj, int elem) {
BSONElement tsArray;
@ -632,45 +616,6 @@ TEST_F(OplogApplierImplTest, CreateCollectionCommand) {
ASSERT_TRUE(collectionExists(_opCtx.get(), nss));
}
TEST_F(OplogApplierImplTest, CreateCollectionCommandDisaggBasic) {
RAIIServerParameterControllerForTest disaggServer("disaggregatedStorageEnabled", true);
RAIIServerParameterControllerForTest replicateLocalCatalogInfoController(
"featureFlagReplicateLocalCatalogIdentifiers", true);
NamespaceString nss = NamespaceString::createNamespaceString_forTest("test.t");
auto catalogIdentifier =
newCatalogIdentifier(_opCtx.get(), nss.dbName(), true /* includeIdIndexIdent*/);
auto entry =
makeCreateCollectionOplogEntry(nextOpTime(),
nss,
CollectionOptions{.uuid = UUID::gen()},
BSON("v" << 2 << "key" << BSON("_id_" << 1) << "name"
<< "_id_") /* idIndex */,
catalogIdentifier);
bool applyCmdCalled = false;
_opObserver->onCreateCollectionFn =
[&](OperationContext* opCtx,
const NamespaceString& collNss,
const CollectionOptions&,
const BSONObj&,
const boost::optional<CreateCollCatalogIdentifier>& collCatalogIdentifier) {
applyCmdCalled = true;
ASSERT_TRUE(opCtx);
ASSERT_TRUE(
shard_role_details::getLocker(opCtx)->isDbLockedForMode(nss.dbName(), MODE_IX));
ASSERT_EQUALS(nss, collNss);
ASSERT(collCatalogIdentifier);
ASSERT_EQUALS(catalogIdentifier.catalogId, collCatalogIdentifier->catalogId);
ASSERT_EQUALS(catalogIdentifier.ident, collCatalogIdentifier->ident);
};
ASSERT_OK(_applyOplogEntryOrGroupedInsertsWrapper(
_opCtx.get(), ApplierOperation{&entry}, OplogApplication::Mode::kInitialSync));
ASSERT_TRUE(applyCmdCalled);
ASSERT_TRUE(collectionExists(_opCtx.get(), nss));
}
TEST_F(OplogApplierImplTest, CreateCollectionCommandMultitenant) {
setServerParameter("multitenancySupport", true);
setServerParameter("featureFlagRequireTenantID", true);

View File

@ -62,6 +62,7 @@
#include "mongo/db/repl/storage_interface_impl.h" #include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/session/session_catalog_mongod.h" #include "mongo/db/session/session_catalog_mongod.h"
#include "mongo/db/sharding_environment/shard_id.h" #include "mongo/db/sharding_environment/shard_id.h"
#include "mongo/db/storage/mdb_catalog.h"
#include "mongo/db/storage/write_unit_of_work.h" #include "mongo/db/storage/write_unit_of_work.h"
#include "mongo/db/tenant_id.h" #include "mongo/db/tenant_id.h"
#include "mongo/db/transaction/session_catalog_mongod_transaction_interface_impl.h" #include "mongo/db/transaction/session_catalog_mongod_transaction_interface_impl.h"
@ -198,14 +199,18 @@ void OplogApplierImplOpObserver::onCollMod(OperationContext* opCtx,
onCollModFn(opCtx, nss, uuid, collModCmd, oldCollOptions, indexInfo);
}
std::unique_ptr<ReplicationCoordinator> OplogApplierImplTest::makeReplCoord(
ServiceContext* serviceContext) {
return std::make_unique<ReplicationCoordinatorMock>(serviceContext);
}
void OplogApplierImplTest::setUp() {
ServiceContextMongoDTest::setUp();
serviceContext = getServiceContext();
_opCtx = cc().makeOperationContext();
ReplicationCoordinator::set(serviceContext,
std::make_unique<ReplicationCoordinatorMock>(serviceContext));
ReplicationCoordinator::set(serviceContext, makeReplCoord(serviceContext));
ASSERT_OK(ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_PRIMARY));
StorageInterface::set(serviceContext, std::make_unique<StorageInterfaceImpl>());
@ -625,5 +630,21 @@ void createIndex(OperationContext* opCtx,
opCtx, collUUID, spec, IndexBuildsManager::IndexConstraints::kEnforce, false);
}
CreateCollCatalogIdentifier newCatalogIdentifier(OperationContext* opCtx,
const DatabaseName& dbName,
bool includeIdIndexIdent) {
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
auto mdbCatalog = storageEngine->getMDBCatalog();
invariant(mdbCatalog);
CreateCollCatalogIdentifier catalogIdentifier;
catalogIdentifier.catalogId = mdbCatalog->reserveCatalogId(opCtx);
catalogIdentifier.ident = storageEngine->generateNewCollectionIdent(dbName);
if (includeIdIndexIdent) {
catalogIdentifier.idIndexIdent = storageEngine->generateNewIndexIdent(dbName);
}
return catalogIdentifier;
}
} // namespace repl
} // namespace mongo

View File

@ -277,6 +277,7 @@ protected:
return OpTime(Timestamp(Seconds(lastSecond++), 0), 1LL);
}
virtual std::unique_ptr<ReplicationCoordinator> makeReplCoord(ServiceContext*);
void setUp() override;
void tearDown() override;
@ -415,6 +416,13 @@ void createIndex(OperationContext* opCtx,
UUID collUUID,
const BSONObj& spec);
} // namespace MONGO_MOD_PUB repl
/**
* Generate a new catalog identifier.
*/
CreateCollCatalogIdentifier newCatalogIdentifier(OperationContext* opCtx,
const DatabaseName& dbName,
bool includeIdIndexIdent);
} // namespace MONGO_MOD_PUB repl
} // namespace mongo

View File

@ -212,4 +212,10 @@ public:
};
std::ostream& operator<<(std::ostream& out, const OpTimeAndWallTime& opTime);
// A convenience class for holding both a Timestamp and a Date_t.
struct TimestampAndWallTime {
Timestamp timestamp;
Date_t wallTime;
};
} // namespace mongo::repl } // namespace mongo::repl

View File

@ -59,6 +59,7 @@
#include "mongo/db/repl/replication_consistency_markers.h" #include "mongo/db/repl/replication_consistency_markers.h"
#include "mongo/db/repl/replication_process.h" #include "mongo/db/repl/replication_process.h"
#include "mongo/db/repl/rollback_test_fixture.h" #include "mongo/db/repl/rollback_test_fixture.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/server_options.h" #include "mongo/db/server_options.h"
#include "mongo/db/service_context.h" #include "mongo/db/service_context.h"
#include "mongo/db/session/logical_session_id.h" #include "mongo/db/session/logical_session_id.h"
@ -2250,7 +2251,9 @@ TEST_F(RollbackImplObserverInfoTest,
auto uuid = UUID::gen();
BSONObj indexObj;
if (shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(_opCtx.get()))) {
if (shouldReplicateLocalCatalogIdentifers(
rss::ReplicatedStorageService::get(_opCtx.get()).getPersistenceProvider(),
VersionContext::getDecoration(_opCtx.get()))) {
indexObj = BSON("createIndexes" << nss.coll() << "spec"
<< BSON("v" << 2 << "key"
<< "x"

View File

@ -106,6 +106,7 @@
#include "mongo/db/repl/storage_interface.h" #include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/storage_interface_impl.h" #include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/timestamp_block.h" #include "mongo/db/repl/timestamp_block.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/service_context.h" #include "mongo/db/service_context.h"
#include "mongo/db/service_context_d_test_fixture.h" #include "mongo/db/service_context_d_test_fixture.h"
#include "mongo/db/session/logical_session_id.h" #include "mongo/db/session/logical_session_id.h"
@ -916,7 +917,9 @@ public:
}
StringData indexNameOplogField() const {
return shouldReplicateLocalCatalogIdentifers(VersionContext::getDecoration(_opCtx))
return shouldReplicateLocalCatalogIdentifers(
rss::ReplicatedStorageService::get(_opCtx).getPersistenceProvider(),
VersionContext::getDecoration(_opCtx))
? "o.spec.name"
: "o.name";
}
@ -3088,14 +3091,16 @@ TEST_F(StorageTimestampTest, CreateCollectionWithSystemIndex) {
// supports 2 phase index build.
indexStartTs = op.getTimestamp();
indexCreateTs =
repl::OplogEntry(queryOplog(BSON("op" << "c"
<< "ns" << nss.getCommandNS().ns_forTest()
<< "o.createIndexes" << nss.coll()
<< (shouldReplicateLocalCatalogIdentifers(
VersionContext::getDecoration(_opCtx))
? "o.spec.name"
: "o.name")
<< "user_1_db_1")))
repl::OplogEntry(
queryOplog(BSON(
"op" << "c"
<< "ns" << nss.getCommandNS().ns_forTest() << "o.createIndexes" << nss.coll()
<< (shouldReplicateLocalCatalogIdentifers(
rss::ReplicatedStorageService::get(_opCtx).getPersistenceProvider(),
VersionContext::getDecoration(_opCtx))
? "o.spec.name"
: "o.name")
<< "user_1_db_1")))
.getTimestamp();
indexCompleteTs = indexCreateTs;

View File

@ -0,0 +1,77 @@
load("//bazel:mongo_src_rules.bzl", "mongo_cc_benchmark", "mongo_cc_library", "mongo_cc_unit_test")
package(default_visibility = ["//visibility:public"])
exports_files(
glob([
"*.h",
"*.cpp",
]),
)
mongo_cc_library(
name = "replicated_storage_service",
srcs = [
"replicated_storage_service.cpp",
],
hdrs = [
"replicated_storage_service.h",
],
deps = [
"//src/mongo:base",
"//src/mongo/db:service_context",
"//src/mongo/db/rss:persistence_provider",
"//src/mongo/db/rss:service_lifecycle",
],
)
mongo_cc_library(
name = "persistence_provider",
hdrs = [
"persistence_provider.h",
],
deps = [
"//src/mongo:base",
"//src/mongo/db:service_context",
],
)
mongo_cc_library(
name = "service_lifecycle",
hdrs = [
"service_lifecycle.h",
],
deps = [
"//src/mongo:base",
"//src/mongo/db:service_context",
"//src/mongo/db/repl:repl_coordinator_interface",
],
)
mongo_cc_library(
name = "persistence_provider_impl",
deps = [
"//src/mongo:base",
"//src/mongo/db/rss:persistence_provider",
"//src/mongo/db/rss/attached_storage:attached_persistence_provider",
] + select({
"//bazel/config:build_atlas_enabled": [
"//src/mongo/db/modules/atlas/src/disagg_storage:disaggregated_persistence_provider",
],
"//conditions:default": [],
}),
)
mongo_cc_library(
name = "service_lifecycle_impl",
deps = [
"//src/mongo:base",
"//src/mongo/db/rss:service_lifecycle",
"//src/mongo/db/rss/attached_storage:attached_service_lifecycle",
] + select({
"//bazel/config:build_atlas_enabled": [
"//src/mongo/db/modules/atlas/src/disagg_storage:disaggregated_service_lifecycle",
],
"//conditions:default": [],
}),
)

View File

@ -0,0 +1,6 @@
version: 1.0.0
filters:
- "*":
approvers:
- 10gen/server-storage-execution
- 10gen/server-replication

View File

@ -0,0 +1,43 @@
load("//bazel:mongo_src_rules.bzl", "mongo_cc_benchmark", "mongo_cc_library", "mongo_cc_unit_test")
package(default_visibility = ["//visibility:public"])
exports_files(
glob([
"*.h",
"*.cpp",
]),
)
mongo_cc_library(
name = "attached_persistence_provider",
srcs = [
"attached_persistence_provider.cpp",
],
hdrs = [
"attached_persistence_provider.h",
],
deps = [
"//src/mongo:base",
"//src/mongo/db:service_context",
"//src/mongo/db/rss:replicated_storage_service",
],
)
mongo_cc_library(
name = "attached_service_lifecycle",
srcs = [
"attached_service_lifecycle.cpp",
],
hdrs = [
"attached_service_lifecycle.h",
],
deps = [
"//src/mongo:base",
"//src/mongo/db:service_context",
"//src/mongo/db/admission:flow_control",
"//src/mongo/db/repl:repl_coordinator_impl",
"//src/mongo/db/repl:serveronly_repl",
"//src/mongo/db/rss:replicated_storage_service",
],
)

View File

@ -0,0 +1,101 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/rss/attached_storage/attached_persistence_provider.h"
#include "mongo/base/string_data.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/service_context.h"
namespace mongo::rss {
namespace {
// Checkpoint every 60 seconds by default.
constexpr double kDefaultAttachedSyncDelaySeconds = 60.0;
ServiceContext::ConstructorActionRegisterer registerAttachedPersistenceProvider{
"AttachedPersistenceProvider", [](ServiceContext* service) {
auto& rss = ReplicatedStorageService::get(service);
rss.setPersistenceProvider(std::make_unique<AttachedPersistenceProvider>());
}};
} // namespace
std::string AttachedPersistenceProvider::name() const {
return "Attached Storage";
}
boost::optional<Timestamp> AttachedPersistenceProvider::getSentinelDataTimestamp() const {
return boost::none;
}
std::string AttachedPersistenceProvider::getWiredTigerConfig(int) const {
return "";
}
bool AttachedPersistenceProvider::shouldUseReplicatedCatalogIdentifiers() const {
return false;
}
bool AttachedPersistenceProvider::shouldUseReplicatedRecordIds() const {
return false;
}
bool AttachedPersistenceProvider::shouldUseOplogWritesForFlowControlSampling() const {
return true;
}
bool AttachedPersistenceProvider::shouldStepDownForShutdown() const {
return true;
}
bool AttachedPersistenceProvider::shouldDelayDataAccessDuringStartup() const {
return false;
}
bool AttachedPersistenceProvider::shouldAvoidDuplicateCheckpoints() const {
return false;
}
bool AttachedPersistenceProvider::supportsLocalCollections() const {
return true;
}
bool AttachedPersistenceProvider::supportsUnstableCheckpoints() const {
return true;
}
bool AttachedPersistenceProvider::supportsTableLogging() const {
return true;
}
bool AttachedPersistenceProvider::supportsMultiDocumentTransactions() const {
return true;
}
} // namespace mongo::rss
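
For orientation, the registerer above is the only wiring a provider needs: the rss BUILD targets earlier in this commit select exactly one provider library per build, and that library's ConstructorActionRegisterer installs itself into the ReplicatedStorageService decoration. A purely illustrative sketch of an alternative provider (the MyProvider class below is hypothetical and not part of this change) would look roughly like this:

#include <memory>
#include <string>

#include "mongo/db/rss/attached_storage/attached_persistence_provider.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/service_context.h"

namespace mongo::rss {
namespace {

// Hypothetical provider, shown only to illustrate the plug-in point. It reuses the
// attached-storage defaults and overrides just the name; a real provider would choose
// every PersistenceProvider capability deliberately.
class MyProvider final : public AttachedPersistenceProvider {
public:
    std::string name() const override {
        return "My Provider";
    }
};

// Same registration pattern as registerAttachedPersistenceProvider above; only one
// such registerer should be linked into any given build.
ServiceContext::ConstructorActionRegisterer registerMyProvider{
    "MyPersistenceProvider", [](ServiceContext* service) {
        ReplicatedStorageService::get(service).setPersistenceProvider(
            std::make_unique<MyProvider>());
    }};

}  // namespace
}  // namespace mongo::rss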

View File

@ -0,0 +1,105 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/rss/persistence_provider.h"
namespace mongo::rss {
class AttachedPersistenceProvider : public PersistenceProvider {
public:
std::string name() const override;
/**
* We do not have any specific initialization requirements.
*/
boost::optional<Timestamp> getSentinelDataTimestamp() const override;
/**
* We do not have any additional WT config to add.
*/
std::string getWiredTigerConfig(int) const override;
/**
* Replicated catalog identifiers aren't compatible with attached storage as of right now, as a
* node may create a local collection whose catalog identifier collides with that of a
* replicated collection created on another node.
*/
bool shouldUseReplicatedCatalogIdentifiers() const override;
/**
* Attached storage does not require replicated RecordIds to function correctly.
*/
bool shouldUseReplicatedRecordIds() const override;
/**
* Flow control is based on the rate of generation of oplog data and the ability of the
* secondaries to keep the majority commit point relatively up-to-date.
*/
bool shouldUseOplogWritesForFlowControlSampling() const override;
/**
* Stepping down prior to shut down allows for a graceful and quick election most of the time.
*/
bool shouldStepDownForShutdown() const override;
/**
* We can safely initialize the catalog immediately after starting the storage engine.
*/
bool shouldDelayDataAccessDuringStartup() const override;
/**
* Running a duplicate checkpoint for a given timestamp has little effect other than being
* slightly inefficient, so there's no need to use extra synchronization to avoid it.
*/
bool shouldAvoidDuplicateCheckpoints() const override;
/**
* We can support local, fully unreplicated collections.
*/
bool supportsLocalCollections() const override;
/**
* We can support unstable checkpoints.
*/
bool supportsUnstableCheckpoints() const override;
/**
* We can support table logging.
*/
bool supportsTableLogging() const override;
/**
* We can support multi-document transactions.
*/
bool supportsMultiDocumentTransactions() const override;
};
} // namespace mongo::rss

View File

@ -0,0 +1,126 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/rss/attached_storage/attached_service_lifecycle.h"
#include "mongo/db/admission/flow_control.h"
#include "mongo/db/global_settings.h"
#include "mongo/db/repl/replication_consistency_markers_impl.h"
#include "mongo/db/repl/replication_coordinator_external_state_impl.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/topology_coordinator.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/storage/storage_options.h"
#include "mongo/executor/network_interface_factory.h"
#include "mongo/executor/thread_pool_task_executor.h"
#include "mongo/rpc/metadata/egress_metadata_hook_list.h"
namespace mongo::rss {
namespace {
// Checkpoint every 60 seconds by default.
constexpr double kDefaultAttachedSyncDelaySeconds = 60.0;
ServiceContext::ConstructorActionRegisterer registerAttachedServiceLifecycle{
"AttachedServiceLifecycle", [](ServiceContext* service) {
auto& rss = ReplicatedStorageService::get(service);
rss.setServiceLifecycle(std::make_unique<AttachedServiceLifecycle>());
}};
auto makeReplicationExecutor(ServiceContext* serviceContext) {
ThreadPool::Options tpOptions;
tpOptions.threadNamePrefix = "ReplCoord-";
tpOptions.poolName = "ReplCoordThreadPool";
tpOptions.maxThreads = 50;
tpOptions.onCreateThread = [serviceContext](const std::string& threadName) {
Client::initThread(threadName,
serviceContext->getService(ClusterRole::ShardServer),
Client::noSession(),
ClientOperationKillableByStepdown{false});
};
auto hookList = std::make_unique<rpc::EgressMetadataHookList>();
hookList->addHook(std::make_unique<rpc::VectorClockMetadataHook>(serviceContext));
return executor::ThreadPoolTaskExecutor::create(
std::make_unique<ThreadPool>(tpOptions),
executor::makeNetworkInterface("ReplNetwork", nullptr, std::move(hookList)));
}
} // namespace
AttachedServiceLifecycle::AttachedServiceLifecycle()
: _initializedUsingDefaultSyncDelay{[]() {
if (storageGlobalParams.syncdelay.load() < 0.0) {
storageGlobalParams.syncdelay.store(kDefaultAttachedSyncDelaySeconds);
return true;
}
return false;
}()} {}
void AttachedServiceLifecycle::initializeFlowControl(ServiceContext* svcCtx) {
FlowControl::set(
svcCtx, std::make_unique<FlowControl>(svcCtx, repl::ReplicationCoordinator::get(svcCtx)));
}
void AttachedServiceLifecycle::initializeStorageEngineExtensions(ServiceContext*) {}
std::unique_ptr<repl::ReplicationCoordinator>
AttachedServiceLifecycle::initializeReplicationCoordinator(ServiceContext* svcCtx) {
auto storageInterface = repl::StorageInterface::get(svcCtx);
auto replicationProcess = repl::ReplicationProcess::get(svcCtx);
repl::TopologyCoordinator::Options topoCoordOptions;
topoCoordOptions.maxSyncSourceLagSecs = Seconds(repl::maxSyncSourceLagSecs);
topoCoordOptions.clusterRole = serverGlobalParams.clusterRole;
return std::make_unique<repl::ReplicationCoordinatorImpl>(
svcCtx,
getGlobalReplSettings(),
std::make_unique<repl::ReplicationCoordinatorExternalStateImpl>(
svcCtx, storageInterface, replicationProcess),
makeReplicationExecutor(svcCtx),
std::make_unique<repl::TopologyCoordinator>(topoCoordOptions),
replicationProcess,
storageInterface,
SecureRandom().nextInt64());
}
void AttachedServiceLifecycle::initializeStateRequiredForStorageAccess(ServiceContext*) {}
void AttachedServiceLifecycle::shutdownStateRequiredForStorageAccess(ServiceContext*,
BSONObjBuilder*) {}
bool AttachedServiceLifecycle::initializedUsingDefaultSyncDelay() const {
return _initializedUsingDefaultSyncDelay;
}
bool AttachedServiceLifecycle::shouldKeepThreadAliveUntilStorageEngineHasShutDown(
const StringData) const {
return false;
}
} // namespace mongo::rss

View File

@ -0,0 +1,77 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/rss/service_lifecycle.h"
namespace mongo::rss {
class AttachedServiceLifecycle : public ServiceLifecycle {
public:
AttachedServiceLifecycle();
/**
* Initializes flow control based on oplog write rate.
*/
void initializeFlowControl(ServiceContext*) override;
/**
* There are no storage engine extensions utilized.
*/
void initializeStorageEngineExtensions(ServiceContext*) override;
/**
* Initializes a 'repl::ReplicationCoordinatorImpl'.
*/
std::unique_ptr<repl::ReplicationCoordinator> initializeReplicationCoordinator(
ServiceContext*) override;
/**
* There is no additional state required for storage access.
*/
void initializeStateRequiredForStorageAccess(ServiceContext*) override;
/**
* There is no additional state required for storage access.
*/
void shutdownStateRequiredForStorageAccess(ServiceContext*, BSONObjBuilder*) override;
bool initializedUsingDefaultSyncDelay() const override;
/**
* There are no specific persistence threads that must outlive the storage engine.
*/
bool shouldKeepThreadAliveUntilStorageEngineHasShutDown(StringData) const override;
private:
const bool _initializedUsingDefaultSyncDelay;
};
} // namespace mongo::rss

View File

@ -0,0 +1,131 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonelement.h"
#include <string>
#include <utility>
#include <boost/optional.hpp>
namespace mongo {
namespace rss {
/**
* This class provides an abstraction around the persistence layer underlying the storage and
* replication subsystems. Depending on the configuration, the implementation may be backed by a
* local filesystem, a remote service, etc. The interface is built primarily around capabilities and
* expected behaviors, allowing consumers to act based on these flags, rather than needing to reason
* about how a particular provider would behave in a given context.
*/
class PersistenceProvider {
public:
virtual ~PersistenceProvider() = default;
/**
* The name of this provider, for use in e.g. logging and error messages.
*/
virtual std::string name() const = 0;
/**
* If not none, the KVEngine will use the returned Timestamp during initialization as the
* initial data timestamp.
*/
virtual boost::optional<Timestamp> getSentinelDataTimestamp() const = 0;
/**
* Additional configuration that should be added to the WiredTiger config string for the
* 'wiredtiger_open' call. The 'flattenLeafPageDelta' is expected to be the corresponding
* WiredTigerConfig member value.
*/
virtual std::string getWiredTigerConfig(int flattenLeafPageDelta) const = 0;
/**
* If true, the provider expects that all catalog identifiers will be replicated and identical
* between nodes.
*/
virtual bool shouldUseReplicatedCatalogIdentifiers() const = 0;
/**
* If true, the provider expects that RecordIds will be replicated (either explicitly or
* implicitly) and identical between nodes.
*/
virtual bool shouldUseReplicatedRecordIds() const = 0;
/**
* If true, writes to the oplog should be used as the unit of progress for flow control
* sampling.
*/
virtual bool shouldUseOplogWritesForFlowControlSampling() const = 0;
/**
* If true, the node should step down prior to shutdown in order to minimize unavailability.
*/
virtual bool shouldStepDownForShutdown() const = 0;
/**
* If true, data may not be available immediately after starting the storage engine, so systems
* like the catalog should not be initialized immediately.
*/
virtual bool shouldDelayDataAccessDuringStartup() const = 0;
/**
* If true, the system should take precautions to avoid taking multiple checkpoints for the same
* stable timestamp. The underlying key-value engine likely does not provide the necessary
* coordination by default.
*/
virtual bool shouldAvoidDuplicateCheckpoints() const = 0;
/**
* If true, the storage provider supports the use of local, unreplicated collections.
*/
virtual bool supportsLocalCollections() const = 0;
/**
* If true, the provider can support unstable checkpoints.
*/
virtual bool supportsUnstableCheckpoints() const = 0;
/**
* If true, the provider can support logging (i.e. journaling) on individual tables.
*/
virtual bool supportsTableLogging() const = 0;
/**
* If true, the provider supports multi-document transactions.
*/
virtual bool supportsMultiDocumentTransactions() const = 0;
};
} // namespace rss
} // namespace mongo
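
As a usage sketch (not part of this change), a consumer is expected to branch on these capability flags rather than on a concrete provider type, mirroring the createOplog() hunk earlier in this commit; the helper name below is hypothetical and the include paths are assumed:

#include "mongo/db/operation_context.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/storage_engine.h"

namespace mongo {

// Hypothetical helper: only take an unstable checkpoint when the configured provider
// says it supports one, instead of asking "is this attached storage?".
void flushAllFilesIfSupported(OperationContext* opCtx) {
    auto& rss = rss::ReplicatedStorageService::get(opCtx);
    if (rss.getPersistenceProvider().supportsUnstableCheckpoints()) {
        opCtx->getServiceContext()->getStorageEngine()->flushAllFiles(
            opCtx, /*callerHoldsReadLock=*/false);
    }
}

}  // namespace mongo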

View File

@ -0,0 +1,63 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/rss/replicated_storage_service.h"
namespace mongo::rss {
namespace {
const auto getReplicatedStorageService =
ServiceContext::declareDecoration<ReplicatedStorageService>();
} // namespace
ReplicatedStorageService& ReplicatedStorageService::get(ServiceContext* svcCtx) {
return getReplicatedStorageService(svcCtx);
}
ReplicatedStorageService& ReplicatedStorageService::get(OperationContext* opCtx) {
return get(opCtx->getServiceContext());
}
PersistenceProvider& ReplicatedStorageService::getPersistenceProvider() {
invariant(_provider);
return *_provider;
}
void ReplicatedStorageService::setPersistenceProvider(std::unique_ptr<PersistenceProvider>&& p) {
_provider = std::move(p);
}
ServiceLifecycle& ReplicatedStorageService::getServiceLifecycle() {
invariant(_lifecycle);
return *_lifecycle;
}
void ReplicatedStorageService::setServiceLifecycle(std::unique_ptr<ServiceLifecycle>&& l) {
_lifecycle = std::move(l);
}
} // namespace mongo::rss

View File

@ -0,0 +1,55 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/operation_context.h"
#include "mongo/db/rss/persistence_provider.h"
#include "mongo/db/rss/service_lifecycle.h"
#include "mongo/db/service_context.h"
namespace mongo::rss {
class ReplicatedStorageService {
public:
static ReplicatedStorageService& get(ServiceContext*);
static ReplicatedStorageService& get(OperationContext*);
PersistenceProvider& getPersistenceProvider();
void setPersistenceProvider(std::unique_ptr<PersistenceProvider>&&);
ServiceLifecycle& getServiceLifecycle();
void setServiceLifecycle(std::unique_ptr<ServiceLifecycle>&&);
private:
std::unique_ptr<PersistenceProvider> _provider;
std::unique_ptr<ServiceLifecycle> _lifecycle;
};
} // namespace mongo::rss
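
A brief usage sketch (hypothetical test-style wiring, not part of this change): production code installs providers through the ConstructorActionRegisterer instances shown earlier, but the decoration can also be populated and queried directly, for example from a fixture:

#include <memory>

#include "mongo/db/rss/attached_storage/attached_persistence_provider.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/service_context.h"
#include "mongo/util/assert_util.h"

namespace mongo {

// Hypothetical helper for a test fixture: install the attached-storage provider on
// this ServiceContext and sanity-check one of its capability flags.
void installAttachedProviderForTest(ServiceContext* svcCtx) {
    auto& rss = rss::ReplicatedStorageService::get(svcCtx);
    rss.setPersistenceProvider(std::make_unique<rss::AttachedPersistenceProvider>());
    invariant(rss.getPersistenceProvider().supportsLocalCollections());
}

}  // namespace mongo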

View File

@ -0,0 +1,97 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/service_context.h"
#include <memory>
#include <string>
#include <utility>
namespace mongo {
namespace rss {
/**
* This class provides an abstraction for a set of functionalities related to the service lifecycle
* (i.e. startup and shutdown).
*
* The implementation details are generally closely related to the configured 'PersistenceProvider',
* but we separate it out from that class since that class is primarily focused on
* capabilities/behaviors, while this class instead represents a set of setup/teardown and related
* routines.
*/
class ServiceLifecycle {
public:
virtual ~ServiceLifecycle() = default;
/**
* Initializes the flow control algorithm for the current service configuration.
*/
virtual void initializeFlowControl(ServiceContext*) = 0;
/**
* Initializes any storage engine extensions necessary for the current service configuration.
*/
virtual void initializeStorageEngineExtensions(ServiceContext*) = 0;
/**
* Initializes and returns the replication coordinator appropriate for the current service
* configuration.
*/
virtual std::unique_ptr<repl::ReplicationCoordinator> initializeReplicationCoordinator(
ServiceContext*) = 0;
/**
* Initializes any state required to access 'repl::StorageInterface'. This method will be run
* prior to 'initializeReplicationCoordinator'.
*/
virtual void initializeStateRequiredForStorageAccess(ServiceContext*) = 0;
/**
* Tears down any state set up by 'initializeStateRequiredForStorageAccess'.
*/
virtual void shutdownStateRequiredForStorageAccess(ServiceContext*, BSONObjBuilder*) = 0;
/**
* If true, this instance was initialized using the default syncdelay parameter rather than any
* user-configured value.
*/
virtual bool initializedUsingDefaultSyncDelay() const = 0;
/**
* If true, the named thread must be kept alive until the storage engine has shut down.
*/
virtual bool shouldKeepThreadAliveUntilStorageEngineHasShutDown(
StringData threadName) const = 0;
};
} // namespace rss
} // namespace mongo
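
To make the intended call order concrete, here is a simplified sketch of how the db.cpp changes in this commit drive these hooks (the free functions below are hypothetical; the real call sites are the setUpReplication() and shutdownTask() hunks above):

#include <memory>
#include <utility>

#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/service_context.h"

namespace mongo {

// Startup side: storage-access state is initialized before the lifecycle constructs
// the ReplicationCoordinator, as in setUpReplication().
void startReplicationViaLifecycle(ServiceContext* svcCtx) {
    auto& lifecycle = rss::ReplicatedStorageService::get(svcCtx).getServiceLifecycle();
    lifecycle.initializeStateRequiredForStorageAccess(svcCtx);
    std::unique_ptr<repl::ReplicationCoordinator> replCoord =
        lifecycle.initializeReplicationCoordinator(svcCtx);
    repl::ReplicationCoordinator::set(svcCtx, std::move(replCoord));
}

// Shutdown side: lifecycle state that must outlive the replication subsystem and the
// storage engine is torn down last, as in shutdownTask().
void shutdownLifecycleState(ServiceContext* svcCtx, BSONObjBuilder* timingBuilder) {
    auto& lifecycle = rss::ReplicatedStorageService::get(svcCtx).getServiceLifecycle();
    lifecycle.shutdownStateRequiredForStorageAccess(svcCtx, timingBuilder);
}

}  // namespace mongo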

View File

@ -216,7 +216,7 @@ MongoDScopedGlobalServiceContextForTest::~MongoDScopedGlobalServiceContextForTes
std::swap(storageGlobalParams.engineSetByUser, _stashedStorageParams.engineSetByUser);
std::swap(storageGlobalParams.repair, _stashedStorageParams.repair);
storageGlobalParams.reset();
storageGlobalParams.reset_forTest();
}
} // namespace mongo

View File

@ -89,6 +89,7 @@
#include "mongo/db/repl/repl_set_member_in_standalone_mode.h" #include "mongo/db/repl/repl_set_member_in_standalone_mode.h"
#include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/storage_interface.h" #include "mongo/db/repl/storage_interface.h"
#include "mongo/db/rss/replicated_storage_service.h"
#include "mongo/db/server_feature_flags_gen.h" #include "mongo/db/server_feature_flags_gen.h"
#include "mongo/db/service_context.h" #include "mongo/db/service_context.h"
#include "mongo/db/storage/control/journal_flusher.h" #include "mongo/db/storage/control/journal_flusher.h"
@ -878,6 +879,14 @@ void startupRecovery(OperationContext* opCtx,
StorageEngine* storageEngine,
StorageEngine::LastShutdownState lastShutdownState,
BSONObjBuilder* startupTimeElapsedBuilder = nullptr) {
auto& rss = rss::ReplicatedStorageService::get(opCtx);
if (rss.getPersistenceProvider().shouldDelayDataAccessDuringStartup()) {
LOGV2(10985327,
"Skip startupRecovery; it will be handled later when WT loads the "
"checkpoint");
return;
}
invariant(!storageGlobalParams.repair);
ServiceContext* svcCtx = opCtx->getServiceContext();


@@ -262,6 +262,7 @@ mongo_cc_library(
     name = "mdb_catalog",
     srcs = [
         "mdb_catalog.cpp",
+        "//src/mongo/db/rss:persistence_provider.h",
         "//src/mongo/db/storage/kv:kv_engine.h",
     ],
     hdrs = [
@@ -270,6 +271,7 @@ mongo_cc_library(
     deps = [
         ":record_store_base",
         "//src/mongo/db:server_base",
+        "//src/mongo/db/rss:replicated_storage_service",
         "//src/mongo/db/storage:feature_document_util",
         "//src/mongo/db/storage:ident",
     ],
@@ -423,6 +425,7 @@ mongo_cc_library(
     ],
     hdrs = [
         "oplog_truncate_markers.h",
+        "//src/mongo/db/rss:persistence_provider.h",
         "//src/mongo/db/storage/kv:kv_engine.h",
     ],
     deps = [
@@ -501,6 +504,7 @@ mongo_cc_library(
         ":storage_repair_observer",
         "//src/mongo/db:server_base",
         "//src/mongo/db:shard_role",
+        "//src/mongo/db/rss:replicated_storage_service",
        "//src/mongo/db/storage/kv:kv_drop_pending_ident_reaper",
     ],
 )


@@ -18,6 +18,7 @@ mongo_cc_library(
     hdrs = [
         "devnull_kv_engine.h",
         "ephemeral_catalog_record_store.h",
+        "//src/mongo/db/rss:persistence_provider.h",
         "//src/mongo/db/storage/kv:kv_engine.h",
     ],
     deps = [


@@ -68,7 +68,8 @@ public:
     std::unique_ptr<RecoveryUnit> newRecoveryUnit() override;

-    Status createRecordStore(const NamespaceString& nss,
+    Status createRecordStore(const rss::PersistenceProvider&,
+                             const NamespaceString& nss,
                              StringData ident,
                              const RecordStore::Options& options) override {
         return Status::OK();
@@ -89,6 +90,7 @@ public:
                              KeyFormat keyFormat) override;

     Status createSortedDataInterface(
+        const rss::PersistenceProvider&,
         RecoveryUnit&,
         const NamespaceString& nss,
         const UUID& uuid,


@@ -90,7 +90,8 @@ public:
         return nullptr;
     }

-    Status createRecordStore(const NamespaceString& nss,
+    Status createRecordStore(const rss::PersistenceProvider&,
+                             const NamespaceString& nss,
                              StringData ident,
                              const RecordStore::Options& options) override {
         return Status::OK();
@@ -108,6 +109,7 @@ public:
         return {};
     }

     Status createSortedDataInterface(
+        const rss::PersistenceProvider&,
         RecoveryUnit&,
         const NamespaceString& nss,
         const UUID& uuid,


@@ -32,6 +32,7 @@
 #include "mongo/base/status.h"
 #include "mongo/base/string_data.h"
 #include "mongo/bson/timestamp.h"
+#include "mongo/db/rss/persistence_provider.h"
 #include "mongo/db/storage/compact_options.h"
 #include "mongo/db/storage/record_store.h"
 #include "mongo/db/storage/sorted_data_interface.h"
@@ -126,7 +127,8 @@ public:
     *
     * Creates a 'RecordStore' and generated from the provided 'options'.
     */
-    virtual Status createRecordStore(const NamespaceString& nss,
+    virtual Status createRecordStore(const rss::PersistenceProvider&,
+                                     const NamespaceString& nss,
                                      StringData ident,
                                      const RecordStore::Options& options) = 0;
@@ -201,6 +203,7 @@ public:
     virtual bool underCachePressure(int concurrentWriteOuts, int concurrentReadOuts) = 0;

     virtual Status createSortedDataInterface(
+        const rss::PersistenceProvider&,
         RecoveryUnit&,
         const NamespaceString& nss,
         const UUID& uuid,
@@ -257,10 +260,11 @@ public:
     * This recovery process makes no guarantees about the integrity of data recovered or even that
     * it still exists when recovered.
     */
-    virtual Status recoverOrphanedIdent(const NamespaceString& nss,
+    virtual Status recoverOrphanedIdent(const rss::PersistenceProvider& provider,
+                                        const NamespaceString& nss,
                                         StringData ident,
                                         const RecordStore::Options& recordStoreOptions) {
-        auto status = createRecordStore(nss, ident, recordStoreOptions);
+        auto status = createRecordStore(provider, nss, ident, recordStoreOptions);
         if (status.isOK()) {
             return {ErrorCodes::DataModifiedByRepair, "Orphan recovery created a new record store"};
         }
@@ -375,6 +379,22 @@ public:
     */
     virtual void setJournalListener(JournalListener* jl) = 0;

+    /**
+     * See `StorageEngine::setLastMaterializedLsn`
+     */
+    virtual void setLastMaterializedLsn(uint64_t lsn) {}
+
+    /**
+     * Configures the specified checkpoint as the starting point for recovery.
+     */
+    virtual void setRecoveryCheckpointMetadata(StringData checkpointMetadata) {}
+
+    /**
+     * Configures the storage engine as the leader, allowing it to flush checkpoints to remote
+     * storage.
+     */
+    virtual void promoteToLeader() {}
+
     /**
     * See `StorageEngine::setStableTimestamp`
     */


@@ -47,6 +47,7 @@
 #include "mongo/db/namespace_string.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/record_id.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/service_context_test_fixture.h"
 #include "mongo/db/storage/key_format.h"
 #include "mongo/db/storage/key_string/key_string.h"
@@ -116,8 +117,10 @@ protected:
         auto clientAndCtx = makeClientAndCtx("opCtx");
         auto opCtx = clientAndCtx.opCtx();
         KVEngine* engine = helper->getEngine();
+        auto& provider = rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider();
         ASSERT_OK(
-            engine->createRecordStore(NamespaceString::createNamespaceString_forTest("catalog"),
+            engine->createRecordStore(provider,
+                                      NamespaceString::createNamespaceString_forTest("catalog"),
                                       "collection-catalog",
                                       RecordStore::Options{}));
@@ -231,7 +234,8 @@ protected:
                                              const RecordStore::Options& recordStoreOptions,
                                              boost::optional<UUID> uuid) {
         auto opCtx = _makeOperationContext(engine);
-        ASSERT_OK(engine->createRecordStore(nss, ident, recordStoreOptions));
+        auto& provider = rss::ReplicatedStorageService::get(opCtx.get()).getPersistenceProvider();
+        ASSERT_OK(engine->createRecordStore(provider, nss, ident, recordStoreOptions));
         auto rs = engine->getRecordStore(opCtx.get(), nss, ident, recordStoreOptions, uuid);
         ASSERT(rs);
         return rs;
@@ -348,8 +352,14 @@ TEST_F(KVEngineTestHarness, SimpleSorted1) {
     {
         auto opCtx = _makeOperationContext(engine);
         auto& ru = *shard_role_details::getRecoveryUnit(opCtx.get());
-        ASSERT_OK(engine->createSortedDataInterface(
-            ru, kNss, kUUID, kIdent, config, boost::none /* storageEngineIndexOptions */));
+        auto& provider = rss::ReplicatedStorageService::get(opCtx.get()).getPersistenceProvider();
+        ASSERT_OK(engine->createSortedDataInterface(provider,
+                                                    ru,
+                                                    kNss,
+                                                    kUUID,
+                                                    kIdent,
+                                                    config,
+                                                    boost::none /* storageEngineIndexOptions */));
         sorted = engine->getSortedDataInterface(
             opCtx.get(), ru, kNss, kUUID, kIdent, config, kRecordStoreOptions.keyFormat);
         ASSERT(sorted);


@@ -39,6 +39,7 @@
 #include "mongo/db/namespace_string.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/record_id.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/service_context.h"
 #include "mongo/db/service_context_test_fixture.h"
 #include "mongo/db/storage/kv/kv_engine.h"
@@ -237,7 +238,9 @@ public:
         const auto nss = NamespaceString::createNamespaceString_forTest("a.b");
         const auto ident = "collection-ident";
         RecordStore::Options options;
-        ASSERT_OK(engine->createRecordStore(nss, ident, options));
+        auto& provider =
+            rss::ReplicatedStorageService::get(getGlobalServiceContext()).getPersistenceProvider();
+        ASSERT_OK(engine->createRecordStore(provider, nss, ident, options));
         rs = engine->getRecordStore(op, nss, ident, options, UUID::gen());
         ASSERT(rs);
     }


@@ -34,6 +34,7 @@
 #include "mongo/db/namespace_string.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/record_id.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/storage/feature_document_util.h"
 #include "mongo/db/storage/kv/kv_engine.h"
 #include "mongo/db/storage/record_store.h"
@@ -242,7 +243,9 @@ StatusWith<std::unique_ptr<RecordStore>> MDBCatalog::createRecordStoreForEntry(
     const MDBCatalog::EntryIdentifier& entry,
     const boost::optional<UUID>& uuid,
     const RecordStore::Options& recordStoreOptions) {
-    Status status = _engine->createRecordStore(entry.nss, entry.ident, recordStoreOptions);
+    auto& provider = rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider();
+    Status status =
+        _engine->createRecordStore(provider, entry.nss, entry.ident, recordStoreOptions);
     if (!status.isOK()) {
         return status;
     }


@@ -715,6 +715,26 @@ public:
     */
     virtual boost::optional<Timestamp> getLastStableRecoveryTimestamp() const = 0;

+    /**
+     * Sets the last materialized LSN, marking the highest phylog LSN
+     * that has been successfully written to the page server and should have no holes.
+     *
+     * TODO: Revisit how to handle cases where mongod speaks with a log server
+     * in a non-local zone due to failover.
+     */
+    virtual void setLastMaterializedLsn(uint64_t lsn) = 0;
+
+    /**
+     * Configures the specified checkpoint as the starting point for recovery.
+     */
+    virtual void setRecoveryCheckpointMetadata(StringData checkpointMetadata) = 0;
+
+    /**
+     * Configures the storage engine as the leader, allowing it to flush checkpoints to remote
+     * storage.
+     */
+    virtual void promoteToLeader() = 0;
+
     /**
     * Sets the highest timestamp at which the storage engine is allowed to take a checkpoint. This
     * timestamp must not decrease unless force=true is set, in which case we force the stable
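
These three hooks are forwarded through StorageEngineImpl to the KVEngine (see below). A minimal sketch of the intended call sequence on step-up, assuming a hypothetical helper and parameters supplied by the control plane:

    // Hypothetical sketch, not from this commit: driving the new hooks on step-up.
    void onStepUp(StorageEngine* engine, StringData checkpointMeta, uint64_t lastDurableLsn) {
        // Point recovery at the checkpoint chosen for this node...
        engine->setRecoveryCheckpointMetadata(checkpointMeta);
        // ...advance the hole-free log-sequence-number watermark...
        engine->setLastMaterializedLsn(lastDurableLsn);
        // ...and only then allow this node to flush checkpoints to remote storage.
        engine->promoteToLeader();
    }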


@@ -38,6 +38,7 @@
 #include "mongo/db/local_catalog/catalog_raii.h"
 #include "mongo/db/local_catalog/shard_role_api/transaction_resources.h"
 #include "mongo/db/operation_context.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/storage/backup_cursor_hooks.h"
 #include "mongo/db/storage/deferred_drop_record_store.h"
 #include "mongo/db/storage/disk_space_monitor.h"
@@ -130,6 +131,15 @@ StorageEngineImpl::StorageEngineImpl(OperationContext* opCtx,
     invariant(prevRecoveryUnit->isNoop());
     shard_role_details::setRecoveryUnit(
         opCtx, _engine->newRecoveryUnit(), WriteUnitOfWork::RecoveryUnitState::kNotInUnitOfWork);

+    auto& rss = rss::ReplicatedStorageService::get(opCtx->getServiceContext());
+    if (rss.getPersistenceProvider().shouldDelayDataAccessDuringStartup()) {
+        LOGV2(10985326,
+              "Skip loading catalog on startup; it will be handled later when WT loads the "
+              "checkpoint");
+        return;
+    }
+
     // If we throw in this constructor, make sure to destroy the RecoveryUnit instance created above
     // before '_engine' is destroyed.
     ScopeGuard recoveryUnitResetGuard([&] {
@@ -181,8 +191,9 @@ void StorageEngineImpl::loadMDBCatalog(OperationContext* opCtx,
     if (!catalogExists) {
         WriteUnitOfWork uow(opCtx);

+        auto& provider = rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider();
         auto status = _engine->createRecordStore(
-            kCatalogInfoNamespace, ident::kMbdCatalog, catalogRecordStoreOpts);
+            provider, kCatalogInfoNamespace, ident::kMbdCatalog, catalogRecordStoreOpts);

         // BadValue is usually caused by invalid configuration string.
         // We still fassert() but without a stack trace.
@@ -422,8 +433,9 @@ Status StorageEngineImpl::_recoverOrphanedCollection(OperationContext* opCtx,
     WriteUnitOfWork wuow(opCtx);
     const auto recordStoreOptions =
         _catalog->getParsedRecordStoreOptions(opCtx, catalogId, collectionName);
-    Status status =
-        _engine->recoverOrphanedIdent(collectionName, collectionIdent, recordStoreOptions);
+    auto& provider = rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider();
+    Status status = _engine->recoverOrphanedIdent(
+        provider, collectionName, collectionIdent, recordStoreOptions);

     bool dataModified = status.code() == ErrorCodes::DataModifiedByRepair;
     if (!status.isOK() && !dataModified) {
@@ -706,6 +718,18 @@ void StorageEngineImpl::setJournalListener(JournalListener* jl) {
     _engine->setJournalListener(jl);
 }

+void StorageEngineImpl::setLastMaterializedLsn(uint64_t lsn) {
+    _engine->setLastMaterializedLsn(lsn);
+}
+
+void StorageEngineImpl::setRecoveryCheckpointMetadata(StringData checkpointMetadata) {
+    _engine->setRecoveryCheckpointMetadata(checkpointMetadata);
+}
+
+void StorageEngineImpl::promoteToLeader() {
+    _engine->promoteToLeader();
+}
+
 void StorageEngineImpl::setStableTimestamp(Timestamp stableTimestamp, bool force) {
     _engine->setStableTimestamp(stableTimestamp, force);
 }


@@ -144,6 +144,12 @@ public:
     void cleanShutdown(ServiceContext* svcCtx, bool memLeakAllowed) override;

+    void setLastMaterializedLsn(uint64_t lsn) override;
+
+    void setRecoveryCheckpointMetadata(StringData checkpointMetadata) override;
+
+    void promoteToLeader() override;
+
     void setStableTimestamp(Timestamp stableTimestamp, bool force = false) override;

     Timestamp getStableTimestamp() const override;


@@ -129,7 +129,14 @@ public:
     boost::optional<Timestamp> getLastStableRecoveryTimestamp() const final {
         MONGO_UNREACHABLE;
     }

-    void setStableTimestamp(Timestamp stableTimestamp, bool force = false) final {}
+    void setLastMaterializedLsn(uint64_t lsn) final {}
+
+    void setRecoveryCheckpointMetadata(StringData checkpointMetadata) final {}
+
+    void promoteToLeader() final {}
+
+    void setStableTimestamp(Timestamp stableTimestamp, bool force = false) override {}

     Timestamp getStableTimestamp() const override {
         return Timestamp();
     }


@@ -36,6 +36,7 @@
 #include "mongo/db/local_catalog/durable_catalog.h"
 #include "mongo/db/local_catalog/shard_role_api/transaction_resources.h"
 #include "mongo/db/repl/storage_interface_impl.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/service_context_d_test_fixture.h"
 #include "mongo/db/storage/kv/kv_engine.h"
 #include "mongo/db/storage/mdb_catalog.h"
@@ -117,8 +118,9 @@ public:
     */
    Status createCollTable(OperationContext* opCtx, NamespaceString collName) {
        const std::string identName = _storageEngine->generateNewCollectionIdent(collName.dbName());
+        auto& provider = rss::ReplicatedStorageService::get(opCtx).getPersistenceProvider();
        return _storageEngine->getEngine()->createRecordStore(
-            collName, identName, RecordStore::Options{});
+            provider, collName, identName, RecordStore::Options{});
    }

    Status dropIndexTable(OperationContext* opCtx, NamespaceString nss, StringData indexName) {


@@ -45,10 +45,14 @@
 namespace mongo {

 StorageGlobalParams::StorageGlobalParams() {
-    reset();
+    _reset();
 }

-void StorageGlobalParams::reset() {
+void StorageGlobalParams::reset_forTest() {
+    _reset();
+}
+
+void StorageGlobalParams::_reset() {
     engine = "wiredTiger";
     engineSetByUser = false;
     dbpath = kDefaultDbPath;
@@ -60,7 +64,7 @@ void StorageGlobalParams::reset() {
     noTableScan.store(false);
     directoryperdb = false;
-    syncdelay.store(60.0);
+    syncdelay.store(-1.0);
     queryableBackupMode = false;
     groupCollections = false;
     oplogMinRetentionHours.store(0.0);


@@ -44,7 +44,7 @@ namespace mongo {
 struct StorageGlobalParams {
     StorageGlobalParams();

-    void reset();
+    void reset_forTest();

     // Returns the directory path used by the spill storage engine to store spilled data.
     std::string getSpillDbPath() const;
@@ -109,13 +109,14 @@ struct StorageGlobalParams {
     // --syncdelay
     // Delay in seconds between triggering the next checkpoint after the completion of the previous
-    // one. A value of 0 indicates that checkpointing will be skipped.
+    // one. A value of 0 indicates that checkpointing will be skipped. A value <0
+    // will result in using the default value for the configured persistence provider.
     // Do not set this value on production systems.
     // In almost every situation, you should use the default setting.
     // This parameter is both a server parameter and a configuration parameter, and to resolve
-    // conflicts between the two the default must be set here.
+    // conflicts between the two, a default sentinel (<0) must be set here.
     static constexpr double kMaxSyncdelaySecs = 60 * 60;  // 1hr
-    AtomicWord<double> syncdelay{60.0};  // seconds between checkpoints
+    AtomicWord<double> syncdelay{-1.0};  // seconds between checkpoints

     // --queryableBackupMode
     // Prevents user-originating operations from performing writes to the server. Internally
@@ -139,6 +140,9 @@ struct StorageGlobalParams {
     // Test-only option. Disables table logging.
     bool forceDisableTableLogging = false;

+private:
+    void _reset();
 };

 extern StorageGlobalParams storageGlobalParams;
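
With -1.0 as the "unset" sentinel, the effective checkpoint interval has to be resolved at startup. A sketch of that resolution, assuming a hypothetical accessor on the persistence provider:

    // Hypothetical sketch: resolve the effective --syncdelay value.
    double effectiveSyncdelaySecs(const rss::PersistenceProvider& provider) {
        const double configured = storageGlobalParams.syncdelay.load();
        // A negative value means the user never set --syncdelay, so defer to the
        // provider's default (e.g. 60s locally, provider-specific for disagg).
        return configured < 0 ? provider.defaultSyncdelaySecs()  // hypothetical accessor
                              : configured;
    }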


@@ -112,6 +112,7 @@ mongo_cc_library(
         "//src/mongo/db:server_base",
         "//src/mongo/db:server_feature_flags",
         "//src/mongo/db:service_context",
+        "//src/mongo/db/rss:replicated_storage_service",
         "//src/mongo/db/storage:container",
         "//src/mongo/db/storage:exceptions",
         "//src/mongo/db/storage:execution_context",
@@ -173,6 +174,7 @@ mongo_cc_library(
     deps = [
         ":storage_wiredtiger_core",
         "//src/mongo/db:service_context_test_fixture",
+        "//src/mongo/db/rss:persistence_provider_impl",
         "//src/mongo/db/storage:record_store_test_harness",
         "//src/mongo/util:clock_source_mock",
     ],
@@ -228,6 +230,7 @@ mongo_cc_unit_test(
         "//src/mongo/db/storage:storage_options",
         "//src/mongo/db/storage/kv:kv_engine_test_harness",
         "//src/mongo/idl:server_parameter_test_controller",
+        "//src/mongo/idl:server_parameter_test_util",
         "//src/mongo/util:clock_source_mock",
     ],
 )


@@ -69,7 +69,7 @@ SpillWiredTigerKVEngine::SpillWiredTigerKVEngine(const std::string& canonicalName,
     }

     std::string config =
-        generateWTOpenConfigString(_wtConfig, wtExtensions.getOpenExtensionsConfig());
+        generateWTOpenConfigString(_wtConfig, wtExtensions.getOpenExtensionsConfig(), "");

     LOGV2(10158000, "Opening spill WiredTiger", "config"_attr = config);
     auto startTime = Date_t::now();
View File

@@ -94,7 +94,8 @@ public:
         MONGO_UNREACHABLE;
     }

-    Status createRecordStore(const NamespaceString& nss,
+    Status createRecordStore(const rss::PersistenceProvider&,
+                             const NamespaceString& nss,
                              StringData ident,
                              const RecordStore::Options& options) override {
         MONGO_UNREACHABLE;
@@ -126,6 +127,7 @@ public:
     }

     Status createSortedDataInterface(
+        const rss::PersistenceProvider&,
         RecoveryUnit&,
         const NamespaceString& nss,
         const UUID& uuid,


@@ -32,10 +32,12 @@
 #include "mongo/base/init.h"  // IWYU pragma: keep
 #include "mongo/base/string_data.h"
+#include "mongo/db/rss/replicated_storage_service.h"
 #include "mongo/db/service_context.h"
 #include "mongo/db/service_context_test_fixture.h"
 #include "mongo/db/storage/wiredtiger/wiredtiger_extensions.h"
 #include "mongo/db/storage/wiredtiger/wiredtiger_global_options_gen.h"
+#include "mongo/idl/server_parameter_test_util.h"
 #include "mongo/unittest/temp_dir.h"
 #include "mongo/unittest/unittest.h"
 #include "mongo/util/clock_source_mock.h"


@@ -34,6 +34,7 @@
 #include "mongo/db/service_context.h"
 #include "mongo/util/assert_util.h"
 #include "mongo/util/decorable.h"
+#include "mongo/util/str.h"

 #include <memory>
 #include <utility>
@@ -49,8 +50,33 @@ ServiceContext::ConstructorActionRegisterer setWiredTigerCustomizationHooks{
 const auto getCustomizationHooks =
     ServiceContext::declareDecoration<std::unique_ptr<WiredTigerCustomizationHooks>>();

+const auto getWiredTigerCustomizationHooksRegistry =
+    ServiceContext::declareDecoration<WiredTigerCustomizationHooksRegistry>();
+
 }  // namespace

+WiredTigerCustomizationHooksRegistry& WiredTigerCustomizationHooksRegistry::get(
+    ServiceContext* service) {
+    return getWiredTigerCustomizationHooksRegistry(service);
+}
+
+void WiredTigerCustomizationHooksRegistry::addHook(
+    std::unique_ptr<WiredTigerCustomizationHooks> custHook) {
+    invariant(custHook);
+    _hooks.push_back(std::move(custHook));
+}
+
+std::string WiredTigerCustomizationHooksRegistry::getTableCreateConfig(StringData tableName) const {
+    str::stream config;
+    for (const auto& h : _hooks) {
+        config << h->getTableCreateConfig(tableName);
+    }
+    return config;
+}
+
 void WiredTigerCustomizationHooks::set(ServiceContext* service,
                                        std::unique_ptr<WiredTigerCustomizationHooks> customHooks) {
     auto& hooks = getCustomizationHooks(service);


@@ -31,6 +31,7 @@
 #include <memory>
 #include <string>
+#include <vector>

 namespace mongo {

 class StringData;
@@ -58,4 +59,27 @@ public:
     virtual std::string getTableCreateConfig(StringData tableName);
 };

+/**
+ * Registry to store multiple WiredTiger customization hooks.
+ */
+class WiredTigerCustomizationHooksRegistry {
+public:
+    static WiredTigerCustomizationHooksRegistry& get(ServiceContext* serviceContext);
+
+    /**
+     * Adds a WiredTiger customization hook to the registry. Multiple hooks can be
+     * added, and their configurations will be combined.
+     */
+    void addHook(std::unique_ptr<WiredTigerCustomizationHooks> custHook);
+
+    /**
+     * Gets a combined configuration string from all hooks in the registry for
+     * the provided table name during the `WT_SESSION::create` call.
+     */
+    std::string getTableCreateConfig(StringData tableName) const;
+
+private:
+    std::vector<std::unique_ptr<WiredTigerCustomizationHooks>> _hooks;
+};
+
 }  // namespace mongo
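
The registry is a ServiceContext decoration, so hooks registered anywhere in process setup are visible engine-wide. Intended usage, sketched with hypothetical hook subclasses:

    // Hypothetical sketch of registering and combining customization hooks.
    auto& registry = WiredTigerCustomizationHooksRegistry::get(getGlobalServiceContext());
    registry.addHook(std::make_unique<EncryptionHooks>());   // hypothetical subclass
    registry.addHook(std::make_unique<PageServerHooks>());   // hypothetical subclass

    // At table-creation time the fragments from every registered hook are
    // concatenated and appended to the WT_SESSION::create configuration string:
    std::string extraConfig = registry.getTableCreateConfig("table:collection-7--123");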


@@ -69,6 +69,7 @@ public:
     std::string liveRestoreSource;
     int liveRestoreThreads;
     double liveRestoreReadSizeMB;
+    int flattenLeafPageDelta;
     std::string collectionBlockCompressor;
     bool useIndexPrefixCompression;


@@ -126,6 +126,18 @@ server_parameters:
             lte: 100
         redact: false

+    wiredTigerFlattenLeafPageDelta:
+        description: >-
+            On a successful page read, WiredTiger rewrites leaf pages carrying deltas into a new
+            disk image. Ternary: 0=disabled, 1=enabled if disaggregatedStorageEnabled is true, 2=enabled unconditionally.
+        set_at: startup
+        cpp_varname: "wiredTigerGlobalOptions.flattenLeafPageDelta"
+        default: 1
+        validator:
+            gte: 0
+            lte: 2
+        redact: false
+
     wiredTigerEvictionDirtyTargetGB:
         description: >-
             Absolute dirty cache eviction target. Once eviction begins,
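
A sketch of how the ternary could be consumed when deciding whether to enable the behavior (the helper is illustrative, not from this commit):

    // Hypothetical sketch: interpret the 0/1/2 ternary at startup.
    bool shouldFlattenLeafPageDelta(bool disaggregatedStorageEnabled) {
        switch (wiredTigerGlobalOptions.flattenLeafPageDelta) {
            case 0:
                return false;                        // disabled
            case 1:
                return disaggregatedStorageEnabled;  // enabled only for disagg clusters
            default:
                return true;                         // 2: enabled unconditionally
        }
    }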

Some files were not shown because too many files have changed in this diff.