SERVER-91077 Update FLE Range sparsity/trimFactor defaults (#24158)

GitOrigin-RevId: 7d11c46616c032f426b0635c180374bd7d8c346b
2024-07-05 14:26:51 -05:00 · 2024-07-05 14:26:51 -05:00 · 755b06ff50
parent 936cca77e5
commit 755b06ff50
7 changed files with 60 additions and 46 deletions
--- a/src/mongo/crypto/encryption_fields_validation.cpp
+++ b/src/mongo/crypto/encryption_fields_validation.cpp
@ -167,11 +167,6 @@ uint32_t getNumberOfBitsInDomain(const boost::optional<Decimal128>& min,
    }
 }

-std::int64_t getRangeSparsityDefault() {
-    // TODO: SERVER-91077 change to a more suitable default value
-    return 1;
-}
-
 std::pair<mongo::Value, mongo::Value> getRangeMinMaxDefaults(BSONType fieldType) {
    switch (fieldType) {
        case NumberDouble:
@ -470,10 +465,9 @@ void setRangeDefaults(BSONType fieldType, StringData fieldPath, QueryTypeConfig*
    validateRangeIndex(fieldType, fieldPath, query);

    auto [defMin, defMax] = getRangeMinMaxDefaults(fieldType);
-    auto defSparsity = getRangeSparsityDefault();
    query.setMin(query.getMin().value_or(defMin));
    query.setMax(query.getMax().value_or(defMax));
-    query.setSparsity(query.getSparsity().value_or(defSparsity));
+    query.setSparsity(query.getSparsity().value_or(kFLERangeSparsityDefault));
 }

 }  // namespace mongo
--- a/src/mongo/crypto/encryption_fields_validation.h
+++ b/src/mongo/crypto/encryption_fields_validation.h
@ -41,8 +41,8 @@ class EncryptedField;
 class EncryptedFieldConfig;
 class QueryTypeConfig;

-constexpr int kFLERangeSparsityDefault = 1;
-constexpr int kFLERangeTrimFactorDefault = 0;
+constexpr int kFLERangeSparsityDefault = 2;
+constexpr int kFLERangeTrimFactorDefault = 6;

 /*
 * Value: Value to attempt to coerce to field's type.
--- a/src/mongo/crypto/fle_crypto.cpp
+++ b/src/mongo/crypto/fle_crypto.cpp
@ -89,6 +89,7 @@ extern "C" {
 #include "mongo/crypto/aead_encryption.h"
 #include "mongo/crypto/encryption_fields_gen.h"
 #include "mongo/crypto/encryption_fields_util.h"
+#include "mongo/crypto/encryption_fields_validation.h"
 #include "mongo/crypto/fle_crypto_predicate.h"
 #include "mongo/crypto/fle_data_frames.h"
 #include "mongo/crypto/fle_field_schema_gen.h"
@ -931,7 +932,7 @@ std::unique_ptr<Edges> getEdges(FLE2RangeInsertSpec spec, int sparsity) {
    auto element = spec.getValue().getElement();
    auto minBound = spec.getMinBound().map([](IDLAnyType m) { return m.getElement(); });
    auto maxBound = spec.getMaxBound().map([](IDLAnyType m) { return m.getElement(); });
-    auto trimFactor = spec.getTrimFactor() ? spec.getTrimFactor().value() : 0;
+    auto trimFactor = spec.getTrimFactor();

    switch (element.type()) {
        case BSONType::NumberInt:
@ -4322,8 +4323,20 @@ bool EncryptedPredicateEvaluatorV2::evaluate(

 // Edges

-Edges::Edges(std::string leaf, int sparsity, int trimFactor)
-    : _leaf(std::move(leaf)), _sparsity(sparsity), _trimFactor(trimFactor) {
+namespace {
+int resolveTrimFactorDefault(int maxlen, const boost::optional<int>& optTrimFactor) {  // Picks the effective trim factor.
+    if (optTrimFactor) {  // An explicitly supplied trim factor is returned unchanged (no clamping here).
+        return *optTrimFactor;
+    }

+    return std::clamp(kFLERangeTrimFactorDefault, 0, maxlen - 1);  // Default capped at maxlen - 1. NOTE(review): std::clamp requires lo <= hi, so this presumably assumes maxlen >= 1 -- confirm.
+}
+}  // namespace
+
+Edges::Edges(std::string leaf, int sparsity, const boost::optional<int>& optTrimFactor)
+    : _leaf(std::move(leaf)),
+      _sparsity(sparsity),
+      _trimFactor(resolveTrimFactorDefault(_leaf.length(), optTrimFactor)) {
    uassert(6775101, "sparsity must be 1 or larger", _sparsity > 0);
    dassert(std::all_of(_leaf.begin(), _leaf.end(), [](char c) { return c == '1' || c == '0'; }));
    uassert(8574105,
@ -4370,7 +4383,8 @@ std::size_t Edges::size() const {
 }

 template <typename T>
-std::unique_ptr<Edges> getEdgesT(T value, T min, T max, int sparsity, int trimFactor) {
+std::unique_ptr<Edges> getEdgesT(
+    T value, T min, T max, int sparsity, const boost::optional<int>& trimFactor) {
    static_assert(!std::numeric_limits<T>::is_signed);
    static_assert(std::numeric_limits<T>::is_integer);

@ -4388,7 +4402,7 @@ std::unique_ptr<Edges> getEdgesInt32(int32_t value,
                                     boost::optional<int32_t> min,
                                     boost::optional<int32_t> max,
                                     int sparsity,
-                                     int trimFactor) {
+                                     const boost::optional<int>& trimFactor) {
    auto aost = getTypeInfo32(value, min, max);
    return getEdgesT(aost.value, aost.min, aost.max, sparsity, trimFactor);
 }
@ -4397,7 +4411,7 @@ std::unique_ptr<Edges> getEdgesInt64(int64_t value,
                                     boost::optional<int64_t> min,
                                     boost::optional<int64_t> max,
                                     int sparsity,
-                                     int trimFactor) {
+                                     const boost::optional<int>& trimFactor) {
    auto aost = getTypeInfo64(value, min, max);
    return getEdgesT(aost.value, aost.min, aost.max, sparsity, trimFactor);
 }
@ -4407,7 +4421,7 @@ std::unique_ptr<Edges> getEdgesDouble(double value,
                                      boost::optional<double> max,
                                      boost::optional<uint32_t> precision,
                                      int sparsity,
-                                      int trimFactor) {
+                                      const boost::optional<int>& trimFactor) {
    auto aost = getTypeInfoDouble(value, min, max, precision);
    return getEdgesT(aost.value, aost.min, aost.max, sparsity, trimFactor);
 }
@ -4417,7 +4431,7 @@ std::unique_ptr<Edges> getEdgesDecimal128(Decimal128 value,
                                          boost::optional<Decimal128> max,
                                          boost::optional<uint32_t> precision,
                                          int sparsity,
-                                          int trimFactor) {
+                                          const boost::optional<int>& trimFactor) {
    auto aost = getTypeInfoDecimal128(value, min, max, precision);
    return getEdgesT(aost.value, aost.min, aost.max, sparsity, trimFactor);
 }
@ -4427,7 +4441,7 @@ std::uint64_t getEdgesLength(BSONType fieldType, StringData fieldPath, QueryType
    setRangeDefaults(fieldType, fieldPath, &config);

    const auto sparsity = *config.getSparsity();
-    const auto trimFactor = config.getTrimFactor().get_value_or(0);
+    const auto trimFactor = config.getTrimFactor();
    auto precision = config.getPrecision().map(
        [](auto signedInt) -> uint32_t { return static_cast<uint32_t>(signedInt); });

@ -4473,7 +4487,7 @@ template <typename T>
 class MinCoverGenerator {
 public:
    static std::vector<std::string> minCover(
-        T lowerBound, T upperBound, T max, int sparsity, int trimFactor) {
+        T lowerBound, T upperBound, T max, int sparsity, const boost::optional<int>& trimFactor) {
        MinCoverGenerator<T> mcg(lowerBound, upperBound, max, sparsity, trimFactor);
        std::vector<std::string> c;
        mcg.minCoverRec(c, 0, mcg._maxlen);
@ -4481,12 +4495,13 @@ public:
    }

 private:
-    MinCoverGenerator(T lowerBound, T upperBound, T max, int sparsity, int trimFactor)
+    MinCoverGenerator(
+        T lowerBound, T upperBound, T max, int sparsity, const boost::optional<int>& optTrimFactor)
        : _lowerBound(lowerBound),
          _upperBound(upperBound),
          _sparsity(sparsity),
-          _trimFactor(trimFactor),
-          _maxlen(getFirstBitSet(max)) {
+          _maxlen(getFirstBitSet(max)),
+          _trimFactor(resolveTrimFactorDefault(_maxlen, optTrimFactor)) {
        static_assert(!std::numeric_limits<T>::is_signed);
        static_assert(std::numeric_limits<T>::is_integer);
        tassert(6860001,
@ -4496,7 +4511,7 @@ private:
        uassert(8574106,
                "Trim factor must be >= 0 and less than the number of bits used to represent an "
                "element of the domain",
-                trimFactor >= 0 && (trimFactor == 0 || trimFactor < _maxlen));
+                _trimFactor >= 0 && (_trimFactor == 0 || _trimFactor < _maxlen));
    }

    // Generate and apply a mask to an integer, filling masked bits with 1;
@ -4560,13 +4575,17 @@ private:
    T _lowerBound;
    T _upperBound;
    int _sparsity;
-    int _trimFactor;
    int _maxlen;
+    int _trimFactor;
 };

 template <typename T>
-std::vector<std::string> minCover(
-    T lowerBound, T upperBound, T min, T max, int sparsity, int trimFactor) {
+std::vector<std::string> minCover(T lowerBound,
+                                  T upperBound,
+                                  T min,
+                                  T max,
+                                  int sparsity,
+                                  const boost::optional<int>& trimFactor) {
    dassert(0 == min);
    return MinCoverGenerator<T>::minCover(lowerBound, upperBound, max, sparsity, trimFactor);
 }
@ -4604,7 +4623,7 @@ std::vector<std::string> minCoverInt32(int32_t lowerBound,
                                       boost::optional<int32_t> min,
                                       boost::optional<int32_t> max,
                                       int sparsity,
-                                       int trimFactor) {
+                                       const boost::optional<int>& trimFactor) {
    auto a = getTypeInfo32(lowerBound, min, max);
    auto b = getTypeInfo32(upperBound, min, max);
    dassert(a.min == b.min);
@ -4623,7 +4642,7 @@ std::vector<std::string> minCoverInt64(int64_t lowerBound,
                                       boost::optional<int64_t> min,
                                       boost::optional<int64_t> max,
                                       int sparsity,
-                                       int trimFactor) {
+                                       const boost::optional<int>& trimFactor) {
    auto a = getTypeInfo64(lowerBound, min, max);
    auto b = getTypeInfo64(upperBound, min, max);
    dassert(a.min == b.min);
@ -4643,7 +4662,7 @@ std::vector<std::string> minCoverDouble(double lowerBound,
                                        boost::optional<double> max,
                                        boost::optional<uint32_t> precision,
                                        int sparsity,
-                                        int trimFactor) {
+                                        const boost::optional<int>& trimFactor) {
    auto a = getTypeInfoDouble(lowerBound, min, max, precision);
    auto b = getTypeInfoDouble(upperBound, min, max, precision);
    dassert(a.min == b.min);
@ -4662,7 +4681,7 @@ std::vector<std::string> minCoverDecimal128(Decimal128 lowerBound,
                                            boost::optional<Decimal128> max,
                                            boost::optional<uint32_t> precision,
                                            int sparsity,
-                                            int trimFactor) {
+                                            const boost::optional<int>& trimFactor) {
    auto a = getTypeInfoDecimal128(lowerBound, min, max, precision);
    auto b = getTypeInfoDecimal128(upperBound, min, max, precision);
    dassert(a.min == b.min);
--- a/src/mongo/crypto/fle_crypto.h
+++ b/src/mongo/crypto/fle_crypto.h
@ -1503,7 +1503,7 @@ struct ParsedFindRangePayload {

 class Edges {
 public:
-    Edges(std::string leaf, int sparsity, int trimFactor);
+    Edges(std::string leaf, int sparsity, const boost::optional<int>& trimFactor);
    std::vector<StringData> get();
    std::size_t size() const;
    const std::string& getLeaf() const {
@ -1520,27 +1520,27 @@ std::unique_ptr<Edges> getEdgesInt32(int32_t value,
                                     boost::optional<int32_t> min,
                                     boost::optional<int32_t> max,
                                     int sparsity,
-                                     int trimFactor);
+                                     const boost::optional<int>& trimFactor);

 std::unique_ptr<Edges> getEdgesInt64(int64_t value,
                                     boost::optional<int64_t> min,
                                     boost::optional<int64_t> max,
                                     int sparsity,
-                                     int trimFactor);
+                                     const boost::optional<int>& trimFactor);

 std::unique_ptr<Edges> getEdgesDouble(double value,
                                      boost::optional<double> min,
                                      boost::optional<double> max,
                                      boost::optional<uint32_t> precision,
                                      int sparsity,
-                                      int trimFactor);
+                                      const boost::optional<int>& trimFactor);

 std::unique_ptr<Edges> getEdgesDecimal128(Decimal128 value,
                                          boost::optional<Decimal128> min,
                                          boost::optional<Decimal128> max,
                                          boost::optional<uint32_t> precision,
                                          int sparsity,
-                                          int trimFactor);
+                                          const boost::optional<int>& trimFactor);

 // Equivalent to a full edges calculation without creating an intermediate vector.
 // getEdgesT(min, min, max, precision, sparsity, trimFactor).size()
@ -1557,7 +1557,7 @@ std::vector<std::string> minCoverInt32(int32_t lowerBound,
                                       boost::optional<int32_t> min,
                                       boost::optional<int32_t> max,
                                       int sparsity,
-                                       int trimFactor);
+                                       const boost::optional<int>& trimFactor);

 std::vector<std::string> minCoverInt64(int64_t lowerBound,
                                       bool includeLowerBound,
@ -1566,7 +1566,7 @@ std::vector<std::string> minCoverInt64(int64_t lowerBound,
                                       boost::optional<int64_t> min,
                                       boost::optional<int64_t> max,
                                       int sparsity,
-                                       int trimFactor);
+                                       const boost::optional<int>& trimFactor);

 std::vector<std::string> minCoverDouble(double lowerBound,
                                        bool includeLowerBound,
@ -1576,7 +1576,7 @@ std::vector<std::string> minCoverDouble(double lowerBound,
                                        boost::optional<double> max,
                                        boost::optional<uint32_t> precision,
                                        int sparsity,
-                                        int trimFactor);
+                                        const boost::optional<int>& trimFactor);

 std::vector<std::string> minCoverDecimal128(Decimal128 lowerBound,
                                            bool includeLowerBound,
@ -1586,7 +1586,7 @@ std::vector<std::string> minCoverDecimal128(Decimal128 lowerBound,
                                            boost::optional<Decimal128> max,
                                            boost::optional<uint32_t> precision,
                                            int sparsity,
-                                            int trimFactor);
+                                            const boost::optional<int>& trimFactor);

 class FLEUtil {
 public:
--- a/src/mongo/crypto/fle_crypto_test.cpp
+++ b/src/mongo/crypto/fle_crypto_test.cpp
@ -5071,6 +5071,7 @@ public:
        if (precision) {
            config.setPrecision(*precision);
        }
+        config.setTrimFactor(0);
        return config;
    }

--- a/src/mongo/db/commands/fle_compact_test.cpp
+++ b/src/mongo/db/commands/fle_compact_test.cpp
@ -565,6 +565,7 @@ QueryTypeConfig generateQueryTypeConfigForTest(const T& min,
        config.setPrecision(precision.get());
    }
    config.setSparsity(sparsity);
+    config.setTrimFactor(0);

    return config;
 }
--- a/src/mongo/db/fle_crud_test.cpp
+++ b/src/mongo/db/fle_crud_test.cpp
@ -455,7 +455,7 @@ EncryptedFieldConfig getTestEncryptedFieldConfig(
                        ,
            "path": "encrypted",
            "bsonType": "int",
-            "queries": {"queryType": "range", "min": 0, "max": 15, "sparsity": 1}
+            "queries": {"queryType": "range", "min": 0, "max": 15, "sparsity": 1, "trimFactor": 0}

        }
    ]
@ -482,7 +482,7 @@ void parseEncryptedInvalidFieldConfig(StringData esc, StringData ecoc) {
                            ,
                "path": "encrypted",
                "bsonType": "int",
-                "queries": {"queryType": "range", "min": 0, "max": 15, "sparsity": 1}
+                "queries": {"queryType": "range", "min": 0, "max": 15, "sparsity": 1, "trimFactor": 0}

            }
        ]
@ -553,6 +553,7 @@ BSONObj generateFLE2RangeInsertSpec(BSONElement value) {
    auto upperDoc = BSON("ub" << 15);

    spec.setMaxBound(boost::optional<IDLAnyType>(upperDoc.firstElement()));
+    spec.setTrimFactor(0);
    auto specDoc = BSON("s" << spec.toBSON());

    return specDoc;
@ -563,17 +564,15 @@ std::vector<char> generateSinglePlaceholder(BSONElement value,
                                            Fle2AlgorithmInt alg = Fle2AlgorithmInt::kEquality,
                                            int64_t cm = 0) {
    FLE2EncryptionPlaceholder ep;
-
-    // Has to be generated outside of if statements to root the
-    // value until ep is finalized as an object.
-    BSONObj temp = generateFLE2RangeInsertSpec(value);
-
    ep.setAlgorithm(alg);
    ep.setUserKeyId(userKeyId);
    ep.setIndexKeyId(indexKeyId);
    ep.setType(mongo::Fle2PlaceholderType::kInsert);

+    // Keep definition outside of conditional to keep it alive until serialization.
+    BSONObj temp;
    if (alg == Fle2AlgorithmInt::kRange) {
+        temp = generateFLE2RangeInsertSpec(value);
        ep.setValue(temp.firstElement());
        ep.setSparsity(1);
    } else {