// mongo/jstests/aggregation/expressions/convert_bindata_vector.js
//
// 432 lines
// 16 KiB
// JavaScript

/**
* Tests behavior of BinData $convert array.
* @tags: [
* # BinData $convert array was added in v8.3.
* requires_fcv_83,
* featureFlagConvertBinDataVectors,
* ]
*/
// Specification is taken from
// https://github.com/mongodb/specifications/blob/9d0d3f0042a8cf5faeb47ae7765716151bfca9ef/source/bson-binary-vector/bson-binary-vector.md#data-types-dtypes.
// BinData subtype passed to BinData() when this test builds a vector value.
const kBindataVectorSubtype = 9;
// dtype bytes, written as two-character hex strings. createBindataVectorBitArray() emits the dtype
// byte first, then a padding byte, then the encoded elements.
// INT8: elements are encoded through int8VectorToBitArray(), one byte per element.
const kInt8Byte = "03";
// FLOAT32: elements are encoded through float32VectorToBitArray(), four bytes per element.
const kFloat32Byte = "27";
// PACKED_BIT: the only dtype for which the padding byte carries a non-zero value in this test.
const kPackedBitByte = "10";
/**
* Test cases that should succeed in being converted both directions (from bindata vector to BSON
* array and vice versa).
*
* Padding is ignored unless the dtype is kPackedBitByte.
* Special cases (such as internal conversions to different dtypes) are handled by the test
* infrastructure and noted in the test cases below.
*/
let testCases = (() => {
    // PACKED_BIT vectors: every element is one packed byte, `padding` trailing bits are unused.
    const packedBitCases = [
        {array_elems: [], padding: 0, dtype: kPackedBitByte},
        {array_elems: [127, 7], padding: 0, dtype: kPackedBitByte},
        {array_elems: [238, 224], padding: 4, dtype: kPackedBitByte},
        {array_elems: [128, 8], padding: 3, dtype: kPackedBitByte},
    ];

    // INT8 vectors.
    const int8Cases = [
        // Note that the following converts from bson->bindata as an empty array of packed bits.
        {array_elems: [], padding: 0, dtype: kInt8Byte},
        // Note that the following converts from bson->bindata as an array of 1 packed bit.
        {array_elems: [0], padding: 0, dtype: kInt8Byte},
        // Note that the following converts from bson->bindata as an array of packed bits.
        {array_elems: [0, 1], padding: 0, dtype: kInt8Byte},
        {array_elems: [0, 1, 0, 10], padding: 0, dtype: kInt8Byte},
        // Note that the following converts from bson->bindata as an array of packed bits.
        {array_elems: [0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1], padding: 0, dtype: kInt8Byte},
        {array_elems: [2], padding: 0, dtype: kInt8Byte},
        {array_elems: [127, 7], padding: 0, dtype: kInt8Byte},
    ];

    // FLOAT32 vectors using the default (little-endian) byte order.
    const float32Cases = [
        // Note that the following converts from bson->bindata as an empty array of packed bits.
        {array_elems: [], padding: 0, dtype: kFloat32Byte},
        {array_elems: [0.3], padding: 0, dtype: kFloat32Byte},
        {array_elems: [1.2], padding: 0, dtype: kFloat32Byte},
        {array_elems: [2.2], padding: 0, dtype: kFloat32Byte},
        // Note that the following converts from bson->bindata as an array of int8s.
        {array_elems: [127.0, 7.0, -128.0], padding: 0, dtype: kFloat32Byte},
        {array_elems: [128.0, 7.0], padding: 0, dtype: kFloat32Byte},
        {array_elems: [-129.0, 7.0], padding: 0, dtype: kFloat32Byte},
        {array_elems: [-127.7, -7.7], padding: 0, dtype: kFloat32Byte},
        {
            array_elems: [Number.NEGATIVE_INFINITY, 0.0, Number.POSITIVE_INFINITY],
            padding: 0,
            dtype: kFloat32Byte,
        },
    ];

    // Big-endian versions of the float32 test cases. Each case gets its own copy of the element
    // array so that no two test cases share an array instance.
    const bigEndianFloat32Cases = float32Cases.map((littleEndianCase) => ({
        ...littleEndianCase,
        array_elems: [...littleEndianCase.array_elems],
        littleEndian: false,
    }));

    // TODO SERVER-106059 Add tests for integers larger than INT8.
    return [...packedBitCases, ...int8Cases, ...float32Cases, ...bigEndianFloat32Cases];
})();
/**
 * Expands a vector of int8 values into a flat array of bits (numbers 0 or 1), most significant bit
 * first. Negative values are mapped to their unsigned byte by adding 256.
 * @param {Iterable<number>} vector: the int8 values to expand
 * @returns {number[]} the concatenated bits of every value
 */
function int8VectorToBitArray(vector) {
    const bitsPerValue = Array.from(vector, (value) => {
        const unsignedValue = value < 0 ? 256 + value : value;
        const binaryText = unsignedValue.toString(2).padStart(8, "0");
        return binaryText.split("").map((digit) => parseInt(digit, 10));
    });
    return bitsPerValue.flat();
}
/**
 * Expands a hex string into a flat array of bits (numbers 0 or 1), most significant bit first.
 * @param {string} hexString: an even-length string of hexadecimal digits (two per byte)
 * @returns {number[]} eight bits for every byte in the string
 * @throws {Error} if the length is odd or the string contains a non-hexadecimal character
 */
function hexToBitArray(hexString) {
    if (hexString.length % 2 !== 0) {
        throw new Error("Invalid hex string. Length must be even.");
    }
    // parseInt() silently stops at the first invalid digit ("1g" -> 1) or yields NaN ("zz"), which
    // would produce a wrong or NaN-filled bit array, so reject such input up front.
    if (!/^[0-9a-fA-F]*$/.test(hexString)) {
        throw new Error("Invalid hex string. Only hexadecimal digits are allowed.");
    }
    const bitArray = [];
    for (let i = 0; i < hexString.length; i += 2) {
        const byte = parseInt(hexString.slice(i, i + 2), 16);
        const bits = byte.toString(2).padStart(8, "0");
        bitArray.push(...bits.split("").map((bit) => parseInt(bit, 10)));
    }
    return bitArray;
}
/**
 * Encodes every value of a vector as a 4-byte float32 and returns the bytes as a flat array of
 * bits (numbers 0 or 1), most significant bit of each byte first.
 * @param {Iterable<number>} vector: the values to encode
 * @param {boolean} littleEndian: byte order used when writing each float32 (defaults to true)
 * @returns {number[]} 32 bits for every value in the vector
 */
function float32VectorToBitArray(vector, littleEndian = true) {
    // One 4-byte scratch buffer is enough: every write overwrites all four bytes.
    const scratch = new ArrayBuffer(4);
    const writer = new DataView(scratch);
    const rawBytes = new Uint8Array(scratch);
    const bits = [];
    for (const element of vector) {
        writer.setFloat32(0, element, littleEndian);
        for (const rawByte of rawBytes) {
            for (let shift = 7; shift >= 0; shift--) {
                bits.push((rawByte >> shift) & 1);
            }
        }
    }
    return bits;
}
/**
 * Maps every entry of a bit array to a boolean: true when the entry loosely equals "1", false
 * otherwise.
 * @param {Iterable<number>} vector: the bits to convert
 * @returns {boolean[]} one boolean per input entry
 */
function bitArrayToBoolArray(vector) {
    return Array.from(vector, (bit) => bit == "1");
}
/**
 * Encodes a sequence of bytes as a base64 string, using "=" to pad the final group when the input
 * length is not a multiple of three.
 * @param {ArrayLike<number>} byteArray: the bytes to encode
 * @returns {string} the base64 text
 */
function bytesToBase64(byteArray) {
    const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const total = byteArray.length;
    const pieces = [];
    for (let offset = 0; offset < total; offset += 3) {
        // Missing trailing bytes of the last group count as zero.
        const first = byteArray[offset];
        const second = byteArray[offset + 1] || 0;
        const third = byteArray[offset + 2] || 0;
        const triple = (first << 16) | (second << 8) | third;
        const hasSecond = offset + 1 < total;
        const hasThird = offset + 2 < total;
        pieces.push(
            alphabet[(triple >> 18) & 63],
            alphabet[(triple >> 12) & 63],
            hasSecond ? alphabet[(triple >> 6) & 63] : "=",
            hasThird ? alphabet[triple & 63] : "=",
        );
    }
    return pieces.join("");
}
/**
 * Packs an array of bits into bytes, most significant bit first. When the number of bits is not a
 * multiple of eight, the final byte is filled with zero bits on the right.
 * @param {Array} bitArray: entries that loosely equal 0 or 1
 * @returns {Uint8Array} the packed bytes
 * @throws {Error} if the input is not an array or holds an entry other than 0 or 1
 */
function bitArrayToByteArray(bitArray) {
    const looksLikeBit = (entry) => entry == 0 || entry == 1;
    if (!(Array.isArray(bitArray) && bitArray.every(looksLikeBit))) {
        throw new Error("Input must be an array of 0s and 1s.");
    }
    // A zero-initialized buffer provides the trailing zero padding for free.
    const packed = new Uint8Array(Math.ceil(bitArray.length / 8));
    for (let position = 0; position < bitArray.length; position++) {
        const shift = 7 - (position % 8);
        packed[position >> 3] |= bitArray[position] << shift;
    }
    return packed;
}
/**
 * Packs a bit array into bytes with bitArrayToByteArray() and returns the base64 encoding of
 * those bytes produced by bytesToBase64().
 * @param {Array} bitArray: the bits to encode
 * @returns {string} the base64 text
 */
function bitArrayToBase64String(bitArray) {
    return bytesToBase64(bitArrayToByteArray(bitArray));
}
/**
 * Builds the bit array of a bindata vector: the dtype byte, followed by the padding byte, followed
 * by the encoded elements.
 * @param {string} dataTypeByte: hex string representing the dtype, taken from bindata vector
 * specification
 * @param {array<number>} vector: a vector of int8 or float32s, containing the values of the array
 * @param {number} numPaddingBits: the number of padding bits; only written when the dtype is
 * PACKED_BIT, every other dtype gets a padding byte of 0
 * @param {boolean} littleEndian: byte order used to encode FLOAT32 elements (defaults to true)
 * @returns {number[]} the bits (0 or 1) of the complete vector
 */
function createBindataVectorBitArray(dataTypeByte, vector, numPaddingBits, littleEndian = true) {
    const vectorBits = hexToBitArray(dataTypeByte);

    const paddingValue = dataTypeByte == kPackedBitByte ? numPaddingBits : 0;
    vectorBits.push(...int8VectorToBitArray([paddingValue]));

    let elementBits;
    if (dataTypeByte == kFloat32Byte) {
        elementBits = float32VectorToBitArray(vector, littleEndian);
    } else {
        elementBits = int8VectorToBitArray(vector);
    }
    return vectorBits.concat(elementBits);
}
/**
 * Runs every round-trip test case. For each case a single document holding the BSON array and the
 * equivalent bindata vector is inserted, and then:
 *   1. $convert from the bindata vector to an array must yield the BSON array (float32 elements
 *      are compared approximately), and
 *   2. $convert from the BSON array to binData subtype 9 must yield the expected bindata vector,
 *      which may use a narrower dtype than the one the test case was written with.
 */
testCases.forEach((testCase) => {
    const {dtype, array_elems, padding, littleEndian = true} = testCase;
    const byteOrder = littleEndian ? "little" : "big";

    // Determine base64-encoded version of bindata vector, and the bson array of the vector.
    const bindataArray = createBindataVectorBitArray(dtype, array_elems, padding, littleEndian);
    const base64BindataArray = bitArrayToBase64String(bindataArray);
    let bsonArray = array_elems;
    if (dtype === kPackedBitByte) {
        // Every packed byte expands to eight booleans; the trailing padding bits are dropped.
        bsonArray = bitArrayToBoolArray(int8VectorToBitArray(bsonArray));
        if (padding > 0) {
            bsonArray = bsonArray.slice(0, -padding);
        }
    } else if (dtype === kInt8Byte) {
        // Javascript types default to doubles in BSON, so we must explicitly cast them to ints in
        // this case.
        bsonArray = bsonArray.map((n) => NumberInt(n));
    }

    // Insert a doc containing the test case into the collection.
    const doc = {
        _id: 0,
        bson_array: bsonArray,
        bindata_array_base64: BinData(kBindataVectorSubtype, base64BindataArray),
        approx: dtype === kFloat32Byte, // BSON does not support FLOAT32, so results may not be exact.
    };
    const coll = db.expression_convert_bindata_vector;
    coll.drop();
    assert.commandWorked(coll.insertMany([doc]));

    // Verify conversion from bindata vector to BSON array.
    const bindataToBsonPipeline = [
        {
            $project: {
                _id: 0,
                approx: 1,
                expected: "$bson_array",
                output: {
                    $convert: {
                        to: {type: "array"},
                        input: "$bindata_array_base64",
                        byteOrder: byteOrder,
                    },
                },
            },
        },
    ];
    const bindataToBsonResult = coll.aggregate(bindataToBsonPipeline).toArray();
    // Exactly one document was inserted; an empty result would otherwise pass silently.
    assert.eq(bindataToBsonResult.length, 1);
    bindataToBsonResult.forEach((result) => {
        if (!result.approx) {
            assert.eq(result.output, result.expected);
            return;
        }
        // assert.close() does not work on arrays so manually compare each value.
        assert.eq(result.output.length, result.expected.length);
        result.output.forEach((actual, i) => {
            if (actual === Number.NEGATIVE_INFINITY || actual === Number.POSITIVE_INFINITY) {
                // Infinities are compared exactly rather than approximately.
                assert.eq(actual, result.expected[i]);
            } else {
                assert.close(actual, result.expected[i]);
            }
        });
    });

    // Verify conversion from BSON array to bindata vector.
    let expectedBindataVector = doc.bindata_array_base64;

    // BSON arrays that only contain integer 0's and 1's will convert to a packed bit array.
    const canBeRepresentedAsPackedBit = array_elems.every((n) => n === 1 || n === 0);
    const intArrayCanConvertToPackedBit =
        canBeRepresentedAsPackedBit && dtype === kInt8Byte && array_elems.length > 0;
    if (intArrayCanConvertToPackedBit) {
        // Pad a copy of the bits up to a whole number of bytes; the test case's own array must not
        // be modified.
        const paddedBits = [...array_elems];
        while (paddedBits.length % 8 !== 0) {
            paddedBits.push(0);
        }
        const numPaddingBits = paddedBits.length - array_elems.length;
        expectedBindataVector = BinData(
            kBindataVectorSubtype,
            bitArrayToBase64String(
                createBindataVectorBitArray(
                    kPackedBitByte,
                    bitArrayToByteArray(paddedBits),
                    numPaddingBits,
                    littleEndian,
                ),
            ),
        );
    }

    // BSON arrays that only contain integer values from [-128, 127] will convert to INT8
    // arrays.
    const canBeRepresentedAsIntArray = array_elems.every((n) => Number.isInteger(n) && n <= 127 && n >= -128);
    const floatArrayCanConvertToInt8 =
        canBeRepresentedAsIntArray && dtype === kFloat32Byte && array_elems.length > 0;
    if (floatArrayCanConvertToInt8) {
        expectedBindataVector = BinData(
            kBindataVectorSubtype,
            bitArrayToBase64String(createBindataVectorBitArray(kInt8Byte, array_elems, 0, littleEndian)),
        );
    }

    // Empty BSON arrays will always convert to a packed bit array.
    if (array_elems.length === 0) {
        expectedBindataVector = BinData(
            kBindataVectorSubtype,
            bitArrayToBase64String(createBindataVectorBitArray(kPackedBitByte, [], 0, littleEndian)),
        );
    }

    const bsonToBindataPipeline = [
        {
            $project: {
                _id: 0,
                expected: expectedBindataVector,
                output: {
                    $convert: {
                        to: {type: "binData", subtype: 9},
                        input: "$bson_array",
                        byteOrder: byteOrder,
                    },
                },
            },
        },
    ];
    const bsonToBindataResult = coll.aggregate(bsonToBindataPipeline).toArray();
    assert.eq(bsonToBindataResult.length, 1);
    // The output is a BinData value, so it is always compared exactly.
    bsonToBindataResult.forEach((result) => {
        assert.eq(result.output, result.expected);
    });
});
/**
* Test cases that should error when converting from bindata vector to BSON array.
*/
let binToBsonErrorCases = [
    // Invalid dtype
    {invalid_bindata_vector: "ea0000", error_code: 10506600},
    // Invalid padding on an INT8 vector - padding should only exist for PACKED_BIT
    {invalid_bindata_vector: `${kInt8Byte}0101`, error_code: 10506606},
    // Invalid padding on a FLOAT32 vector - padding should only exist for PACKED_BIT
    {invalid_bindata_vector: `${kFloat32Byte}0111111111`, error_code: 10506606},
    // Not enough bytes for float
    {invalid_bindata_vector: `${kFloat32Byte}00001100`, error_code: 10506602},
];
// For every malformed bindata vector: store it in a fresh single-document collection and check
// that converting it to an array fails with the expected error code.
binToBsonErrorCases.forEach((testCase) => {
    const bindataVectorAsBitArray = hexToBitArray(testCase.invalid_bindata_vector);
    const base64BindataArray = bitArrayToBase64String(bindataVectorAsBitArray);
    const doc = {_id: 0, bindata_array_base64: BinData(kBindataVectorSubtype, base64BindataArray)};
    const coll = db.expression_convert_bindata_vector;
    coll.drop();
    assert.commandWorked(coll.insertMany([doc]));
    // Verify conversion from bindata vector to BSON array.
    const bindataToBsonPipeline = [
        {
            $project: {_id: 0, output: {$convert: {to: {type: "array"}, input: "$bindata_array_base64"}}},
        },
    ];
    // Drain the cursor, as the other error tests in this file do, so the assertion does not depend
    // on the failure surfacing in the first batch.
    assert.throwsWithCode(() => coll.aggregate(bindataToBsonPipeline).toArray(), testCase.error_code);
});
/**
* Test cases that should error when converting from BSON array to bindata vector.
*/
// Every input below is expected to fail with ErrorCodes.ConversionFailure.
let bsonToBinErrorCases = [
    // Invalid string BSON array
    ["oh", "hi", "mark"],
    // Must be an array
    "theroom",
    {mongodb: "skunkworks"},
    // TODO SERVER-106059 Remove this test.
    [NumberInt(5), NumberInt(6), NumberInt(200)],
    // TODO SERVER-106059 Remove this test.
    [NumberInt(5), NumberInt(6), NumberInt(-200)],
].map((invalidInput) => ({
    invalid_bson_array: invalidInput,
    error_code: ErrorCodes.ConversionFailure,
}));
// For every invalid input: store it in a fresh single-document collection and check that
// converting it to binData subtype 9 fails with the expected error code.
bsonToBinErrorCases.forEach(({invalid_bson_array, error_code}) => {
    const coll = db.expression_convert_bindata_vector;
    coll.drop();
    assert.commandWorked(coll.insertMany([{_id: 0, bson_array: invalid_bson_array}]));

    const convertToVector = {
        $convert: {to: {type: "binData", subtype: 9}, input: "$bson_array", format: "base64"},
    };
    const pipeline = [{$project: {_id: 0, output: convertToVector}}];
    const runPipeline = () => coll.aggregate(pipeline).toArray();
    assert.throwsWithCode(runPipeline, error_code);
});
// Converting an int array that contains 200 to binData subtype 9, with no `format` or `byteOrder`
// argument, must fail with ConversionFailure.
(function bsonArrayWithLargePositiveIntFailsToBeConverted() {
    const coll = db.expression_convert_bindata_vector;
    coll.drop();
    const storedDoc = {_id: 0, bson_array: [NumberInt(5), NumberInt(6), NumberInt(200)]};
    assert.commandWorked(coll.insertMany([storedDoc]));

    const convertSpec = {to: {type: "binData", subtype: 9}, input: "$bson_array"};
    const pipeline = [{$project: {_id: 0, output: {$convert: convertSpec}}}];
    const runPipeline = () => coll.aggregate(pipeline).toArray();
    assert.throwsWithCode(runPipeline, ErrorCodes.ConversionFailure);
})();