// File: jstests/noPassthrough/shell/js/stringdiff.js
import {describe, it} from "jstests/libs/mochalite.js";
import {stringdiff} from "src/mongo/shell/stringdiff.js";

describe("diff strings", () => {
    /** Joins the given rows into a single newline-separated string. */
    const text = (...rows) => rows.join("\n");

    /** Diffs `before` against `after` and asserts on the resulting patch text. */
    const expectDiff = (before, after, expected) => {
        const actual = stringdiff(before, after);
        assert.eq(actual, expected);
    };

    // The 26 lowercase letters, one per line once joined: "a\nb\n...\nz".
    const letters = Array.from("abcdefghijklmnopqrstuvwxyz");
    const alphabet = text(...letters);

    /** Returns the alphabet text with every letter that is a key of `swaps` replaced. */
    const alphabetWith = (swaps) =>
        text(...letters.map((letter) => (swaps.has(letter) ? swaps.get(letter) : letter)));

    it("no diff", () => {
        const before = text("aaa", "bbb", "ccc");
        const after = text("aaa", "bbb", "ccc");
        expectDiff(before, after, "");
    });

    it("middle diff", () => {
        const before = text("aaa", "bbb", "ccc");
        const after = text("aaa", "xxx", "ccc");
        const expected = text(
            " aaa",
            "-bbb",
            "+xxx",
            " ccc",
        );
        expectDiff(before, after, expected);
    });

    it("prefix diff", () => {
        const before = text("aaa", "bbb", "ccc");
        const after = text("xxx", "bbb", "ccc");
        const expected = text(
            "-aaa",
            "+xxx",
            " bbb",
            " ccc",
        );
        expectDiff(before, after, expected);
    });

    it("suffix diff", () => {
        const before = text("aaa", "bbb", "ccc");
        const after = text("aaa", "bbb", "xxx");
        const expected = text(
            " aaa",
            " bbb",
            "-ccc",
            "+xxx",
        );
        expectDiff(before, after, expected);
    });

    it("oneliner diff", () => {
        // Diffing is line based: a one-character change replaces the whole line.
        const expected = text(
            "-aaa",
            "+axa",
        );
        expectDiff("aaa", "axa", expected);
    });

    it("completely different", () => {
        const before = text("aaa", "bbb", "ccc");
        const after = text("xxx", "yyy", "zzz");
        const expected = text(
            "-aaa",
            "-bbb",
            "-ccc",
            "+xxx",
            "+yyy",
            "+zzz",
        );
        expectDiff(before, after, expected);
    });

    it("context window", () => {
        const after = alphabetWith(new Map([["m", "X"]]));
        const expected = text(
            " i",
            " j",
            " k",
            " l",
            "-m",
            "+X",
            " n",
            " o",
            " p",
            " q",
        );
        expectDiff(alphabet, after, expected);
    });

    it("overlapping context window", () => {
        const after = alphabetWith(new Map([["k", "X"], ["o", "Y"]]));
        const expected = text(
            " g",
            " h",
            " i",
            " j",
            "-k",
            "+X",
            " l",
            " m",
            " n",
            "-o",
            "+Y",
            " p",
            " q",
            " r",
            " s",
        );
        expectDiff(alphabet, after, expected);
    });

    it("separate chunks", () => {
        const after = alphabetWith(new Map([["c", "X"], ["x", "Y"]]));
        const expected = text(
            " a",
            " b",
            "-c",
            "+X",
            " d",
            " e",
            " f",
            " g",
            "---",
            " t",
            " u",
            " v",
            " w",
            "-x",
            "+Y",
            " y",
            " z",
        );
        expectDiff(alphabet, after, expected);
    });

    it("compound diff", () => {
        // Sample texts taken from https://www.nathaniel.ai/myers-diff/
        const before = text(
            "Empty Bottles - Colin Morton (1981)",
            "---",
            "line up all the empty bottles",
            "the long-necked beer bottles from the antique stores",
            "the wine bottles and pop bottles left on beaches",
            "steam off the labels and line the bottles up the green ones with",
            "the brown black yellow and clear ones",
            "line up",
            "the beer bottles whose labels have been torn off by",
            "neurotic fingers",
            "and the bottles sent back by the breweries because they have",
            "cockroaches or dead mice at the bottom",
            "line up",
            "the bottles afloat on all the seas those with messages in",
            "them and those without any",
            "and the bottles with methyl hydrate-soaked cotton in them",
            "used by schoolkids for killing insects",
            "line up the bottle that killed Malcolm Lowry with the bottle that...",
        );

        const after = text(
            "Monkey Stops Whistling - David Morgan (2011)",
            "---",
            "Stand to attention all the empty bottles",
            "the long-necked beer bottles from the antique stores",
            "the wine bottles and pop bottles left on beaches",
            "steam off the labels and line the bottles up the green ones with",
            "the brown black yellow and clear ones",
            "Stand to attention all the empty bottles",
            "the beer bottles whose labels have been torn off by",
            "neurotic fingers",
            "and the bottles sent back by the breweries because they have",
            "cockroaches or dead bluebottles at the bottom",
            "Stand to attention all the empty bottles",
            "the bottles afloat on all the seas those with messages in",
            "them and those without any",
            "line up the bottle that killed Malcolm Lowry with the bottle that...",
        );

        const expected = text(
            "-Empty Bottles - Colin Morton (1981)",
            "+Monkey Stops Whistling - David Morgan (2011)",
            " ---",
            "-line up all the empty bottles",
            "+Stand to attention all the empty bottles",
            " the long-necked beer bottles from the antique stores",
            " the wine bottles and pop bottles left on beaches",
            " steam off the labels and line the bottles up the green ones with",
            " the brown black yellow and clear ones",
            "-line up",
            "+Stand to attention all the empty bottles",
            " the beer bottles whose labels have been torn off by",
            " neurotic fingers",
            " and the bottles sent back by the breweries because they have",
            "-cockroaches or dead mice at the bottom",
            "-line up",
            "+cockroaches or dead bluebottles at the bottom",
            "+Stand to attention all the empty bottles",
            " the bottles afloat on all the seas those with messages in",
            " them and those without any",
            "-and the bottles with methyl hydrate-soaked cotton in them",
            "-used by schoolkids for killing insects",
            " line up the bottle that killed Malcolm Lowry with the bottle that...",
        );

        expectDiff(before, after, expected);
    });
});
// File: src/mongo/shell/stringdiff.js

const INS = "+";
const DEL = "-";
const PAD = " ";  // matching lines

// Line emitted between two non-adjacent chunks of the condensed diff.
const CHUNK_SEPARATOR = "---";

// Number of unchanged lines shown before and after each change unless the caller overrides it.
const DEFAULT_CONTEXT = 4;

/**
 * Compares two strings line by line and returns their differences as a condensed,
 * patch-like listing.
 *
 * Both inputs are split on "\n" and diffed with Myers' algorithm. The result keeps only
 * the changed lines plus `context` unchanged lines around each change. Every emitted line
 * is prefixed with '+' (only in newStr), '-' (only in oldStr) or ' ' (in both), and
 * chunks that are not adjacent are separated by a '---' line. Comparison is per line, never
 * per character.
 *
 * @param {string} oldStr - The original string to compare from
 * @param {string} newStr - The new string to compare to
 * @param {number} [context=4] - Number of unchanged lines to keep before and after each
 *     change; must be a non-negative integer
 * @returns {string} The condensed diff, or "" when the two strings have identical lines
 * @throws {Error} Via `assert` if oldStr or newStr is not a string, or if context is not a
 *     non-negative integer
 *
 * @example
 * stringdiff("hello world", "hello javascript");
 * // Returns: "-hello world\n+hello javascript"
 *
 * @example
 * stringdiff("aaa\nbbb\nccc", "aaa\nxxx\nccc");
 * // Returns: " aaa\n-bbb\n+xxx\n ccc"
 */
export function stringdiff(oldStr, newStr, context = DEFAULT_CONTEXT) {
    assert(typeof oldStr === "string", "stringdiff: oldStr must be a string");
    assert(typeof newStr === "string", "stringdiff: newStr must be a string");
    assert(Number.isInteger(context) && context >= 0,
           "stringdiff: context must be a non-negative integer");

    return patchdiff(myersdiff(oldStr, newStr), context);
}

/**
 * Condenses a full line diff down to the changed lines and their surrounding context.
 *
 * @param {string} fulldiff - Newline-separated diff in which every line starts with
 *     '+' (insertion), '-' (deletion) or ' ' (unchanged)
 * @param {number} [context=4] - Number of unchanged lines kept before and after each change
 * @returns {string} The kept lines joined with "\n". Wherever kept lines are not adjacent
 *     in `fulldiff`, a '---' line is inserted between them. Returns "" when `fulldiff`
 *     contains no changed line.
 *
 * @example
 * patchdiff(" line1\n line2\n-line3\n+line3a\n line4\n line5");
 * // Returns: " line1\n line2\n-line3\n+line3a\n line4\n line5"
 *
 * @example
 * // Nine unchanged lines between the changes: one of them falls outside both windows.
 * patchdiff("-a\n+b\n c\n d\n e\n f\n g\n h\n i\n j\n k\n-l\n+m");
 * // Returns: "-a\n+b\n c\n d\n e\n f\n---\n h\n i\n j\n k\n-l\n+m"
 */
function patchdiff(fulldiff, context = DEFAULT_CONTEXT) {
    const lines = fulldiff.split("\n");

    // keep[i] is true when line i is a change or lies within `context` lines of one.
    const keep = new Array(lines.length).fill(false);
    lines.forEach((line, i) => {
        if (line.startsWith(DEL) || line.startsWith(INS)) {
            const start = Math.max(0, i - context);
            const end = Math.min(lines.length, i + context + 1);
            keep.fill(true, start, end);
        }
    });

    const result = [];
    lines.forEach((line, i) => {
        if (!keep[i]) {
            return;
        }
        // A kept line whose predecessor was dropped starts a new chunk. No separator is
        // emitted before the very first chunk.
        if (result.length > 0 && !keep[i - 1]) {
            result.push(CHUNK_SEPARATOR);
        }
        result.push(line);
    });
    return result.join("\n");
}

/**
 * Computes a full line-by-line diff of two strings with Myers' O(ND) algorithm.
 *
 * Both strings are split on "\n". The forward pass advances, for each edit count d, the
 * furthest reachable x on every diagonal k = x - y, and stores a snapshot of that frontier
 * before each round so that `backtrack` can reconstruct the path that was taken.
 *
 * @param {string} a - The original string to compare from
 * @param {string} b - The new string to compare to
 * @returns {string} Every line of the edit script joined with "\n", each prefixed with
 *     ' ' (unchanged), '-' (only in a) or '+' (only in b)
 *
 * @example
 * myersdiff("hello\nworld", "hello\njavascript");
 * // Returns: " hello\n-world\n+javascript"
 *
 * @see {@link https://blog.jcoglan.com/2017/02/12/the-myers-diff-algorithm-part-1/|Myers Diff Algorithm}
 */
function myersdiff(a, b) {
    const aLines = a.split("\n");
    const bLines = b.split("\n");

    const N = aLines.length;
    const M = bLines.length;
    const MAX = N + M;

    // v maps a diagonal k (which may be negative) to the furthest x reached on it.
    // Seeding k = 1 with 0 lets round d = 0 start from x = 0 through the "move down" branch.
    const v = new Map([[1, 0]]);
    const trace = [];

    for (let d = 0; d <= MAX; d++) {
        // Snapshot the frontier as it was after round d - 1; backtrack reads it for round d.
        trace.push(new Map(v));

        for (let k = -d; k <= d; k += 2) {
            // Move down (insertion) from diagonal k + 1, or right (deletion) from k - 1,
            // whichever starts further along.
            const moveDown = k === -d || (k !== d && v.get(k - 1) < v.get(k + 1));
            let x = moveDown ? v.get(k + 1) : v.get(k - 1) + 1;
            let y = x - k;

            // Follow diagonal
            while (x < N && y < M && aLines[x] === bLines[y]) {
                x++;
                y++;
            }

            v.set(k, x);
            if (x >= N && y >= M) {
                // Found the solution, now backtrack to build the diff
                return backtrack(aLines, bLines, trace, d);
            }
        }
    }

    // Fallback for the case where no round reaches the end of both inputs.
    return "";
}

/**
 * Walks the recorded frontiers backwards from the end of both inputs and rebuilds the
 * edit script found by `myersdiff`.
 *
 * @param {string[]} aLines - Lines of the original string
 * @param {string[]} bLines - Lines of the new string
 * @param {Map<number, number>[]} trace - trace[depth] is the frontier (diagonal -> x) as it
 *     was before round `depth` of the forward pass
 * @param {number} d - The round in which the forward pass reached the end of both inputs
 * @returns {string} The prefixed diff lines, in input order, joined with "\n"
 */
function backtrack(aLines, bLines, trace, d) {
    let x = aLines.length;
    let y = bLines.length;

    // Lines are discovered last-to-first. Collect them with push and reverse once at the
    // end, rather than unshifting each one to the front of the array.
    const reversed = [];

    for (let depth = d; depth >= 0; depth--) {
        const v = trace[depth];
        const k = x - y;

        // Same decision the forward pass made for diagonal k in round `depth`.
        const movedDown = k === -depth || (k !== depth && v.get(k - 1) < v.get(k + 1));
        const prevK = movedDown ? k + 1 : k - 1;

        const prevX = v.get(prevK);
        const prevY = prevX - prevK;

        // Add diagonal (unchanged) lines
        while (x > prevX && y > prevY) {
            x--;
            y--;
            reversed.push(PAD + aLines[x]);
        }

        // Add deletion or insertion; round 0 consists of a diagonal only.
        if (depth > 0) {
            if (x === prevX) {
                // Insertion
                y--;
                reversed.push(INS + bLines[y]);
            } else {
                // Deletion
                x--;
                reversed.push(DEL + aLines[x]);
            }
        }
    }

    return reversed.reverse().join("\n");
}