代码编辑器Diff算法与版本比较完全指南

一、Diff 概述

Diff(差异比较)是代码编辑器的核心功能之一,广泛应用于版本对比、合并冲突解决、历史回滚等场景。本文详细介绍 Diff 算法的核心原理与实现方法。

二、最长公共子序列(LCS)

Diff 算法的核心是找到两个序列的最长公共子序列(Longest Common Subsequence):

// 示例:Old vs New
old: ["A", "B", "C", "D", "E"]
new: ["A", "B", "X", "C", "D", "F"]

// LCS = ["A", "B", "C", "D"]
// 差异 = 添加 X、替换 E 为 F

2.1 LCS 动态规划实现

// LCS 动态规划
function lcs(oldArr, newArr) {
    const m = oldArr.length;
    const n = newArr.length;

    // dp[i][j] = LCS 长度 of old[0..i-1] and new[0..j-1]
    const dp = Array(m + 1).fill(0)
        .map(() => Array(n + 1).fill(0));

    // 填表
    for (let i = 1; i <= m; i++) {
        for (let j = 1; j <= n; j++) {
            if (oldArr[i - 1] === newArr[j - 1]) {
                dp[i][j] = dp[i - 1][j - 1] + 1;
            } else {
                dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
            }
        }
    }

    // 回溯找到 LCS
    const lcs = [];
    let i = m, j = n;
    while (i > 0 && j > 0) {
        if (oldArr[i - 1] === newArr[j - 1]) {
            lcs.unshift(oldArr[i - 1]);
            i--; j--;
        } else if (dp[i - 1][j] > dp[i][j - 1]) {
            i--;
        } else {
            j--;
        }
    }

    return lcs;
}

三、Myers 算法

Myes 算法是实际应用中更高效的 Diff 算法,时间复杂度为 O(ND),D 为编辑距离。

// Myers 算法核心实现
class MyersDiff {
    diff(oldStr, newStr) {
        const oldLines = oldStr.split('\n');
        const newLines = newStr.split('\n');

        const N = oldLines.length;
        const M = newLines.length;
        const MAX = N + M;

        // V 数组存储各对角线的最大编辑距离
        let v = { 1: 0 };
        const trace = [];

        for (let D = 0; D <= MAX; D++) {
            trace.push({...v});

            for (let k = -D; k <= D; k += 2) {
                let x;
                if (k === -D || (k !== D && v[k - 1] < v[k + 1])) {
                    x = v[k + 1];  // 向下
                } else {
                    x = v[k - 1] + 1;  // 向右
                }

                let y = x - k;
                while (x < N && y < M && oldLines[x] === newLines[y]) {
                    x++;
                    y++;
                }

                v[k] = x;

                if (x >= N && y >= M) {
                    return this.backtrack(trace, oldLines, newLines);
                }
            }
        }
    }

    backtrack(trace, oldLines, newLines) {
        const result = [];
        let x = oldLines.length;
        let y = newLines.length;

        for (let D = trace.length - 1; D >= 0; D--) {
            const v = trace[D];
            const k = x - y;

            let prevK;
            if (k === -D || (k !== D && v[k - 1] < v[k + 1])) {
                prevK = k + 1;
            } else {
                prevK = k - 1;
            }

            const prevX = v[prevK];
            const prevY = prevX - prevK;

            while (x > prevX && y > prevY) {
                result.unshift({ type: 'equal', old: x - 1, new: y - 1,
                    oldLine: oldLines[x - 1], newLine: newLines[y - 1] });
                x--;
                y--;
            }

            if (D > 0) {
                if (x === prevX) {
                    result.unshift({ type: 'insert', new: y - 1, newLine: newLines[y - 1] });
                    y--;
                } else {
                    result.unshift({ type: 'delete', old: x - 1, oldLine: oldLines[x - 1] });
                    x--;
                }
            }
        }

        return result;
    }
}

四、差异可视化展示

常见的 Diff 展示方式包括行内 Diff、side-by-side 和 inline 两种视图:

// Diff 可视化数据结构
interface DiffHunk {
    oldStart: number;
    oldLines: number;
    newStart: number;
    newLines: number;
    lines: DiffLine[];
}

interface DiffLine {
    type: 'context' | 'add' | 'delete';
    content: string;
    oldLineNum?: number;
    newLineNum?: number;
}

4.1 行内高亮展示

// 渲染为行内高亮 HTML
function renderInlineDiff(diff) {
    let html = '';

    diff.forEach(line => {
        switch(line.type) {
            case 'add':
                html += `+ ${line.newLine}\n`;
                break;
            case 'delete':
                html += `- ${line.oldLine}\n`;
                break;
            case 'equal':
                html += `  ${line.oldLine}\n`;
                break;
        }
    });

    return html;
}

4.2 Side-by-Side 双栏展示

function renderSideBySide(oldLines, newLines, diff) {
    let oldHtml = '
'; let newHtml = '
'; let oldLine = 1, newLine = 1; diff.forEach(line => { switch(line.type) { case 'equal': oldHtml += `${oldLine++}${escapeHtml(line.oldLine)}`; newHtml += `${newLine++}${escapeHtml(line.newLine)}`; break; case 'delete': oldHtml += `${oldLine++}${escapeHtml(line.oldLine)}`; newHtml += ``; break; case 'insert': oldHtml += ``; newHtml += `${newLine++}${escapeHtml(line.newLine)}`; break; } }); return oldHtml + '
'
+ newHtml + '
'
; }

五、词级 Diff

在单行内进行更细粒度的词级别差异比较:

// 词级 Diff
function wordDiff(oldText, newText) {
    const oldWords = oldText.split(/(\s+)/);
    const newWords = newText.split(/(\s+)/);

    // 使用字符级 LCS 求解
    return myersDiff(oldWords, newWords).map(item => ({
        ...item,
        oldWord: item.oldLine,
        newWord: item.newLine
    }));
}

// 结果示例
// old: "function foo() { return 1; }"
// new: "function foo() { return 42; }"
// diff: equal "function ", equal "foo()", equal " { ",
//       equal "return ", delete "1", insert "42",
//       equal ";"

六、三路合并(Three-way Merge)

用于解决版本合并冲突:

// 三路合并算法
function threeWayMerge(base, ours, theirs) {
    const baseDiff = diff(base, ours);
    const theirDiff = diff(base, theirs);

    const result = [];
    let i = 0, j = 0;

    while (i < baseDiff.length || j < theirDiff.length) {
        const ourChange = baseDiff[i];
        const theirChange = theirDiff[j];

        // 相同变更
        if (ourChange && theirChange &&
            ourChange.type === theirChange.type &&
            ourChange.line === theirChange.line) {
            result.push(ourChange);
            i++; j++;
        }
        // 冲突:双方都修改了相同行
        else if (ourChange && theirChange &&
            ourChange.line === theirChange.line) {
            result.push({
                type: 'conflict',
                ours: ours.lines[ourChange.newLine],
                theirs: theirs.lines[theirChange.newLine],
                base: base.lines[ourChange.line]
            });
            i++; j++;
        }
        // 只在我们的修改中
        else if (ourChange) {
            result.push(ourChange);
            i++;
        }
        // 只在对方的修改中
        else if (theirChange) {
            result.push(theirChange);
            j++;
        }
    }

    return result;
}

七、性能优化

  • 行级拆分:先将文本按行拆分,只对差异行进行词级比较
  • 增量 Diff:编辑后只比较修改区域,避免全量计算
  • Web Worker:Diff 计算在后台线程执行,不阻塞 UI
  • 分块渲染:大文件分多次渲染,避免长列表卡顿

八、总结

  • 核心算法:LCS 是基础,Myers 算法更高效
  • 可视化:支持 inline 和 side-by-side 两种模式
  • 词级 Diff:在单行内进行细粒度比较
  • 三路合并:解决代码合并冲突的核心算法
  • 性能:增量计算、Web Worker、虚拟滚动