一、Diff 概述
Diff(差异比较)是代码编辑器的核心功能之一,广泛应用于版本对比、合并冲突解决、历史回滚等场景。本文详细介绍 Diff 算法的核心原理与实现方法。
二、最长公共子序列(LCS)
Diff 算法的核心是找到两个序列的最长公共子序列(Longest Common Subsequence):
// 示例:Old vs New
old: ["A", "B", "C", "D", "E"]
new: ["A", "B", "X", "C", "D", "F"]
// LCS = ["A", "B", "C", "D"]
// 差异 = 添加 X、替换 E 为 F
2.1 LCS 动态规划实现
// LCS 动态规划
function lcs(oldArr, newArr) {
const m = oldArr.length;
const n = newArr.length;
// dp[i][j] = LCS 长度 of old[0..i-1] and new[0..j-1]
const dp = Array(m + 1).fill(0)
.map(() => Array(n + 1).fill(0));
// 填表
for (let i = 1; i <= m; i++) {
for (let j = 1; j <= n; j++) {
if (oldArr[i - 1] === newArr[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
}
}
}
// 回溯找到 LCS
const lcs = [];
let i = m, j = n;
while (i > 0 && j > 0) {
if (oldArr[i - 1] === newArr[j - 1]) {
lcs.unshift(oldArr[i - 1]);
i--; j--;
} else if (dp[i - 1][j] > dp[i][j - 1]) {
i--;
} else {
j--;
}
}
return lcs;
}
三、Myers 算法
Myes 算法是实际应用中更高效的 Diff 算法,时间复杂度为 O(ND),D 为编辑距离。
// Myers 算法核心实现
class MyersDiff {
diff(oldStr, newStr) {
const oldLines = oldStr.split('\n');
const newLines = newStr.split('\n');
const N = oldLines.length;
const M = newLines.length;
const MAX = N + M;
// V 数组存储各对角线的最大编辑距离
let v = { 1: 0 };
const trace = [];
for (let D = 0; D <= MAX; D++) {
trace.push({...v});
for (let k = -D; k <= D; k += 2) {
let x;
if (k === -D || (k !== D && v[k - 1] < v[k + 1])) {
x = v[k + 1]; // 向下
} else {
x = v[k - 1] + 1; // 向右
}
let y = x - k;
while (x < N && y < M && oldLines[x] === newLines[y]) {
x++;
y++;
}
v[k] = x;
if (x >= N && y >= M) {
return this.backtrack(trace, oldLines, newLines);
}
}
}
}
backtrack(trace, oldLines, newLines) {
const result = [];
let x = oldLines.length;
let y = newLines.length;
for (let D = trace.length - 1; D >= 0; D--) {
const v = trace[D];
const k = x - y;
let prevK;
if (k === -D || (k !== D && v[k - 1] < v[k + 1])) {
prevK = k + 1;
} else {
prevK = k - 1;
}
const prevX = v[prevK];
const prevY = prevX - prevK;
while (x > prevX && y > prevY) {
result.unshift({ type: 'equal', old: x - 1, new: y - 1,
oldLine: oldLines[x - 1], newLine: newLines[y - 1] });
x--;
y--;
}
if (D > 0) {
if (x === prevX) {
result.unshift({ type: 'insert', new: y - 1, newLine: newLines[y - 1] });
y--;
} else {
result.unshift({ type: 'delete', old: x - 1, oldLine: oldLines[x - 1] });
x--;
}
}
}
return result;
}
}
四、差异可视化展示
常见的 Diff 展示方式包括行内 Diff、side-by-side 和 inline 两种视图:
// Diff 可视化数据结构
interface DiffHunk {
oldStart: number;
oldLines: number;
newStart: number;
newLines: number;
lines: DiffLine[];
}
interface DiffLine {
type: 'context' | 'add' | 'delete';
content: string;
oldLineNum?: number;
newLineNum?: number;
}
4.1 行内高亮展示
// 渲染为行内高亮 HTML
function renderInlineDiff(diff) {
let html = '';
diff.forEach(line => {
switch(line.type) {
case 'add':
html += `+ ${line.newLine}\n`;
break;
case 'delete':
html += `- ${line.oldLine}\n`;
break;
case 'equal':
html += ` ${line.oldLine}\n`;
break;
}
});
return html;
}
4.2 Side-by-Side 双栏展示
function renderSideBySide(oldLines, newLines, diff) {
let oldHtml = '';
let newHtml = '';
let oldLine = 1, newLine = 1;
diff.forEach(line => {
switch(line.type) {
case 'equal':
oldHtml += `${oldLine++}${escapeHtml(line.oldLine)}`;
newHtml += `${newLine++}${escapeHtml(line.newLine)}`;
break;
case 'delete':
oldHtml += `${oldLine++}${escapeHtml(line.oldLine)}`;
newHtml += ``;
break;
case 'insert':
oldHtml += ``;
newHtml += `${newLine++}${escapeHtml(line.newLine)}`;
break;
}
});
return oldHtml + '' + newHtml + '';
}
五、词级 Diff
在单行内进行更细粒度的词级别差异比较:
// 词级 Diff
function wordDiff(oldText, newText) {
const oldWords = oldText.split(/(\s+)/);
const newWords = newText.split(/(\s+)/);
// 使用字符级 LCS 求解
return myersDiff(oldWords, newWords).map(item => ({
...item,
oldWord: item.oldLine,
newWord: item.newLine
}));
}
// 结果示例
// old: "function foo() { return 1; }"
// new: "function foo() { return 42; }"
// diff: equal "function ", equal "foo()", equal " { ",
// equal "return ", delete "1", insert "42",
// equal ";"
六、三路合并(Three-way Merge)
用于解决版本合并冲突:
// 三路合并算法
function threeWayMerge(base, ours, theirs) {
const baseDiff = diff(base, ours);
const theirDiff = diff(base, theirs);
const result = [];
let i = 0, j = 0;
while (i < baseDiff.length || j < theirDiff.length) {
const ourChange = baseDiff[i];
const theirChange = theirDiff[j];
// 相同变更
if (ourChange && theirChange &&
ourChange.type === theirChange.type &&
ourChange.line === theirChange.line) {
result.push(ourChange);
i++; j++;
}
// 冲突:双方都修改了相同行
else if (ourChange && theirChange &&
ourChange.line === theirChange.line) {
result.push({
type: 'conflict',
ours: ours.lines[ourChange.newLine],
theirs: theirs.lines[theirChange.newLine],
base: base.lines[ourChange.line]
});
i++; j++;
}
// 只在我们的修改中
else if (ourChange) {
result.push(ourChange);
i++;
}
// 只在对方的修改中
else if (theirChange) {
result.push(theirChange);
j++;
}
}
return result;
}
七、性能优化
- 行级拆分:先将文本按行拆分,只对差异行进行词级比较
- 增量 Diff:编辑后只比较修改区域,避免全量计算
- Web Worker:Diff 计算在后台线程执行,不阻塞 UI
- 分块渲染:大文件分多次渲染,避免长列表卡顿
八、总结
- 核心算法:LCS 是基础,Myers 算法更高效
- 可视化:支持 inline 和 side-by-side 两种模式
- 词级 Diff:在单行内进行细粒度比较
- 三路合并:解决代码合并冲突的核心算法
- 性能:增量计算、Web Worker、虚拟滚动