我想标记两个 html 文本的差异,我找到了一个解决方案,一切都按预期工作(删除的文件接收类名“del”,插入的文本接收类名“ins”以及相关样式)只有一个事情不对 - 由于某种原因,文本被标记在错误的位置。
var Match,
calculate_operations,
consecutive_where,
create_index,
diff,
find_match,
find_matching_blocks,
html_to_tokens,
is_end_of_tag,
is_start_of_tag,
is_tag,
is_whitespace,
isnt_tag,
op_map,
define,
recursively_find_matching_blocks,
render_operations,
wrap;
is_end_of_tag = function (char) {
return char === '>';
};
is_start_of_tag = function (char) {
return char === '<';
};
is_whitespace = function (char) {
return /^\s+$/.test(char);
};
is_tag = function (token) {
return /^\s*<[^>]+>\s*$/.test(token);
};
isnt_tag = function (token) {
return !is_tag(token);
};
Match = class Match {
constructor(start_in_before1, start_in_after1, length1) {
console.log('lalala');
this.start_in_before = start_in_before1;
this.start_in_after = start_in_after1;
this.length = length1;
this.end_in_before = this.start_in_before + this.length - 1;
this.end_in_after = this.start_in_after + this.length - 1;
}
};
html_to_tokens = function (html) {
var char, current_word, i, len, mode, words, letters;
mode = 'char';
current_word = '';
// letters = [];
words = [];
for (i = 0, len = html.length; i < len; i++) {
char = html[i];
// letters.push(char);
switch (mode) {
case 'tag':
if (is_end_of_tag(char)) {
current_word += '>';
words.push(current_word);
current_word = '';
if (is_whitespace(char)) {
mode = 'whitespace';
} else {
mode = 'char';
}
} else {
current_word += char;
}
break;
case 'char':
if (is_start_of_tag(char)) {
if (current_word) {
words.push(current_word);
}
current_word = '<';
mode = 'tag';
} else if (/\s/.test(char)) {
if (current_word) {
words.push(current_word);
}
current_word = char;
mode = 'whitespace';
} else if (/[\w\#@]+/i.test(char)) {
current_word += char;
} else {
if (current_word) {
words.push(current_word);
}
current_word = char;
}
break;
case 'whitespace':
if (is_start_of_tag(char)) {
if (current_word) {
words.push(current_word);
}
current_word = '<';
mode = 'tag';
} else if (is_whitespace(char)) {
current_word += char;
} else {
if (current_word) {
words.push(current_word);
}
current_word = char;
mode = 'char';
}
break;
default:
throw new Error(`Unknown mode ${mode}`);
}
}
if (current_word) {
words.push(current_word);
}
return words;
};
find_match = function (
before_tokens,
after_tokens,
index_of_before_locations_in_after_tokens,
start_in_before,
end_in_before,
start_in_after,
end_in_after
) {
var best_match_in_after,
best_match_in_before,
best_match_length,
i,
index_in_after,
index_in_before,
j,
len,
locations_in_after,
looking_for,
match,
match_length_at,
new_match_length,
new_match_length_at,
ref,
ref1;
best_match_in_before = start_in_before;
best_match_in_after = start_in_after;
best_match_length = 0;
match_length_at = {};
for (
index_in_before = i = ref = start_in_before, ref1 = end_in_before;
ref <= ref1 ? i < ref1 : i > ref1;
index_in_before = ref <= ref1 ? ++i : --i
) {
new_match_length_at = {};
looking_for = before_tokens[index_in_before];
locations_in_after = index_of_before_locations_in_after_tokens[looking_for];
for (j = 0, len = locations_in_after.length; j < len; j++) {
index_in_after = locations_in_after[j];
if (index_in_after < start_in_after) {
continue;
}
if (index_in_after >= end_in_after) {
break;
}
if (match_length_at[index_in_after - 1] == null) {
match_length_at[index_in_after - 1] = 0;
}
new_match_length = match_length_at[index_in_after - 1] + 1;
new_match_length_at[index_in_after] = new_match_length;
if (new_match_length > best_match_length) {
best_match_in_before = index_in_before - new_match_length + 1;
best_match_in_after = index_in_after - new_match_length + 1;
best_match_length = new_match_length;
}
}
match_length_at = new_match_length_at;
}
if (best_match_length !== 0) {
match = new Match(
best_match_in_before,
best_match_in_after,
best_match_length
);
}
return match;
};
recursively_find_matching_blocks = function (
before_tokens,
after_tokens,
index_of_before_locations_in_after_tokens,
start_in_before,
end_in_before,
start_in_after,
end_in_after,
matching_blocks
) {
var match;
match = find_match(
before_tokens,
after_tokens,
index_of_before_locations_in_after_tokens,
start_in_before,
end_in_before,
start_in_after,
end_in_after
);
if (match != null) {
if (
start_in_before < match.start_in_before &&
start_in_after < match.start_in_after
) {
recursively_find_matching_blocks(
before_tokens,
after_tokens,
index_of_before_locations_in_after_tokens,
start_in_before,
match.start_in_before,
start_in_after,
match.start_in_after,
matching_blocks
);
}
matching_blocks.push(match);
if (
match.end_in_before <= end_in_before &&
match.end_in_after <= end_in_after
) {
recursively_find_matching_blocks(
before_tokens,
after_tokens,
index_of_before_locations_in_after_tokens,
match.end_in_before + 1,
end_in_before,
match.end_in_after + 1,
end_in_after,
matching_blocks
);
}
}
return matching_blocks;
};
create_index = function (p) {
var i, idx, index, len, ref, token;
if (p.find_these == null) {
throw new Error('params must have find_these key');
}
if (p.in_these == null) {
throw new Error('params must have in_these key');
}
index = {};
const arr = html_to_tokens(p.find_these);
const arr_number = [];
for (var i = 0; i < arr.length; i++) {
arr_number.push(arr[i].length);
}
ref = p.find_these;
for (i = 0, len = ref.length; i < len; i++) {
token = ref[i];
index[token] = [];
idx = p.in_these.indexOf(token);
while (idx !== -1) {
index[token].push(idx);
idx = p.in_these.indexOf(token, idx + 1);
}
}
console.log(token, 'token');
console.log(arr_number, 'arr_number');
return index;
};
find_matching_blocks = function (before_tokens, after_tokens) {
var index_of_before_locations_in_after_tokens, matching_blocks;
matching_blocks = [];
index_of_before_locations_in_after_tokens = create_index({
find_these: before_tokens,
in_these: after_tokens,
});
console.log(
index_of_before_locations_in_after_tokens,
'index_of_before_locations_in_after_tokens '
);
return recursively_find_matching_blocks(
before_tokens,
after_tokens,
index_of_before_locations_in_after_tokens,
0,
before_tokens.length,
0,
after_tokens.length,
matching_blocks
);
};
calculate_operations = function (before_tokens, after_tokens) {
var action_map,
action_up_to_match_positions,
i,
index,
is_single_whitespace,
j,
last_op,
len,
len1,
match,
match_starts_at_current_position_in_after,
match_starts_at_current_position_in_before,
matches,
op,
operations,
position_in_after,
position_in_before,
post_processed;
if (before_tokens == null) {
throw new Error('before_tokens?');
}
if (after_tokens == null) {
throw new Error('after_tokens?');
}
position_in_before = position_in_after = 0;
operations = [];
action_map = {
'false,false': 'replace',
'true,false': 'insert',
'false,true': 'delete',
'true,true': 'none',
};
matches = find_matching_blocks(before_tokens, after_tokens);
console.log(matches, 'matches');
matches.push(new Match(before_tokens.length, after_tokens.length, 0));
for (index = i = 0, len = matches.length; i < len; index = ++i) {
match = matches[index];
match_starts_at_current_position_in_before =
position_in_before === match.start_in_before;
match_starts_at_current_position_in_after =
position_in_after === match.start_in_after;
action_up_to_match_positions =
action_map[
[
match_starts_at_current_position_in_before,
match_starts_at_current_position_in_after,
].toString()
];
if (action_up_to_match_positions !== 'none') {
operations.push({
action: action_up_to_match_positions,
start_in_before: position_in_before,
end_in_before:
action_up_to_match_positions !== 'insert'
? match.start_in_before - 1
: void 0,
start_in_after: position_in_after,
end_in_after:
action_up_to_match_positions !== 'delete'
? match.start_in_after - 1
: void 0,
});
}
if (match.length !== 0) {
operations.push({
action: 'equal',
start_in_before: match.start_in_before,
end_in_before: match.end_in_before,
start_in_after: match.start_in_after,
end_in_after: match.end_in_after,
});
}
position_in_before = match.end_in_before + 1;
position_in_after = match.end_in_after + 1;
}
post_processed = [];
last_op = {
action: 'none',
};
is_single_whitespace = function (op) {
if (op.action !== 'equal') {
return false;
}
if (op.end_in_before - op.start_in_before !== 0) {
return false;
}
return /^\s$/.test(
before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9)
);
};
for (j = 0, len1 = operations.length; j < len1; j++) {
op = operations[j];
if (
(is_single_whitespace(op) && last_op.action === 'replace') ||
(op.action === 'replace' && last_op.action === 'replace')
) {
last_op.end_in_before = op.end_in_before;
last_op.end_in_after = op.end_in_after;
} else {
post_processed.push(op);
last_op = op;
}
}
return post_processed;
};
consecutive_where = function (start, content, predicate) {
var answer, i, index, last_matching_index, len, token;
content = content.slice(start, +content.length + 1 || 9e9);
// console.log(content, 'content');
// console.log(predicate, 'predicate');
// console.log(start, 'start');
last_matching_index = void 0;
for (index = i = 0, len = content.length; i < len; index = ++i) {
token = content[index];
answer = predicate(token);
if (answer === true) {
last_matching_index = index;
}
if (answer === false) {
break;
}
}
if (last_matching_index != null) {
return content.slice(0, +last_matching_index + 1 || 9e9);
}
return [];
};
wrap = function (tag, content) {
var length, non_tags, position, rendering, tags;
rendering = '';
position = 0;
length = content.length;
while (true) {
if (position >= length) {
break;
}
non_tags = consecutive_where(position, content, isnt_tag);
position += non_tags.length;
if (non_tags.length !== 0) {
rendering += `<${tag}>${non_tags.join('')}</${tag}>`;
}
if (position >= length) {
break;
}
tags = consecutive_where(position, content, is_tag);
position += tags.length;
rendering += tags.join('');
}
return rendering;
};
op_map = {
equal: function (op, before_tokens, after_tokens) {
return before_tokens
.slice(op.start_in_before, +op.end_in_before + 1 || 9e9)
.join('');
},
insert: function (op, before_tokens, after_tokens) {
var val;
const token_length = after_tokens.map(element => element.length);
val = after_tokens.slice(op.start_in_after, op.end_in_after + 1 || 9e9);
return wrap('ins', val);
},
delete: function (op, before_tokens, after_tokens) {
var val;
val = before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9);
return wrap('del', val);
},
};
op_map.replace = function (op, before_tokens, after_tokens) {
return (
op_map.delete(op, before_tokens, after_tokens) +
op_map.insert(op, before_tokens, after_tokens)
);
};
render_operations = function (before_tokens, after_tokens, operations) {
var i, len, op, rendering;
rendering = '';
console.log(operations.length, 'operations.length');
console.log(operations, 'operations');
for (i = 0, len = operations.length; i < len; i++) {
op = operations[i];
rendering += op_map[op.action](op, before_tokens, after_tokens);
}
return rendering;
};
diff = function (before, after) {
var ops;
if (before === after) {
return before;
}
before = html_to_tokens(before);
after = html_to_tokens(after);
ops = calculate_operations(before, after);
return render_operations(before, after, ops);
};
diff.html_to_tokens = html_to_tokens;
diff.find_matching_blocks = find_matching_blocks;
find_matching_blocks.find_match = find_match;
find_matching_blocks.create_index = create_index;
diff.calculate_operations = calculate_operations;
diff.render_operations = render_operations;
if (typeof define === 'function') {
define([], function () {
return diff;
});
} else if (typeof module !== 'undefined' && module !== null) {
module.exports = diff;
} else {
this.htmldiff = diff;
}
调用我编写的代码:
let oldTxt = Match.html_to_tokens(a);
let newTxt = Match.html_to_tokens(b);
let ops = Match.calculate_operations(a, b);
const render = Match.render_operations(newTxt, oldTxt, ops);
document.getElementById('output').innerHTML = render;
a 是第一个 html,b 是更改后的 html(不是字符串化)
关于如何继续我有什么想法吗?非常感谢您的帮助!
我知道错误出在“create_index”函数内部的某个地方。因为我正在计算文本的长度,然后将其与文本的长度进行比较,每个标签/单词都算作一个。
代码沙盒
Copyright 2014-2025 https://www.php.cn/ All Rights Reserved | php.cn | 湘ICP备2023035733号
我解决了!对于任何想要使用此代码的人,只需切换这部分即可:
let oldTxt = Match.html_to_tokens(a); let newTxt = Match.html_to_tokens(b); let ops = Match.calculate_operations(a, b); const render = Match.render_operations(newTxt, oldTxt, ops); document.getElementById('output').innerHTML = render;对此:
a = Match.html_to_tokens(a); b = Match.html_to_tokens(b); let ops = Match.calculate_operations(a, b); console.log(ops, 'ops'); const render = Match.render_operations(a, b, ops); document.getElementById('output').innerHTML = render;其中a和b是两个html文本:a(第一个),b(更改)
您可以在此处尝试新代码: CodeSandbox 已解决!