Я пробую Myers Diff, чтобы Diff 2 HTML Strings. Итак, я делаю: < /p>
Прочитайте строку HTML в структуру дерева < /li>
Создайте плоский список дерева с определенными границами. Например, < /li>
< /ol>
1
2
< /code>
< /div>
< /div>
< /p>
преобразуется как [таблица, [tr, [td, [1]]]], [tr, [td, [2]]]] < /code> < /p>
start = "3" разница. /> import htmlparser from 'htmlparser2';
import {diffHierarchical} from './beta-myers-diff.js';;
/**
* Converts HTML string to a hierarchical array structure
* @param {string} html - The HTML string to parse
* @returns {Array} - A hierarchical array representation of the HTML
*/
export function htmlToHierarchicalArray(html) {
// Parse HTML to DOM using htmlparser2
let dom = null;
const handler = new htmlparser.DomHandler((error, nodes) => {
if(error) {
console.error('Error parsing HTML:', error);
return;
}
// Find root element(s)
const tags = nodes.filter(node => node.type === 'tag');
if(tags.length > 0) {
dom = tags[0]; // Take the first tag as the root (usually there's only one root)
} else if(nodes.length > 0) {
dom = nodes[0]; // Take the first node if no tags
}
});
const parser = new htmlparser.Parser(handler, { decodeEntities: false });
parser.write(html);
parser.end();
// Convert DOM to hierarchical array structure
if (!dom) {
return []; // Return empty array if parsing failed
}
// Convert the single root element to our hierarchical structure
return nodeToHierarchicalArray(dom);
}
/**
* Recursive function to convert DOM nodes to hierarchical array
* @param {Array|Object} nodes - DOM node or array of nodes
* @returns {Array} - Hierarchical array representation
*/
function domToHierarchicalArray(nodes) {
// Handle single node
if(!Array.isArray(nodes)) {
return nodeToHierarchicalArray(nodes);
}
// Handle array of nodes
return nodes.map(node => nodeToHierarchicalArray(node));
}
/**
* Convert a single DOM node to hierarchical array
* @param {Object} node - DOM node
* @returns {Array} - Hierarchical array representation
*/
function nodeToHierarchicalArray(node) {
// Handle text node
if(node.type === 'text') {
const text = node.data.trim();
if(text === '') {
return null; // Skip empty text nodes
}
// Return text node as a single-element array to maintain consistency
return [node];
}
// Handle tag node
if(node.type === 'tag') {
// Create array with the complete node object as first element
const result = [node];
// Process children and add them to the array
if(node.children && node.children.length > 0) {
const childArrays = node.children
.map(child => nodeToHierarchicalArray(child))
.filter(item => item !== null); // Filter out null items (empty text)
if(childArrays.length > 0) {
result.push(...childArrays);
}
}
return result;
}
// Other node types (comment, directive, etc.) - skip
return null;
}
/**
* Test function for the table row swapping example
*/
function testTableRowSwapping() {
const oldHtml = `
1
2
`;
const newHtml = `
2
1
`;
// Parse HTML to hierarchical arrays
const oldArray = htmlToHierarchicalArray(oldHtml);
const newArray = htmlToHierarchicalArray(newHtml);
const editScript = diffHierarchical(oldArray, newArray);
}
testTableRowSwapping();< /code>
< /div>
< /div>
< /p>
myers diff код, который различает созданный список: < /p>
new Insert(item, true));
}
if (current.length === 0) {
return old.map(item => new Remove(item, true));
}
// Compare root nodes of the arrays
const oldRoot = old[0];
const currentRoot = current[0];
// If the root nodes are equal, we have a potential match
if (areNodesEqual(oldRoot, currentRoot)) {
// First, check if these are leaf nodes or if their subtrees are equal
if (old.length === 1 && current.length === 1) {
// Leaf nodes - just keep
return [new Keep(currentRoot)];
}
// If both have exactly the same structure, keep the entire subtree
if (areSubtreesEqual(old, current)) {
return [new Keep(current, true)];
}
// The roots match but the subtrees differ - recursively diff the children
const result = [new Keep(currentRoot)];
// Extract children (everything except the root node)
const oldChildren = old.slice(1);
const currentChildren = current.slice(1);
// Apply standard Myers diff to the children arrays
const childDiff = myersDiff(oldChildren, currentChildren);
result.push(...childDiff);
return result;
}
// Root nodes are different - try standard Myers diff on the top level
return myersDiff(old, current);
}
/**
* Standard Myers diff algorithm for arrays
* This is similar to the original myersDiff but adapted for our hierarchical arrays
*/
function myersDiff(old, current) {
// Initialize the frontier with starting point
const frontier = { 1: new Frontier(0, []) };
// Convert from 1-based to 0-based indexing
function one(idx) {
return idx - 1;
}
const aMax = old.length; // horizontal size of edit graph
const bMax = current.length; // vertical size of edit graph
// Main loop: try increasing numbers of edits
for (let d = 0; d = 1 && x 0;
if (isSubtree) {
// Add the entire subtree as a single remove operation
history.push(new Remove(oldItem, true));
} else {
history.push(new Remove(oldItem));
}
}
// Follow the snake: match diagonal moves as far as possible
while (x < aMax && y < bMax) {
const oldItem = old[one(x + 1)];
const currentItem = current[one(y + 1)];
// Both items must be arrays (subtrees) to compare
if (Array.isArray(oldItem) && Array.isArray(currentItem)) {
// Check if the subtrees are equal
if (areSubtreesEqual(oldItem, currentItem)) {
x += 1;
y += 1;
history.push(new Keep(currentItem, true));
} else {
// Check if just the root nodes are equal
if (oldItem.length > 0 && currentItem.length > 0 &&
areNodesEqual(oldItem[0], currentItem[0])) {
x += 1;
y += 1;
// Add a Keep for the root node, then recursively diff the children
history.push(new Keep(currentItem[0]));
// Apply hierarchical diff to the children
const oldChildren = oldItem.slice(1);
const currentChildren = currentItem.slice(1);
if (oldChildren.length > 0 || currentChildren.length > 0) {
const childDiff = hierarchicalMyersDiff(...oldChildren, ...currentChildren);
history.push(...childDiff);
}
} else {
break; // No match, end of snake
}
}
} else {
break; // Different types, end of snake
}
}
// Check if we've reached the end
if (x >= aMax && y >= bMax) {
return history; // Found solution
} else {
// Save this point in the frontier
frontier[k] = new Frontier(x, history);
}
}
}
// If no solution found with the hierarchical approach, fallback to treating each element individually
// This should be rare but ensures we always produce a result
const fallbackResult = [];
// Remove all old elements
for (let i = 0; i < old.length; i++) {
fallbackResult.push(new Remove(old));
}
// Insert all new elements
for (let i = 0; i < current.length; i++) {
fallbackResult.push(new Insert(current));
}
return fallbackResult;
}
/**
* Process the edit script to expand subtree operations into individual node operations
* This is necessary to integrate with the existing system
*/
export function expandEditScript(editScript) {
const expanded = [];
for (const operation of editScript) {
if (operation.isSubtree) {
// Convert subtree operation to individual node operations
const node = operation.line[0]; // Root node
const children = operation.line.slice(1); // Child subtrees
// Add operation for the root node
if (operation.type === 'Keep') {
expanded.push(new Keep(node));
} else if (operation.type === 'Insert') {
expanded.push(new Insert(node));
} else if (operation.type === 'Remove') {
expanded.push(new Remove(node));
}
// Recursively process children
for (const child of children) {
const childOperations = expandSubtreeOperation(child, operation.type);
expanded.push(...childOperations);
}
} else {
// Single node operation - add as is
expanded.push(operation);
}
}
return expanded;
}
/**
* Helper function to expand a single subtree operation into individual node operations
*/
function expandSubtreeOperation(subtree, operationType) {
const result = [];
if (!Array.isArray(subtree)) {
// Not a subtree, just a single node
if (operationType === 'Keep') {
result.push(new Keep(subtree));
} else if (operationType === 'Insert') {
result.push(new Insert(subtree));
} else if (operationType === 'Remove') {
result.push(new Remove(subtree));
}
return result;
}
// Process the root node
const rootNode = subtree[0];
if (operationType === 'Keep') {
result.push(new Keep(rootNode));
} else if (operationType === 'Insert') {
result.push(new Insert(rootNode));
} else if (operationType === 'Remove') {
result.push(new Remove(rootNode));
}
// Process all children recursively
for (let i = 1; i < subtree.length; i++) {
const childOperations = expandSubtreeOperation(subtree, operationType);
result.push(...childOperations);
}
return result;
}
/**
* Main entry point - applies hierarchical Myers diff and returns expanded edit script
*/
export function diffHierarchical(old, current) {
const editScript = hierarchicalMyersDiff(old, current);
return expandEditScript(editScript);
}< /code>
< /div>
< /div>
< /p>
ожидание после запуска Myers diff на строке html: < /p>
old: < /p>
1
2
< /code>
new: < /p>
2
1
< /code>
заключается в том, что он возвращает дифференциацию, которая имеет 2 редактора, которое говорит 1 как удаленное и 1 как вставлено. Скорее, он возвращает 4 редактора, где снято 1, вставлено 2, 1 вставлен и 2 удаляется. Как я могу решить для этого? Первая часть кода просто читает HTML и кодирует его в список. Вторая часть - это реализация Myers Diff, которая работает на последовательности, генерируемой в первой части.
Подробнее здесь: https://stackoverflow.com/questions/795 ... ml-diffing
Использование Myers Diff для дифференциации HTML ⇐ Html
Программисты Html
-
Anonymous
1741874386
Anonymous
Я пробую Myers Diff, чтобы Diff 2 HTML Strings. Итак, я делаю: < /p>
Прочитайте строку HTML в структуру дерева < /li>
Создайте плоский список дерева с определенными границами. Например, < /li>
< /ol>
1
2
< /code>
< /div>
< /div>
< /p>
преобразуется как [таблица, [tr, [td, [1]]]], [tr, [td, [2]]]] < /code> < /p>
start = "3" разница. /> import htmlparser from 'htmlparser2';
import {diffHierarchical} from './beta-myers-diff.js';;
/**
* Converts HTML string to a hierarchical array structure
* @param {string} html - The HTML string to parse
* @returns {Array} - A hierarchical array representation of the HTML
*/
export function htmlToHierarchicalArray(html) {
// Parse HTML to DOM using htmlparser2
let dom = null;
const handler = new htmlparser.DomHandler((error, nodes) => {
if(error) {
console.error('Error parsing HTML:', error);
return;
}
// Find root element(s)
const tags = nodes.filter(node => node.type === 'tag');
if(tags.length > 0) {
dom = tags[0]; // Take the first tag as the root (usually there's only one root)
} else if(nodes.length > 0) {
dom = nodes[0]; // Take the first node if no tags
}
});
const parser = new htmlparser.Parser(handler, { decodeEntities: false });
parser.write(html);
parser.end();
// Convert DOM to hierarchical array structure
if (!dom) {
return []; // Return empty array if parsing failed
}
// Convert the single root element to our hierarchical structure
return nodeToHierarchicalArray(dom);
}
/**
* Recursive function to convert DOM nodes to hierarchical array
* @param {Array|Object} nodes - DOM node or array of nodes
* @returns {Array} - Hierarchical array representation
*/
function domToHierarchicalArray(nodes) {
// Handle single node
if(!Array.isArray(nodes)) {
return nodeToHierarchicalArray(nodes);
}
// Handle array of nodes
return nodes.map(node => nodeToHierarchicalArray(node));
}
/**
* Convert a single DOM node to hierarchical array
* @param {Object} node - DOM node
* @returns {Array} - Hierarchical array representation
*/
function nodeToHierarchicalArray(node) {
// Handle text node
if(node.type === 'text') {
const text = node.data.trim();
if(text === '') {
return null; // Skip empty text nodes
}
// Return text node as a single-element array to maintain consistency
return [node];
}
// Handle tag node
if(node.type === 'tag') {
// Create array with the complete node object as first element
const result = [node];
// Process children and add them to the array
if(node.children && node.children.length > 0) {
const childArrays = node.children
.map(child => nodeToHierarchicalArray(child))
.filter(item => item !== null); // Filter out null items (empty text)
if(childArrays.length > 0) {
result.push(...childArrays);
}
}
return result;
}
// Other node types (comment, directive, etc.) - skip
return null;
}
/**
* Test function for the table row swapping example
*/
function testTableRowSwapping() {
const oldHtml = `
1
2
`;
const newHtml = `
2
1
`;
// Parse HTML to hierarchical arrays
const oldArray = htmlToHierarchicalArray(oldHtml);
const newArray = htmlToHierarchicalArray(newHtml);
const editScript = diffHierarchical(oldArray, newArray);
}
testTableRowSwapping();< /code>
< /div>
< /div>
< /p>
myers diff код, который различает созданный список: < /p>
new Insert(item, true));
}
if (current.length === 0) {
return old.map(item => new Remove(item, true));
}
// Compare root nodes of the arrays
const oldRoot = old[0];
const currentRoot = current[0];
// If the root nodes are equal, we have a potential match
if (areNodesEqual(oldRoot, currentRoot)) {
// First, check if these are leaf nodes or if their subtrees are equal
if (old.length === 1 && current.length === 1) {
// Leaf nodes - just keep
return [new Keep(currentRoot)];
}
// If both have exactly the same structure, keep the entire subtree
if (areSubtreesEqual(old, current)) {
return [new Keep(current, true)];
}
// The roots match but the subtrees differ - recursively diff the children
const result = [new Keep(currentRoot)];
// Extract children (everything except the root node)
const oldChildren = old.slice(1);
const currentChildren = current.slice(1);
// Apply standard Myers diff to the children arrays
const childDiff = myersDiff(oldChildren, currentChildren);
result.push(...childDiff);
return result;
}
// Root nodes are different - try standard Myers diff on the top level
return myersDiff(old, current);
}
/**
* Standard Myers diff algorithm for arrays
* This is similar to the original myersDiff but adapted for our hierarchical arrays
*/
function myersDiff(old, current) {
// Initialize the frontier with starting point
const frontier = { 1: new Frontier(0, []) };
// Convert from 1-based to 0-based indexing
function one(idx) {
return idx - 1;
}
const aMax = old.length; // horizontal size of edit graph
const bMax = current.length; // vertical size of edit graph
// Main loop: try increasing numbers of edits
for (let d = 0; d = 1 && x 0;
if (isSubtree) {
// Add the entire subtree as a single remove operation
history.push(new Remove(oldItem, true));
} else {
history.push(new Remove(oldItem));
}
}
// Follow the snake: match diagonal moves as far as possible
while (x < aMax && y < bMax) {
const oldItem = old[one(x + 1)];
const currentItem = current[one(y + 1)];
// Both items must be arrays (subtrees) to compare
if (Array.isArray(oldItem) && Array.isArray(currentItem)) {
// Check if the subtrees are equal
if (areSubtreesEqual(oldItem, currentItem)) {
x += 1;
y += 1;
history.push(new Keep(currentItem, true));
} else {
// Check if just the root nodes are equal
if (oldItem.length > 0 && currentItem.length > 0 &&
areNodesEqual(oldItem[0], currentItem[0])) {
x += 1;
y += 1;
// Add a Keep for the root node, then recursively diff the children
history.push(new Keep(currentItem[0]));
// Apply hierarchical diff to the children
const oldChildren = oldItem.slice(1);
const currentChildren = currentItem.slice(1);
if (oldChildren.length > 0 || currentChildren.length > 0) {
const childDiff = hierarchicalMyersDiff(...oldChildren, ...currentChildren);
history.push(...childDiff);
}
} else {
break; // No match, end of snake
}
}
} else {
break; // Different types, end of snake
}
}
// Check if we've reached the end
if (x >= aMax && y >= bMax) {
return history; // Found solution
} else {
// Save this point in the frontier
frontier[k] = new Frontier(x, history);
}
}
}
// If no solution found with the hierarchical approach, fallback to treating each element individually
// This should be rare but ensures we always produce a result
const fallbackResult = [];
// Remove all old elements
for (let i = 0; i < old.length; i++) {
fallbackResult.push(new Remove(old[i]));
}
// Insert all new elements
for (let i = 0; i < current.length; i++) {
fallbackResult.push(new Insert(current[i]));
}
return fallbackResult;
}
/**
* Process the edit script to expand subtree operations into individual node operations
* This is necessary to integrate with the existing system
*/
export function expandEditScript(editScript) {
const expanded = [];
for (const operation of editScript) {
if (operation.isSubtree) {
// Convert subtree operation to individual node operations
const node = operation.line[0]; // Root node
const children = operation.line.slice(1); // Child subtrees
// Add operation for the root node
if (operation.type === 'Keep') {
expanded.push(new Keep(node));
} else if (operation.type === 'Insert') {
expanded.push(new Insert(node));
} else if (operation.type === 'Remove') {
expanded.push(new Remove(node));
}
// Recursively process children
for (const child of children) {
const childOperations = expandSubtreeOperation(child, operation.type);
expanded.push(...childOperations);
}
} else {
// Single node operation - add as is
expanded.push(operation);
}
}
return expanded;
}
/**
* Helper function to expand a single subtree operation into individual node operations
*/
function expandSubtreeOperation(subtree, operationType) {
const result = [];
if (!Array.isArray(subtree)) {
// Not a subtree, just a single node
if (operationType === 'Keep') {
result.push(new Keep(subtree));
} else if (operationType === 'Insert') {
result.push(new Insert(subtree));
} else if (operationType === 'Remove') {
result.push(new Remove(subtree));
}
return result;
}
// Process the root node
const rootNode = subtree[0];
if (operationType === 'Keep') {
result.push(new Keep(rootNode));
} else if (operationType === 'Insert') {
result.push(new Insert(rootNode));
} else if (operationType === 'Remove') {
result.push(new Remove(rootNode));
}
// Process all children recursively
for (let i = 1; i < subtree.length; i++) {
const childOperations = expandSubtreeOperation(subtree[i], operationType);
result.push(...childOperations);
}
return result;
}
/**
* Main entry point - applies hierarchical Myers diff and returns expanded edit script
*/
export function diffHierarchical(old, current) {
const editScript = hierarchicalMyersDiff(old, current);
return expandEditScript(editScript);
}< /code>
< /div>
< /div>
< /p>
ожидание после запуска Myers diff на строке html: < /p>
old: < /p>
1
2
< /code>
new: < /p>
2
1
< /code>
заключается в том, что он возвращает дифференциацию, которая имеет 2 редактора, которое говорит 1 как удаленное и 1 как вставлено. Скорее, он возвращает 4 редактора, где снято 1, вставлено 2, 1 вставлен и 2 удаляется. Как я могу решить для этого? Первая часть кода просто читает HTML и кодирует его в список. Вторая часть - это реализация Myers Diff, которая работает на последовательности, генерируемой в первой части.
Подробнее здесь: [url]https://stackoverflow.com/questions/79506497/using-myers-diff-for-html-diffing[/url]
Ответить
1 сообщение
• Страница 1 из 1
Перейти
- Кемерово-IT
- ↳ Javascript
- ↳ C#
- ↳ JAVA
- ↳ Elasticsearch aggregation
- ↳ Python
- ↳ Php
- ↳ Android
- ↳ Html
- ↳ Jquery
- ↳ C++
- ↳ IOS
- ↳ CSS
- ↳ Excel
- ↳ Linux
- ↳ Apache
- ↳ MySql
- Детский мир
- Для души
- ↳ Музыкальные инструменты даром
- ↳ Печатная продукция даром
- Внешняя красота и здоровье
- ↳ Одежда и обувь для взрослых даром
- ↳ Товары для здоровья
- ↳ Физкультура и спорт
- Техника - даром!
- ↳ Автомобилистам
- ↳ Компьютерная техника
- ↳ Плиты: газовые и электрические
- ↳ Холодильники
- ↳ Стиральные машины
- ↳ Телевизоры
- ↳ Телефоны, смартфоны, плашеты
- ↳ Швейные машинки
- ↳ Прочая электроника и техника
- ↳ Фототехника
- Ремонт и интерьер
- ↳ Стройматериалы, инструмент
- ↳ Мебель и предметы интерьера даром
- ↳ Cантехника
- Другие темы
- ↳ Разное даром
- ↳ Давай меняться!
- ↳ Отдам\возьму за копеечку
- ↳ Работа и подработка в Кемерове
- ↳ Давай с тобой поговорим...
Мобильная версия