Использование Myers Diff для дифференциации HTMLHtml

Программисты Html
Ответить
Anonymous
 Использование Myers Diff для дифференциации HTML

Сообщение Anonymous »

Я пробую Myers Diff, чтобы Diff 2 HTML Strings. Итак, я делаю: < /p>

Прочитайте строку HTML в структуру дерева < /li>
Создайте плоский список дерева с определенными границами. Например, < /li>
< /ol>





1




2


< /code>
< /div>
< /div>
< /p>
преобразуется как [таблица, [tr, [td, [1]]]], [tr, [td, [2]]]] < /code> < /p>
start = "3" разница. /> import htmlparser from 'htmlparser2';
import {diffHierarchical} from './beta-myers-diff.js';;

/**
* Converts HTML string to a hierarchical array structure
* @param {string} html - The HTML string to parse
* @returns {Array} - A hierarchical array representation of the HTML
*/
export function htmlToHierarchicalArray(html) {
// Parse HTML to DOM using htmlparser2
let dom = null;

const handler = new htmlparser.DomHandler((error, nodes) => {
if(error) {
console.error('Error parsing HTML:', error);
return;
}

// Find root element(s)
const tags = nodes.filter(node => node.type === 'tag');
if(tags.length > 0) {
dom = tags[0]; // Take the first tag as the root (usually there's only one root)
} else if(nodes.length > 0) {
dom = nodes[0]; // Take the first node if no tags
}
});

const parser = new htmlparser.Parser(handler, { decodeEntities: false });
parser.write(html);
parser.end();

// Convert DOM to hierarchical array structure
if (!dom) {
return []; // Return empty array if parsing failed
}

// Convert the single root element to our hierarchical structure
return nodeToHierarchicalArray(dom);
}

/**
* Recursive function to convert DOM nodes to hierarchical array
* @param {Array|Object} nodes - DOM node or array of nodes
* @returns {Array} - Hierarchical array representation
*/
function domToHierarchicalArray(nodes) {
// Handle single node
if(!Array.isArray(nodes)) {
return nodeToHierarchicalArray(nodes);
}

// Handle array of nodes
return nodes.map(node => nodeToHierarchicalArray(node));
}

/**
* Convert a single DOM node to hierarchical array
* @param {Object} node - DOM node
* @returns {Array} - Hierarchical array representation
*/
function nodeToHierarchicalArray(node) {
// Handle text node
if(node.type === 'text') {
const text = node.data.trim();
if(text === '') {
return null; // Skip empty text nodes
}
// Return text node as a single-element array to maintain consistency
return [node];
}

// Handle tag node
if(node.type === 'tag') {
// Create array with the complete node object as first element
const result = [node];

// Process children and add them to the array
if(node.children && node.children.length > 0) {
const childArrays = node.children
.map(child => nodeToHierarchicalArray(child))
.filter(item => item !== null); // Filter out null items (empty text)

if(childArrays.length > 0) {
result.push(...childArrays);
}
}

return result;
}

// Other node types (comment, directive, etc.) - skip
return null;
}

/**
* Test function for the table row swapping example
*/
function testTableRowSwapping() {
const oldHtml = `


1




2


`;

const newHtml = `


2




1


`;

// Parse HTML to hierarchical arrays
const oldArray = htmlToHierarchicalArray(oldHtml);
const newArray = htmlToHierarchicalArray(newHtml);

const editScript = diffHierarchical(oldArray, newArray);

}

testTableRowSwapping();< /code>
< /div>
< /div>
< /p>
myers diff код, который различает созданный список: < /p>


new Insert(item, true));
}
if (current.length === 0) {
return old.map(item => new Remove(item, true));
}

// Compare root nodes of the arrays
const oldRoot = old[0];
const currentRoot = current[0];

// If the root nodes are equal, we have a potential match
if (areNodesEqual(oldRoot, currentRoot)) {
// First, check if these are leaf nodes or if their subtrees are equal
if (old.length === 1 && current.length === 1) {
// Leaf nodes - just keep
return [new Keep(currentRoot)];
}

// If both have exactly the same structure, keep the entire subtree
if (areSubtreesEqual(old, current)) {
return [new Keep(current, true)];
}

// The roots match but the subtrees differ - recursively diff the children
const result = [new Keep(currentRoot)];

// Extract children (everything except the root node)
const oldChildren = old.slice(1);
const currentChildren = current.slice(1);

// Apply standard Myers diff to the children arrays
const childDiff = myersDiff(oldChildren, currentChildren);
result.push(...childDiff);

return result;
}

// Root nodes are different - try standard Myers diff on the top level
return myersDiff(old, current);
}

/**
* Standard Myers diff algorithm for arrays
* This is similar to the original myersDiff but adapted for our hierarchical arrays
*/
function myersDiff(old, current) {
// Initialize the frontier with starting point
const frontier = { 1: new Frontier(0, []) };

// Convert from 1-based to 0-based indexing
function one(idx) {
return idx - 1;
}

const aMax = old.length; // horizontal size of edit graph
const bMax = current.length; // vertical size of edit graph

// Main loop: try increasing numbers of edits
for (let d = 0; d = 1 && x 0;

if (isSubtree) {
// Add the entire subtree as a single remove operation
history.push(new Remove(oldItem, true));
} else {
history.push(new Remove(oldItem));
}
}

// Follow the snake: match diagonal moves as far as possible
while (x < aMax && y < bMax) {
const oldItem = old[one(x + 1)];
const currentItem = current[one(y + 1)];

// Both items must be arrays (subtrees) to compare
if (Array.isArray(oldItem) && Array.isArray(currentItem)) {
// Check if the subtrees are equal
if (areSubtreesEqual(oldItem, currentItem)) {
x += 1;
y += 1;
history.push(new Keep(currentItem, true));
} else {
// Check if just the root nodes are equal
if (oldItem.length > 0 && currentItem.length > 0 &&
areNodesEqual(oldItem[0], currentItem[0])) {
x += 1;
y += 1;
// Add a Keep for the root node, then recursively diff the children
history.push(new Keep(currentItem[0]));

// Apply hierarchical diff to the children
const oldChildren = oldItem.slice(1);
const currentChildren = currentItem.slice(1);

if (oldChildren.length > 0 || currentChildren.length > 0) {
const childDiff = hierarchicalMyersDiff(...oldChildren, ...currentChildren);
history.push(...childDiff);
}
} else {
break; // No match, end of snake
}
}
} else {
break; // Different types, end of snake
}
}

// Check if we've reached the end
if (x >= aMax && y >= bMax) {
return history; // Found solution
} else {
// Save this point in the frontier
frontier[k] = new Frontier(x, history);
}
}
}

// If no solution found with the hierarchical approach, fallback to treating each element individually
// This should be rare but ensures we always produce a result
const fallbackResult = [];

// Remove all old elements
for (let i = 0; i < old.length; i++) {
fallbackResult.push(new Remove(old));
}

// Insert all new elements
for (let i = 0; i < current.length; i++) {
fallbackResult.push(new Insert(current));
}

return fallbackResult;
}

/**
* Process the edit script to expand subtree operations into individual node operations
* This is necessary to integrate with the existing system
*/
export function expandEditScript(editScript) {
const expanded = [];

for (const operation of editScript) {
if (operation.isSubtree) {
// Convert subtree operation to individual node operations
const node = operation.line[0]; // Root node
const children = operation.line.slice(1); // Child subtrees

// Add operation for the root node
if (operation.type === 'Keep') {
expanded.push(new Keep(node));
} else if (operation.type === 'Insert') {
expanded.push(new Insert(node));
} else if (operation.type === 'Remove') {
expanded.push(new Remove(node));
}

// Recursively process children
for (const child of children) {
const childOperations = expandSubtreeOperation(child, operation.type);
expanded.push(...childOperations);
}
} else {
// Single node operation - add as is
expanded.push(operation);
}
}

return expanded;
}

/**
* Helper function to expand a single subtree operation into individual node operations
*/
function expandSubtreeOperation(subtree, operationType) {
const result = [];

if (!Array.isArray(subtree)) {
// Not a subtree, just a single node
if (operationType === 'Keep') {
result.push(new Keep(subtree));
} else if (operationType === 'Insert') {
result.push(new Insert(subtree));
} else if (operationType === 'Remove') {
result.push(new Remove(subtree));
}
return result;
}

// Process the root node
const rootNode = subtree[0];
if (operationType === 'Keep') {
result.push(new Keep(rootNode));
} else if (operationType === 'Insert') {
result.push(new Insert(rootNode));
} else if (operationType === 'Remove') {
result.push(new Remove(rootNode));
}

// Process all children recursively
for (let i = 1; i < subtree.length; i++) {
const childOperations = expandSubtreeOperation(subtree, operationType);
result.push(...childOperations);
}

return result;
}

/**
* Main entry point - applies hierarchical Myers diff and returns expanded edit script
*/
export function diffHierarchical(old, current) {
const editScript = hierarchicalMyersDiff(old, current);
return expandEditScript(editScript);
}< /code>
< /div>
< /div>
< /p>
ожидание после запуска Myers diff на строке html: < /p>
old: < /p>



1




2



< /code>
new: < /p>



2




1



< /code>
заключается в том, что он возвращает дифференциацию, которая имеет 2 редактора, которое говорит 1 как удаленное и 1 как вставлено. Скорее, он возвращает 4 редактора, где снято 1, вставлено 2, 1 вставлен и 2 удаляется. Как я могу решить для этого? Первая часть кода просто читает HTML и кодирует его в список. Вторая часть - это реализация Myers Diff, которая работает на последовательности, генерируемой в первой части.


Подробнее здесь: https://stackoverflow.com/questions/795 ... ml-diffing
Ответить

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

Вернуться в «Html»