Text Comparison and Diff Analysis Guide
Text comparison is essential for version control, content management, and quality assurance. This guide covers different comparison methods, implementation strategies, and practical applications for text analysis.
1. Text Comparison Fundamentals
Types of Text Comparison
Comparison Methods
/**
 * Catalogue of the comparison granularities discussed in this guide.
 *
 * Keys are the granularity names (`character`, `word`, `line`, `semantic`);
 * each value describes that granularity with four human-readable strings:
 * `description`, `useCase`, `granularity` and `performance`.
 */
const comparisonTypes = Object.fromEntries(
  [
    // [name, description, use case, granularity, performance]
    [
      'character',
      'Character-by-character comparison',
      'Precise editing, small text changes',
      'Highest precision',
      'Slower for large texts',
    ],
    [
      'word',
      'Word-by-word comparison',
      'Content editing, document review',
      'Balanced precision and readability',
      'Good balance',
    ],
    [
      'line',
      'Line-by-line comparison',
      'Code review, file comparison',
      'Best for structured text',
      'Fastest for large files',
    ],
    [
      'semantic',
      'Meaning-based comparison',
      'Content similarity analysis',
      'Context-aware',
      'Most resource-intensive',
    ],
  ].map(([name, description, useCase, granularity, cost]) => [
    name,
    { description, useCase, granularity, performance: cost },
  ]),
);
Diff Algorithm Implementation
Basic Diff Algorithm
/**
 * Produces character-, word- and line-level diffs between two strings.
 *
 * Every `compare*` method returns an array of operations shaped as
 * `{ type, value, position }`, where `type` is one of the values in
 * `this.diffTypes` ('equal', 'insert', 'delete').
 */
class TextDiffer {
  constructor() {
    this.diffTypes = {
      EQUAL: 'equal',
      INSERT: 'insert',
      DELETE: 'delete'
    };
  }

  /**
   * Compares two strings index by index (no alignment is attempted, so a
   * character inserted at the start makes every following index differ).
   *
   * Within a run of differing indexes all deleted characters are emitted as
   * one DELETE operation followed by one INSERT operation, instead of
   * alternating per character, so that a changed run reads as
   * "old text -> new text".
   *
   * @param {string} text1 - Original text.
   * @param {string} text2 - Modified text.
   * @returns {Array<{type: string, value: string, position: number}>}
   *   Consolidated operations; `position` is the index at which the run starts.
   */
  compareByCharacter(text1, text2) {
    const results = [];
    let pendingDelete = null;
    let pendingInsert = null;

    // Emits the buffered changed run (deletions first, then insertions).
    const flushPending = () => {
      if (pendingDelete) results.push(pendingDelete);
      if (pendingInsert) results.push(pendingInsert);
      pendingDelete = null;
      pendingInsert = null;
    };

    const maxLength = Math.max(text1.length, text2.length);
    for (let i = 0; i < maxLength; i++) {
      // Past the end of the shorter text the lookup is undefined -> ''.
      const char1 = text1[i] || '';
      const char2 = text2[i] || '';

      if (char1 === char2) {
        flushPending();
        results.push({
          type: this.diffTypes.EQUAL,
          value: char1,
          position: i
        });
        continue;
      }

      if (char1) {
        if (pendingDelete) {
          pendingDelete.value += char1;
        } else {
          pendingDelete = { type: this.diffTypes.DELETE, value: char1, position: i };
        }
      }
      if (char2) {
        if (pendingInsert) {
          pendingInsert.value += char2;
        } else {
          pendingInsert = { type: this.diffTypes.INSERT, value: char2, position: i };
        }
      }
    }
    flushPending();

    return this.consolidateResults(results);
  }

  /**
   * Diffs two strings token by token. The capturing group in the split
   * pattern keeps each whitespace run as its own token, so joining the
   * tokens reproduces the input exactly.
   *
   * @param {string} text1 - Original text.
   * @param {string} text2 - Modified text.
   * @returns {Array<{type: string, value: string, position: {old: ?number, new: ?number}}>}
   */
  compareByWords(text1, text2) {
    const words1 = text1.split(/(\s+)/);
    const words2 = text2.split(/(\s+)/);
    return this.longestCommonSubsequence(words1, words2);
  }

  /**
   * Diffs two strings line by line, splitting on '\n'. The newline
   * characters themselves are not part of the returned values.
   *
   * @param {string} text1 - Original text.
   * @param {string} text2 - Modified text.
   * @returns {Array<{type: string, value: string, position: {old: ?number, new: ?number}}>}
   */
  compareByLines(text1, text2) {
    const lines1 = text1.split('\n');
    const lines2 = text2.split('\n');
    return this.longestCommonSubsequence(lines1, lines2);
  }

  /**
   * Builds the longest-common-subsequence length table for two token arrays
   * and converts it into a list of diff operations.
   *
   * Allocates an (m + 1) x (n + 1) table, so time and memory both grow with
   * the product of the two array lengths.
   *
   * @param {Array<string>} arr1 - Tokens of the original text.
   * @param {Array<string>} arr2 - Tokens of the modified text.
   * @returns {Array<{type: string, value: string, position: {old: ?number, new: ?number}}>}
   */
  longestCommonSubsequence(arr1, arr2) {
    const m = arr1.length;
    const n = arr2.length;
    // dp[i][j] = LCS length of arr1[0..i) and arr2[0..j); row/column 0 stay 0.
    const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));

    for (let i = 1; i <= m; i++) {
      for (let j = 1; j <= n; j++) {
        if (arr1[i - 1] === arr2[j - 1]) {
          dp[i][j] = dp[i - 1][j - 1] + 1;
        } else {
          dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
        }
      }
    }

    return this.backtrackLCS(arr1, arr2, dp, m, n);
  }

  /**
   * Walks the LCS table from cell (i, j) back to (0, 0) and returns the
   * operations in forward (start-to-end) order.
   *
   * @param {Array<string>} arr1 - Tokens of the original text.
   * @param {Array<string>} arr2 - Tokens of the modified text.
   * @param {Array<Array<number>>} dp - Table built by longestCommonSubsequence.
   * @param {number} i - Starting row (normally arr1.length).
   * @param {number} j - Starting column (normally arr2.length).
   * @returns {Array<{type: string, value: string, position: {old: ?number, new: ?number}}>}
   */
  backtrackLCS(arr1, arr2, dp, i, j) {
    // Operations are discovered end-to-start; collect them with push() and
    // reverse once at the end rather than paying for unshift() on every step.
    const reversed = [];

    while (i > 0 || j > 0) {
      if (i > 0 && j > 0 && arr1[i - 1] === arr2[j - 1]) {
        reversed.push({
          type: this.diffTypes.EQUAL,
          value: arr1[i - 1],
          position: { old: i - 1, new: j - 1 }
        });
        i--;
        j--;
      } else if (i > 0 && (j === 0 || dp[i - 1][j] >= dp[i][j - 1])) {
        // Ties step up (delete) before stepping left (insert).
        reversed.push({
          type: this.diffTypes.DELETE,
          value: arr1[i - 1],
          position: { old: i - 1, new: null }
        });
        i--;
      } else {
        reversed.push({
          type: this.diffTypes.INSERT,
          value: arr2[j - 1],
          position: { old: null, new: j - 1 }
        });
        j--;
      }
    }

    return reversed.reverse();
  }

  /**
   * Merges adjacent operations of the same type by concatenating their
   * values. The merged operation keeps the `position` of the first
   * operation in the run. Input items are not modified.
   *
   * @param {Array<{type: string, value: string, position: *}>} results
   * @returns {Array<{type: string, value: string, position: *}>}
   */
  consolidateResults(results) {
    const consolidated = [];
    let current = null;

    for (const item of results) {
      if (current && current.type === item.type) {
        current.value += item.value;
      } else {
        if (current) consolidated.push(current);
        // Shallow copy so appending to `value` never touches the input item.
        current = { ...item };
      }
    }
    if (current) consolidated.push(current);

    return consolidated;
  }
}
2. Advanced Comparison Features
Similarity Calculation
Text Similarity Metrics
/**
 * Computes edit-distance and bag-of-words similarity metrics between two
 * strings.
 *
 * Word-based metrics lower-case the text and split it on whitespace runs;
 * empty tokens (from leading/trailing whitespace or empty input) are
 * ignored.
 */
class SimilarityCalculator {
  /**
   * Splits text into lower-cased, whitespace-separated words.
   *
   * `split(/\s+/)` alone yields '' entries for leading/trailing whitespace
   * and for the empty string; those are dropped here so they are never
   * counted as words.
   *
   * @param {string} text
   * @returns {Array<string>} Non-empty lower-cased tokens.
   */
  tokenizeWords(text) {
    return text
      .toLowerCase()
      .split(/\s+/)
      .filter((token) => token !== '');
  }

  /**
   * Levenshtein (edit) distance between two strings, compared by string
   * index (UTF-16 code unit).
   *
   * Only two rows of the dynamic-programming table are kept at a time.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number} Minimum number of single-unit insertions, deletions
   *   and substitutions needed to turn one string into the other.
   */
  calculateLevenshteinDistance(str1, str2) {
    // previous[i] = distance between str1[0..i) and the str2 prefix handled so far.
    let previous = Array.from({ length: str1.length + 1 }, (_, i) => i);

    for (let j = 1; j <= str2.length; j++) {
      const current = [j];
      for (let i = 1; i <= str1.length; i++) {
        const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
        current[i] = Math.min(
          previous[i] + 1, // consume one unit of str2 only
          current[i - 1] + 1, // consume one unit of str1 only
          previous[i - 1] + cost // match or substitute
        );
      }
      previous = current;
    }

    return previous[str1.length];
  }

  /**
   * Similarity derived from the Levenshtein distance, as a percentage of
   * the longer string's length, rounded to two decimals.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number} Value in [0, 100]; two empty strings give 100.
   */
  calculateSimilarityPercentage(str1, str2) {
    const distance = this.calculateLevenshteinDistance(str1, str2);
    const maxLength = Math.max(str1.length, str2.length);
    if (maxLength === 0) return 100;
    const similarity = ((maxLength - distance) / maxLength) * 100;
    return Math.round(similarity * 100) / 100;
  }

  /**
   * Jaccard similarity of the two texts' word sets: |A ∩ B| / |A ∪ B|.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number} Value in [0, 1]; 1 when neither text contains a word.
   */
  calculateJaccardSimilarity(str1, str2) {
    const set1 = new Set(this.tokenizeWords(str1));
    const set2 = new Set(this.tokenizeWords(str2));
    const union = new Set([...set1, ...set2]);

    // Two word-less texts are identical; also avoids 0 / 0.
    if (union.size === 0) return 1;

    let shared = 0;
    for (const word of set1) {
      if (set2.has(word)) shared++;
    }
    return shared / union.size;
  }

  /**
   * Cosine similarity of the two texts' word-frequency vectors.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number} Value in [0, 1]; 1 when neither text contains a word,
   *   0 when exactly one of them does not.
   */
  calculateCosineSimilarity(str1, str2) {
    // Counts occurrences of each word once, instead of re-scanning the
    // token list for every distinct word.
    const countWords = (text) => {
      const counts = new Map();
      for (const word of this.tokenizeWords(text)) {
        counts.set(word, (counts.get(word) || 0) + 1);
      }
      return counts;
    };
    const magnitudeOf = (counts) =>
      Math.sqrt([...counts.values()].reduce((sum, count) => sum + count * count, 0));

    const counts1 = countWords(str1);
    const counts2 = countWords(str2);
    if (counts1.size === 0 && counts2.size === 0) return 1;

    // Words missing from either text contribute 0 to the dot product.
    let dotProduct = 0;
    for (const [word, count] of counts1) {
      dotProduct += count * (counts2.get(word) || 0);
    }

    const magnitude1 = magnitudeOf(counts1);
    const magnitude2 = magnitudeOf(counts2);
    return magnitude1 && magnitude2 ? dotProduct / (magnitude1 * magnitude2) : 0;
  }

  /**
   * Bundles every metric of this class plus basic length statistics.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {{levenshteinDistance: number, similarityPercentage: number,
   *   jaccardSimilarity: number, cosineSimilarity: number,
   *   lengthDifference: number,
   *   characterCount: {text1: number, text2: number},
   *   wordCount: {text1: number, text2: number}}}
   */
  generateSimilarityReport(str1, str2) {
    return {
      levenshteinDistance: this.calculateLevenshteinDistance(str1, str2),
      similarityPercentage: this.calculateSimilarityPercentage(str1, str2),
      jaccardSimilarity: this.calculateJaccardSimilarity(str1, str2),
      cosineSimilarity: this.calculateCosineSimilarity(str1, str2),
      lengthDifference: Math.abs(str1.length - str2.length),
      characterCount: {
        text1: str1.length,
        text2: str2.length
      },
      wordCount: {
        // Counted with the shared tokenizer so an empty text reports 0 words.
        text1: this.tokenizeWords(str1).length,
        text2: this.tokenizeWords(str2).length
      }
    };
  }
}
Visual Diff Rendering
HTML Diff Visualization
/**
 * Renders diff operations (`{ type, value }` objects whose `type` is
 * 'equal', 'insert' or 'delete') as HTML strings.
 *
 * Each rendered element carries both a `diff-<type>` class and an inline
 * style taken from `this.styles`.
 */
class DiffRenderer {
  constructor() {
    // Inline styles per operation type; the same colours appear in generateDiffCSS().
    this.styles = {
      equal: 'background-color: transparent;',
      insert: 'background-color: #d4edda; color: #155724;',
      delete: 'background-color: #f8d7da; color: #721c24;'
    };
  }

  /**
   * Renders all operations as consecutive `<span>` elements.
   *
   * @param {Array<{type: string, value: string}>} diffResults
   * @returns {string} HTML with every value escaped.
   */
  renderInlineDiff(diffResults) {
    let html = '';
    for (const diff of diffResults) {
      const style = this.styles[diff.type];
      const className = `diff-${diff.type}`;
      html += `<span class="${className}" style="${style}">${this.escapeHtml(diff.value)}</span>`;
    }
    return html;
  }

  /**
   * Splits the operations into a left (old) and right (new) column.
   *
   * Equal operations go to both columns, deletions only to the left and
   * insertions only to the right. A row is closed on BOTH sides after any
   * operation whose value contains '\n'.
   *
   * NOTE(review): an operation is never split at its newline, so a value
   * with text after a '\n' (or with several newlines) stays within a single
   * row; values without any '\n' (e.g. line tokens produced by splitting on
   * '\n') all end up in one row. Confirm this is the intended layout.
   *
   * @param {string} text1 - Not read by this method; kept for the signature.
   * @param {string} text2 - Not read by this method; kept for the signature.
   * @param {Array<{type: string, value: string}>} diffResults
   * @returns {{left: Array<string>, right: Array<string>}} Parallel arrays
   *   of HTML row fragments.
   */
  renderSideBySideDiff(text1, text2, diffResults) {
    const lines1 = [];
    const lines2 = [];
    let line1 = '';
    let line2 = '';

    for (const diff of diffResults) {
      const className = `diff-${diff.type}`;
      const style = this.styles[diff.type];
      const content = `<span class="${className}" style="${style}">${this.escapeHtml(diff.value)}</span>`;

      switch (diff.type) {
        case 'equal':
          line1 += content;
          line2 += content;
          break;
        case 'delete':
          line1 += content;
          break;
        case 'insert':
          line2 += content;
          break;
      }

      if (diff.value.includes('\n')) {
        lines1.push(line1);
        lines2.push(line2);
        line1 = '';
        line2 = '';
      }
    }

    // Flush whatever follows the last newline.
    if (line1 || line2) {
      lines1.push(line1);
      lines2.push(line2);
    }

    return {
      left: lines1,
      right: lines2
    };
  }

  /**
   * Renders the operations as one `<div class="diff-line">` per line, each
   * with a right-aligned line number, a '+', '-' or ' ' prefix and the
   * escaped content.
   *
   * The line counter advances for equal and inserted lines only, so
   * deleted lines show the number of the line that follows them.
   *
   * @param {Array<{type: string, value: string}>} diffResults
   * @returns {string} HTML for all lines.
   */
  renderUnifiedDiff(diffResults) {
    let html = '';
    let lineNumber = 1;

    for (const diff of diffResults) {
      const lines = diff.value.split('\n');
      for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const isLastLine = i === lines.length - 1;

        // A value ending in '\n' splits into [..., '']; that empty tail is
        // not a line. A value that is '' as a whole (a blank line coming
        // from a line-level diff) IS a line and must be rendered, otherwise
        // every following line number would be off by one.
        if (isLastLine && line === '' && lines.length > 1) continue;

        const prefix = diff.type === 'insert' ? '+' : diff.type === 'delete' ? '-' : ' ';
        const className = `diff-${diff.type}`;
        const style = this.styles[diff.type];

        html += `<div class="diff-line ${className}" style="${style}">`;
        html += `<span class="line-number">${lineNumber.toString().padStart(4, ' ')}</span>`;
        html += `<span class="line-prefix">${prefix}</span>`;
        html += `<span class="line-content">${this.escapeHtml(line)}</span>`;
        html += `</div>`;

        if (diff.type !== 'delete') lineNumber++;
      }
    }

    return html;
  }

  /**
   * Escapes text for use as HTML element content.
   *
   * Implemented with plain string replacement so it also works where no
   * global `document` exists. The replaced characters are those that HTML
   * fragment serialization escapes in text nodes: '&', '<', '>' and the
   * no-break space (U+00A0).
   *
   * @param {*} text - Value to escape; null/undefined become ''.
   * @returns {string}
   */
  escapeHtml(text) {
    const entities = {
      '&': '&amp;',
      '<': '&lt;',
      '>': '&gt;',
      '\u00A0': '&nbsp;'
    };
    const source = text == null ? '' : String(text);
    return source.replace(/[&<>\u00A0]/g, (character) => entities[character]);
  }

  /**
   * Returns a stylesheet for the class names emitted by the render methods.
   *
   * @returns {string} CSS text, starting and ending with a newline.
   */
  generateDiffCSS() {
    // Joined line by line so the returned text does not depend on how this
    // source file is indented.
    return [
      '',
      '.diff-equal { background-color: transparent; }',
      '.diff-insert { background-color: #d4edda; color: #155724; }',
      '.diff-delete { background-color: #f8d7da; color: #721c24; }',
      '.diff-line {',
      "font-family: 'Courier New', monospace;",
      'white-space: pre;',
      'line-height: 1.4;',
      'padding: 2px 0;',
      '}',
      '.line-number {',
      'color: #666;',
      'margin-right: 10px;',
      'user-select: none;',
      '}',
      '.line-prefix {',
      'margin-right: 5px;',
      'font-weight: bold;',
      '}',
      '.diff-insert .line-prefix { color: #28a745; }',
      '.diff-delete .line-prefix { color: #dc3545; }',
      ''
    ].join('\n');
  }
}