Text Tools · Beginner · 📖 7 min read · 📅 2025-08-21

Text Comparison and Change Analysis Guide

From document comparison to code review, learn everything about text difference analysis

#text comparison #diff #document review #version control

Text Comparison and Diff Analysis Guide

Text comparison is essential for version control, content management, and quality assurance. This guide covers different comparison methods, implementation strategies, and practical applications for text analysis.

1. Text Comparison Fundamentals

Types of Text Comparison

Comparison Methods

/**
 * Catalogue of the comparison granularities discussed in this guide.
 * Each entry records what the method does, where it fits, how fine-grained
 * it is, and its relative cost.
 */
const comparisonTypes = (() => {
  // Small record factory so every entry is guaranteed to have the same four fields.
  const profile = (description, useCase, granularity, performance) => ({
    description,
    useCase,
    granularity,
    performance
  });

  return {
    character: profile(
      'Character-by-character comparison',
      'Precise editing, small text changes',
      'Highest precision',
      'Slower for large texts'
    ),
    word: profile(
      'Word-by-word comparison',
      'Content editing, document review',
      'Balanced precision and readability',
      'Good balance'
    ),
    line: profile(
      'Line-by-line comparison',
      'Code review, file comparison',
      'Best for structured text',
      'Fastest for large files'
    ),
    semantic: profile(
      'Meaning-based comparison',
      'Content similarity analysis',
      'Context-aware',
      'Most resource-intensive'
    )
  };
})();

Diff Algorithm Implementation

Basic Diff Algorithm

/**
 * Computes differences between two texts at character, word, or line granularity.
 *
 * Every method returns an array of entries shaped `{ type, value, position }`,
 * where `type` is one of the values in `this.diffTypes`.
 */
class TextDiffer {
  constructor() {
    this.diffTypes = {
      EQUAL: 'equal',
      INSERT: 'insert',
      DELETE: 'delete'
    };
  }

  /**
   * Positional (index-by-index) character comparison.
   *
   * Characters at the same index are compared directly; this does not realign
   * the texts after an insertion. A run of consecutive mismatching indexes is
   * reported as ONE delete entry followed by ONE insert entry, so that a replaced
   * span reads as "old text -> new text" rather than alternating single characters.
   *
   * @param {string} text1 - Original text.
   * @param {string} text2 - Modified text.
   * @returns {Array<{type: string, value: string, position: number}>}
   *   Consolidated entries; `position` is the index where the run starts.
   */
  compareByCharacter(text1, text2) {
    const results = [];
    const maxLength = Math.max(text1.length, text2.length);

    // Buffers for the mismatching run currently being collected.
    let pendingDelete = null;
    let pendingInsert = null;

    const flushPending = () => {
      if (pendingDelete) results.push(pendingDelete);
      if (pendingInsert) results.push(pendingInsert);
      pendingDelete = null;
      pendingInsert = null;
    };

    for (let i = 0; i < maxLength; i++) {
      // Past the end of a text there is no character; use '' as "nothing here".
      const char1 = i < text1.length ? text1[i] : '';
      const char2 = i < text2.length ? text2[i] : '';

      if (char1 === char2) {
        flushPending();
        results.push({
          type: this.diffTypes.EQUAL,
          value: char1,
          position: i
        });
        continue;
      }

      if (char1) {
        if (pendingDelete) {
          pendingDelete.value += char1;
        } else {
          pendingDelete = { type: this.diffTypes.DELETE, value: char1, position: i };
        }
      }
      if (char2) {
        if (pendingInsert) {
          pendingInsert.value += char2;
        } else {
          pendingInsert = { type: this.diffTypes.INSERT, value: char2, position: i };
        }
      }
    }

    flushPending();

    // Still needed to merge the per-character EQUAL entries into runs.
    return this.consolidateResults(results);
  }

  /**
   * Word-level diff. Whitespace runs are kept as their own tokens (the split
   * regex has a capturing group), so they take part in the diff like words do.
   *
   * @param {string} text1 - Original text.
   * @param {string} text2 - Modified text.
   * @returns {Array<{type: string, value: string, position: {old: ?number, new: ?number}}>}
   */
  compareByWords(text1, text2) {
    const words1 = text1.split(/(\s+)/);
    const words2 = text2.split(/(\s+)/);

    return this.longestCommonSubsequence(words1, words2);
  }

  /**
   * Line-level diff. Texts are split on '\n'; entry values do not include the newline.
   *
   * @param {string} text1 - Original text.
   * @param {string} text2 - Modified text.
   * @returns {Array<{type: string, value: string, position: {old: ?number, new: ?number}}>}
   */
  compareByLines(text1, text2) {
    const lines1 = text1.split('\n');
    const lines2 = text2.split('\n');

    return this.longestCommonSubsequence(lines1, lines2);
  }

  /**
   * Diffs two token arrays using a longest-common-subsequence table.
   * Time and memory are both proportional to `arr1.length * arr2.length`.
   *
   * @param {Array<string>} arr1 - Original tokens.
   * @param {Array<string>} arr2 - Modified tokens.
   * @returns {Array<{type: string, value: string, position: {old: ?number, new: ?number}}>}
   */
  longestCommonSubsequence(arr1, arr2) {
    const m = arr1.length;
    const n = arr2.length;
    // dp[i][j] = LCS length of arr1[0..i) and arr2[0..j).
    const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));

    for (let i = 1; i <= m; i++) {
      for (let j = 1; j <= n; j++) {
        if (arr1[i - 1] === arr2[j - 1]) {
          dp[i][j] = dp[i - 1][j - 1] + 1;
        } else {
          dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
        }
      }
    }

    return this.backtrackLCS(arr1, arr2, dp, m, n);
  }

  /**
   * Walks the LCS table from cell (i, j) back to the origin and emits the diff
   * in forward order.
   *
   * @param {Array<string>} arr1 - Original tokens.
   * @param {Array<string>} arr2 - Modified tokens.
   * @param {Array<Array<number>>} dp - Table built by `longestCommonSubsequence`.
   * @param {number} i - Starting row (normally `arr1.length`).
   * @param {number} j - Starting column (normally `arr2.length`).
   * @returns {Array<{type: string, value: string, position: {old: ?number, new: ?number}}>}
   */
  backtrackLCS(arr1, arr2, dp, i, j) {
    // Entries are collected back-to-front and reversed once at the end;
    // prepending with unshift() on every step would make the walk quadratic.
    const reversed = [];

    while (i > 0 || j > 0) {
      if (i > 0 && j > 0 && arr1[i - 1] === arr2[j - 1]) {
        reversed.push({
          type: this.diffTypes.EQUAL,
          value: arr1[i - 1],
          position: { old: i - 1, new: j - 1 }
        });
        i--;
        j--;
      } else if (i > 0 && (j === 0 || dp[i - 1][j] >= dp[i][j - 1])) {
        // On a tie the deletion is taken first while walking backwards, so in
        // the forward result an insertion precedes the deletion it replaces.
        reversed.push({
          type: this.diffTypes.DELETE,
          value: arr1[i - 1],
          position: { old: i - 1, new: null }
        });
        i--;
      } else {
        reversed.push({
          type: this.diffTypes.INSERT,
          value: arr2[j - 1],
          position: { old: null, new: j - 1 }
        });
        j--;
      }
    }

    return reversed.reverse();
  }

  /**
   * Merges adjacent entries of the same type by concatenating their values.
   * The merged entry keeps the `position` of the first entry in the run.
   * Input entries are not mutated.
   *
   * @param {Array<{type: string, value: string}>} results - Entries to merge.
   * @returns {Array<{type: string, value: string}>} New array of merged entries.
   */
  consolidateResults(results) {
    const consolidated = [];
    let current = null;

    for (const item of results) {
      if (current && current.type === item.type) {
        current.value += item.value;
      } else {
        if (current) consolidated.push(current);
        current = { ...item };
      }
    }

    if (current) consolidated.push(current);
    return consolidated;
  }
}

2. Advanced Comparison Features

Similarity Calculation

Text Similarity Metrics

/**
 * Similarity metrics for a pair of strings: Levenshtein edit distance,
 * a percentage derived from it, and word-based Jaccard / cosine similarity.
 */
class SimilarityCalculator {
  /**
   * Splits text into lower-cased words on whitespace runs.
   * Leading/trailing whitespace is trimmed first so no empty-string token is
   * produced; an empty or whitespace-only text yields an empty array.
   *
   * @param {string} text - Text to tokenize.
   * @returns {Array<string>} Lower-cased words.
   */
  static tokenize(text) {
    const trimmed = text.trim();
    return trimmed === '' ? [] : trimmed.toLowerCase().split(/\s+/);
  }

  /**
   * Counts how often each word occurs.
   *
   * @param {Array<string>} words - Tokens, e.g. from `tokenize`.
   * @returns {Map<string, number>} Word -> occurrence count.
   */
  static countWords(words) {
    const counts = new Map();
    for (const word of words) {
      counts.set(word, (counts.get(word) || 0) + 1);
    }
    return counts;
  }

  /**
   * Levenshtein edit distance (insertions, deletions, substitutions) measured
   * in UTF-16 code units.
   *
   * Only two rows of the dynamic-programming table are kept, so memory is
   * proportional to `str1.length` rather than to the product of both lengths.
   *
   * @param {string} str1 - First string.
   * @param {string} str2 - Second string.
   * @returns {number} Minimum number of single-unit edits.
   */
  calculateLevenshteinDistance(str1, str2) {
    // previous[i] = distance between str1[0..i) and the str2 prefix handled so far.
    let previous = Array.from({ length: str1.length + 1 }, (_, i) => i);

    for (let j = 1; j <= str2.length; j++) {
      const current = [j];

      for (let i = 1; i <= str1.length; i++) {
        const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
        current[i] = Math.min(
          previous[i] + 1,        // skip a unit of str2
          current[i - 1] + 1,     // skip a unit of str1
          previous[i - 1] + cost  // match or substitute
        );
      }

      previous = current;
    }

    return previous[str1.length];
  }

  /**
   * Similarity as a percentage of the longer string's length, rounded to two
   * decimals. Two empty strings are 100% similar.
   *
   * @param {string} str1 - First string.
   * @param {string} str2 - Second string.
   * @param {number} [distance] - Pre-computed Levenshtein distance for the same
   *   pair; computed here when omitted.
   * @returns {number} Value between 0 and 100.
   */
  calculateSimilarityPercentage(str1, str2, distance = this.calculateLevenshteinDistance(str1, str2)) {
    const maxLength = Math.max(str1.length, str2.length);

    if (maxLength === 0) return 100;

    const similarity = ((maxLength - distance) / maxLength) * 100;
    return Math.round(similarity * 100) / 100;
  }

  /**
   * Jaccard similarity of the two texts' word sets (case-insensitive).
   *
   * @param {string} str1 - First text.
   * @param {string} str2 - Second text.
   * @returns {number} |intersection| / |union|, in [0, 1]; 1 when neither text has words.
   */
  calculateJaccardSimilarity(str1, str2) {
    const set1 = new Set(SimilarityCalculator.tokenize(str1));
    const set2 = new Set(SimilarityCalculator.tokenize(str2));

    const unionSize = new Set([...set1, ...set2]).size;
    // No words on either side: treat the texts as identical instead of dividing by zero.
    if (unionSize === 0) return 1;

    let sharedCount = 0;
    for (const word of set1) {
      if (set2.has(word)) sharedCount++;
    }

    return sharedCount / unionSize;
  }

  /**
   * Cosine similarity of the two texts' word-frequency vectors (case-insensitive).
   *
   * @param {string} str1 - First text.
   * @param {string} str2 - Second text.
   * @returns {number} Value in [0, 1]; 1 when neither text has words,
   *   0 when exactly one of them has none.
   */
  calculateCosineSimilarity(str1, str2) {
    const counts1 = SimilarityCalculator.countWords(SimilarityCalculator.tokenize(str1));
    const counts2 = SimilarityCalculator.countWords(SimilarityCalculator.tokenize(str2));

    if (counts1.size === 0 && counts2.size === 0) return 1;

    // Words missing from either text contribute 0, so iterating one map is enough.
    let dotProduct = 0;
    for (const [word, count] of counts1) {
      dotProduct += count * (counts2.get(word) || 0);
    }

    const magnitudeOf = (counts) => {
      let sumOfSquares = 0;
      for (const count of counts.values()) sumOfSquares += count * count;
      return Math.sqrt(sumOfSquares);
    };
    const magnitude1 = magnitudeOf(counts1);
    const magnitude2 = magnitudeOf(counts2);

    return magnitude1 && magnitude2 ? dotProduct / (magnitude1 * magnitude2) : 0;
  }

  /**
   * Bundles every metric plus basic size statistics for a pair of texts.
   *
   * @param {string} str1 - First text.
   * @param {string} str2 - Second text.
   * @returns {{levenshteinDistance: number, similarityPercentage: number,
   *   jaccardSimilarity: number, cosineSimilarity: number, lengthDifference: number,
   *   characterCount: {text1: number, text2: number},
   *   wordCount: {text1: number, text2: number}}}
   */
  generateSimilarityReport(str1, str2) {
    // The distance is the expensive part; compute it once and reuse it.
    const levenshteinDistance = this.calculateLevenshteinDistance(str1, str2);

    return {
      levenshteinDistance,
      similarityPercentage: this.calculateSimilarityPercentage(str1, str2, levenshteinDistance),
      jaccardSimilarity: this.calculateJaccardSimilarity(str1, str2),
      cosineSimilarity: this.calculateCosineSimilarity(str1, str2),
      lengthDifference: Math.abs(str1.length - str2.length),
      characterCount: {
        text1: str1.length,
        text2: str2.length
      },
      wordCount: {
        text1: SimilarityCalculator.tokenize(str1).length,
        text2: SimilarityCalculator.tokenize(str2).length
      }
    };
  }
}

Visual Diff Rendering

HTML Diff Visualization

/**
 * Turns diff entries (`{ type, value }` with type 'equal' | 'insert' | 'delete')
 * into HTML fragments: inline, side-by-side, or unified.
 *
 * All diff text is HTML-escaped before being placed into markup.
 */
class DiffRenderer {
  constructor() {
    // Inline styles per diff type; mirrored by the classes in generateDiffCSS().
    this.styles = {
      equal: 'background-color: transparent;',
      insert: 'background-color: #d4edda; color: #155724;',
      delete: 'background-color: #f8d7da; color: #721c24;'
    };
  }

  /**
   * Builds the `<span>` used by the inline and side-by-side renderers.
   *
   * @param {{type: string, value: string}} diff - One diff entry.
   * @returns {string} HTML for the entry.
   */
  renderSpan(diff) {
    const className = `diff-${diff.type}`;
    const style = this.styles[diff.type];

    return `<span class="${className}" style="${style}">${this.escapeHtml(diff.value)}</span>`;
  }

  /**
   * Renders all entries one after another as styled spans.
   *
   * @param {Array<{type: string, value: string}>} diffResults - Diff entries.
   * @returns {string} Concatenated HTML.
   */
  renderInlineDiff(diffResults) {
    let html = '';

    for (const diff of diffResults) {
      html += this.renderSpan(diff);
    }

    return html;
  }

  /**
   * Splits the diff into two parallel columns: deletions appear on the left
   * only, insertions on the right only, equal text on both.
   *
   * A new row starts on BOTH sides whenever an entry's value contains '\n',
   * which keeps the two arrays the same length.
   *
   * @param {string} text1 - Unused; kept for signature compatibility.
   * @param {string} text2 - Unused; kept for signature compatibility.
   * @param {Array<{type: string, value: string}>} diffResults - Diff entries.
   * @returns {{left: Array<string>, right: Array<string>}} HTML rows per side.
   */
  renderSideBySideDiff(text1, text2, diffResults) {
    const lines1 = [];
    const lines2 = [];
    let line1 = '';
    let line2 = '';

    for (const diff of diffResults) {
      const content = this.renderSpan(diff);

      switch (diff.type) {
        case 'equal':
          line1 += content;
          line2 += content;
          break;
        case 'delete':
          line1 += content;
          break;
        case 'insert':
          line2 += content;
          break;
      }

      if (diff.value.includes('\n')) {
        lines1.push(line1);
        lines2.push(line2);
        line1 = '';
        line2 = '';
      }
    }

    // Flush whatever is left after the last newline-bearing entry.
    if (line1 || line2) {
      lines1.push(line1);
      lines2.push(line2);
    }

    return {
      left: lines1,
      right: lines2
    };
  }

  /**
   * Renders entries as unified-diff rows: one `<div>` per line with a line
   * number, a '+', '-' or ' ' prefix, and the escaped line content.
   *
   * The line number advances for equal and inserted lines only. Within an
   * entry, an empty final segment (for example the part after a trailing '\n')
   * produces no row.
   *
   * @param {Array<{type: string, value: string}>} diffResults - Diff entries.
   * @returns {string} Concatenated HTML rows.
   */
  renderUnifiedDiff(diffResults) {
    let html = '';
    let lineNumber = 1;

    for (const diff of diffResults) {
      const lines = diff.value.split('\n');

      for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const isLastLine = i === lines.length - 1;

        if (!isLastLine || line) {
          const prefix = diff.type === 'insert' ? '+' : diff.type === 'delete' ? '-' : ' ';
          const className = `diff-${diff.type}`;
          const style = this.styles[diff.type];

          html += `<div class="diff-line ${className}" style="${style}">`;
          html += `<span class="line-number">${lineNumber.toString().padStart(4, ' ')}</span>`;
          html += `<span class="line-prefix">${prefix}</span>`;
          html += `<span class="line-content">${this.escapeHtml(line)}</span>`;
          html += `</div>`;

          if (diff.type !== 'delete') lineNumber++;
        }
      }
    }

    return html;
  }

  /**
   * Escapes text for safe inclusion in HTML.
   *
   * Implemented with string replacement rather than a temporary DOM element so
   * it also works where `document` is unavailable (Node.js, web workers).
   * Quotes are escaped too, which keeps the result safe inside attribute values.
   *
   * @param {*} text - Value to escape; null/undefined become ''.
   * @returns {string} Escaped text.
   */
  escapeHtml(text) {
    if (text === null || text === undefined) return '';

    // '&' must be replaced first so the entities added below are not re-escaped.
    return String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /**
   * Returns a stylesheet for the classes emitted by the render methods.
   *
   * @returns {string} CSS text.
   */
  generateDiffCSS() {
    return `
      .diff-equal { background-color: transparent; }
      .diff-insert { background-color: #d4edda; color: #155724; }
      .diff-delete { background-color: #f8d7da; color: #721c24; }

      .diff-line {
        font-family: 'Courier New', monospace;
        white-space: pre;
        line-height: 1.4;
        padding: 2px 0;
      }

      .line-number {
        color: #666;
        margin-right: 10px;
        user-select: none;
      }

      .line-prefix {
        margin-right: 5px;
        font-weight: bold;
      }

      .diff-insert .line-prefix { color: #28a745; }
      .diff-delete .line-prefix { color: #dc3545; }
    `;
  }
}

이 포스팅은 쿠팡 파트너스 활동의 일환으로, 이에 따른 일정액의 수수료를 제공받습니다.

3. Performance Optimization

Large Text Handling

Efficient Comparison Strategies

/**
 * Line-based differ for large texts: compares fixed-size chunks of lines one
 * at a time, reports progress, and yields to the event loop between chunks.
 *
 * Chunks are paired by index (chunk k of text1 with chunk k of text2), so the
 * comparison does not realign across chunk boundaries.
 */
class OptimizedDiffer {
  constructor() {
    this.chunkSize = 1000; // Lines per chunk
    this.similarityThreshold = 0.8;
  }

  /**
   * Diffs two texts chunk by chunk.
   *
   * @param {string} text1 - Original text.
   * @param {string} text2 - Modified text.
   * @param {function({completed: number, total: number, percentage: number}): void} [progressCallback]
   *   Called after each chunk.
   * @returns {Promise<Array<{type: string, value: string, position: *}>>}
   *   Diff entries for all chunks, with `position.old` / `position.new`
   *   expressed as line indexes in the whole text.
   */
  async compareWithProgress(text1, text2, progressCallback) {
    const lines1 = text1.split('\n');
    const lines2 = text2.split('\n');

    const totalChunks = Math.ceil(Math.max(lines1.length, lines2.length) / this.chunkSize);
    const results = [];

    for (let i = 0; i < totalChunks; i++) {
      const start = i * this.chunkSize;
      const chunk1 = lines1.slice(start, start + this.chunkSize);
      const chunk2 = lines2.slice(start, start + this.chunkSize);

      const chunkResult = await this.compareChunk(chunk1, chunk2);

      // Chunk results are indexed from 0; shift them so positions refer to
      // lines of the full text rather than restarting in every chunk.
      const shift = (index) => (typeof index === 'number' ? index + start : index);
      for (const entry of chunkResult) {
        const { position } = entry;
        if (position !== null && typeof position === 'object') {
          results.push({
            ...entry,
            position: { ...position, old: shift(position.old), new: shift(position.new) }
          });
        } else {
          results.push(entry);
        }
      }

      if (progressCallback) {
        progressCallback({
          completed: i + 1,
          total: totalChunks,
          percentage: Math.round(((i + 1) / totalChunks) * 100)
        });
      }

      // Allow UI to update
      await new Promise(resolve => setTimeout(resolve, 0));
    }

    return results;
  }

  /**
   * Diffs one pair of line chunks.
   *
   * The line arrays are handed to the differ directly. Joining them into a
   * string and splitting again would turn an empty chunk (`[]`, which happens
   * once the shorter text is exhausted) into a phantom empty line.
   *
   * @param {Array<string>} chunk1 - Lines from the original text.
   * @param {Array<string>} chunk2 - Lines from the modified text.
   * @returns {Promise<Array<{type: string, value: string, position: {old: ?number, new: ?number}}>>}
   */
  async compareChunk(chunk1, chunk2) {
    const differ = new TextDiffer();
    return differ.longestCommonSubsequence(chunk1, chunk2);
  }

  /**
   * Normalises text before comparison. Steps run in a fixed order so that each
   * one sees consistent input: line endings, case, in-line whitespace, empty lines.
   *
   * @param {string} text - Text to normalise.
   * @param {Object} [options]
   * @param {boolean} [options.normalizeLineEndings] - Convert '\r\n' and '\r' to '\n'.
   * @param {boolean} [options.ignoreCase] - Lower-case the text.
   * @param {boolean} [options.ignoreWhitespace] - Collapse whitespace runs to one
   *   space and trim, per line. Line breaks are preserved so a line-based
   *   comparison remains meaningful.
   * @param {boolean} [options.ignoreEmptyLines] - Remove blank or whitespace-only
   *   lines that are followed by a line break.
   * @returns {string} Normalised text.
   */
  preprocessForComparison(text, options = {}) {
    // Capturing group keeps the terminators: [line, break, line, break, ..., line].
    const lineBreak = /(\r\n|\r|\n)/;
    let processed = text;

    // Must run first: the later steps operate on lines.
    if (options.normalizeLineEndings) {
      processed = processed.replace(/\r\n?/g, '\n');
    }

    if (options.ignoreCase) {
      processed = processed.toLowerCase();
    }

    if (options.ignoreWhitespace) {
      processed = processed
        .split(lineBreak)
        .map((part, index) => (index % 2 === 0 ? part.replace(/\s+/g, ' ').trim() : part))
        .join('');
    }

    if (options.ignoreEmptyLines) {
      const parts = processed.split(lineBreak);
      let kept = '';

      for (let index = 0; index < parts.length; index += 2) {
        const line = parts[index];
        const terminator = parts[index + 1]; // undefined for the final segment

        // Drop a blank line together with its terminator; the final segment is
        // always kept so a trailing newline survives.
        if (terminator !== undefined && line.trim() === '') continue;

        kept += terminator === undefined ? line : line + terminator;
      }

      processed = kept;
    }

    return processed;
  }
}

4. Practical Applications

Code Review Integration

Git-Style Diff Implementation

/**
 * Produces a unified-diff style patch for two versions of a file.
 *
 * The patch contains a single hunk that spans the whole file (all unchanged
 * lines are included as context). It does not emit the
 * "\ No newline at end of file" marker.
 */
class GitStyleDiffer {
  /**
   * Builds a patch describing how to turn `originalContent` into `modifiedContent`.
   *
   * @param {string} originalContent - Old file content.
   * @param {string} modifiedContent - New file content.
   * @param {string} fileName - Path used in the `--- a/` and `+++ b/` header lines.
   * @returns {string} File header only when the contents have no differing
   *   lines; otherwise header, hunk header and hunk body.
   */
  generatePatch(originalContent, modifiedContent, fileName) {
    const differ = new TextDiffer();
    const diffs = differ.longestCommonSubsequence(
      this.splitContentLines(originalContent),
      this.splitContentLines(modifiedContent)
    );

    return this.formatPatch(diffs, fileName);
  }

  /**
   * Formats line-level diff entries as a patch.
   *
   * The hunk header is derived from the lines actually written to the hunk:
   * the old count is every ' ' and '-' line, the new count every ' ' and '+'
   * line. Because the hunk begins at the first line of the file, the start
   * line is 1 (or 0 when that side has no lines, per the unified format).
   *
   * @param {Array<{type: string, value: string}>} diffs - Entries with type
   *   'equal' | 'delete' | 'insert'.
   * @param {string} fileName - Path used in the file header.
   * @returns {string} Patch text without a trailing newline.
   */
  formatPatch(diffs, fileName) {
    const fileHeader = `--- a/${fileName}\n+++ b/${fileName}\n`;
    const hunkContent = [];
    let originalLineCount = 0;
    let modifiedLineCount = 0;
    let hasChanges = false;

    for (const diff of diffs) {
      for (const line of this.splitDiffValue(diff.value)) {
        switch (diff.type) {
          case 'equal':
            hunkContent.push(` ${line}`);
            originalLineCount++;
            modifiedLineCount++;
            break;
          case 'delete':
            hunkContent.push(`-${line}`);
            originalLineCount++;
            hasChanges = true;
            break;
          case 'insert':
            hunkContent.push(`+${line}`);
            modifiedLineCount++;
            hasChanges = true;
            break;
        }
      }
    }

    if (!hasChanges) {
      return fileHeader;
    }

    const originalStart = originalLineCount > 0 ? 1 : 0;
    const modifiedStart = modifiedLineCount > 0 ? 1 : 0;
    const hunkHeader =
      `@@ -${originalStart},${originalLineCount} +${modifiedStart},${modifiedLineCount} @@`;

    return `${fileHeader}${hunkHeader}\n${hunkContent.join('\n')}`;
  }

  /**
   * Counts the lines carried by all entries of one type.
   *
   * @param {Array<{type: string, value: string}>} diffs - Diff entries.
   * @param {string} type - 'equal', 'delete' or 'insert'.
   * @returns {number} Total number of lines in matching entries.
   */
  countLines(diffs, type) {
    return diffs
      .filter(diff => diff.type === type)
      .reduce((count, diff) => count + this.splitDiffValue(diff.value).length, 0);
  }

  /**
   * Splits file content into lines. A single trailing '\n' terminates the last
   * line rather than starting an extra empty one; empty content has no lines.
   *
   * @param {string} content - File content.
   * @returns {Array<string>} Lines without their '\n'.
   */
  splitContentLines(content) {
    if (content === '') return [];

    const lines = content.split('\n');
    if (lines[lines.length - 1] === '') lines.pop();
    return lines;
  }

  /**
   * Splits one diff entry's value into lines. A value without any '\n' is
   * exactly one line, even when it is the empty string (a blank line). For
   * multi-line values an empty final segment is the artefact of a trailing
   * '\n' and is dropped.
   *
   * @param {string} value - Diff entry value.
   * @returns {Array<string>} Lines carried by the entry.
   */
  splitDiffValue(value) {
    const lines = value.split('\n');
    if (lines.length > 1 && lines[lines.length - 1] === '') lines.pop();
    return lines;
  }
}

Document Version Control

Version Comparison System

/**
 * In-memory version history for documents, with line-level comparison between
 * any two saved versions.
 */
class DocumentVersionController {
  constructor() {
    // documentId -> array of version records, oldest first.
    this.versions = new Map();
    this.differ = new TextDiffer();
  }

  /**
   * Appends a new version to a document's history, creating the history on
   * first use. Version ids are 1-based and follow insertion order.
   *
   * @param {*} documentId - Key identifying the document.
   * @param {string} content - Full document text for this version.
   * @param {*} author - Stored as-is on the record.
   * @param {string} [message=''] - Optional description of the change.
   * @returns {{id: number, content: string, author: *, message: string,
   *   timestamp: string, contentHash: string}} The stored record.
   */
  saveVersion(documentId, content, author, message = '') {
    if (!this.versions.has(documentId)) {
      this.versions.set(documentId, []);
    }
    const history = this.versions.get(documentId);

    const record = {
      id: history.length + 1,
      content,
      author,
      message,
      timestamp: new Date().toISOString(),
      contentHash: this.generateHash(content)
    };
    history.push(record);

    return record;
  }

  /**
   * Compares two saved versions of one document.
   *
   * @param {*} documentId - Key identifying the document.
   * @param {number} version1Id - Id of the base version.
   * @param {number} version2Id - Id of the version to compare against it.
   * @returns {?{version1: Object, version2: Object, differences: Array,
   *   similarity: number, statistics: Object}} `null` when the document or
   *   either version is unknown.
   */
  compareVersions(documentId, version1Id, version2Id) {
    const history = this.versions.get(documentId);
    if (!history) {
      return null;
    }

    const findById = (wantedId) => history.find(({ id }) => id === wantedId);
    const base = findById(version1Id);
    const target = findById(version2Id);
    if (!(base && target)) {
      return null;
    }

    const differences = this.differ.compareByLines(base.content, target.content);
    const calculator = new SimilarityCalculator();
    const similarity = calculator.calculateSimilarityPercentage(base.content, target.content);

    return {
      version1: base,
      version2: target,
      differences,
      similarity,
      statistics: this.calculateDiffStatistics(differences)
    };
  }

  /**
   * Tallies line counts per change type. `modified` is the sum of added and
   * deleted lines.
   *
   * @param {Array<{type: string, value: string}>} diffs - Diff entries.
   * @returns {{added: number, deleted: number, modified: number, unchanged: number}}
   */
  calculateDiffStatistics(diffs) {
    const counterFor = new Map([
      ['insert', 'added'],
      ['delete', 'deleted'],
      ['equal', 'unchanged']
    ]);
    const stats = { added: 0, deleted: 0, modified: 0, unchanged: 0 };

    for (const { type, value } of diffs) {
      // Number of '\n' characters plus one.
      const lineCount = value.split('\n').length;
      const counter = counterFor.get(type);

      if (counter !== undefined) {
        stats[counter] += lineCount;
      }
    }

    stats.modified = stats.added + stats.deleted;
    return stats;
  }

  /**
   * Simple non-cryptographic hash for demonstration: `hash * 31 + charCode`,
   * wrapped to a signed 32-bit integer after every character.
   *
   * @param {string} content - Text to hash.
   * @returns {string} Hexadecimal representation (with a leading '-' when negative).
   */
  generateHash(content) {
    let hash = 0;

    for (let index = 0; index < content.length; index++) {
      // Math.imul wraps the multiplication; `| 0` wraps the addition.
      hash = (Math.imul(hash, 31) + content.charCodeAt(index)) | 0;
    }

    return hash.toString(16);
  }
}

Conclusion

Text comparison and diff analysis are essential tools for content management, version control, and quality assurance. By implementing efficient algorithms and providing clear visualizations, developers can create powerful tools that help users understand and manage textual changes effectively.

Key principles for effective text comparison:

  1. Choose appropriate algorithms - Character, word, or line-based comparison
  2. Optimize for performance - Handle large texts efficiently
  3. Provide clear visualization - Make differences easy to understand
  4. Include similarity metrics - Quantify the degree of change
  5. Support preprocessing options - Handle whitespace, case, and formatting variations
  6. Enable progressive loading - Process large comparisons incrementally
Text Comparison and Change Analysis Guide | DDTool