Text Tools · Intermediate · 📖 10 min read · 📅 2025-08-21

Regular Expression Testing and Pattern Matching Guide

From regex basics to advanced patterns: practical regex testing know-how you can put to use at work right away.

#regex #regular expression #pattern matching #developer tools

Regular Expression Testing and Pattern Matching Guide

Regular expressions (regex) are powerful tools for pattern matching and text processing. This guide covers regex syntax, common patterns, and best practices for effective pattern matching.

1. Regular Expression Fundamentals

Basic Syntax and Metacharacters

Core Metacharacters

// Quick-reference table of the basic regex building blocks.
// `literals` describes plain-character matching; `metacharacters` maps each
// special token (the object key) to a one-line description of its meaning.
const regexBasics = {
  // Literal matching: the pattern text must appear verbatim in the input.
  literals: {
    description: 'Match exact characters',
    example: '/hello/ matches "hello"',
    usage: 'Simple text matching'
  },

  // Keys are the metacharacter tokens themselves; values describe them.
  // Paired tokens such as '()' and '[]' stand for the whole construct.
  metacharacters: {
    '.': 'Any character except newline',
    '^': 'Start of string/line',
    '$': 'End of string/line',
    '*': 'Zero or more of preceding',
    '+': 'One or more of preceding',
    '?': 'Zero or one of preceding',
    '|': 'OR operator',
    '()': 'Grouping and capturing',
    '[]': 'Character class',
    '{}': 'Quantifiers'
  }
};

Character Classes and Quantifiers

Common Patterns

// Reference table of character classes and quantifiers.
// Every key is the regex syntax being described; every value is its meaning.
const regexPatterns = {
  // Bracket expressions written out by hand.
  characterClasses: {
    '[abc]': 'Match a, b, or c',
    '[a-z]': 'Match any lowercase letter',
    '[A-Z]': 'Match any uppercase letter',
    '[0-9]': 'Match any digit',
    '[^abc]': 'Match anything except a, b, or c'
  },

  // Shorthand escapes. The keys are string literals, so the backslash is
  // doubled: '\\d' is the two-character text \d.
  predefinedClasses: {
    '\\d': 'Digit [0-9]',
    '\\D': 'Non-digit [^0-9]',
    '\\w': 'Word character [a-zA-Z0-9_]',
    '\\W': 'Non-word character',
    '\\s': 'Whitespace character',
    '\\S': 'Non-whitespace character'
  },

  // Repetition counts; the last three entries show the brace form each
  // single-character quantifier is shorthand for.
  quantifiers: {
    '{n}': 'Exactly n times',
    '{n,}': 'n or more times',
    '{n,m}': 'Between n and m times',
    '*': 'Zero or more {0,}',
    '+': 'One or more {1,}',
    '?': 'Zero or one {0,1}'
  }
};

2. Common Use Cases and Patterns

Email Validation

Email Pattern Implementation

/**
 * Checks e-mail addresses against two regular expressions of differing
 * strictness and can break an address down into its parts.
 */
class EmailValidator {
  constructor() {
    // Loose check: "<text>@<text>.<text>" where no part contains whitespace
    // or an additional "@".
    this.basicPattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;

    // Stricter check: the local part is limited to a fixed character set and
    // the domain must be dot-separated labels of 1-63 characters that neither
    // start nor end with a hyphen. A dot in the domain is not required.
    this.comprehensivePattern = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;
  }

  /**
   * @param {string} email - Address to check.
   * @returns {boolean} True when the address matches the loose pattern.
   */
  validateBasic(email) {
    const { basicPattern } = this;
    return basicPattern.test(email);
  }

  /**
   * @param {string} email - Address to check.
   * @returns {boolean} True when the address matches the stricter pattern.
   */
  validateComprehensive(email) {
    const { comprehensivePattern } = this;
    return comprehensivePattern.test(email);
  }

  /**
   * Splits an address at "@" and reports on both halves.
   *
   * @param {string} email - Address to analyse.
   * @returns {object} `{ valid: false, error }` unless the address contains
   *   exactly one "@"; otherwise the stricter-pattern verdict together with
   *   the local part, the domain, their lengths, and whether the domain ends
   *   in a dot followed by at least two letters.
   */
  getValidationDetails(email) {
    const segments = email.split('@');

    if (segments.length === 2) {
      const localPart = segments[0];
      const domain = segments[1];
      const valid = this.validateComprehensive(email);
      const tldCheck = /\.[a-zA-Z]{2,}$/;

      return {
        valid,
        localPart,
        domain,
        localLength: localPart.length,
        domainLength: domain.length,
        hasValidTLD: tldCheck.test(domain)
      };
    }

    return { valid: false, error: 'Must contain exactly one @ symbol' };
  }
}

Phone Number Patterns

Phone Number Validation

/**
 * Validates and formats phone numbers, with dedicated handling for
 * 10-digit US-style numbers (area code, exchange, line number).
 */
class PhoneValidator {
  constructor() {
    this.patterns = {
      // Optional "+1", optional separator, then 3-3-4 digits with optional
      // "-", "." or whitespace separators. The lookahead requires the area
      // code to be either fully parenthesised or bare, so half-open inputs
      // such as "(555 123-4567" or "555) 123-4567" are rejected.
      // Capture groups: 1 = "+1", 2 = area code, 3 = exchange, 4 = line number.
      us: /^(\+1)?[-.\s]?(?=\([0-9]{3}\)|[0-9]{3}(?!\)))\(?([0-9]{3})\)?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})$/,
      // Optional "+", then 2-15 digits whose first digit is non-zero.
      international: /^\+?[1-9]\d{1,14}$/,
      // Optional "+", then 1-16 digits whose first digit is non-zero.
      generic: /^[\+]?[1-9][\d]{0,15}$/
    };
  }

  /**
   * Validates a US phone number and splits it into its components.
   *
   * @param {string} phone - Candidate phone number.
   * @returns {object} `{ valid: false }` when the input is not a string or
   *   does not match; otherwise `valid: true` plus `formatted`
   *   ("(AAA) EEE-NNNN"), `areaCode`, `exchange` and `number`.
   */
  validateUSPhone(phone) {
    // Non-string input cannot be a phone number; report it as invalid
    // instead of throwing.
    if (typeof phone !== 'string') {
      return { valid: false };
    }

    const match = phone.match(this.patterns.us);
    if (!match) {
      return { valid: false };
    }

    const areaCode = match[2];
    const exchange = match[3];
    const number = match[4];

    return {
      valid: true,
      formatted: `(${areaCode}) ${exchange}-${number}`,
      areaCode,
      exchange,
      number
    };
  }

  /**
   * Re-formats a US phone number.
   *
   * All non-digits are stripped first. A leading country code "1" on an
   * 11-digit number is dropped, matching what `validateUSPhone` accepts.
   *
   * @param {string} phone - Phone number in any punctuation style.
   * @param {string} [format='standard'] - 'standard' => "(555) 123-4567",
   *   'dots' => "555.123.4567", 'dashes' => "555-123-4567"; any other value
   *   yields the bare 10 digits.
   * @returns {*} The formatted number, or `phone` unchanged when it is not a
   *   string or does not reduce to 10 digits.
   */
  formatPhoneNumber(phone, format = 'standard') {
    if (typeof phone !== 'string') {
      return phone;
    }

    let digits = phone.replace(/\D/g, '');

    // "+1 555 123 4567" reduces to 11 digits; strip the country code so it is
    // formatted like any other US number.
    if (digits.length === 11 && digits.startsWith('1')) {
      digits = digits.slice(1);
    }

    if (digits.length !== 10) {
      return phone;
    }

    const areaCode = digits.slice(0, 3);
    const exchange = digits.slice(3, 6);
    const lineNumber = digits.slice(6);

    switch (format) {
      case 'standard':
        return `(${areaCode}) ${exchange}-${lineNumber}`;
      case 'dots':
        return `${areaCode}.${exchange}.${lineNumber}`;
      case 'dashes':
        return `${areaCode}-${exchange}-${lineNumber}`;
      default:
        return digits;
    }
  }
}

URL and Domain Validation

URL Pattern Matching

/**
 * Validates http(s) URLs and bare domain names, and extracts URLs from text.
 */
class URLValidator {
  constructor() {
    // http:// or https://, optional "www.", a host part, a dot, and a final
    // label of up to 63 characters (the DNS label limit), then an optional
    // path/query/fragment. A 6-character cap on the final label would reject
    // long TLDs such as ".photography".
    this.urlPattern = /^https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,63}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$/;
    // One or more dot-terminated labels followed by a final label; each label
    // is 1-63 alphanumeric/hyphen characters and may not start or end with "-".
    this.domainPattern = /^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$/;
  }

  /**
   * Parses a URL and checks it against `urlPattern`.
   *
   * @param {string} url - Candidate URL.
   * @returns {object} `{ valid: false, error }` when the `URL` constructor
   *   rejects the input; otherwise the pattern verdict in `valid` plus the
   *   parsed `protocol`, `hostname`, `pathname`, `search` and `hash`.
   */
  validateURL(url) {
    let urlObj;
    try {
      urlObj = new URL(url);
    } catch {
      // The URL constructor throws on unparseable input; report that as a
      // validation failure rather than propagating the exception.
      return { valid: false, error: 'Invalid URL format' };
    }

    return {
      valid: this.urlPattern.test(url),
      protocol: urlObj.protocol,
      hostname: urlObj.hostname,
      pathname: urlObj.pathname,
      search: urlObj.search,
      hash: urlObj.hash
    };
  }

  /**
   * Finds every http(s) URL in a piece of text.
   *
   * @param {string} text - Text to scan.
   * @returns {string[]} The matched URLs in order of appearance (empty when
   *   there are none).
   */
  extractURLs(text) {
    // Same shape as `urlPattern`, but unanchored and global so it can find
    // URLs embedded in surrounding text.
    const urlRegex = /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,63}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/g;
    return text.match(urlRegex) || [];
  }

  /**
   * Checks a bare domain name and reports on its label structure.
   *
   * @param {string} domain - Domain name such as "sub.example.com".
   * @returns {{valid: boolean, subdomain: boolean, tld: string, levels: number}}
   *   `subdomain` is true when there are more than two labels, `tld` is the
   *   last label, and `levels` is the label count.
   */
  validateDomain(domain) {
    // Split once and reuse the labels for every derived field.
    const labels = domain.split('.');

    return {
      valid: this.domainPattern.test(domain),
      subdomain: labels.length > 2,
      tld: labels[labels.length - 1],
      levels: labels.length
    };
  }
}

3. Advanced Pattern Matching

Lookahead and Lookbehind

Advanced Assertions

/**
 * Demonstrates lookahead assertions and uses the same ideas to grade
 * password strength.
 */
class AdvancedPatterns {
  constructor() {
    this.patterns = {
      // "hello" counts only when whitespace and "world" come right after it.
      positiveLookahead: /hello(?=\s+world)/,

      // "hello" counts only when whitespace and "world" do NOT come right after it.
      negativeLookahead: /hello(?!\s+world)/,

      // Four lookaheads demand a lowercase letter, an uppercase letter, a
      // digit and one of @$!%*?& somewhere in the string; the body then
      // limits the password to those character sets and a length of 8+.
      strongPassword: /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$/
    };
  }

  /**
   * Evaluates a password against five independent rules.
   *
   * @param {string} password - Password to grade.
   * @returns {{valid: boolean, requirements: object, strength: string}}
   *   `requirements` holds one boolean per rule, `valid` is true only when
   *   every rule passes, and `strength` is the label derived from the number
   *   of rules met.
   */
  validateStrongPassword(password) {
    const longEnough = password.length >= 8;
    const lower = /[a-z]/.test(password);
    const upper = /[A-Z]/.test(password);
    const digit = /\d/.test(password);
    const special = /[@$!%*?&]/.test(password);

    const requirements = {
      minLength: longEnough,
      hasLowercase: lower,
      hasUppercase: upper,
      hasDigit: digit,
      hasSpecial: special
    };

    return {
      valid: Object.values(requirements).every(Boolean),
      requirements,
      strength: this.calculatePasswordStrength(requirements)
    };
  }

  /**
   * Maps the number of satisfied requirements to a strength label.
   *
   * @param {object} requirements - Object whose truthy values are counted.
   * @returns {string} 'Very Weak' for zero or one satisfied entry, rising one
   *   label per additional entry up to 'Strong' for five or more.
   */
  calculatePasswordStrength(requirements) {
    const levels = ['Very Weak', 'Weak', 'Fair', 'Good', 'Strong'];

    let satisfied = 0;
    for (const flag of Object.values(requirements)) {
      if (flag) {
        satisfied += 1;
      }
    }

    // A count of zero gives index -1, which has no label; fall back to the
    // lowest level in that case.
    const label = levels[Math.min(satisfied - 1, 4)];
    return label || 'Very Weak';
  }
}

Text Processing and Extraction

Data Extraction Patterns

/**
 * Extracts, removes, masks and highlights common tokens (hashtags, mentions,
 * e-mail addresses, dates, card numbers, IPv4-shaped strings) in free text.
 */
class TextExtractor {
  constructor() {
    // All patterns are global so `match` returns every occurrence and
    // `replace` rewrites every occurrence.
    this.patterns = {
      hashtags: /#[\w]+/g,
      // NOTE: this also matches the "@domain" part of an e-mail address.
      mentions: /@[\w]+/g,
      // The TLD class is [A-Za-z]; a "|" inside a character class is a
      // literal pipe, not alternation, so it must not appear here.
      emails: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
      // d/m/y-shaped dates with "/" or "-" separators; values are not range-checked.
      dates: /\b\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}\b/g,
      // Four groups of four digits, optionally separated by whitespace or "-".
      creditCard: /\b\d{4}[\s\-]?\d{4}[\s\-]?\d{4}[\s\-]?\d{4}\b/g,
      // Four dot-separated groups of 1-3 digits; octets are not range-checked.
      ipAddress: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g
    };
  }

  /**
   * Runs every pattern in `this.patterns` over the text.
   *
   * @param {string} text - Text to scan.
   * @returns {Object<string, string[]>} One array of matches per pattern
   *   name; an empty array when a pattern finds nothing.
   */
  extractAll(text) {
    const results = {};

    for (const [type, pattern] of Object.entries(this.patterns)) {
      results[type] = text.match(pattern) || [];
    }

    return results;
  }

  /**
   * Removes or masks selected token types, then trims the text and collapses
   * runs of whitespace to a single space.
   *
   * @param {string} text - Text to clean.
   * @param {object} [options] - Flags: `removeHashtags`, `removeMentions`,
   *   `maskEmails`, `maskCreditCards`.
   * @returns {string} The cleaned text.
   */
  cleanText(text, options = {}) {
    let cleaned = text;

    // Mask before removing: the mention pattern matches the "@domain" part of
    // an e-mail address, so stripping mentions first would turn
    // "bob@example.com" into "bob.com" and leave nothing for the mask to find.
    if (options.maskEmails) {
      cleaned = cleaned.replace(this.patterns.emails, '[EMAIL HIDDEN]');
    }

    if (options.maskCreditCards) {
      cleaned = cleaned.replace(this.patterns.creditCard, '[CARD NUMBER HIDDEN]');
    }

    if (options.removeHashtags) {
      cleaned = cleaned.replace(this.patterns.hashtags, '');
    }

    if (options.removeMentions) {
      cleaned = cleaned.replace(this.patterns.mentions, '');
    }

    return cleaned.trim().replace(/\s+/g, ' ');
  }

  /**
   * Wraps each match in a `<span>` carrying the given class name.
   *
   * A string pattern is compiled as regex source with the `gi` flags; a
   * RegExp is used as given, so without the `g` flag only its first match is
   * wrapped. The matched text and the class name are inserted as-is, with no
   * HTML escaping.
   *
   * @param {string} text - Text to annotate.
   * @param {string|RegExp} pattern - What to highlight.
   * @param {string} [className='highlight'] - Class for the wrapping span.
   * @returns {string} The text with matches wrapped.
   */
  highlightMatches(text, pattern, className = 'highlight') {
    const matcher = typeof pattern === 'string' ? new RegExp(pattern, 'gi') : pattern;

    return text.replace(matcher, (match) => `<span class="${className}">${match}</span>`);
  }
}

이 포스팅은 쿠팡 파트너스 활동의 일환으로, 이에 따른 일정액의 수수료를 제공받습니다.

4. Performance and Optimization

Regex Performance Best Practices

Optimization Techniques

// Checklist of common regex performance problems. Each entry names the
// issue, the recommended remedy, and a short example.
const performanceOptimization = {
  // Building the same RegExp on every loop iteration repeats work that can
  // be done once up front.
  compilation: {
    issue: 'Recompiling regex in loops',
    solution: 'Compile once, use many times',
    example: 'const regex = /pattern/; // outside loop'
  },

  // NOTE(review): ECMAScript regular expressions do not offer possessive
  // quantifiers or atomic groups, so in JavaScript the practical remedy is to
  // rewrite the pattern so that nested quantifiers cannot overlap.
  quantifiers: {
    issue: 'Catastrophic backtracking with nested quantifiers',
    solution: 'Use possessive quantifiers or atomic groups',
    example: 'Avoid patterns like (a+)+b'
  },

  // Narrow character classes give the engine fewer alternatives to try.
  specificity: {
    issue: 'Overly broad patterns',
    solution: 'Be as specific as possible',
    example: 'Use \\d instead of . for digits'
  },

  // Anchors let a non-matching attempt fail without retrying at every offset.
  anchoring: {
    issue: 'Unanchored patterns scan entire string',
    solution: 'Use ^ and $ anchors when appropriate',
    example: '^pattern$ for exact matches'
  }
};

/**
 * Small helpers for timing a regular expression and for flagging or
 * simplifying patterns that are likely to perform badly.
 */
class RegexOptimizer {
  /**
   * Times `regex.test` over a set of strings.
   *
   * @param {string|RegExp} pattern - Pattern to benchmark.
   * @param {string[]} testStrings - Inputs tested on every iteration.
   * @param {number} [iterations=1000] - Passes over `testStrings`.
   * @returns {{pattern: (string|RegExp), executionTime: number, averageTime: number}}
   *   Total and per-test time in milliseconds; `averageTime` is 0 when no
   *   test was executed.
   */
  benchmarkRegex(pattern, testStrings, iterations = 1000) {
    // Compile once, outside the timed loops.
    const regex = new RegExp(pattern);
    const totalRuns = iterations * testStrings.length;
    const startTime = performance.now();

    for (let i = 0; i < iterations; i++) {
      for (const testString of testStrings) {
        // A RegExp carrying the g or y flag resumes from lastIndex; reset it
        // so every run starts at the beginning of the string.
        regex.lastIndex = 0;
        regex.test(testString);
      }
    }

    const executionTime = performance.now() - startTime;

    return {
      pattern,
      executionTime,
      // Guard against dividing by zero when there was nothing to run.
      averageTime: totalRuns > 0 ? executionTime / totalRuns : 0
    };
  }

  /**
   * Inspects a pattern's source text for well-known performance smells.
   *
   * @param {string} pattern - Regex source text.
   * @returns {{original: string, suggestions: string[], optimized: string}}
   *   The input, human-readable suggestions, and the result of
   *   `applyOptimizations`.
   */
  optimizePattern(pattern) {
    const suggestions = [];

    // Check for common performance issues
    if (pattern.includes('.*.*')) {
      suggestions.push('Avoid nested quantifiers like .*.*');
    }

    if (pattern.includes('(.*)+')) {
      suggestions.push('Pattern may cause catastrophic backtracking');
    }

    // Only suggested when the pattern is anchored at neither end.
    if (!pattern.startsWith('^') && !pattern.endsWith('$')) {
      suggestions.push('Consider anchoring with ^ and $ if exact match needed');
    }

    return {
      original: pattern,
      suggestions,
      optimized: this.applyOptimizations(pattern)
    };
  }

  /**
   * Applies rewrites that keep the set of matched strings unchanged.
   *
   * Currently a single rewrite: a run of adjacent ".*" tokens is collapsed
   * into one ".*", which matches exactly the same text with less
   * backtracking. Runs that start right after a backslash (for example
   * "\.*", a repeated literal dot) or that are followed by "?" (a lazy
   * quantifier) are left untouched.
   *
   * @param {string} pattern - Regex source text.
   * @returns {string} The rewritten source text.
   */
  applyOptimizations(pattern) {
    // Group 1 keeps the character in front of the run (or the start of the
    // string) so an escaping backslash can be ruled out without lookbehind.
    return pattern.replace(/(^|[^\\])(?:\.\*){2,}(?!\?)/g, '$1.*');
  }
}

Conclusion

Regular expressions are powerful tools for pattern matching and text processing, but they require careful design and testing to be effective and performant. By understanding the fundamentals, common patterns, and optimization techniques, developers can create robust and efficient regex solutions.

Key principles for effective regex usage:

  1. Start simple - Build complexity gradually
  2. Test thoroughly - Verify with diverse input data
  3. Optimize for performance - Avoid catastrophic backtracking
  4. Document patterns - Explain complex regex for maintainability
  5. Consider alternatives - Sometimes simple string methods are better
  6. Validate inputs - Sanitize data before regex processing
Regular Expression Testing and Pattern Matching Guide | DDTool