Regular Expression Testing and Pattern Matching Guide
Regular expressions (regex) are powerful tools for pattern matching and text processing. This guide covers regex syntax, common patterns, and best practices for effective pattern matching.
1. Regular Expression Fundamentals
Basic Syntax and Metacharacters
Core Metacharacters
/**
 * Quick-reference lookup of core regular-expression syntax.
 *
 * - `literals` describes plain-character matching.
 * - `metacharacters` maps each special token to a one-line description.
 */
const regexBasics = {
  literals: {
    description: 'Match exact characters',
    example: '/hello/ matches "hello"',
    usage: 'Simple text matching',
  },
  // Built from ordered [token, meaning] rows so the table reads top to bottom.
  metacharacters: Object.fromEntries([
    ['.', 'Any character except newline'],
    ['^', 'Start of string/line'],
    ['$', 'End of string/line'],
    ['*', 'Zero or more of preceding'],
    ['+', 'One or more of preceding'],
    ['?', 'Zero or one of preceding'],
    ['|', 'OR operator'],
    ['()', 'Grouping and capturing'],
    ['[]', 'Character class'],
    ['{}', 'Quantifiers'],
  ]),
};
Character Classes and Quantifiers
Common Patterns
/**
 * Reference tables for character classes and quantifiers.
 *
 * Each section maps a regex token (as it would be typed in a pattern) to a
 * short human-readable description. Escaped classes are stored with a single
 * backslash at runtime (e.g. the key for digits is the two characters `\d`).
 */
const regexPatterns = {
  // Bracket expressions written out explicitly.
  characterClasses: Object.fromEntries([
    ['[abc]', 'Match a, b, or c'],
    ['[a-z]', 'Match any lowercase letter'],
    ['[A-Z]', 'Match any uppercase letter'],
    ['[0-9]', 'Match any digit'],
    ['[^abc]', 'Match anything except a, b, or c'],
  ]),
  // Backslash shorthands and their negations.
  predefinedClasses: Object.fromEntries([
    ['\\d', 'Digit [0-9]'],
    ['\\D', 'Non-digit [^0-9]'],
    ['\\w', 'Word character [a-zA-Z0-9_]'],
    ['\\W', 'Non-word character'],
    ['\\s', 'Whitespace character'],
    ['\\S', 'Non-whitespace character'],
  ]),
  // Repetition operators; the symbolic forms list their brace equivalents.
  quantifiers: Object.fromEntries([
    ['{n}', 'Exactly n times'],
    ['{n,}', 'n or more times'],
    ['{n,m}', 'Between n and m times'],
    ['*', 'Zero or more {0,}'],
    ['+', 'One or more {1,}'],
    ['?', 'Zero or one {0,1}'],
  ]),
};
2. Common Use Cases and Patterns
Email Validation
Email Pattern Implementation
/**
 * Validates e-mail addresses with two regular expressions of differing
 * strictness and can break an address into its parts.
 *
 * All methods treat non-string input as invalid instead of coercing it or
 * throwing.
 */
class EmailValidator {
  constructor() {
    // Basic email pattern: something@something.something, with no whitespace
    // or additional "@" anywhere.
    this.basicPattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
    // More comprehensive pattern: restricts the local part to an explicit
    // character set and requires every domain label to be 1-63 characters,
    // start and end with an alphanumeric, and contain only alphanumerics or
    // hyphens. Note that it does NOT require a dot in the domain.
    this.comprehensivePattern = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;
  }

  /**
   * Checks an address against the basic pattern.
   *
   * @param {*} email - Candidate address.
   * @returns {boolean} `true` only for strings matching `basicPattern`.
   */
  validateBasic(email) {
    return typeof email === 'string' && this.basicPattern.test(email);
  }

  /**
   * Checks an address against the comprehensive pattern.
   *
   * @param {*} email - Candidate address.
   * @returns {boolean} `true` only for strings matching `comprehensivePattern`.
   */
  validateComprehensive(email) {
    return typeof email === 'string' && this.comprehensivePattern.test(email);
  }

  /**
   * Splits an address at "@" and reports details about each side.
   *
   * @param {*} email - Candidate address.
   * @returns {{valid: boolean, error?: string, localPart?: string,
   *   domain?: string, localLength?: number, domainLength?: number,
   *   hasValidTLD?: boolean}} On structural failure, `{ valid: false, error }`;
   *   otherwise the comprehensive validation result plus the parsed parts.
   */
  getValidationDetails(email) {
    // Guard first: `.split` below would throw on null/undefined/non-strings.
    if (typeof email !== 'string') {
      return { valid: false, error: 'Email must be a string' };
    }

    const parts = email.split('@');
    if (parts.length !== 2) {
      return { valid: false, error: 'Must contain exactly one @ symbol' };
    }

    const [localPart, domain] = parts;
    return {
      valid: this.validateComprehensive(email),
      localPart,
      domain,
      localLength: localPart.length,
      domainLength: domain.length,
      // Separate check because the comprehensive pattern accepts dot-less
      // domains: the domain must end in a dot plus at least two letters.
      hasValidTLD: /\.[a-zA-Z]{2,}$/.test(domain)
    };
  }
}
Phone Number Patterns
Phone Number Validation
/**
 * Validates and formats phone numbers.
 *
 * `patterns` holds three regular expressions:
 * - `us`: optional "+1", then 3-3-4 digits with optional "-", "." or
 *   whitespace separators and optional parentheses around the area code.
 *   Capture groups 2, 3 and 4 are area code, exchange and line number.
 * - `international`: optional "+", then 2-15 digits, the first non-zero.
 * - `generic`: optional "+", then 1-16 digits, the first non-zero.
 */
class PhoneValidator {
  constructor() {
    this.patterns = {
      us: /^(\+1)?[-.\s]?\(?([0-9]{3})\)?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})$/,
      international: /^\+?[1-9]\d{1,14}$/,
      generic: /^[\+]?[1-9][\d]{0,15}$/
    };
  }

  /**
   * Validates a US phone number and splits it into its components.
   *
   * @param {*} phone - Candidate phone number.
   * @returns {{valid: boolean, formatted?: string, areaCode?: string,
   *   exchange?: string, number?: string}} `{ valid: false }` when the input
   *   is not a string, does not match, or has unbalanced parentheses;
   *   otherwise the parsed parts plus a "(AAA) EEE-NNNN" rendering.
   */
  validateUSPhone(phone) {
    if (typeof phone !== 'string') {
      return { valid: false };
    }

    const match = phone.match(this.patterns.us);
    if (!match) {
      return { valid: false };
    }

    // The pattern makes "(" and ")" independently optional, so on its own it
    // would accept "(555 123-4567". It allows at most one of each, so
    // requiring both-or-neither is a complete balance check.
    if (phone.includes('(') !== phone.includes(')')) {
      return { valid: false };
    }

    const [, , areaCode, exchange, number] = match;
    return {
      valid: true,
      formatted: `(${areaCode}) ${exchange}-${number}`,
      areaCode,
      exchange,
      number
    };
  }

  /**
   * Formats a 10-digit phone number.
   *
   * Every non-digit character is stripped before counting, so separators in
   * the input do not matter.
   *
   * @param {*} phone - Phone number; non-strings are converted with `String()`.
   * @param {string} [format='standard'] - 'standard' -> "(AAA) EEE-NNNN",
   *   'dots' -> "AAA.EEE.NNNN", 'dashes' -> "AAA-EEE-NNNN"; any other value
   *   yields the bare digits.
   * @returns {*} The formatted string, or the original `phone` argument
   *   unchanged when it does not contain exactly 10 digits.
   */
  formatPhoneNumber(phone, format = 'standard') {
    // String() keeps numeric input usable and stops null/undefined throwing.
    const digits = String(phone).replace(/\D/g, '');
    if (digits.length !== 10) {
      return phone;
    }

    const area = digits.slice(0, 3);
    const exchange = digits.slice(3, 6);
    const line = digits.slice(6);

    switch (format) {
      case 'standard':
        return `(${area}) ${exchange}-${line}`;
      case 'dots':
        return `${area}.${exchange}.${line}`;
      case 'dashes':
        return `${area}-${exchange}-${line}`;
      default:
        return digits;
    }
  }
}
URL and Domain Validation
URL Pattern Matching
/**
 * URL and domain-name checks built on two regular expressions plus the
 * platform `URL` parser.
 */
class URLValidator {
  constructor() {
    // Whole-string http(s) URL matcher.
    this.urlPattern = /^https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$/;
    // Dot-separated labels (at least two), each 1-63 characters, alphanumeric
    // at both ends with hyphens allowed in between.
    this.domainPattern = /^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$/;
  }

  /**
   * Parses a URL and reports its components.
   *
   * @param {string} url - Candidate URL.
   * @returns {object} `{ valid: false, error }` when `new URL(url)` throws;
   *   otherwise the parsed components, with `valid` reflecting whether the
   *   string also matches `urlPattern`.
   */
  validateURL(url) {
    try {
      const { protocol, hostname, pathname, search, hash } = new URL(url);
      const matchesPattern = this.urlPattern.test(url);
      return { valid: matchesPattern, protocol, hostname, pathname, search, hash };
    } catch (_parseError) {
      return { valid: false, error: 'Invalid URL format' };
    }
  }

  /**
   * Finds every http(s) URL embedded in a piece of text.
   *
   * @param {string} text - Text to scan.
   * @returns {string[]} Matched URLs in order of appearance; empty if none.
   */
  extractURLs(text) {
    // Fresh global regex on every call, so no `lastIndex` state is shared.
    const found = text.match(
      /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/g
    );
    return found === null ? [] : found;
  }

  /**
   * Validates a bare domain name and describes its label structure.
   *
   * @param {string} domain - Domain name such as "sub.example.com".
   * @returns {{valid: boolean, subdomain: boolean, tld: string, levels: number}}
   *   `subdomain` is true when there are more than two labels; `tld` is the
   *   last label; `levels` is the label count.
   */
  validateDomain(domain) {
    const valid = this.domainPattern.test(domain);
    const labels = domain.split('.');
    const levels = labels.length;
    return {
      valid,
      subdomain: levels > 2,
      tld: labels[levels - 1],
      levels,
    };
  }
}
3. Advanced Pattern Matching
Lookahead and Lookbehind
Advanced Assertions
/**
 * Examples of lookahead assertions plus a rule-by-rule password checker.
 */
class AdvancedPatterns {
  constructor() {
    this.patterns = {
      // Positive lookahead: match 'hello' only when whitespace + 'world' follows.
      positiveLookahead: /hello(?=\s+world)/,
      // Negative lookahead: match 'hello' only when whitespace + 'world' does NOT follow.
      negativeLookahead: /hello(?!\s+world)/,
      // Four lookaheads demand a lowercase letter, an uppercase letter, a digit
      // and one of @$!%*?&; the body then restricts the whole string to those
      // character groups with a minimum length of 8.
      strongPassword: /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$/
    };
  }

  /**
   * Evaluates a password against five independent requirements.
   *
   * @param {string} password - Password to inspect.
   * @returns {{valid: boolean, requirements: object, strength: string}}
   *   `valid` is true only when every requirement is met; `requirements`
   *   holds one boolean per rule; `strength` is a label derived from how
   *   many rules passed.
   */
  validateStrongPassword(password) {
    const requirements = {
      minLength: password.length >= 8,
      hasLowercase: /[a-z]/.test(password),
      hasUppercase: /[A-Z]/.test(password),
      hasDigit: /\d/.test(password),
      hasSpecial: /[@$!%*?&]/.test(password)
    };

    const strength = this.calculatePasswordStrength(requirements);
    const valid = Object.values(requirements).every(Boolean);

    return { valid, requirements, strength };
  }

  /**
   * Converts a requirements map into a strength label.
   *
   * @param {object} requirements - Map of rule name to truthy/falsy result.
   * @returns {string} One of 'Very Weak', 'Weak', 'Fair', 'Good', 'Strong',
   *   selected by the number of truthy entries (capped at the last label).
   */
  calculatePasswordStrength(requirements) {
    const levels = ['Very Weak', 'Weak', 'Fair', 'Good', 'Strong'];

    let satisfied = 0;
    for (const outcome of Object.values(requirements)) {
      if (outcome) {
        satisfied += 1;
      }
    }

    // A count of zero gives index -1, which has no label and falls through
    // to the weakest level.
    const label = levels[Math.min(satisfied - 1, 4)];
    return label || 'Very Weak';
  }
}
Text Processing and Extraction
Data Extraction Patterns
/**
 * Extracts, strips, masks and highlights common tokens (hashtags, mentions,
 * e-mail addresses, dates, card numbers, IPv4-shaped strings) in free text.
 *
 * Every entry in `patterns` carries the `g` flag. They are only used with
 * `String.prototype.match` / `replace` here, which reset `lastIndex`; avoid
 * calling `.test()` / `.exec()` on them directly.
 */
class TextExtractor {
  constructor() {
    this.patterns = {
      hashtags: /#[\w]+/g,
      mentions: /@[\w]+/g,
      // TLD class is letters only. A "|" inside a character class is a
      // literal pipe, not alternation, so it must not appear here.
      emails: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
      // Numeric d/m/y-style dates separated by "/" or "-"; no range checks.
      dates: /\b\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}\b/g,
      // Four groups of four digits, optionally separated by space or hyphen.
      creditCard: /\b\d{4}[\s\-]?\d{4}[\s\-]?\d{4}[\s\-]?\d{4}\b/g,
      // Four dot-separated groups of 1-3 digits; octets are not range-checked.
      ipAddress: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g
    };
  }

  /**
   * Runs every pattern against the text.
   *
   * @param {string} text - Text to scan.
   * @returns {Object<string, string[]>} One array of matches per pattern
   *   name; an empty array where nothing matched.
   */
  extractAll(text) {
    const results = {};
    for (const [type, pattern] of Object.entries(this.patterns)) {
      results[type] = text.match(pattern) || [];
    }
    return results;
  }

  /**
   * Removes or masks selected token types and normalises whitespace.
   *
   * @param {string} text - Text to clean.
   * @param {object} [options]
   * @param {boolean} [options.removeHashtags] - Delete hashtags.
   * @param {boolean} [options.removeMentions] - Delete @mentions.
   * @param {boolean} [options.maskEmails] - Replace e-mails with a placeholder.
   * @param {boolean} [options.maskCreditCards] - Replace card numbers with a
   *   placeholder.
   * @returns {string} Cleaned text, trimmed, with whitespace runs collapsed
   *   to single spaces.
   */
  cleanText(text, options = {}) {
    let cleaned = text;

    // Mask before stripping: the mentions pattern also matches the "@domain"
    // part of an e-mail address, so removing mentions first would mangle
    // addresses and leave them unmasked.
    if (options.maskEmails) {
      cleaned = cleaned.replace(this.patterns.emails, '[EMAIL HIDDEN]');
    }
    if (options.maskCreditCards) {
      cleaned = cleaned.replace(this.patterns.creditCard, '[CARD NUMBER HIDDEN]');
    }
    if (options.removeHashtags) {
      cleaned = cleaned.replace(this.patterns.hashtags, '');
    }
    if (options.removeMentions) {
      // NOTE(review): without maskEmails this still strips "@domain" out of
      // e-mail addresses; confirm whether callers rely on that.
      cleaned = cleaned.replace(this.patterns.mentions, '');
    }

    return cleaned.trim().replace(/\s+/g, ' ');
  }

  /**
   * Wraps every match of `pattern` in a `<span>` with the given class.
   *
   * @param {string} text - Text to annotate.
   * @param {string|RegExp} pattern - A RegExp used as-is, or a string that is
   *   compiled as regex source with the `gi` flags (it is NOT escaped, so
   *   metacharacters keep their regex meaning).
   * @param {string} [className='highlight'] - Value for the span's class.
   * @returns {string} The text with matches wrapped; unchanged when `pattern`
   *   is an empty string.
   */
  highlightMatches(text, pattern, className = 'highlight') {
    if (typeof pattern === 'string') {
      // An empty source matches at every position and would insert an empty
      // span between every character.
      if (pattern === '') {
        return text;
      }
      pattern = new RegExp(pattern, 'gi');
    }
    // NOTE(review): neither the match nor className is HTML-escaped; escape
    // upstream if either can come from untrusted input.
    return text.replace(pattern, match =>
      `<span class="${className}">${match}</span>`
    );
  }
}