in reply to Missing HTML tags
## Report which HTML tags in a file have balanced open/close counts.
##
## Usage: perl <this-script> FILE
##
## Prints every tag name seen (balanced first, then unbalanced), followed by
## a per-tag verdict for each entry in @closedtags.
use strict;
use warnings;

## Tags that are expected to appear in open/close pairs.
my @closedtags = qw(html head body b i u);

## Kept from the original listing; none of the checks below read it yet.
my @opentags = qw(a img);

## Run as a script only; when this file is loaded by other code (caller is
## defined) just the subs are made available.
main(@ARGV) unless caller;

## Entry point: takes the file name as its first argument, dies with a
## usage message when it is missing.
sub main {
    my ($file) = @_;

    # Test definedness/length rather than truth so a file named "0" works.
    die "Need a file name!\n" unless defined $file && length $file;

    my %tag = count_tags(slurp_file($file));
    report(\%tag);
    return;
}

## Return the whole content of $file as one string; dies if it cannot be
## opened.
sub slurp_file {
    my ($file) = @_;

    # Three-argument open: the file name can never be read as a mode.
    open(my $fh, '<', $file) or die "Could not open $file: $!\n";

    # Localise $/ so slurp mode does not leak into the rest of the program.
    my $html = do { local $/; <$fh> };
    close($fh);

    return defined $html ? $html : '';
}

## The magic part: return a hash (as a flat list) mapping each lower-cased
## tag name to (number of opening tags - number of closing tags).
## A value of 0 therefore means "balanced".
##
## The regexp does a fairly good job, but misses weird cases like >
## embedded in quotes, etc.  Comments and doctype declarations are counted
## like any other tag (under names such as "!--"), and tags that never have
## a closing form (e.g. <img>) always end up non-zero.
sub count_tags {
    my ($html) = @_;
    my %tag;

    # The name stops at any whitespace, "/" or ">", so "<a\nhref=...>" is
    # counted as "a" and "<br/>" as "br".
    while ($html =~ m{<(/?)([^\s/>]*)[^>]*>}g) {
        my ($closing, $name) = ($1, lc $2);
        if ($closing) {
            $tag{$name}--;
        }
        else {
            $tag{$name}++;
        }
    }

    return %tag;
}

## Print the report for the tag counts in the hash referenced by $tag_ref.
sub report {
    my ($tag_ref) = @_;
    my @names = sort keys %{$tag_ref};

    ## Show ALL tags, matched then unmatched:
    print "Matched tags:\n";
    for my $name (@names) {
        print "$name\n" unless $tag_ref->{$name};
    }

    print "Unmatched tags:\n";
    for my $name (@names) {
        print "$name\n" if $tag_ref->{$name};
    }

    ## Go through our list of 'closed' tags and check each:
    for my $name (@closedtags) {
        printf "Results for html tag %5s: ", $name;
        if (defined $tag_ref->{$name}) {
            print $tag_ref->{$name} ? "NOT balanced\n" : "balanced\n";
        }
        else {
            print "None found.\n";
        }
    }

    return;
}
|
|---|