#!/usr/bin/perl
# tag_comp.pl
# - jlawrenc@infonium.com - use at your own risk
#
# A quick 'n dirty to help you compare HTML tags across two similar documents.
#
# This happens to me from time to time.  We have an HTML template that has been
# adapted for server-side use.  Then the graphic designer goes off and reformats
# with different fonts, tag sizes or whatever.  It could be easier to scope out
# the changes and then just re-edit our template document rather than reworking
# the supplied HTML back into a template.
#
# Invoke thusly:
#   tag_comp fn1 fn2 [tag [shift]]
#
# ie/
#   tag_comp index.html new_index.html table
#     generates a report of how the tag is used differently between the two
#     documents
#
#   tag_comp index.html new_index.html img 2
#     a report of how tags have changed shifting the left col up a couple
#     of rows to help line up the differences
#
#
# Things to consider
#  a - tag regex is real simple "<" + not > 1 or more times + ">"
#      this may not always work for you
#  b - tag compares are lowercased
#  c - the tag argument is interpolated into a regex as-is, so a pattern
#      such as "t[dh]" works, and a malformed pattern aborts the script
#
# It would be nice to try and line up the matches more effectively but a human
# will do the job for now.
#
# Warnings are enabled: the shorter result list is padded with empty strings
# before the report is written, so write() never sees an undefined value.

use strict;
use warnings;

# The formats below are compiled at compile time and read these variables when
# write() runs, so they are package variables declared ahead of the formats.
our ($fn1, $fn2);       # the two file names being compared
our (@srch1, @srch2);   # matching tags found in file 1 / file 2
our $i;                 # current report row

# Report header
format STDOUT_TOP =
---------------------------------------------------------------------------------
@|||||||||||||||||||||||||||||||||||||| | @||||||||||||||||||||||||||||||||||||||
$fn1, $fn2
---------------------------------------------------------------------------------
.

# Report body - lines that do not match
format STDOUT =
^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ | ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
$srch1[$i], $srch2[$i]
---------------------------------------------------------------------------------
.

# Report body - lines that do match
format STDOUT_MATCH =
* match: ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
$srch1[$i]
---------------------------------------------------------------------------------
.

# Read a whole file into one string.  Dies with the OS error if the file
# cannot be opened, rather than silently reporting on an empty document.
sub slurp_file {
    my ($path) = @_;

    open my $fh, '<', $path
        or die "Cannot open '$path' for reading: $!\n";

    # Slurp mode, limited to this sub so the global separator is untouched.
    local $/;
    my $content = <$fh>;
    close $fh;

    return defined $content ? $content : '';
}

# Return every "<...>" chunk found in the given text, in document order.
# Real crude regex that may not always do the trick.
sub extract_tags {
    my ($html) = @_;

    my @tags = $html =~ /(<[^>]+>)/g;
    return @tags;
}

# Our input arguments - file name1, file name2, tag to report on, shift value
my ($tag, $shift);
($fn1, $fn2, $tag, $shift) = @ARGV;
if (!$fn1 or !$fn2) {
    die "Please supply two file names to compare.";
}

# Default to "img" tags
if (!$tag) {
    $tag = "img";
    print STDERR "Defaulting to search for <$tag>s\n\n";
}

# No shift requested means no shift; anything that is not an integer is
# ignored, which matches how perl would have treated it numerically.
$shift = 0 unless defined $shift;
if ($shift !~ /^-?\d+$/) {
    print STDERR "Ignoring non-numeric shift value '$shift'.\n";
    $shift = 0;
}

# Check for positive shift
if ($shift < 0) {
    print STDERR "shift only works with positive vals.\n";
    print STDERR "if you want to shift the other way then try reversing your file names. :)\n";
}

# Slurp our files and grab our tags
my @tags1 = extract_tags(slurp_file($fn1));
my @tags2 = extract_tags(slurp_file($fn2));

# Get our list of matching tags
my $tag_re = qr/^<$tag(\s|>)/i;
@srch1 = grep { $_ =~ $tag_re } @tags1;
@srch2 = grep { $_ =~ $tag_re } @tags2;

# Shift first search result if needed
unshift @srch1, ('') x $shift if $shift > 0;

# Find out who has more rows - set1 or 2
my $rows = $#srch1 > $#srch2 ? $#srch1 : $#srch2;

# Pad the shorter list so every row has a defined value on both sides.
push @srch1, '' while $#srch1 < $rows;
push @srch2, '' while $#srch2 < $rows;

# Write our header
# NOTE(review): STDOUT_TOP is also perl's default top-of-page format name for
# STDOUT, so the header is probably emitted automatically as well as by this
# explicit write - confirm against real output before changing.
$~ = "STDOUT_TOP";
write;

# Write report body.  A C-style loop on the package variable $i is kept on
# purpose: the formats index @srch1/@srch2 with $i.  The comparison has to
# happen before write(), because the ^<<< fields consume the text they print.
for ($i = 0; $i <= $rows; $i++) {
    # One format for rows that are the same, another for those that are not
    if (lc $srch1[$i] ne lc $srch2[$i]) {
        $~ = "STDOUT";
    }
    else {
        $~ = "STDOUT_MATCH";
    }
    write;
}

# Done - coffee time