#!/usr/bin/perl -w
use strict;
use Algorithm::Diff qw(traverse_sequences);
use Getopt::Std;

my ($old_file, $new_file) = @ARGV[-2,-1];
&useage unless ( $old_file and $new_file and -e $old_file and -e $new_file );

# get options and file contents as array refs
our ( $opt_b, $opt_c );
getopts('bc');
my $skip_blanks   = $opt_b;
my $skip_comments = $opt_c;
my $old = get( $old_file, $skip_blanks, $skip_comments );
my $new = get( $new_file, $skip_blanks, $skip_comments );

# print out the colour coded diff - common code is black
# code in $old_file but not $new_file is red
# code in $new_file but not $old_file is green
print "<pre>\n<h4>Color Key:\n";
print "<font color=red>".escapeHTML($old_file)."</font>\n";
print "<font color=green>".escapeHTML($new_file)."</font></h4>\n";


# this is snippet of code originally written by merlyn exactly as posted.
# it is the same as his except he forgot to escape the HTML in his example.
# traverse_sequences( $old, $new, {
#    MATCH     => sub { print escapeHTML($old->[shift]) },
#    DISCARD_A => sub { print "<font color=red>" . escapeHTML($old->[shift])."</font>" },
#    DISCARD_B => sub { print "<font color=green>".escapeHTML($new->[shift,shift])."</font>" },
#});

# this is what I (silently) modified it to to demonstrate
# to merlyn that I knew another way to do it and can RTFS
# this is the same syntax used in the diff sub in the source
traverse_sequences( $old, $new, {
    MATCH     => sub { print escapeHTML($old->[$_[0]]) },
    DISCARD_A => sub { print "<font color=red>" . escapeHTML($old->[$_[0]])."</font>" },
    DISCARD_B => sub { print "<font color=green>".escapeHTML($new->[$_[1]])."</font>" },
});
print "</pre>\n";

sub get {
   my ($file, $skip_blanks, $skip_comments ) = @_;
   my @file;
   open F, $file or die "Can't read $file: $!";
   while (<F>) {
      next if /^\s*$/ and $skip_blanks;
      next if /^\s*#/ and $skip_comments;
      push @file, $_;
   }
   close F;
  return \@file;
}

sub escapeHTML {
    local $_ = shift;
    # make the required escapes
    s/&/&amp/g;
    s/"/&quot;/g;
    s/</&lt;/g;
    s/>/&gt;/g;
    # change tabs to 4 spaces
    s/\t/    /g;
    # make the whitespace escapes - not required within <pre> tags  
    # s/( {2,})/"&nbsp;" x length $1/eg;
    # make the brower bugfix escapes;
    s/\x8b/&#139;/g;
    s/\x9b/&#155;/g;
    # make the PERL MONKS escapes (if desired)
    # s/\[/&#091;/g;
    # s/\]/&#093;/g;
    # change newlines to <br> if desired - not required with <pre>
    # s/\n/<br>\n/g;
  return $_;
}

sub useage {
    print qq(
    Useage $0 -[b,c] <file1> <file2>
    -b skip blank lines
    -c skip comment only lines
    HTML output to STDOUT
    );
  exit;
}

##</code><code>##

The arguments to C<traverse_sequences> are the two sequences to
traverse, and a callback which specifies the callback functions, like
this:

  traverse_sequences( \@seq1, \@seq2,
                     { MATCH => $callback_1,
                       DISCARD_A => $callback_2,
                       DISCARD_B => $callback_3,
                     } );

Callbacks are invoked with at least the indices of the two arrows as
their arguments.  They are not expected to return any values.  If a
callback is omitted from the table, it is not called.

##</code><code>##

sub diff
{
	my $a = shift;	# array ref
	my $b = shift;	# array ref
	my $retval = [];
	my $hunk = [];
	my $discard = sub { push( @$hunk, [ '-', $_[ 0 ], $a->[$_[0]] ] ) };
	my $add     = sub { push( @$hunk, [ '+', $_[ 1 ], $b->[$_[1]] ] ) };
	my $match   = sub { push( @$retval, $hunk ) if scalar(@$hunk); $hunk = [] };
	traverse_sequences( $a, $b,
		{ MATCH     => $match, 
                  DISCARD_A => $discard, 
                  DISCARD_B => $add 
                }, @_ );
	&$match();
	return wantarray ? @$retval : $retval;
}