#! perl -slw
## Index every paragraph of the input files by the MD5 digest of its text.
##
## Input files are named on the command line and read through <>. The
## result is a hash keyed by hex digest; each value is a list of
## [ filename, offset ] pairs, one pair per paragraph that produced that
## digest. The finished structure is printed with Data::Dumper.
use strict;
use G;    ## Expands command line wildcards
use Data::Dumper;
use Digest::MD5 qw[ md5_hex ];

## Paragraph mode: each read returns one blank-line-delimited chunk.
local $/ = '';

## Position recorded for the paragraph about to be read.
## The first paragraph of a file starts at offset 0.
my $offset = 0;

## digest => [ [ filename, offset ], ... ]
my %index;

while ( defined( my $para = <> ) ) {
    ## File the location of this paragraph under its digest.
    my $digest = md5_hex( $para );
    push @{ $index{ $digest } }, [ $ARGV, $offset ];

    ## Get the next offset, falling back to 0 once the current file is
    ## exhausted so that the next file starts counting from its beginning.
    my $next_offset = tell ARGV;
    $offset = eof( ARGV ) ? 0 : $next_offset;
}

print Dumper( \%index );

__END__
C:\Perl\html>p:359522 *.html *\*.html *\*\*.html
Processed 512 files and 5829 paragraphs into 3293 unique signatures.