# Three consecutive steps over the file named by $filepath:
#   1. Split the input into blocks and hand each block to Parse_XML().
#   2. Tie %md5 to an on-disk GDBM file so the "seen" set need not fit in RAM.
#   3. Copy every distinct <ppsarticle> body to $outfilepath, skipping any
#      article whose MD5 digest has already been recorded in %md5.
#
# $filepath, $outfilepath and Parse_XML() are expected to be provided by the
# surrounding program; they are not defined here.

use Fcntl;                  # O_RDWR, O_CREAT
use GDBM_File;              # my favorite, but there are others
use Digest::MD5 qw(md5);

#### Step 1: feed the input to Parse_XML() one block at a time.

open my $parse_in, '<', $filepath
    or die "Cannot open '$filepath' for reading: $!";

my $Block;
while (my $line = <$parse_in>) {
    # NOTE(review): the original block-start pattern was lost when this code
    # was extracted; an opening <ppsarticle> tag is assumed, by analogy with
    # the de-duplication step below. Confirm against the real input format.
    if ($line =~ m{<ppsarticle>}) {
        # Nothing has been collected before the first tag, so only flush a
        # block that actually holds data.
        Parse_XML($Block) if defined $Block;
        undef $Block;
    }
    $Block .= $line;
}
Parse_XML($Block) if defined $Block;    # Don't forget last block

close $parse_in
    or die "Cannot close '$filepath': $!";

#### Step 2: keep the digest table on disk.

# O_CREAT lets the very first run create the database file.
# NOTE(review): nothing here removes /tmp/md5.tmp, so digests recorded by an
# earlier run are still present on the next one - confirm that is intended.
our %md5;
tie %md5, 'GDBM_File', '/tmp/md5.tmp', O_RDWR | O_CREAT, 0600
    or die "Cannot tie /tmp/md5.tmp: $!";

#### Step 3: write each distinct article exactly once.

open my $dedupe_in, '<', $filepath
    or die "Cannot open '$filepath' for reading: $!";
open my $dedupe_out, '>', $outfilepath
    or die "Cannot open '$outfilepath' for writing: $!";

my $XML;
while (my $line = <$dedupe_in>) {
    $XML .= $line;

    # Pull complete articles out of the accumulated buffer (not out of the
    # current line): an article may span many lines, and one line may close
    # several articles. /s lets '.' cross newlines.
    while ($XML =~ s{<ppsarticle>(.*?)</ppsarticle>}{}s) {
        my $Article = $1;
        my $MD5SUM  = md5($Article);

        next if $md5{$MD5SUM};          # already written once
        $md5{$MD5SUM} = 1;

        print {$dedupe_out} $Article
            or die "Cannot write to '$outfilepath': $!";
    }
}

close $dedupe_in
    or die "Cannot close '$filepath': $!";
# Buffered write errors only surface at close, so check it.
close $dedupe_out
    or die "Cannot close '$outfilepath': $!";