use strict;
use warnings;
use HTML::TokeParser::Simple;

# Strip the markup from every HTML file named on the command line and append
# the remaining text to a sibling file called "<input name>.txt".
#
# Usage: perl SCRIPT file1.html [file2.html ...]
#
# Dies if no files are given, if an input file cannot be opened for parsing,
# or if the output file cannot be opened, written, or closed.

@ARGV
    or die "You did not specify any files to process!\n";

foreach my $infname (@ARGV) {
    my $outfname = $infname . ".txt";

    # new() yields a false value when the file cannot be opened; fail here
    # with the file name instead of later on a method call against undef.
    my $inputtxt = HTML::TokeParser::Simple->new($infname)
        or die "$infname could not be opened.: $!\n";

    # This section removes the code: only text tokens are kept, exactly as
    # they appear in the document (entities are NOT decoded by as_is).
    my $outputtxt = "";
    while (my $token = $inputtxt->get_token) {
        next unless $token->is_text;
        $outputtxt .= $token->as_is;
    }

    # This section removes whitespace.
    # HTML special space char: because the text is raw, a non-breaking space
    # still appears as an entity, so turn the entity into a plain space.
    # NOTE(review): a literal U+00A0 character in the input is not handled
    # here - confirm whether that case matters for your documents.
    $outputtxt =~ s/&nbsp;|&#160;/ /g;

    # Tabs (mostly) and newlines: deletes every run of exactly three
    # whitespace characters; shorter leftovers are kept. The /m flag has no
    # effect without ^ or $ anchors and is retained only for fidelity.
    $outputtxt =~ s/\s\s\s//mg;

    # Append mode is deliberate legacy behaviour: re-running the script adds
    # to an existing .txt file rather than replacing it.
    open(my $out_fh, '>>', $outfname)
        or die "$outfname could not be opened.: $!\n";

    print {$out_fh} $outputtxt
        or die "Could not write to $outfname: $!\n";

    # Close the handle itself (not the file name string); buffered write
    # errors only surface at close, so check it.
    close($out_fh)
        or die "Could not close $outfname: $!\n";

    # The parser's own input handle is released when $inputtxt goes out of
    # scope at the end of this iteration.
}