# Strip HTML markup from every file named on the command line and append the
# remaining text to "<input-file>.txt".
#
# Usage: perl script.pl page1.html [page2.html ...]
use strict;
use warnings;
use HTML::TokeParser::Simple;

@ARGV or die "You did not specify any files to process!\n";

foreach my $infname (@ARGV) {
    my $outfname = $infname . ".txt";

    # The constructor yields undef when the input cannot be opened, so check
    # it here rather than failing later on the first get_token call.
    my $inputtxt = HTML::TokeParser::Simple->new($infname)
        or die "$infname could not be opened: $!\n";

    # This section removes the code: keep only text tokens, drop tags,
    # comments and declarations.
    my $outputtxt = "";
    while (my $token = $inputtxt->get_token) {
        next unless $token->is_text;
        $outputtxt .= $token->as_is;
    }

    # This section removes whitespace.
    # as_is returns the text undecoded, so the non-breaking-space entity is
    # still spelled out and has to be turned into a plain space by hand.
    $outputtxt =~ s/&nbsp;/ /g;
    # Drop every run of three whitespace characters (tabs, mostly, and
    # newlines).
    $outputtxt =~ s/\s{3}//g;

    # Three-argument open with a lexical handle: the file name can no longer
    # be misread as part of the mode. Append mode is kept from the original.
    open(my $out_fh, '>>', $outfname)
        or die "$outfname could not be opened: $!\n";
    print {$out_fh} $outputtxt
        or die "Could not write to $outfname: $!\n";
    # Close the handle itself (not the file name); buffered write errors only
    # surface here, so the result is checked.
    close($out_fh)
        or die "$outfname could not be closed: $!\n";
}
In reply to Re^2: Simplify parsing a file
by myrrdyn
in thread Simplify parsing a file
by Anonymous Monk
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |