use strict;
use warnings;
use HTML::TableExtract;

# Extract a table of advancing stocks from a saved HTML page, clean up the
# extracted text, and write a tab-delimited report that includes each
# stock's closing price from the prior day.
#
# Files:
#   nasdaq-stocks.txt   input  - saved HTML page
#   outup-temp.txt      work   - raw table text, then the cleaned text
#   outup.txt           output - stock, prior close, last sale, pct, volume

my $doc       = 'nasdaq-stocks.txt';
my $temp_file = 'outup-temp.txt';
my $out_file  = 'outup.txt';

# Column headers used by HTML::TableExtract to locate the table.
my $headers = ['Symbol', 'Last Sale*', 'Change Net / %', 'Share Volume'];

# Table 4 is the advancers. Need to do again for 5, the decliners.
my $table_count = 4;

extract_table($doc, $temp_file, $table_count, $headers);
clean_file($temp_file);
write_report($temp_file, $out_file);

# Parse $html_file, pick the table selected by $count/$wanted_headers and
# write it to $dest_file: one output record per table row, the row's cells
# concatenated with no separator and followed by a newline.
# Dies if a file cannot be opened or no matching table is found.
sub extract_table {
    my ($html_file, $dest_file, $count, $wanted_headers) = @_;

    my $extractor = HTML::TableExtract->new(
        count   => $count,
        headers => $wanted_headers,
    );

    # Open the page ourselves so that a missing or unreadable file is
    # reported. Read raw bytes: the cleanup patterns in clean_line() match
    # UTF-8 byte sequences.
    open my $html, '<', $html_file
        or die "Cannot read '$html_file': $!\n";
    binmode $html;
    $extractor->parse_file($html);
    close $html;

    my ($table) = $extractor->tables;
    die "No table matching the expected headers (count $count) "
        . "found in '$html_file'\n"
        unless defined $table;

    open my $out, '>', $dest_file
        or die "Cannot write '$dest_file': $!\n";
    for my $row ($table->rows) {
        # Treat undefined cells as empty strings so they do not raise
        # "uninitialized value" warnings when printed.
        my $text = join '', map { defined $_ ? $_ : '' } @$row;
        print {$out} $text, "\n";
    }
    close $out
        or die "Cannot write '$dest_file': $!\n";

    return;
}

# Return a cleaned copy of one line of extracted table text.
sub clean_line {
    my ($line) = @_;

    # Strip leading whitespace. A line that is entirely whitespace
    # (including its newline) becomes the empty string.
    $line =~ s/^\s+//;

    # A run of three or more CR/LF characters becomes a tab.
    # NOTE(review): lines arrive here one at a time, so this can only match
    # when carriage returns precede the terminating newline - confirm that
    # is the intent.
    $line =~ s/[\x0A\x0D]{3,}/\t/g;

    # Collapsing double tabs into one was tried by the original author and
    # left disabled. It stays disabled: the empty fields between adjacent
    # tabs appear to be the "filler" columns skipped by write_report().
    # $line =~ s/[\x09]{2,}/\t/g;

    $line =~ s/\$//g;        # remove every dollar sign
    $line =~ s/\x20/\t/g;    # every space becomes a tab

    # UTF-8 bytes for NBSP, U+25B2 (up-pointing triangle), NBSP. Replace the
    # first occurrence with a tab so the text on either side of the marker
    # becomes separate fields.
    $line =~ s/\xC2\xA0\xE2\x96\xB2\xC2\xA0/\t/;

    return $line;
}

# Rewrite $file in place with every line passed through clean_line().
# Dies if the file cannot be read or written.
sub clean_file {
    my ($file) = @_;

    open my $in, '<', $file
        or die "Cannot read '$file': $!\n";
    my @lines = <$in>;
    close $in;

    my @cleaned = map { clean_line($_) } @lines;

    open my $out, '>', $file
        or die "Cannot write '$file': $!\n";
    print {$out} @cleaned;
    close $out
        or die "Cannot write '$file': $!\n";

    return;
}

# Read the cleaned, tab-delimited $src_file and write $dest_file with one
# line per stock: symbol, prior close, last sale, percent change, volume.
# Lines that lack the fields needed for the calculation are skipped with a
# warning. Dies if a file cannot be opened or written.
sub write_report {
    my ($src_file, $dest_file) = @_;

    open my $in, '<', $src_file
        or die "Cannot read '$src_file': $!\n";
    open my $out, '>', $dest_file
        or die "Cannot write '$dest_file': $!\n";

    while (my $line = <$in>) {
        chomp $line;

        # Fields 2 and 7 are filler and are ignored.
        my ($stock, undef, $openpr, $change, $pct, $vol) = split /\t/, $line;

        unless (defined $openpr && defined $change) {
            warn "Skipping line $. of '$src_file': too few fields\n";
            next;
        }
        $pct = '' unless defined $pct;
        $vol = '' unless defined $vol;

        # Calculate the closing price from the prior day. Subtracting the
        # change is only right for advancers.
        my $closepr = $openpr - $change;

        # Add back the $ signs and print tab-delimited fields.
        print {$out} "$stock\t\$$closepr\t\$$openpr\t$pct\t$vol\n";
    }

    close $in;
    close $out
        or die "Cannot write '$dest_file': $!\n";

    return;
}