use strict;
use warnings;
use HTML::TableExtract;
# Input: the saved NASDAQ page, plus the column headers that
# HTML::TableExtract uses to pick out the table(s) of interest.
my $doc = 'nasdaq-stocks.txt';
my $headers = ['Symbol', 'Last Sale*', 'Change Net / %', 'Share Volume'];
# count => 4 selects the advancers table.
# TODO: repeat the whole run with count => 5 for the decliners.
my $table_extract = HTML::TableExtract->new(count => 4, headers => $headers);
# parse_file() reads and parses the HTML document; tables() then returns the
# matching table object(s), and rows() yields each table row as an array
# reference of cell text. Every row is written to outup-temp.txt with its
# cells concatenated (no separator) and a trailing newline.
# parse_file() returns undef (with $! set) when the file cannot be opened.
$table_extract->parse_file($doc)
    or die "Cannot parse '$doc': $!\n";
my ($table) = $table_extract->tables;
# Without this guard a page-layout change surfaces as the cryptic
# "Can't call method "rows" on an undefined value".
die "No table with the expected headers was found in '$doc'\n"
    unless defined $table;
open(my $up_fh, '>', 'outup-temp.txt')
    or die "Cannot write 'outup-temp.txt': $!\n";
for my $row ($table->rows) {
    # Empty cells come back as undef; print them as empty strings so the
    # output is unchanged but no "uninitialized value" warnings are raised.
    my @cells = map { defined $_ ? $_ : '' } @$row;
    print {$up_fh} @cells, "\n";
}
# Buffered write errors only show up at close time, so check it.
close($up_fh) or die "Cannot close 'outup-temp.txt': $!\n";
# Folding these substitutions into the row-writing loop above did not work
# out, so outup-temp.txt is read back into @lines, cleaned one line at a
# time, and then written out again over the same file.
my $filename = 'outup-temp.txt';

# Pull every line of the temp file into memory.
open(my $fh, '<', $filename) or die "Cannot read '$filename': $!\n";
my @lines = <$fh>;
close($fh);

# Strip the junk from each line and introduce tab delimiters.
foreach my $line (@lines) {
    # Leading whitespace only needs removing once, so no /g here.
    $line =~ s/^\s+//;
    # A run of three or more CR/LF characters becomes a single tab.
    # NOTE(review): the file is read line by line, so such a run can only
    # occur as several CRs before the final LF - confirm this ever matches.
    $line =~ s/[\x0A\x0D]{3,}/\t/g;
    # Collapsing doubled tabs into one was attempted but never worked:
    # $line =~ s/[\x09]{2,}/\t/g;
    # Remove every dollar sign.
    $line =~ s/\$//g;
    # Every space becomes a tab.
    $line =~ s/\x20/\t/g;
    # The separator between the opening price and the percent change is the
    # UTF-8 byte sequence for NBSP, U+25B2 (up-pointing triangle), NBSP;
    # its first occurrence is replaced with a tab.
    $line =~ s/\xC2\xA0\xE2\x96\xB2\xC2\xA0/\t/;
}

# Overwrite outup-temp.txt with the cleaned lines.
open($fh, '>', $filename) or die "Cannot write '$filename': $!\n";
print {$fh} @lines;
close($fh);
# The cleaned temp file is tab-delimited: split each line into its fields,
# derive the prior day's closing price, and write the result to outup.txt.
#
# Expected field order per line (taken from the original variable names):
#   symbol, filler, price, net change, percent change, volume, filler
my $clean_file  = 'outup-temp.txt';
my $report_file = 'outup.txt';

open(my $in_fh, '<', $clean_file)
    or die "Cannot read '$clean_file': $!\n";
# Write through a dedicated handle rather than re-opening STDOUT, so the
# script's real standard output is left intact.
open(my $out_fh, '>', $report_file)
    or die "Cannot write '$report_file': $!\n";

LINE:
while (my $line = <$in_fh>) {
    # The original loop was "while ()", which never read from the file and
    # looped forever; read one line per iteration instead.
    chomp $line;

    # The two filler columns are discarded (undef placeholder / not captured).
    my ($stock, undef, $openpr, $change, $pct, $vol) = split /\t/, $line;

    # Skip fragments that do not carry both a price and a net change;
    # without them no closing price can be calculated.
    next LINE unless defined $change && length $openpr && length $change;

    # split drops trailing empty fields, so the last columns may be missing.
    for my $field ($pct, $vol) {
        $field = '' unless defined $field;
    }

    # For advancers the prior day's close is the current price minus the gain.
    my $closepr = $openpr - $change;

    # Add the dollar signs back and emit tab-delimited fields.
    print {$out_fh} "$stock\t\$$closepr\t\$$openpr\t$pct\t$vol\n";
}

close($in_fh);
# Buffered write errors only show up at close time, so check it.
close($out_fh) or die "Cannot close '$report_file': $!\n";