That could work. Will that concept fit in place of line 77 in the code below?
#!/usr/bin/perl -w
use strict; use warnings;
# Driver: read a list of EDGAR archive paths, fetch the start of each filing
# via get_process_trunc(), and append one pipe-delimited record per filing
# to the output file.
my $base_url = 'http://www.sec.gov/Archives';
#Assign variable to file with URLs;
my $urls = 'c:/my documents/research/sec filings/10K and 10Q/data/urls/sizefiles1.txt';
#my $urls = 'g:/research/SEC filings 10K and 10Q/data/urls';
#open text file with URLs, read URLs into array;
open(my $urls_fh, '<', $urls) or die "can't open $urls: $!";
my @aonly = <$urls_fh>;
#close text file with URLs;
close $urls_fh or die "Cannot close $urls: $!";
# Strip line endings / surrounding whitespace so they do not end up inside
# the request URL, and drop blank lines so they are not fetched as paths.
s/^\s+|\s+\z//g for @aonly;
@aonly = grep { length } @aonly;
#Display array contents/elements;
print "$_\n" for @aonly;
#initialize file counter;
my $file_count = 0;
# NOTE: despite its name, $FH_OUT holds the output file *path*, not a handle.
my $FH_OUT = "c:/my documents/research/sec filings/Data2016_fiscal_year.txt";
#"g:/research/SEC filings 10K and 10Q/data/header data/Data2016_fiscal_year.txt";
# Column order of the output record; keys match those set by get_process_trunc.
my @fields = qw/ cik form_type report_date file_date name fiscal_year_ended /;
# Open the output once, in append mode, instead of re-opening it per record.
open(my $out_fh, '>>', $FH_OUT) or die "Couldn't open $FH_OUT: $!";
foreach my $filetoget (@aonly) {
    my $res = get_process_trunc($filetoget);
    # Only write a record when at least one field was extracted.
    if (scalar(keys(%$res))) {
        my $lineout = '';
        for my $field (@fields) {
            # Missing fields become empty columns; every column, including
            # the last, is terminated by '|'.
            $lineout .= $res->{$field} if defined $res->{$field};
            $lineout .= '|';
        } #close for my $field loop;
        print {$out_fh} $lineout . "\n"
            or die "Couldn't write to $FH_OUT: $!";
        print "$lineout\n";
    } #close if scalar(keys ... ) block;
    # Progress counter: counts every path processed, matched or not.
    $file_count++;
    print "$file_count\n";
} #close foreach my $filetoget loop;
close($out_fh) or die "Cannot close $FH_OUT: $!";
exit;
sub get_process_trunc {
    # http://www.perlmonks.org/?node_id=1183107
    #
    # Fetch only the beginning of one filing and extract header fields.
    #
    # Argument: the archive path of the filing, appended to the file-level
    #           $base_url to form the request URL.
    # Returns:  a hash reference that may contain the keys cik, form_type,
    #           report_date, file_date, name and fiscal_year_ended. Keys
    #           whose pattern did not match are absent; the hash is empty
    #           when no content was received.
    my $filetoget = shift;
    my $fullfile  = "$base_url/$filetoget";
    my $res       = {};
    use LWP::UserAgent;
    my $received_size = 0;
    # Start from an empty string: appending to a numeric 0 would put a
    # spurious leading "0" in front of the downloaded text.
    my $partial = '';
    my $ua      = LWP::UserAgent->new;
    $ua->get(
        $fullfile,
        ':content_cb' => sub {
            my ($data, $response, $protocol) = @_;
            $partial       .= $data;
            $received_size += length $data;
            # Stop downloading once a little over 10000 bytes have arrived.
            # die inside this callback interrupts the request, not the program!!
            die if ($received_size > 10000);
        }
    );

    # Nothing was received: return the empty result.
    return $res unless length $partial;

    # Each pattern is applied to the whole buffer. The header patterns rely
    # on /m so that ^ and $ anchor at individual lines, which means the first
    # occurrence in the buffer wins. The fiscal-year phrase is searched
    # anywhere in the downloaded text.
    my @patterns = (
        [ cik               => qr/^\s*CENTRAL\s*INDEX\s*KEY:\s*(\d*)/m ],
        [ form_type         => qr/^\s*FORM\s*TYPE:\s*(.*$)/m ],
        [ report_date       => qr/^\s*CONFORMED\s*PERIOD\s*OF\s*REPORT:\s*(\d*)/m ],
        [ file_date         => qr/^\s*FILED\s*AS\s*OF\s*DATE:\s*(\d*)/m ],
        [ name              => qr/^\s*COMPANY\s*CONFORMED\s*NAME:\s*(.*$)/m ],
        [ fiscal_year_ended => qr/\s*For\s*the\s*fiscal\s*year\s*end(.*)/i ],
    );
    for my $pair (@patterns) {
        my ($field, $pattern) = @$pair;
        next unless $partial =~ $pattern;
        my $value = $1;
        # Drop a trailing carriage return / spaces so they do not leak into
        # the pipe-delimited output.
        $value =~ s/\s+\z//;
        $res->{$field} = $value;
    }
    return $res;
} # close get_process_trunc sub;
|