#!/usr/bin/perl

# For every regular, non-dot file in the directory given as the first
# command-line argument, parse the file with XML::Twig and tally:
#   * the "code" attribute of each <company> element, and
#   * the reference ID (text after the last "/") of each article/reference
#     element.
# The per-file report lines are currently commented out; the only output is
# the directory name and the list of unique reference IDs (see get_date).

# turn on perl safety features
use strict;
use warnings;

# initialize modules
use XML::Twig;
use DirHandle;

# Filled by the twig handlers (get_code / get_ref) while a file is being
# parsed.  They are file-level because the handlers are named subs and
# cannot receive extra arguments; they are emptied before every file.
my @company_list;
my @reference_list;

my $dir = $ARGV[0];
die "Must specify directory" unless defined $dir && length $dir;

for my $filepath ( get_file_list($dir) ) {

    # Start each file with empty accumulators.
    @company_list   = ();
    @reference_list = ();

    my $twig = XML::Twig->new(
        twig_roots => {
            'article/reference' => \&get_ref,
            company             => \&get_code,
        },
    );

    # Parse BEFORE tallying: the handlers populate the two lists above.
    # (Previously the parse ran after the tallies and after the lists were
    # cleared, so each file was summarised with the previous file's data
    # and the last file was never summarised at all.)
    $twig->parsefile($filepath);
    $twig->purge;    # purge to save mem.

    # Histogram of company codes; keep at most the 3 most frequent.
    my %company_hist;
    $company_hist{$_}++ for @company_list;
    my @ranked_codes =
      sort { $company_hist{$b} <=> $company_hist{$a} or $a cmp $b }
      keys %company_hist;

    # Clamp the slice so fewer than three codes does not yield undef slots.
    my $last_idx = $#ranked_codes < 2 ? $#ranked_codes : 2;
    my @top_companies = @ranked_codes[ 0 .. $last_idx ];

    # Eliminate duplicate references and count them.
    my %reference_hist;
    $reference_hist{$_}++ for @reference_list;
    my @unique_ref = sort keys %reference_hist;
    my $uni_count  = scalar @unique_ref;
    my $dup_count  = scalar(@reference_list) - $uni_count;

    get_date( \@unique_ref );

    # Debug dump of the reference histogram (disabled):
    #while ( my ( $k, $v ) = each %reference_hist ) {
    #    print "$k => $v\n";
    #}

    # Per-file report (disabled):
    #print "File name: ", print_file_name($filepath), "\n";
    #print "Total Duplicate Articles: $dup_count\n";
    #print "Total Articles Found: $uni_count\n";
    #print "@top_companies\n";
}    #end of foreach loop

exit(0);

# get_file_list($dir)
# Prints $dir, then returns the sorted list of "$dir/<name>" paths for every
# directory entry that does not start with "." and is a plain file.
# Dies if the directory cannot be opened.
sub get_file_list {
    my ($dir) = @_;    # lexical: do not clobber the caller's $dir

    print $dir, "\n";

    my $dh = DirHandle->new($dir)
      or die "can't open directory $dir: $!";

    my @visible = grep { !/^\./ } $dh->read();       # filter out dot files
    my @paths   = map  { "$dir/$_" } @visible;       # create full paths
    my @files   = grep { -f } @paths;                # choose only files

    return sort @files;                              # sort pathnames
}

# print_file_name($path)
# Despite the name this prints nothing: it returns the part of $path after
# the last "/" (the whole string when there is no "/").
sub print_file_name {
    my ($path) = @_;

    # rindex returns -1 when "/" is absent, so +1 gives offset 0.
    my $position = rindex( $path, "/" ) + 1;

    return substr( $path, $position );
}

# get_code($twig, $elt)
# Twig handler for <company>: appends the element's "code" attribute to
# @company_list.  Elements without that attribute are skipped so that no
# undef ends up as a histogram key.
sub get_code {
    my ( $twig, $elt ) = @_;

    my $company = $elt->att('code');
    push @company_list, $company if defined $company;

    return 1;    # true, as before, so handler chaining is unaffected
}

# get_ref($twig, $elt)
# Twig handler for article/reference: takes the element text and appends the
# part after the last "/" (the reference ID string) to @reference_list.
sub get_ref {
    my ( $twig, $elt ) = @_;

    my $text     = $elt->text();
    my $position = rindex( $text, "/" ) + 1;

    push @reference_list, substr( $text, $position );

    return 1;    # true, as before, so handler chaining is unaffected
}

# get_date(@array_refs)
# Prints the elements of every array reference passed in (elements are not
# separated; a tab follows each array), then two newlines.  Returns nothing.
# TODO: the name suggests a per-date tally, but no date handling exists yet.
sub get_date {
    for my $ref (@_) {
        print @$ref, "\t";
    }
    print "\n\n";

    return;
}

# Everything below is a sample input record, not Perl; __END__ stops the
# parser before it.
__END__
####
MTPW000020090731e57v004mr distdoc:archive/ArchiveDoc::Article/MTPW000020090731e57v004mr EN (c) 2009 M2 Communications, Ltd. All Rights Reserved. Anadys Pharmaceuticals, Inc (NASDAQ:ANDS) is the Highest Percentage Gainers Among NASDAQ Stocks During Morning Trading Hours; Microsoft Corporation (NASDAQ:MSFT) And Orthofix International NV (NASDAQ:OFIX) Round Out Top Three Percentage Gainers During Morning Trading Hours 2009-07-31 M2 Presswire Orthofix International N.V. 0 0 Anadys Pharmaceuticals Inc 0 0 Microsoft Corporation 0 0 Computers/Electronics Applications Software Pharmaceuticals Software Computing Systems Software Medical Equipment/Supplies Health Care Medical/Surgical Instruments/Apparatus/Devices United States North American Countries/Regions Labor/Personnel Issues Hepatitis Stock Exchanges Press Release Corporate/Industrial News Political/General News Health Medical Conditions Equity Markets Commodity/Financial Market News Content Types Factiva Filters FC&E Exclusion Filter FC&E Industry News Filter MTPW Liquid Tycoon | e-mail: info@LiquidTycoon.com | Tel: +1 214 556 6798 679