# llil.pl # Example run: perl llil.pl tt1.txt tt2.txt >oo1.tmp use strict; use warnings; # ---------------------------------------------------------------------- # LLiL specification # ------------------ # A LLiL-format file is a text file. # Each line consists of a lowercase name a TAB character and a non-negative integer count. # That is, each line must match : ^[a-z]+\t\d+$ # For example, reading the LLiL-format files, tt1.txt containing: # camel\t42 # pearl\t94 # dromedary\t69 # and tt2.txt containing: # camel\t8 # hello\t12345 # dromedary\t1 # returns this hashref: # $hash_ret{"camel"} = 50 # $hash_ret{"dromedary"} = 70 # $hash_ret{"hello"} = 12345 # $hash_ret{"pearl"} = 94 # That is, values are added for items with the same key. # # To get the required LLiL text, you must sort the returned hashref # descending by value and insert a TAB separator: # hello\t12345 # pearl\t94 # dromedary\t70 # camel\t50 # To make testing via diff easier, we further sort ascending by name # for lines with the same value. # ---------------------------------------------------------------------- # Function get_properties # Read a list of LLiL-format files # Return a reference to a hash of properties sub get_properties { my $files = shift; # in: reference to a list of LLiL-format files my %hash_ret; # out: reference to a hash of properties for my $fname ( @{$files} ) { open( my $fh, '<', $fname ) or die "error: open '$fname': $!"; while (<$fh>) { chomp; my ($word, $count) = split /\t/; $hash_ret{$word} += $count; } close($fh) or die "error: close '$fname': $!"; } return \%hash_ret; } # ----------------- mainline ------------------------------------------- @ARGV or die "usage: $0 file...\n"; my @llil_files = @ARGV; warn "llil start\n"; my $tstart1 = time; my $href = get_properties( \@llil_files ); my $tend1 = time; my $taken1 = $tend1 - $tstart1; warn "get_properties : $taken1 secs\n"; my $tstart2 = time; for my $key ( sort { $href->{$b} <=> $href->{$a} || $a cmp $b } keys %{$href} ) { print "$key\t$href->{$key}\n"; } my $tend2 = time; my $taken2 = $tend2 - $tstart2; my $taken = $tend2 - $tstart1; warn "sort + output : $taken2 secs\n"; warn "total : $taken secs\n";