package LLiL;

use strict;
use warnings;

# Read a LLiL-format file and accumulate its per-word counts.
#
# Each input line is expected to have the form "word<TAB>count". For every
# line read, the count is added to the entry for that word in the caller's
# hash, so calling this sub on several files with the same hash reference
# sums the counts across all of them.
#
# Arguments:
#   $fname    - in:    a LLiL-format filename
#   $hash_ret - inout: a reference to a hash of properties (word => total)
#
# Returns the number of lines in the file, or -1 if the file could not be
# opened (in which case the hash is left untouched).
sub get_properties {
    my $fname    = shift;    # in: a LLiL-format filename
    my $hash_ret = shift;    # inout: a reference to a hash of properties

    my $cnt = 0;
    open( my $fh, '<', $fname ) or return -1;

    # Read each line into a lexical instead of the global $_. A bare
    # "while (<$fh>)" assigns to $_ without localising it, which would
    # overwrite the caller's data whenever this sub is called while $_ is
    # aliased (for example inside "for (@files) { ... }" or map/grep).
    while ( my $line = <$fh> ) {
        ++$cnt;
        chomp $line;
        my ( $word, $count ) = split /\t/, $line;
        $hash_ret->{$word} += $count;
    }
    close($fh);

    return $cnt;
}

# Note: Some extra validation that could be done in get_properties() above
# ( not done because, to allow the code to run as fast as possible,
#   get_properties assumes the input data adheres to the LLiL spec,
#   that is, each line matches ^[a-z]+\t\d+$ ):
#    $line =~ s/^\s+//; $line =~ s/\s+$//;   # remove leading and trailing whitespace
#    next unless length $line;               # ignore empty lines
#    $word  =~ /^[a-z]+$/ or die "error: invalid word '$line' (must contain [a-z] only)";
#    $count =~ /^\d+$/    or die "error: invalid count '$line' (must contain [0-9] only)";

1;