use strict; use warnings; use IO::File; use IO::Zlib; use Digest::SHA1; our %Config=(buffer_size=>65535); #Read run length encoded file sub read_rle_file { my $filespec =shift; # file to read my $numfields=shift; # number of fields to a record my $sub =shift; # a CODE ref to call with a chunk of records my $sha1=Digest::SHA1->new(); my $IN_IO; print "Reading run length encoded file $filespec, with records of $numfields fields.\n" if $Debug; if ( $filespec =~ /\.gz/ ) { $IN_IO = IO::Zlib->new( $filespec, "rb" ) or die "Cannot open compressed run length encoded file $filespec : \n" ; } else { $IN_IO = IO::File->new($filespec) or die "Cannot open run length encoded file $filespec : \n" ; binmode $IN_IO; } my $buffer=""; # The buffer we are using my $records=0; # Number of record we have read my $buffers=0; # The number of times we have refilled the buffer my $bytes =0; # The number of bytes we have read so far # read until the file is empty while (!$IN_IO->eof ) { my $read_buffer; my $bytesread = $IN_IO->read( $read_buffer, $Config{buffer_size} ); die "Read error in read_rle_file($filespec,$numfields)\n" unless defined $bytesread; $bytes+=$bytesread; $sha1->add($read_buffer); $buffer.=$read_buffer; my @records; # try to extract as many records as possible from the buffer BUFFER: while ($buffer) { # Keep extracting records as long as the buffer isnt empty my @record; while (@record<$numfields) { # Extract field by field until we have a complete record. my $len=ord(substr($buffer,0,1)); # do we need to refill the buffer? if ($len+1>length $buffer) { # sigh, we do. Put what we've read sofar back into the buffer $buffer=join("",(map{chr(length $_).$_}@record),$buffer); last BUFFER; } push @record,substr($buffer,1,$len); substr($buffer,0,$len+1,""); } push @records,\@record; } # hand off to the callback the records we have extracted so far # we do this in chunks to save the callback overhead $sub->(\@records); $records+=@records; print "After buffer ".($buffers++)." read $records records from $bytes bytes.\n" if $Debug>1; } die "Unprocessed data in buffer! read_rle_file($filespec,$numfields) failed!\n" if length $buffer; return wantarray ? ($sha1->b64digest,$records) : $sha1->b64digest; }