sub read_rle_file {#read run length encoded file my $filespec=shift; my $numfields=shift; my $sub=shift; my $sha1=Digest::SHA1->new(); my $IN_IO; print "Reading run length encoded file $filespec, with records of $numfields fields.\n" if $Debug; if ( $filespec =~ /\.gz/ ) { $IN_IO = IO::Zlib->new( $filespec, "rb" ) or die "Cannot open compressed run length encoded file $filespec : \n" ; } else { $IN_IO = IO::File->new($filespec) or die "Cannot open run length encoded file $filespec : \n" ; binmode $IN_IO; } my $buffer=""; # The buffer we are using my $records=0; # Number of record we have read my $buffers=0; # The number of times we have refilled the buffer my $bytes =0; # The number of bytes we have read so far my $record=[]; # the array of records # read until the file is empty while (!$IN_IO->eof ) { my $read_buffer; my $bytesread = $IN_IO->read( $read_buffer, $Config{buffer_size} ); die "Read error in read_rle_file($filespec,$numfields)\n" unless defined $bytesread; $bytes+=$bytesread; $sha1->add($read_buffer); $buffer.=$read_buffer; my @records; # try to extract as many records as possible from the buffer BUFFER: while ((my $len=ord($buffer)) < length $buffer) { push @$record,unpack("C/a",$buffer); substr($buffer,0,$len+1,""); if (@$record==$numfields) { push @records,$record; $record=[]; } } # hand off to the callback the records we have extracted so far # we do this in chunks to save the callback overhead $sub->(\@records); $records+=@records; print "After buffer ".($buffers++)." read $records records from $bytes bytes.\n" if $Debug>1; } die "Unprocessed data in buffer! read_rle_file($filespec,$numfields) failed!\n[@$record] $buffer\n" if @$record || length $buffer; return wantarray ? ($sha1->b64digest,$records) : $sha1->b64digest; }