in reply to Re^3: Geo::ShapeFile memory problem
in thread Geo::ShapeFile memory problem

Also, one thing to watch for in any plotting code is holes in the polygons. I don't think the Tiger data have holes

Ooooo, they do. I found that PDF about 9 hours before you mentioned it. I'm dealing with them now, at a huge cost; maybe more on that later.

I'll live with unzipped dirs for now, and look more closely into "seek"ing on a zip file and into the format of a raw shp file, to see whether my sequential reader would work for sequential access.

My cheap zip sequencer

package cheap::zipbyline;

# Read a single member of a zip archive one line at a time, without
# extracting the member to disk or holding all of it in memory.
#
# Usage:
#     my $member = zipbyline_start($zip_path, $member_name);
#     while ( defined( my $line = zipbyline_read($member) ) ) { ... }
#     zipbyline_close($member);

use strict;
use warnings;
use Exporter;
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );

our @ISA = qw( Exporter );

# these CAN be exported.
our @EXPORT_OK = qw( zipbyline_start zipbyline_read zipbyline_close );

# these are exported by default.
our @EXPORT = qw( );

# Read-ahead buffer for each open member, keyed by the stringified member
# object. A defined value holds data read from the archive but not yet
# returned; undef marks a member whose data has all been returned.
my %zbl;

# zipbyline_start($zf, $mf)
#   $zf - path of the zip archive
#   $mf - name of the member inside the archive
# Returns the Archive::Zip member object, positioned at the start of its
# data; pass it to zipbyline_read() and zipbyline_close().
# Dies if the archive cannot be read, the member does not exist, or the
# member's data cannot be rewound.
sub zipbyline_start {
    my ( $zf, $mf ) = @_;

    my $zip = Archive::Zip->new();
    unless ( $zip->read($zf) == AZ_OK ) { die 'read error'; }

    # memberNamed() yields nothing for an unknown name; fail with a
    # message that names the member instead of a method call on undef.
    my $member = $zip->memberNamed($mf);
    die "member '" . ( defined $mf ? $mf : '' ) . "' not found in '$zf'"
        unless $member;

    # Ask for stored (uncompressed) output so readChunk() returns the
    # member's plain data.
    $member->desiredCompressionMethod(COMPRESSION_STORED);

    my $status = $member->rewindData();
    die "error $status" unless $status == AZ_OK;

    $zbl{$member} = '';
    return $member;
}    # zbl start

# zipbyline_read($member)
#   $member - object returned by zipbyline_start()
# Returns the next line including its trailing "\n". The last line is
# returned even when the member does not end with "\n". Returns undef
# (empty list in list context) once all data has been returned.
# Dies if Archive::Zip reports a read error.
sub zipbyline_read {
    my $member = shift;

    # Exhausted, closed, or never started: nothing to return.
    return unless defined $zbl{$member};

    # Pull 1000-byte chunks until the buffer holds a full line or the
    # member runs out of data.
    my $nl = index( $zbl{$member}, "\n" );
    while ( $nl == -1 && !$member->readIsDone() ) {
        my ( $bufferRef, $status ) = $member->readChunk(1000);
        die "error $status"
            if $status != AZ_OK && $status != AZ_STREAM_END;
        $zbl{$member} .= $$bufferRef;
        $nl = index( $zbl{$member}, "\n" );
    }

    if ( $nl == -1 ) {

        # End of data: hand out whatever is left as the final,
        # unterminated line, then mark the member as exhausted.
        my $rest = $zbl{$member};
        $zbl{$member} = undef;
        return $rest if length $rest;
        return;
    }

    my $line = substr( $zbl{$member}, 0, $nl + 1 );
    $zbl{$member} = substr( $zbl{$member}, $nl + 1 );
    return $line;
}    # zbl

# zipbyline_close($member)
#   $member - object returned by zipbyline_start()
# Ends the read on the member and discards its buffer. Calling it again
# for the same member is a no-op.
sub zipbyline_close {
    my $member = shift;

    # Only members registered by zipbyline_start() have an entry, so the
    # method call is safe whenever the key exists.
    $member->endRead() if exists $zbl{$member};
    delete $zbl{$member};
    return;
}    # zbl close

1;
This will probably get improvements so I can read two files out of the same zip at the same time without doing two sets of my $zip=new ... $zip->read($zf); I haven't needed that yet.

Replies are listed 'Best First'.
Re^5: Geo::ShapeFile memory problem
by pmqs (Friar) on Apr 23, 2017 at 19:33 UTC
    As someone has already mentioned, IO::Uncompress::Unzip has a filehandle interface that hides all the complexity of reading directly from a zip file. Looking at the post from a few days ago, the commented-out block below showed reading from the zip file:
    # my $sn=$fips2state->{$sfips}.'2010.sf1'; # my $zf=$dir0.'/sf1/'.$sn.'.zip'; # my $mf=$fips2state->{$fips}.'geo2010.sf1'; ; # my $member=zipbyline_start($zf,$mf); # while (my $line=zipbyline_read($member)){ # ... pull out datums AREALAND AREAWATR POP100, create density # } # line # zipbyline_close($member);

    That would become this with IO::Uncompress::Unzip

    use IO::Uncompress::Unzip qw($UnzipError);

    # Build the archive path and the name of the member to read.
    my $sn = $fips2state->{$sfips} . '2010.sf1';
    my $zf = $dir0 . '/sf1/' . $sn . '.zip';
    my $mf = $fips2state->{$fips} . 'geo2010.sf1';

    # IO::Uncompress::Unzip is a class, so the handle comes from its
    # constructor. new() returns undef on failure (for example when the
    # member is not in the archive) and leaves the reason in $UnzipError.
    my $member = IO::Uncompress::Unzip->new( $zf, Name => $mf )
        or die "cannot read '$mf' from '$zf': $UnzipError\n";

    while ( my $line = <$member> ) {
        # ... pull out datums AREALAND AREAWATR POP100, create density
    }

    close $member;

      Interesting

      13.zip is a 51meg zip file with 10,000 members
      13/00/00.lst is the first
      13/99/99.lst is the last
      13/50/50.lst is roughly in the middle

      # Compare two ways of reading one member of a zip archive line by
      # line: the hand-rolled Archive::Zip reader (zipbyline_*) and the
      # filehandle interface of IO::Uncompress::Unzip. For each member the
      # script reports whether both returned the same text, the seconds
      # spent opening and reading, and the number of lines read.
      use strict;
      use warnings;
      use IO::Uncompress::Unzip qw(unzip $UnzipError);
      use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
      use IO::File;
      use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
      use File::Basename;
      use Scalar::Util qw(blessed);

      # Read-ahead buffer for each open member, keyed by the stringified
      # member. A defined value holds data not yet returned; undef marks a
      # member that is exhausted or was not found in the archive.
      my %zbl;

      compare( '13.zip', '13/00/00.lst' );
      compare( '13.zip', '13/50/50.lst' );
      compare( '13.zip', '13/99/99.lst' );
      compare( '13.zip', 'xx/99/99.lst' );
      exit;

      # compare($zf, $mf)
      #   $zf - path of the zip archive
      #   $mf - name of the member to read
      # Reads the member with both readers and prints the comparison.
      sub compare {
          my ( $zf, $mf ) = @_;

          print 'testing:' . $zf . ' ' . $mf . "\n";
          my ( $wzbl, $t1zbl, $t2zbl, $lzbl ) = zbl( $zf, $mf );
          my ( $wzio, $t1zio, $t2zio, $lzio ) = zio( $zf, $mf );

          print $wzio eq $wzbl ? "are same\n" : "are different\n";
          printf "io :open%4d read:%4d lines%4d\n", $t1zio, $t2zio, $lzio;
          printf "zbl:open%4d read:%4d lines%4d\n", $t1zbl, $t2zbl, $lzbl;
          print "\n";
          return;
      }

      # zbl($zf, $mf)
      # Reads the member with the zipbyline_* routines.
      # Returns (text, seconds to open, seconds to read, line count).
      sub zbl {
          my ( $zf, $mf ) = @_;

          my $wzbl  = '';
          my $lines = 0;

          my $time0  = time;
          my $member = zipbyline_start( $zf, $mf );
          my $time1  = time;

          # Test for definedness so a final unterminated line of "0" is
          # not mistaken for end of data.
          while ( defined( my $line = zipbyline_read($member) ) ) {
              $wzbl .= $line;
              $lines++;
          }
          zipbyline_close($member);

          my $dt1 = $time1 - $time0;
          my $dt2 = time - $time1;
          return ( $wzbl, $dt1, $dt2, $lines );
      }

      # zio($zf, $mf)
      # Reads the member through IO::Uncompress::Unzip. When the member
      # cannot be opened, the reason is printed and no lines are read.
      # Returns (text, seconds to open, seconds to read, line count).
      sub zio {
          my ( $zf, $mf ) = @_;

          my $wio   = '';
          my $lines = 0;

          my $time0 = time;
          my $file  = IO::Uncompress::Unzip->new( $zf, Name => $mf );
          my $time1 = time;

          if ($file) {
              while ( defined( my $line = <$file> ) ) {
                  $wio .= $line;
                  $lines++;
              }
              close $file;
          }
          else {
              print "IO::Uncompress::unzip failed: $UnzipError\n\n";
          }

          my $dt1 = $time1 - $time0;
          my $dt2 = time - $time1;
          return ( $wio, $dt1, $dt2, $lines );
      }

      # zipbyline_start($zf, $mf)
      # Opens the archive and positions the named member at the start of
      # its data. Returns the Archive::Zip member, or an empty placeholder
      # hash reference when the member does not exist; the placeholder
      # makes zipbyline_read() return no lines.
      # Dies if the archive cannot be read or the member cannot be rewound.
      sub zipbyline_start {
          my ( $zf, $mf ) = @_;

          my $zip = Archive::Zip->new();
          unless ( $zip->read($zf) == AZ_OK ) { die 'read error'; }

          my $member = $zip->memberNamed($mf);
          if ($member) {

              # Ask for stored (uncompressed) output so readChunk()
              # returns the member's plain data.
              $member->desiredCompressionMethod(COMPRESSION_STORED);
              my $status = $member->rewindData();
              die "error $status" unless $status == AZ_OK;
              $zbl{$member} = '';
          }
          else {
              $member = {};
              $zbl{$member} = undef;
          }
          return $member;
      }    # zbl start

      # zipbyline_read($member)
      # Returns the next line including its trailing "\n". The last line
      # is returned even when the member does not end with "\n". Returns
      # undef once all data has been returned or when the member was not
      # found. Dies if Archive::Zip reports a read error.
      sub zipbyline_read {
          my $member = shift;

          return unless defined $zbl{$member};

          # Pull 1000-byte chunks until the buffer holds a full line or
          # the member runs out of data.
          my $nl = index( $zbl{$member}, "\n" );
          while ( $nl == -1 && !$member->readIsDone() ) {
              my ( $bufferRef, $status ) = $member->readChunk(1000);
              die "error $status"
                  if $status != AZ_OK && $status != AZ_STREAM_END;
              $zbl{$member} .= $$bufferRef;
              $nl = index( $zbl{$member}, "\n" );
          }

          if ( $nl == -1 ) {

              # End of data: return what is left as the final,
              # unterminated line and mark the member as exhausted.
              my $rest = $zbl{$member};
              $zbl{$member} = undef;
              return $rest if length $rest;
              return;
          }

          my $line = substr( $zbl{$member}, 0, $nl + 1 );
          $zbl{$member} = substr( $zbl{$member}, $nl + 1 );
          return $line;
      }    # zbl

      # zipbyline_close($member)
      # Ends the read on a real member and discards its buffer. The
      # placeholder used for a missing member is an unblessed hash, so it
      # is skipped by the blessed() check.
      sub zipbyline_close {
          my $member = shift;

          $member->endRead()
              if blessed($member) && $member->can('endRead');
          delete $zbl{$member};
          return;
      }    # zbl close
      result
      testing:13.zip 13/00/00.lst are same io :open 2 read: 0 lines 96 zbl:open 1 read: 0 lines 96 testing:13.zip 13/50/50.lst are same io :open 101 read: 0 lines 198 zbl:open 1 read: 0 lines 198 testing:13.zip 13/99/99.lst are same io :open 192 read: 0 lines 69 zbl:open 2 read: 0 lines 69 testing:13.zip xx/99/99.lst IO::Uncompress::unzip failed: Cannot find 'xx/99/99.lst' are same io :open 199 read: 0 lines 0 zbl:open 1 read: 0 lines 0
      I made changes to the zipbyline_* routines so they do not die on a missing member, and so they handle files that don't end with \n.

      I'm going to stick with zipbyline for now; it probably needs more work, though.

        Should xx/99/99.lst exist in 13.zip? If it does, that sounds like a possible bug in IO::Uncompress::Unzip.