in reply to Re^4: Spliting Table
in thread Spliting Table
I wasn't sure if this would scale to 3+GB but I tested it with a 500MB file (100 vps and 100_000 lines) and it took <1 minute.
update : header line corrected to include AVG_Beta — poj

#!perl
use strict;
use warnings;

# Split a wide tab-separated table into one output file per "vp".
# The first $k+1 columns are common fields; every later column header is
# named "vp.attr".  Each out_<vp>.dat receives the common fields plus
# that vp's own attribute columns.

my %head = ();    # per-vp list of output header column names
my @vp   = ();    # vp names, in first-seen column order
my %fh   = ();    # per-vp output filehandle
my $width;        # number of attribute columns per vp (measured on the first vp)
my $t0     = time();
my $infile = '500M.dat';

# read header — lexical filehandle, 3-arg open (was bareword IN)
open my $in, '<', $infile or die "could not open $infile : $!";
chomp( my $line1 = <$in> );
my @head = split "\t", $line1;

# scan across the columns
my $k = 3;    # index of the last common (repeated) field
for my $c ($k+1 .. $#head){
  my ($vp, $attr) = split /\./, $head[$c];
  # open new filehandle for each vp
  if (not exists $fh{$vp}){
    my $outfile = "out_$vp.dat";
    open $fh{$vp}, '>', $outfile or die "Could not open $outfile : $!";
    push @vp, $vp;
    # common fields plus THIS vp's first column header.
    # BUGFIX: the original used @head[0..$k+1], which reused the FIRST
    # vp's first column header ($head[4]) for every later vp.
    @{$head{$vp}} = (@head[0..$k], $head[$c]);
    print "Opened $outfile for $vp\n";
  } else {
    push @{$head{$vp}}, $head[$c];
  }
  # count columns only while a single vp has been seen; once a second vp
  # is pushed (above), @vp >= 2 and the count is frozen
  ++$width if @vp < 2;
}
print "Width = $width\n";

# write headers to outfiles
for (keys %fh){
  print { $fh{$_} } (join "\t", @{$head{$_}})."\n";
}

# process file
my $count = 1;    # starts at 1: the header line has already been read
while (<$in>){
  chomp;
  my @f = split "\t", $_;
  my $begin = $k + 1;    # was hard-coded 4; keep consistent with $k
  for my $vp (@vp){
    my $end = $begin + $width - 1;
    #print "$vp $begin $end\n";
    print { $fh{$vp} } (join "\t", @f[0..$k, $begin..$end])."\n";
    # move along to next vp
    $begin += $width;
  }
  ++$count;
}
close $in;

# close out files — check close on write handles: buffered write errors
# (e.g. disk full) only surface at close time
for (keys %fh){
  close $fh{$_} or die "Could not close output file for $_ : $!";
  print "File closed for $_\n";
}

my $dur = time - $t0;
print "$count lines read from $infile\n";
print scalar @vp." files created in $dur seconds\n";
Replies are listed 'Best First'.
Re^6: Spliting Table
by CountZero (Bishop) on Aug 21, 2015 at 08:07 UTC
by poj (Abbot) on Aug 21, 2015 at 08:42 UTC