sub btree{ my ($disk,$workdir,$outFileName,$sep,$keyMap_aref,$verbose,$infile +_aref,$indir,$outdir,$splitSize,$keyPos_aref,$outCol_aref)=@_; my @keyMap=@{$keyMap_aref}; my @infile=@{$infile_aref}; my $outfile=$outdir.'/'.$outFileName; my @keyPos=@{$keyPos_aref}; my @outCol=@{$outCol_aref} if defined $outCol_aref; my $idx; my %h ; # generate a ref to anonymous sub for parsing key from data line # ################################# my $parse; #my $tmp=$splitSize-2; my $list=join ',',@keyPos; #my $pcode='$parse=sub {my($line)=@_;my $key=join(\''."$sep".'\',( +(split(/'."$sep".'/,$line,'."$splitSize".'))['."$list".']));return $k +ey}'; my $pcode='$parse=sub {my($line)=@_;join(\''."$sep".'\',((split(/' +."$sep".'/,$line,'."$splitSize".'))['."$list".']));};'; print "pcode: $pcode\n" if $verbose; eval $pcode; # ################################# if($disk){ $idx="$workdir".'/'."$outFileName".'.idx'; if(-f $idx){ unlink "$idx" or croak "cannot unlik $idx: $!\n"; } } # create the btree object # ################################# my $t = '$DB_BTREE->{\'compare\'} = '.genComp($sep,\@keyMap); print "sort criteria: $t\n" if $verbose; eval $t; my $mybtree; if ($disk){ $mybtree=tie %h, "DB_File", "$idx", O_RDWR|O_CREAT, 0666, $DB_ +BTREE or croak "Cannot open file $idx: $!\n" ; } else{ $mybtree=tie %h, "DB_File", undef, O_RDWR|O_CREAT, 0666, $DB_B +TREE ; } # lets do it. lets fill up the btree object # ################################# my $bench0=new Benchmark; # Add a key/value pair to the file unless(defined $outCol_aref){ foreach my $infile(@infile){ print "infile=$infile\n" if $verbose; my $fh = new IO::File "$indir".'/'."$infile", "r" or +croak "Cannot open file $infile: $!\n"; while(not $fh->eof){ my $line=<$fh>; my $key=$parse->($line); $h{$key}=$line; } $fh->close(); } } else{ foreach my $infile(@infile){ print "infile=$infile\n" if $verbose; my $fh = new IO::File "$indir".'/'."$infile", "r" or +croak "Cannot open file $infile: $!\n"; while(not $fh->eof){ my $line=<$fh>; my $key=$parse->($line); $h{$key}=join $sep,((split(/$sep/,$line,$splitSize +))[@outCol]); } $fh->close(); } } # Cycle through the keys printing them in order. # ################################# my $fh = new IO::File "$outfile", "w"; my $bench1=new Benchmark; my($key,$value); for (my $status = $mybtree->seq($key, $value, R_FIRST) ; $status == 0 ; $status = $mybtree->seq($key, $value, R_NEXT) ){ chomp $value; print $fh "$value".$/; } my $bench2=new Benchmark; # done $fh->close; untie %h ; my $diff=timediff($bench1,$bench0); print "sort ".timestr($diff)."\n" if $verbose; my $diff=timediff($bench2,$bench1); print "write ".timestr($diff)."\n" if $verbose; my $diff=timediff($bench2,$bench0); print "total ".timestr($diff)."\n" if $verbose; } sub genComp{ my ($sep,$keyMap_ref)=@_; my @keyMap=@{$keyMap_ref}; my $code = 'sub {'; $code .= 'my($k1,$k2)=@_; my @k1=split /'."$sep".'/,$k1; my + @k2=split /'."$sep".'/,$k2;'; #$code .= '"$k1[0]" cmp "$k2[0]" || "$k1[1]" <=> "$k2[1]" | +| "$k1[2]" <=> "$k2[2]";'; for (my $i=0; $i <= $#keyMap; $i++){ $code .= '"$k1['; $code .= $i; $code .= ']" '; $code .= $keyMap[$i] eq 'C' ? 'cmp' : '<=>'; $code .= ' "$k2['; $code .= $i; $code .= ']" '; $code .= ' || ' if $i != $#keyMap; } $code .= ';'; $code .= '}'; }
update (broquaint): added <readmore> tag
In reply to perl sort versus Unix sort by Anonymous Monk
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |