package FASTA::Faster;

# Read-only tied-hash view of a FASTA file.
#
#   tie my %sequence, 'FASTA::Faster', $path;
#   my $residues = $sequence{ $id };
#
# The whole file is read into one string. Each record ID (the first
# whitespace-delimited word after '>') maps to a reference to a substr()
# window onto that string, so sequence data is only copied when a record
# is fetched. STORE and DELETE croak.

use strict;
use warnings;
use Carp;

# Inside-out storage, keyed by the stringified object reference.
my %raw;    # object => entire file contents plus a trailing "\n>" sentinel
my %seq;    # object => { record ID => reference to substr() window on %raw }

# When true, every tie method carps its name and arguments.
our $DEBUG = 0;

# TIEHASH( $class, $file )
# Loads $file and indexes its records. Arguments after $file are ignored.
# Croaks if the file cannot be opened or read. An empty file yields an
# empty hash.
sub TIEHASH {
    $DEBUG and carp "TIEHASH: @_";
    my( $class, $file ) = @_;
    my $self = bless \$file, $class;
    my $id   = "$self";

    open my $in, '<:raw', $file or croak "$file : $!";

    # sysread may legitimately return fewer bytes than requested, and
    # returns 0 (not an error) at end of file, so loop until the whole
    # file is in and croak only when sysread returns undef.
    my $size = ( -s $in ) || 0;
    $raw{ $id } = '';
    while( ( my $left = $size - length $raw{ $id } ) > 0 ) {
        my $got = sysread( $in, $raw{ $id }, $left, length $raw{ $id } );
        defined $got or croak "$file : $!";
        last if $got == 0;
    }
    close $in;

    # Sentinel: guarantees a line-start '>' after the last record so the
    # lookahead below can terminate it.
    $raw{ $id } .= "\n>";

    # Always have a record map, even when no record matches.
    $seq{ $id } = {};

    while( $raw{ $id } =~ m{
            ^ > (\S+)       # record ID
            [^\n]* \n       # rest of the header line, which may be empty
            (.*?)           # sequence lines, newlines included
            (?= ^ > )       # up to the next header or the sentinel
        }msxg
    ) {
        # Keep a window onto the big string instead of a copy.
        $seq{ $id }{ $1 } = \substr( $raw{ $id }, $-[ 2 ], $+[ 2 ] - $-[ 2 ] );
    }

    return $self;
}

# Positions of the arguments perl passes to the tie methods.
use constant {
    SELF => 0,
    KEY  => 1,
};

# Returns the sequence stored under KEY with all line breaks removed,
# or undef when there is no such record.
sub FETCH {
    $DEBUG and carp "FETCH: @_";
    my $window = $seq{ $_[ SELF ] }{ $_[ KEY ] };
    return unless defined $window;

    my $value = ${ $window };
    $value =~ tr[\r\n][]d;    # the file is read raw, so CRLF leaves "\r" behind
    return $value;
}

# True when a record with ID KEY was found in the file.
sub EXISTS {
    $DEBUG and carp "EXISTS: @_";
    return exists $seq{ $_[ SELF ] }{ $_[ KEY ] };
}

# Restarts iteration over the record IDs and returns the first one.
sub FIRSTKEY {
    $DEBUG and carp "FIRSTKEY: @_";
    my $records = $seq{ $_[ SELF ] };
    keys %{ $records };    # void-context keys resets the each() iterator
    return each %{ $records };
}

# Returns the next record ID of the iteration started by FIRSTKEY.
sub NEXTKEY {
    $DEBUG and carp "NEXTKEY: @_";
    return each %{ $seq{ $_[ SELF ] } };
}

# Scalar/boolean value of the tied hash: whatever a plain hash holding the
# same record IDs yields in scalar context, so it is true exactly when at
# least one record was loaded. Does not disturb an each() in progress.
sub SCALAR {
    $DEBUG and carp "SCALAR: @_";
    return scalar %{ $seq{ $_[ SELF ] } };
}

# The hash is read-only: assignment croaks.
sub STORE {
    $DEBUG and carp "STORE: @_";
    croak 'Not implemented';
}

# The hash is read-only: deletion croaks.
sub DELETE {
    $DEBUG and carp "DELETE: @_";
    croak 'Not implemented';
}

# Drops this object's entries from the inside-out stores so the file
# contents are released together with the object.
sub DESTROY {
    my $id = "$_[ SELF ]";
    delete $seq{ $id };
    delete $raw{ $id };
    return;
}

# Loaded via use/require: stop here. Run as a script: continue into the
# benchmark below.
return 1 if caller;

package main;

# Loaded at run time so that using this file as a module does not require
# Benchmark::Timer to be installed.
require Benchmark::Timer;

my $T = Benchmark::Timer->new;

local $\ = $/;    # every print ends with a newline

my %sequence;

$T->start( 'load' );
my $seqRef = tie %sequence, 'FASTA::Faster', 'na_clones.dros.RELEASE2.5';
$T->stop( 'load' );

# The void-context maps below just force a full walk of the tied hash.
$T->start( 'keys' );
map $_, keys %sequence;
$T->stop( 'keys' );
print scalar keys %sequence;

$T->start( 'values' );
map $_, values %sequence;
$T->stop( 'values' );
print scalar values %sequence;

$T->report;

# NOTE(review): this looks like it was followed by a pause (e.g. reading a
# line from STDIN) so memory use could be inspected - confirm against the
# original script.
printf 'Check memory';

my( $key, $value );
print "$key =>\n$value\n" while ( $key, $value ) = each %sequence;

__END__
P:\test\FASTA>perl faster.pm
940
940
1 trial of load (1.165s total)
1 trial of keys (12.100ms total)
1 trial of values (12.311ms total)