use strict;
use warnings;

use Text::Ngram qw( ngram_counts );

# NOTE(review): two further `use ...` lines were elided in the original text.
# The tie() call below needs MLDBM plus whichever module exports DB_CREATE and
# DB_INIT_LOCK; restore those imports here before running.
# use ...;
# use ...;

# Build an on-disk index mapping each FASTA header id to the n-gram counts
# of its sequence.

my $db          = "/home/db/repository/megalith.db";
my $ngram_width = 5;
my %protein_search_hash;

# Start from a fresh database file on every run.
unlink $db;

tie %protein_search_hash, 'MLDBM',
    -Filename => $db,
    -Flags    => DB_CREATE | DB_INIT_LOCK
    or die "Cannot open database '$db': $!\n";

# NOTE(review): the original text between `open FILE, "` and `};` was lost,
# leaving an unterminated string. The lines below are a reconstruction: the
# surviving `};` and the later use of $fasta_sequence suggest the file was
# slurped in one read. The input path is unknown, so it is taken from the
# command line here -- confirm, or replace with the original path.
my $fasta_file = shift @ARGV;
die "Usage: $0 FASTA_FILE\n" unless defined $fasta_file;

open my $fasta_fh, '<', $fasta_file
    or die "Cannot open FASTA file '$fasta_file': $!\n";
my $fasta_sequence = do { local $/; <$fasta_fh> };
close $fasta_fh;

# An empty file reads back as undef; normalise so the match below is quiet.
$fasta_sequence = '' unless defined $fasta_sequence;

# Precompile regex for speed
my $regex   = qr/>([\S]*)\s*.*\s([A-Za-z\s]+)/;
# Not used in the visible code; kept because the elided code may rely on it.
my $regex_d = qr/^>[\S]*\s*.*\s[A-Za-z\s]+/;

# PARSE $fasta_sequence and split header from the sequence
while ( $fasta_sequence =~ m/$regex/igm ) {
    # Copy the captures immediately, before any other match can clobber them.
    my $header   = $1;
    my $sequence = $2;

    # Delete all line breaks. tr/// takes a plain character list, not a regex
    # class, so the brackets and pipe of the original `[\r|\n]` were literal
    # characters to delete.
    $sequence =~ tr/\r\n//d;

    # generate n-grams
    my $ngram_href = ngram_counts( $sequence, $ngram_width );

    # Assign the whole structure in one store to the tied hash.
    $protein_search_hash{$header} = $ngram_href;
}

# NOTE(review): further code was elided here in the original ("... ... ...").

# The original text then repeats two statements from the loop above as
# stand-alone excerpts, separated by "####":
####
# $protein_search_hash{$header} = $ngram_href;
####
# my $ngram_href= ngram_counts($sequence, $ngram_width);