in reply to New to Perl

There must be some sub within the BioPerl modules that does this, but the format is simple, so here is a parser for you. I didn't parse the older format in which lines start with ';', but you can add that if needed. See the Wiki entry for the FASTA format. Blank lines between sequences are optional, as is ending a sequence with a '*'.
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dump qw(pp);

# Minimal FASTA reader: collects the sequences found in the __DATA__
# section into %sequences, keyed by record ID, then dumps the hash.
#
# The ID is the leading run of word characters (\w+) that follows '>'.
# Note that a header such as ">gi|5524211|..." is therefore stored
# under the key "gi", and records whose headers share that leading run
# are appended to the same hash entry.

my %sequences;
my $line;
my $skip_read = 0;    # true when $line already holds an unprocessed header

while ($skip_read or defined($line = <DATA>)) {
    # Clear the flag right away.  If the pending header yields no ID the
    # loop must go on to read a fresh line; leaving the flag set would
    # re-examine the same line forever.
    $skip_read = 0;

    chomp $line;
    my ($id) = $line =~ /^>(\w+)/;
    next unless defined $id;    # not a usable header line; ignore it

    ($skip_read, $line) = finish_record($id, \%sequences);
}

print pp(\%sequences);

# finish_record($id, $seqHashRef)
#
# Reads sequence lines from DATA and appends them to $seqHashRef->{$id}
# until the record ends.  A record ends at:
#   * a line ending in '*' (the '*' itself is not stored),
#   * a blank (whitespace-only) line,
#   * the next header line (one starting with '>'), or
#   * end of input.
#
# Returns (1, $header_line) when the record was ended by the next header,
# so the caller can process that line without reading again; the header
# is returned exactly as read (not chomped).  Returns 0 in every other
# case.
sub finish_record {
    my ($id, $seqHashRef) = @_;

    while (defined(my $line = <DATA>)) {
        # Next record's header: hand it back to the caller.
        return (1, $line) if $line =~ /^>/;

        # A blank line terminates the current record.
        return 0 if $line =~ /^\s*$/;

        # Strip the newline together with any trailing whitespace, so that
        # CRLF line endings do not hide the '*' terminator or leak "\r"
        # into the stored sequence.
        $line =~ s/\s+\z//;

        my $is_last = ($line =~ s/\*\z//);    # true if a trailing '*' was removed
        $seqHashRef->{$id} .= $line;
        return 0 if $is_last;
    }

    return 0;    # end of input
}

=prints
{
  MCHU => "ADQLTEEQIAEFKEAFSLFDKDGDGT....",
  gi => "LCLYTHIGRNIYYGSYLYSETWNTGI....",
}
=cut

__DATA__
>MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken
ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID
DIDGDGQVNYEEFVQMMTAK*
>gi|5524211|gb|AAD44166.1| cytochrome b [Elephas maximus maximus]
LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
IENY

Replies are listed 'Best First'.
Re^2: New to Perl
by rolandomantilla (Novice) on Aug 06, 2011 at 03:42 UTC
    OK, that works, although I'm still trying to complete my program. Let me tell you the main thing: I need to input the clone ID, and then the program should give me the sequence as the answer. I thought about reading the FASTA file into a hash and then using the ID as the key to return the sequence as the answer. Thank you; this is a very easy way to get it, and a very cool way of doing something similar.