#!/usr/bin/perl
#
# Extract one tab-separated row per CDS feature from a GenBank flat file.
#
# Desired output layout (one row per CDS):
#
#     gi1   VERSION   cds1
#     gi2   VERSION   cds2
#     ...
#
# Columns:
#   gi      - digits following "GI:" on a line inside the CDS feature
#             (e.g. /db_xref="GI:55555555")
#   VERSION - digits following "GI:" on the record's VERSION line; it is
#             read once and repeated on every row of the record
#   cds     - text following the CDS feature key on the CDS line itself
#             (a location continued on following lines is not collected)
#
# Usage: script.pl [genbank-file]
#   Without an argument the default path below is used.

use strict;
use warnings;

# Fields of the row currently being assembled.
my %info = ('gi' => "", 'version' => "", 'cds' => "");

# flush_info()
#
# Print the collected row as "gi<TAB>version<TAB>cds" and prepare %info for
# the next CDS feature.
#
# Nothing is printed unless a CDS location has been collected, so calling
# this before the first CDS (or on a record without any CDS) emits no empty
# row.  Only the per-CDS fields are reset: 'version' belongs to the whole
# record and must survive the flush, otherwise every row after the first
# one would print an undefined value.
sub flush_info {
    if (defined $info{cds} && length $info{cds}) {
        print join("\t", $info{gi}, $info{version}, $info{cds}), "\n";
    }

    # Keep $info{version}; clear what belongs to the CDS just printed.
    @info{qw(gi cds)} = ("", "");
    return;
}

# GenBank file is located at C:\DATA unless a path is given as argument.
my $data = @ARGV ? $ARGV[0] : '/DATA/GenBankFile.gb';

open(my $infile, '<', $data) or die "Cannot open '$data': $!\n";

while (my $line = <$infile>) {
    chomp $line;

    # A line ending in "//" terminates the record; stop reading there.
    last if $line =~ m!//$!;

    if ($line =~ m!^VERSION.*\w:(\d+)!) {
        $info{version} = $1;
    }
    elsif ($line =~ m!GI:(\d+)!) {
        # Only a GI seen after a CDS line belongs to that CDS.
        if (length $info{cds}) {
            $info{gi} = $1;
        }
    }
    elsif ($line =~ m!^\s+CDS\s+(.*)!) {
        # A new CDS feature starts: emit the previous one (if any) ...
        flush_info();
        # ... and remember the new location.
        $info{cds} = $1;
    }
    else {
        warn "Ignoring unknown line [$line]\n";
    }
}

close $infile;

# Output any leftover information:
flush_info();

__END__

Sample input (GenBankFile.gb)
-----------------------------

LOCUS       NC_0000230   600020 bp    DNA     linear   CON 21-APR-2007
VERSION     NC_000023.10  GI:123456789
     CDS             join(11111..222222,333333..444444)
                     /db_xref="GI:55555555"
     CDS             join(66666..7777777,888888..99999)
                     /db_xref="GI:10101010"
//

Output of the earlier revision for that input
---------------------------------------------
(flush_info cleared the whole hash and printed unconditionally, which gave
an empty first row and lost VERSION on every later row)

Ignoring unknown line [LOCUS NC_0000230 600020 bp DNA linear CON 21-APR-2007
]
Use of uninitialized value $info{"version"} in string at C:\Perl\bin\wtf16.pl line 12, <INFILE> line 5.
Use of uninitialized value $info{"version"} in string at C:\Perl\bin\wtf16.pl line 12, <INFILE> line 7.
        123456789
55555555                join(11111..222222,333333..444444)
10101010                join(66666..7777777,888888..99999)

Output of this revision for that input (columns are tab-separated)
------------------------------------------------------------------

Ignoring unknown line [LOCUS       NC_0000230   600020 bp    DNA     linear   CON 21-APR-2007]
55555555        123456789       join(11111..222222,333333..444444)
10101010        123456789       join(66666..7777777,888888..99999)