gi1 VERSION cds1
gi2 VERSION cds2
. . .
. . .
. . .
####
#!/usr/bin/perl
use strict;
use warnings;
my %info = ('gi' => "", 'version' => "", 'cds' => "");
# here we collect all information
sub flush_info {
# print out all information:
print "$info{gi}"."\t"."$info{version}"."\t"."$info{cds}\n";
%info = ();
};
my $data = '/DATA/GenBankFile.gb'; # GenBank file is located at C:\DATA
open (INFILE, '<', $data) or die "Cannot!\n";
while () {
last if m!//$!;
if (m!^VERSION.*\w:(\d+)! ) {
$info{version} = $1;
}
elsif (m!GI:(\d+)!) {
if ($info{cds}) { # we are in a CDS block
$info{gi} = $1;
};
}
elsif (m!^\s+CDS\s+(.*)!) {
# a new gene information has started
flush_info();
# now remember the CDS
$info{cds} = $1;
}
else {warn "Ignoring unknown line [$_]\n";};
};
# Output any leftover information:
flush_info();
####
LOCUS NC_0000230 600020 bp DNA linear CON 21-APR-2007
VERSION NC_000023.10 GI:123456789
CDS join(11111..222222,333333..444444)
/db_xref="GI:55555555"
CDS join(66666..7777777,888888..99999)
/db_xref="GI:10101010"
//
####
Ignoring unknown line [LOCUS NC_0000230 600020 bp DNA linear CON 21-APR-2007
]
Use of uninitialized value $info{"version"} in string at C:\Perl\bin\wtf16.pl line 12, line 5.
Use of uninitialized value $info{"version"} in string at C:\Perl\bin\wtf16.pl line 12, line 7.
123456789
55555555 join(11111..222222,333333..444444)
10101010 join(66666..7777777,888888..99999)
####
sub flush_info {
# print out all information:
next print "$info{gi}"."\t"."$info{version}"."\t"."$info{cds}\n" if ($info{gi} or $info{version} or $info{cds} eq undef;
%info = ();
};