and here is the data I have been reading:#!/usr/bin/perl use strict; use warnings; my $cds = ''; my $version = ''; my $gi = ''; my @genbank = (); my $data_file ="/DATA/GenBankFile.gb"; open(INFILE, $data_file) || die("Could not open file!"); @genbank=<INFILE>; close(INFILE); foreach my $line (@genbank) { if( $line =~ /^\/\/\n/ ) { last;} elsif($line =~ /^VERSION/) { $line =~ s/^VERSION\s*\w*\S\d*\s*\w*\S//; + $version = $line;} elsif($line =~ /^\s*\Sdb_xref="GI/) { chomp($line); $line =~ s/^\s*\Sd +b_xref="GI://; $gi = $line;} elsif($line =~ /^\s*CDS/){ chomp($line); $line =~ s/^\s*CDS\s*//; $cds + = $line;} print "$gi"."\t"."$version"."\t"."$cds\n" if (defined $cds and defined + $gi and defined $version); }
and here is the far-from-the-desired-output:LOCUS NC_0000230 600020 bp DNA linear CON 21- +APR-2007 VERSION NC_000023.10 GI:123456789 CDS join(11111..222222,333333..444444) /db_xref="GI:5555555" CDS join(66666..7777777,888888..99999) /db_xref="GI:10101010" //
and here is what the output should look like:123456789 123456789 join(11111..222222,333333..444444) 5555555" 123456789 join(11111..222222,333333..444444) 5555555" 123456789 join(66666..7777777,888888..99999) 10101010" 123456789 join(66666..7777777,888888..99999) 10101010" 123456789 join(66666..7777777,888888..99999)
5555555 123456789 join(11111..222222,333333..444444) 10101010 123456789 join(66666..7777777,888888..99999)
In reply to Read, match string and print by sophix
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |