in reply to Parsing help
Now, to your code: gene=LOC653505 comes after gene=LOC644591, so you have to parse the file twice. For example:
#!/usr/bin/perl --
use strict;
use warnings;

# scan_gene($fh)
#
# First pass over the records: read $fh from its current position to
# end-of-file and collect, in file order, the value of the first
# "gene=NAME" field found on each line.  The handle is then moved back to
# the position it had on entry so the caller can read the same records again.
#
# Returns the list of gene names (empty if no line carries a gene= field).
# Dies if the handle's position cannot be determined or restored.
sub scan_gene {
    my $fh = shift;

    # tell() returns -1 on error (e.g. an unseekable handle); without a valid
    # start position the second pass would silently see no data.
    my $start = tell $fh;
    die "scan_gene: cannot determine filehandle position\n" if $start < 0;

    my @gene;
    while ( defined( my $line = readline $fh ) ) {
        push @gene, $1 if $line =~ /gene=(\S+)/;
    }

    seek $fh, $start, 0
        or die "scan_gene: cannot rewind filehandle: $!\n";
    return @gene;
}

my @gene   = scan_gene( \*DATA );
my $geneix = 0;

# Second pass: print everything before the strand sign, the sign itself, and
# the gene this row belongs to.  A row without its own gene= field is
# labelled with the next gene that appears further down the file.
while ( defined( my $line = <DATA> ) ) {
    chomp $line;

    # Rows after the last gene= field have no gene to report; use an empty
    # string instead of reading past the end of @gene.
    my $gene = $geneix < @gene ? $gene[$geneix] : '';

    # Consume one entry of @gene for every line that has a gene= field.
    # This is the same test scan_gene() applied, so the two passes stay in
    # step, and a gene name that merely occurs as a substring of another
    # field can no longer advance the index.
    $geneix++ if $line =~ /gene=\S+/;

    my ( $before, $sign ) = split /([+-])/, $line, 2;
    if ( !defined $sign ) {
        # No strand sign: there is nothing sensible to print for this row.
        warn "skipping line without strand sign: $line\n" if $line =~ /\S/;
        next;
    }

    print "$before $sign $gene\n";
}
__DATA__
NT_113797 CDS 122829 123323 - gene=LOC644591 ProteinID=XP_932799.1
NT_113798 CDS 4457 4636 -
NT_077932 CDS 9894 9928 -
NT_077932 CDS 65297 65828 +
NT_077932 CDS 89196 89690 - gene=LOC653505 ProteinID=BJDND993
|
|---|