#!/usr/local/bin/perl
# Title: "extraction of sequences"
#
# Reads an annotation file (bioinfo.txt by default, or the first command-line
# argument), keeps only its '#'-prefixed lines, strips the '#' characters,
# drops boilerplate header lines and blank lines, and prints what is left as
# "<index>-<line>" so the records can be processed further.
#
# Tail of the sample input file (earlier records use the same layout):
#
#   # ----- prediction on sequence number 3 (length = 713, name = seq_03) -----
#   #
#   # Constraints/Hints:
#   # (none)
#   # Predicted genes for sequence number 3 on both strands
#   # start gene g4
#   .... [same as above] ...... and so on ...

use strict;
use warnings;
use IO::File;

# File read when no path is given on the command line.
use constant DEFAULT_INPUT => 'bioinfo.txt';

# extract_annotation_lines(@raw_lines)
#
# Takes the raw lines of the input file and returns, in their original order,
# the lines worth keeping:
#   * only lines that contain at least one '#' are considered; every '#'
#     in such a line is removed (leading whitespace after it is preserved);
#   * lines mentioning "(none)", "checked", "constraints" or "predicted"
#     (case-insensitive) are dropped as boilerplate;
#   * lines that are empty or whitespace-only after stripping are dropped.
# The caller's list is not modified.
sub extract_annotation_lines {
    my @raw_lines = @_;

    my @kept;
    for my $line (@raw_lines) {
        chomp $line;

        # s///g returns the number of replacements, so a line without any
        # '#' yields false here and is skipped.
        next unless $line =~ s/#//g;

        # "(none)" is matched with literal parentheses: an unescaped
        # (none) is just a group, and with /i it would also match the
        # letters NONE inside an upper-case sequence line.
        next if $line =~ /\(none\)|checked|constraints|predicted/i;

        # Test for visible content rather than truthiness, so a line that
        # is exactly "0" survives and whitespace-only lines do not.
        next unless $line =~ /\S/;

        push @kept, $line;
    }

    return @kept;
}

# main()
#
# Script entry point: reads the input file, filters it and prints every kept
# line with its zero-based index. Dies if the file cannot be opened.
sub main {
    my $path = @ARGV ? $ARGV[0] : DEFAULT_INPUT;

    my $handle = IO::File->new($path, 'r')
        or die "Cannot open '$path' for reading: $!\n";
    my @input_array = $handle->getlines;
    $handle->close;

    my @new_array = extract_annotation_lines(@input_array);

    # 0 .. $#new_array includes the last element; the former
    # "$i < $#new_array" condition silently dropped it.
    for my $i (0 .. $#new_array) {
        print "$i-$new_array[$i]\n";
    }

    return;
}

# Run only when executed as a script, so the filter can be loaded and
# exercised on its own.
main() unless caller;

# Output reported for the original script on the sample file (excerpt).
# That version skipped the final line of the file and the final kept line;
# this version prints them as well.
#
#   0- ----- prediction on sequence number 1 (length = 105, name = seq_01) --
#   1- start gene g1
#   2- coding sequence = [atgtcgtccctccccactctcatctttctccaccc
#   3- atcgctgcggtcctcgccgacccttttgtgccggaagtagggaccgg]
#   4- protein sequence = [MTASAFVLGTVAFLHNRLRRSRPRQASTAHR
#   5- GTETPLLRSDKENLTTVLDATILVHSLGQKTNLALGATSSSLDLQKTNLAL
#   6- VAALTPGIVFPLPSPFVATGLCLQKTNLALGATSSSLDL]
#   7- end gene g1
#   8- start gene g2
#   9- coding sequence = [atgccgtcctcgtcaaagcagctggcgatgcc
#   10- tcggcccctccttctgcaaaccgccctgccgcccgcctcggctcctccgaa
#   11- gccgagcagcctacgcaggggccgcagatgctcgcgggagggaatatcgg]
#   12- protein sequence =[MPLDSSSTPTSNPAPSHSSTAYLLFERLHIAEQ
#   13- CCPGQGIRHGKWSPGSSEAPT]
#   14- end gene g2
#   15- ----- prediction on sequence number 2 (length = 710, name = seq_02) -----
#   16- start gene g3
#   17- coding sequence = [agctgccctcctcggggccagccttctcttaactc
#   18- tttgagaccttcaatcctgaggcgtgagacgcagtctggaggagcagctc]
#   19- protein sequence = [LRRETQSGGAALCSLFDPPPTPTACAHANSP]

# True value so the file can also be loaded with require/do.
1;