# HLA Peptide Binding Predictions
# http://thr.cit.nih.gov/molbio/hla_bind/
#
# Slides a fixed-size window over a protein sequence, scores every window
# against the coefficient table stored in the __DATA__ section, and prints
# the windows ranked by descending score.
use warnings;
use strict;
use Data::Dumper;

# load the scoring matrix
# 9-mer Coefficient Table for HLA_A_0201 (A_0201_standard)
my $nmer_size   = 9;
my $final_const = 0.069;

# %mx maps a one-letter residue code to an array ref of per-position
# coefficients (index 0 is the first position of the n-mer).
my %mx;
while ( my $line = <DATA> ) {
    chomp $line;

    # Everything from the '####' separator onwards is a pasted copy of a
    # previous run's output, kept for reference; it is not matrix data.
    last if $line =~ /^####/;
    next if $line !~ /\S/;

    # Split on any run of whitespace so the table still loads if tabs were
    # converted to spaces somewhere along the way.
    my ( $aa, @scores ) = split ' ', $line;
    die "Matrix row '$aa' has " . scalar(@scores)
        . " coefficients, expected $nmer_size"
        unless @scores == $nmer_size;
    $mx{$aa} = [@scores];
}

# score the seq from the OP
my $seq = 'EALLKQSWEVLKQNIPGHSLCLFALIIEAAPESKYVFSFLKDSNEIPENNPKLK'
        . 'AHAAVIFKTICESATELRQKGQAVWDNNTLKRLGSIHLKNKITDPHFEVMKGAL'
        . 'LGTIKEAVKENWSDEMCCAWTEAYNQLVATIKAEMKE';

# %scores maps the 0-based start offset of each window to its score.
# A sequence shorter than $nmer_size gives an empty range and no rows.
my %scores;
foreach my $start_pos ( 0 .. length( $seq ) - $nmer_size ) {
    my $subseq = substr( $seq, $start_pos, $nmer_size );
    $scores{$start_pos} = score_seq( $subseq );
}

# print a table of results in the same format as the website
print join( "\t", 'Rank', 'Start Pos', 'Subsequence', 'Score' ), "\n";
my $rank = 1;

# Highest score first. Scores are rounded to three decimals, so many windows
# tie; ties are broken by start position to make the output reproducible.
# The reference table after '####' predates this tie-break, so its tied rows
# may appear in a different order.
foreach my $start_pos (
    sort { $scores{$b} <=> $scores{$a} || $a <=> $b } keys %scores
) {
    print join( "\t",
        $rank,
        $start_pos + 1,                           # report 1-based positions
        substr( $seq, $start_pos, $nmer_size ),
        $scores{$start_pos} ), "\n";
    $rank++;
}

# score_seq( $seq )
#
# Score one peptide according to the process described at
# http://thr.cit.nih.gov/molbio/hla_bind/hla_motif_search_info.html :
# start from $final_const and multiply by the coefficient of each residue
# at its position in the peptide.
#
# Parameters:
#   $seq - peptide as a string of one-letter residue codes; each character
#          must be a key of %mx and the string must be no longer than the
#          number of coefficient columns.
# Returns:
#   the score formatted with sprintf "%.3f" (a string).
# Dies:
#   if $seq is undefined, contains a residue missing from the matrix, or is
#   longer than the matrix has positions.
sub score_seq {
    my ( $seq ) = @_;
    die "score_seq: no sequence given" unless defined $seq;

    my $score = $final_const;
    foreach my $pos ( 0 .. length( $seq ) - 1 ) {
        my $aa = substr( $seq, $pos, 1 );

        # Test with exists first: indexing $mx{$aa}[$pos] directly would
        # autovivify an empty row for the unknown residue.
        die "score_seq: unknown residue '$aa' at position "
            . ( $pos + 1 ) . " of '$seq'"
            unless exists $mx{$aa};
        die "score_seq: no coefficient for position "
            . ( $pos + 1 ) . " ('$seq' is longer than the matrix)"
            unless defined $mx{$aa}[$pos];

        $score *= $mx{$aa}[$pos];
    }
    return sprintf( "%.3f", $score );
}

__DATA__
A 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
C 1.000 0.470 1.000 1.000 1.000 1.000 1.000 1.000 1.000
D 0.075 0.100 0.400 4.100 1.000 1.000 0.490 1.000 0.003
E 0.075 1.400 0.064 4.100 1.000 1.000 0.490 1.000 0.003
F 4.600 0.050 3.700 1.000 3.800 1.900 5.800 5.500 0.015
G 1.000 0.470 1.000 1.000 1.000 1.000 0.130 1.000 0.015
H 0.034 0.050 1.000 1.000 1.000 1.000 1.000 1.000 0.015
I 1.700 9.900 1.000 1.000 1.000 2.300 1.000 0.410 2.100
K 3.500 0.100 0.035 1.000 1.000 1.000 1.000 1.000 0.003
L 1.700 72.000 3.700 1.000 1.000 2.300 1.000 1.000 4.300
M 1.700 52.000 3.700 1.000 1.000 2.300 1.000 1.000 1.000
N 1.000 0.470 1.000 1.000 1.000 1.000 1.000 1.000 0.015
P 0.022 0.470 1.000 1.000 1.000 1.000 1.000 1.000 0.003
Q 1.000 7.300 1.000 1.000 1.000 1.000 1.000 1.000 0.003
R 1.000 0.010 0.076 1.000 1.000 1.000 0.200 1.000 0.003
S 1.000 0.470 1.000 1.000 1.000 1.000 1.000 1.000 0.015
T 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.500
V 1.700 6.300 1.000 1.000 1.000 2.300 1.000 0.410 14.000
W 4.600 0.010 8.300 1.000 1.000 1.700 7.500 5.500 0.015
Y 4.600 0.010 3.200 1.000 1.000 1.500 1.000 5.500 0.015
####
Rank Start Pos Subsequence Score
1 2 ALLKQSWEV 1930.068
2 95 KITDPHFEV 795.962
3 108 LLGTIKEAV 57.937
4 107 ALLGTIKEA 42.278
5 21 CLFALIIEA 42.278
6 19 SLCLFALII 16.254
7 63 TICESATEL 12.043
8 12 KQNIPGHSL 7.581
9 14 NIPGHSLCL 2.937
10 101 FEVMKGALL 1.911
11 133 NQLVATIKA 1.864
12 35 YVFSFLKDS 0.970
13 45 EIPENNPKL 0.903
14 75 GQAVWDNNT 0.756
15 135 LVATIKAEM 0.739
16 103 VMKGALLGT 0.737
17 52 KLKAHAAVI 0.524
18 123 EMCCAWTEA 0.457
19 3 LLKQSWEVL 0.434
20 89 SIHLKNKIT 0.420
21 28 EAAPESKYV 0.398
22 70 ELRQKGQAV 0.396
23 79 WDNNTLKRL 0.314
24 76 QAVWDNNTL 0.297
25 128 WTEAYNQLV 0.284
26 17 GHSLCLFAL 0.198
27 117 KENWSDEMC 0.166
28 82 NTLKRLGSI 0.160
29 59 VIFKTICES 0.148
30 57 AAVIFKTIC 0.108
31 86 RLGSIHLKN 0.075
32 25 LIIEAAPES 0.071
33 55 AHAAVIFKT 0.069
34 88 GSIHLKNKI 0.068
35 130 EAYNQLVAT 0.057
36 61 FKTICESAT 0.048
37 69 TELRQKGQA 0.046
38 77 AVWDNNTLK 0.044
39 129 TEAYNQLVA 0.040
40 6 QSWEVLKQN 0.038
41 134 QLVATIKAE 0.034
42 13 QNIPGHSLC 0.032
43 49 NNPKLKAHA 0.032
44 113 KEAVKENWS 0.028
45 38 SFLKDSNEI 0.027
46 118 ENWSDEMCC 0.020
47 127 AWTEAYNQL 0.018
48 96 ITDPHFEVM 0.018
49 18 HSLCLFALI 0.016
50 24 ALIIEAAPE 0.015
51 5 KQSWEVLKQ 0.012
52 10 VLKQNIPGH 0.010
53 56 HAAVIFKTI 0.009
54 54 KAHAAVIFK 0.009
55 51 PKLKAHAAV 0.008
56 119 NWSDEMCCA 0.007
57 22 LFALIIEAA 0.007
58 104 MKGALLGTI 0.007
59 84 LKRLGSIHL 0.004
60 120 WSDEMCCAW 0.004
61 121 SDEMCCAWT 0.004
62 67 SATELRQKG 0.004
63 23 FALIIEAAP 0.004
64 32 ESKYVFSFL 0.004
65 16 PGHSLCLFA 0.004
66 125 CCAWTEAYN 0.003
67 99 PHFEVMKGA 0.003
68 131 AYNQLVATI 0.003
69 111 TIKEAVKEN 0.003
70 126 CAWTEAYNQ 0.002
71 47 PENNPKLKA 0.002
72 39 FLKDSNEIP 0.002
73 27 IEAAPESKY 0.002
74 15 IPGHSLCLF 0.002
75 73 QKGQAVWDN 0.002
76 29 AAPESKYVF 0.002
77 9 EVLKQNIPG 0.002
78 102 EVMKGALLG 0.002
79 37 FSFLKDSNE 0.002
80 20 LCLFALIIE 0.001
81 62 KTICESATE 0.001
82 74 KGQAVWDNN 0.001
83 58 AVIFKTICE 0.001
84 41 KDSNEIPEN 0.001
85 122 DEMCCAWTE 0.001
86 8 WEVLKQNIP 0.001
87 83 TLKRLGSIH 0.001
88 33 SKYVFSFLK 0.001
89 50 NPKLKAHAA 0.001
90 106 GALLGTIKE 0.001
91 68 ATELRQKGQ 0.000
92 71 LRQKGQAVW 0.000
93 72 RQKGQAVWD 0.000
94 91 HLKNKITDP 0.000
95 92 LKNKITDPH 0.000
96 93 KNKITDPHF 0.000
97 94 NKITDPHFE 0.000
98 78 VWDNNTLKR 0.000
99 80 DNNTLKRLG 0.000
100 97 TDPHFEVMK 0.000
101 105 KGALLGTIK 0.000
102 98 DPHFEVMKG 0.000
103 109 LGTIKEAVK 0.000
104 110 GTIKEAVKE 0.000
105 81 NNTLKRLGS 0.000
106 100 HFEVMKGAL 0.000
107 124 MCCAWTEAY 0.000
108 85 KRLGSIHLK 0.000
109 87 LGSIHLKNK 0.000
110 40 LKDSNEIPE 0.000
111 42 DSNEIPENN 0.000
112 112 IKEAVKENW 0.000
113 43 SNEIPENNP 0.000
114 44 NEIPENNPK 0.000
115 90 IHLKNKITD 0.000
116 114 EAVKENWSD 0.000
117 115 AVKENWSDE 0.000
118 116 VKENWSDEM 0.000
119 26 IIEAAPESK 0.000
120 46 IPENNPKLK 0.000
121 48 ENNPKLKAH 0.000
122 1 EALLKQSWE 0.000
123 30 APESKYVFS 0.000
124 4 LKQSWEVLK 0.000
125 132 YNQLVATIK 0.000
126 11 LKQNIPGHS 0.000
127 7 SWEVLKQNI 0.000
128 53 LKAHAAVIF 0.000
129 136 VATIKAEMK 0.000
130 137 ATIKAEMKE 0.000
131 31 PESKYVFSF 0.000
132 60 IFKTICESA 0.000
133 64 ICESATELR 0.000
134 34 KYVFSFLKD 0.000
135 65 CESATELRQ 0.000
136 36 VFSFLKDSN 0.000
137 66 ESATELRQK 0.000