# HLA Peptide Binding Predictions # http://thr.cit.nih.gov/molbio/hla_bind/ use warnings; use strict; use Data::Dumper; # load the scoring matrix # 9-mer Coefficient Table for HLA_A_0201 (A_0201_standard) my $nmer_size = 9; my $final_const = 0.069; my %mx; while( my $line = ) { chomp $line; my ( $aa, @scores ) = split( /\t/, $line ); $mx{$aa} = [ @scores ]; } # score the seq from the OP my $seq = 'EALLKQSWEVLKQNIPGHSLCLFALIIEAAPESKYVFSFLKDSNEIPENNPKLK' . 'AHAAVIFKTICESATELRQKGQAVWDNNTLKRLGSIHLKNKITDPHFEVMKGAL' . 'LGTIKEAVKENWSDEMCCAWTEAYNQLVATIKAEMKE'; my %scores; foreach my $start_pos ( 0 .. length( $seq ) - $nmer_size ) { my $subseq = substr( $seq, $start_pos, $nmer_size ); $scores{$start_pos} = score_seq( $subseq ); } # print a table of results in the same format as the website print join( "\t", 'Rank', 'Start Pos', 'Subsequence', 'Score' ), "\n"; my $rank = 1; foreach my $start_pos ( sort { $scores{$b} <=> $scores{$a} } keys %scores ) { print join( "\t", $rank, $start_pos + 1, substr( $seq, $start_pos, $nmer_size ), $scores{$start_pos} ), "\n"; $rank++; } sub score_seq { my ( $seq ) = @_; # score the sequence according to the process described at # http://thr.cit.nih.gov/molbio/hla_bind/hla_motif_search_info.html my $score = $final_const; foreach my $pos ( 0 .. length( $seq ) - 1 ) { # error checking should be added (invalid characters, etc) $score *= $mx{ substr( $seq, $pos, 1 ) }[$pos]; } return( sprintf( "%.3f", $score ) ); } __DATA__ A 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 C 1.000 0.470 1.000 1.000 1.000 1.000 1.000 1.000 1.000 D 0.075 0.100 0.400 4.100 1.000 1.000 0.490 1.000 0.003 E 0.075 1.400 0.064 4.100 1.000 1.000 0.490 1.000 0.003 F 4.600 0.050 3.700 1.000 3.800 1.900 5.800 5.500 0.015 G 1.000 0.470 1.000 1.000 1.000 1.000 0.130 1.000 0.015 H 0.034 0.050 1.000 1.000 1.000 1.000 1.000 1.000 0.015 I 1.700 9.900 1.000 1.000 1.000 2.300 1.000 0.410 2.100 K 3.500 0.100 0.035 1.000 1.000 1.000 1.000 1.000 0.003 L 1.700 72.000 3.700 1.000 1.000 2.300 1.000 1.000 4.300 M 1.700 52.000 3.700 1.000 1.000 2.300 1.000 1.000 1.000 N 1.000 0.470 1.000 1.000 1.000 1.000 1.000 1.000 0.015 P 0.022 0.470 1.000 1.000 1.000 1.000 1.000 1.000 0.003 Q 1.000 7.300 1.000 1.000 1.000 1.000 1.000 1.000 0.003 R 1.000 0.010 0.076 1.000 1.000 1.000 0.200 1.000 0.003 S 1.000 0.470 1.000 1.000 1.000 1.000 1.000 1.000 0.015 T 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.500 V 1.700 6.300 1.000 1.000 1.000 2.300 1.000 0.410 14.000 W 4.600 0.010 8.300 1.000 1.000 1.700 7.500 5.500 0.015 Y 4.600 0.010 3.200 1.000 1.000 1.500 1.000 5.500 0.015