#!/usr/bin/perl -w
#
# Sum the residue masses of every peptide listed in a FASTA-like digest file
# and print one annotated record per peptide.
#
# Input format (each record is a header line followed by a field line):
#   >Protein:<name>
#   |Peptide:<n>|Missed:<n>|Type:<t>|Enzyme:<name>|Seq:<residues>|
#
# Usage: perl <this script> [digest_file]
#   digest_file defaults to P1GroupCExercise2_trypsin.txt
#
use strict;
use warnings;

#
# useful hashes for converting one letter code to mass
#
# %s2m supplies the per-residue values that are summed for the Mass(A) column.
# NOTE(review): the values look like monoisotopic residue masses -- confirm.
# The '\s' key is the literal two-character string backslash-s (single
# quotes), so it never matches a whitespace character; characters without an
# entry simply contribute nothing to the total.
#
my %s2m = (
    A => 71.0371,  C => 103.0092, D => 115.0269, E => 129.0426,
    F => 147.0684, G => 57.0215,  H => 137.0589, I => 113.0841,
    K => 128.0950, L => 113.0841, M => 131.0405, N => 114.0429,
    P => 97.0528,  Q => 128.0586, R => 156.1011, S => 87.0320,
    T => 101.0477, V => 99.0684,  W => 186.0793, Y => 163.0633,
    '\s' => 0.0,   "*" => 0.0,
);

# %s2a is not referenced anywhere in this script.
# NOTE(review): presumably average residue masses; the output label "Mass(A)"
# may mean this table was intended instead of %s2m -- confirm with the author.
my %s2a = (
    A => 71.08,  C => 103.14, D => 115.09, E => 129.12,
    F => 147.18, G => 57.05,  H => 137.14, I => 113.16,
    K => 128.17, L => 113.16, M => 131.19, N => 114.10,
    P => 97.12,  Q => 128.13, R => 156.19, S => 87.08,
    T => 101.10, V => 99.13,  W => 186.21, Y => 163.18,
    '\s' => 0.0, "*" => 0.0,
);

# Input file: first command-line argument, or the original hard-coded name.
my $file = @ARGV ? $ARGV[0] : "P1GroupCExercise2_trypsin.txt";

my @header_order = ();    # headers in order of first appearance
my %records_for  = ();    # header => [ field lines that followed it ]

# NOTE(review): emitted once and without a trailing newline, so it ends up
# glued to the front of the first ">Protein:" line. 18.0153 is presumably the
# mass of water -- confirm whether it should be added to each total instead.
print ">Mass(A):18.0153|Charge:1|";

#
# open file
#
open(my $in, '<', $file)
    or die "error: unable to open file $file: $!\n";

#
# read in sequences in fasta format
#
my $header = "";
while (my $line = <$in>) {
    # Drop the line ending (LF or CRLF) and any other trailing whitespace so
    # that header lines are recognised whatever the file's origin.
    $line =~ s/\s+\z//;
    next if $line eq '';    # ignore blank lines

    if ($line =~ /\A>(\S+)\z/) {
        $header = $1;
        next;
    }

    # Anything else is assumed to be a peptide record of the current header.
    push @header_order, $header unless exists $records_for{$header};
    push @{ $records_for{$header} }, $line;
}
close($in);

#
# report every peptide, proteins in file order
#
foreach my $key (@header_order) {
    my $protein = field_value($key);    # text after "Protein:"

    foreach my $record (@{ $records_for{$key} }) {
        # The record starts with '|', so element 0 is empty and the real
        # fields are elements 1..5.
        my @elements = split(/\|/, $record);

        my $peptide  = field_value($elements[1]);
        my $missed   = field_value($elements[2]);
        my $type     = field_value($elements[3]);
        my $enzyme   = field_value($elements[4]);
        my $sequence = field_value($elements[5]);

        my $total = peptide_mass($sequence);

        print ">Protein: $protein\n|Peptide:$peptide|Mass(A):$total|Charge:1|Missed:$missed|Terminal:$type|Enzyme:$enzyme|sequence:$sequence\n";
    }
}

# Return the second colon-separated piece of a "Name:value" field, or the
# empty string when the field or its value is missing.
sub field_value {
    my ($field) = @_;
    return '' unless defined $field;
    my $value = (split(/:/, $field))[1];
    return defined $value ? $value : '';
}

# Return the sum of the %s2m entries for each character of $sequence.
# Characters with no entry in %s2m are skipped.
sub peptide_mass {
    my ($sequence) = @_;
    my $total = 0.0;
    foreach my $residue (split(//, $sequence)) {
        $total += $s2m{$residue} if defined $s2m{$residue};
    }
    return $total;
}

__END__
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:13|Missed:0|Type:I|Enzyme:trypsin|Seq:R|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:14|Missed:0|Type:I|Enzyme:trypsin|Seq:SK|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:15|Missed:0|Type:I|Enzyme:trypsin|Seq:NWALR|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:16|Missed:0|Type:I|Enzyme:trypsin|Seq:R|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:17|Missed:0|Type:I|Enzyme:trypsin|Seq:ER|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:18|Missed:0|Type:I|Enzyme:trypsin|Seq:VSIFFWLLSAAAIPTMIINR|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:19|Missed:0|Type:I|Enzyme:trypsin|Seq:GTSGAVIIK|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:20|Missed:0|Type:I|Enzyme:trypsin|Seq:IKPAAQFK|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:21|Missed:0|Type:I|Enzyme:trypsin|Seq:GNTAMK|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:22|Missed:0|Type:I|Enzyme:trypsin|Seq:ITTGTMAER|
>Protein:HEMINF_F3_Complement_N182_S1226717_L118
|Peptide:23|Missed:0|Type:C|Enzyme:trypsin|Seq:LC|