I have a fasta file containing many protein sequences in fasta format. I want to calculate isoelectric point of each sequences.
I have this code but it is giving errors.
use Bio::Tools::pICalculator;
use Bio::SeqIO;
my $in = Bio::SeqIO->new( -fh => \*STDIN ,-format => 'Fasta' );
my $calc = Bio::Tools::pICalculator->new(-places => 2,
-pKset => 'EMBOSS');
while ( my $seq = $in->next_seq ) {
$calc->seq($seq);
my $iep = $calc->iep;
print sprintf( "%s\t%s\t%.2f\n",
$seq->id,
$iep,
$calc->charge_at_pH($iep) );
for( my $i = 0; $i <= 14; $i += 0.5 ){
print sprintf( "pH = %.2f\tCharge = %.2f\n",
$i,
$calc->charge_at_pH($i) );
}
}
package Bio::Tools::pICalculator;
use strict;
use base qw(Bio::Root::Root);
my $DTASelect_pK = { N_term => 8.0,
K => 10.0, # Lys
R => 12.0, # Arg
H => 6.5, # His
D => 4.4, # Asp
E => 4.4, # Glu
C => 8.5, # Cys
Y => 10.0, # Tyr
C_term => 3.1
};
my $Emboss_pK = { N_term => 8.6,
K => 10.8, # Lys
R => 12.5, # Arg
H => 6.5, # His
D => 3.9, # Asp
E => 4.1, # Glu
C => 8.5, # Cys
Y => 10.1, # Tyr
C_term => 3.6
};
sub new {
my( $class, %opts ) = @_;
my $self = $class->SUPER::new(%opts);
$self = bless {}, ref $self || $self;
$self->seq( $opts{-seq} ) if exists $opts{-seq};
$self->pKset( $opts{-pKset} || 'EMBOSS' );
exists $opts{-places} ? $self->places( $opts{-places} ) :
$self->places(2);
return $self;
}
sub seq {
my( $this, $seq ) = @_;
unless( defined $seq && UNIVERSAL::isa($seq,'Bio::Seq') ){
die $seq . " is not a valid Bio::Seq object\n";
}
$this->{-seq} = $seq;
$this->{-count} = count_charged_residues( $seq );
return $this->{-seq};
}
sub pKset {
my ( $this, $pKset ) = @_;
if( ref $pKset eq 'HASH' ){ # user defined pK values
$this->{-pKset} = $pKset;
}elsif( $pKset =~ /^emboss$/i ){ # from EMBOSS's iep program
$this->{-pKset} = $Emboss_pK;
}elsif( $pKset =~ /^dtaselect$/i ){ # from DTASelect (scripps)
$this->{-pKset} = $DTASelect_pK;
}else{ # default to EMBOSS
$this->{-pKset} = $Emboss_pK;
}
return $this->{-pKset};
}
sub places {
my $this = shift;
$this->{-places} = shift if @_;
return $this->{-places};
}
sub iep {
my $this = shift;
return _calculate_iep($this->{-pKset},
$this->{-places},
$this->{-seq},
$this->{-count}
);
}
sub charge_at_pH {
my $this = shift;
return _calculate_charge_at_pH( shift, $this->{-pKset},
$this->{-count} );
}
sub count_charged_residues {
my $seq = shift;
my $sequence = $seq->seq;
my $count;
for ( qw( K R H D E C Y ) ){ # charged AA's
$count->{$_}++ while $sequence =~ /$_/ig;
}
return $count;
}
sub _calculate_iep {
my( $pK, $places, $seq, $count ) = @_;
my $pH = 7.0;
my $step = 3.5;
my $last_charge = 0.0;
my $format = "%.${places}f";
unless( defined $count ){
$count = count_charged_residues($seq);
}
while(1){
my $charge = _calculate_charge_at_pH( $pH, $pK, $count );
last if sprintf($format,$charge) ==
sprintf($format,$last_charge);
$charge > 0 ? ( $pH += $step ) : ( $pH -= $step );
$step /= 2.0;
$last_charge = $charge;
}
return sprintf( $format, $pH );
}
sub _calculate_charge_at_pH {
no warnings; # don't complain if a given key doesn't exist
my( $pH, $pK, $count ) = @_;
my $charge = _partial_charge( $pK->{N_term}, $pH )
+ $count->{K} * _partial_charge( $pK->{K}, $pH )
+ $count->{R} * _partial_charge( $pK->{R}, $pH )
+ $count->{H} * _partial_charge( $pK->{H}, $pH )
- $count->{D} * _partial_charge( $pH, $pK->{D} )
- $count->{E} * _partial_charge( $pH, $pK->{E} )
- $count->{C} * _partial_charge( $pH, $pK->{C} )
- $count->{Y} * _partial_charge( $pH, $pK->{Y} )
- _partial_charge( $pH, $pK->{C_term} );
return $charge;
}
sub _partial_charge {
my $cr = 10 ** ( $_[0] - $_
1 );
return $cr / ( $cr + 1 );
}
1;