use strict;
use warnings;

####################################################################
## MAIN
####################################################################

# Run the pipeline only when this file is executed as a script, so the
# subs below can be loaded (require/do) without touching the filesystem.
exit main(@ARGV) unless caller;

# Read a PDB file, group its records by record type, and extract the
# secondary-structure (HELIX/SHEET/TURN) and SEQRES data from it.
#   $input_file - optional path of the PDB file; defaults to 'pdb244l.ent'
# Returns 0, which is used as the process exit status.
sub main {
    my ($input_file) = @_;
    $input_file = 'pdb244l.ent' unless defined $input_file;

    print "Reading a file...\n";
    my $file = get_file($input_file);    # whole file content as one string

    print "Parsing a file...\n";
    my %record_types = parse_pdb_file($file);

    print "Getting data...\n";
    # Declare unconditionally and choose the value with the ternary; a
    # "my" inside a conditional expression has unreliable semantics.
    my %helix = $record_types{'HELIX'} ? get_helix($record_types{'HELIX'}) : ();
    my %sheet = $record_types{'SHEET'} ? get_sheet($record_types{'SHEET'}) : ();
    my %turn  = $record_types{'TURN'}  ? get_turn($record_types{'TURN'})   : ();
    my %chains = extractSEQRES($record_types{'SEQRES'});

    return 0;
}

####################################################################
## SUBS
####################################################################

# Read a whole file into a single string.
#   $input_file - path of the file to read
# Returns the file content (empty string for an empty file).
# Dies with a message containing the OS error if the file cannot be opened.
sub get_file {
    my ($input_file) = @_;

    open(my $in, '<', $input_file)
        or die "Cannot open $input_file for reading: $!\n";

    # Slurp mode: with $/ undefined a single read returns the whole file.
    my $sequence = do { local $/; <$in> };
    $sequence = '' unless defined $sequence;

    close($in);    # close before returning so the handle is never leaked
    return $sequence;
}

# Group the lines of a PDB file by record type.
#   @_ - the file content, either as one multi-line string or as a list of
#        lines/chunks; each argument is split into lines, newlines retained
# Returns a hash (as a flat list) whose keys are record type names and whose
# values are the concatenated lines of that record type.
sub parse_pdb_file {
    # split /^/ breaks a string at line starts and keeps each "\n".
    my @lines = map { split /^/m, $_ } grep { defined } @_;

    my %record_types = ();
    foreach my $line (@lines) {
        # The record name occupies at most the first six characters; the
        # limit keeps e.g. "HETATM10000" from becoming its own record type.
        my ($record_type) = ($line =~ /^(\S{1,6})/);
        next unless defined $record_type;    # blank or space-led line

        $record_types{$record_type} .= $line;
    }
    return %record_types;
}

# Return the whitespace-stripped fixed-width field of a record line.
#   $line   - one record line
#   $offset - 0-based start of the field
#   $width  - field width in characters
# Returns '' when the line is too short to contain the field.
sub _field {
    my ($line, $offset, $width) = @_;
    return '' if !defined $line || length($line) <= $offset;
    return stripspaces(substr($line, $offset, $width));
}

# Append one "<symbols>;<start>" segment to a chain's entry, separating
# segments of the same chain with ':'.
#   $chain_hash - hash reference, updated in place
#   $chain      - chain identifier used as the key
#   $symbol     - single character repeated $length times
#   $length     - number of residues in the segment
#   $start      - first residue number of the segment
sub _append_segment {
    my ($chain_hash, $chain, $symbol, $length, $start) = @_;

    my $segment = ($symbol x $length) . ";$start";
    $chain_hash->{$chain} =
        defined $chain_hash->{$chain}
        ? "$chain_hash->{$chain}:$segment"
        : $segment;
    return;
}

# Summarise HELIX records per chain.
#   $helix - string holding the HELIX lines, separated by "\n"
# Returns a hash (flat list): chain id => "HHH;start[:HHH;start...]", with
# one 'H' per residue of each helix.  The helix length is read from offset
# 71; when that field is blank or absent it is derived from the start and
# end residue numbers (offsets 21 and 33).  Records that yield no length
# are skipped.
sub get_helix {
    my ($helix) = @_;
    my %chain_hash = ();
    return %chain_hash unless defined $helix;

    foreach my $line (split(/\n/, $helix)) {
        my $this_chain = _field($line, 19, 1);
        my $start      = _field($line, 21, 4);
        my $length     = _field($line, 71, 5);

        if ($length !~ /\A\d+\z/) {
            my $end = _field($line, 33, 4);
            next unless $start =~ /\A-?\d+\z/ && $end =~ /\A-?\d+\z/;
            $length = $end - $start + 1;
        }
        _append_segment(\%chain_hash, $this_chain, 'H', $length, $start);
    }
    return %chain_hash;
}

# Summarise SHEET records per chain.
#   $sheet - string holding the SHEET lines, separated by "\n"
# Returns a hash (flat list): chain id => "SSS;start[:SSS;start...]", with
# one 'S' per residue from the start (offset 22) to the end (offset 33)
# residue number, inclusive.  Records lacking either number are skipped.
sub get_sheet {
    my ($sheet) = @_;
    my %chain_hash = ();
    return %chain_hash unless defined $sheet;

    foreach my $line (split(/\n/, $sheet)) {
        my $this_chain = _field($line, 21, 1);
        my $start      = _field($line, 22, 4);
        my $end        = _field($line, 33, 4);
        next unless $start =~ /\A-?\d+\z/ && $end =~ /\A-?\d+\z/;

        _append_segment(\%chain_hash, $this_chain, 'S', $end - $start + 1, $start);
    }
    return %chain_hash;
}

# Summarise TURN records per chain.
#   $turn - string holding the TURN lines, separated by "\n"
# Returns a hash (flat list): chain id => "TTT;start[:TTT;start...]", with
# one 'T' per residue from the start (offset 20) to the end (offset 31)
# residue number, inclusive.  Records lacking either number are skipped.
sub get_turn {
    my ($turn) = @_;
    my %chain_hash = ();
    return %chain_hash unless defined $turn;

    foreach my $line (split(/\n/, $turn)) {
        my $this_chain = _field($line, 19, 1);
        my $start      = _field($line, 20, 4);
        my $end        = _field($line, 31, 4);
        next unless $start =~ /\A-?\d+\z/ && $end =~ /\A-?\d+\z/;

        _append_segment(\%chain_hash, $this_chain, 'T', $end - $start + 1, $start);
    }
    return %chain_hash;
}

# Remove leading and trailing whitespace.
#   $string - the text to trim
# Returns the trimmed copy; an undefined argument is returned unchanged.
sub stripspaces {
    my ($string) = @_;
    return $string unless defined $string;

    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

# Collect the residue text of SEQRES records per chain.
#   $seqres - string holding the SEQRES lines, separated by "\n"; may be
#             undefined when the file has no SEQRES records
# Returns a hash (flat list): chain id => concatenation of the residue field
# (up to 52 characters starting at offset 19) of every line of that chain,
# in input order.  Lines too short to hold a residue field are skipped, and
# undefined or empty input gives an empty hash.
sub extractSEQRES {
    my ($seqres) = @_;
    my %results = ();
    return %results unless defined $seqres;

    foreach my $line (split(/\n/, $seqres)) {
        next if length($line) < 20;    # no residue field on this line

        my $thischain = stripspaces(substr($line, 11, 1));
        # Accumulating per chain keeps every line even if a chain's
        # records are not contiguous.
        $results{$thischain} .= substr($line, 19, 52);
    }
    return %results;
}