#!/usr/local/bin/perl
#
# Finds the residue numbers that belong to each domain of interest and
# prints those that also appear as catalytic site residues in a CSA
# (catalytic site) data file.
#
# Usage:  <this script> DOMAIN_LIST_FILE CSA_FILE
#
#   DOMAIN_LIST_FILE  whitespace-separated text; the first column of each
#                     line is the name of a domain structure file, which is
#                     opened as "$dir$domain".
#   CSA_FILE          comma-separated text; column 0 is read as the PDB
#                     code, column 3 as the chain and column 4 as the
#                     residue number.
#
# Output: one line per shared residue, in the form
#   "<domain pdb+chain> <CSA pdb+chain> <domain residue> <CSA residue>"

use strict;
use warnings;
use English;
use Data::Dumper;
use FileHandle;

# NOTE: the original `use UNIVERSAL qw(isa);` has been dropped. Nothing in
# this script calls isa(), and importing from UNIVERSAL is a fatal error
# on Perl 5.22 and later.

# this is the directory that the domain files are stored in
# (prepended verbatim to each domain name, so include a trailing "/")
my $dir = "";

# first argument:  file containing the domains of interest
# second argument: file containing the CSA data
my ($domFile, $csafile) = @ARGV;
die "Usage: $0 DOMAIN_LIST_FILE CSA_FILE\n"
    unless defined $domFile && defined $csafile;

my $csa_hash = load_csa_sites($csafile);
my $dom_hash = load_domain_residues($domFile, $dir);
report_catalytic_residues($csa_hash, $dom_hash);

# Read the CSA file into a two-level lookup: {pdb . chain}{residue} = 1.
# Dies if the file cannot be opened.
sub load_csa_sites {
    my ($path) = @_;

    my %sites;
    open(my $fh, '<', $path) or die "unable to open $path: $!\n";

    while (my $line = <$fh>) {
        chomp $line;

        # columns 0, 3 and 4 hold the pdb code, chain and residue number
        my ($pdb, $chain, $res) = (split /,/, $line)[0, 3, 4];

        # skip lines that are too short to carry a residue
        next unless defined $pdb && defined $chain && defined $res;

        # get rid of excess white space so the keys compare cleanly
        s/\s+//g for $pdb, $chain, $res;
        next if $res eq '';

        $sites{ $pdb . $chain }{$res} = 1;
    }

    close($fh);
    return \%sites;
}

# Read every domain named in the list file and collect the residue numbers
# of its CA atoms: {domain}{residue number} = 1.
# Dies if the list file or any domain file cannot be opened.
sub load_domain_residues {
    my ($list_path, $base_dir) = @_;

    my %residues_of;
    open(my $list_fh, '<', $list_path)
        or die "ERROR: Unable to open $list_path for reading: $!\n";

    while (my $entry = <$list_fh>) {

        # the domain name is the first whitespace-separated column
        my ($domain) = split ' ', $entry;
        next unless defined $domain && $domain ne '';

        my $path = "$base_dir$domain";
        open(my $pdb_fh, '<', $path)
            or die "Sorry, unable to open $path for reading: $!\n";

        while (my $line = <$pdb_fh>) {
            chomp $line;

            # column 2 is the atom name, column 5 the residue number
            my ($atom_name, $number) = (split /\s+/, $line)[2, 5];

            # looking just for CA atoms of the pdb file
            next unless defined $atom_name && $atom_name eq 'CA';
            next unless defined $number;

            $residues_of{$domain}{$number} = 1;
        }

        close($pdb_fh);
    }

    close($list_fh);
    return \%residues_of;
}

# Print every domain residue that is also listed in the CSA data for the
# same pdb code and chain. The first five characters of the domain name
# are taken as pdb code + chain. Output is sorted so runs are repeatable.
sub report_catalytic_residues {
    my ($csa, $dom) = @_;

    for my $protein_dom (sort keys %{$dom}) {
        my $domchain = substr($protein_dom, 0, 5);

        # direct lookup instead of scanning the whole CSA hash per residue
        my $sites = $csa->{$domchain};
        next unless $sites;

        for my $residue (sort keys %{ $dom->{$protein_dom} }) {
            next unless exists $sites->{$residue};

            # a hit means the CSA pdb+chain and residue equal the domain's,
            # so the original four-column output repeats each value
            my $pdbchain = $domchain;
            my $res      = $residue;
            print "$domchain $pdbchain $residue $res\n";
        }
    }

    return;
}