If you can install Swissknife, you don't have to write your own parser for UniProt; a program similar to the following might do what you need.
use strict;
use warnings;
use Data::Dumper;

#
# Index the entries of a UniProt flat file by identifier, description and
# gene name, then report every key and flag the keys shared by more than
# one entry.  Input is read from the files named on the command line, or
# from STDIN when none are given.
#
# SWISS::Entry is part of Swissknife
# Available from http://swissknife.sourceforge.net/
# See: http://swissknife.sourceforge.net/docs/
#
use SWISS::Entry;

# Lower-cased key => array ref of the entries that carry that key.
my %entries;

# Change the line termination string so we read an entire entry at a time
local $/ = "\n//\n";

# Read in all the entries and fill %entries
while (<>) {

    # Skip whitespace-only chunks (e.g. blank lines after the last "//").
    next unless /\S/;

    my $entry = SWISS::Entry->fromText($_);

    #
    # Add this entry to %entries once for each IDentifier, DEscription
    # and Gene Name in the entry, all keys converted to lower case.
    # The hash values are references to anonymous arrays, so push the
    # entries onto the arrays.
    #
    # Gene names and synonyms are objects, like the DE elements, so the
    # ->text call below is applied to both lists.
    # NOTE(review): assumes every gene group has a defined Name - confirm.
    #
    my @gene_names = map { ( $_->Name, $_->Synonyms ) } $entry->GNs->elements;
    my @keys       = (
        $entry->IDs->elements,
        map { $_->text } $entry->DEs->elements, @gene_names,
    );

    foreach my $key (@keys) {
        push( @{ $entries{ lc($key) } }, $entry );
    }
}

#
# Now report on each key in %entries
#
foreach my $key ( sort keys %entries ) {
    print "\n\n----------------------\n";
    print "DUPLICATE " if ( @{ $entries{$key} } > 1 );
    print "key $key\n";

    foreach my $entry ( @{ $entries{$key} } ) {
        print "\n";

        print " IDs " . join( ", ", $entry->IDs->elements ) . "\n"
            if ( $entry->IDs );

        print " DEs "
            . join( ", ", map { $_->text } $entry->DEs->elements ) . "\n"
            if ( $entry->DEs );

        print " GNs "
            . join( ", ",
                map { $_->text }
                map { ( $_->Name, $_->Synonyms ) } $entry->GNs->elements )
            . "\n"
            if ( $entry->GNs );
    }
}
In reply to Re: how to parse a UniProt Flat file
by ig
in thread how to parse a UniProt Flat file
by stanleysj
| For: | Use: |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |