use strict;
use warnings;
use Data::Dumper;

#
# SWISS::Entry is part of Swissknife
# Available from http://swissknife.sourceforge.net/
# See: http://swissknife.sourceforge.net/docs/
#
use SWISS::Entry;

#
# Reads "//"-terminated entries from the files named on the command line
# (or from STDIN), indexes every entry under each of its IDs, DEscriptions
# and Gene Names (lower-cased), and then prints a report of every key,
# flagging the keys that are shared by more than one entry.
#

# Lower-cased key => reference to an array of the entries carrying that key.
my %entries;

{
    # Change the line termination string so we read an entire entry at a
    # time.  The override is confined to this block so that it only affects
    # the reading loop.
    local $/ = "\n//\n";

    # Read in all the entries and fill %entries
    while ( my $record = <> ) {

        # Whitespace after the final "//" terminator shows up as one more
        # (blank) record; there is nothing to parse in it.
        next unless $record =~ /\S/;

        my $entry = SWISS::Entry->fromText($record);

        # The hash values are references to anonymous arrays, so push the
        # entry onto the array of each of its keys.
        foreach my $key ( lookup_keys($entry) ) {
            push @{ $entries{$key} }, $entry;
        }
    }
}

#
# Now report on each key in %entries
#
foreach my $key ( sort keys %entries ) {
    my @sharing = @{ $entries{$key} };

    print "\n\n----------------------\n";
    print "DUPLICATE " if @sharing > 1;
    print "key $key\n";

    foreach my $entry (@sharing) {
        print "\n";
        print " IDs " . join( ", ", $entry->IDs->elements ) . "\n"
            if $entry->IDs;
        print " DEs "
            . join( ", ", map { $_->text } $entry->DEs->elements ) . "\n"
            if $entry->DEs;
        print " GNs "
            . join( ", ",
                map { $_->text }
                map { ( $_->Name, $_->Synonyms ) } $entry->GNs->elements )
            . "\n"
            if $entry->GNs;
    }
}

#
# lookup_keys($entry)
#
# Returns the distinct lower-cased keys under which $entry is indexed: its
# IDs, the text of its DEs, and the text of the Name and Synonyms of its GNs.
#
sub lookup_keys {
    my ($entry) = @_;

    # Each map is parenthesised on purpose.  map is a list operator, so
    # without the parentheses the first map would swallow everything after
    # it, and the gene names would only get ->text applied by accident.
    my @names = (
        $entry->IDs->elements,
        ( map { $_->text } $entry->DEs->elements ),
        (
            map { $_->text }
            map { ( $_->Name, $_->Synonyms ) } $entry->GNs->elements
        ),
    );

    # Return each key once per entry.  Two names of one entry that differ
    # only in case would otherwise store the entry twice under the same key
    # and make the report flag it as a DUPLICATE of itself.
    my %seen;
    my @keys;
    foreach my $name (@names) {

        # An undefined or empty name cannot identify anything, and lc(undef)
        # would warn and group unrelated entries under the key "".
        next unless defined $name && length $name;

        my $key = lc $name;
        push @keys, $key unless $seen{$key}++;
    }

    return @keys;
}