#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;

# Scan sequence files (given on the command line or via STDIN) for start
# codons and, for each one, the nearest stop codon that follows it on the
# same line.  The first line of every input file is skipped.  Prints the
# number of start codons for which a stop codon was found.

my $substring = 'ATG';

# get_first_index($found, $string)
#
# Locate the nearest stop codon ('TAG', 'TAA' or 'TGA') in $string, searching
# from offset $found + 2 onwards.  (For the 'ATG' start codon the base at
# $found + 2 is 'G', and every stop codon starts with 'T', so no stop codon
# can begin inside the start codon itself.)
#
# Parameters:
#   $found  - offset of the start codon within $string
#   $string - the sequence line to search
#
# Returns a reference to a hash holding at most one pair,
# { position => stop_codon }; the hash is empty when no stop codon follows.
#
# NOTE(review): the search is not restricted to the reading frame of the
# start codon -- confirm whether only in-frame stop codons are wanted.
sub get_first_index {
    my ($found, $string) = @_;

    my @tags = ('TAG', 'TAA', 'TGA');
    my %hash;
    my $nearest;

    for my $tag (@tags) {
        my $position = index($string, $tag, $found + 2);
        next if $position == -1;

        # Keep each position paired with the tag that produced it; pairing
        # the list of found positions with the full tag list mislabels the
        # result whenever one of the tags is absent.
        if (!defined $nearest || $position < $nearest) {
            $nearest = $position;
            %hash    = ($position => $tag);
        }
    }

    return \%hash;
}

# _find_hits($string)
#
# Return a list of [ $start_offset, $stop_hashref ] pairs, one for every
# occurrence of $substring in $string that is followed by a stop codon.
# Occurrences with no stop codon after them are left out.
sub _find_hits {
    my ($string) = @_;

    my @hits;
    my $found = index($string, $substring);
    while ($found != -1) {
        my $hash_result = get_first_index($found, $string);
        push @hits, [ $found, $hash_result ] if %$hash_result;

        # Advance by one so overlapping occurrences are still seen.
        $found = index($string, $substring, $found + 1);
    }

    return @hits;
}

# main()
#
# Read every input line, collect the hits and print how many were recorded.
sub main {
    my %HoH;
    my @AoH;

    while (my $line = <>) {
        chomp $line;
        next if $. < 2;    # Skip first line

        for my $hit (_find_hits($line)) {
            my ($found, $hash_result) = @$hit;

            # The label carries file and line so that hits at the same
            # column on different lines do not overwrite each other in %HoH.
            my $label = "Found $substring at $found (file $ARGV, line $.)";

            # choose one or the other, whichever you prefer
            $HoH{$label} = $hash_result;
            push @AoH, $label, $hash_result;
        }
    }
    continue {
        close ARGV if eof;    # reset $. for the next input file
    }

    # A start codon with no stop codon after it is not recorded, so this
    # count can be lower than a plain count of start codons in the file.
    my @keys = keys %HoH;
    print scalar(@keys), "\n";

    # print Dumper \@AoH;
    # print Dumper \%HoH;

    return;
}

# Run only when executed as a script, not when loaded from another file.
main() unless caller;

1;

__END__

$ perl bio.pl sequence.fa
23

####

$ cat sequence.fa | grep -bo ATG
16:ATG
50:ATG
133:ATG
232:ATG
252:ATG
287:ATG
305:ATG
363:ATG
394:ATG
489:ATG
575:ATG
651:ATG
689:ATG
724:ATG
854:ATG
859:ATG
954:ATG
1014:ATG
1044:ATG
1051:ATG
1145:ATG
1228:ATG
1249:ATG
1272:ATG
tinyos@tinyOMN:~/Monks$ cat sequence.fa | grep -bo ATG | wc -l
24

####

#!usr/bin/perl
use say;
use strict;
use warnings;
use Data::Dumper;

my %HoH;
my @AoH;
my $substring = 'ATG';

sub get_first_index {
    my %hash;
    my ($found, $string) = @_;
    my @tags = ('TAG', 'TAA', 'TGA');
    my @indexes;
    while (my $tag = shift @tags) {
        my $position = index($string, $tag, $found + 2);
        push @indexes, $position if ($position != -1);
    }
    # hash slice, we destroy the array above so we need to replace it
    @hash{@indexes} = ('TAG', 'TAA', 'TGA');
    # sort has based on the lowest key first
    my @sorted = (sort {$a <=> $b} keys %hash);
    # remove the rest of the keys as we only want first occurence
    my $array_size = @sorted;
    delete $hash{$_} for @sorted [1..$array_size - 1];
    return \%hash;
}

while (<>) {
    chomp;
    next if $. < 2; # Skip first line
    my $found = index($_, $substring);
    while ($found != -1) {
        my $hash_result = get_first_index( $found, $_ );
        # choose one or the other what ever you prefer
        $HoH{"Found $substring at $found"} = $hash_result if (%$hash_result);
        push @AoH, "Found $substring at $found" ,$hash_result if (%$hash_result);
        my $offset = $found + 1;
        $found = index( $_, $substring, $offset );
    }
} continue {
    close ARGV if eof; # reset $.
}

my @keys = keys %HoH;
print scalar @keys . "\n";
# print Dumper \@AoH;
# print Dumper \%HoH;