# Always use strict and warnings (until the moment you know when it is safe
# to disable them).
use strict;
use warnings;

# Purpose: print, in input order, every DATA line whose trailing number is
# adjacent (exactly one less or one more) to the trailing number of the line
# right after or right before it.

# We use an array to grab DATA: an array preserves order, which matters if the
# order of the output is needed.
# <DATA> is something like an iterator; in list context it returns all the
# remaining lines at once. chomp on the whole list removes the "\n" at the end
# of every line. Do not make the push depend on chomp's return value: chomp
# returns the number of characters removed, which is 0 for a last line that
# has no trailing newline, and such a line would be silently lost.
chomp( my @arr = <DATA> );

# If the order of the data must be preserved we still have the array order:
# adjacent_lines() returns the selected lines in their original order.
# If alphabetical order were enough it would be even simpler: collect the
# lines in a hash and
#     print "$_\n" for sort keys %adj;
print "$_\n" for adjacent_lines(@arr);

# adjacent_lines(@lines)
#
# Takes a list of chomped lines and returns, in the original order, those
# whose numeric suffix (the digits captured by /\d*[A-Z]_(\d+)$/) is adjacent
# to the numeric suffix of a neighbouring line.
#
# Lines that do not match the pattern are never selected and never compared.
# NOTE: only the numeric part is compared; the prefix before the underscore
# (e.g. "2L", "X") is not taken into account.
sub adjacent_lines {
    my @lines = @_;

    # Hashes provide uniqueness of keys, and we need uniqueness because a
    # line can be found adjacent twice (see below).
    my %adj;

    # Loop over the indexes, processing two values at a time (a sliding
    # window). Pay attention when using $#lines: @lines in scalar context
    # returns the number of elements, while $#lines is the last index of the
    # array starting from 0, so scalar @lines == $#lines + 1.
    # We stop one index before the last one because the last element has no
    # next element (it was already processed as the "next" of the previous
    # one). For an empty or one-element list the range is empty.
    for my $i ( 0 .. $#lines - 1 ) {

        # Grab the numerical part of interest. A match in list context
        # returns what is inside the capturing parentheses, or the empty
        # list when it fails, leaving the variable undef.
        # Never write "my $x = $1 if ...": "my" combined with a statement
        # modifier has undefined behaviour, and $1 may be stale.
        my ($cur_num)  = $lines[$i]       =~ /\d*[A-Z]_(\d+)$/;
        my ($next_num) = $lines[ $i + 1 ] =~ /\d*[A-Z]_(\d+)$/;

        # Skip the pair unless both lines have a numerical part.
        next unless defined $cur_num && defined $next_num;

        # If current is adjacent to next ...
        if ( $cur_num == $next_num - 1 ) {

            # ... we populate the hash with "never mind" values.
            # If we had used $adj{...}++ (autoincrement) you would notice
            # X_203 with a value of 2, because it is inserted twice: as the
            # next element while processing X_202 and as the current element
            # while processing it against X_204.
            $adj{ $lines[$i] }       = undef;
            $adj{ $lines[ $i + 1 ] } = undef;
        }
    }

    return grep { exists $adj{$_} } @lines;
}

__DATA__
2L_33
2L_34
3L_45
3L_87
X_202
X_203
X_204