# ---------------------------------------------------------------------------
# Score the calls in the second input against the marker windows built from
# the first input, then print the true/false positive rates.
#
# Why the previous revision produced negative rates or rates above 100%:
#   * the window lookup used the position column (1) as the hash key although
#     the windows are keyed by contig id (column 0);
#   * a call was counted once per window position it matched, so overlapping
#     windows (and repeated input lines) inflated the true-positive count;
#   * the true-positive count (positions) was divided by a contig count held
#     in a misspelled, never-assigned variable.
# Both rates now share one denominator - the number of distinct candidate
# calls - so they always lie in 0..100 and add up to 100.
# ---------------------------------------------------------------------------

# Read "id<TAB>..." lines from $fh and return a hash ref that maps each contig
# name to the list of positions covered by its marker windows.
#
# Only ids of the form CLS_S3_Contig<digits>[-]<start>[_]<end>
# (case-insensitive, e.g. CLS_S3_Contig1000-200_202) are used; every other
# line is skipped.  Each covered position is echoed to STDOUT as
# "contig<TAB>position".
sub _read_marker_windows {
    my ($fh) = @_;

    # The window covers start-8 .. end+7, exactly as the original C-style
    # loop did ($i = start-8; $i < end+8).
    # NOTE(review): the window is asymmetric (8 below, 7 above) - confirm
    # that this is intended and not an off-by-one.
    my $flank = 8;
    my %positions_of;

    while (my $line = <$fh>) {
        chomp $line;
        my ($id) = split /\t/, $line;
        next unless defined $id;    # blank line

        my ($contig, $start, $end)
            = $id =~ m/^(CLS_S3_Contig[0-9]+)-?([0-9]+)_?([0-9]+)$/i
            or next;

        for my $pos ($start - $flank .. $end + $flank - 1) {
            print join("\t", $contig, $pos), "\n";
            push @{ $positions_of{$contig} }, $pos;
        }
    }

    return \%positions_of;
}

# Read tab-separated call lines (contig, position, column 2, column 3, ...)
# from $fh and score them against the windows in $positions_of (the structure
# returned by _read_marker_windows).
#
# A line is a candidate call when its contig has at least one window, its
# column 2 equals 1 and its column 3 is at least 3; each distinct
# contig/position pair is counted once.  A candidate is a true positive when
# its position lies inside a window of its contig; each true positive is
# echoed to STDOUT followed by "***", the contig and the position.
# NOTE(review): the meaning of columns 2 and 3 is not visible here; the
# thresholds are carried over unchanged from the original condition.
#
# Returns a hash ref with:
#   candidates     - number of distinct candidate calls
#   true_positions - array ref of the positions of the true positives
#   hits_of        - hash ref: contig => list of column-2 values of its
#                    true positives (the former %file2)
sub _score_calls {
    my ($fh, $positions_of) = @_;

    # Turn each position list into a set: one lookup per call, and a position
    # covered by several overlapping windows can no longer match twice.
    my %in_window;
    for my $contig (keys %{$positions_of}) {
        $in_window{$contig}{$_} = 1 for @{ $positions_of->{$contig} };
    }

    my (%seen, %hits_of, @true_positions);
    my $candidates = 0;

    while (my $line = <$fh>) {
        chomp $line;
        my @fields = split /\t/, $line;
        next if @fields < 4;        # too short to carry all tested columns

        my ($contig, $pos, $flag, $support) = @fields[0 .. 3];
        next unless exists $in_window{$contig};
        next unless $flag == 1 && $support >= 3;

        # The original compared positions numerically; normalise so that
        # e.g. "010" and "10" address the same hash key.
        $pos += 0 if $pos =~ /^-?[0-9]+$/;
        next if $seen{"$contig\t$pos"}++;   # repeated call, already counted

        $candidates++;
        next unless $in_window{$contig}{$pos};

        print join("\t", @fields[0 .. 3], "***", $contig, $pos), "\n";
        push @true_positions, $pos;
        push @{ $hits_of{$contig} }, $flag;
    }

    return {
        candidates     => $candidates,
        true_positions => \@true_positions,
        hits_of        => \%hits_of,
    };
}

# Return $part as a percentage of $whole, formatted with two decimals.
# An empty or zero $whole yields "0.00" instead of a division-by-zero error.
sub _percent {
    my ($part, $whole) = @_;
    return '0.00' unless $whole;
    return sprintf '%.2f', $part / $whole * 100;
}

############################################# KNOWN MARKER WINDOWS #############################################
my %file1 = %{ _read_marker_windows(\*INPUT1) };
close(INPUT1);

############################################# SCORE THE CALLS #############################################
# NOTE(review): the handle name was lost from the original "while(){"; INPUT2
# is assumed by analogy with INPUT1 - adjust it to the handle actually opened
# for the second file.
my $score          = _score_calls(\*INPUT2, \%file1);
my %file2          = %{ $score->{hits_of} };
my @true_positives = @{ $score->{true_positions} };

############################################# IDENTIFY COMMON ELEMENTS #############################################
my $common_element = "";
my @common = grep { exists $file2{$_} } keys %file1;

############################################# IDENTIFY NOT COMMON ELEMENTS #############################################
my @not_common = grep { !exists $file2{$_} } keys %file1;

############ making calculations##########################
# NOTE(review): the denominator is the number of distinct candidate calls on
# contigs that have marker windows; confirm this is the intended definition
# of the rates.
my $candidate_calls       = $score->{candidates};
my $found_true_markers    = scalar @true_positives;
my $found_false_positives = $candidate_calls - $found_true_markers;
my $truepositive          = _percent($found_true_markers,    $candidate_calls);
my $false_positive_rate   = _percent($found_false_positives, $candidate_calls);
print "$truepositive \% is the rate of true positives\n";
print "$false_positive_rate \% is the rate of false positives\n";