# Sample data set for the windowed pattern-match grouping described below.
#
# This constructs a small sample data set for what I am describing.
# It is a hash of hashes of arrays (HoHoA) with 1 upper key/value pair,
# 4 lower key/value pairs, and arrays of under 10 elements.  In reality,
# it would be 22,000 upper k/v pairs, 5ish lower k/v pairs, and up to
# hundreds of elements in each array.

# $span is the variable used to determine the distance between
# $upperlimit and $lowerlimit.  $num is the number of different
# patterns searched for.
# NOTE(review): the original note reads "Both of these are from" and then
# breaks off, so the source of these two values is not recorded here.
# They are declared with `our` so they remain package globals, reachable
# by name from code elsewhere in the file.
our $span = 50;
our $num  = 4;

# These hashes are generated by storing m// matches for regexes using a
# different sub.  Keys such as '>scaffold_1' are fasta_ids from text
# sequences in the file.  Keys 0,1,2,3 represent the patterns searched
# for (these are the values of the @SortedKeys array).  Values (arrays)
# are the positions of the matches to the patterns in the file.
#
# The fasta_id key must be quoted: a bareword hash key cannot start
# with '>'.
our %matches;
%{ $matches{'>scaffold_1'} } = (
    0 => [ 0,  15, 100, 120, 250 ],
    1 => [ 25, 76, 149, 211 ],
    2 => [ 20, 80, 115, 172, 225 ],
    3 => [ 40, 90, 125, 180, 220, 240 ],
);

# In each case, $lowerlimit would be set to the first element of
# array {0}, followed by its second element, all the way through,
# then starting over again at {1}, etc.  $upperlimit is always
# $lowerlimit + $span.
#
# So, the desired output of the sub would be along these lines.  This is
# left as a comment on purpose: it describes what the sub should build,
# and must not pre-populate %sets.  The inner lists are written as array
# refs because parenthesised lists would flatten inside a hash
# constructor -- presumably the sub stores array refs, mirroring
# %matches; confirm against the sub.
#
#   $sets{'>scaffold_1'}[0] = {    # $lowerlimit = 0
#       0 => [ 0, 15 ],
#       1 => [ 25 ],
#       2 => [ 20 ],
#       3 => [ 40 ],
#   };
#   $sets{'>scaffold_1'}[1] = {    # $lowerlimit = 15
#       0 => [ 15 ],
#       1 => [ 25 ],
#       2 => [ 20 ],
#       3 => [ 40 ],
#   };
#   $sets{'>scaffold_1'}[2] = {    # $lowerlimit = 100
#       0 => [ 100, 120 ],
#       1 => [ 149 ],
#       2 => [ 115 ],
#       3 => [ 125 ],
#   };
#   $sets{'>scaffold_1'}[3] = {    # $lowerlimit = 76
#       0 => [ 100, 120 ],
#       1 => [ 76 ],
#       2 => [ 80, 115 ],
#       3 => [ 90, 125 ],
#   };
#   $sets{'>scaffold_1'}[4] = {    # $lowerlimit = 211
#       0 => [ 250 ],
#       1 => [ 211 ],
#       2 => [ 225 ],
#       3 => [ 220, 240 ],
#   };
#   $sets{'>scaffold_1'}[5] = {    # $lowerlimit = 115
#       0 => [ 120 ],
#       1 => [ 149 ],
#       2 => [ 115 ],
#       3 => [ 125 ],
#   };