use strict;
use warnings;

# Reads records of the form LEAD-ITEM,CODE[,VALUE[:VALUE...]] from the DATA
# section, groups them by LEAD and then by CODE, and writes a sorted report
# to "<script name>.out".
#
# Structure built while reading:
#   $data{LEAD}{CODE}{'items'}  array of every ITEM seen for that LEAD/CODE
#   $data{LEAD}{CODE}{'data'}   array of every VALUE seen for that LEAD/CODE
#                               (absent when no record supplied a value)
my %data;

LINE:
while ( my $line = <DATA> ) {
    chomp $line;

    # $line contains '100-233,MA,150:250', for example.
    # Everything after __DATA__ is read, so anything that does not look like
    # a record (blank lines, separators, notes) is skipped.
    next LINE if $line !~ m{\A \d+ - \d+ , [^,]+ (?: , | \z ) }x;

    my @initialparts = split /,/, $line;
    # @initialparts now contains ( '100-233', 'MA', '150:250' )

    my @leadparts = split /-/, $initialparts[0];
    # @leadparts now contains ( '100', '233' )

    push @{ $data{ $leadparts[0] }{ $initialparts[1] }{'items'} },
        $leadparts[1];
    # @{ $data{'100'}{'MA'}{'items'} } = ( '233' )

    # Do only if there was a last term; split drops a trailing empty field,
    # so '200-400,ER,' yields just two parts.
    if ( scalar(@initialparts) > 2 ) {
        my @tailparts = split /:/, $initialparts[2];
        # @tailparts now contains ( '150', '250' )

        push @{ $data{ $leadparts[0] }{ $initialparts[1] }{'data'} },
            @tailparts;
    }
}

# The report goes to a file named after the script itself.
my $out_name = $0 . '.out';
open( my $out_fh, '>', $out_name )
    or die("Can't open $out_name for output: $!\n");

# Personally, I prefer data sorted, although you could leave out the sorts.
# The leading numbers and the items are compared numerically so that 75 comes
# before 100; a plain sort compares strings and would put '75' last.
foreach my $f1 ( sort { $a <=> $b } keys %data ) {
    print {$out_fh} $f1, "\n";

    foreach my $f2 ( sort keys %{ $data{$f1} } ) {
        print {$out_fh} $f2, "\n";

        # Every item of the group gets the same '|v1:v2:...' suffix; it is
        # a bare '|' when the group has no values at all.  Values are sorted
        # as strings because nothing checks that they are numeric.
        my $v4 = '|';
        if ( defined $data{$f1}{$f2}{'data'} ) {
            $v4 .= join ':', sort @{ $data{$f1}{$f2}{'data'} };
        }

        foreach my $v3 ( sort { $a <=> $b } @{ $data{$f1}{$f2}{'items'} } ) {
            print {$out_fh} $v3, $v4, "\n";
        }
    }
}

# Buffered write errors only surface when the handle is closed.
close($out_fh) or die("Can't close $out_name: $!\n");

__DATA__
100-233,MA,150:250
100-344,MA,350:
200-400,ER,
200-300,ER,576
100-250,MA,150
75-300,MA,350
####
#
# Variables, throughout run
#
# Read line 1
$_ = '100-233,MA,150:250'
@initialparts = ( '100-233', 'MA', '150:250' )
@leadparts = ( '100', '233' )
%data = { '100' => { 'MA' => { 'items' => ( '233' ) }}}
@tailparts = ( '150', '250' )
%data = { '100' => { 'MA' => { 'items' => ( '233' ), 'data' => ( '150', '250' ) }}}
# Read line 2
$_ = '100-344,MA,350:'
@initialparts = ( '100-344', 'MA', '350:' )
@leadparts = ( '100', '344' )
%data = { '100' => { 'MA' => { 'items' => ( '233', '344' ), 'data' => ( '150', '250' ) }}}
@tailparts = ( '350' )
%data = { '100' => { 'MA' => { 'items' => ( '233', '344' ), 'data' => ( '150', '250', '350' ) }}}
# Read line 3
$_ = '200-400,ER,'
@initialparts = ( '200-400', 'ER' )
@leadparts = ( '200', '400' )
%data = { '100' => { 'MA' => { 'items' => ( '233', '344' ), 'data' => ( '150', '250', '350' ) }}, '200' => { 'ER' => { 'items' => ( '400' ) }}}
# Read line 4
$_ = '200-300,ER,576'
@initialparts = ( '200-300', 'ER', '576' )
@leadparts = ( '200', '300' )
%data = { '100' => { 'MA' => { 'items' => ( '233', '344' ), 'data' => ( '150', '250', '350' ) }}, '200' => { 'ER' => { 'items' => ( '400', '300' ) }}}
@tailparts = ( '576' )
%data = { '100' => { 'MA' => { 'items' => ( '233', '344' ), 'data' => ( '150', '250', '350' ) }}, '200' => { 'ER' => { 'items' => ( '400', '300' ), 'data' => ( '576' ) }}}
# Read line 5
$_ = '100-250,MA,150'
@initialparts = ( '100-250', 'MA', '150' )
@leadparts = ( '100', '250' )
%data = { '100' => { 'MA' => { 'items' => ( '233', '344', '250' ), 'data' => ( '150', '250', '350' ) }}, '200' => { 'ER' => { 'items' => ( '400', '300' ), 'data' => ( '576' ) }}}
@tailparts = ( '150' )
%data = { '100' => { 'MA' => { 'items' => ( '233', '344', '250' ), 'data' => ( '150', '250', '350', '150' ) }}, '200' => { 'ER' => { 'items' => ( '400', '300' ), 'data' => ( '576' ) }}}
# Read line 6
$_ = '75-300,MA,350'
@initialparts = ( '75-300', 'MA', '350' )
@leadparts = ( '75', '300' )
%data = { '75' => { 'MA' => { 'items' => ( '300' ) } }, '100' => { 'MA' => { 'items' => ( '233', '344', '250' ), 'data' => ( '150', '250', '350', '150' ) }}, '200' => { 'ER' => { 'items' => ( '400', '300' ), 'data' => ( '576' ) }}}
@tailparts = ( '350' )
%data = { '75' => { 'MA' => { 'items' => ( '300' ), 'data' => ( '350' ) }}, '100' => { 'MA' => { 'items' => ( '233', '344', '250' ), 'data' => ( '150', '250', '350', '150' ) }}, '200' => { 'ER' => { 'items' => ( '400', '300' ), 'data' => ( '576' ) }}}
# In loop
%data = { '75' => { 'MA' => { 'items' => ( '300' ), 'data' => ( '350' ) }}, '100' => { 'MA' => { 'items' => ( '233', '344', '250' ), 'data' => ( '150', '250', '350', '150' ) }}, '200' => { 'ER' => { 'items' => ( '400', '300' ), 'data' => ( '576' ) }}}
$f1 = '75'
$f2 = 'MA'
$v4 = '|350'
$v3 = '300'
$f1 = '100'
$f2 = 'MA'
$v4 = '|150:150:250:350'
$v3 = '233'
$v3 = '250'
$v3 = '344'
$f1 = '200'
$f2 = 'ER'
$v4 = '|576'
$v3 = '300'
$v3 = '400'
#
# Output, from run
#
75
MA
300|350
100
MA
233|150:150:250:350
250|150:150:250:350
344|150:150:250:350
200
ER
300|576
400|576
####
# Variant of the 'data' block above that lists each value only once.
if ( defined( $data{$f1}{$f2}{'data'} ) ) {
    my (%uniq);
    foreach ( @{ $data{$f1}{$f2}{'data'} } ) {
        $uniq{$_}++;
    }
    $v4 .= join( ':', sort( keys(%uniq) ) );
}