use strict;
use warnings;

# Reads records of the form "LEAD-ITEM,CODE[,VAL[:VAL...]]" from the __DATA__
# section below, groups them by LEAD and then by CODE, and writes a sorted
# report to "<script name>.out".

# parse_records($fh)
#
# Reads every record from the filehandle $fh and returns a hash reference
# shaped as
#
#     $data->{LEAD}{CODE} = { items => [ ITEM, ... ], data => [ VAL, ... ] }
#
# The 'data' key only exists when at least one record for that LEAD/CODE pair
# carried a non-empty third field. Blank lines are skipped.
sub parse_records {
    my ($fh) = @_;
    my %data;

    while ( my $line = <$fh> ) {
        chomp $line;
        next if $line eq '';    # a blank line carries no record

        # $line contains '100-233,MA,150:250', for example
        my @initialparts = split( /,/, $line );

        # @initialparts now contains ( '100-233', 'MA', '150:250' )
        my @leadparts = split( /-/, $initialparts[0] );

        # @leadparts now contains ( '100', '233' )
        push( @{ $data{ $leadparts[0] }{ $initialparts[1] }{'items'} },
            $leadparts[1] );

        # @{ $data{'100'}{'MA'}{'items'} } = ( '233' )

        # split() drops trailing empty fields, so a record ending in a bare
        # comma ('200-400,ER,') yields only two parts and is skipped here.
        if ( scalar(@initialparts) > 2 ) {    # Do only if there was a last term
            my @tailparts = split( /:/, $initialparts[2] );

            # @tailparts now contains ( '150', '250' )
            push( @{ $data{ $leadparts[0] }{ $initialparts[1] }{'data'} },
                @tailparts );
        }
    }

    return \%data;
}

# format_report($data)
#
# Takes the hash reference built by parse_records() and returns the report as
# a single string: each LEAD on its own line, each CODE on its own line, then
# one "ITEM|VAL:VAL:..." line per item. The VAL list is shared by every item
# under the same LEAD/CODE pair and is empty (just "ITEM|") when no values
# were recorded.
#
# NOTE(review): all sorts use Perl's default string ordering, so '75' sorts
# after '200'. Switch to { $a <=> $b } if the keys are guaranteed numeric.
sub format_report {
    my ($data) = @_;
    my $report = '';

    foreach my $f1 ( sort( keys( %{$data} ) ) ) {
        $report .= $f1 . "\n";

        foreach my $f2 ( sort( keys( %{ $data->{$f1} } ) ) ) {
            $report .= $f2 . "\n";

            my $entry = $data->{$f1}{$f2};

            # Declare unconditionally: "my $x = ... if COND" leaves $x in an
            # unspecified state when COND is false.
            my $v4 = '|';
            if ( defined( $entry->{'data'} ) ) {
                $v4 .= join( ':', sort( @{ $entry->{'data'} } ) );
            }

            foreach my $v3 ( sort( @{ $entry->{'items'} } ) ) {
                $report .= $v3 . $v4 . "\n";
            }
        }
    }

    return $report;
}

# Main program: parse the embedded data and write the report next to the
# script. The file is opened for writing (the original opened it read-only
# and never used the handle).
my $outfile = $0 . '.out';
open( my $outf, '>', $outfile )
    or die("Can't open $outfile for output: $!\n");
print {$outf} format_report( parse_records( \*DATA ) );
close($outf)
    or die("Can't close $outfile: $!\n");

__DATA__
100-233,MA,150:250
100-344,MA,350:
200-400,ER,
200-300,ER,576
100-250,MA,150
75-300,MA,350