#! perl -slw
# Accumulate tab-separated "key<TAB>value" records into one output record
# per key, in first-seen key order.  The output file is accessed as an array
# through Tie::File, so each key's record can be appended to in place.
#
# Usage:
#   script.pl            accumulate onto the existing output file
#   script.pl -NEWRUN    discard the existing output file first
#                        (-NEWRUN is parsed by perl's -s switch)
use strict;
use Tie::File;
use vars qw[$NEWRUN];

use constant OUTPUT_FILE => './output.dat';

# Empty the output file if this is a new run (-NEWRUN on the command line).
# If this switch isn't present, new data is accumulated onto the existing.
unlink OUTPUT_FILE if $NEWRUN;

# Adjust the memory limit as required. See the Tie::File pod for other options.
tie my @accumulator, 'Tie::File', OUTPUT_FILE, memory => 20_000_000
    or die "Cannot tie '" . OUTPUT_FILE . "': $!";

# Maps each key to the index of its record in @accumulator (the file).
my %hash;

# Preload the ordering info into the hash if this isn't a new run.
unless ($NEWRUN) {
    $hash{ ( split /\t/, $accumulator[$_] )[0] } = $_ for 0 .. $#accumulator;
}

# Switching <DATA> to <> would allow a list of files to be supplied.
while (<DATA>) {
    chomp;
    next unless length;    # Skip blank lines; they carry no key.

    my ( $key, $value ) = split /\t/;
    $value = '' unless defined $value;    # Key with no value: no warning.

    unless ( exists $hash{$key} ) {       # Unless we saw this key already,
        push @accumulator, $key . "\t";   # add it to the end of the array (file)
        $hash{$key} = $#accumulator;      # and remember where in the hash.
    }

    # Append the new bit to the appropriate array element (file record).
    $accumulator[ $hash{$key} ] .= ' ' . $value;
}

untie @accumulator;

__DATA__
text1	text-a
text2	text-b
text3	text-c
text1	text-d
text3	text-e
text3	text-f