#!/usr/bin/perl -w
#
# Pre-sort pass: scan the input file for records of the form
#     key<digits> key<digits> ...
# and write one line per matching record to the presort work file:
#     <optimizedKey>|<byteOffset>|<recordLength>
# where <byteOffset> is the position of the record's first byte in the
# input file and <recordLength> is the record's length without its
# line terminator.

use strict;
use warnings;

# NOTE(review): the next three settings are not referenced by this pass;
# presumably they are meant for later sort/output passes - confirm before
# removing them.
my $KEY_OFFSET = 'O';    # Optimized key name for offset value
my $KEY_LENGTH = 'L';    # Optimized key name for length value
my $SEEK_SET   = 0;      # In case you don't want to export the constant for seek()

my $Inpfnm = 'test2.dat';
my $Wrkfnm = $Inpfnm . '-presort.dat';
my $Srtfnm = $Inpfnm . '-sorted.dat';    # not used by this pass
my $Outfnm = $Inpfnm . '-output.dat';    # not used by this pass

convertKeysAndOffsets();

exit;

# convertKeysAndOffsets([$inputName [, $presortName]])
#
# Reads $inputName line by line and, for every line that matches the
# "key<N> key<M> " record structure, appends "<key>|<offset>|<length>"
# to $presortName.  Lines that do not match are skipped, but they still
# advance the offset, so the offsets written for later records remain
# valid seek() positions.
#
# Parameters (both optional, for backward compatibility with the
# zero-argument call):
#   $inputName   - file to scan;  defaults to $Inpfnm
#   $presortName - file to write; defaults to $Wrkfnm
#
# Returns nothing.  Dies if either file cannot be opened, or if writing
# or closing the presort file fails.
sub convertKeysAndOffsets {
    my ($inputName, $presortName) = @_;
    $inputName   = $Inpfnm unless defined $inputName;
    $presortName = $Wrkfnm unless defined $presortName;

    open my $inputFile, '<', $inputName
        or die "Cannot open '$inputName' for reading: $!";
    open my $presortFile, '>', $presortName
        or die "Cannot open '$presortName' for writing: $!";

    # Byte position at which the line about to be read starts.
    my $inputOffset = 0;

    while (my $inputBuffer = <$inputFile>) {
        my $recordOffset = $inputOffset;

        # Ask the handle where the next line starts instead of adding up
        # line length + newline size + an OS fudge factor: tell() reports
        # the real byte position whatever the line terminator is, and it
        # is updated here for every line, matching or not.
        $inputOffset = tell $inputFile;

        chomp $inputBuffer;

        # Only capture records which match the structure
        next unless $inputBuffer =~ /^\s*key(\d+)\s+key(\d+)\s+/;

        # Capture the keys and record size
        my $primaryKey   = $1;
        my $secondaryKey = $2;
        my $inputLength  = length $inputBuffer;

        # Optimize the keys: zero-pad each to at least two digits.  Keys
        # above 99 widen the field, so a plain text sort of the result
        # only orders correctly while both keys fit in two digits.
        my $optimizedKey = sprintf "%02d%02d", $primaryKey, $secondaryKey;

        my $sortBuffer = join '|', $optimizedKey, $recordOffset, $inputLength;
        print {$presortFile} "$sortBuffer\n"
            or die "Cannot write to '$presortName': $!";
    }

    # Buffered write errors surface at close, so check it.
    close $presortFile
        or die "Cannot close '$presortName': $!";
    close $inputFile;

    return;
}

__END__