#!/bin/perl
use strict;
use warnings;

# Re-orders the pipe-delimited records of the input feed and writes them,
# grouped and sorted by field 5, to the output feed.
#
# Input  : $InputFeed  - one header line (skipped) followed by records of
#                        at least six '|'-separated fields.
#          $InvalidFile - tab-separated lines "<entry>\t<field index>";
#                        lines starting with '#' are comments.
# Output : $OutputFeed - one line per kept record, fields written in the
#                        order 4|5|3|2|1|0 with field 4 passed through
#                        expand_state().

#step 0 : setup some general variables
###Hardcoded names of files -- read in from command line later??
my $InputFeed   = "OPMS_List.txt";
my $OutputFeed  = "DB.txt";
my $InvalidFile = "invalid";

# Hard-coded line total, used only to print a progress percentage.
# NOTE(review): presumably the line count of one particular feed - confirm.
my $max = 1448996;

#step 1 : convert input feed to nicer data
### Requires the InputFeed file to contain the fields labelled below
### Hardcodes the OutputFeed file format
sub main {
    print "Opening Input ($InputFeed) and output ($OutputFeed)\n";
    open(my $input_fh, '<', $InputFeed)
        or die "Cannot open Input Feed ($!)\n";

    my $invalid_for = load_invalid($InvalidFile);

    #Some storage variables
    my $first = 1;          # true until the header line has been consumed
    my %PostCodeStrings;    # field 5 => concatenated output lines
    my $counter = 0;        # number of input lines read so far

    print "Converting field ordering...\n";
  LINE:
    while (my $line = <$input_fh>) {
        if ($counter % 1000 == 0) {
            my $percent = $counter / $max * 100;
            print "Have processed $counter lines\t";
            print "$percent\%done\n";
        }
        $counter++;
        chomp $line;

        # The first line of the feed is a header, not a record.
        if ($first) {
            $first = 0;
            next LINE;
        }

        # A blank line carries no fields; do not emit an empty record.
        next LINE if $line !~ m/\S/;

        my @fields = split /\|/, $line;
        next LINE if is_invalid_record(\@fields, $invalid_for);

        # Short records are padded with empty strings rather than undef.
        my @out = map { defined $fields[$_] ? $fields[$_] : '' }
            (4, 5, 3, 2, 1, 0);
        $out[0] = expand_state($out[0]);

        $PostCodeStrings{ $out[1] } .= join('|', @out) . "\n";
    }
    close $input_fh;
    print "Done Converting field ordering...\n";

    print "Writing new field ordering...\n";
    open(my $output_fh, '>', $OutputFeed)
        or die "Cannot open Output Feed ($!)\n";
    for my $postcode (sort keys %PostCodeStrings) {
        print {$output_fh} $PostCodeStrings{$postcode};
    }
    # Buffered write errors only surface when the handle is closed.
    close $output_fh
        or die "Cannot close Output Feed ($!)\n";

    return;
}

# Reads the invalid-entry file and returns a hash reference mapping a
# 0-based field index to an array reference of entries that mark a record
# as invalid. Comment lines, lines without a numeric index and empty
# entries are ignored. Dies if the file cannot be opened.
sub load_invalid {
    my ($path) = @_;

    open(my $fh, '<', $path)
        or die "Cannot open invalid input ($path) for reading ($!)\n";

    my %invalid_for;
    while (my $line = <$fh>) {
        chomp $line;
        next if $line =~ m/^#/;

        my ($entry, $index) = split /\t/, $line;
        next if !defined $index || $index !~ m/\A\d+\z/;
        next if $entry eq '';

        push @{ $invalid_for{$index} }, $entry;
    }
    close $fh;

    return \%invalid_for;
}

# Returns 1 when any indexed field of the record contains one of the
# invalid entries registered for that index, 0 otherwise.
# NOTE(review): entries are matched as literal substrings; if the invalid
# file is meant to hold regular expressions, drop the \Q...\E quoting.
sub is_invalid_record {
    my ($fields, $invalid_for) = @_;

    for my $index (keys %{$invalid_for}) {
        my $value = $fields->[$index];
        next if !defined $value;

        for my $entry (@{ $invalid_for->{$index} }) {
            return 1 if $value =~ m/\Q$entry\E/;
        }
    }

    return 0;
}

# Expands a state abbreviation found anywhere in the given string to the
# full state name (NSW, VIC and QLD are known). Any other value, including
# undef, is returned unchanged. The last argument is the one examined.
sub expand_state {
    my $state = pop(@_);
    return $state if !defined $state;

    if ($state =~ m/NSW/) {
        $state = "New South Wales";
    }
    elsif ($state =~ m/VIC/) {
        $state = "Victoria";
    }
    elsif ($state =~ m/QLD/) {
        $state = "Queensland";
    }

    return $state;
}

# Run the conversion only when executed as a script, so the subroutines
# above can be loaded (e.g. by a test) without touching the data files.
# Kept at the end of the file so every file-scoped variable is initialised.
main() unless caller;