#!/usr/bin/perl -w
#
# Remove duplicate records from a CSV file.
#
# Reads every line of $csvfile, sorts the data lines as strings, and writes
# each distinct line once to $newfile. When $fieldnames is true, the first
# line of the input is treated as a header: it is excluded from sorting and
# de-duplication and is written first to the output.
#
# Note: records are compared as whole lines, so two records only count as
# duplicates when their text is byte-for-byte identical. The output is in
# sorted order, not in the original input order.

use strict;
use warnings;

# Set to filename of CSV file
my $csvfile = '';

# Set to filename of new file (file without duplicates)
my $newfile = '';

# Set to 1 if first line of CSV file contains field names, 0 otherwise
my $fieldnames = 1;

### Shouldn't need to change stuff below here ###

# Three-argument open with lexical handles, so that characters in the
# configured filenames can never be interpreted as an open mode.
open(my $in, '<', $csvfile)
    or die "Couldn't open input CSV file: $!";
open(my $out, '>', $newfile)
    or die "Couldn't open output file: $!";

# Read header line if it exists (stays undef for an empty input file)
my $header;
$header = <$in> if $fieldnames;

# Slurp in everything else
my @data = <$in>;
close $in;

# Only the final line of a file can lack a line terminator. Add one so that
# the record compares equal to identical records elsewhere in the file and
# cannot be glued onto the following record once sorting has moved it.
$data[-1] .= "\n" if @data && $data[-1] !~ /\n\z/;

@data = sort @data;

# If we read in a header line, throw it back out again
print {$out} $header if defined $header;

# Now go through the data line by line, writing it to output unless it's
# identical to the previous line (in which case it's a dupe). Sorting has
# already placed identical lines next to each other.
my $n        = 0;
my $lastline = '';
foreach my $currentline (@data) {
    next if $currentline eq $lastline;
    print {$out} $currentline;
    $lastline = $currentline;
    $n++;
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the close of the output file has to be checked.
close $out
    or die "Couldn't close output file: $!";

print "Processing complete. In = " . scalar @data . " records, Out = $n records\n";