#!/usr/bin/perl -w
#
# Usage : script -i infile -o outfile
#
# Reads the whole input file into memory, splits it into lines on the
# "\n" character, removes duplicate lines, sorts the remaining lines
# alphabetically, and writes them to the output file joined by "\n".
#
# The script refuses to overwrite an existing output file.  Note that
# the output carries no trailing newline after the last line.

use strict;
use warnings;
use Getopt::Std;

my $usage = "Usage : $0 -i infile -o outfile\n";

# Get the in/out file names and do some error checks.  The options are
# validated *before* they are interpolated into any message, so a
# missing option yields the usage text rather than "uninitialized"
# warnings.
my %opt;
getopts('i:o:', \%opt) or die $usage;
my ($infile, $outfile) = @opt{qw(i o)};

(defined $infile && -e $infile) or die $usage;

# Test definedness/length rather than truth so that a file literally
# named "0" is accepted as an output name.
(defined $outfile && length $outfile) or die $usage;

if (-e $outfile) {
    die "$0 : will not write to existing file \"$outfile\"\n";
}

print "reading \"$infile\" and writing to \"$outfile\"\n";

# Read in the input file.  Three-argument open keeps characters such as
# ">" or "|" in a file name from being interpreted as an open mode.
open(my $fin, '<', $infile)
    or die "$0 : cannot read input file \"$infile\"\n";
binmode($fin);

# Slurp the file in one read; "local" confines the change of the input
# record separator to this block instead of altering it globally.
my $content = do { local $/; <$fin> };
close($fin);
$content = '' unless defined $content;

# split drops trailing empty fields, so blank lines at the very end of
# the file are not counted.
my @list = split(/\n/, $content);
print "read ", scalar(@list), " lines in from \"$infile\", ";

# Get rid of the duplicate lines: the first occurrence of each line is
# kept, later ones are filtered out.
my %seen;
my @uniqu = grep { !$seen{$_}++ } @list;

# Sorting with "cmp" is alphabetic; with "<=>" it would be numeric.
my @sorted = sort { $a cmp $b } @uniqu;

# Print the sorted, unique lines to the output file.
print "writing ", scalar(@sorted), " lines out to \"$outfile\"\n";
open(my $fout, '>', $outfile)
    or die "$0 : cannot write output file \"$outfile\"\n";
binmode($fout);

# Buffered write errors (e.g. a full disk) may only surface at close,
# so both the print and the close are checked.
print {$fout} join("\n", @sorted)
    or die "$0 : cannot write output file \"$outfile\"\n";
close($fout)
    or die "$0 : cannot write output file \"$outfile\"\n";

exit 0;