#!/usr/bin/perl -w
# Usage: <this script> -i infile -o outfile
# Reads the whole input file, splits it into lines on the \n character,
# removes duplicate lines, sorts the remaining lines alphabetically,
# and writes them to the output file, which must not already exist.
use strict;
use Getopt::Std;
use vars qw( $content @list %seen @uniqu @sorted $opt_i $opt_o );

# get the in/out file names and do some error checks
#   -i infile   : file to read; must already exist
#   -o outfile  : file to create; must NOT already exist (never overwritten)
# Any problem is fatal: the script dies with a usage or diagnostic message.
my $usage = "Usage : $0 -i infile -o outfile\n";
getopts('i:o:') or die $usage;

# Check that both options were supplied BEFORE interpolating them anywhere,
# so a missing option yields the usage text instead of "uninitialized value"
# warnings under -w.  defined/length (rather than plain truth) also accepts
# a file that is legitimately named "0".
(defined $opt_i and length $opt_i) or die $usage;
(defined $opt_o and length $opt_o) or die $usage;

print "reading \"$opt_i\" and writing to \"$opt_o\"\n";

# A missing input file is not a usage error; say what is actually wrong.
(-e $opt_i) or die "$0 : input file \"$opt_i\" does not exist\n";
if (-e $opt_o) { die "$0 : will not write to existing file \"$opt_o\"\n"; }

# read in the input file
# The whole file is slurped into $content as raw bytes (binmode), so line
# endings are not translated on any platform.
{
    # Slurp mode, confined to this block: the global input record separator
    # is restored on exit instead of being left undefined for the rest of
    # the program.
    local $/;

    # Three-argument open: the file name is taken literally, so characters
    # such as '<', '>', '|' or surrounding whitespace in a user-supplied
    # name cannot change the open mode or start a command.
    open(my $fin, '<', $opt_i)
        or die "$0 : cannot read input file \"$opt_i\": $!\n";
    binmode($fin);
    $content = <$fin>;

    # In slurp mode an empty file reads as '', so undef here means the read
    # itself failed (for example, the path is a directory).
    defined $content
        or die "$0 : error reading input file \"$opt_i\": $!\n";
    close($fin);
}

# break the slurped text into lines at every \n and report how many we got
# (split discards trailing empty fields, so blank lines at the very end of
# the file are not counted)
@list = split(/\n/, $content);
my $line_count = @list;
print "read ", $line_count, " lines in from \"$opt_i\", ";

# keep only the first occurrence of each line, in the order encountered;
# %seen ends up holding how many times each distinct line appeared
%seen  = ();
@uniqu = ();
foreach my $line (@list) {
    next if $seen{$line}++;
    push @uniqu, $line;
}

# string (alphabetic) ordering via "cmp"; swap in "<=>" for numeric ordering
@sorted = sort { $a cmp $b } @uniqu;

# print the sorted, unique lines to a file
# Lines are joined with "\n" and written as raw bytes (binmode).  As before,
# no newline is written after the last line.
print "writing ", scalar(@sorted), " lines out to \"$opt_o\"\n";

# Three-argument open: the output name is taken literally, so a name that
# begins with '>' or '&', or carries surrounding whitespace, cannot alter
# the open mode or the target.
open(my $fout, '>', $opt_o)
    or die "$0 : cannot write output file \"$opt_o\": $!\n";
binmode($fout);

# Output is buffered, so a full disk or I/O error may only surface at close;
# check both print and close so a truncated result is never reported as
# success.
print {$fout} join("\n", @sorted)
    or die "$0 : error writing output file \"$opt_o\": $!\n";
close($fout)
    or die "$0 : error closing output file \"$opt_o\": $!\n";

exit;