use strict; use warnings; use DBI; use constant {DEBUG_SQL => 1}; my $scratchDBname = "ScratchDB.sqlite"; my %attr = ( RaiseError => 1); # auto die with error printout my $dbh = DBI->connect("dbi:SQLite:dbname=$scratchDBname","","",\%attr) or die "Couldn't connect to database $scratchDBname: " . DBI->errstr; my $headerLine = ; chomp $headerLine; my @headers = split (/\|/,$headerLine); # create main Table # $dbh->do("DROP TABLE IF EXISTS main"); my $createTableSQL = "CREATE TABLE main (". join (",",map{"\"$_\" TEXT NOT NULL"}@headers). ")"; print "SQL debug Statement: $createTableSQL\n" if DEBUG_SQL; $dbh->do($createTableSQL); # prepare insert row statement # my $valuesField = "?," x @headers; chop $valuesField; # get rid of last comma my $insertRowSQL = "INSERT INTO main (". join(",",@headers).")". "VALUES (".$valuesField.")"; print "SQL debug statement: $insertRowSQL\n" if DEBUG_SQL; my $insert = $dbh->prepare($insertRowSQL); # populate main table by reading the 1/2 GB file # $dbh->begin_work; while (defined (my $line =)) { chomp $line; next if $line =~ /^\s*$/; #ignore blank lines ***ADDED*** my @fields = split (/\|/,$line); $insert->execute(@fields); } $dbh->commit; # prepare SQL prototypes # # A Column can be a variable but requires more SQL-foo than # I have at the moment... # But making a "prototype" and adjusting that and preparing # on a per column basis appears to be no big deal my $distinct_proto = "SELECT __COLUMN from main GROUP BY __COLUMN ORDER BY __COLUMN"; my $unique_proto = "SELECT __COLUMN FROM ( SELECT __COLUMN, count(*) as histo FROM main GROUP BY __COLUMN ORDER BY __COLUMN) WHERE histo = 1"; foreach my $header (@headers) { my $distinct_select = $distinct_proto; $distinct_select =~ s/__COLUMN/$header/g; my $distinct = $dbh->prepare($distinct_select); my $unique_select = $unique_proto; $unique_select =~ s/__COLUMN/$header/g; my $unique = $dbh->prepare($unique_select); print "\n*****\n"; print "Distinct values for $header:\n"; $distinct->execute(); my $distinct_array_ref = $distinct -> fetchall_arrayref(); print "@$_\n" foreach @$distinct_array_ref; print "\nUnique values for $header:\n"; $unique->execute(); my $unique_array_ref = $unique -> fetchall_arrayref(); print "@$_\n" foreach @$unique_array_ref; } =Prints: SQL debug Statement: CREATE TABLE main ("head1" TEXT NOT NULL,"head2" TEXT NOT NULL,"head3" TEXT NOT NULL) SQL debug statement: INSERT INTO main (head1,head2,head3)VALUES (?,?,?) ***** Distinct values for head1: val1 val2 val3 val6 Unique values for head1: val2 val3 val6 ***** Distinct values for head2: val2 val4 val7 Unique values for head2: val2 ***** Distinct values for head3: val3 val5 Unique values for head3: =cut __DATA__ head1|head2|head3 val1|val2|val3 val1|val4|val5 val6|val4|val5 val2|val7|val5 val3|val7|val3