-"Wouldn't it be cool to be able to do SQL on this?"
- "Yeah, so we could count the lines, group by the pid or something, to find the procecces that just dies right away"
But you can't query text files, so that was out.
Then I went to the Nordic Perl Workshop with this thought in the back of my mind. And there were people hacking everywhere, and my laptop was at home. So I sketched the CLI interface and some other ideas, waiting for a chance to start coding.
I'm sure something like this has been done in an ad-hoc way before, there is even a mention of it in the DBD::SQLite man page, but it only took me two hours and was fun to do.
My colleague thought it was way cool & useful.
Ideas and comments welcome!
Read lines from STDIN and populate the table specified by - t. The table is automatically re-created with the columns c1, c2, c3, etc. matching the result of the m// or split(). If -c0 is specified, the column c0 contains the entire line.Syntax: $program [-m|-s REGEXP [-t TABLE] [-c0 1] ] [SQL QUERY] [-q] +[-h] -m /REGEXP/ Match lines and store the captured values for matching li +nes. -s /REGEXP/ Split lines and store the split values. Use either -m or +-s. -c0 Default. Store the entire line in column "c0". -t TABLE Default "t". -q Quiet. Nothing on STDERR. -h Display even more help. SQL QUERY Execute the query and output the matching line values.
texql.pl "--m=/^(\([^:]+):(\\w+)/" < debuglog.txt cat some_random_file | texql.pl -s /:/ select c0 from t where c1 = 0 texql.pl --t=another_table -m /^(\\w+)+/ < all_names.txt texql.pl 'select distinct(c1) from another_table' > distinct_name.txt
#!/use/local/bin/perl -w $|++; use strict; use Getopt::Long; use DBI; our $PROGRAM = "TexQL"; our $VERSION = "0.0.1"; our $DATE = "2004-03-28"; our $AUTHOR = "johanl\@DarSerMan.com"; my $program = lc($PROGRAM); my $syntax = qq {$PROGRAM - $VERSION - $DATE - $AUTHOR Load text into a table and query it using SQL. Syntax: $program [-m|-s REGEXP [-t TABLE] [-c0 1] ] [SQL QUERY] [-q] +[-h] -m /REGEXP/ Match lines and store the captured values for matching li +nes. -s /REGEXP/ Split lines and store the split values. Use either -m or +-s. -c0 Default. Store the entire line in column "c0". -t TABLE Default "t". -q Quiet. Nothing on STDERR. -h Display even more help. SQL QUERY Execute the query and output the matching line values. Read lines from STDIN and populate the table specified by - t. The table is automatically re-created with the columns c1, c2, c3, etc. matching the result of the m// or split(). If -c0 is specified, the column c0 contains the entire line. }; my $helpText = qq { = Examples = texql.pl "--m=/^(\([^:]+):(\\w+)/" < debuglog.txt cat some_random_file | texql.pl -s /:/ select c0 from t where c1 = 0 texql.pl --t=another_table -m /^(\\w+)+/ < all_names.txt texql.pl 'select distinct(c1) from another_table' > distinct_name.txt = Matching = How many columns are stored in the table? The first line stored determines the number of columns. = Database location = The text is stored in an SQLite database file (the location is determined by the enviroment variable TEXQL_PATH or USERPROFILE or HOME or TEMP or "." ). }; main(); sub main { my ($sMatch, $rexMatch) = ("", undef); my ($sSplit, $rexSplit) = ("", undef); my $table = "t"; my $c0 = 1; my $quiet = 0; my $help = 0; my $commitEvery = 1000; GetOptions( "m:s" => \$sMatch, "s:s" => \$sSplit, "c0" => \$c0, "t:s" => \$table, "q" => \$quiet, "h" => \$help, ); $help and die("$syntax$helpText\n"); $table and $table =~ s/\W//gs; my $firstColNumber = $c0 ? 0 : 1; $sMatch and $sSplit and die("$syntax\nUse EITHER -m or -s\n"); my $sqlQuery = join(" ", @ARGV); ($sMatch || $sSplit || $sqlQuery) or die($syntax); close(STDERR) if($quiet); #Establish database environment my $fileDb = ($ENV{uc($PROGRAM) . "_PATH"} || $ENV{USERPROFILE} || + $ENV{HOME} || $ENV{TEMP} || ".") . "/." . lc($PROGRAM) . ".dat"; my $connect = "dbi:SQLite:dbname=$fileDb"; my $oDbh = DBI->connect($connect, "", "") or die("Could not connec +t to database ($connect)\n"); $oDbh->{RaiseError} = 1; $oDbh->{AutoCommit} = 0; if($sMatch || $sSplit) { if($sMatch) { $rexMatch = eval "qr$sMatch"; die("Invalid match rex ($sMatch) ($@)\n") if($@); } else { $rexSplit = eval "qr$sSplit"; die("Invalid split rex ($sSplit) ($@)\n") if($@); } #Parse STDIN my ($isTableCreated, $lineMatchCount, $lineCount) = (0, 0, 0); my $valueCount = undef; my $oSth = undef; while(my $line = <STDIN>) { chomp($line); $lineCount++; my @aVal; if($rexMatch) { @aVal = ($line =~ $rexMatch) or next; } else { no warnings; @aVal = split($rexSplit, $line, $valueCount); } $lineMatchCount++; $valueCount = @aVal if(!defined($valueCount)); @aVal = @aVal[0 .. $valueCount - 1]; $isTableCreated ||= createTable($oDbh, $table, $valueCount +, $firstColNumber) or die("Could not create table\n"); #Insert if(!$oSth) { my $placeholders = join(", ", map { "?" } $firstColNum +ber .. $valueCount); $oSth = $oDbh->prepare("INSERT INTO $table VALUES ( $p +laceholders )") or die("Could not prepare insert\n"); } unshift(@aVal, $line) if($c0); $oSth->execute(@aVal) or die("Insert failed\n"); if($lineMatchCount % $commitEvery == 0) { $oDbh->commit() or die("Could not commit\n"); } # print join("|", @aVal) . "\n"; } $oDbh->commit() or die("Could not commit\n"); print STDERR sprintf("%3.1f%% $lineMatchCount/$lineCount lines +\n", ($lineMatchCount / ($lineCount || 1) * 100)) if($rexMatch); print STDERR "$lineCount lines\n" if($rexSplit); } $sqlQuery and runQuery($oDbh, $sqlQuery); #or die $oDbh->commit() or die("Could not commit\n"); $oDbh->disconnect() or die("Could not disconnect from database\n") +; return(1); } =head2 createTable($oDbh, $table, $colCount, $firstColNumber) Set up the basic database contents. $firstColNumber: 0|1 Return 1 on success, else errors. =cut sub createTable { my ($oDbh, $table, $colCount, $firstColNumber) = @_; local $oDbh->{RaiseError} = 0; local $oDbh->{PrintError} = 0; $oDbh->do(qq{DROP TABLE $table}); my $cols = join(", ", map { "C$_ VARCHAR" } $firstColNumber .. $co +lCount); my $sql = qq{CREATE TABLE $table ( $cols ) }; $oDbh->do($sql) or return(0); return(1); } =head2 runQuery($oDbh, $sql) Run the query $sql and print the result. Return 1. Die on fatal errors. =cut sub runQuery { my ($oDbh, $sql) = @_; local $oDbh->{RaiseError} = 1; local $oDbh->{PrintError} = 0; eval { my $oSth = $oDbh->prepare($sql); $oSth->execute(); my $count = 0; while(my $raVal = $oSth->fetchrow_arrayref()) { $count++; print join("\t", map { defined($_) ? $_ : "" } @$raVal) . +"\n"; } print STDERR "Row count: $count\n"; }; die("$sql\nERROR: " . ($oDbh->errstr || ""). "\n") if($@); return(1); } __END__
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
•Re: TexQL - SQL queries on text files
by merlyn (Sage) on Mar 29, 2004 at 13:09 UTC | |
by jplindstrom (Monsignor) on Mar 29, 2004 at 17:52 UTC | |
by jZed (Prior) on Mar 29, 2004 at 19:43 UTC | |
by eserte (Deacon) on Mar 29, 2004 at 14:01 UTC |