#!/usr/bin/perl -w use strict; use Benchmark; use DBI; use Text::CSV; $| = 1; # Setup Data sources # Mysql setup my $dsn = 'dbi:mysql:test'; # The database to connect to my $user = 'test'; # The username to use my $password = ''; # The password for that user my $dbh = DBI->connect($dsn, $user, $password) || die $DBI::errstr; my $table = 'bm'; # Change this at will # END Mysql setup # Now CSV setup my $csv = Text::CSV->new(); my $csv_file = './bm.csv'; # END CSV setup # Now flatfile setup # (for this example pipe '|' delimited my $flatfile = './bm.flat'; # END flatfile setup my $number_of_rows = 200; # Number of rows to deal with in each timed iteration my @rows = (); for (1..$number_of_rows) { my $num = sprintf("%05d", $_); push(@rows, { ID => $_, name => "person_$_", address => "$_ Any St.", city => "Anytown", state => "AnyPlace", zip => $num }); } # Time our insert methods, and incidentally create our datasets # for the other tests, funny how that works :) timethese(500, { Mysql_Insert => \&insert_mysql, CSV_Insert => \&insert_csv, Flatfile_Insert => \&insert_flatfile}); print "=" x 40; # setup an array of $number_of_rows (from above) elements containing digits 1-200 # and mix it up for "random" access, we only do this once # because we want the same "random" access for all access # methods tested. my @random; for(1..$number_of_rows) { push(@random, $_); } shuffle(\@random); # Time random selects on our various datasets timethese(500, { Mysql_Select => \&select_mysql, CSV_Select => \&select_csv, Flatfile_Select => \&select_flatfile}); print "=" x 40; # Now using the same random set do "updates" timethese(500, { Mysql_Update => \&update_mysql, CSV_Update => \&update_csv, Flatfile_Update => \&update_flatfile}); print "=" x 40; # Now using the same random set do "deletes" print "We can only do 1 iteration of the delete because it necessarily destroys our datasource.\n"; timethese(1, { Mysql_Delete => \&delete_mysql, CSV_Delete => \&delete_csv, Flatfile_Delete => \&delete_flatfile}); exit; # Some basic cleanup stuff END { unlink("$csv_file"); unlink("$flatfile"); $dbh->do(qq{delete from $table}); $dbh->disconnect; } sub delete_mysql { foreach my $ID (@random) { $dbh->do(qq{delete from $table where ID = $ID}); } } sub delete_csv { foreach my $ID (@random) { my $new_num = $ID * 2; open(CSV_IN, "$csv_file") || die "Unable to open '$csv_file': $!"; open(CSV_OUT, ">$csv_file.new") || die "Unable to open '$csv_file.new': $!"; while() { chomp; $csv->parse($_); my @fields = $csv->fields; if($fields[0] eq $ID) { # If we have our ID line skip it next; } print CSV_OUT $_, "\n"; } close(CSV_OUT); close(CSV_IN); rename("$csv_file.new", "$csv_file") || die "Uh oh, problem renaming '$csv_file.new' to '$csv_file': $!"; } } sub delete_flatfile { local $" = '|'; #" <- used to fix syntax highlighting on my editor foreach my $ID (@random) { my $new_num = $ID * 2; open(FF_IN, "$flatfile") || die "Unable to open '$flatfile': $!"; open(FF_OUT, ">$flatfile.new") || die "Unable to open '$flatfile.new': $!"; while() { chomp; if(/^$ID\|/) { # Found our dataline whee! now skip it next; } print FF_OUT $_, "\n"; } close(FF_OUT); close(FF_IN); rename("$flatfile.new", "$flatfile") || die "Uh oh, problem renaming '$csv_file.new' to '$csv_file': $!"; } } sub update_mysql { foreach my $ID (@random) { my $new_num = $ID * 2; my ($name, $address) = ($dbh->quote("Person_$new_num"), $dbh->quote("$new_num Any St.")); $dbh->do(qq{update $table set name = $name, address = $address where ID = $ID}); } } sub update_csv { foreach my $ID (@random) { my $new_num = $ID * 2; open(CSV_IN, "$csv_file") || die "Unable to open '$csv_file': $!"; open(CSV_OUT, ">$csv_file.new") || die "Unable to open '$csv_file.new': $!"; while() { chomp; $csv->parse($_); my @fields = $csv->fields; if($fields[0] eq $ID) { # If we have our ID line alter it $fields[1] = "Person_$new_num"; # field 2 is our name field $fields[2] = "$new_num Any St."; # field 3 is our address field } $csv->combine(@fields); print CSV_OUT $csv->string, "\n"; } close(CSV_OUT); close(CSV_IN); rename("$csv_file.new", "$csv_file") || die "Uh oh, problem renaming '$csv_file.new' to '$csv_file': $!"; } } sub update_flatfile { local $" = '|'; #" <- used to fix syntax highlighting on my editor foreach my $ID (@random) { my $new_num = $ID * 2; open(FF_IN, "$flatfile") || die "Unable to open '$flatfile': $!"; open(FF_OUT, ">$flatfile.new") || die "Unable to open '$flatfile.new': $!"; while() { chomp; my $line = $_; if(/^$ID\|/) { # Found our dataline whee! my @fields = split(/\|/, $_); $fields[1] = "Person_$new_num"; # field 2 is our name field $fields[2] = "$new_num Any St."; # field 3 is our address field $line = "@fields"; } print FF_OUT $line, "\n"; } close(FF_OUT); close(FF_IN); rename("$flatfile.new", "$flatfile") || die "Uh oh, problem renaming '$csv_file.new' to '$csv_file': $!"; } } sub select_mysql { foreach(@random) { my %row_hash; # This is just someplace to put the data my $sth = $dbh->prepare(qq{select name, address, city, state, zip from $table where ID = ?}); $sth->execute($_); $sth->bind_columns(\$row_hash{name}, \$row_hash{address}, \$row_hash{city}, \$row_hash{state}, \$row_hash{zip}); $sth->fetch; $sth->finish; } } sub select_csv { foreach my $ID (@random) { open(CSV, "$csv_file") || die "Unable to open '$csv_file': $!"; while() { $csv->parse($_); my @fields = $csv->fields; if($fields[0] eq $ID) { close(CSV); last; # This would likely be a return(@fields) or some such } } close(CSV); # In case no match was found for some reason. } } sub select_flatfile { foreach my $ID (@random) { open(FF, "$flatfile") || die "Unable to open '$flatfile': $!"; while() { unless(/^$ID|/) { next; } else { my @fields = split(/\|/, $_); close(FF); last; # As in select_csv this would likely return(@fields) } } close(FF); } } sub shuffle { my $shuffled_data = shift; # Fisher-Yates shuffle from PCB recipe 4.17 my $i; for ($i = @$shuffled_data; --$i;) { my $j = int rand ($i+1); next if $i == $j; @$shuffled_data[$i,$j] = @$shuffled_data[$j,$i]; } } sub insert_mysql { # I almost didn't have this in before doing a LARGE test # that would have been bad for my diskspace $dbh->do(qq{delete from $table}); # If you have the diskspace for 10000 * $number_of_rows entries # feel free to comment the above out foreach(@rows) { my $sth = $dbh->prepare(qq{insert into $table (ID, name, address, city, state, zip) values (?, ?, ?, ?, ?, ?)}); $sth->execute($_->{ID}, $_->{name}, $_->{address}, $_->{city}, $_->{state}, $_->{zip}); $sth->finish; } } sub insert_csv { # Note: files opened in > mode again to prevent disk space issues open(CSV, ">$csv_file") || die "Unable to open '$csv_file': $!"; foreach(@rows) { $csv->combine($_->{ID}, $_->{name}, $_->{address}, $_->{city}, $_->{state}, $_->{zip}); print CSV $csv->string, "\n"; } close(CSV); } sub insert_flatfile { # Note: files opened in > mode again to prevent disk space issues open(FF, ">$flatfile") || die "Unable to open '$flatfile': $!"; foreach(@rows) { print FF join('|', ($_->{ID}, $_->{name}, $_->{address}, $_->{city}, $_->{state}, $_->{zip})), "\n"; } close(FF); }