yes, i really do need to do something about this code one way or the other. it looks quite bad which means that i'm never going to want to maintain it.
here's what i have for input
the name of something
something else cool
the title given does not exactly match
and for output, i want something like this something,something very nice,the name of something,something else coo
+l
where the cells are:search word,column returned from db,line(s) of input the word came from a search word may return more than one line in sql and that's fine, i'd like my csv to have the same date data for col 1, 3, and 4 and have the different result. i don't need duplicate data however, which was the point of the loops in question.
below is my code. i've left in my original comments and added some:
#!/usr/bin/perl
##### WHAT #
# What names return per search
##########
use strict;
#use warnings;
use DBI;
my $searcher = "owner.manager, owner.owner, owner.manown";
my $dbh = DBI->connect('DBI:mysql:db;host=localhost',
'user', 'pass')
or die "Database connection: $!";
open( FILE, "< $ARGV[0]" );
my %uword = ();
my %seen = ();
my $count = 0;
my @data;
my $key;
my $i = 0;
while ( <FILE> ) {
my $line = $_;
chomp ($line);
my @word = split / /, $line;
$count = 0;
while ( $word[ $count ] ) {
$word[ $count ] =~ tr/^[\-a-zA-Z]//;
$word[ $count ] =~ s/\'/\\\'/g;
$count++;
}
# deduplicate each word, but if it is a duplicate,
# i still want to know what lines the word was on.
foreach my $string ( @word ) {
if ( $uword{ $string }[ 0 ] == 1 ) {
push @{ $uword{ $string } }, $line;
next;
}
$uword{ $string }[ 0 ] = 1;
push @{ $uword{ $string } }, $line;
}
}
# for every unique word, do a search...
for my $key ( keys %uword ) {
my ( $imo, $owner, $manown, $manager );
my $select = qq/SELECT $searcher /;
my $from = qq/FROM owner, spd /;
my $where = qq/WHERE MATCH( $searcher ) AGAINST('+
+$key' IN BOOLEAN MODE) /;
my $join = qq/AND owner.number = spd.number/;
my $query = $select . $from . $where . $join;
print "SQL: $query\n";
my $sth = $dbh->prepare( $query );
$sth->execute;
$sth->bind_columns( \$manager, \$owner, \$manown );
# since i don't know or care what field my matches came
# from, take the results and put them into an array.
while ( $sth->fetch ) {
if ( defined( $owner ) ) {
$data[$count] = $owner;
$count++;
}
if ( defined( $manown ) ) {
$data[$count] = $manown;
$count++;
}
if ( defined( $manager ) ) {
$data[$count] = $manager;
$count++
}
}
# same general deduplication algorithm as before.
# @data holds full names of data.
foreach my $string ( @data ) { # dedupe data and sanity chec
+k.
next if !defined ($string); # should never be true.
next if $seen{ $string }[ 0 ] == 1; # check %seen hash
+ / array for dupe
$seen{ $string }[ 0 ] = 1; # define hash of array a
+nd assign check to it.
$seen{ $string }[ 1 ] = $key; # add word from line to
+hash for reference.
}
}
$dbh->disconnect;
# this is here because i was up real late thinking
# "wtf does what here"....
# $key - (below) is the deduped sql string
# $seen { $key }[ 1 ] - is the word searched in sql
# $uword { '$seen{ $key }[ 1 ]' } - should be the line(s) of test the
+ search string came from
for my $key ( keys %seen ) {
print "$seen{ $key }[ 1 ], $key,";
print join(',', @{ $uword{ $seen{ $key }[ 1 ] }[ 1 .. $#{ @uwo
+rd{ $seen{ $key } } } ] } );
print "\n";
}
|