# Index the words found in each file's comments.
#
# For every comment string returned by get_next_file_comments():
#   1. split it on whitespace, strip HTML from each word, upper-case it and
#      remove punctuation, then count how often each resulting token occurs;
#   2. resolve each distinct token to an id: first from the in-memory cache
#      ($known_tokens), then via $gettokenid_sth, inserting the token with
#      $addtoken_sth when no id is found;
#   3. record (file id, token id, occurrence count) via $addtokeninfile_sth.
#
# $comment, $file_id and the statement handles are not declared in this
# section and must be provided by the surrounding code.
# NOTE(review): $file_id is never assigned here; presumably
# get_next_file_comments() updates it for each file -- confirm.
# NOTE(review): execute() results are not checked; this relies on the
# database handle's error settings (e.g. RaiseError) -- confirm.

my $known_tokens;    # cache: normalised token text => token id

while ($comment = get_next_file_comments()) {
    my %count_for;    # normalised token => occurrences in this comment

    WORD:
    for my $word (split(/\s+/s, $comment)) {
        # HTML::Strip segfaults without a new instance each time.
        my $hs = HTML::Strip->new();
        $word = $hs->parse($word);

        # Skip words with nothing resembling text or a path in them.
        next WORD unless $word =~ /[\w\d\.\/\\]+/;

        # Normalise: ASCII upper-case, then drop whitespace and punctuation.
        # \xa3 is the pound sign; the previous pattern had "\\xa3", which
        # matched a backslash and the literal characters x, a and 3, so the
        # digit 3 was silently removed from every token.
        $word =~ tr/a-z/A-Z/;
        $word =~ s/[\s\',\!\"\xa3\#\$\%\^\&\*\\=\+\:\'\`\(\)\{\}\[\]\.\/\\\-\_]//g;
        next WORD if $word eq "";

        $count_for{$word}++;
    }

    TOKEN:
    for my $token (keys %count_for) {
        my $token_id = $known_tokens->{$token};

        if (!defined $token_id) {
            # Fetch directly instead of consulting rows(): DBI does not
            # guarantee a meaningful row count for SELECT statements
            # before the rows have been fetched.
            $gettokenid_sth->execute($token);
            ($token_id) = $gettokenid_sth->fetchrow_array();

            if (!$token_id) {
                # Unknown token: insert it, then look its id up again using
                # the same handle the id is fetched from.
                $addtoken_sth->execute($token);
                $gettokenid_sth->execute($token);
                ($token_id) = $gettokenid_sth->fetchrow_array();
            }

            if (!$token_id) {
                # Do not cache or store an unresolved id.
                warn "Could not obtain an id for token '$token'; skipping\n";
                next TOKEN;
            }

            $known_tokens->{$token} = $token_id;
        }

        $addtokeninfile_sth->execute($file_id, $token_id, $count_for{$token});
    }
}