# (part of much larger codebase which uses strict)
my %TPL;   # hash for storing objects which persist between invocations of ocr_name

sub trim_word { # trim words
   my $ref=shift;
   $$ref=~s/\([^)]+\)//g;
   $$ref=~s/\s+$//;
   $$ref=~s/^\s+//;
   $$ref=~s/[-\s]/.*/g;  # internal hyphens or spaces
}

sub fuzzify_word {   # try to relax matching for common OCR mistakes
   my $ref=shift;
   $$ref=~s/O/[OD]/ig;      # O <-> O or D               
   $$ref=~s/G/[gc]/ig;      # uc G <-> C
   $$ref=~s/[ij]/[ij]/g;    # lc I <-> J
   $$ref=~s/[HN]/[hn]/ig;   # uc H <-> N
   $$ref=~s/W/[wn]/ig;      # uc W <-> N    
   $$ref=~s/T/[it]/ig;      # T or t <-> i or t           
   $$ref=~s/C/[CL]/ig;      # C or c <-> C or L          
}

sub ocr_sort {       # sorting function, put longer names first
   length($b) <=> length($a);
}

sub ocr_name {    # examine a file with OCR to get a name
   my ($file,$hExclude,$hIgnore)=@_;     # file, hash of excluded words, hash of ignored words 
   my ($first,$last,$acct,$origfirst,$origlast,$text);
   unless ($TPL{ocr_match}) {  # regenerate big hash & regexps
      my ($sth,$re,$re2,%re,%re2);
      $sth=sql("select lastname,firstname,acct from names
                 order by acct desc");  # calls DBI (code not shown), desc so more recent accounts which will be matched earlier (maybe) (?)
      while (($last,$first,$acct)=$sth->fetchrow) {
         trim_word(\$first);
         trim_word(\$last);
         $origfirst=$first;
         $origlast=$last;  
         fuzzify_word(\$first);
         fuzzify_word(\$last);
         $re{"$first$last"}||="(?{$acct})$first\\s*$last|(?{$acct})$last,?\\s+$first";    # use ?{} to store account if this branch matches
         # catch unusual last names 
         $first=substr($origfirst,0,1);
         $re2{"$first$last"}||="(?{$acct})\\b$last,?\\s+$first" 
            unless exists $hExclude->{lc $origlast} or length($last)<4;  
      }
      $sth->finish;
      $re=join('|',sort ocr_sort values %re);
      $re2=join('|',sort ocr_sort values %re2);
      $TPL{ocr_match}=sub { 
         my $text=shift; 
         my @matches; 
         die "Bad re $re" unless index($re,'|')>0;
         use re 'eval';
         while ($text=~/$re/gis) { 
            push @matches,$^R;
         }
         @matches;   
      };
      $TPL{ocr_match2}=sub {
         my $text=shift;
         my @matches;
         die "Bad re $re2" unless index($re2,'|')>0;
         use re 'eval';
         while ($text=~/$re2/gis) { 
            push @matches,$^R;
         }
         @matches;   
      };
   }

   $text=run_ocr($file);  # convert file and invoke OCR, returning raw OCR'd text output
   $ignored=join('|',keys %$hIgnore);
   $text=~s/($ignore)//oig;           # delete words which cause false hits
   map { $match{$_}++ } $TPL{ocr_match}->($text);    # call precompiled proc
   map { $match{$_}++ } $TPL{ocr_match2}->($text);

   return keys %match; # array of accounts of named individuals in document
}


##</code><code>##

...(?{474})[CL]r[ij]s[it][ij]a[hn][ij],?\s+S[it]eve|(?{487})...
# that's matching fictitious account 00474, Mr. Steve Christian