http://qs1969.pair.com?node_id=306474
Category: Miscellaneous
Author/Contact Info
Description:

This module allows you to use any of String::Approx's functions from within a regular expression. It also provides a convenience function `fuzzy_qr` which provides a regex fragment that works like an inline `scalar amatch( $pattern, $test )` expression.

use Regexp::Approx 'fuzzy_qr'; use re 'eval'; $r = qr/((?:@{[fuzzy_qr('APT')]}\s)?\w+\d)$/; $apt = ( "5678 DELAWARE AVENUE AOT 123" =~ $r )[0]; print "\$apt=$apt\n";
package Regexp::Approx;
use strict;
use base 'Exporter';
use Storable;
use String::Approx;
use POSIX ':sys_wait_h';
use vars qw($VERSION @ISA @EXPORT_OK
        $SERVER_PID
        $NLEN
        $READ_BUFFER
        %IMPORTED_FUNCTIONS
        $READER_BITS
        $READER $WRITER);
{
    no strict 'refs';
    %IMPORTED_FUNCTIONS =
    map +( $_, *{"String::Approx::$_"}{'CODE'} ),
    _imported_functions();
}
@EXPORT_OK = (qw(qrmatch),
          map { substr($_,0,1,'r'); $_ }
          keys %IMPORTED_FUNCTIONS);
$VERSION = '0.01';

INIT {
    $SIG{'CHLD'} = \&_REAPER;
    $SIG{'PIPE'} = \&_SIBLING_DIED;
    _init_module();
}

END {
    _cleanup();
}

sub qrmatch {
    my $match = shift;
    my $match_len = length $match;
    $match = quotemeta $match;
    
    my $storable_decl = '';
    my $storable_obj = shift || '';
    if ( $storable_obj ) {
    $storable_decl = "q(StorableIx=2),";
    $storable_obj = Storable::freeze( $storable_obj );
    $storable_obj =~ s(([^\w %=]))(sprintf "\\x{%x}", ord $1)ge;
    $storable_obj = "qq($storable_obj),";
    }

    my $long_len = $match_len;#1+int(1.1 * $match_len);
    my $match_code =
    qq[scalar Regexp::Approx::rmatch(
               $storable_decl q($match), $storable_obj
           substr( \$_,
                       pos(),
                       (length() - pos() < $long_len)
                           ? length() - pos()
                           : $long_len ) )];

    # Seek pos() forward $match_len ticks
    my $stand_in = '(?s:'. (q[.] x $match_len) . ')';

    return qq[(?x:(?-x:\Q$match\E) # Attempt an exact match first
                  |
                  # Match approximately
              (?(?{ $match_code })
            $stand_in
            |
                    # No match
            (?!)
                  ) 
              )];
}

sub _enter_server {
    while ( 1 ) {
    my ($context,$func,@msg) = @{_read_msg()};

    if ( $context eq 'exit' or
         $func eq 'exit' ) {
        exit;
    } elsif ( $context and $IMPORTED_FUNCTIONS{$func} ) {
        my @resp;
        if ( $context eq 'list' ) {    
        @resp = &{$IMPORTED_FUNCTIONS{$func}}( @msg );
        } else {
        $resp[0] = &{$IMPORTED_FUNCTIONS{$func}}( @msg );
        }
        
        _send_msg( \@resp );
    } else {
        _send_msg( ["Command not recognized"] );
    }
    }
    # NOT REACHED
}

sub _send_msg {
    # It is *NOT* safe to invoke the re-engine now.
    my $msg =
    join( '',
          map unpack( 'H*', pack( 'N/a*', $_ ) ),
          @{$_[0]} )
        . "\n";
    return syswrite WRITER, $msg;
}

sub _read_msg {
    # It is *NOT* safe to invoke the re-engine now.

    my $msg_len;
    do {
    # Wait until there's some input
    my $bits = $READER_BITS;
    select( $READER_BITS, undef, undef, undef );
    
    sysread READER, $READ_BUFFER, 8192, length $READ_BUFFER;
    $msg_len = rindex $READ_BUFFER, "\n";
    } until ( $msg_len != -1 );

    my $msg = substr( $READ_BUFFER,
                      0,
                        1 + $msg_len,
                  '' );
    local $/ = "\n";
    chomp $msg;

    my @msg = unpack '(N/a*)*', pack 'H*', $msg;
    if ( $msg[0] and
     substr($msg[0],0,11) eq 'StorableIx=' ) {
        # This is unsafe to execute on the client - only clits send
        # storable data so this is only ever processed on the server.
        my $ix = substr(shift(@msg),11);
        $msg[$ix] = Storable::thaw( $msg[$ix] );
    }

    return \@msg;
}

sub _imported_functions {
    no strict 'refs';
    return
    grep *{"String::Approx::$_"}{'CODE'},
    grep /^a/,
    keys %String::Approx::;
}

sub _init_module {
    for my $func ( keys %IMPORTED_FUNCTIONS ) {
    my $this = $func;
    $this =~ s(^a)(r);
    eval qq[sub $this {
        # It is *NOT* safe to invoke the re-engine now
        my \@msg = \@_;
        
        my \$StorableIx;
        if ( substr( \$msg[0], 0, 11 ) eq 'StorableIx=' ) {
        \$StorableIx = 1+substr shift(\@msg), 11;
        }
        
        unshift \@msg, q($func);
        unshift \@msg, wantarray
        ? 'list'
        : 'scalar';
        
        if ( defined \$StorableIx ) {
        unshift \@msg, "StorableIx=\$StorableIx";
        }

        _send_msg( \\\@msg );
        my \$msg = _read_msg();
        return wantarray ? \@\$msg : \$msg->[-1];
    }; 1] || die $@;
    }

    $NLEN = length pack 'N', 0;

    # Most of this code was cribbed right from perlipc.
    pipe( PARENT_RDR, CHILD_WTR )
    or die "pipe() failed: $!";
    pipe ( CHILD_RDR, PARENT_WTR )
    or die "pipe() failed: $!";

    AUTO_FLUSH: {
    my $orig_fh = select CHILD_WTR;
    $| = 1;
    select PARENT_WTR;
    $| = 1;
    select $orig_fh;
    }
    
    $READ_BUFFER = '';
    
    # perlipc safe fork. See _REAPER()
    {
    my $sleep_count = 0;
    do {
        $SERVER_PID = fork;
        unless ( defined $SERVER_PID ) {
        warn "Cannot fork: $!";
        die "Bailing out: $!" if $sleep_count++ > 6;
        sleep 10;
        }
    } until defined $SERVER_PID;
    }
    
    # Henceforth report the pid with the error message.

    if ( $SERVER_PID ) {
    close PARENT_RDR
        or warn "Parent $$ couldn't close PARENT_RDR: $!";
    close PARENT_WTR
        or warn "Parent Couldn't close PARENT_WTR: $!";
    *WRITER = *CHILD_WTR;
    *READER = *CHILD_RDR;
    $READER_BITS = '';
    vec( $READER_BITS, fileno( READER ), 1 ) = 1;
    
    # Leave the module's initializer
    return 1;
    } else {
    die "Cannot fork: $!" unless defined $SERVER_PID;
    close CHILD_RDR
        or warn "Child couldn't close CHILD_RDR: $!";
    close CHILD_WTR
        or warn "Child couldn't close CHILD_WTR: $!";
    *WRITER = *PARENT_WTR;
    *READER = *PARENT_RDR;
    $READER_BITS = '';
    vec( $READER_BITS, fileno( READER ), 1 ) = 1;
    
    goto \&_enter_server();
    }
    # NOT REACHED
}

sub _cleanup {
    if ( $SERVER_PID ) {
    _send_msg( [qw(void exit)] );
    }
    close WRITER
    or warn "$$ Couldn't close WRITER: $!";
    close READER
    or warn "$$ Couldn't close READER: $!";
    return 1;
}

sub _SIBLING_DIED {
    if ( $SERVER_PID ) {
    die "Parent $$ noticed that child $SERVER_PID unexpectedly died";
    } else {
    die "Child $$ noticed that its parent(huh?) died";
    }
}

sub _REAPER {
    my $child;
    my $waitedpid;
    while (($waitedpid = waitpid( -1, WNOHANG)) > 0) {
    if ( $? ) {
        my $exit = $? >> 8;
        my $sig  = $? & 127;
        my $core = $? & 128;
        warn "Reaped $waitedpid with exit $exit on signal $sig."
        . $core ? ' Core dumped!' : '';
    }
    }
    $SIG{'CHLD'} = \&_REAPER;
}

1;

__END__

=head1 NAME

Regexp::Approx - Use fuzzy regular expressions

=head1 SYNOPSIS

  use Regexp::Approx 'qrmatch';
  use re 'eval';

  @dict = glob "/usr/share/dict/*";
  @ARGV = $dict[ rand @dict ];
  my $fuzzy_part = qrmatch( 'teric',
                                [ 'i 10%', 'D0', 'I0' ] );
  $\ = "\n";
  while (<>) {
      chomp;
      s(($fuzzy_part))[[$1]]go and print;
  }

=head1 ABSTRACT

This module allows you to use any of String::Approx's functions from w
+ithin
a regular expression. It also provides a convenience function `qrmatch
+`
which provides a regex fragment that works like an inline 
`scalar amatch( $pattern, [ optional modifier ], $test )` expression.

=head1 DESCRIPTION

Perl's regular expression engine is not reentrant so using the re-engi
+ne
while it is already operating is a big no-no. This module skirts that
limitation by forking perl and running all the String::Approx function
+s in a
separate process.

Through unseemly peeking into String::Approx this module wraps all of 
+the
'a...' functions except they start with 'r' over here. So instead of
using 'amatch' you'd use 'rmatch'.

=over 4

=item qrmatch( "pattern", [ modifiers ] )

This function returns a regular expression fragment designed to supply
+ the
first two arguments of the String::Approx::amatch function. The patter
+n is
required, the modifiers are optional. See L<String::Approx> for detail
+s on
how to construct your pattern and what options are available as modifi
+ers.

=back

=head2 EXPORT

The qrmatch function is optionally exported.

=head1 SEE ALSO

L<String::Approx>

=head1 AUTHOR

Josh Jore, E<lt>jjore@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2003 by Josh Jore

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. 

=cut