package Regexp::Approx; use strict; use base 'Exporter'; use Storable; use String::Approx; use POSIX ':sys_wait_h'; use vars qw($VERSION @ISA @EXPORT_OK $SERVER_PID $NLEN $READ_BUFFER %IMPORTED_FUNCTIONS $READER_BITS $READER $WRITER); { no strict 'refs'; %IMPORTED_FUNCTIONS = map +( $_, *{"String::Approx::$_"}{'CODE'} ), _imported_functions(); } @EXPORT_OK = (qw(qrmatch), map { substr($_,0,1,'r'); $_ } keys %IMPORTED_FUNCTIONS); $VERSION = '0.01'; INIT { $SIG{'CHLD'} = \&_REAPER; $SIG{'PIPE'} = \&_SIBLING_DIED; _init_module(); } END { _cleanup(); } sub qrmatch { my $match = shift; my $match_len = length $match; $match = quotemeta $match; my $storable_decl = ''; my $storable_obj = shift || ''; if ( $storable_obj ) { $storable_decl = "q(StorableIx=2),"; $storable_obj = Storable::freeze( $storable_obj ); $storable_obj =~ s(([^\w %=]))(sprintf "\\x{%x}", ord $1)ge; $storable_obj = "qq($storable_obj),"; } my $long_len = $match_len;#1+int(1.1 * $match_len); my $match_code = qq[scalar Regexp::Approx::rmatch( $storable_decl q($match), $storable_obj substr( \$_, pos(), (length() - pos() < $long_len) ? length() - pos() : $long_len ) )]; # Seek pos() forward $match_len ticks my $stand_in = '(?s:'. (q[.] x $match_len) . ')'; return qq[(?x:(?-x:\Q$match\E) # Attempt an exact match first | # Match approximately (?(?{ $match_code }) $stand_in | # No match (?!) ) )]; } sub _enter_server { while ( 1 ) { my ($context,$func,@msg) = @{_read_msg()}; if ( $context eq 'exit' or $func eq 'exit' ) { exit; } elsif ( $context and $IMPORTED_FUNCTIONS{$func} ) { my @resp; if ( $context eq 'list' ) { @resp = &{$IMPORTED_FUNCTIONS{$func}}( @msg ); } else { $resp[0] = &{$IMPORTED_FUNCTIONS{$func}}( @msg ); } _send_msg( \@resp ); } else { _send_msg( ["Command not recognized"] ); } } # NOT REACHED } sub _send_msg { # It is *NOT* safe to invoke the re-engine now. my $msg = join( '', map unpack( 'H*', pack( 'N/a*', $_ ) ), @{$_[0]} ) . "\n"; return syswrite WRITER, $msg; } sub _read_msg { # It is *NOT* safe to invoke the re-engine now. my $msg_len; do { # Wait until there's some input my $bits = $READER_BITS; select( $READER_BITS, undef, undef, undef ); sysread READER, $READ_BUFFER, 8192, length $READ_BUFFER; $msg_len = rindex $READ_BUFFER, "\n"; } until ( $msg_len != -1 ); my $msg = substr( $READ_BUFFER, 0, 1 + $msg_len, '' ); local $/ = "\n"; chomp $msg; my @msg = unpack '(N/a*)*', pack 'H*', $msg; if ( $msg[0] and substr($msg[0],0,11) eq 'StorableIx=' ) { # This is unsafe to execute on the client - only clits send # storable data so this is only ever processed on the server. my $ix = substr(shift(@msg),11); $msg[$ix] = Storable::thaw( $msg[$ix] ); } return \@msg; } sub _imported_functions { no strict 'refs'; return grep *{"String::Approx::$_"}{'CODE'}, grep /^a/, keys %String::Approx::; } sub _init_module { for my $func ( keys %IMPORTED_FUNCTIONS ) { my $this = $func; $this =~ s(^a)(r); eval qq[sub $this { # It is *NOT* safe to invoke the re-engine now my \@msg = \@_; my \$StorableIx; if ( substr( \$msg[0], 0, 11 ) eq 'StorableIx=' ) { \$StorableIx = 1+substr shift(\@msg), 11; } unshift \@msg, q($func); unshift \@msg, wantarray ? 'list' : 'scalar'; if ( defined \$StorableIx ) { unshift \@msg, "StorableIx=\$StorableIx"; } _send_msg( \\\@msg ); my \$msg = _read_msg(); return wantarray ? \@\$msg : \$msg->[-1]; }; 1] || die $@; } $NLEN = length pack 'N', 0; # Most of this code was cribbed right from perlipc. pipe( PARENT_RDR, CHILD_WTR ) or die "pipe() failed: $!"; pipe ( CHILD_RDR, PARENT_WTR ) or die "pipe() failed: $!"; AUTO_FLUSH: { my $orig_fh = select CHILD_WTR; $| = 1; select PARENT_WTR; $| = 1; select $orig_fh; } $READ_BUFFER = ''; # perlipc safe fork. See _REAPER() { my $sleep_count = 0; do { $SERVER_PID = fork; unless ( defined $SERVER_PID ) { warn "Cannot fork: $!"; die "Bailing out: $!" if $sleep_count++ > 6; sleep 10; } } until defined $SERVER_PID; } # Henceforth report the pid with the error message. if ( $SERVER_PID ) { close PARENT_RDR or warn "Parent $$ couldn't close PARENT_RDR: $!"; close PARENT_WTR or warn "Parent Couldn't close PARENT_WTR: $!"; *WRITER = *CHILD_WTR; *READER = *CHILD_RDR; $READER_BITS = ''; vec( $READER_BITS, fileno( READER ), 1 ) = 1; # Leave the module's initializer return 1; } else { die "Cannot fork: $!" unless defined $SERVER_PID; close CHILD_RDR or warn "Child couldn't close CHILD_RDR: $!"; close CHILD_WTR or warn "Child couldn't close CHILD_WTR: $!"; *WRITER = *PARENT_WTR; *READER = *PARENT_RDR; $READER_BITS = ''; vec( $READER_BITS, fileno( READER ), 1 ) = 1; goto \&_enter_server(); } # NOT REACHED } sub _cleanup { if ( $SERVER_PID ) { _send_msg( [qw(void exit)] ); } close WRITER or warn "$$ Couldn't close WRITER: $!"; close READER or warn "$$ Couldn't close READER: $!"; return 1; } sub _SIBLING_DIED { if ( $SERVER_PID ) { die "Parent $$ noticed that child $SERVER_PID unexpectedly died"; } else { die "Child $$ noticed that its parent(huh?) died"; } } sub _REAPER { my $child; my $waitedpid; while (($waitedpid = waitpid( -1, WNOHANG)) > 0) { if ( $? ) { my $exit = $? >> 8; my $sig = $? & 127; my $core = $? & 128; warn "Reaped $waitedpid with exit $exit on signal $sig." . $core ? ' Core dumped!' : ''; } } $SIG{'CHLD'} = \&_REAPER; } 1; __END__ =head1 NAME Regexp::Approx - Use fuzzy regular expressions =head1 SYNOPSIS use Regexp::Approx 'qrmatch'; use re 'eval'; @dict = glob "/usr/share/dict/*"; @ARGV = $dict[ rand @dict ]; my $fuzzy_part = qrmatch( 'teric', [ 'i 10%', 'D0', 'I0' ] ); $\ = "\n"; while (<>) { chomp; s(($fuzzy_part))[[$1]]go and print; } =head1 ABSTRACT This module allows you to use any of String::Approx's functions from within a regular expression. It also provides a convenience function `qrmatch` which provides a regex fragment that works like an inline `scalar amatch( $pattern, [ optional modifier ], $test )` expression. =head1 DESCRIPTION Perl's regular expression engine is not reentrant so using the re-engine while it is already operating is a big no-no. This module skirts that limitation by forking perl and running all the String::Approx functions in a separate process. Through unseemly peeking into String::Approx this module wraps all of the 'a...' functions except they start with 'r' over here. So instead of using 'amatch' you'd use 'rmatch'. =over 4 =item qrmatch( "pattern", [ modifiers ] ) This function returns a regular expression fragment designed to supply the first two arguments of the String::Approx::amatch function. The pattern is required, the modifiers are optional. See L for details on how to construct your pattern and what options are available as modifiers. =back =head2 EXPORT The qrmatch function is optionally exported. =head1 SEE ALSO L =head1 AUTHOR Josh Jore, Ejjore@cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright 2003 by Josh Jore This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut