package Regexp::Approx;
use strict;
use base 'Exporter';
use Storable;
use String::Approx;
use POSIX ':sys_wait_h';
use vars qw($VERSION @ISA @EXPORT_OK
$SERVER_PID
$NLEN
$READ_BUFFER
%IMPORTED_FUNCTIONS
$READER_BITS
$READER $WRITER);
{
no strict 'refs';
%IMPORTED_FUNCTIONS =
map +( $_, *{"String::Approx::$_"}{'CODE'} ),
_imported_functions();
}
@EXPORT_OK = (qw(qrmatch),
map { substr($_,0,1,'r'); $_ }
keys %IMPORTED_FUNCTIONS);
$VERSION = '0.01';
INIT {
$SIG{'CHLD'} = \&_REAPER;
$SIG{'PIPE'} = \&_SIBLING_DIED;
_init_module();
}
END {
_cleanup();
}
sub qrmatch {
my $match = shift;
my $match_len = length $match;
$match = quotemeta $match;
my $storable_decl = '';
my $storable_obj = shift || '';
if ( $storable_obj ) {
$storable_decl = "q(StorableIx=2),";
$storable_obj = Storable::freeze( $storable_obj );
$storable_obj =~ s(([^\w %=]))(sprintf "\\x{%x}", ord $1)ge;
$storable_obj = "qq($storable_obj),";
}
my $long_len = $match_len;#1+int(1.1 * $match_len);
my $match_code =
qq[scalar Regexp::Approx::rmatch(
$storable_decl q($match), $storable_obj
substr( \$_,
pos(),
(length() - pos() < $long_len)
? length() - pos()
: $long_len ) )];
# Seek pos() forward $match_len ticks
my $stand_in = '(?s:'. (q[.] x $match_len) . ')';
return qq[(?x:(?-x:\Q$match\E) # Attempt an exact match first
|
# Match approximately
(?(?{ $match_code })
$stand_in
|
# No match
(?!)
)
)];
}
sub _enter_server {
while ( 1 ) {
my ($context,$func,@msg) = @{_read_msg()};
if ( $context eq 'exit' or
$func eq 'exit' ) {
exit;
} elsif ( $context and $IMPORTED_FUNCTIONS{$func} ) {
my @resp;
if ( $context eq 'list' ) {
@resp = &{$IMPORTED_FUNCTIONS{$func}}( @msg );
} else {
$resp[0] = &{$IMPORTED_FUNCTIONS{$func}}( @msg );
}
_send_msg( \@resp );
} else {
_send_msg( ["Command not recognized"] );
}
}
# NOT REACHED
}
sub _send_msg {
# It is *NOT* safe to invoke the re-engine now.
my $msg =
join( '',
map unpack( 'H*', pack( 'N/a*', $_ ) ),
@{$_[0]} )
. "\n";
return syswrite WRITER, $msg;
}
sub _read_msg {
# It is *NOT* safe to invoke the re-engine now.
my $msg_len;
do {
# Wait until there's some input
my $bits = $READER_BITS;
select( $READER_BITS, undef, undef, undef );
sysread READER, $READ_BUFFER, 8192, length $READ_BUFFER;
$msg_len = rindex $READ_BUFFER, "\n";
} until ( $msg_len != -1 );
my $msg = substr( $READ_BUFFER,
0,
1 + $msg_len,
'' );
local $/ = "\n";
chomp $msg;
my @msg = unpack '(N/a*)*', pack 'H*', $msg;
if ( $msg[0] and
substr($msg[0],0,11) eq 'StorableIx=' ) {
# This is unsafe to execute on the client - only clits send
# storable data so this is only ever processed on the server.
my $ix = substr(shift(@msg),11);
$msg[$ix] = Storable::thaw( $msg[$ix] );
}
return \@msg;
}
sub _imported_functions {
no strict 'refs';
return
grep *{"String::Approx::$_"}{'CODE'},
grep /^a/,
keys %String::Approx::;
}
sub _init_module {
for my $func ( keys %IMPORTED_FUNCTIONS ) {
my $this = $func;
$this =~ s(^a)(r);
eval qq[sub $this {
# It is *NOT* safe to invoke the re-engine now
my \@msg = \@_;
my \$StorableIx;
if ( substr( \$msg[0], 0, 11 ) eq 'StorableIx=' ) {
\$StorableIx = 1+substr shift(\@msg), 11;
}
unshift \@msg, q($func);
unshift \@msg, wantarray
? 'list'
: 'scalar';
if ( defined \$StorableIx ) {
unshift \@msg, "StorableIx=\$StorableIx";
}
_send_msg( \\\@msg );
my \$msg = _read_msg();
return wantarray ? \@\$msg : \$msg->[-1];
}; 1] || die $@;
}
$NLEN = length pack 'N', 0;
# Most of this code was cribbed right from perlipc.
pipe( PARENT_RDR, CHILD_WTR )
or die "pipe() failed: $!";
pipe ( CHILD_RDR, PARENT_WTR )
or die "pipe() failed: $!";
AUTO_FLUSH: {
my $orig_fh = select CHILD_WTR;
$| = 1;
select PARENT_WTR;
$| = 1;
select $orig_fh;
}
$READ_BUFFER = '';
# perlipc safe fork. See _REAPER()
{
my $sleep_count = 0;
do {
$SERVER_PID = fork;
unless ( defined $SERVER_PID ) {
warn "Cannot fork: $!";
die "Bailing out: $!" if $sleep_count++ > 6;
sleep 10;
}
} until defined $SERVER_PID;
}
# Henceforth report the pid with the error message.
if ( $SERVER_PID ) {
close PARENT_RDR
or warn "Parent $$ couldn't close PARENT_RDR: $!";
close PARENT_WTR
or warn "Parent Couldn't close PARENT_WTR: $!";
*WRITER = *CHILD_WTR;
*READER = *CHILD_RDR;
$READER_BITS = '';
vec( $READER_BITS, fileno( READER ), 1 ) = 1;
# Leave the module's initializer
return 1;
} else {
die "Cannot fork: $!" unless defined $SERVER_PID;
close CHILD_RDR
or warn "Child couldn't close CHILD_RDR: $!";
close CHILD_WTR
or warn "Child couldn't close CHILD_WTR: $!";
*WRITER = *PARENT_WTR;
*READER = *PARENT_RDR;
$READER_BITS = '';
vec( $READER_BITS, fileno( READER ), 1 ) = 1;
goto \&_enter_server();
}
# NOT REACHED
}
sub _cleanup {
if ( $SERVER_PID ) {
_send_msg( [qw(void exit)] );
}
close WRITER
or warn "$$ Couldn't close WRITER: $!";
close READER
or warn "$$ Couldn't close READER: $!";
return 1;
}
sub _SIBLING_DIED {
if ( $SERVER_PID ) {
die "Parent $$ noticed that child $SERVER_PID unexpectedly died";
} else {
die "Child $$ noticed that its parent(huh?) died";
}
}
sub _REAPER {
my $child;
my $waitedpid;
while (($waitedpid = waitpid( -1, WNOHANG)) > 0) {
if ( $? ) {
my $exit = $? >> 8;
my $sig = $? & 127;
my $core = $? & 128;
warn "Reaped $waitedpid with exit $exit on signal $sig."
. $core ? ' Core dumped!' : '';
}
}
$SIG{'CHLD'} = \&_REAPER;
}
1;
__END__
=head1 NAME
Regexp::Approx - Use fuzzy regular expressions
=head1 SYNOPSIS
use Regexp::Approx 'qrmatch';
use re 'eval';
@dict = glob "/usr/share/dict/*";
@ARGV = $dict[ rand @dict ];
my $fuzzy_part = qrmatch( 'teric',
[ 'i 10%', 'D0', 'I0' ] );
$\ = "\n";
while (<>) {
chomp;
s(($fuzzy_part))[[$1]]go and print;
}
=head1 ABSTRACT
This module allows you to use any of String::Approx's functions from w
+ithin
a regular expression. It also provides a convenience function `qrmatch
+`
which provides a regex fragment that works like an inline
`scalar amatch( $pattern, [ optional modifier ], $test )` expression.
=head1 DESCRIPTION
Perl's regular expression engine is not reentrant so using the re-engi
+ne
while it is already operating is a big no-no. This module skirts that
limitation by forking perl and running all the String::Approx function
+s in a
separate process.
Through unseemly peeking into String::Approx this module wraps all of
+the
'a...' functions except they start with 'r' over here. So instead of
using 'amatch' you'd use 'rmatch'.
=over 4
=item qrmatch( "pattern", [ modifiers ] )
This function returns a regular expression fragment designed to supply
+ the
first two arguments of the String::Approx::amatch function. The patter
+n is
required, the modifiers are optional. See L<String::Approx> for detail
+s on
how to construct your pattern and what options are available as modifi
+ers.
=back
=head2 EXPORT
The qrmatch function is optionally exported.
=head1 SEE ALSO
L<String::Approx>
=head1 AUTHOR
Josh Jore, E<lt>jjore@cpan.orgE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright 2003 by Josh Jore
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.