use strict;
use warnings;

# Return the ordinal of the highest-numbered capture group that took part in
# the most recent successful match.
#
# perlvar documents $#- as the index of the last matched subgroup (in
# contrast to $#+, which is the number of groups in the whole pattern), so no
# fallback probing of the numbered capture variables is required.
#
# The empty prototype is kept so existing callers may keep using the name as
# a bare term without parentheses.
sub last_paren_match_ordinal() {
    return $#-;
}

# Translate a file-name glob into the text of a Perl regular expression.
# (Initial version, 27-June-2001 by JMD.)
#
# Parameter: the glob, as a single string.
# Returns:   the regex source as a string (not a compiled qr// object).
#
# Glob dialect, one handler per capture group of the tokenizer below:
#   [...]       kept as a character class; a leading "!" becomes "^"
#   [?]         exactly one character
#   [] / [!?]   "no character present": look-ahead for ".", ";" or the end
#   ?...        before ".", ";" or the end: up to that many optional chars
#   ?...        elsewhere: exactly that many characters
#   *           any run of characters (consecutive stars are merged first)
#   ;           separates alternative globs, each anchored on its own
#   $ ^ \ . { } [ ] ( ) + |   any other occurrence is backslash-escaped
sub globtore ($) {
    my ($glob) = @_;

    # Indexed by capture-group ordinal; each handler receives the matched
    # text and returns the regex fragment that replaces it.
    my @handler_for = (
        undef,    # index 0 is never used; groups are numbered 1..6

        sub {     # 1: bracket expression
            my ($text) = @_;
            return '.' if $text eq '[?]';    # special case meaning
            # Also special: match "no character present".
            return '(?=\.|;|\z)' if $text eq '[]' || $text eq '[!?]';
            # The shell negates with "!", Perl with "^".  Nothing else in
            # the class is rewritten (an original "^" is left alone).
            $text =~ s/^\[!/[^/;
            return $text;
        },

        sub {     # 2: question marks before a dot, a semicolon or the end
            my ($text) = @_;
            my $count = length $text;
            return $count == 1 ? '.?' : ".{0,$count}";
        },

        sub {     # 3: question marks anywhere else
            my ($text) = @_;
            my $count = length $text;
            return $count == 1 ? '.' : ".{$count}";
        },

        sub {     # 4: star
            return '.*';
        },

        sub {     # 5: semicolon - close this alternative, open the next
            return '$|^';
        },

        sub {     # 6: regex metacharacter with no glob meaning
            my ($text) = @_;
            return "\\$text";
        },
    );

    # Smash out consecutive stars.
    $glob =~ s/\*+/*/g;

    # Main replacement pass: exactly one group matches per token, and its
    # ordinal selects the handler.  Parentheses, plus and vertical bar are
    # escaped as well so that they cannot leak into the result as live
    # regex syntax.
    $glob =~ s/
        (\[.*?\])                           |   # 1 bracket expression
        (\?+(?=\.|;|\z))                    |   # 2 question marks before dot or end
        (\?+)                               |   # 3 question marks elsewhere
        (\*+)                               |   # 4 stars
        (;)                                 |   # 5 separator
        ([\$\^\\\.\{\}\[\]\(\)\+\|])            # 6 other special characters
    /$handler_for[ last_paren_match_ordinal() ]->($+)/gex;

    # Fix up begin and end marks: anchor the pattern, then drop one redundant
    # "match anything" anchor.  The trim is tied to the very start or the
    # very end of the string so that it can never eat a "^.*" or ".*$" that
    # merely appears inside the pattern, such as an escaped caret followed by
    # a star, or the contents of a character class.  As before, at most one
    # end is trimmed, with the leading one taking priority.
    my $regex = "^$glob\$";
    unless ($regex =~ s/\A\^\.\*//) {
        $regex =~ s/\.\*\$\z//;
    }
    return $regex;
}

# Demo driver: translate every glob listed in the DATA section below.
while (my $line = <DATA>) {
    chomp $line;
    $line =~ s/\s+#.*$//;    # allow comments in input data
    print "$line ==> ";
    my $result = globtore($line);
    print "$result\n";
}

__DATA__
LETTER?.DOC   # comments allowed in test data, separated by at least one space.
funny#file#name.txt   # the prev # are not comments because no space.
*.DO?
file{braces}.^^x
*.DO[?]
foobar.exe
xxy
*.exe
l?tter?.d??
*.*
st*.d*
*am*.txt
letter[0-9].doc
?[aeiouy]*.*
[a-dt-v]ip
letter[?].doc
letter[].doc
letter[!?].doc
test[!0-9].doc   # anything except digits.
??[abc]*[def]*.[pq]*
letter1;v2
letter1[;]v2