package URL::Deconstruct; use strict; use base 'Class::Accessor'; use Carp qw(croak); use List::MoreUtils qw(zip); use Data::Dumper; =head1 NAME URL::Deconstruct - match-and-extract stuff from HTTP request strings =head1 SYNOPSIS use URL::Deconstruct; my $my_request = URL::Deconstruct->from_spec( q'/(?)/(?)/(?\d+)?order_by=(\w+)' ); if (my %params = $my_request->match('/beer/drink/1?order_by=11') { print "$params{model}\n"; # beer print "$params{order_by}\n"; # 11 } =head1 REQUEST SYNTAX The syntax more or less adapts the regular expression syntax to HTTP request URIs. Currently the following magic is applied to your regular expression /($.*)/ Will capture into the name C. C<.*> and C<.+> and their nongreedy counterparts are implicitly translated to mean C<[^/]*> and C<[^/]+>. Examples: ?foo=bar; will match when there is a HTTP query parameter C with the value C. The pair will be returned. ?foo=bar;baz=bop will match when there is a HTTP query parameter C with the value C I there is a HTTP query parameter C with the value C. The order of the two parameters is not relevant. The pairs will be returned in the order specified in the pattern, not in the actual order of the HTTP request. ?foo=(.*); will match when there is a HTTP query parameter C with any value. The pair C<< foo => $q->param('foo') >> will be returned. Note that C<.*> is implicitly translated to only match up to the next HTTP parameter delimiter. /view/(?$.+)/ will match when there the path component matches C and will capture the second path component into the key C. Note that C<.+> is implicitly translated to only match up to the next path component. =cut __PACKAGE__->mk_accessors(qw(re path_names )); sub from_spec { my ($package,$spec,%options) = @_; my ($re,$path_names) = $package->make_cgi_re($spec); $package->SUPER::new({ re => $re, path_names => $path_names, }); } =head2 C<< $m->require_param NAME, VALUE >> This internal method returns the regular expression to match a HTTP query parameter and its name. NAME is the name of the key into which the value will be captured. VALUE is the regular expression that will match the value. =cut sub require_param { my ($self,$name,$value) = @_; if ($value =~ /\.\*/) { $value =~ s/\.\*/[^;&]\*/g; } return qr/(?=.*?(?:[;&?])($name)=($value)(?:[;&]|$))/; } =head2 C<< $m->make_cgi_re >> This is the internal method that implements the meat of the request decoder. It creates a regular expression that will match and capture the request fields. =cut sub make_cgi_re { my ($package,$re) = @_; my $res = $re; my @path_names; my ($path,$params) = $re,undef; if ($res =~ /[^(]\?/) { # Quite crudely split off the query: if ($res !~ m!^(.*?[^(])\?(\w+=[^/]+)$!) { croak "Cannot determine path and query parts of $re"; }; ($path,$params) = ($1,$2); }; if ($path) { my @elems = map { #warn $_; if (/^\(\?<(\w+)>(.*)\)$/) { push @path_names, $1; # Hack in implicit declaration if none given #warn ">$2<"; if ("" eq $2) { $_ = "(\\w+)"; } else { $_ = "($2)"; } s!\.([*+])![^/]$1!; } elsif (/^\(.*\)$/) { croak "Cannot capture path elements into unnamed fields. Use (?)"; } defined $_ ? $_ : "^" } split m!/!, $path; $path = join "/", @elems; } if ($params) { my @elems = map { # handle foo=(.*) -> (foo=>'bar') if (/^(\w+)=\((.*)\)$/) { $_ = $package->require_param($1,$2); # handle foo=bar -> (foo=>'bar') } elsif (/^(\w+)=(.+)/) { $_ = $package->require_param($1,$2); } $_ } split /[&;]/, $params; $params = join "", @elems; $params = "$params.*?(?:[;&]|\$)"; } else { $params = ""; } $res = "^$path$params"; qr/$res/, \@path_names } =head2 C<< $m->request_matches($url) >> Returns a list of captured values if the request matches. If the request matches but does not capture anything, a single 1 is returned. This is ugly but such is life. =cut sub match { my ($self,$req) = @_; # create a careful splitter that maps the path names # conveniently into the result... my $re = $self->re; #warn "$req =~ /$re/ ?"; if (my @res = $req =~ /$re/) { # two steps because I'm not sure about order of execution my $n = $self->path_names; #warn Dumper \@res; my @path = splice @res, 0, scalar @$n; return zip(@$n,@path),@res } else { return } } 1; __END__ =head1 TODO =item * Think about making C<&> a coderef/callback feature for validation. An example would be: sub model_can { my ($method) = @_; My::Model->can($method); } /beer/(?&model_can)/(?) This could call the C callback. On the other hand, this is probably total overkill and should be done in a second validation step instead of cramming it all into one line. This would need the C callback feature so the previous parameters are already set up. This is exceedingly ugly. =item * Think about returning a plain string/piece of (templated) Perl code to enable a dispatcher to be maximally efficient. =item * Could many requests be mapped into one (huge) regex? Under the assumption that there will always be enough parameters captured to later on decode what was actually wanted... =cut