use warnings;
use strict;

use Regexp::Common qw/URI/;
use URI;

# Sample input that both extraction approaches below are run against.
my $str = <<'END_STR';
I am an example http://www.perlmonks.org/?parent=1176663;node_id=3333 text that contains two URIs
END_STR

# Approach 1: let Regexp::Common locate the URIs. With {-keep} the whole
# match is available in $1, which is then parsed by the URI module so the
# individual components can be printed.
while ($str =~ /$RE{URI}{-keep}/g) {
    my $uri = URI->new($1);

    # query() returns undef when the URI has no query component; print an
    # empty string in that case instead of triggering an "uninitialized
    # value" warning.
    my $query = $uri->query;

    print "$uri\n";
    print " Scheme: ", $uri->scheme, "\n";
    print " Host: ",   $uri->host,   "\n";
    print " Path: ",   $uri->path,   "\n";
    print " Query: ",  (defined $query ? $query : ''), "\n";
}

####

# Approach 2: a hand-written pattern.
# NOTE this is based on a quick skim of RFC 3986 and may not be complete!
#
# Grammar excerpts from https://tools.ietf.org/html/rfc3986#section-2 :
#   URI         = scheme ":" hier-part ...; hier-part = "//" ...
#   scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
#   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
#   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
#               / "*" / "+" / "," / ";" / "="
#   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
#   pct-encoded = "%" HEXDIG HEXDIG
#
# The grammar is quoted here rather than inside the qr{}x body because
# comments inside a pattern are still subject to variable interpolation
# and to delimiter matching.
my $url_re = qr{
    # scheme followed by the literal "://"
    [A-Za-z][A-Za-z0-9+\-.]* ://
    (?:
        # any gen-delims, sub-delims or unreserved character
        [:/?#\[\]@!\$&'()*+,;=A-Za-z0-9\-._~]
    |
        # pct-encoded
        %[0-9A-Fa-f]{2}
    )*
}x;

# The outer parentheses capture the complete match into $1.
while ($str =~ /($url_re)/g) {
    my $uri = URI->new($1);
    print "$uri\n";
}