# Split an http:// URL into its five components.
#
# Takes one URL string and returns, in list context, the five-element list
#   (site, path, filename, extension, query)
# Components that are absent from the URL are returned as undef.  When the
# URL does not match at all (for example, it does not start with http://)
# the empty list is returned, so the result can be tested for truth via a
# list assignment.
sub _split_http_url {
    my ($url) = @_;

    my @parts = $url =~ m!
        ^http://            # must begin with http://
        (                   # 1: the site
            [^/?]+          #    one or more chars, none of them / or ?
        )                   #    mandatory
        (                   # 2: the path
            /               #    starts with a /
            (?:             #    group without capturing
                [^/?]+      #      a directory name: anything but / or ?
                /           #      followed by a /
            )*              #    zero or more directories
        )?                  #    optional
        (                   # 3: the filename
            [^./?]          #    does not start with a . or ? or /
            [^/?]*?         #    lazily take more chars that are not / or ?
                            #    (may be empty, so one-char names match),
                            #    leaving the last dot for the extension
        )?                  #    optional
        (                   # 4: the extension
            \.              #    starts with a dot
            [^.?]*          #    then anything that is not a . or ?
        )?                  #    optional
        (                   # 5: the parameter string
            \?              #    starts with a ?
            .*              #    and runs to the end
        )?                  #    optional
        $                   # nothing else may follow
    !x;

    return @parts;
}

# Report the pieces of every URL in @url, one line per URL:
#   "<url>\t<site>,<path>,<filename>,<extension>,<query>\n" on a match,
#   "NOMATCH:<url>\n" otherwise.
for my $candidate (@url) {
    if ( my @parts = _split_http_url($candidate) ) {

        # Absent components are undef; print them as empty fields rather
        # than letting join() warn about uninitialized values.
        my @fields = map { defined $_ ? $_ : '' } @parts;
        print "$candidate\t" . join( ',', @fields ) . "\n";
    }
    else {
        print "NOMATCH:$candidate\n";
    }
}