# Scan a string of HTML for double-quoted href attributes and return one
# array ref per link: [ origin, path, text ].
#
#   origin - "scheme://host" when the URL is absolute, otherwise ''.
#   path   - the rest of the URL with one leading slash removed; may be ''
#            for a host-only URL such as "http://example.org".
#   text   - everything between the tag's closing '>' and the anchor's end.
#
# Only hrefs whose closing quote is followed directly by '>' (optional
# whitespace allowed) are matched, and an empty href="" is skipped.
# Returns an empty list when the input is undefined or has no such links.
sub _extract_href_parts {
    my ($markup) = @_;
    return if !defined $markup;

    my @links;
    while (
        $markup =~ m{
            href \s* = \s* "
            (?!")                               # skip an empty href value
            ( (?: [^"/:]+ :// [^"/]+ )? )       # capture 1: scheme://host, never crossing a quote
            /?                                  # separator slash is not part of the path
            ( [^"]* )                           # capture 2: path; may be empty so the host is never split
            " \s* >
            ( .*? )                             # capture 3: link text, lazily ...
            (?= </a \s* > | <a \b | \z )        # ... up to the close tag, the next anchor, or end of input
        }gisx
      )
    {
        push @links, [ $1, $2, $3 ];
    }
    return @links;
}

# Print one "origin, path, text" line for every link found in $html.
for my $link ( _extract_href_parts($html) ) {
    my ( $origin, $path, $text ) = @{$link};
    print "$origin, $path, $text \n";
}