use strict;
use warnings;

# Demo script: shorten long http(s) URLs in two different ways -- once with
# a single regex substitution and once with split/join -- and print both
# results for every URL in the DATA section so they can be compared.
#
# "Shortening" means: drop the query string and replace every directory
# between the host and the final path component with the marker "(...)".
# Both variants assume an absolute http:// or https:// URL.

# URLs of this length or shorter are printed unchanged.
use constant MAX_URL_LENGTH => 49;

# shorten_with_regex($url)
#
# Returns the shortened form of $url, or $url itself when it is not longer
# than MAX_URL_LENGTH or has no directory between host and final component.
sub shorten_with_regex {
    my ($url) = @_;

    return $url if length($url) <= MAX_URL_LENGTH;

    # A single anchored pattern (no alternation) keeps both captures
    # defined on every match, and [^?] stops the path part from running
    # into the query string.
    ( my $short = $url ) =~ s{
        \A
        ( https?:// [^/?]+ / )   # capture 1: scheme, host, first slash
        [^?]* /                  # the directories that get elided
        ( [^/?]* )               # capture 2: final path component
        (?: \? .* )?             # optional query string, discarded
        \z
    }{$1(...)/$2}xs;

    return $short;
}

# shorten_with_split($url)
#
# Same contract as shorten_with_regex, implemented with split and join.
sub shorten_with_split {
    my ($url) = @_;

    return $url if length($url) <= MAX_URL_LENGTH;

    # Everything before the first "?" is the part we care about.
    my ($path) = split /[?]/, $url, 2;

    # LIMIT -1 keeps a trailing empty field, so "http://host/a/b/" ends in
    # an empty final component instead of silently promoting "b".
    my @parts = split m{/}, $path, -1;

    # Expected layout: "http:", "", host, one or more directories, last.
    # With fewer than five fields there is nothing to elide.
    return $url if @parts < 5;

    return join '/', @parts[ 0 .. 2 ], '(...)', $parts[-1];
}

while ( my $url = <DATA> ) {
    chomp $url;

    my $by_regex = shorten_with_regex($url);
    my $by_split = shorten_with_split($url);

    print "REGEX: $by_regex\n";
    print "SPLIT: $by_split\n\n";
}

__DATA__
http://some-shop.com/dir1/dir2/buystuff.cgi?x=1&y=2&z=3
http://somewhere/with/a/vastly/deep/structure/virus.exe
http://host.com/some/uri/whatever?some/query/stringthatis/here
https://some-shop.com/dir1/dir2/buystuff.cgi?x=1&y=2&z=3
https://somewhere/with/a/vastly/deep/structure/virus.exe
https://host.com/some/uri/whatever?some/query/stringthatis/here