use URI::QueryParam;    # adds the query_param() method to URI objects

# test_url( $uri, $server )
#
# Decide whether a URL should be indexed/spidered.
#
#   $uri    - URI object for the candidate URL (must support path() and,
#             via URI::QueryParam, query_param()).
#   $server - second argument supplied by the caller; accepted for
#             interface compatibility but not consulted here.
#
# Returns a true value when the URL is acceptable, and a false value
# (empty list / undef from a bare return, or a failed match) otherwise.
sub test_url {
    my ( $uri, $server ) = @_;

    my $path = $uri->path;

    # A white list is always better than a black list if you can make one:
    # only paths ending in ".html" are considered at all.
    return unless $path =~ /\.html$/;

    # query_param() returns undef in scalar context when the parameter is
    # absent. Normalise to the empty string so the string comparisons below
    # do not raise "uninitialized value" warnings for URLs without a query.
    my $column = $uri->query_param("C");
    my $order  = $uri->query_param("O");
    $column = '' unless defined $column;
    $order  = '' unless defined $order;

    # NOTE(review): C=N;O=D and C=M;O=A look like the column/order sort
    # links that Apache directory listings generate (name descending and
    # last-modified ascending) -- confirm. Whatever their origin, URLs
    # carrying exactly these parameter pairs are rejected.
    return if $column eq "N" and $order eq "D";
    return if $column eq "M" and $order eq "A";

    # Make sure that the path is limited to the docs path.
    return $path =~ m[^/starteam_area/];
}