{ my ( $TLD, $ccTLD ); sub valid_domain { local $_ = shift; # user:pass@401authen.com is ~ valid and useful for more than phishing. if ( m/^[^@]+@/ ) { (undef, $_) = split '@'; } # domain.com:8080 is ~ valid/common if ( m/:\d+\z/ ) { ($_) = split ':'; } # having dealt with edge cases check for the valid chars return 0 unless m/^[A-Za-z0-9\.\-]+\z/; # domains are case insensitive and we need lc for hash table lookup $_ = lc; # end in digits can only be dot quad if ( m/\.\d+$/ ) { return 0 unless m/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/; return 1 if $1 < 256 and $2 < 256 and $3 < 256 and $4 < 256; } # end in alpha chars and appropriate \w\.\w{2,}$ type syntax elsif ( m/\.([A-Za-z]{2,})$/ ) { ( $TLD, $ccTLD ) = init_tld() unless $TLD; return 0 if m/^[.-]/ or m/\.{2}/ or m/\-\.|\.\-/; return 1 if exists $TLD->{$1} or exists $ccTLD->{$1}; } # everything else is invalid return 0 } } sub init_tld { my ( %TLD, %ccTLD ); @TLD{ qw( aero arpa biz com coop edu gov info int mil museum name nato net org pro ) } = (); @ccTLD{qw( ac ad ae af ag ai al am an ao aq ar as at au aw az ba bb bd be bf bg bh bi bj bm bn bo br bs bt bv bw by bz ca cc cd cf cg ch ci ck cl cm cn co cr cu cv cx cy cz de dj dk dm do dz ec ee eg eh er es et fi fj fk fm fo fr fx ga gd ge gf gg gh gi gl gm gn gp gq gr gs gt gu gw gy hk hm hn hr ht hu id ie il im in io iq ir is it je jm jo jp ke kg kh ki km kn kp kr kw ky kz la lb lc li lk lr ls lt lu lv ly ma mc md mg mh mk ml mm mn mo mp mq mr ms mt mu mv mw mx my mz na nc ne nf ng ni nl no np nr nu nz om pa pe pf pg ph pk pl pm pn pr ps pt pw py qa re ro ru rw sa sb sc sd se sg sh si sj sk sl sm sn so sr st sv sy sz tc td tf tg th tj tk tm tn to tp tr tt tv tw tz ua ug uk um us uy uz va vc ve vg vi vn vu wf ws ye yt yu za zm zw ) } = (); return \%TLD, \%ccTLD; }