use LWP::Simple;
use XML::DTDParser;

# Build an element/attribute vocabulary from a published XHTML DTD and pass
# it to define_vocabulary().  The code below is written to stay clean under
# `use strict; use warnings;`.

# Any other XHTML 1.0 DTD works as well, e.g. xhtml1-frameset.dtd.
my $dtdfile = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";

# LWP::Simple::get() returns undef when the download fails; stop here rather
# than feeding undef to the substitution and the parser below.
my $dtd = get($dtdfile);
die "Could not fetch DTD from $dtdfile\n" unless defined $dtd;

# Strip everything up to and including the "Imported Names" banner comment;
# this avoids a 'die' in XML::DTDParser.
$dtd =~ s/.*=== Imported Names =+-->//s;

# Kept as a package variable (as in the original, which assigned to an
# undeclared $DTD) so code elsewhere that reads it keeps working.
our $DTD = ParseDTD($dtd);

# Element names, upper-cased.
my $elems = [ map { uc($_) } keys %$DTD ];

# Attribute names across all elements: '-' replaced by '_', each resulting
# name kept only the first time it is seen.
my %seen;
my @attr_names;
for my $element (keys %$DTD) {
    # Skip elements without an attribute table instead of autovivifying an
    # empty one inside the parsed DTD.
    my $attributes = $DTD->{$element}{'attributes'} or next;
    for my $attribute (keys %$attributes) {
        (my $name = $attribute) =~ s/-/_/g;
        push @attr_names, $name unless $seen{$name}++;
    }
}
my $attrs = \@attr_names;

define_vocabulary($elems, $attrs);