#!/usr/bin/perl -w use strict; use HTML::Parser; my $langs={}; my $in_tag = 0; my $tags={}; our $textout=''; my $start = sub { my ($tag, $attr, $text) = @_; our ($lang, $textout); if ($tag eq 'locale') { $langs->{$attr->{lang}}=1; # mark languages found if ($attr->{lang} eq $lang) { $textout.=$text; $in_tag=0; # override if already in locale tag } else { $in_tag=1; } } else { $textout.=$text; } }; my $end = sub { my ($tag, $attr, $text) = @_; our $textout; if ($tag eq 'locale' and $in_tag) { $in_tag=0; } else { $textout.=$text; } }; my $p = HTML::Parser->new( default_h => [ sub { $textout.=shift unless $in_tag }, 'text'], start_h => [ $start , 'tagname, attr, text'], end_h => [ $end, 'tagname, attr, text'], ); # Order of preference for languages my $acceptable = [qw{ en fr de it }]; my $data; while () { $data.=$_; } $textout.=''; $langs={}; foreach our $lang (@$acceptable) { next if (scalar keys %$langs && !(exists $langs->{$lang})); $textout=''; $p->parse($data); last if (scalar keys %$langs && (exists $langs->{$lang})); } print $textout."\n"; __DATA__ Some English Some French Some German