data
else
####
# Walk every <r> / </r> marker in $html; each time a closing </r> is
# reached, cut out the text between it and the most recent <r> and pass
# that record body to _extract_tags().  $rec_count is bumped once per
# record handed off.
while ($html =~ m/<(\/?r)>/g) {
    # copy the capture right away so later regex activity cannot clobber it
    my $marker = $1;

    if ($marker eq 'r') {
        # pos() sits just past "<r>", i.e. at the start of the record body
        $r_pos = pos($html);
        next;
    }

    # a closing tag with no opening tag seen so far has no body to extract
    next unless defined $r_pos;

    # pos() sits just past "</r>"; back up over the closing tag so the
    # record body does not include it
    $r = substr($html, $r_pos, pos($html) - $r_pos - length('</r>'));
    _extract_tags($r);
    $rec_count++;
}
# _extract_tags($html)
#
# Scans $html for simple <tag> / </tag> markers (names made of word
# characters and '-', no attributes).  For every opening tag whose name has
# a defined entry in %columns, the text up to the matching closing tag is
# stored in a hash keyed by the tag name, with CRLF line endings converted
# to LF.  The collected hash is then passed by reference to the
# $handle_data callback, whose return value is returned to the caller.
sub _extract_tags {
    my ($html) = @_;
    my $tag_pos;     # offset just past the opening tag being captured
    my $curr_tag;    # name of the opening tag being captured, if any
    my %data;

    while ($html =~ m/<(\/?[\w-]+)>/g) {
        my $tag = $1;

        if (defined $curr_tag && $tag eq "/$curr_tag") {
            # get the text inside the tag: pos() is just past the closing
            # tag, so drop its name (slash included) plus the '<' and '>'
            my $text = substr($html, $tag_pos,
                pos($html) - $tag_pos - length($tag) - 2);
            $text =~ s/\r\n/\n/g;
            $data{$curr_tag} = $text;

            # the element is closed; forget it so a stray duplicate closing
            # tag cannot overwrite the value with a longer, wrong span
            undef $curr_tag;
            undef $tag_pos;
        }
        elsif (defined $columns{$tag}) {
            # only get tags specified
            $tag_pos  = pos($html);
            $curr_tag = $tag;
        }
    }

    # pass off to subroutine
    return $handle_data->(\%data);
}