data else ####

# Walk $html looking for <r> ... </r> record markers. The markup between each
# pair is handed to _extract_tags(), and $rec_count is incremented once per
# completed record.
while ($html =~ m{<(/?r)>}g) {
    my $marker = $1;

    if ($marker eq 'r') {
        # pos() sits just past "<r>", i.e. at the first character of the
        # record body.
        $r_pos = pos($html);
        next;
    }

    # A "</r>" that was not preceded by an "<r>" has no body to extract.
    next unless defined $r_pos;

    # pos() sits just past "</r>"; subtract the closing marker so that $r
    # holds only the text between the two markers.
    $r = substr($html, $r_pos, pos($html) - $r_pos - length('</r>'));
    _extract_tags($r);
    $rec_count++;

    # Forget the start offset so a stray second "</r>" cannot re-emit a span
    # that begins at the previous record.
    $r_pos = undef;
}

# _extract_tags($record_markup)
#
# Collects the text of selected elements from one record and passes it on.
#
# Every "<name>" whose name has a defined entry in %columns starts a capture;
# the matching "</name>" ends it. The text in between is stored in a hash
# under "name", with CRLF line endings converted to LF. If another tracked
# opening tag is seen before the current one is closed, the capture switches
# to the newer tag.
#
# When the whole record has been scanned, the code reference in $handle_data
# is called with a reference to the collected hash; its result is the return
# value of this sub.
sub _extract_tags {
    my $html = $_[0];

    my $tag_pos;     # offset just past the opening tag being captured
    my $curr_tag;    # name of the tag being captured, undef when idle
    my %data;

    while ($html =~ m{<(/?[\w-]+)>}g) {
        my $tag = $1;

        if (defined $curr_tag && $tag eq "/$curr_tag") {
            # pos() is just past "</name>", which is length($tag) plus the
            # two angle brackets; back that off to get the inner text only.
            my $text = substr($html, $tag_pos,
                              pos($html) - $tag_pos - length($tag) - 2);
            $text =~ s/\r\n/\n/g;
            $data{$curr_tag} = $text;

            # Capture finished: a later unmatched "</name>" must not
            # overwrite this value with a longer, bogus span.
            $curr_tag = undef;
        }
        elsif (defined $columns{$tag}) {
            # only get tags specified
            $tag_pos  = pos($html);
            $curr_tag = $tag;
        }
    }

    # pass off to subroutine
    $handle_data->(\%data);
}