#!/usr/bin/perl -w
#
# Parse an HTML fragment, locate the first <table width="100%"> element and
# print the whitespace-trimmed text of each of its direct children on a
# single line, separated by spaces.
#
# Dies with a descriptive message when no matching table is present.

use strict;
use warnings;

use HTML::TreeBuilder;

# NOTE(review): the sample below contains no markup as it appears in this
# file, so the table lookup further down cannot match it. Presumably the
# original tags were stripped somewhere along the way -- confirm and restore
# the real HTML before relying on the output.
my $html = <<'EOHTML';
02:44 AM EDT
0:42 (est.)
Speech
U.S.-Japan Relations
Asia Society, Washington Center
Ryozo Kato , Japan
EOHTML

# new_from_content() constructs the tree, parses the content and finalizes
# it, so no further parse call is needed (parsing the same string a second
# time was redundant).
my $tree = HTML::TreeBuilder->new_from_content($html);

# In scalar context look_down() returns the first matching element, or undef
# when nothing matches.
my $table = $tree->look_down( '_tag', 'table', 'width', '100%' );

if ( !defined $table ) {
    $tree->delete;
    die qq{No <table width="100%"> element found in the HTML input\n};
}

# content_list() yields child elements (objects) and bare text segments
# (plain strings). Each child must be inspected individually via $_;
# testing the table itself would repeat the whole table's text per child.
my @trimmed_text =
    map { ref $_ ? $_->as_trimmed_text : $_ } $table->content_list;

print "@trimmed_text\n";

# Release the parse tree explicitly; older HTML::Element versions hold
# circular parent/child references that are not freed automatically.
$tree->delete;

# Presumably the structure this script is ultimately meant to produce
# (not built by the code above):
#
# #### %h_cspan = (
#     time   => 02:44 AM EDT
#     length => 0:42 (est.)
#     type   => Speech
#     title  => U.S.-Japan Relations
#     org    => Asia Society, Washington Center
# );