in reply to Re^2: HTML::TableExtract problem handling merged cells across rows (OBO rowspan colspan)
in thread HTML::TableExtract problem handling merged cells across rows
Not sure if this helps, but I 'cleaned' up the tags with this regex to remove the 3D's (the `=3D` sequences, i.e. quoted-printable escapes for `=`):
s/(class|rowspan|style|colspan)=3D/$1=/g;
#!perl
# NOTE: "poj" was fused to the front of this script in the original post
# (presumably the poster's signature); it is kept here so it is not lost.
#
# Purpose:
#   1. Read test.htm line by line and turn attribute assignments written
#      as name=3D... into name=... for class, rowspan, style and colspan.
#   2. Save the cleaned markup to clean.htm.
#   3. Feed the cleaned markup to HTML::TableExtract and print up to the
#      first five rows (header row included) of every matching table.
use strict;
use warnings;
use HTML::TableExtract;

my $infile    = 'test.htm';
my $outfile   = 'clean.htm';
my $max_index = 4;             # highest row index to print (rows 0..4)

open my $in,  '<', $infile  or die "Cannot open $infile for reading: $!";
open my $out, '>', $outfile or die "Cannot open $outfile for writing: $!";

my $html = '';
while ( my $line = <$in> ) {
    # Strip the "3D" that follows "=" in the listed attributes.
    $line =~ s/(class|rowspan|style|colspan)=3D/$1=/g;
    print {$out} $line;
    $html .= $line;
}
close $in;
# Buffered write errors only surface at close, so check it.
close $out or die "Cannot close $outfile: $!";

# The original post showed 'Cust C +ode' and ') +;' -- the '+' characters
# were line-wrap markers added by the forum's code display, not part of
# the script, so they are removed here.
my @col = (
    'Column_1',
    'Asset Tag',
    'Washed Number',
    'Asset Name',
    'Cust Code',
    'Primary IP Address',
);

my $te = HTML::TableExtract->new(
    headers      => [@col],
    keep_headers => 1,
);
$te->parse($html);

foreach my $ts ( $te->tables ) {
    # Do not ask for rows the table does not have.
    # NOTE(review): row() is believed to croak on an out-of-range index;
    # confirm against the installed HTML::TableExtract version.
    my @rows = $ts->rows;
    my $last = $#rows < $max_index ? $#rows : $max_index;

    for my $i ( 0 .. $last ) {
        # Cells may be undef (e.g. where merged cells leave gaps); print
        # them as empty strings so join() does not warn.
        my @cells = map { defined $_ ? $_ : '' } $ts->row($i);
        print "\nLine $i ", join( ', ', @cells );
    }
    print "\n";
}
|
|---|