#!/usr/bin/perl --
# Demonstration script: feeds a "skewed" table fixture (cells using colspan
# and rowspan) to HTML::TableExtract and prints what the parser extracted,
# first via tables_dump() and then as a Data::Dump of the row arrays.
#
# The statements below were previously collapsed onto the shebang line,
# which turned the entire program into a comment; they are restored to
# one statement per line here.
use strict;
use warnings;
use Data::Dump qw/ dd /;
use HTML::TableExtract;

Main( @ARGV );
exit( 0 );

# Main: parse the fixture returned by SkewHtml() with a default-configured
# HTML::TableExtract object and print the result to STDOUT.
# Arguments: receives @ARGV but does not read it.
# Returns:   nothing meaningful (the value of the final dd() call).
sub Main {
    my $te = HTML::TableExtract->new();
    $te->parse( SkewHtml() );
    $te->eof;    # tell the parser there is no more input

    # Per the HTML::TableExtract docs the arguments are a show-content
    # flag and the column separator used in the textual dump.
    $te->tables_dump( 'show_content', ',' );

    # Dump the extracted rows as Perl data for comparison with the above.
    dd( $te->rows );
    return;
} ## end of Main

# SkewHtml: build and return the fixture string handed to the parser.
# Takes no arguments. Returns the fixture after three substitutions:
#   1. strip leading/trailing whitespace on every line,
#   2. replace each newline plus the one character after it with a space,
#   3. rewrite every "rowspan=3" to "rowspan=3D2".
#
# NOTE(review): as it stands the fixture contains no HTML tags at all --
# presumably the table markup was stripped when this script was extracted
# from a rendered page. The text is kept verbatim; restore the markup from
# the original post before relying on the output.
sub SkewHtml {
    my $html = q{ skew test 2
head0 head1 head2 head3
0/0 colspan=4
0/0 undef undef undef
"0/0" "0/0" "0/0" "0/0"
1/0 rowspan=2
1/0 1/1 1/2 1/3
1/1 1/2 1/3
2/1 colspan=2 rowspan=3
undef 2/1 undef 2/3
"1/0" "2/1" "2/1" "2/3"
2/3
3/0
3/0 undef undef 3/3
"3/0" "2/1" "2/1" "3/3"
3/3
4/0
4/0 undef undef 4/3
"4/0" "2/1" "2/1" "4/3"
4/3
5/0 colspan=2
5/0 undef 5/2 5/3
"5/0" "5/0" "5/2" "5/3"
5/2 5/3
TABLE(0, 0):
head0,head1,head2,head3
 0/0  colspan=4  ,,,
 1/0  rowspan=2  , 1/1 , 1/2  , 1/3  
, 2/1 colspan=2 rowspan=3  ,, 2/3 
 3/0  ,,, 3/3 
 4/0  ,,, 4/3 
 5/0 colspan=2  ,, 5/2 , 5/3 
(
  ["head0", "head1", "head2", "head3"],
  [" 0/0  colspan=4  ", undef, undef, undef],
  [" 1/0  rowspan=2  ", " 1/1 ", " 1/2  ", " 1/3  "],
  [undef, " 2/1 colspan=2 rowspan=3  ", undef, " 2/3 "],
  [" 3/0  ", undef, undef, " 3/3 "],
  [" 4/0  ", undef, undef, " 4/3 "],
  [" 5/0 colspan=2  ", undef, " 5/2 ", " 5/3 "],
)
};

    # Trim whitespace at both ends of every line (/m makes ^ and $ match
    # at each embedded line boundary).
    $html =~ s{^\s+|\s+$}{}gm;

    # The pattern previously contained a raw line break; "\n" matches the
    # same thing and lets the statement be indented without altering it.
    # NOTE(review): this pattern looks damaged by the same extraction that
    # removed the markup (it now eats a newline plus one character) --
    # confirm against the original post.
    $html =~ s{(\n.+?)}{ }gm;

    # Literal text replacement; the "3D2" spelling is preserved as found.
    $html =~ s{rowspan=3}{rowspan=3D2}g;

    return $html;
} ## end of SkewHtml

__END__
TABLE(0, 0):
head0,head1,head2,head3
0/0 colspan=4 ,,,
1/0 rowspan=2 , 1/1 , 1/2 , 1/3
, 2/1 colspan=2 rowspan=3D2 ,, 2/3
3/0 ,,, 3/3
4/0 ,,, 4/3
5/0 colspan=2 ,, 5/2 , 5/3
(
  ["head0", "head1", "head2", "head3"],
  [" 0/0 colspan=4 ", undef, undef, undef],
  [" 1/0 rowspan=2 ", " 1/1 ", " 1/2 ", " 1/3 "],
  [undef, " 2/1 colspan=2 rowspan=3D2 ", undef, " 2/3 "],
  [" 3/0 ", undef, undef, " 3/3 "],
  [" 4/0 ", undef, undef, " 4/3 "],
  [" 5/0 colspan=2 ", undef, " 5/2 ", " 5/3 "],
)
####
TABLE(0, 0):
head0,head1,head2,head3
0/0 colspan=4 ,,,
1/0 rowspan=2 , 1/1 , 1/2 , 1/3
, 2/1 colspan=2 rowspan=3 ,, 2/3
3/0 ,,, 3/3
4/0 ,,, 4/3
5/0 colspan=2 ,, 5/2 , 5/3
(
  ["head0", "head1", "head2", "head3"],
  [" 0/0 colspan=4 ", undef, undef, undef],
  [" 1/0 rowspan=2 ", " 1/1 ", " 1/2 ", " 1/3 "],
  [undef, " 2/1 colspan=2 rowspan=3 ", undef, " 2/3 "],
  [" 3/0 ", undef, undef, " 3/3 "],
  [" 4/0 ", undef, undef, " 4/3 "],
  [" 5/0 colspan=2 ", undef, " 5/2 ", " 5/3 "],
)