#!/usr/bin/perl --
use strict;
use warnings;
use Data::Dump qw/ dd /;
use HTML::TableExtract;

Main( @ARGV );
exit( 0 );

# Main: run the sample HTML from SkewHtml() through HTML::TableExtract
# and print two views of what was extracted: the tables_dump() report
# and a Data::Dump of the rows.
# Any arguments passed in (the caller hands over @ARGV) are not used.
sub Main {
    my $extractor = HTML::TableExtract->new();

    # Hand over the whole document in one call, then signal end of input.
    $extractor->parse( SkewHtml() );
    $extractor->eof;

    # Table report with cell content shown, columns separated by ','.
    $extractor->tables_dump( 'show_content', ',' );

    # Row structure exactly as rows() returns it.
    dd( $extractor->rows );
} ## end of Main

sub SkewHtml { my $html= q{
| head0 | head1 | head2 | head3 |
| 0/0 colspan=4 0/0 undef undef undef "0/0" "0/0" "0/0" "0/0" |
|||
| 1/0 rowspan=2 1/0 1/1 1/2 1/3 |
1/1 | 1/2 | 1/3 |
| 2/1 colspan=2 rowspan=3 undef 2/1 undef 2/3 "1/0" "2/1" "2/1" "2/3" |
2/3 | ||
| 3/0 3/0 undef undef 3/3 "3/0" "2/1" "2/1" "3/3" |
3/3 | ||
| 4/0 4/0 undef undef 4/3 "4/0" "2/1" "2/1" "4/3" |
4/3 | ||
| 5/0 colspan=2 5/0 undef 5/2 5/3 "5/0" "5/0" "5/2" "5/3" |
5/2 | 5/3 | |
TABLE(0, 0): head0,head1,head2,head3 0/0 colspan=4 ,,, 1/0 rowspan=2 , 1/1 , 1/2 , 1/3 , 2/1 colspan=2 rowspan=3 ,, 2/3 3/0 ,,, 3/3 4/0 ,,, 4/3 5/0 colspan=2 ,, 5/2 , 5/3 ( ["head0", "head1", "head2", "head3"], [" 0/0 colspan=4 ", undef, undef, undef], [" 1/0 rowspan=2 ", " 1/1 ", " 1/2 ", " 1/3 "], [undef, " 2/1 colspan=2 rowspan=3 ", undef, " 2/3 "], [" 3/0 ", undef, undef, " 3/3 "], [" 4/0 ", undef, undef, " 4/3 "], [" 5/0 colspan=2 ", undef, " 5/2 ", " 5/3 "], )}; $html =~ s{^\s+|\s+$}{}gm; $html =~ s{(
##
TABLE(0, 0):
head0,head1,head2,head3
0/0 colspan=4 ,,,
1/0 rowspan=2 , 1/1 , 1/2 , 1/3
, 2/1 colspan=2 rowspan=3 ,, 2/3
3/0 ,,, 3/3
4/0 ,,, 4/3
5/0 colspan=2 ,, 5/2 , 5/3
(
["head0", "head1", "head2", "head3"],
[" 0/0 colspan=4 ", undef, undef, undef],
[" 1/0 rowspan=2 ", " 1/1 ", " 1/2 ", " 1/3 "],
[undef, " 2/1 colspan=2 rowspan=3 ", undef, " 2/3 "],
[" 3/0 ", undef, undef, " 3/3 "],
[" 4/0 ", undef, undef, " 4/3 "],
[" 5/0 colspan=2 ", undef, " 5/2 ", " 5/3 "],
)