Beefy Boxes and Bandwidth Generously Provided by pair Networks
good chemistry is complicated,
and a little bit messy -LW
 
PerlMonks  

Re: Convert html table to text

by Anonymous Monk
on Sep 18, 2008 at 13:47 UTC ( [id://712277]=note: print w/replies, xml ) Need Help??


in reply to Convert html table to text

Is it possible to limit the overall width of the table, i.e. 120 characters wide?

Replies are listed 'Best First'.
Re^2: Convert html table to text
by Anonymous Monk on Sep 19, 2008 at 08:22 UTC
    Not really; this is the best I could come up with:
    #!/usr/bin/perl --
    # Render HTML tables as ASCII-art text tables, keeping each top-level
    # table within (roughly) a fixed overall character width.
    use strict;
    use warnings;
    use HTML::TreeBuilder;
    use Text::ASCIITable;
    use List::Util qw(max);

    # Sample input: a bordered table whose last row contains a nested,
    # borderless, header-less table.
    my $html = q~
    <table border=1>
    <tr><th>header1</th><th>header2</th></tr>
    <tr> <Td> tr 1, td 1 </td> <td> tr 1, td 2</td> </tr>
    <tr>
      <td> tr 3, td 1
        <table border=0><tr><td>00</td></tr><tr><td>0</td></tr><tr><td> Yes, a Lone Table Cell</td></tr></table>
      </td>
      <td> tr2 td2 </td>
    </tr>
    </table>~;

    my $t = HTML::TreeBuilder->new();
    $t->parse($html);
    $t->eof;

    # NOTE: find_by_tag_name returns every <table> in the tree, so a nested
    # table is printed inline inside its parent cell AND again on its own.
    print DumpTable($_), $/, $/ for $t->find_by_tag_name('table');

    # Explicitly release the parse tree once we are done with it.
    $t->delete;

    # _table_rows($node)
    #   Return the <tr> elements that belong to $node, descending through
    #   <thead>/<tbody>/<tfoot> wrappers. Bare text nodes and any other
    #   children are skipped. Rows of nested tables are NOT returned, because
    #   the walk never descends into <tr>/<td>.
    sub _table_rows {
        my ($node) = @_;
        my @rows;
        for my $child ( $node->content_list ) {
            next unless ref $child;    # plain strings have no ->tag
            my $tag = $child->tag;
            if ( $tag eq 'tr' ) {
                push @rows, $child;
            }
            elsif ( $tag eq 'thead' || $tag eq 'tbody' || $tag eq 'tfoot' ) {
                push @rows, _table_rows($child);
            }
        }
        return @rows;
    }

    # _cell_text($cell, $depth, $max_width)
    #   Return the text for one <td>. A cell with no table below it is simply
    #   flattened with as_text. Otherwise its direct children are walked:
    #   strings are appended verbatim, a direct child <table> is rendered
    #   recursively (surrounded by newlines), and any other element is
    #   flattened with as_text.
    sub _cell_text {
        my ( $cell, $depth, $max_width ) = @_;

        return $cell->as_text unless $cell->look_down( '_tag', 'table' );

        my $text = '';
        for my $item ( $cell->content_list ) {
            if ( !ref $item ) {
                $text .= $item;
            }
            elsif ( $item->tag eq 'table' ) {
                $text .= "\n";
                $text .= DumpTable( $item, $depth + 1, $max_width );
                $text .= "\n";
            }
            else {
                $text .= $item->as_text;
            }
        }
        return $text;
    }

    # DumpTable($table_element [, $depth [, $max_width]])
    #   Render an HTML::Element <table> as a Text::ASCIITable drawing.
    #
    #   $table_element  element whose tag is 'table'; dies otherwise.
    #   $depth          nesting level, default 0. Column widths are only
    #                   constrained at depth 0; nested tables size themselves.
    #   $max_width      approximate overall width budget in characters for a
    #                   depth-0 table, default 120.
    #
    #   <th> cells (from any row) become the column headers; <td> cells become
    #   data rows. If there are no <th> cells the header row is hidden.
    #   Row separator lines are drawn when the table has a true 'border'
    #   attribute. Returns the drawn table as a string.
    sub DumpTable {
        my ( $ht, $depth, $max_width ) = @_;
        $depth     = 0   unless defined $depth;
        $max_width = 120 unless defined $max_width;

        die "$ht is not a table" unless $ht->tag eq 'table';

        my $tt = Text::ASCIITable->new;
        my @headers;
        my @rows;

        for my $tr ( _table_rows($ht) ) {
            my @cells;
            for my $cell ( $tr->content_list ) {
                next unless ref $cell;    # whitespace/text between cells
                my $tag = $cell->tag;
                if ( $tag eq 'td' ) {
                    push @cells, _cell_text( $cell, $depth, $max_width );
                }
                elsif ( $tag eq 'th' ) {
                    push @headers, $cell->as_text;
                }
            }
            push @rows, \@cells if @cells;    # header-only rows add no data
        }

        unless (@headers) {
            # No <th> at all: invent blank column names (one per column of
            # the widest row, at least one) and hide the header area.
            my $ncols = 1 + max( 0, map { $#$_ } @rows );
            @headers = (' ') x $ncols;
            $tt->setOptions( hide_HeadRow  => 1 );
            $tt->setOptions( hide_HeadLine => 1 );
        }

        $tt->setCols(@headers);

        if ( $depth == 0 ) {
            # Rough budget (a guess, not an exact layout computation):
            # 4 characters for the outer '.==.' frame and 2 per column for
            # the '| ' separators; the rest is shared evenly. The width must
            # be a positive whole number of characters.
            my $ncols     = scalar @headers;
            my $available = $max_width - 4 - 2 * $ncols;
            my $per_col   = int( $available / $ncols );
            $per_col = 1 if $per_col < 1;
            $tt->setColWidth( $_, $per_col, 1 ) for @headers;
        }

        $tt->addRow($_) for @rows;
        $tt->setOptions( 'drawRowLine', 1 ) if $ht->attr('border');

        # return $tt->draw();
        return $tt->draw(
            [ '.=', '=.', '-', '-' ],    # .=-----------=.
            [ '|',  '|',  '|' ],         # | info | info |
            [ '|-', '-|', '=', '=' ],    # |-===========-|
            [ '|',  '|',  '|' ],         # | info | info |
            [ "'=", "='", '-', '-' ],    # '=-----------='
            [ '|=', '=|', '-', '*' ]     # rowseperator
        );
    }
    __END__

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://712277]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others wandering the Monastery: (5)
As of 2024-04-19 22:08 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found