in reply to Re: HTML::TreeBuilder:: identifing xpath-expression - first attempt
in thread HTML::TreeBuilder:: identifing xpath-expression - first attempt
#!/usr/bin/perl --
# Demonstrates pulling address fields out of a saved educa.ch popup page
# with HTML::TreeBuilder::XPath, addressing each <div> under <body> by its
# 1-based position.
#
# The sample page is embedded below in a non-interpolating here-doc, so the
# script needs no network access. The text after __END__ is the sample
# output recorded in the original post.
use strict;
use warnings;

use HTML::TreeBuilder::XPath;

#~ $XML::XPathEngine::DEBUG = 1;

my $tree = HTML::TreeBuilder::XPath->new;

# Single-quoted here-doc tag: '@' and '$' inside the markup stay literal.
# The markup is kept on the page's own long lines; do not re-wrap it, since
# added whitespace inside the divs would end up in the extracted values.
$tree->parse_content(<<'__HTML__');
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html><head><meta name="generator" content="DigiOnline GmbH - WebWeaver 3.4 CMS - http://www.webweaver.de"><title>educa.ch</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"><link rel="stylesheet" href="101.htm"><script src="102.htm"></script><script language="JavaScript"><!--
var did='d79376';
var root=new Array('d200','d205','d73137','d1566','d79376','d');
var usefocus = 1;
function check() {
if ((self.focus) && (usefocus)) {
self.focus();
}
}
// --></script></head><body bgcolor="#FFFFFF" leftmargin="0" topmargin="0" marginwidth="0" marginheight="0" onload="check();"><table cellspacing="0" cellpadding="0" border="0" width="100%"><tr><td width="15" class="popuphead"><img src="/0.gif" alt="" width="15" height="16"></td><td width="99%" class="popuphead">Adresse - Schulen in der Schweiz</td><td width="20" class="popuphead" valign="middle"><a href="#" title="Print" onclick="window.print(); return false;"><img src="../pics/print16x13.gif" alt="Drucken" width="16" height="13"></a></td><td width="20" class="popuphead" valign="middle"><a href="#" title="close" onclick="window.close(); return false;"><img src="../pics/close21x13.gif" alt="Schliessen" width="21" height="13"></a></td></tr>
<tr bgcolor="#B2B2B2"><td colspan="4"><img src="/0.gif" alt="" width="1" height="1"></td></tr></table><div class="leerzeile"> </div><div class="leerzeile"><img src="/0.gif" alt="" width="15"height="8">Altes Schulhaus Ossingen </div><div class="leerzeile"> </div><div><img src="/0.gif" alt="" width="15" height="8">Guntibachstrasse 10</div><div><img src="/0.gif" alt="" width="15" height="8"></div><div><img src="/0.gif" alt="" width="15"
height="8">8475  Ossingen</div><div class="leerzeile"> </div><div><img src="/0.gif" alt="" width="15" height="8"><a href="" target="_blank"></a></div><div><img src="/0.gif" alt="" width="15" height="8"><a href="mailto: sekretariat.psossingen@bluewin.ch">sekretariat.psossingen@bluewin.ch</a></div><div class="leerzeile"> </div><div><img src="/0.gif" alt="" width="15" height="8">Tel:<img src="/0.gif" alt="" width="6" height="8">052 317 15 45 </div><div><img src="/0.gif" alt="" width="15" height="8">Fax:<img src="/0.gif" alt="" width="4" height="8">052 317 04 42 </div><div> </div></body></html>
__HTML__

# you can delete html/body
# NOTE(review): the note above is the original author's; the shortened
# query form is not shown in the post, so verify it before relying on it.
#
# Each query selects one <div> by position among the <div> children of
# <body>; the positions skipped here are spacer or empty divs in the markup.
for my $query (
    qw!
    /html/body/div[2]
    /html/body/div[4]
    /html/body/div[6]
    /html/body/div[9]
    /html/body/div[11]
    /html/body/div[12]
    !
    )
{
    # Print the query, then the value found for it, then a blank line.
    print $query, "\n", $tree->findvalue($query), "\n\n";
}

# Explicitly dismantle the parse tree once we are done with it.
$tree->delete;

__END__
/html/body/div[2]
Altes Schulhaus Ossingen

/html/body/div[4]
Guntibachstrasse 10

/html/body/div[6]
8475 Ossingen

/html/body/div[9]
sekretariat.psossingen@bluewin.ch

/html/body/div[11]
Tel:052 317 15 45

/html/body/div[12]
Fax:052 317 04 42
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^3: HTML::TreeBuilder:: identifing xpath-expression - first attempt
by Perlbeginner1 (Scribe) on Oct 17, 2010 at 13:35 UTC | |
by Perlbeginner1 (Scribe) on Oct 17, 2010 at 17:10 UTC | |
|
Re^3: HTML::TreeBuilder:: identifing xpath-expression - first attempt
by Perlbeginner1 (Scribe) on Oct 17, 2010 at 17:29 UTC | |
by Anonymous Monk on Apr 02, 2011 at 15:11 UTC |