comment on

0: =head1 NAME
1: 
2: XML::TokeParser::Simple - pretty much like HTML::TokeParser::Simple
3: 
4: =head1 DESCRIPTION
5: 
6: Get the benefits of L<XML::TokeParser::TOKEN|"What is XML::TokeParser::TOKEN">.
7: 
8: use XML::TokeParser::Simple and get the blessing ;)
9: 
10: Hopefully XML::TokeParser will add this in the next version.
11: 
12: 
13: =head1 SYNOPSIS
14: 
15:     # file: printComments.pl
16:     # desc: prints all the comments from an xml file
17:     use XML::TokeParser::Simple;
18:     my $p = new XML::TokeParser::Simple('file.xml');
19:     while(defined( my $t = $p->get_token )) {
20:         print $t->text,"\n" if $t->is_comment;
21:     }
22: 
23: See L<"What is XML::TokeParser::TOKEN">
24: 
25: =cut
26:  
27: package XML::TokeParser::Simple;
28: use XML::TokeParser();
29: use vars qw/ @ISA $VERSION /;
30: $VERSION = '0.01';
31: @ISA = qw/ XML::TokeParser /;
32: 
33: sub get_token {
34: 	my $self = shift;
35: 	my $token = $self->SUPER::get_token( @_ );
36: 	return unless defined $token;
37: 	bless $token, 'XML::TokeParser::Token';
38: }
39: 
40: sub get_tag {
41: 	my $self = shift;
42: 	my $token = $self->SUPER::get_tag( @_ );
43: 	return unless defined $token;
44: 	bless $token, 'XML::TokeParser::Token';
45: }
46: 
47: package XML::TokeParser::Token;
48: use strict;
49: 
50: =head1 What is XML::TokeParser::TOKEN
51: 
52: A token is a blessed array reference,
53: that you acquire using $p->get_token,
54: and that might look like:
55: 
56:     ["S",  $tag, $attr, $attrseq, $raw]
57:     ["E",  $tag, $raw]
58:     ["T",  $text, $raw]
59:     ["C",  $text, $raw]
60:     ["PI", $target, $data, $raw]
61: 
62: If you don't like remembering array indices,
63: you may access the attributes of a token like:
64: 
65:     $p->get_token->tag,  $t->attr, $t->attrseg, $t->raw ...
66: 
67: =head2 Methods
68: 
69: Tokens  may be inspected using any of these is_* methods
70: 
71:     is_text
72:     is_comment
73:     is_pi
74:     is_process_instruction
75:     is_start_tag
76:     is_end_tag
77:     is_tag
78: 
79: like:
80: 
81:     print $t->target if $t->is_pi;
82:     print "The comment says ", $t->text if $t->is_comment;
83: 
84: =cut
85: 
86: # for PI
87: sub target  { return $_[0]->[1] if $_[0]->is_pi; }
88: sub data    { return $_[0]->[2] if $_[0]->is_pi; }
89: sub raw     { return $_[0]->[-1]; }
90: 
91: #for S
92: sub attr    { return $_[0]->[2] if $_[0]->is_start_tag(); }
93: sub attrseq { return $_[0]->[3] if $_[0]->is_start_tag(); }
94: 
95: #for S|E
96: sub tag     { return $_[0]->[1] if $_[0]->is_tag; }
97: 
98: #for C|T
99: sub text    { return $_[0]->[1] if $_[0]->is_text or $_[0]->is_comment; }
100: 
101: # test your token
102: sub is_text                { return 1 if $_[0]->[0] eq 'T'; }
103: sub is_comment             { return 1 if $_[0]->[0] eq 'C'; }
104: sub is_pi                  { return 1 if $_[0]->[0] eq 'PI'; }
105: sub is_process_instruction { goto &is_pi; }
106: sub is_start_tag           { return $_[0]->_is( S => $_[1] ); }
107: sub is_end_tag             { return $_[0]->_is( E => $_[1] ); }
108: sub is_tag                 { return $_[0]->_is( S => $_[1] )
109:                                  || $_[0]->_is( E => $_[1] ); }
110: 
111: sub _is {
112:     if($_[0]->[0] eq $_[1]){
113:         if(defined $_[2]){
114:             return 1 if $_[0]->[1] eq $_[2];
115:         }else{
116:             return 1;
117:         }
118:     }
119:     return 0;
120: }
121: 
122: 1;
123: 
124: =head1 DEMO
125: 
126: execute this file as if it were a script, as in C<perl WhateverYouSavedItAs>,
127: and you'll see how/that this module works.
128: 
129: =cut
130: 
131: package main;
132: 
133: unless(caller()){  
134:     use Data::Dumper;
135:     my $file = 'REC-xml-19980210.xml';
136:        $file = \ q[<p>
137: <scrap lang='ebnf' id='document'>
138: <head>Document</head>
139: <prod id='NT-document'><lhs>document</lhs>
140: <rhs><nt def='NT-prolog'>prolog</nt> 
141: <nt def='NT-element'>element</nt> 
142: <nt def='NT-Misc'>Misc</nt>*</rhs></prod>
143: </scrap>
144: </p>];
145: ## Cause chances are you won't have
146: ## http://www.w3.org/TR/1998/REC-xml-19980210.xml
147: ## as referenced in 
148: ## http://www.xmltwig.com/article/ways_to_rome/ways_to_rome.html
149: ## in the current directory
150: 
151: 
152: 
153:     my $i = 0;
154:     my $p = XML::TokeParser::Simple->new($file);
155: 
156:     my $Ret = "";
157: 
158:     while(defined(my $t = $p->get_token() )){
159:     
160:         if( $t->is_start_tag('lhs') ){
161:             $i++;
162:             $Ret = join '', "[$i] ", $p->get_text('/lhs'), " ::= ";
163:         }elsif( $t->is_start_tag('rhs') ){
164:             $Ret .= $p->get_text('/rhs');
165:         }elsif( $t->is_end_tag('prod') ){
166:             print clean($Ret),"\n";
167:             $Ret = "";
168:         }
169:     }
170:     
171:     undef $Ret;
172:     undef $p;
173:     
174:     ## mirod already did this, so I'm borrowing
175:     
176:     sub prod {
177:         my( $twig, $prod)= @_;
178:         my $lhs= $prod->field( 'lhs');
179:         my $rhs= join '', map {$_->text} $prod->children( 'rhs');
180:     
181:         $i++;
182:         my $prod_text = "[$i] $lhs ::= $rhs";
183:         print clean( $prod_text) . "\n";
184:     }
185:     
186:     
187:     sub clean { 
188:             my( $string)= @_;
189:             $string =~ s/\xc2\xa0/ /sg;
190:             $string =~ s/\s+/ /g; $string=~ s{\s$}{}g;
191:             return $string;
192:     }
193: }
194: 
195: 1;
196: 
197: =head1 SEE ALSO
198: 
199: L<XML::TokeParser>, L<HTML::TokeParser>, L<HTML::TokeParser::Simple>,
200: L<XML::Twig>
201: 
202: =head1 AUTHOR
203: 
204: D.H. <PodMaster@cpan.org>
205: 
206: =head1 LICENSE
207: 
208: copyright (c) D.H. 2002 All rights reserved.
209: 
210: This program is released under the same terms as perl itself.
211: If you don't know what that means, visit http://perl.com
212: or execute "perl -v" at a commandline (assuming you have perl installed).
213: 
214: =cut

In reply to XML::TokeParser::Simple - pretty much like HTML::TokeParser::Simple by PodMaster

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.