=head1 NAME

XML::TokeParser::Simple - pretty much like HTML::TokeParser::Simple

=head1 DESCRIPTION

Get the benefits of L<XML::TokeParser::Token|"What is XML::TokeParser::TOKEN">.

Use XML::TokeParser::Simple in place of XML::TokeParser, and every token
returned by C<get_token> or C<get_tag> comes back blessed into that class ;)

Hopefully XML::TokeParser will add this in the next version.


=head1 SYNOPSIS

    # file: printComments.pl
    # desc: prints all the comments from an xml file
    use XML::TokeParser::Simple;
    my $p = XML::TokeParser::Simple->new('file.xml');
    while(defined( my $t = $p->get_token )) {
        print $t->text,"\n" if $t->is_comment;
    }

See L<"What is XML::TokeParser::TOKEN">

=cut
 
package XML::TokeParser::Simple;
# Strictures for this package as well: without this line they only start at
# the `use strict` of the token package further down, so a misspelled
# variable in the methods below would silently become a package global.
use strict;
use XML::TokeParser();
# Package globals are declared with `our` rather than the obsolete `use vars`.
our $VERSION = '0.01';
# Everything is inherited from XML::TokeParser; this package only wraps the
# token-returning methods so that their results are blessed.
our @ISA = qw/ XML::TokeParser /;

# Fetch the next token from the parent parser and bless it into
# XML::TokeParser::Token so the accessor / is_* methods can be called on it.
# All arguments are handed to the parent's get_token unchanged.
# Returns an empty list (undef in scalar context) when the parent returns an
# undefined token.
sub get_token {
	my $parser = shift;
	my $tok    = $parser->SUPER::get_token(@_);

	if ( defined $tok ) {
		return bless $tok, 'XML::TokeParser::Token';
	}
	return;
}

# Fetch the next start/end tag from the parent parser as a
# XML::TokeParser::Token.
#
# All arguments are handed to the parent's get_tag unchanged.
# Returns an empty list (undef in scalar context) when the parent returns an
# undefined token.
#
# XML::TokeParser documents get_tag as returning the token WITHOUT the
# leading "S"/"E" type element, and with end tags marked by a "/" prefixed to
# the element name:
#
#     [$tag, $attr, $attrseq, $raw]     start tag
#     ["/$tag", $raw]                   end tag
#
# The Token methods all expect the typed layout produced by get_token
# (["S", $tag, ...] / ["E", $tag, $raw]), so the token is normalised to that
# layout before blessing.  A token that already carries its type is left
# untouched.
sub get_tag {
	my $self = shift;
	my $token = $self->SUPER::get_tag( @_ );
	return unless defined $token;

	if ( $token->[0] =~ s{\A/}{} ) {
		# "/name" can only be an untyped end tag; the slash is now stripped.
		unshift @$token, 'E';
	}
	elsif ( ref( $token->[1] ) eq 'HASH' ) {
		# Untyped start tag: the attribute hash sits directly after the name.
		# (In a typed token slot 1 is the tag name, a plain string.)
		unshift @$token, 'S';
	}

	return bless $token, 'XML::TokeParser::Token';
}

package XML::TokeParser::Token;
use strict;

=head1 What is XML::TokeParser::TOKEN

A token is a blessed array reference,
that you acquire using $p->get_token,
and that might look like:

    ["S",  $tag, $attr, $attrseq, $raw]
    ["E",  $tag, $raw]
    ["T",  $text, $raw]
    ["C",  $text, $raw]
    ["PI", $target, $data, $raw]

If you don't like remembering array indices,
you may access the attributes of a token like:

    $p->get_token->tag, $t->attr, $t->attrseq, $t->raw ...

=head2 Methods

Tokens may be inspected using any of these is_* methods
(C<is_start_tag>, C<is_end_tag> and C<is_tag> optionally take a tag name to match):

    is_text
    is_comment
    is_pi
    is_process_instruction
    is_start_tag
    is_end_tag
    is_tag

like:

    print $t->target if $t->is_pi;
    print "The comment says ", $t->text if $t->is_comment;

=cut

# ---------------------------------------------------------------------------
# Accessors
#
# Each accessor hands back its field only when the token has a matching type.
# For any other token it returns the (false) result of the type check itself,
# i.e. '' or 0 -- NOT undef -- so test with an is_* predicate or for truth
# rather than with defined().
# ---------------------------------------------------------------------------

# PI tokens: ["PI", $target, $data, $raw]
sub target {
    my $self  = shift;
    my $match = $self->is_pi;
    return $match ? $self->[1] : $match;
}

sub data {
    my $self  = shift;
    my $match = $self->is_pi;
    return $match ? $self->[2] : $match;
}

# Every token type keeps its raw source text in the last slot.
sub raw {
    my $self = shift;
    return $self->[-1];
}

# Start tags: ["S", $tag, $attr, $attrseq, $raw]
sub attr {
    my $self  = shift;
    my $match = $self->is_start_tag();
    return $match ? $self->[2] : $match;
}

sub attrseq {
    my $self  = shift;
    my $match = $self->is_start_tag();
    return $match ? $self->[3] : $match;
}

# Start or end tags: the element name lives in slot 1.
sub tag {
    my $self  = shift;
    my $match = $self->is_tag;
    return $match ? $self->[1] : $match;
}

# Comment or text tokens: ["C", $text, $raw] / ["T", $text, $raw]
sub text {
    my $self  = shift;
    my $match = $self->is_text || $self->is_comment;
    return $match ? $self->[1] : $match;
}

# ---------------------------------------------------------------------------
# Predicates
#
# is_text / is_comment / is_pi return 1 or the false result of the string
# comparison; the tag predicates return 1 or 0 via _is and accept an optional
# tag name that must also match.
# ---------------------------------------------------------------------------

sub is_text {
    my $self = shift;
    my $same = $self->[0] eq 'T';
    return $same ? 1 : $same;
}

sub is_comment {
    my $self = shift;
    my $same = $self->[0] eq 'C';
    return $same ? 1 : $same;
}

sub is_pi {
    my $self = shift;
    my $same = $self->[0] eq 'PI';
    return $same ? 1 : $same;
}

# Long-hand alias for is_pi; goto &sub re-enters is_pi with the same @_.
sub is_process_instruction { goto &is_pi; }

sub is_start_tag {
    my ( $self, $name ) = @_;
    return $self->_is( 'S', $name );
}

sub is_end_tag {
    my ( $self, $name ) = @_;
    return $self->_is( 'E', $name );
}

sub is_tag {
    my ( $self, $name ) = @_;
    return $self->_is( 'S', $name ) || $self->_is( 'E', $name );
}

# Private helper: does this token have type $type and, when $name is defined,
# the tag name $name?  Always returns 1 or 0.
sub _is {
    my ( $self, $type, $name ) = @_;

    return 0 unless $self->[0] eq $type;
    return 1 unless defined $name;
    return $self->[1] eq $name ? 1 : 0;
}

1;

=head1 DEMO

Execute this file as if it were a script, as in C<perl WhateverYouSavedItAs>,
and you'll see how (and that) this module works.

=cut

package main;

# Demo / self-test.  The statements in this block run only when the file is
# executed directly: at file level caller() is empty in that case, whereas it
# is set when the file is pulled in through use/require.
#
# NOTE: `use` and named `sub` definitions take effect at compile time, so
# Data::Dumper is still loaded and main::prod / main::clean are still defined
# whenever this file is loaded -- only the run-time statements are skipped.
if ( !caller() ) {
    use Data::Dumper;    # not used by the code visible here

    # The original article parses the whole XML spec, REC-xml-19980210.xml
    # (http://www.w3.org/TR/1998/REC-xml-19980210.xml), as referenced in
    # http://www.xmltwig.com/article/ways_to_rome/ways_to_rome.html.
    # Chances are you won't have that file in the current directory, so an
    # inline excerpt is parsed instead (passed as a reference to the string).
    my $source = \ q[<p>
<scrap lang='ebnf' id='document'>
<head>Document</head>
<prod id='NT-document'><lhs>document</lhs>
<rhs><nt def='NT-prolog'>prolog</nt> 
<nt def='NT-element'>element</nt> 
<nt def='NT-Misc'>Misc</nt>*</rhs></prod>
</scrap>
</p>];

    my $count  = 0;     # running production number
    my $parser = XML::TokeParser::Simple->new($source);
    my $line   = "";    # production currently being assembled

    while ( defined( my $token = $parser->get_token() ) ) {

        # <lhs> opens a new production: number it and grab the left-hand side.
        if ( $token->is_start_tag('lhs') ) {
            $count++;
            $line = join '', "[$count] ", $parser->get_text('/lhs'), " ::= ";
            next;
        }

        # <rhs> contributes the right-hand side text.
        if ( $token->is_start_tag('rhs') ) {
            $line .= $parser->get_text('/rhs');
            next;
        }

        # </prod> completes the production: print it and start afresh.
        if ( $token->is_end_tag('prod') ) {
            print clean($line), "\n";
            $line = "";
        }
    }

    undef $line;
    undef $parser;

    ## mirod already did this, so I'm borrowing

    # Handler in the style of the referenced XML::Twig article; it is not
    # called anywhere in this demo.  It shares $count with the loop above.
    sub prod {
        my ( $twig, $prod ) = @_;
        my $lhs = $prod->field('lhs');
        my $rhs = join '', map { $_->text } $prod->children('rhs');

        $count++;
        print clean("[$count] $lhs ::= $rhs") . "\n";
    }

    # Normalise whitespace in a string: the byte pair \xc2\xa0 becomes a
    # space, whitespace runs collapse to a single space, and trailing
    # whitespace is removed.  Returns the cleaned copy.
    sub clean {
        my $text = shift;
        for ($text) {
            s/\xc2\xa0/ /sg;
            s/\s+/ /g;
            s{\s$}{}g;
        }
        return $text;
    }
}

1;

=head1 SEE ALSO

L<XML::TokeParser>, L<HTML::TokeParser>, L<HTML::TokeParser::Simple>,
L<XML::Twig>

=head1 AUTHOR

D.H. <PodMaster@cpan.org>

=head1 LICENSE

copyright (c) D.H. 2002 All rights reserved.

This program is released under the same terms as perl itself.
If you don't know what that means, visit http://perl.com
or execute "perl -v" at a commandline (assuming you have perl installed).

=cut