#!/usr/bin/perl -w # use strict; use Data::Dumper; use Xml; my $ret; my $tst = Xml -> new ( \*STDIN ); $tst -> set( 'TOLERATE', 'BETWEEN', 'TAGS', '-', 1 ); # allow '-' between tags a new requirement $tst -> set( 'DEPTH', 2 ); # return tags at depth 2 ignoring depth one envelope to avoid huge hash $tst -> set( 'CALLBACK', 'ALL', sub { print Dumper @_; } ); # but now we needed e.g. callbacks which have just been facilitated in the Xml module do { $ret = $tst -> get( 'TAG' ); } while $ret; #### #!/usr/bin/perl -w # # @(#) Xml.pm v1.0 - Xml methods compatible with 5.005 Perl # # (c) The Moron's real name, 2 January 2007 use strict; use Exporter; package Xml; sub new { my $class = shift; my $self = {}; if ( @_ ) { $self -> { FH } = shift; } $self -> { THIS }{ DEPTH } = 0; return bless $self; } sub set { my $self = shift; my $what = shift; my $value = shift; if ( @_ ) { unless ( defined( $self -> { $what } ) ) { $self -> { $what } = {}; } set( $self -> { $what }, $value, @_ ); return; } $self -> { $what } = $value; } sub get { my $self = shift; my $what = shift; if ( $what eq 'TAG' ) { return $self -> gettag(); } if ( @_ ) { if ( defined( $self -> { $what } ) ) { return get ( $self -> { what }, @_ ); } return undef(); } if ( defined( $self -> { $what } ) ) { return $self -> { $what }; } return undef(); } sub gettag { my $self = shift; my $fh = undef(); if ( defined( $self -> { FH } ) ) { $fh = $self -> { FH }; } my $pastNoise = 0; my ( $tag, $sts, $cnt ); # support spurious but defined characters between tags # e.g. the minuses in Mark-IT xml files if ( defined( $self -> { TOLERATE }{ BETWEEN }{ TAGS } ) ) { my $tref = $self -> { TOLERATE }{ BETWEEN }{ TAGS }; Throw( $fh, keys %$tref ); } do { # walk past comment tags e.g. Throw( $fh ); # walk past whitespace and "\n"s defined( $_ ) or return 0; # no more real tags but ok on format /^\ { THIS }{ DEPTH }++; } else { ( $cnt, $sts ) = AntiLex( $fh, '\>' ); /^\>/ or XMLerror( 'Comment Unclosed By > ' ); Step(); } } until ( $pastNoise ); Throw( $fh); my $assignments = {}; ASSMNT: for ( my $assco = 0; !/^\>/; $assco++ ) { my $kwd; ( $kwd, $sts ) = AntiLex( $fh, '\W', ); unless ( $kwd ) { # only valid way is no assignments ( $assco || !/^\>/ ) and XMLerror( 'Format' ); last ASSMNT; } Throw( $fh ); ( $cnt, $sts ) = AntiLex( $fh, '\=' ); ( $cnt || !$sts ) and XMLerror( 'Format' ); Step(); Throw( $fh ); my $val = ''; my $elt; ( $elt, $sts ) = AntiLex( $fh, '\"' ); Step(); $elt and XMLerror( 'Format' ); do { # quotes loop ( $cnt, $sts ) = AntiLex( $fh, '\"', '\\\"' ); $sts or XMLerror( 'Unclosed Quote' ); $val .= $cnt; length() or $_ = <$fh>; } until ( /^\"/ ); # i.e. include \" as part of string Step(); $assignments -> { $kwd } = $val; Throw( $fh ); length() or XMLerror( 'Unexpected EOF' ); } Step(); Throw( $fh ); my $simple = ''; unless( /^ { DEPTH } ); $push ||= ( ( $self -> { DEPTH } ) <= ( $self -> { THIS }{ DEPTH } ) ); while ( !$simple && /^\<(.)/ && ($1 ne '/' ) ) { my $subtag = $self -> gettag(); $push and push @subtags, $subtag; Throw( $fh ); } my $tref = { $tag => { ASSMNTS => $assignments, SUBTAGS => \@subtags } }; AntiLex( $fh, '<' ); if ( /^\<\/(\w+)\>(.*)/ ) { ( $1 eq $tag ) or XMLerror( 'Tag Nesting' ); $_ = $2; # walk past closing tag. $simple and $tref = { $tag => { ASSMNTS => $assignments, VALUE => $simple } }; if ( defined( $self -> { CALLBACK }{ ALL } ) ) { &$self -> { CALLBACK }{ ALL } -> ( $tref ); # %%%line 153%%% } elsif ( defined $self -> { CALLBACK }{ TAG }{ $tag } ) { &$self -> { CALLBACK }{ TAG }{ $tag } ->( $tref ); } $self -> { THIS }{ DEPTH }--; return $tref; } XMLerror( "Format" ); } sub AntiLex { # - walk thru $_, reloading from optional fh if present, until # matching one of a list of regexps # - eats the returned content from $_ ready for # repeated calls to this routine by the calling parser # # to parse positively just give it negative regexps. # the purpose is to roll up a lexer and thrower into a trivial # piece of code. # - SYNOPSIS: ( $stat, $matched ) = AntiLex ( [fh], { pattern, ... } ) my $fh = shift; # undef means simply: don't reload emptied $_ from file my $contents = ''; while ( 1 ) { unless( defined() && length() ) { defined( $fh ) and $_ = <$fh>; $_ or return ( $contents, 0 ); chomp; } for my $pat ( @_ ) { ( /(^$pat)(.*)$/ ) and return ( $contents, 1 ); } /^(.)(.*)$/; $contents .= $1; $_ = $2; } } sub Throw { my $fh = shift; my $ret = AntiLex( $fh, '\S' ) ; ( @_ ) or return $ret; my %tol; while( @_ ) { $tol{ shift() } = 1; } while( defined( $tol{ substr( $_, 0, 1 ) } ) ) { Step(); $ret = Throw( $fh ); } return $ret; } sub Step { # like chopping $_ but from the LEFT of the string s/^(.)//; return $1; } sub XMLerror { my $reason = shift; my @ct = split( "\n" ); die "XML $reason Error: $ct[0]"; } 1; __END__