Moron has asked for the wisdom of the Perl Monks concerning the following question:
Before Christmas I wrote an XML parser for what I thought would be a one-off requirement. XML::Simple was unsuitable for resons I won't go into, but mainly because I need to run on version 5.005 until a rather overdue Perl upgrade project can be sorted out for current client. Only a week later yet another type of XML needed to be parsed by the same system, so it became time to build a module which both programs can share. The module has a temporary name of Xml until I come up with something better (XML::DeadSimple? XML::BackwardsCompatible?).
The one-off version had some coding inside the parser that was specific to its application. Having stripped that out to make a generic parser, I soon wanted to implement a callback capability. On first run, it crashed with "not a code reference" at line 153 (of module Xml.pm). But using the debugger, just before execution of line 153 (marked %%%line 153%%% for quick reference below), the debugger command x ref( $self -> { CALLBACK }{ ALL } ) produces the answer CODE as erm expected.
Anyone know what I am doing wrong? Could it be I need a different syntax to run the code stored in the object?
Here is the main program test.pl which tests the key function of the module "gettag" in Xml.pm which follows after that ... The test program worked fine without the DEPTH and CALLBACK features, but of course, this version of the program was made precisely to test the new features just implemented. The XML input is fed as STDIN to test.pl
#!/usr/bin/perl -w # use strict; use Data::Dumper; use Xml; my $ret; my $tst = Xml -> new ( \*STDIN ); $tst -> set( 'TOLERATE', 'BETWEEN', 'TAGS', '-', 1 ); # allow '-' betw +een tags a new requirement $tst -> set( 'DEPTH', 2 ); # return tags at depth 2 ignoring depth one + envelope to avoid huge hash $tst -> set( 'CALLBACK', 'ALL', sub { print Dumper @_; } ); # but now +we needed e.g. callbacks which have just been facilitated in the Xml +module do { $ret = $tst -> get( 'TAG' ); } while $ret;
#!/usr/bin/perl -w # # @(#) Xml.pm v1.0 - Xml methods compatible with 5.005 Perl # # (c) The Moron's real name, 2 January 2007 use strict; use Exporter; package Xml; sub new { my $class = shift; my $self = {}; if ( @_ ) { $self -> { FH } = shift; } $self -> { THIS }{ DEPTH } = 0; return bless $self; } sub set { my $self = shift; my $what = shift; my $value = shift; if ( @_ ) { unless ( defined( $self -> { $what } ) ) { $self -> { $what } = {}; } set( $self -> { $what }, $value, @_ ); return; } $self -> { $what } = $value; } sub get { my $self = shift; my $what = shift; if ( $what eq 'TAG' ) { return $self -> gettag(); } if ( @_ ) { if ( defined( $self -> { $what } ) ) { return get ( $self -> { what }, @_ ); } return undef(); } if ( defined( $self -> { $what } ) ) { return $self -> { $what }; } return undef(); } sub gettag { my $self = shift; my $fh = undef(); if ( defined( $self -> { FH } ) ) { $fh = $self -> { FH }; } my $pastNoise = 0; my ( $tag, $sts, $cnt ); # support spurious but defined characters between tags # e.g. the minuses in Mark-IT xml files if ( defined( $self -> { TOLERATE }{ BETWEEN }{ TAGS } ) ) { my $tref = $self -> { TOLERATE }{ BETWEEN }{ TAGS }; Throw( $fh, keys %$tref ); } do { # walk past comment tags e.g. <?version ... > Throw( $fh ); # walk past whitespace and "\n"s defined( $_ ) or return 0; # no more real tags but ok on forma +t /^\</ or XMLerror( 'Format' ); # some crap was found Step(); # step over one char ( $tag, $sts ) = AntiLex( $fh, '\W' ); # collect data until \ +W # and then walk there if ( $pastNoise = $tag ) { $self -> { THIS }{ DEPTH }++; } else { ( $cnt, $sts ) = AntiLex( $fh, '\>' ); /^\>/ or XMLerror( 'Comment Unclosed By > ' ); Step(); } } until ( $pastNoise ); Throw( $fh); my $assignments = {}; ASSMNT: for ( my $assco = 0; !/^\>/; $assco++ ) { my $kwd; ( $kwd, $sts ) = AntiLex( $fh, '\W', ); unless ( $kwd ) { # only valid way is no assignments ( $assco || !/^\>/ ) and XMLerror( 'Format' ); last ASSMNT; } Throw( $fh ); ( $cnt, $sts ) = AntiLex( $fh, '\=' ); ( $cnt || !$sts ) and XMLerror( 'Format' ); Step(); Throw( $fh ); my $val = ''; my $elt; ( $elt, $sts ) = AntiLex( $fh, '\"' ); Step(); $elt and XMLerror( 'Format' ); do { # quotes loop ( $cnt, $sts ) = AntiLex( $fh, '\"', '\\\"' ); $sts or XMLerror( 'Unclosed Quote' ); $val .= $cnt; length() or $_ = <$fh>; } until ( /^\"/ ); # i.e. include \" as part of string Step(); $assignments -> { $kwd } = $val; Throw( $fh ); length() or XMLerror( 'Unexpected EOF' ); } Step(); Throw( $fh ); my $simple = ''; unless( /^</ ) { # case of simple value for current tag ( $simple, $sts ) = AntiLex( $fh, '<' ); } my @subtags = (); # collect nested tags to current tag my $push = !defined( $self -> { DEPTH } ); $push ||= ( ( $self -> { DEPTH } ) <= ( $self -> { THIS }{ DEPTH } + ) ); while ( !$simple && /^\<(.)/ && ($1 ne '/' ) ) { my $subtag = $self -> gettag(); $push and push @subtags, $subtag; Throw( $fh ); } my $tref = { $tag => { ASSMNTS => $assignments, SUBTAGS => \@subtags } }; AntiLex( $fh, '<' ); if ( /^\<\/(\w+)\>(.*)/ ) { ( $1 eq $tag ) or XMLerror( 'Tag Nesting' ); $_ = $2; # walk past closing tag. $simple and $tref = { $tag => { ASSMNTS => $assignments, VALUE => $simple } }; if ( defined( $self -> { CALLBACK }{ ALL } ) ) { &$self -> { CALLBACK }{ ALL } -> ( $tref ); # %%%line 153% +%% } elsif ( defined $self -> { CALLBACK }{ TAG }{ $tag } ) { &$self -> { CALLBACK }{ TAG }{ $tag } ->( $tref ); } $self -> { THIS }{ DEPTH }--; return $tref; } XMLerror( "Format" ); } sub AntiLex { # - walk thru $_, reloading from optional fh if pres +ent, until # matching one of a list of regexps # - eats the returned content from $_ ready for # repeated calls to this routine by the calling parser # # to parse positively just give it negative regexps. # the purpose is to roll up a lexer and thrower into a trivial # piece of code. # - SYNOPSIS: ( $stat, $matched ) = AntiLex ( [fh], { pattern, ... + } ) my $fh = shift; # undef means simply: don't reload emptied $_ from + file my $contents = ''; while ( 1 ) { unless( defined() && length() ) { defined( $fh ) and $_ = <$fh>; $_ or return ( $contents, 0 ); chomp; } for my $pat ( @_ ) { ( /(^$pat)(.*)$/ ) and return ( $contents, 1 ); } /^(.)(.*)$/; $contents .= $1; $_ = $2; } } sub Throw { my $fh = shift; my $ret = AntiLex( $fh, '\S' ) ; ( @_ ) or return $ret; my %tol; while( @_ ) { $tol{ shift() } = 1; } while( defined( $tol{ substr( $_, 0, 1 ) } ) ) { Step(); $ret = Throw( $fh ); } return $ret; } sub Step { # like chopping $_ but from the LEFT of the string s/^(.)//; return $1; } sub XMLerror { my $reason = shift; my @ct = split( "\n" ); die "XML $reason Error: $ct[0]"; } 1; __END__
-M
Free your mind
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re: Code reference problem
by Corion (Patriarch) on Jan 04, 2007 at 15:03 UTC | |
by Moron (Curate) on Jan 04, 2007 at 15:08 UTC | |
by Moron (Curate) on Jan 04, 2007 at 15:57 UTC | |
by shmem (Chancellor) on Jan 04, 2007 at 16:19 UTC | |
by Moron (Curate) on Jan 04, 2007 at 16:34 UTC | |
by kwaping (Priest) on Jan 04, 2007 at 16:10 UTC |