in reply to Extracting information from multiple files in a directory
Is this valid XHTML? If so, you're in luck -- an (untested) SAX-style, event-driven parser to the rescue:
#!/usr/bin/perl
use strict;
use warnings;

use XML::Parser;
use File::Find;

# Extract the text of every <notes> element from the LOG*/REC* XML files
# found anywhere under $root_dir and print it to STDOUT, with CR/LF
# characters flattened to spaces.

# Root of the directory tree to search.
my $root_dir = 'c:/test1';

# Number of <notes> elements currently open in the document being parsed.
# Declared before find() runs so the handlers below see an initialised value.
my $in_notes = 0;

# One event-driven parser is reused for every matching file.
my $p = XML::Parser->new(
    Handlers => {
        Start => \&handle_elem_start,
        End   => \&handle_elem_end,
        Char  => \&handle_char_data,
    },
);

find( \&wanted, $root_dir );

# File::Find callback, invoked once per directory entry with the entry's
# name in $_ and its full path in $File::Find::name.
# Parses the entry if its name contains LOG or REC and ends in ".xml".
sub wanted {
    # The alternation is grouped so that the ".xml" suffix applies to both
    # prefixes; \z anchors the extension at the end of the name.
    # Leave the callback with 'return': 'next' would reach into File::Find's
    # own loop and triggers an "Exiting subroutine via next" warning.
    return unless /(?:LOG|REC).*\.xml\z/i;

    # Skip directories (or anything else) that merely happen to match.
    return unless -f $_;

    $p->parse( read_file($File::Find::name) );
    return;
}

# Slurp a whole file.
#   $filename - path of the file to read
# Returns the file content as a single string.
# Dies with "Cannot open ..." if the file cannot be opened.
sub read_file {
    my ($filename) = @_;

    # Three-argument open with a lexical handle: a file name starting with
    # '>' or '|' can no longer be misread as an open mode.
    open( my $in, '<', $filename )
        or die "Cannot open $filename: $!\n";

    local $/;    # undef record separator => read everything at once
    my $content = <$in>;
    close $in;

    return $content;
}

# Start-tag handler: note that a <notes> element has been entered.
#   $expat - parser instance passed in by XML::Parser (unused)
#   $name  - name of the element being opened
sub handle_elem_start {
    my ( $expat, $name ) = @_;
    return unless $name eq 'notes';
    $in_notes++;
    return;
}

# End-tag handler: note that a <notes> element has been left.
#   $expat - parser instance passed in by XML::Parser (unused)
#   $name  - name of the element being closed
sub handle_elem_end {
    my ( $expat, $name ) = @_;
    return unless $name eq 'notes';
    $in_notes--;
    return;
}

# Character-data handler: print text that occurs inside <notes>.
#   $expat - parser instance passed in by XML::Parser (unused)
#   $text  - a chunk of character data; one text node may arrive in
#            several chunks, so each chunk is printed as it comes
sub handle_char_data {
    my ( $expat, $text ) = @_;
    return unless $in_notes;

    # Flatten line breaks in the text itself (not in the depth counter).
    $text =~ tr/\r\n/  /;
    print $text;
    return;
}
It does kinda scare me that that felt natural.
|
|---|