in reply to Split file based on tag
package MyDataRecord; # Represents a record of MyData use Moose; use Moose::Util::TypeConstraints; # -DateParse enables additional Date formats use Class::Date qw(-DateParse); # load carp for better warnings use Carp qw(cluck confess); # create a subtype for Dates subtype 'MyDate' => as class_type('Class::Date'); # coerce string input, depending on the format coerce 'MyDate' => from 'Str' => via { /^\d{6}$/ ? Class::Date->new([unpack "A4A2A2","20$_"]) : Class::Date->new($_) }; # data-members has it => ( isa => 'Str', is => 'rw', required => 1 ); has edition => ( isa => 'Str', is => 'rw', required => 1 ); has tag => ( isa => 'Str', is => 'rw', required => 1 ); has body => ( isa => 'Str', is => 'rw', required => 1 ); has date => ( isa => 'MyDate', is => 'rw', required => 1, coerce => + 1 ); has tdate => ( isa => 'MyDate', is => 'rw', required => 1, coerce => + 1 ); # check date integrity on construction sub BUILD { my ( $self, $params ) = @_; cluck "date and tdate should be identical\n" unless $self->tdate->ymd eq $self->date->ymd; } 1;
package MyDataParser; # parses a file or handle of MyData use Moose; use Moose::Util::TypeConstraints; use IO::File; subtype 'MyFile' => as class_type('IO::File'); coerce 'MyFile' => from 'Str' => via { IO::File->new($_) }; has file => ( isa => 'MyFile', is => 'rw', coerce => 1 ); sub parse { my $self = shift; my $file = $self->file; my $re_tag = qr/ ^ \{ ([A-Z]+) \} $ /x; my @records = (); my %record; my $tag; # iterate the file while ( <$file> ) { # We found a tag if ( /$re_tag/ ) { # save the new tag name because we use the value often my $ntag = lc $1; unless ( keys %record ) { # create the first record (only happens on the first l +ine) %record = ( $ntag => '' ); } else { # otherwise cut off the last newline of input chomp $record{$tag} if $tag; # initialize an empty value for the tag $record{$ntag} = ''; # we found a new record if ( $ntag eq 'it' ) { # save the data as a record object push @records, MyDataRecord->new(%record); } } # remember current tag for the next iterations $tag = $ntag; next; } # not a tag, so add the data to the current tags value $record{$tag} .= $_; } # save the last record, since there is no final {IT} push @records, MyDataRecord->new(%record); return \@records; }
package main; use warnings; use strict; use Template; use MyDataParser; # get the filename from the console my $file = shift @ARGV || die "Error. You must specify a filename.\n"; # get the data from our file parser print "parsing $file\n"; my $records = MyDataParser->new( file => $file )->parse; # template for XML output # use Template::Plugin::XML::Escape to ensure valid XML my $ixemel = q[ <?xml version="1.0" encoding="UTF-8"?> [% USE XML.Escape %] <record> <it>[% record.it %]</it> <date>[% record.date.strftime('%y%m%d') %]</date> <tdate>[% record.tdate.strftime('%A, %B %d, %Y') %]</tdate> <edition>[% record.edition %]</edition> <tag>[% record.tag %]</tag> <body>[% record.body | xml_escape %]</body> <record> ]; # iterate the records for my $record ( @$records ) { # create a template object my $tt = Template->new(); # and process it( process->(template, data, filename) ); print "writing ", $record->tag, "\n"; die $tt->error(), "\n" unless $tt->process( \$ixemel, {record=>$record}, $record->tag + ); } print "done\n";
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Split file based on tag
by Anonymous Monk on Nov 28, 2009 at 03:29 UTC | |
by holli (Abbot) on Nov 28, 2009 at 13:02 UTC |