Another issue is that you are using commas to separate data with embedded commas. This will make it difficult to parse. Here is a revised version with some validation code and a format method. The data is placed into an array or array refs (2D array) which lets the format method join the elements with any char (I suggest tabs "\t" are better than commas in most cases) and quote the strings. If you wanted only as required quoting you could map like map{ s/"/\\"/; m/\Q$sep\E/ ? qq!"$_"! : $_ }
#!/usr/bin/perl -w
use strict;
local $/ = '';
my $date = qr/\d\d\.\d\d.\d\d\d\d/;
my $fee = qr/\d\d\d\d\d\.\d\d/;
my $one = qr/
^
\d\d #record10 = integer (length 2) (10).
(\d{5}) #primary key = integer (length 5).(00001)
(\d{3}) #sub item number = integer (length 3).(225)
($date) #start date = dd.mm.yyyy.(01.11.1996)
($date) #end date = dd.mm.yyyy.(00.00.0000)
(\w{3}) #category = alphanumeric (length 3).(BVF)
(\w{3}) #group = alphanumeric (length 3).(AAA)
(\w{3}) #subgroup = alphanumeric (length 3).(A65)
([SN]) #item type = text (options: S or N).(S)
([ND]) #fee type = text (options: N or D).(N)
(\w{3}) #provider type = alphanumeric (length 3).(009)
([YN]) #new item = text (options: Y or N).(Y)
([YN]) #item change = text (options: Y or N).(N)
([YN]) #procedure change = text (options: Y or N).(Y)
([YN]) #description change = text (options: Y or N).(N)
([YN]) #fee change = text (options: Y or N).(Y)
$
/x;
my $two = qr/
^
\d\d #record20 = integer (length 2).(20)
($date) #start date = dd.mm.yyyy.(01.11.1996)
($fee) #fee = decimal (nnnnn.nn).(00098.05)
($fee) #benefit1 = decimal (nnnnn.nn).(00073.55)
($fee) #benefit2 = decimal (nnnnn.nn).(00083.35)
$
/x;
my $gen = qr/
^
\d\d
($date)
(.*)
$
/x;
while(my $rec = <DATA>) {
my $pkey,
my @out = ();
for my $line( split /[\n\r]/, $rec ) {
my $type = substr $line,0,2;
if ( $type == 10 ) {
my @data = $line =~ m/$one/;
die error( $line, $rec, $one ) unless @data;
$pkey = $data[0];
$out[$type] = [$pkey, $type, @data];
}
elsif ( $type == 20 ) {
my @data = $line =~ m/$two/;
die error( $line, $rec, $two ) unless @data;
$out[$type] = [$pkey, $type, @data];
}
elsif ( $line =~ m/$gen/ ) {
my @data = ( $1, $2 );
if ( defined $out[$type] ) {
$out[$type][-1] .= ' ' . $2; # continuing string
}
else {
$out[$type] = [$pkey, $type, @data];
}
}
else {
die error( $line, $rec, $gen );
}
}
format_output( [@out[10,20,30,40,50]], ",", 'quote' );
}
sub format_output {
my ($aryref, $sep, $quote ) = @_;
for my $ref(@$aryref) {
@$ref = map{ s/"/\\"/; qq!"$_"! } @$ref if $quote;
print join $sep, @$ref;
print "\n";
}
}
sub error {
my ( $line, $rec, $re ) = @_;
return "Invalid Line:\n$line\n\nRE:\n$re\n\nRecord:\n$rec\n";
}
__DATA__
100000122501.11.199600.00.0000BVFAAAA65SN009YNYNY
2001.11.199600098.0500073.5500083.35
3014.11.1996This derived fee is for professional attendances for GP an
+d Specialist.
4023.12.1996(Anaes.)
5001.11.1997Professional attendance being an attendance at
5001.11.1997other than consulting rooms, by a general
5001.11.1997practitioner on not more than 1 patient.
100000222601.11.199600.00.0000BDGAABA66SN010YNYNY
2001.11.199600098.0500073.5500083.35
3014.11.1996This derived fee is for professional attendances by GP onl
+y.
4023.12.1996(Anaes.)
5001.11.1997Professional attendance being an attendance at
5001.11.1997other than consulting rooms, by a general
5001.11.1997practitioner only on not more than 1 patient.
|