#!/usr/bin/perl -w use strict; my $file = ''; $file = shift @ARGV unless $file; unless ($file) { print "Please input a filename: "; chomp ($file = <>); } my %change = ( HEAL => 'NOTE Medical:', HIST => 'NOTE Biography:', EDUC => 'NOTE Educated:', RESI => 'NOTE Resided:', OCCU => 'NOTE Occupation:'); open (FILE, "<$file") or die "Unable to open $file: $!"; my @data = ; close FILE; my $change_this = join "|", keys %change; $change_this = qr/($change_this)/; my $found_sour = 0; my $last_line = ''; for (@data) { # make the IBM DOS to ASCII substitution s/1 CHAR IBM DOS/O CHAR ASCII/; s/$change_this/$change{$1}/g; $found_sour = 0 if m/^0/; $found_sour = 1 if m/^0.+SOUR\s*$/; if ($found_sour == 1) { $found_sour++ if s/1 NAME/1 TITL/; } elsif ($found_sour) { s/1 NAME/2 CONT/; } # fix the name problem if ($last_line =~ m/\bINDI\s*$/) { if (m/1\s*NAME/) { s|([\w.]+)|($1 eq "NAME")? $1 :ucfirst lc $1|eg; # the MacDonald fix, the \b (boundary) condition # and the quote check should limit appropriately # Names like Mace and Mack are treated as special # cases as this is the only way to deal with them s # substitute regex / # opening delimiter for regex (?; unless ($input =~ m/y/i) { print "Aborting!\n"; exit; } } open (FILE, ">$file") or die "Unable to create $file: $!"; print FILE @data; close FILE; print "Data written to $file OK!"; ##

##

# here it is with comments (not updated from original draft)
# there are two inconsistencies in what you ask for both
# relating to the substitution of 1 NAME for 1 TEXT or is
# it 2 TEXT or is it not wanted at all! In the spec you 
# specify after 0 @nnn@ SOUR that the first 1 NAME goes to
# 1 TITL - this is consistent with sample. For the next
# 1 NAME you suggest both 1 TEXT and 2 TEXT in different 
# examples. Unfortunately there is no n TEXT in the sample
# output. you currently get 1 TEXT with instruction of how
# to change this to 2 TEXT of 2 CONT as desired.

#!/usr/bin/perl -w

# note this code is only 30 or so lines long, all the 
# rest is comments for your benefit, like this.
# lines that start with a # char are comments
# like these lines, they are use to explain the code

# the -w on the top 'shebang' line makes perl issue 
# warnings about probable errors in the code. on unix
# the shebang line specifies the path to the perl
# interpretter. On mac and windows it does not but
# the -w is still looked for and acted on so we use it.

# this line forces us to declare variables with the 'my'
# declaration you see used below. amongst other things
# it forces good coding practices or Perl complains
use strict;

# we declare a scalar variable called $file to hold the 
# name of the file we will munge (process)
# the $ char lets Perl know we want a scalar variable to
# hold a single value
# we can set it to the name of a file (hard code the name)
# here but for flexibility we don't we just set it to
# a null string '' which along with the values 0 and 
# undef (undefined) equate to false in Perl. if a variable
# holds one of these three values it is false otherwise it
# is true
my $file = '';

# we now set file to any command line argument which
# we shift out of the @ARGV array that contains the 
# command line arguments (if if any) which are passed
# to the script when you call it like this:
# perl myscript.pl  myfile_to_process
# the unless $file bit means that we do this
# unless $file holds a true value (set above)
$file = shift @ARGV unless $file;

# here we use an unless again. unless $file holds a true
# value (from either of the options above) we execute the
# block of code between the curly braces { ... }
unless ($file) {
    # this line prints a prompt
    print "Please input a filename: ";
    # the <> is the input operator, used with nothing
    # between the < and the > it waits for input on 
    # STDIN - the standard input ie the keyboard
    # we stick this input into $file and then chomp
    # off the newline that occurs when you hit enter
    chomp ($file = <>);
}

# we now declare an associative array (also called a hash).
# the leading % char lets Perl know we want it to generate 
# a hash data structure. Hashes are very convenient as they
# allow us to access a data value via a key
# for example if we said print $change{'HIST'} perl looks
# in the %change hash for the 'HIST' key and returns the 
# value 'Biography:' - without the quotes
# you can probably see where this is going - we are going 
# to look for the keys and replace them with the values
my %change = (
    HIST => 'NOTE Biography:', 
    EDUC => 'NOTE Educated:',
    RESI => 'NOTE Resided:',
    OCCU => 'NOTE Occupation:');

# here we open our file onto a filehandle called FILE
# if this fails we die printing an error message
# the $! special variable contains the error
open (FILE, "<$file") or die "Unable to open $file: $!";
# here we read all the data in the file into an array
# data structure called @data. the leading @ tells Perl
# this is an array. you see we use the input operator 
# again but this time aimed at our file handle FILE
# You have now met all the major Perl
# data structures:
# $ -> scalars which hold one value
# % -> associative arrays that hold key value pairs
# @ -> arrays that hold lists of values (lots of scalars)
my @data = ;
# this is self explanatory - we close the file handle
# that we opened above with the close FILE;
close FILE;

# this next line is a little complex. first we declare
# a new scalar var called $change_this. we then assign
# the result of the join statement to this var.
# what the join does it get all the keys from our
# % change hash and join them together with the '|' char
# thus the net result is that:
# $change_this = 'HIST|EDUC|RESI|OCCU';
my $change_this = join "|", keys %change;
# this bit changes $change_this into a compiled regular 
# expression that will match any one of HIST...
# the brackets () also call parentheses/parenths are
# inportant as when we match on of these four keys the
# match will be stored in the magical perl variable $1
$change_this = qr/($change_this)/;

# because 'SOUR' occurs in several contexts we define
# a scalar variable $found_sour and set it to false.
# we will use this as a flag for when we find a 0 @nn@ SOUR
my $found_sour = 0;

# the for (@data) iterates (loops)  over the list of values
# in the @data array. as a result each line of the source 
# file will be set to the magical $_ variable in turn.
# $_ is magical in a number of ways. first if we change it
# within this loop we will change the value stored in @data
# second it is the default argument for regexes so if we
# write m/^0/ what we are saying is match a '0' char at 
# the begining of the string in $_ (the ^ char specifies 
# the at the begining bit. similarly the s/this/that/ 
# rexex will substitute the string 'this' for the string
# 'that' in $_
for (@data) {
     # this line does the search and replace substitutuion 
     # for HIST.... as we capture the bit we matched in $1
     # and this will be a key to our %change hash the 
     # replacement will be the value of that key-value pair
     # the /g specifies do all cases in $_ without it only
     # the first match will get substituted - we don't 
     # actually need this but I put it in anyway!
     s/$change_this/$change{$1}/g;
     # by observation a leading '0' specifies the begining
     # of a section in this file format so if we m/^0/ -
     # match a '0' at the begining of our line we reset 
     # our $found_sour flag
     $found_sour = 0 if m/^0/;
     # here we set the found sour flag to true (1) if 
     # we find a line that starts in 0 and ends in SOUR
     # the ^ says at the begining, the 0 is a literal '0'
     # the . matches anything and the + makes it match 1 or
     # more charachters of anything, the SOUR is a literal
     # 'SOUR', the \s* allows 0 or more trailing spaces,
     # and the $ pecifies the end of the string. phew
     $found_sour = 1 if m/^0.+SOUR\s*$/;
     # after the first SOUR we do this as $fouund_sour == 1
     if ($found_sour == 1) {
         # we increment our flag count to 2 if we 
         # succeed in substituting the literals as shown
         $found_sour++ if s/1 NAME/1 TITL/;
     }
     # same but this time second instance, this is in your
     # specification but not your sample output. if it is
     # wrong comment these three lines out by adding a #
     # character at the begining of the line like this:
     # elsif ($found_sour == 2) {
     #     $found_sour++ if s/1 NAME/1 TEXT/;
     # }
     # the code above is a demo of how to kill this code
     # by commenting it out. you can change the 1 TEXT to
     # 2 TEXT depending on which you want - if either.
     elsif ($found_sour == 2) {
         $found_sour++ if s/1 NAME/1 TEXT/;
     }
     # this substitution will occur for further insances 
     # of '1 NAME' as $found_sour will be true (it will be 3)
     elsif ($found_sour) {
         s/1 NAME/2 CONT/;
     }
     # the if/elsif/elsif structure means we may only do
     # one of these things on each line. we may also do 
     # none if $found_sour is false ie == 0
}     

# this just prints out our munged data to a new file called
# $file.new where $file is the name of the source file
# as a result the output will be in a file of the same name
# and in the same dir as the source but with a .new suffix
open (FILE, ">$file.new") or die "Unable to create $file.new: $!";
print FILE @data;
close FILE;

# let the user know we are done
print "Processed $file OK!";

##

##

0 HEAD 
1 SOUR REUNION
2 VERS V4.0
2 CORP Leister Productions
1 DEST ANSTFILE
1 DATE 8 JUL 2001
1 SUBM @S1@
1 FILE Sample
1 GEDC 
2 VERS 5.01
1 CHAR IBM DOS
0 @I1@ INDI
1 NAME Joe /SLAVEN/
1 SEX M
1 BIRT 
2 DATE 1945
2 SOUR @S4@
2 PLAC Burwood, NSW
1 OCCU Engineer
1 EDUC Christian Bros
1 RESI Sydney, NSW; Townsville, QLD
1 NOTE Joe works as a planning manager in Queensland's largest electricity utility . He has worked there for 18 years.
1 HEAL Joe was recently hospitlised for atrium fibrillation in 2001
1 HIST Joe is named after both grandfathers
1 FAMS @F1@
1 FAMC @F2@
0 @I2@ INDI
1 NAME Janet /McDONALD/
1 SEX F
1 BIRT 
2 DATE 1948
2 PLAC Townsville
1 OCCU Librarian
1 EDUC St Pats College, The Strand, Townsville
1 RESI Tpownsville
1 FAMS @F1@
0 @I3@ INDI
1 NAME Arthur /SLAVEN/
1 SEX M
1 BIRT 
2 DATE 1916
2 PLAC Koorawatha, NSW
1 DEAT 
2 DATE 1956
2 PLAC Concord Hospital, Sydney 
2 SOUR @S1@
1 BURI 
2 PLAC Rookwood Cemetery, NSW
1 OCCU Farmer, Bus driver
1 EDUC Crowther School
1 RESI Crowther, Sydney
1 NOTE On the death of his father, he took up an offer by his father's employer (Mr. Bragg, Crowther) for work on  his farm
1 HEAL Arthur suffered from Hydatids cysts caught while in a WW2 training camp in WA. He had many major surgical operations before his death
1 FAMS @F2@
0 @I4@ INDI
1 NAME Michael /SLAVEN/
1 SEX M
1 BIRT 
2 DATE 1972
2 PLAC Townsville, QLD
2 SOUR @S2@
1 OCCU Plumber
1 EDUC Ignatius Park College, Townsville
1 RESI Townsville
1 FAMC @F1@
0 @S1@ SUBM
1 NAME Joe SLAVEN
1 ADDR Townsville
2 CONT QLD
2 CONT Australiae
0 @S1@ SOUR
1 NAME NSW BDM Register
1 NAME Year 1956
1 NAME Name: Arthur SLAVEN
1 NAME Parents: Joseph SLAVEN & Mary FOGARTY
1 NAME Buried: Rookwood  Cemetery
0 @S2@ SOUR
1 NAME NSW Birth Register
1 NAME Name: Michael SLAVEN
1 NAME Parents: Joseph and Janet
1 NAME Date 1972
1 NAME Baptised: 1972
1 NAME Location: Townsville General Hospital
1 NAME Transcript held by Joe Slaven
0 @S3@ SOUR
1 NAME QLD Marriage Register
1 NAME Year: 1970
1 NAME Location: St Joseph's , Townsville
0 @S4@ SOUR
1 NAME From information suppleid by Joe Slaven, June 2001
0 @F1@ FAM
1 HUSB @I1@
1 WIFE @I2@
1 CHIL @I4@
1 MARR 
2 DATE 1970
2 PLAC Townsville, QLD 
2 SOUR @S3@
0 @F2@ FAM
1 HUSB @I3@
1 CHIL @I1@
0 TRLR 


##

##

0 HEAD 
1 SOUR REUNION
2 VERS V4.0
2 CORP Leister Productions
1 DEST ANSTFILE
1 DATE 8 JUL 2001
1 SUBM @S1@
1 FILE Sample
1 GEDC 
2 VERS 5.01
1 CHAR IBM DOS
0 @I1@ INDI
1 NAME Joe /SLAVEN/
1 SEX M
1 BIRT 
2 DATE 1945
2 SOUR @S4@
2 PLAC Burwood, NSW
1 OCCU Engineer
1 NOTE Educated: Christian Bros
1 NOTE Resided: Sydney, NSW; Townsville, QLD
1 NOTE Joe works as a planning manager in Queensland's largest electricity utility . He has worked there for 18 years.
1 NOTE Medical: Joe was recently hospitlised for atrium fibrillation in 2001
1 NOTE Biographical: Joe is named after both grandfathers
1 FAMS @F1@
1 FAMC @F2@
0 @I2@ INDI
1 NAME Janet /McDONALD/
1 SEX F
1 BIRT 
2 DATE 1948
2 PLAC Townsville
1 OCCU Librarian
1 NOTE Educated: St Pats College, The Strand, Townsville
1 NOTE Resided: Tpownsville
1 FAMS @F1@
0 @I3@ INDI
1 NAME Arthur /SLAVEN/
1 SEX M
1 BIRT 
2 DATE 1916
2 PLAC Koorawatha, NSW
1 DEAT 
2 DATE 1956
2 PLAC Concord Hospital, Sydney 
2 SOUR @S1@
1 BURI 
2 PLAC Rookwood Cemetery, NSW
1 OCCU Farmer, Bus driver
1 NOTE Educated: Crowther School
1 NOTE Resided: Crowther, Sydney
1 NOTE On the death of his father, he took up an offer by his father's employer (Mr. Bragg, Crowther) for work on  his farm
1 NOTE Medical: Arthur suffered from Hydatids cysts caught while in a WW2 training camp in WA. He had many major surgical operations before his death
1 FAMS @F2@
0 @I4@ INDI
1 NAME Michael /SLAVEN/
1 SEX M
1 BIRT 
2 DATE 1972
2 PLAC Townsville, QLD
2 SOUR @S2@
1 OCCU Plumber
1 NOTE Educated: Ignatius Park College, Townsville
1 NOTE Resided: Townsville
1 FAMC @F1@
0 @S1@ SUBM
1 NAME Joe SLAVEN
1 ADDR Townsville
2 CONT QLD
2 CONT Australiae
0 @S1@ SOUR
1 TITL NSW BDM Register
2 CONT Year 1956
2 CONT Name: Arthur SLAVEN
2 CONT Parents: Joseph SLAVEN & Mary FOGARTY
2 CONT Buried: Rookwood  Cemetery
0 @S2@ SOUR
1 TITL NSW Birth Register
2 CONT Name: Michael SLAVEN
2 CONT Parents: Joseph and Janet
2 CONT Date 1972
2 CONT Baptised: 1972
2 CONT Location: Townsville General Hospital
2 CONT Transcript held by Joe Slaven
0 @S3@ SOUR
1 TITL QLD Marriage Register
2 CONT Year: 1970
2 CONT Location: St Joseph's , Townsville
0 @S4@ SOUR
1 TITL From information suppleid by Joe Slaven, June 2001
0 @F1@ FAM
1 HUSB @I1@
1 WIFE @I2@
1 CHIL @I4@
1 MARR 
2 DATE 1970
2 PLAC Townsville, QLD 
2 SOUR @S3@
0 @F2@ FAM
1 HUSB @I3@
1 CHIL @I1@
0 TRLR