# here it is with comments (not updated from original draft)
# there are two inconsistencies in what you ask for both
# relating to the substitution of 1 NAME for 1 TEXT or is
# it 2 TEXT or is it not wanted at all! In the spec you 
# specify after 0 @nnn@ SOUR that the first 1 NAME goes to
# 1 TITL - this is consistent with sample. For the next
# 1 NAME you suggest both 1 TEXT and 2 TEXT in different 
# examples. Unfortunately there is no n TEXT in the sample
# output. you currently get 1 TEXT with instruction of how
# to change this to 2 TEXT of 2 CONT as desired.

#!/usr/bin/perl -w

# note this code is only 30 or so lines long, all the 
# rest is comments for your benefit, like this.
# lines that start with a # char are comments
# like these lines, they are use to explain the code

# the -w on the top 'shebang' line makes perl issue 
# warnings about probable errors in the code. on unix
# the shebang line specifies the path to the perl
# interpretter. On mac and windows it does not but
# the -w is still looked for and acted on so we use it.

# this line forces us to declare variables with the 'my'
# declaration you see used below. amongst other things
# it forces good coding practices or Perl complains
use strict;

# we declare a scalar variable called $file to hold the 
# name of the file we will munge (process)
# the $ char lets Perl know we want a scalar variable to
# hold a single value
# we can set it to the name of a file (hard code the name)
# here but for flexibility we don't we just set it to
# a null string '' which along with the values 0 and 
# undef (undefined) equate to false in Perl. if a variable
# holds one of these three values it is false otherwise it
# is true
my $file = '';

# we now set file to any command line argument which
# we shift out of the @ARGV array that contains the 
# command line arguments (if if any) which are passed
# to the script when you call it like this:
# perl myscript.pl  myfile_to_process
# the unless $file bit means that we do this
# unless $file holds a true value (set above)
$file = shift @ARGV unless $file;

# here we use an unless again. unless $file holds a true
# value (from either of the options above) we execute the
# block of code between the curly braces { ... }
unless ($file) {
    # this line prints a prompt
    print "Please input a filename: ";
    # the <> is the input operator, used with nothing
    # between the < and the > it waits for input on 
    # STDIN - the standard input ie the keyboard
    # we stick this input into $file and then chomp
    # off the newline that occurs when you hit enter
    chomp ($file = <>);
}

# we now declare an associative array (also called a hash).
# the leading % char lets Perl know we want it to generate 
# a hash data structure. Hashes are very convenient as they
# allow us to access a data value via a key
# for example if we said print $change{'HIST'} perl looks
# in the %change hash for the 'HIST' key and returns the 
# value 'Biography:' - without the quotes
# you can probably see where this is going - we are going 
# to look for the keys and replace them with the values
my %change = (
    HIST => 'NOTE Biography:', 
    EDUC => 'NOTE Educated:',
    RESI => 'NOTE Resided:',
    OCCU => 'NOTE Occupation:');

# here we open our file onto a filehandle called FILE
# if this fails we die printing an error message
# the $! special variable contains the error
open (FILE, "<$file") or die "Unable to open $file: $!";
# here we read all the data in the file into an array
# data structure called @data. the leading @ tells Perl
# this is an array. you see we use the input operator 
# again but this time aimed at our file handle FILE
# You have now met all the major Perl
# data structures:
# $ -> scalars which hold one value
# % -> associative arrays that hold key value pairs
# @ -> arrays that hold lists of values (lots of scalars)
my @data = <FILE>;
# this is self explanatory - we close the file handle
# that we opened above with the close FILE;
close FILE;

# this next line is a little complex. first we declare
# a new scalar var called $change_this. we then assign
# the result of the join statement to this var.
# what the join does it get all the keys from our
# % change hash and join them together with the '|' char
# thus the net result is that:
# $change_this = 'HIST|EDUC|RESI|OCCU';
my $change_this = join "|", keys %change;
# this bit changes $change_this into a compiled regular 
# expression that will match any one of HIST...
# the brackets () also call parentheses/parenths are
# inportant as when we match on of these four keys the
# match will be stored in the magical perl variable $1
$change_this = qr/($change_this)/;

# because 'SOUR' occurs in several contexts we define
# a scalar variable $found_sour and set it to false.
# we will use this as a flag for when we find a 0 @nn@ SOUR
my $found_sour = 0;

# the for (@data) iterates (loops)  over the list of values
# in the @data array. as a result each line of the source 
# file will be set to the magical $_ variable in turn.
# $_ is magical in a number of ways. first if we change it
# within this loop we will change the value stored in @data
# second it is the default argument for regexes so if we
# write m/^0/ what we are saying is match a '0' char at 
# the begining of the string in $_ (the ^ char specifies 
# the at the begining bit. similarly the s/this/that/ 
# rexex will substitute the string 'this' for the string
# 'that' in $_
for (@data) {
     # this line does the search and replace substitutuion 
     # for HIST.... as we capture the bit we matched in $1
     # and this will be a key to our %change hash the 
     # replacement will be the value of that key-value pair
     # the /g specifies do all cases in $_ without it only
     # the first match will get substituted - we don't 
     # actually need this but I put it in anyway!
     s/$change_this/$change{$1}/g;
     # by observation a leading '0' specifies the begining
     # of a section in this file format so if we m/^0/ -
     # match a '0' at the begining of our line we reset 
     # our $found_sour flag
     $found_sour = 0 if m/^0/;
     # here we set the found sour flag to true (1) if 
     # we find a line that starts in 0 and ends in SOUR
     # the ^ says at the begining, the 0 is a literal '0'
     # the . matches anything and the + makes it match 1 or
     # more charachters of anything, the SOUR is a literal
     # 'SOUR', the \s* allows 0 or more trailing spaces,
     # and the $ pecifies the end of the string. phew
     $found_sour = 1 if m/^0.+SOUR\s*$/;
     # after the first SOUR we do this as $fouund_sour == 1
     if ($found_sour == 1) {
         # we increment our flag count to 2 if we 
         # succeed in substituting the literals as shown
         $found_sour++ if s/1 NAME/1 TITL/;
     }
     # same but this time second instance, this is in your
     # specification but not your sample output. if it is
     # wrong comment these three lines out by adding a #
     # character at the begining of the line like this:
     # elsif ($found_sour == 2) {
     #     $found_sour++ if s/1 NAME/1 TEXT/;
     # }
     # the code above is a demo of how to kill this code
     # by commenting it out. you can change the 1 TEXT to
     # 2 TEXT depending on which you want - if either.
     elsif ($found_sour == 2) {
         $found_sour++ if s/1 NAME/1 TEXT/;
     }
     # this substitution will occur for further insances 
     # of '1 NAME' as $found_sour will be true (it will be 3)
     elsif ($found_sour) {
         s/1 NAME/2 CONT/;
     }
     # the if/elsif/elsif structure means we may only do
     # one of these things on each line. we may also do 
     # none if $found_sour is false ie == 0
}     

# this just prints out our munged data to a new file called
# $file.new where $file is the name of the source file
# as a result the output will be in a file of the same name
# and in the same dir as the source but with a .new suffix
open (FILE, ">$file.new") or die "Unable to create $file.new: $!";
print FILE @data;
close FILE;

# let the user know we are done
print "Processed $file OK!";