USE COBOLIO;
$cobempl = COBOLIO->new("copylibs/PEOPLE.txt", "PEOPLE_FILE", 1);
open(INPUTFILE, "EMPL.PEOPLE.LIST.DISK");
open(OUTPUTFILE, ">BadEmployees.txt");
printf OUTPUTFILE, $cobio->GetCSVHeader("PEOPLE_FILE");
while(<INPUTFILE>) {
    $cobempl->ReadRecInto($_, "PEOPLE_FILE");
    if($cobempl->GetVal("SICK_DAYS_USED") > 5) {
        print "employee ".$cobempl->GetVal("EMPLOYEE_NUMBER")." has used up all sick leave\n";
    printf OUTPUTFILE, $cobio->GetCSVRecord("PEOPLE_FILE");
    }
}
..
..

##</code><code>##

# COBOLIO.pm
#
# Copyright (c) 2002 Harry Holt <hholt@comcast.net>.  All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# Reading and interpretation of COBOL copylibs into perl data structures
#
# All variable names are the same as the cobol names, but with the '-'
# changed to a '_' to avoid operator/keyword issues.
#
# SIGNS: Signs may be added to a COBOL pic clause and require special
#	processing (all OVER the place!!), so each name is given a SIGN attribute.  
#	The possible values are:
#		R - No PIC clause for this var (a record-level variable)
#		X - No SIGN specified, non-numeric
#		9 - No SIGN specified, but variable is numeric
#		+ - SIGN is specified.
#	When the sign is specified, reading the data will require bit-shifting.
#	The LAST digit of the number is the sign and the last number.  If we
#	were still working in EBCDIC, would could do a straight bit-shift and
#	get the sign and the number, but because of the ASCII translation, things
#	don't work out like they should:
#		A-I = + 1-9
#		{	= + 0
#		J-R = - 1-9
#	I'm not sure what to look for if the value was a 0 with a negative sign.
#	It SHOULD be Hex D0, but since this has no representation in EBCDIC, I
#	don't know how it's represented.  +0 should be Hex C0, but it is 
#	represented as { anyway.
#	To make things simple for the Perl programmer, we will handle all the translations,
#	and often add an extra byte to the numeric variable to allow for the "-" sign.
#	Then we strip in off to update the variable value (THIS NEEDS REFACTORING).
#
package COBOLIO;
#
#
#
require Exporter;
require DynaLoader;
#
$COBOLIOPackage = "COBOLIO";
#
@ISA = qw( Exporter DynaLoader );
#
#
#
sub new {
	($class, $copyLibName, $fdRec, my $isFD, $rec01) = @_;
	
	$recList = {};
	$FD{$fdRec} = $recList;
	CreateCobolRec( $copyLibName, $fdRec, $isFD, $rec01);

	my $self = { $class, $FD };
	bless $self;


return $self;
}

sub PrintLayouts {
	($self, my $FDName) = @_;

	# Show the layouts of the existing copylib members	
	
	for $filedesc (keys %FD) {
		print "**********************  $filedesc  **********************\n";
		printf "%-32s  %7s  %7s  %7s  %6s  %7s \n", "Name", "Level", "Start", "Len", "Sign", "Parts";
		printf "%-32s  %7s  %7s  %7s  %6s  %7s \n", "-" x 32, "-----", "-----", "---", "----", "-----";
		for $reclist (sort BySRT keys %{ $FD{$filedesc} }) {
			if($reclist eq "VAL") {
				print "VALUE: \n$FD{$filedesc}->{VAL}\n";
			} else {
				printf "%-32s  %7s  %7s  %7s  %6s  %7s \n", 
						$reclist, 
						$FD{$filedesc}{$reclist}->{LEVEL},
						$FD{$filedesc}{$reclist}->{STARTPOS},
						$FD{$filedesc}{$reclist}->{LEN},
						$FD{$filedesc}{$reclist}->{SIGN},
						$FD{$filedesc}{$reclist}->{PARTS} ;
			}
		}
		print "\n\n";
	}
}


sub GetRec {
	my $self = shift;
	my $fdRec = shift;
	$recList = $FD{$fdRec};
	return $recList;
}

#sub DESTROY {
#	my ($self) = shift;
#	if(!undef($self->{cbNames})) {
#		undef $self->{cbNames};
#	}
#	return 1;
#}

sub ReadRecInto {
	# Start by setting up the record structure
	($self, $inputLine, $fdRec) = @_;

	$FD{$fdRec}->{VAL} = $inputLine;
	return 1;
} #### End sub ReadRecInto


sub GetVal {
	($self, my $dataItemName, my $fd, my $dataRecName) = @_;

	# If the item has an "Occurs" clause, we may need a subscript
	# of the value, so we need to parse the name to get the name and subscript
	($dataItemName, $subScr, $post) = split /[(,)]/, $dataItemName;
	if($subScr < 1) { $subScr = 1; }

	# If the FD is not passed, we will look it up
	if(defined($fd)) {
		$fdRec = $fd;
	} else {
		$fdRec = FindFDForRecord($dataItemName);
	}

	if(length($dataRecName) < 1) {
		$dataRecName = $FD{$fdRec}{$dataItemName}->{REC01};
	}
	
	# Get the part of the major record that contains the record being asked for.
	if($FD{$fdRec}{$dataItemName}->{PARTS} > 0) {
		$actualLen = $FD{$fdRec}{$dataItemName}->{LEN} / $FD{$fdRec}{$dataItemName}->{PARTS};
	} else {
		$actualLen = $FD{$fdRec}{$dataItemName}->{LEN}
	}
	$actualPos = $FD{$fdRec}{$dataItemName}->{STARTPOS} + ($actualLen * ($subScr - 1));
	$retVal = substr( $FD{$fdRec}->{VAL}, $actualPos, $actualLen);
	
	$retVal =~ s/\x00/0/g;
	# Deal with an signed numeric data
	if($FD{$fdRec}{$dataItemName}->{SIGN} eq "+") {
		if(substr($retVal,$FD{$fdRec}{$dataItemName}->{LEN} - 1,1) eq "{") {
			$retVal = substr($retVal,0,$FD{$fdRec}{$dataItemName}->{LEN} -1)."0";
		} else {
			if($retVal =~ m/[A-I]/) {
				$retVal =~ tr/[ABCDEFGHI]/[123456789]/;
			}
			if($retVal =~ m/[J-R]/) {
				$retVal =~ tr/[JKLMNOPQR]/[123456789]/;
				$retVal = '-'.$retVal;
			}
		}
	} 	# End of SIGN logic
			
	# If decimals are specified, we will need to add a "." in the right spot
	if($FD{$fdRec}{$dataItemName}->{DECIMALS} > 0) {
		$retVal = substr($retVal,0,length($retVal) - $FD{$fdRec}{$dataItemName}->{DECIMALS}).
				".".substr($retVal,length($retVal) - $FD{$fdRec}{$dataItemName}->{DECIMALS});
	}	# End of DECIMAL logic
	
	return $retVal;
}
	

sub GetCSVRecord {
	($self, my $fd, my $name01) = @_;
	
	$fdRec = $fd;
	$outRec = "";
	$flNeedSep = 0;
	$in01 = 0;
	if(undef($name01)) { $in01 = 1; }
	
	for $filedesc (keys %FD) {
		if($filedesc eq $fdRec) {
			for $reclist (sort BySRT keys %{ $FD{$filedesc} }) {
				if($reclist eq $name01) { $in01 = 1; }
				if($in01 == 1) {
					if($flNeedSep == 1) {
						$outRec .= ",";
						$flNeedSep = 0;
					}
					if($FD{$filedesc}{$reclist}->{SIGN} ne "R") {
						$outRec .= "\"".GetVal("", $reclist)."\"";
						$flNeedSep = 1;
					}
				}
			}
		}
	}
	
	return $outRec;
}

sub GetCSVHeader {
	($self, my $fd, my $name01) = @_;
	
	$fdRec = $fd;
	$outRec = "";
	$flNeedSep = 0;
	$in01 = 0;
	if(undef($name01)) { $in01 = 1; }
	
	for $filedesc (keys %FD) {
		if($filedesc eq $fdRec) {
			for $reclist (sort BySRT keys %{ $FD{$filedesc} }) {
				if($reclist eq $name01) { $in01 = 1; }
				if($in01 == 1) {
					if($flNeedSep == 1) {
						$outRec .= ", ";
						$flNeedSep = 0;
					}
					if($FD{$filedesc}{$reclist}->{SIGN} ne "R") {
						$outRec .= $reclist;
						$flNeedSep = 1;
					}
				}
			}
		}
	}
	
	return $outRec;
}
	
sub SetVal {
	($self,	my $dataItemName, my $newValue, my $fd, my $dataRecName) = @_;

	if(defined($fd)) {
		$fdRec = $fd;
	} else {
		$fdRec = FindFDForRecord($dataItemName);
	}
	if(length($dataRecName) < 1) {
		$dataRecName = $FD{$fdRec}{$dataItemName}->{REC01};
	}

	if((substr($newValue,0,5)) eq "SPACE") {
		$newValue = " " x $FD{$fdRec}{$dataItemName}->{LEN};
	}
	
	# Fix any numerics.  Allow an extra space in case of a sign
	if($FD{$fdRec}{$dataItemName}->{SIGN} =~ m/\+|9/) {
		$dataLen = $FD{$fdRec}{$dataItemName}->{LEN} + 1;
		$strMask = 'sprintf("%0'.$dataLen.'d", $newValue);';
		$newValue = eval($strMask);
	}
	
	# We need to deal with Signed numerics somehow.  This logic seems easily breakable, though
	if($FD{$fdRec}{$dataItemName}->{SIGN} eq "+") {
		if(substr($newValue,0,1) eq "+") { $newValue = substr($newValue,1); }
		if(substr($newValue,0,1) eq "-") {
			for($newValue) {
				s/0\z/\{/g;
				s/1\z/J/g;
				s/2\z/K/g;
				s/3\z/L/g;
				s/4\z/M/g;
				s/5\z/N/g;
				s/6\z/O/g;
				s/7\z/P/g;
				s/8\z/Q/g;
				s/9\z/R/g;
			}

		} else {
			for($newValue) {
				s/0\z/\{/g;
				s/1\z/A/g;
				s/2\z/B/g;
				s/3\z/C/g;
				s/4\z/D/g;
				s/5\z/E/g;
				s/6\z/F/g;
				s/7\z/G/g;
				s/8\z/H/g;
				s/9\z/I/g;
			}
		}
	}	# Finished dealing with signed numerics ###################################
	
	# Get rid of extra place for sign
	if(substr($newValue,0,1) eq "-") { $newValue = substr($newValue,1); }

	$newValue =~ s/'.'//g;
	
	$packTempl = 'A'.$FD{$fdRec}{$dataItemName}->{LEN};
	$newValue = pack($packTempl,$newValue);

	# In case the VAL is not large enough for the data item, add enough spaces
	if(length($FD{$fdRec}->{VAL}) < $FD{$fdRec}{$dataItemName}->{STARTPOS}) {
		$FD{$fdRec}->{VAL} .= " " x $FD{$fdRec}{$dataItemName}->{STARTPOS};
	}

	# Set the actual value within the larger record
	$FD{$fdRec}->{VAL} = 
		substr($FD{$fdRec}->{VAL},0,$FD{$fdRec}{$dataItemName}->{STARTPOS}).
		$newValue.
		substr($FD{$fdRec}->{VAL},$FD{$fdRec}{$dataItemName}->{STARTPOS} +
							$FD{$fdRec}{$dataItemName}->{LEN});
	return 1;
}
			
sub CreateCobolRec {
	(my $copyLibName, my $fdRec, my $isFD, my $rec01) = @_;

	#################################################################################
	# Here we create the "Data Division" interpretations for the 
	# perl variables.  Each name found in the COBOL code becomes a 
	# data element name with the following attributes:  
	#	
	#	LEVEL - The COBOL "record level", 01, 03, 05, 88, etc.
	#	STARTPOS - The starting position of the data element within the overall record
	#	LEN - The character length of the data element
	#	SIGN - Whether a +/- sign is used for a numeric value
	#	REC01 - The top-level (01) data element name that this element is in.
	#
	# The File Descriptor (FD) of the COBOL source also contains a VAL attribute,
	# which holds the value of a single record when it is read in.  To 
	# find or set the value of any other record or data element, the 
	# appropriate section of the complete record is used.
	#
	# To accomodate different styles of using COBOL "copy" statements, the initial
	# 01 level record can either be included in the copy member file, or it can
	# be supplied on the command line.
	# NOTE: COBOL "01" record-level intries subordinate to an "FD" clause are 
	#		implicit "REDEFINES".
	##################################################################################
	### Initializations
	my $periodAt;
	my $nextPos = 0;
	my $currLevelLen = 0;
	my $currentLevel = 01;
	my $fillerCount = 0;
	my $recStarted = 0;
	my $charCount = 0;
	my $currLevel = 01;
	my $cnt = 0;
	my $name;
	my $picChars = "";
	my $rec01Name;
	my $recName;
	my $level;
	my @recNames;
	my @recLevel;
	my @recLen;
	my @cplLines = ();
	my @cpl = ();
	my @vals = ();
	my $initVal = "";
	my $cplLine = "";
	my $rl;
	my $occurring = 0;
	my @occurringLevel;

	if(defined($rec01)) {
		$rec01Name = $rec01;
		push @cplLines, "       01  ".$rec01Name.'.';
	}

	#### Read the COPYLIB file
	die "I can't open the file $copyLibName because $!" unless
		open(COPYLIB, $copyLibName);
	while(<COPYLIB>) {
		push @cplLines, $_;
	}
	close(COPYLIB);

	#### Concatenate all COPYLIB lines into 1 line-per-sentence 
	for($cnt = 0; $cnt <= $#cplLines; $cnt++) {
		while(substr($cplLines[$cnt],6,1) eq "*") { $cnt++; } # throw out comments
		last if($cnt > $#cplLines);						# exit for if end of file
		$cplLine = substr($cplLines[$cnt],6,66);			# get rid of line #'s & comments
		while($cplLine !~ /\./) {					# concatenation based on the period
			if($cplLine =~ "\"") {
				$cplLines[$cnt + 1] =~ s/\"//;		# remove redundant quotes on next line
			}
			if(substr($cplLines[$cnt + 1],6,1) ne "*") {
				$cplLine .= substr($cplLines[$cnt + 1],11,61);
			}
			$cnt += 1;
		}
		if($cplLine =~ m/\./g) {			# chop line after the period
			$periodAt = pos($cplLine);
			$cplLine = substr($cplLine,0,$periodAt);
		}
		push @cpl, $cplLine;
	}

	##########################################################################
	# Next starts the loop to interpret the COBOL data members and create the
	# hash of hashes to store the attributes of the data items.
	#
	##########################################################################
	$cplSub = 0;
	for (@cpl) {
		@stuff = ();
		s/-/_/g;		# Change all "-" (dashes) to "_" (underscores)
		s/\.//g;		# Eliminate all periods
		if($_ =~ "PIC") {	# Get the position, length of defined data member
			@stuff = split;
			$level = $stuff[0];
			$name = $stuff[1];
			if($name eq "FILLER") { 	# Make "FILLER" fields have unique names
				$name = "FILLER".$fillerCount;
				$fillerCount++;
			}

			if($stuff[2] =~ "OCCURS") { 	# Check for "OCCURS" clause
				$parts = $stuff[3];			# PARTS = 1 unless the "OCCURS"
				$picClause = $stuff[6];		# clause defines multiples.
			} else {
				$parts = 1;
				$picClause = $stuff[3];
			}
			if($stuff[2] =~ "REDEFINES")  {
				$nextPos = $recList->{$stuff[3]}->{STARTPOS};
				$picClause = $stuff[5];
			}
			
			if(substr($picClause,0,1) eq "S") { 	# Look for a signed numeric
				$sign = "+";
			} else {
				if(substr($picClause,0,1) eq "9") {
					$sign = "9";
				} else {
					$sign = "X";
				}
			}
			###########
			# Next, the PICTURE clause is parsed to determine the data type
			# and the size of the field.  
			($picChars, $charCount, $other) = split /[(,)]/ ,$picClause;
			if($charCount == "") { 
				$picChars =~ s/V//g;
				$picChars =~ s/S//g;
				$charCount = length($picChars);
			}
			$other =~ s/V//g;
			$charCount = $charCount + length($other);
			$charCount = $charCount * $parts;
			$decimals = length($other);
			$recList->{ $name } = { 
				LEVEL => $level,
				STARTPOS => $nextPos,
				LEN => $charCount,
				SIGN => $sign,
				REC01 => $rec01Name,
				PARTS => $parts,
				DECIMALS => $decimals,
				SRT => $cplSub,
			};
			if($stuff[4] =~ "VALUE") {
				@vals = split/VALUE/;
				$initVal = $vals[1];
				if($initVal =~ "SPACE") {
					$initVal = " " x $charCount;
				}
				if($initVal =~ "ZERO") {
					$initVal = "0" x $charCount;
				}
				$initVal =~ s/\s+//x;
				$initVal =~ s/\"//g;
				SetVal("", $name, $initVal, $fdRec);
			}
			$nextPos += $charCount;
			while($currLevel > $level) {
				$recName = pop @recNames;
#				$recList->{$recName}->{LEN} += (pop @recLen) * (pop @recParts);
				$currLevel = pop @recLevel;
				pop @recLen;
				pop @recParts;
				
			}
#			for($rl=0;$rl <= $#recNames;$rl++) {
#				if($recLevel[$rl] < $level) { $recLen[$rl] += ($charCount * $recParts[$rl -1]); }
#			}
		} else {		# Deal with record-level data variables
			@stuff = split;
			$level = $stuff[0];
			if($level == 01) { 
				$rec01Name = $stuff[1];
				if($isFD == 1) { $nextPos = 0; }
			}
			while($currLevel > $level) {
				$recName = pop @recNames;
				$recList->{$recName}->{LEN} += pop @recLen;
				$currLevel = pop @recLevel;
				pop @recParts;
			}
			if($stuff[2] =~ "REDEFINES")  {
				$nextPos = $recList->{$stuff[3]}->{STARTPOS};
			}
			if($stuff[2] =~ "OCCURS") { 
				$parts = $stuff[3];
			} else {
				$parts = 1;
			}
			$recList->{$stuff[1]} = {
				LEVEL => $level,
				STARTPOS => $nextPos,
				LEN => 0,
				SIGN => "R",
				REC01 => $rec01Name,
				PARTS => $parts,
				SRT => $cplSub,
			};
			push @recNames, $stuff[1];
			push @recLevel, $level;
			push @recLen, 0;
			push @recParts, $parts;
		}
		$currLevel = $level;
		NEXTREC:
		$cplSub++;
#print "FOR $name:\t";
#print "level = $recList->{$name}->{LEVEL}\t";
#print "startpos = $recList->{$name}->{STARTPOS}\t";
#print "fdRec = $fdRec\n";
	}

	while($recName = pop @recNames) {   # Get lengths for remaining levels.
#		$recList->{$recName}->{LEN} += pop @recLen;
		pop @recLevel;
		pop @recParts;
	}
	
	@rns = ();
	for $recName (sort BySRT keys %$recList) {
		if($recList->{$recName}->{SIGN} eq "R") {
			push @rns, $recName;
		}
	}
	for $recName (@rns) {
		$filePos = $recList->{$recName}->{STARTPOS};
		$rLevel = $recList->{$recName}->{LEVEL};
		for $dataName (sort BySRT keys %$recList) {
			if($recList->{$dataName}->{SRT} > $recList->{$recName}->{SRT}) {
				if($rLevel < $recList->{$dataName}->{LEVEL}) {
					if($recList->{$dataName}->{SIGN} ne "R") {
						if($recList->{$dataName}->{STARTPOS} >= $filePos) {
							$recList->{$recName}->{LEN} += $recList->{$dataName}->{LEN};
							$filePos = $recList->{$dataName}->{STARTPOS} + $recList->{$dataName}->{LEN};
						} 
					}
				} else { 	# Exit loop if new record at the same level
					last; 
				}
			}
		}
	}

}	# sub CreateCobolRec()


sub ByLevel { 
	$FD{$fdRec}{$a}->{LEVEL} <=> $FD{$fdRec}{$b}->{LEVEL}; 
}

sub BySRT { 
	$FD{$fdRec}{$a}->{SRT} <=> $FD{$fdRec}{$b}->{SRT}; 
}

sub ByPosition {
	$FD{$fdRec}{$a}->{STARTPOS} <=> $FD{$fdRec}{$b}->{STARTPOS}
		||
	$FD{$fdRec}{$a}->{LEVEL} <=> $FD{$fdRec}{$b}->{LEVEL};
}

sub FindFDForRecord {
	my $recordName = shift;
	my $returnValue = "";
	my $foundCount = 0;
	my $dataName;
	
	for $filedesc (keys %FD) {
		for $reclist (keys %{ $FD{$filedesc} }) {
			if($reclist eq $recordName) {
				$returnValue = $filedesc;
				$foundCount += 1;
##				print "Found $recordName in $filedesc\n";
##				return $returnValue;
			}
		}
	}
	if($foundCount > 1) {
		die "Ambiguous record name specified $recordName\n";
	}
	return $returnValue;
}

1;