iceotron has asked for the wisdom of the Perl Monks concerning the following question:
I have a script my predecessor wrote and I use twice a year. I'll paste it below. The problem is that certain records ID numbers are being changed in the perl output. Out of 169,000 records roughly 8000 are being changed. Because they are changed the ID numbers don't match for a match back. I tried smaller data chunks with the same results.
### "Sequence Number,$First_Name,$Last_Name,$Middle_Name,$Prefix,$Suff +ix,$ID_number,$Donor_Category,$Alumni_ind,$Friend_ind,$Fac_staff_ind, +$Highest_Gift,$Delivery Address,$Alternate 1 Address,$Alternate 2 Add +ress,$Alternate_3_Address,$City,$St,$ZIP+4,$Country,$Full Name,$SALUT +ATION,$PREF_CLASS_YEAR,$PREF_SCHOOL_CODE,$PREF_SCHOOL,$APEAL_CODE,$DE +G_SCHOOL_CODE1,$DEG_SCHOOL_CODE2,$DEG_SCHOOL_CODE3,$DEG_SCHOOL_CODE4, +$DEG_SCHOOL_CODE5,$DEG_SCHOOL_CODE6,$GIVE_SCHOOL_CODE1,$GIVE_SCHOOL_C +ODE2,$GIVE_SCHOOL_CODE3,$GIVE_SCHOOL_CODE4,$COMPANY_NAME1,$COMPANY_NA +ME2,$COMPANY_NAME3,$BUSINESS_STREET2,$BUSINESS_STREET3,$BUSINESS_STRE +ET4,$BUSINESS_CITY,$BUSINESS_ST,$BUSINESS_ZIP,$BUSINESS_COUNTRY,$PREF +_ADDR_TYPE_CODE,$RECORD_TYPE_CODE,$ASK1,$ASK2,$ASK3,$SP_ALUMNI_IND,$S +P_FRIEND_IND,$SP_FAC_STAFF_IN,$SP_HIGHEST_GIFT,$SPOUSE_ID_NUMBER,$SP_ +FIRST_NAME,$SP_MIDDLE_NAME,$SP_LAST_NAME,$SP_MAIL_NAME,$SP_CLASS_YEAR +,$SP_PREF_SCHOOL,$SP_PREF_SCHOOL_CODE,$SP_DEG_SCHOOL_CODE1,$SP_DEG_SC +HOOL_CODE2,$SP_DEG_SCHOOL_CODE3,$SP_DEG_SCHOOL_CODE4,$SP_DEG_SCHOOL_C +ODE5,$SP_DEG_SCHOOL_CODE6,$SP_GIVE_SCHOOL_CODE1,$SP_GIVE_SCHOOL_CODE2 +,$SP_GIVE_SCHOOL_CODE3,$SP_GIVE_SCHOOL_CODE4,$SP_COMPANY_NAME1,$SP_CO +MPANY_NAME2,$SP_BUS_STREET1,$SP_BUS_STREET2,$SP_BUS_STREET3,$SP_BUS_S +TREET4,$SP_BUS_CITY,$SP_BUS_ST,$SP_BUS_ZIP,$SP_BUS_COUNTRY,$SP_RECORD +_TYPE_CODE,$,$,$,$,$,$,$,$,$,$,$,$,$,$,$,$" ### "ID_number","DEG_SCHOOL_CODE1","DEG_SCHOOL_CODE2","DEG_SCHOOL_CODE +3","DEG_SCHOOL_CODE4","DEG_SCHOOL_CODE5","DEG_SCHOOL_CODE6","GIVE_SCH +OOL_CODE1","GIVE_SCHOOL_CODE2","GIVE_SCHOOL_CODE3","GIVE_SCHOOL_CODE4 +","SP_DEG_SCHOOL_CODE1","SP_DEG_SCHOOL_CODE2","SP_DEG_SCHOOL_CODE3"," +SP_DEG_SCHOOL_CODE4","SP_DEG_SCHOOL_CODE5","SP_DEG_SCHOOL_CODE6","SP_ +GIVE_SCHOOL_CODE1","SP_GIVE_SCHOOL_CODE2","SP_GIVE_SCHOOL_CODE3","SP_ +GIVE_SCHOOL_CODE4","Give_Unit_1","Give_unit_2","Give_unit_3","Give_un +it_4","Sp_give_unit_1","sp_give_unit_2","sp_give_unit_3","sp_give_uni +t_4" ### "DEG_SCHOOL_CODE1","DEG_SCHOOL_CODE2","DEG_SCHOOL_CODE3","DEG_SCHO +OL_CODE4","DEG_SCHOOL_CODE5","DEG_SCHOOL_CODE6","SP_DEG_SCHOOL_CODE1" +,"SP_DEG_SCHOOL_CODE2","SP_DEG_SCHOOL_CODE3","SP_DEG_SCHOOL_CODE4","S +P_DEG_SCHOOL_CODE5","SP_DEG_SCHOOL_CODE6","ID_number" open (OUTPUT,">321868_UB_CYE_ToBCC_Rnd2.txt"); open (INPUT,"321868_UB_CYE_FromBCC_Rnd2.txt"); $arraysizemax=0; print OUTPUT "ID,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16\n"; foreach $line (<INPUT>){ chomp $line; $line =~ s/\"//g; $line =~ s/0232-MUE/0232/g; $line =~ s/0232-LIS/0232/g; $line =~ s/0232-MUE/0232/g; $line =~ s/0301-0304/0301/g; $line =~ s/0301-CE/0301/g; $line =~ s/0030/9999/g; $line =~ s/0031/9999/g; $line =~ s/0884/9999/g; $line =~ s/0716/9999/g; $line =~ s/0377-ACCT/0377,0377-ACCT/g; $line =~ s/0508-BCP/0508/g; $line =~ s/0508-MCH/0508/g; $line =~ s/0508-PHC/0508/g; $line =~ s/0528/0456/g; $line =~ s/0494-NAN/0494/g; $line =~ s/0494-NRN/0494/g; $line =~ s/1084-AAS/1084D,1084/g; $line =~ s/1084-APY/1084D,1084/g; $line =~ s/1084-ART/1084D,1084/g; $line =~ s/1084-AS/1084D,1084/g; $line =~ s/1084-BIO/1084D,1084/g; $line =~ s/1084-CDS/1084D,1084/g; $line =~ s/1084-CHE/1084D,1084/g; $line =~ s/1084-CL/1084D,1084/g; $line =~ s/1084-COL/1084D,1084/g; $line =~ s/1084-COM/1084D,1084/g; $line =~ s/1084-DMS/1084D,1084/g; $line =~ s/1084-ECO/1084D,1084/g; $line =~ s/1084-ENG/1084D,1084/g; $line =~ s/1084-FR/1084D,1084/g; $line =~ s/1084-GEO/1084D,1084/g; $line =~ s/1084-GGS/1084D,1084/g; $line =~ s/1084-GLY/1084D,1084/g; $line =~ s/1084-HIS/1084D,1084/g; $line =~ s/1084-HMN/1084D,1084/g; $line =~ s/1084-ITA/1084D,1084/g; $line =~ s/1084-LIN/1084D,1084/g; $line =~ s/1084-MCH/1084D,1084/g; $line =~ s/1084-MD/1084D,1084/g; $line =~ s/1084-MTH/1084D,1084/g; $line =~ s/1084-MUE/1084D,1084/g; $line =~ s/1084-MUP/1084D,1084/g; $line =~ s/1084-MUS/1084D,1084/g; $line =~ s/1084-PHC/1084D,1084/g; $line =~ s/1084-PHI/1084D,1084/g; $line =~ s/1084-PHY/1084D,1084/g; $line =~ s/1084-PSC/1084D,1084/g; $line =~ s/1084-PSY/1084D,1084/g; $line =~ s/1084-SMA/1084D,1084/g; $line =~ s/1084-SOC/1084D,1084/g; $line =~ s/1084-SPA/1084D,1084/g; $line =~ s/1084-SSC/1084D,1084/g; $line =~ s/1084-TD/1084D,1084/g; + $line =~ s/1084-TH/1084D,1084/g; use Array::Unique; tie @schools, 'Array::Unique'; @schools = split(/,/,$line); $id=pop(@schools); @allow = ("0226","0232","0372","0377","0377-ACCT","0424","0448","0456 +","0494","0508","1084D","1084","9999","0719","0301"); %original = (); @intersect = (); map { $original{$_} = 1 } @schools; @intersect = grep { $original{$_} } @allow; @intersect = sort @intersect; local $" = ','; ##" $arraysize = @intersect; if ($arraysize eq 0) {print "$id\n";} if ($arraysize gt $arraysizemax) {$arraysizemax=$arraysize} print OUTPUT "$id,@intersect\n"; } print "$arraysizemax";
This is the data being fed in:
"DEG_SCHOOL_CODE1","DEG_SCHOOL_CODE2","DEG_SCHOOL_CODE3","DEG_SCHOOL_C +ODE4","DEG_SCHOOL_CODE5","DEG_SCHOOL_CODE6","SP_DEG_SCHOOL_CODE1","SP +_DEG_SCHOOL_CODE2","SP_DEG_SCHOOL_CODE3","SP_DEG_SCHOOL_CODE4","SP_DE +G_SCHOOL_CODE5","SP_DEG_SCHOOL_CODE6","","","","","","","","","",""," +","","","","","","","","","","","","","","","","ID_number" "1084-COM","1084-CHE","","","","0719","1084-PSY","","","","","","","", +"","","","","","","","","","","","","","","","","","","","","","","", +"","0000319970" "0377","","","","","","0377-ACCT","0317","","","","","","","","","","" +,"","","","","","","","","","","","","","","","","","","","","0000030 +428" "1084-PSY","0377","","","","0719","1084-SSC","1084-SOC","1084","0232", +"","","","","","","","","","","","","","","","","","","","","","","", +"","","","","","0000319443" "0232-LIS","","","","","","1084-PHI","0301","","","","","","","","","" +,"","","","","","","","","","","","","","","","","","","","","","0000 +315276" "1084-PSY","1084-HIS","1084-ECO","","","0719","1084-SSC","","1084-HIS" +,"","","","","","","","","","","","","","","","","","","","","","","" +,"","","","","","","0000316920" "1084-DMS","0232-LIS","","","","","1084-SSC","","","","","","","",""," +","","","","","","","","","","","","","","","","","","","","","",""," +0000302425"
And this is what is being returned:
ID,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 ID_number, 0099999970,0719,1084,1084D 0009999428,0377,0377-ACCT 0099999443,0232,0377,0719,1084,1084D 0099995276,0232,0301,1084,1084D 0099996920,0719,1084,1084D 0099992425,0232,1084,1084D
It is changing the ID numbers to have a 9's preceding the beginning. Please help
|
|---|