Below, I show one way of doing that. It assumes that the lines are sorted, which is often not a bad assumption, as the command-line sort utilities can sort humongous files very efficiently. The code would have to be a bit more complex if more than 2 duplicate lines were present and needed to be combined on the first pass, although running the program again would pick up the "3rd" one on the second pass. Note that I did not "save" the number of descriptions, as this is easily produced by Perl by evaluating @var in scalar context.
#!/usr/bin/perl
use strict;
use warnings;

# Merge adjacent records that share a part number.
#
# Each input record has the form
#     PART_NUMBER,COUNT,DESIGNATORS
# where DESIGNATORS is either a single reference designator (R383) or a
# quoted, comma-separated list ("RP1,RP2").  The input must already be
# sorted so that records for the same part number are adjacent.
#
# Any number of adjacent records with the same part number is combined in
# a single pass.  A record whose part number occurs only once is printed
# exactly as it was read.

# Split a designator into (prefix, number) for sorting, e.g. RP22 becomes
# ('RP', 22).  A token that has no non-digit prefix followed by digits is
# keyed on its full text with number 0, so the comparison never sees undef.
sub designator_key {
    my ($designator) = @_;
    return $designator =~ /(\D+)(\d+)/ ? ($1, $2) : ($designator, 0);
}

# Print one group of records that share a part number.
#   $part_number  - the shared first field
#   $lines        - array ref of the raw input lines in the group
#   $designators  - array ref of every designator found in those lines
sub print_group {
    my ($part_number, $lines, $designators) = @_;

    return if !@{$lines};

    # A part number seen only once: echo the record unchanged.
    if (@{$lines} == 1) {
        print $lines->[0];
        return;
    }

    # Order by prefix as text, then by number numerically, so that RP9
    # comes before RP18.  (Thanks to jwkrahn for the sort.)
    my @sorted = sort {
        my ($a_prefix, $a_number) = designator_key($a);
        my ($b_prefix, $b_number) = designator_key($b);
        $a_prefix cmp $b_prefix or $a_number <=> $b_number
    } @{$designators};

    # The count field is recomputed from the merged list.
    print "$part_number,", scalar(@sorted), ',"', join(',', @sorted), "\"\n";
    return;
}

my $group_number;         # part number of the group being collected
my @group_lines;          # raw input lines of that group
my @group_designators;    # every designator seen in that group

while (my $line = <DATA>) {
    my ($part_number, $designator_text) = (split /,/, $line, 3)[0, 2];
    $designator_text = '' if !defined $designator_text;

    # A new part number closes the group collected so far.
    if (@group_lines && $part_number ne $group_number) {
        print_group($group_number, \@group_lines, \@group_designators);
        @group_lines       = ();
        @group_designators = ();
    }

    $group_number = $part_number;
    push @group_lines, $line;
    push @group_designators, $designator_text =~ /(\w+)/g;
}

# Flush whatever is still buffered after the last input line.
print_group($group_number, \@group_lines, \@group_designators);

=pod

Prints:

    032-00751-0000,1,R383
    032-00794-0000,6,"RP1,RP2,RP3,RP22,RP24,RP26"
    032-00795-0000,8,"RP10,RP11,RP12,RP13,RP14,RP15,RP16,RP17"
    032-00804-0000,7,"R7,R14,R21,R23,R41,R42,R49"
    032-00807-0000,6,"RP8,RP9,RP18,RP19,RP200,RP201"
    032-00808-0000,3,"RP21,RP23,RP25"
    032-00820-0000,6,"R966,R970,R971,R1041,R1076,R3000"
    032-00893-0000,1,R1164

=cut

__DATA__
032-00751-0000,1,R383
032-00794-0000,6,"RP1,RP2,RP3,RP22,RP24,RP26"
032-00795-0000,8,"RP10,RP11,RP12,RP13,RP14,RP15,RP16,RP17"
032-00804-0000,7,"R7,R14,R21,R23,R41,R42,R49"
032-00807-0000,2,"RP18,RP19"
032-00807-0000,4,"RP8,RP9,RP200,RP201"
032-00808-0000,3,"RP21,RP23,RP25"
032-00820-0000,5,"R966,R970,R971,R1041,R1076"
032-00820-0000,1,R3000
032-00893-0000,1,R1164
In reply to Re: Find duplicate fields and merging data in a text file
by Marshall
in thread Find duplicate fields and merging data in a text file
by donkost
| For: | Use: |
| --- | --- |
| & | `&amp;` |
| < | `&lt;` |
| > | `&gt;` |
| [ | `&#91;` |
| ] | `&#93;` |