# fix: missed { in {c}
#
#   perl -lne "BEGIN{$/=''}@ar=split /\s+/,$_;$k=join qq(\n),@ar[0..3];$h{$k}{s}.=' '. $ar[4];$h{$k}{c}+=$1 if $ar[4]=~/(\d+)/;END{print map{qq($_ $h{$_}{c} $h{$_}{s}\n\n)} keys %h}" gen1.txt gen2.txt
#
#   @gi
#   AGAT
#   +
#   AAAA 2 1:1.txt 1:2.txt
#
#   @gi
#   CATT
#   +
#   AAAAA 3 3:1.txt
#
#   @gi
#   TACA
#   +
#   AAAA 2 2:2.txt
#
# In fact the dumped data structure is easily intelligible:
# in the one-liner, h is for hash,
# s is for string,
# c is for count (ah! never use single-char variable names!)
#
#   perl -MData::Dump -lne "BEGIN{$/=''}@ar=split /\s+/,$_;$k=join qq(\n),@ar[0..3];$h{$k}{s}.=' '.$ar[4];$h{$k}{c}+=$1 if $ar[4]=~/(\d+)/;END{dd %h}" gen1.txt gen2.txt
#
#   (
#     "\@gi\nAGAT\n+\nAAAA",
#     { c => 2, s => " 1:1.txt 1:2.txt" },
#     "\@gi\nCATT\n+\nAAAAA",
#     { c => 3, s => " 3:1.txt" },
#     "\@gi\nTACA\n+\nAAAA",
#     { c => 2, s => " 2:2.txt" },
#   )
#
# Oh well, we don't need @ar at all; perl -a to the rescue:
#
#   perl -lane "BEGIN{$/=''}$k=join qq(\n),@F[0..3];$h{$k}{s}.=' '.$F[4];$h{$k}{c}+=$1 if $F[4]=~/(\d+):/;END{print map{qq($_ $h{$_}{c} $h{$_}{s}\n\n)} keys %h}" gen1.txt gen2.txt
#
# OK, I've nothing better to do tonight..
#
#   perl -lane "BEGIN{$/=''}map{$h{$_}{s}.=' '.$F[4];$h{$_}{c}+=$1 if $F[4]=~/(\d+):/}join qq(\n),@F[0..3]; END{print map{qq($_ $h{$_}{c} $h{$_}{s}\n\n)} keys %h}" gen1.txt gen2.txt