The question sounds familiar to me...
# warning windows doublequotes perl -lne "BEGIN{$/=''} @arr = split /\s+/,$_;$key=join qq(\n),@arr[0 +..3]; push @{$hash{$key}},$arr[4]; $arr[4]=~/(\d+):/? $count{$key}+= +$1:'';END{print map {qq($_ $count{$_} @{$hash{$_}} \n\n)} keys %hash + }" gen1.txt gen2.txt @gi AGAT + AAAA 2 1:1.txt 1:2.txt @gi CATT + AAAAA 3 3:1.txt @gi TACA + AAAA 2 2:2.txt perl -MO=Deparse -lne "BEGIN{$/=''} @arr = split /\s+/,$_;$key=join q +q(\n),@arr[0..3]; push @{$hash{$key}},$arr[4]; $arr[4]=~/(\d+):/? $c +ount{$key}+=$1:'';END{print map {qq($_ $count{$_} @{$hash{$_}} \n\n) +} keys %hash }" gen1.txt gen2.txt BEGIN { $/ = ""; $\ = "\n"; } LINE: while (defined($_ = <ARGV>)) { chomp $_; sub BEGIN { $/ = ''; } @arr = split(/\s+/, $_, 0); $key = join("\n", @arr[0..3]); push @{$hash{$key};}, $arr[4]; $arr[4] =~ /(\d+):/ ? $count{$key} += $1 : '???'; sub END { print map({"$_ $count{$_} @{$hash{$_};} \n\n";} keys %hash); } ; } -e syntax OK

UPDATE: still silly, but there is no need for a separate hash to hold the count, nor for an array to accumulate the strings.

+ #fix missed { in {c} perl -lne "BEGIN{$/=''}@ar=split /\s+/,$_;$k=join qq(\n),@ar[0..3];$h +{$k}{s}.=' '. $ar[4];$h{$k}{c}+=$1 if $ar[4]=~/(\d+)/;END{print map{q +q($_ $h{$_}{c} $h{$_}{s}\n\n)} keys %h}" gen1.txt gen2.txt @gi AGAT + AAAA 2 1:1.txt 1:2.txt @gi CATT + AAAAA 3 3:1.txt @gi TACA + AAAA 2 2:2.txt # infact the dumped datastructure is easily intellegible: # in the oneliner h is for hash # s is for string # c for count (ah! never use single char variable names!) perl -MData::Dump -lne "BEGIN{$/=''}@ar=split /\s+/,$_;$k=join qq(\n), +@ar[0..3];$h{$k}{s}.=' '.$ar[4];$h{$k}{c}+=$1 if $ar[4]=~/(\d+)/;END{ +dd %h}" gen1.txt gen2.txt ( "\@gi\nAGAT\n+\nAAAA", { c => 2, s => " 1:1.txt 1:2.txt" }, "\@gi\nCATT\n+\nAAAAA", { c => 3, s => " 3:1.txt" }, "\@gi\nTACA\n+\nAAAA", { c => 2, s => " 2:2.txt" }, ) #oh well we dont need @ar at all, perl -a at the rescue: perl -lane "BEGIN{$/=''}$k=join qq(\n),@F[0..3];$h{$k}{s}.=' '.$F[4]; +$h{$k}{c}+=$1 if $F[4]=~/(\d+):/;END{print map{qq($_ $h{$_}{c} $h{$_} +{s}\n\n)} keys %h}" gen1.txt gen2.txt # ok i've nothing better to do this night.. perl -lane "BEGIN{$/=''}map{$h{$_}{s}.=' '.$F[4];$h{$_}{c}+=$1 if $F[ +4]=~/(\d+):/}join qq(\n),@F[0..3]; END{print map{qq($_ $h{$_}{c} $h{$ +_}{s}\n\n)} keys %h}" gen1.txt gen2.txt

L*

There are no rules, there are no thumbs..
Reinvent the wheel, then learn The Wheel; maybe one day you will reinvent one of THE WHEELS.

In reply to Re^3: preserving hash value outside the loop -- silly oneliner by Discipulus
in thread preserving hash value outside the loop by Anonymous Monk

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.