If you have a lot of data and don't want to read the entire file into memory at once, you can loop over it one paragraph (blank-line-separated block) at a time.
Shown here with embedded data to keep the example simple.
#!/usr/bin/perl
use warnings;
use strict;
use 5.010;

# Reads the embedded data one paragraph (blank-line-separated block) at a
# time, so only a single data set is held in memory at once, and prints the
# aligned-read totals and the chrM share for each data set.

{
    # Paragraph mode: records are separated by one or more blank lines.
    # Localised so the change to $/ cannot leak into other code that reads
    # from filehandles.
    local $/ = '';

    while ( my $chunk = <DATA> ) {
        chomp $chunk;    # in paragraph mode this strips all trailing newlines
        ProcessChunk($chunk);
    }
}

# ProcessChunk( $chunk )
#
# Summarises one data set.
#
#   $chunk - the text of one data set: newline-separated lines. Lines that
#            start with "Data_Set" or "NoCoordinateCount" are ignored; every
#            other line is split on whitespace and its first field is taken
#            as the chromosome name and its fifth field as the aligned-read
#            count (e.g. "chrM length= 16571 Aligned= 2650 Unaligned= 0").
#
# Prints three lines to STDOUT followed by nothing else: the total aligned
# reads, the chrM aligned reads, and chrM as a percentage of the total
# ('undefined' when the total is zero, since the ratio cannot be computed).
# Returns nothing.
sub ProcessChunk {
    my ($chunk) = @_;

    my $total_aligned_reads = 0;
    my $mtDNA               = 0;

    LINE:
    for my $line ( split /\n/, $chunk ) {
        next LINE if $line =~ /^Data_Set/;
        next LINE if $line =~ /^NoCoordinateCount/;

        # split ' ' ignores leading whitespace and treats any run of
        # whitespace as one separator, so tabs or doubled spaces do not
        # shift the fields.
        my ( $chr, $aligned ) = ( split ' ', $line )[ 0, 4 ];

        # Skip lines that do not carry a numeric aligned count instead of
        # adding undef (and triggering an "uninitialized" warning).
        next LINE unless defined $aligned && $aligned =~ /\A\d+\z/;

        $total_aligned_reads += $aligned;
        $mtDNA = $aligned if $chr eq 'chrM';
    }

    print "total aligned reads = $total_aligned_reads\n";
    print "mtDNA = $mtDNA\n";
    print "mtDNA percentage = ";

    # Only a zero total makes the percentage impossible to compute; a data
    # set with zero chrM reads legitimately has a percentage of 0.
    if ($total_aligned_reads) {
        print $mtDNA / $total_aligned_reads * 100;
    }
    else {
        print 'undefined';
    }
    print "\n";

    return;
}

__DATA__
Data_Set_116: BAM Index Statistics_on_data 115.html
chr10 length= 135534747 Aligned= 435 Unaligned= 0
chr11 length= 135006516 Aligned= 553 Unaligned= 0
chr12 length= 133851895 Aligned= 482 Unaligned= 0
chr13 length= 115169878 Aligned= 367 Unaligned= 0
chr14 length= 107349540 Aligned= 341 Unaligned= 0
chr15 length= 102531392 Aligned= 243 Unaligned= 0
chr16 length= 90354753 Aligned= 258 Unaligned= 0
chr17 length= 81195210 Aligned= 210 Unaligned= 0
chr18 length= 78077248 Aligned= 326 Unaligned= 0
chr19 length= 59128983 Aligned= 115 Unaligned= 0
chr1 length= 249250621 Aligned= 1012 Unaligned= 0
chr20 length= 63025520 Aligned= 194 Unaligned= 0
chr21 length= 48129895 Aligned= 148 Unaligned= 0
chr22 length= 51304566 Aligned= 100 Unaligned= 0
chr2 length= 243199373 Aligned= 897 Unaligned= 0
chr3 length= 198022430 Aligned= 763 Unaligned= 0
chr4 length= 191154276 Aligned= 841 Unaligned= 0
chr5 length= 180915260 Aligned= 755 Unaligned= 0
chr6 length= 171115067 Aligned= 730 Unaligned= 0
chr7 length= 159138663 Aligned= 646 Unaligned= 0
chr8 length= 146364022 Aligned= 642 Unaligned= 0
chr9 length= 141213431 Aligned= 466 Unaligned= 0
chrM length= 16571 Aligned= 2650 Unaligned= 0
chrX length= 155270560 Aligned= 1068 Unaligned= 0
chrY length= 59373566 Aligned= 11 Unaligned= 0
NoCoordinateCount= 0

Data_Set_108: BAM Index Statistics_on_data 107.html
chr10 length= 135534747 Aligned= 45 Unaligned= 0
chr11 length= 135006516 Aligned= 49 Unaligned= 0
chr12 length= 133851895 Aligned= 31 Unaligned= 0
chr13 length= 115169878 Aligned= 47 Unaligned= 0
chr14 length= 107349540 Aligned= 24 Unaligned= 0
chr15 length= 102531392 Aligned= 26 Unaligned= 0
chr16 length= 90354753 Aligned= 22 Unaligned= 0
chr17 length= 81195210 Aligned= 23 Unaligned= 0
chr18 length= 78077248 Aligned= 20 Unaligned= 0
chr19 length= 59128983 Aligned= 9 Unaligned= 0
chr1 length= 249250621 Aligned= 89 Unaligned= 0
chr20 length= 63025520 Aligned= 19 Unaligned= 0
chr21 length= 48129895 Aligned= 5 Unaligned= 0
chr22 length= 51304566 Aligned= 13 Unaligned= 0
chr2 length= 243199373 Aligned= 81 Unaligned= 0
chr3 length= 198022430 Aligned= 53 Unaligned= 0
chr4 length= 191154276 Aligned= 55 Unaligned= 0
chr5 length= 180915260 Aligned= 56 Unaligned= 0
chr6 length= 171115067 Aligned= 55 Unaligned= 0
chr7 length= 159138663 Aligned= 44 Unaligned= 0
chr8 length= 146364022 Aligned= 52 Unaligned= 0
chr9 length= 141213431 Aligned= 32 Unaligned= 0
chrM length= 16571 Aligned= 1 Unaligned= 0
chrX length= 155270560 Aligned= 52 Unaligned= 0
chrY length= 59373566 Aligned= 3 Unaligned= 0
NoCoordinateCount= 0

Data_Set_100: BAM Index Statistics_on_data 99.html
chr10 length= 135534747 Aligned= 25340 Unaligned= 0
chr11 length= 135006516 Aligned= 24577 Unaligned= 0
chr12 length= 133851895 Aligned= 24335 Unaligned= 0
chr13 length= 115169878 Aligned= 17653 Unaligned= 0
chr14 length= 107349540 Aligned= 16826 Unaligned= 0
chr15 length= 102531392 Aligned= 15506 Unaligned= 0
chr16 length= 90354753 Aligned= 17098 Unaligned= 0
chr17 length= 81195210 Aligned= 14604 Unaligned= 0
chr18 length= 78077248 Aligned= 14139 Unaligned= 0
chr19 length= 59128983 Aligned= 10155 Unaligned= 0
chr1 length= 249250621 Aligned= 43427 Unaligned= 0
chr20 length= 63025520 Aligned= 11568 Unaligned= 0
chr21 length= 48129895 Aligned= 6897 Unaligned= 0
chr22 length= 51304566 Aligned= 6766 Unaligned= 0
chr2 length= 243199373 Aligned= 45536 Unaligned= 0
chr3 length= 198022430 Aligned= 36213 Unaligned= 0
chr4 length= 191154276 Aligned= 34693 Unaligned= 0
chr5 length= 180915260 Aligned= 33941 Unaligned= 0
chr6 length= 171115067 Aligned= 31529 Unaligned= 0
chr7 length= 159138663 Aligned= 29473 Unaligned= 0
chr8 length= 146364022 Aligned= 27419 Unaligned= 0
chr9 length= 141213431 Aligned= 22254 Unaligned= 0
chrM length= 16571 Aligned= 169 Unaligned= 0
chrX length= 155270560 Aligned= 28121 Unaligned= 0
chrY length= 59373566 Aligned= 534 Unaligned= 0
NoCoordinateCount= 0
In reply to Re: How to loop over multiple datasets (blocks of text)?
by micurley
in thread How to loop over multiple datasets (blocks of text)?
by rnaeye
| For: | Use: |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |