in reply to Re: Skript help needed - RegEx & Hashes
in thread Skript help needed - RegEx & Hashes
Here's how I might have written that script, with the following changes to your version:
Please have a look, and if you have any questions, please let us know.
#!/usr/bin/env perl
# For every UNITAS_* directory in the current working directory:
#   1. sum the numeric lines of each *.mapped_sequences file into $reads,
#   2. read unitas.tRF-table.txt from the same directory and, per tRNA name,
#      accumulate selected fields scaled as  value / $reads * 1000000,
#   3. append one tab-separated line per tRNA name to "<dir>/merge".
# Intermediate values are dumped to STDOUT with Data::Dumper for debugging.
# Note: "merge" is opened in append mode, so re-running the script adds the
# same lines to that file again.
use warnings;
use strict;
use Data::Dumper;
$Data::Dumper::Useqq     = 1;
$Data::Dumper::Quotekeys = 0;
$Data::Dumper::Sortkeys  = 1;

# Output columns, in order.  The value for $tRF_types[$i] is taken from the
# tab-separated field with (zero-based) index 2*$i + 1, i.e. 1, 3, 5, ... 15.
my @tRF_types = (
    "5p-tR-halves", "5p-tRFs", "3p-tR-halves", "3p-CCA-tRFs",
    "3p-tRFs",      "tRF-1",   "tRNA-leader",  "misc-tRFs",
);
my $trftable = 'unitas.tRF-table.txt';

for my $folder ( grep { -d } glob('UNITAS_*') ) {
    print Data::Dumper->Dump( [$folder], [qw/folder/] );

    opendir my $dh, $folder or die "$folder: $!";

    # Explicit defined() so that an entry named "0" does not end the loop
    # on Perls older than 5.12.
    while ( defined( my $file = readdir $dh ) ) {
        next if $file !~ /\.mapped_sequences$/;
        print Data::Dumper->Dump( [$file], [qw/file/] );

        # Count the reads: strip every ">" from the line, ignore lines that
        # contain any ASCII letter, and add up the lines that contain a digit.
        # NOTE(review): presumably the file alternates ">count" headers and
        # sequence lines -- confirm against the real unitas output.
        my $reads = 0;
        open my $fileone, '<', "$folder/$file" or die "$folder/$file: $!";
        while ( my $tocount = <$fileone> ) {
            chomp $tocount;
            $tocount =~ s/>//g;
            next if $tocount =~ /[A-Za-z]/;
            if ( $tocount =~ /[0-9]/ ) {
                print Data::Dumper->Dump( [$tocount], [qw/tocount/] );
                $reads += $tocount;
            }
        }
        close $fileone;
        print Data::Dumper->Dump( [$reads], [qw/reads/] );

        # Every value below is divided by $reads; fail with a message that
        # names the file instead of a bare "Illegal division by zero".
        die "$folder/$file: no reads counted, cannot normalise" if !$reads;

        my %hash;
        open my $trf, '<', "$folder/$trftable" or die "$folder/$trftable: $!";

        # Skip the first four lines of the table (presumably headers).
        <$trf> for 1 .. 4;
        while ( my $line = <$trf> ) {
            chomp $line;
            my @line = split /\t/, $line;
            #print Data::Dumper->Dump([\@line], [qw/*line/]);

            # Key is the "tRNA-<xxx>-<3 chars>" part of the first field when
            # present; otherwise the first field with a trailing "-ENS..."
            # suffix removed.  A capture group is used instead of $&.
            my $tRNAname;
            if ( $line[0] =~ /(tRNA-[^-]+-...)/ ) {
                $tRNAname = $1;
            }
            else {
                ( $tRNAname = $line[0] ) =~ s/-ENS.+$//;
            }
            print Data::Dumper->Dump( [$tRNAname], [qw/tRNAname/] );

            # Rows that map to the same name are summed.
            my $h = ( $hash{$tRNAname} //= {} );
            for my $i ( 0 .. $#tRF_types ) {
                $h->{ $tRF_types[$i] } += $line[ 2 * $i + 1 ] / $reads * 1000000;
            }
        }
        close $trf;
        print Data::Dumper->Dump( [ \%hash ], [qw/*hash/] );

        # Append (not overwrite): name, then one value per tRF type.
        open my $merge, '>>', "$folder/merge" or die "$folder/merge: $!";
        for my $tRNAname ( sort keys %hash ) {
            print {$merge}
                join( "\t", $tRNAname, @{ $hash{$tRNAname} }{@tRF_types} ),
                "\n";
        }

        # Buffered write errors only surface at close, so check it.
        close $merge or die "$folder/merge: $!";
    }

    # Directory handles must be released with closedir, not close.
    closedir $dh;
}
For the sample data from this post, the output file merge I get is the following. Note that if you re-run the script, because of the append mode on the merge file, the same lines get added to that file again.
MT-TM 6500000 4500000 0 0 0 0 0 20416666.6666666
MT-TN 0 500000 0 0 0 750000 1000000 12625000
MT-TP 0 0 0 0 0 1000000 0 0
tRNA-Ala-AGC 0 3863095.23809524 0 0 136363.636363636 0 0 23353306.8783069
tRNA-Ala-CGC 0 8708333.33333333 0 0 0 14500000 500000 8980291.00529101
tRNA-Ala-TGC 0 11833333.3333333 0 0 90909.0909090909 0 0 84521296.2962963
tRNA-Arg-ACG 0 100000 0 71428.5714285715 0 6500000 0 4916666.66666667
Update: Minor edits and a few additions to the explanations.
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^3: Script help needed - RegEx & Hashes (DRY)
by hippo (Archbishop) on Oct 12, 2018 at 12:27 UTC | |
by haukex (Archbishop) on Oct 12, 2018 at 12:30 UTC |