my @content = ();

####

#!/usr/bin/env perl

use strict;
use warnings;
use autodie ':all';

use Data::Dump;
use Text::CSV;

{
    my $source_file = 'pm_11116298_gene.txt';

    show_verbatim_input($source_file);
    process_without_2d_array($source_file);

    my $data_2d_ref = process_with_2d_array($source_file);
    # Do more processing with $data_2d_ref
}

# Read the tab-separated $file, discard its first (header) record, and call
# $on_row->($row) once per remaining record, where $row is the array
# reference returned by Text::CSV's getline().
#
# Dies if the parser cannot be created, if the file cannot be opened or
# closed (via autodie), or if parsing stops before end-of-file.
sub _for_each_data_row {
    my ($file, $on_row) = @_;

    # binary => 1 lets fields contain bytes outside printable ASCII;
    # without it such records are rejected as parse errors.
    my $csv = Text::CSV->new({sep => "\t", binary => 1})
        or die 'Cannot create Text::CSV parser: '
            . Text::CSV->error_diag() . "\n";

    open my $fh, '<', $file;

    my $header_record_to_discard = <$fh>;

    while (my $row = $csv->getline($fh)) {
        $on_row->($row);
    }

    # getline() returns false both at end-of-file and on a parse error;
    # only the former is a normal way to leave the loop.
    if (not $csv->eof) {
        my ($code, $message) = $csv->error_diag;
        die "Cannot parse '$file': Text::CSV error $code ($message)\n";
    }

    close $fh;

    return;
}

# Return the number of whitespace-separated words in all of @fields.
sub _count_words {
    my @fields = @_;

    return scalar map { split ' ', $_ } @fields;
}

# Print the data rows of $file joined with '; ' as they are read, then print
# two per-row counts: the number of MF columns (GUESS 1) and the number of
# words in those columns (GUESS 2).  The parsed rows themselves are not kept.
#
# The report lines are collected per row, not per protein name, so a file
# that repeats a protein name is reported the same way as it is by
# process_with_2d_array().
#
# Returns nothing.
sub process_without_2d_array {
    my ($file) = @_;

    print "\n\n+++++ WITHOUT INTERMEDIATE 2D ARRAY +++++\n";

    my @mf_count_lines;
    my @mf_word_count_lines;

    print "\n*** Wanted Data Output ***\n";

    _for_each_data_row($file, sub {
        my ($row) = @_;

        my $protein = $row->[0];
        my @mfs     = @{$row}[1 .. $#{$row}];

        push @mf_count_lines,      "$protein : $#{$row}\n";
        push @mf_word_count_lines, "$protein : " . _count_words(@mfs) . "\n";

        print join('; ', @{$row}), "\n";
    });

    print "\n*** Wanted Row Counts (GUESS 1) ***\n";
    print @mf_count_lines;

    print "\n*** Wanted Row Counts (GUESS 2) ***\n";
    print @mf_word_count_lines;

    return;
}

# Read every data row of $file into an array of array references, dump that
# structure with Data::Dump, and print the same three reports as
# process_without_2d_array().
#
# Returns a reference to the array of rows.
sub process_with_2d_array {
    my ($file) = @_;

    print "\n\n+++++ WITH INTERMEDIATE 2D ARRAY +++++\n";

    my @data_2d;
    _for_each_data_row($file, sub { push @data_2d, $_[0] });

    print "\n*** 2D Array of Data ***\n";
    dd \@data_2d;

    print "\n*** Wanted Data Output ***\n";
    print join('; ', @{$_}), "\n" for @data_2d;

    print "\n*** Wanted Row Counts (GUESS 1) ***\n";
    print "$_->[0] : $#{$_}\n" for @data_2d;

    print "\n*** Wanted Row Counts (GUESS 2) ***\n";
    for my $row (@data_2d) {
        my @mfs = @{$row}[1 .. $#{$row}];
        print "$row->[0] : ", _count_words(@mfs), "\n";
    }

    return \@data_2d;
}

# Print a heading, then run the external `cat -vet` on $file so that TABs
# show as '^I' and line ends as '$'.  `system` is called in list form, so no
# shell is involved; autodie ':all' makes a failing command fatal.
#
# Returns nothing.
sub show_verbatim_input {
    my ($file) = @_;

    print "*** Input File ($file) ***\n",
          " ('^I' = TAB; '\$' = NEWLINE)\n";

    system qw{cat -vet}, $file;

    return;
}

####

*** Input File (pm_11116298_gene.txt) ***
 ('^I' = TAB; '$' = NEWLINE)
ProteinName^IMF1^IMF2^IMF3$
GH1^IGrowth factor activity^IGrowth hormone receptor binding^IHormone activity$
POMC^IG protein-coupled receptor binding^IHormone activity^ISignaling receptor binding$
THRAP3^IATP binding Source^INuclear receptor transcription coactivator activity^IPhosphoprotein binding$


+++++ WITHOUT INTERMEDIATE 2D ARRAY +++++

*** Wanted Data Output ***
GH1; Growth factor activity; Growth hormone receptor binding; Hormone activity
POMC; G protein-coupled receptor binding; Hormone activity; Signaling receptor binding
THRAP3; ATP binding Source; Nuclear receptor transcription coactivator activity; Phosphoprotein binding

*** Wanted Row Counts (GUESS 1) ***
GH1 : 3
POMC : 3
THRAP3 : 3

*** Wanted Row Counts (GUESS 2) ***
GH1 : 9
POMC : 9
THRAP3 : 10


+++++ WITH INTERMEDIATE 2D ARRAY +++++

*** 2D Array of Data ***
[
  [
    "GH1",
    "Growth factor activity",
    "Growth hormone receptor binding",
    "Hormone activity",
  ],
  [
    "POMC",
    "G protein-coupled receptor binding",
    "Hormone activity",
    "Signaling receptor binding",
  ],
  [
    "THRAP3",
    "ATP binding Source",
    "Nuclear receptor transcription coactivator activity",
    "Phosphoprotein binding",
  ],
]

*** Wanted Data Output ***
GH1; Growth factor activity; Growth hormone receptor binding; Hormone activity
POMC; G protein-coupled receptor binding; Hormone activity; Signaling receptor binding
THRAP3; ATP binding Source; Nuclear receptor transcription coactivator activity; Phosphoprotein binding

*** Wanted Row Counts (GUESS 1) ***
GH1 : 3
POMC : 3
THRAP3 : 3

*** Wanted Row Counts (GUESS 2) ***
GH1 : 9
POMC : 9
THRAP3 : 10