use strict;
use warnings;

use Data::Dumper;

# Group whitespace-delimited "TOKEN value" records read from a filehandle.
#
# Each line is split into a leading token (the first run of non-whitespace
# characters) and the remainder of the line.  A line whose token equals the
# token of the previously parsed line is treated as a continuation: its value
# is appended, separated by a single space, to the most recent entry stored
# for that token.  Any other token starts a new entry.  Lines that do not
# match "TOKEN<whitespace>rest" are skipped and do not end a continuation.
#
# Reading stops at end of file or at a line consisting solely of "__END__",
# so reference material kept after that marker is never parsed as input.
#
# Parameters:
#   $fh - a readable filehandle.
# Returns:
#   A hash reference mapping each token to an array reference holding its
#   values in order of appearance.
sub parse_records {
    my ($fh) = @_;

    my %values_for;
    my $current_token = '';

    LINE:
    while ( my $line = <$fh> ) {

        # Inside a __DATA__ section "__END__" is ordinary text, so it has to
        # be recognised explicitly.
        last LINE if $line =~ m{\A __END__ \s* \z}x;

        # No /s here: "." must stop before the newline so the value excludes it.
        my ( $token, $value ) = $line =~ m{\A (\S+) \s+ (.*) }x;

        # Test definedness rather than truth so a literal "0" token is kept.
        next LINE if !defined $token;

        if ( $token eq $current_token ) {

            # Continuation line: extend the latest entry for this token.
            $values_for{$token}[-1] .= ' ' . $value;
        }
        else {
            $current_token = $token;
            push @{ $values_for{$token} }, $value;
        }
    }

    return \%values_for;
}

my %hash = %{ parse_records( \*DATA ) };

print Dumper \%hash;

# The sample input follows __DATA__.  The text after __END__ is a sample of
# the dump printed for that input; the order of the hash keys may differ.

__DATA__
HEADER Header 1 stuff
TITLE Title 1 stuff
TITLE 2 more title 1 stuff
COMPND complicated stuff 1
COMPND 2 continued complicated stuff 1
HEADER Header 2 stuff
TITLE Title 2 stuff
TITLE 2 more title 2 stuff
COMPND complicated stuff 2
COMPND 2 continued complicated stuff 2
HEADER Header 3 stuff
TITLE Title 3 stuff
TITLE 2 more title 3 stuff
COMPND complicated stuff 3
COMPND 2 continued complicated stuff 3
COMPND 3 continued complicated stuff 3
HEADER Header 4 stuff
TITLE Title 4 stuff
TITLE 2 more title 4 stuff
COMPND complicated stuff 4
COMPND 2 continued complicated stuff 4
HEADER Header 5 stuff
TITLE Title 5 stuff
TITLE 2 more title 5 stuff
COMPND complicated stuff 5
COMPND 2 continued complicated stuff 5
HEADER Header 6 stuff
TITLE Title 6 stuff
TITLE 2 more title 6 stuff
COMPND complicated stuff 6
COMPND 2 continued complicated stuff 6
__END__
$VAR1 = {
          'HEADER' => [
                        'Header 1 stuff',
                        'Header 2 stuff',
                        'Header 3 stuff',
                        'Header 4 stuff',
                        'Header 5 stuff',
                        'Header 6 stuff'
                      ],
          'TITLE' => [
                       'Title 1 stuff 2 more title 1 stuff',
                       'Title 2 stuff 2 more title 2 stuff',
                       'Title 3 stuff 2 more title 3 stuff',
                       'Title 4 stuff 2 more title 4 stuff',
                       'Title 5 stuff 2 more title 5 stuff',
                       'Title 6 stuff 2 more title 6 stuff'
                     ],
          'COMPND' => [
                        'complicated stuff 1 2 continued complicated stuff 1',
                        'complicated stuff 2 2 continued complicated stuff 2',
                        'complicated stuff 3 2 continued complicated stuff 3 3 continued complicated stuff 3',
                        'complicated stuff 4 2 continued complicated stuff 4',
                        'complicated stuff 5 2 continued complicated stuff 5',
                        'complicated stuff 6 2 continued complicated stuff 6'
                      ]
        };