use strict;
use warnings;
use Text::CSV;

# Reads words.txt (one token per row, tab separated), groups consecutive rows
# that carry the same chunk label in their last column, and prints one line
# per labelled group: the label immediately followed by the group's words
# joined by single spaces.

my $csv = Text::CSV->new( { sep_char => "\t" } )    # assuming tab separated input
    or die "Cannot use Text::CSV: " . Text::CSV->error_diag() . "\n";

open my $fh, "<", "words.txt" or die "Cannot open words.txt: $!\n";

# The following shows what's in words.txt; it is NOT words.txt itself!

=begin comment

how     o   B-NP
are     o   I-NP
you     o   I-NP
some    o   o
really  o   B-GP

=end comment

=cut

# Print one finished group. Groups whose label is the empty string (rows
# whose last column did not match the "X-LABEL" pattern) are skipped.
# length() is used instead of plain truthiness so that a label of "0" is
# not mistaken for "no label".
# NOTE(review): the label and the first word are emitted with no separator
# between them -- confirm this is the intended output format.
sub print_group {
    my ( $label, @group ) = @_;
    return unless length $label;
    print "$label@group\n";
    return;
}

# Starting with an empty label makes the first row an ordinary row: either it
# is unlabelled too (and is collected into the skipped group), or it differs
# and opens the first real group.
my $current_label = "";
my @group;

while ( my $row = $csv->getline($fh) ) {
    my $word = $row->[0];

    # Only the part after the hyphen is used as the label.
    # NOTE(review): the prefix before the hyphen is ignored, so two adjacent
    # chunks with the same label are merged into one group -- confirm that
    # is wanted.
    my $label = ( $row->[-1] =~ /\w-(\w+)/ ) ? $1 : "";

    if ( $label eq $current_label ) {
        push @group, $word;
    }
    else {
        print_group( $current_label, @group );
        $current_label = $label;
        @group         = ($word);
    }
}

# Print what's left over when all input has been read.
print_group( $current_label, @group );

# getline() returns false both at end of file and on a parse error; without
# this check malformed input would silently truncate the output.
$csv->eof
    or die "Cannot parse words.txt: " . $csv->error_diag() . "\n";

close $fh;