use strict;
use warnings;
use Text::CSV;
# Group consecutive rows of words.txt that share the same chunk type and print
# one line per group: the chunk type immediately followed by the
# space-joined words of that group.
#
# The chunk type is taken from the last column: for a tag such as "B-NP" or
# "I-NP" it is the part after the dash ("NP"). Rows whose last column has no
# such tag (e.g. "o") have an empty type; they end the current group but are
# never printed themselves.

my $input_file = 'words.txt';

# The parser is used only to split each line on tabs (the input is assumed to
# be tab separated). binary => 1 allows binary characters inside quoted
# fields instead of failing the parse.
my $csv = Text::CSV->new( { sep_char => "\t", binary => 1 } )
    or die "Cannot use Text::CSV: " . Text::CSV->error_diag() . "\n";

open my $words, '<', $input_file or die "Cannot open $input_file: $!\n";

# The following shows what's in words.txt; it is NOT words.txt itself!
# (The script splits on tabs, so the real file is expected to use tabs
# between the columns.)

=begin comment

how o B-NP
are o I-NP
you o I-NP
some o o
really o B-GP

=end comment

=cut

# Print one finished group; groups with an empty chunk type are skipped.
# A length test is used (not plain truthiness) so that a type of "0" is
# still printed.
sub print_group {
    my ( $type, @group_words ) = @_;
    return unless length $type;

    # NOTE(review): type and words are emitted with no separator between
    # them, exactly as before -- confirm this is the intended output format.
    print "$type@group_words\n";
    return;
}

my $current_type = '';    # chunk type of the group being collected
my @group;                # words collected for the current group

while ( my $row = $csv->getline($words) ) {
    my $word = $row->[0];
    my $type = $row->[-1] =~ /\w-(\w+)/ ? $1 : '';

    # A change of type closes the current group. The very first row has
    # nothing to close, hence the check for a non-empty group.
    if ( @group && $type ne $current_type ) {
        print_group( $current_type, @group );
        @group = ();
    }

    $current_type = $type;
    push @group, $word;
}

# Print what's left over when all input has been read.
print_group( $current_type, @group );

# getline() returns undef both at end of file and on a parse error; eof()
# tells the two apart. Without this check a malformed line would silently
# truncate the output.
if ( !$csv->eof ) {
    my ( $code, $message ) = $csv->error_diag;
    die "Cannot parse $input_file: $code - $message\n";
}

close $words;