in reply to Re: Why doesn't the whole line print?
in thread Why doesn't the whole line print?
As crazyinsomniac says, move the test out of the while loop, because inside the loop you don't have the full sentence yet. I went ahead and made some more changes: I got rid of some variables that were not needed, made some stylistic changes as well, and added two uses of grep. More can be done, but that's the once-over from me.
#!/usr/bin/perl -w
#
# Create clusters of meeting data in a semi-unsupervised fashion
#
# parameters:
#   1) input directory
#   2) output data
#
# Every non-directory entry of the input directory is read as a table of
# whitespace-separated fields (the first line of each file is skipped).
# Words belonging to interrupting "spurts" are collected as
# "<s> word word ... </s>" lines.  If any collected piece of a file contains
# the word "so", everything collected for that file is printed to STDOUT and
# to the output file.
####################################################################
use strict;
use warnings;

my $indir   = shift @ARGV or die "No indir supplied";
my $outfile = shift @ARGV or die "No outfile supplied";

die "$indir not a directory!" unless -d $indir;

# Opened before the chdir below, so a relative $outfile is resolved against
# the directory the script was started from.
open(my $out, '>', $outfile) or die "Could not open $outfile: $!";

# chdir into the input directory so the bare names returned by readdir can be
# used directly, without prefixing each one with "$indir/".
chdir($indir) or die "Could not chdir $indir: $!";
opendir(my $dh, ".") or die "directory open of $indir failed: $!";

# Get all the files in the directory except "." and "..".  readdir makes no
# promise about where those two entries appear, so filter them by name.  The
# dots must be escaped: an unescaped /^..?\z/ would also throw away every
# one- or two-character filename.
my @files = grep { !/^\.\.?\z/ } readdir($dh);
closedir($dh);    # just being neat

# Process files until none are left.
foreach my $infile (@files) {
    next if -d $infile;

    # Three-arg open: names come straight from the directory listing, so they
    # must never be interpreted as an open mode or a pipe.
    open(my $in, '<', $infile) or die "error on file opening $infile: $!";

    my $within_spurt      = 0;    # true while words of a spurt are being collected
    my $previous_interupt = 0;    # interrupting-speaker count carried from the previous line
    my @current_spurt;            # output pieces collected for this file

    readline($in);                # skip first line which isn't data

    # Inner loop to create the initial clustering.
    while (my $line = <$in>) {

        # Only fields 3, 4, 5, 10 and 12 (1-based) of each line are used.
        my ($current_word, $word_in_spurt, $spurt_length,
            $primary_speaker, $interupting_speakers)
            = (split ' ', $line)[2, 3, 4, 9, 11];

        my $last_word_of_spurt = $word_in_spurt == $spurt_length;

        # See if the number of speakers increases and primary_speaker == 0;
        # this means the spurt is interrupting, not the primary speaker.
        if (   $primary_speaker == 0
            && $interupting_speakers > $previous_interupt
            && $word_in_spurt == 1)
        {
            push @current_spurt, "<s> $current_word ";    # start marker
            $within_spurt = 1;

            if ($last_word_of_spurt) {                    # only word in spurt
                push @current_spurt, "</s>\n";
                $within_spurt         = 0;
                $interupting_speakers = 0;                # the interrupt is over
            }
        }
        # If we are in a spurt and it is not the last word of that spurt.
        elsif ($within_spurt && !$last_word_of_spurt) {
            push @current_spurt, "$current_word ";        # add current word
        }
        # If this is the last word of a spurt.
        elsif ($last_word_of_spurt && $within_spurt == 1) {
            push @current_spurt, "$current_word </s>\n";  # end marker
            $within_spurt         = 0;
            $interupting_speakers = 0;                    # the interrupt is over
        }

        # Make sure that at the end of spurts this count is reduced,
        # but never below 0.
        if ($last_word_of_spurt) {
            $interupting_speakers--;
            $interupting_speakers = 0 if $interupting_speakers < 0;
        }

        $previous_interupt = $interupting_speakers;
    }
    close($in);

    # NOTE(review): this tests the individual collected pieces and, on a
    # match, prints everything collected for the whole file.  If the intent
    # is to print only the spurts that contain "so", the pieces need to be
    # grouped per spurt first -- confirm with the author.
    if (grep { /\bso\b/ } @current_spurt) {
        print STDOUT @current_spurt;
        print {$out} @current_spurt;
    }
}

# Buffered write errors only surface at close time, so check it.
close($out) or die "Could not close $outfile: $!";
|
|---|