in reply to Re^3: problem count the number of words (updated)
in thread problem count the number of words

Hi

Thanks, bro. I found my problem: it was in these sections — *foreach (my $word = <$inwp>) { $countp{$word}++ ; }* and *foreach (my $word = <$nwt>) {$countn{$word}++ ; }* — because I was splitting the words on /\n/, and my code did not need that separator.

my new code

#Hi Codder
use warnings;
use strict;
#use DBI ;
use utf8;
use Encode;

# Corpus preparation script.
#
# 1. Splits the positive and the negative sentence files into a training
#    file (the leading $train_percent percent of the lines) and a test file
#    (the remaining lines).
# 2. Tallies the entries of the positive and the negative word-list files
#    and appends the tallies, one "word = count" pair per line, to
#    wordsbagp.txt and wordsbagn.txt.
#
# All files are read and written as UTF-8.  Output files are opened in
# append mode, so running the script again adds to what is already there.
#
# NOTE(review): the training sentences themselves are never split into
# words; only the word-list files are tallied.  If the goal is to count
# how often each listed word occurs in the training sentences, that step
# still has to be added.

# Share of each corpus, in percent, that goes to the training file.
my $train_percent = 70;

my $pt   = '/root/Positive.txt';
my $ptt  = '/root/positivetrain1.txt';
my $pwt  = '/root/Positive2.txt';
my $ntw  = '/root/Negative2.txt';
my $pttt = '/root/positivetest.txt';
my $nt   = '/root/Negative.txt';
my $ntt  = '/root/negativetrain1.txt';
my $nttt = '/root/negativetest.txt';

split_train_test($pt, $ptt, $pttt);
split_train_test($nt, $ntt, $nttt);

# Number of lines in the positive training file (kept for the debug print).
my $numlinepw = count_lines($ptt);
write_word_counts($pwt, 'wordsbagp.txt');
#print "$numlinepw" , "\n" ;

# Number of lines in the negative training file.
my $numlinenw = count_lines($ntt);
write_word_counts($ntw, 'wordsbagn.txt');

print 'Finish First Section' , "\n";

# Return the number of lines in the UTF-8 text file $path.
# Dies with "$path: <reason>" if the file cannot be opened.
sub count_lines {
    my ($path) = @_;

    open(my $in, '<:encoding(utf8)', $path) or die "$path: $!";
    my $total = 0;
    while (defined(my $line = <$in>)) {
        $total++;
    }
    close $in;

    return $total;
}

# Append the first $train_percent percent of the lines of $source to
# $train_path and every remaining line to $test_path.
# Dies with "<path>: <reason>" if any file cannot be opened or flushed.
sub split_train_test {
    my ($source, $train_path, $test_path) = @_;

    # Integer arithmetic keeps the cut-off exact (e.g. 10 lines -> 7).
    my $train_quota = int(count_lines($source) * $train_percent / 100);

    open(my $in, '<:encoding(utf8)', $source) or die "$source: $!";

    # Open each output once, with the same encoding as the input, instead
    # of re-opening it for every line.
    open(my $train_out, '>>:encoding(utf8)', $train_path)
        or die "$train_path: $!";
    open(my $test_out, '>>:encoding(utf8)', $test_path)
        or die "$test_path: $!";

    my $seen = 0;
    while (my $line = <$in>) {
        # Strip the newline first so that exactly one is written back;
        # printing the raw line plus "\n" would insert blank lines.
        chomp $line;
        my $out = $seen < $train_quota ? $train_out : $test_out;
        print {$out} $line, "\n";
        $seen++;
    }

    close $in;
    close $train_out or die "$train_path: $!";
    close $test_out  or die "$test_path: $!";

    return;
}

# Tally how many times each line of the word-list file $words_path occurs
# and append the result to $bag_path as "word = count" lines, sorted by
# word.  Empty lines in the word list are ignored.
# Dies with "<path>: <reason>" if a file cannot be opened or flushed.
sub write_word_counts {
    my ($words_path, $bag_path) = @_;

    open(my $in, '<:encoding(utf8)', $words_path) or die "$words_path: $!";
    my %count;
    while (my $word = <$in>) {
        chomp $word;    # keep the newline out of the hash key
        next if $word eq '';
        $count{$word}++;
    }
    close $in;

    # Write the finished tally once, after all words have been read.
    open(my $out, '>>:encoding(utf8)', $bag_path) or die "$bag_path: $!";
    for my $word (sort keys %count) {
        print {$out} "$word = $count{$word}\n";
    }
    close $out or die "$bag_path: $!";

    return;
}

Replies are listed 'Best First'.
Re^5: problem count the number of words
by poj (Abbot) on Jan 01, 2019 at 17:06 UTC
    foreach (my $word = <$inwp>) { $countp{$word}++ ; }

    You appear to be just counting the words in the words file Positive2.txt, and I can't see where you are splitting the sentences in Positive.txt into words.

    Perhaps this will help you progress

    #!/usr/bin/perl
    use strict;
    use warnings;

    # For every training sentence (the first 70% of Positive.txt), report how
    # often each word from the positive word list Positive2.txt occurs in it.
    # Results are written to wordsbagp.txt; all files are treated as UTF-8.

    # create positive word hash
    my $pos_words = '/root/Positive2.txt';
    open my $fh_words, '<:encoding(utf8)', $pos_words
      or die "Could not open $pos_words : $!";
    my %pos_word;
    while (my $word = <$fh_words>) {
        # trim leading/trailing whitespace, including the newline
        $word =~ s/^\s+|\s+$//g;
        $pos_word{$word} = 1;
    }
    close $fh_words;

    # open positive word count output file
    # (same encoding as the input, so decoded text is written back as UTF-8)
    my $pos_outfile = '/root/wordsbagp.txt';
    open my $fh_out, '>:encoding(utf8)', $pos_outfile
      or die "Could not open $pos_outfile : $!";

    # read positive sentence file
    my $pos_text = '/root/Positive.txt';
    open my $fh_text, '<:encoding(utf8)', $pos_text
      or die "Could not open $pos_text : $!";
    my @train = <$fh_text>;
    close $fh_text;

    # split into 2 arrays 70% / 30%
    # (@test holds the 30% tail; it is not used further in this script)
    my $offset = int 0.7 * @train;
    my @test = splice @train, $offset;

    my $lineno = 0;
    for my $line (@train) {
        ++$lineno;

        # split sentence into words
        # (splitting on \b also yields the runs between words; they are
        # ignored below unless they appear in the word list)
        chomp($line);
        my @words = split /\b/, $line;

        # count positive words only
        my %count = ();
        for my $word (@words) {
            ++$count{$word} if exists $pos_word{$word};
        }

        # print results
        print $fh_out "\nline $lineno: $line\n";
        for my $word (sort keys %count) {
            printf $fh_out " %-10s => %d\n", $word, $count{$word};
        }
    }

    # buffered write errors only surface when the handle is closed
    close $fh_out or die "Could not close $pos_outfile : $!";
    poj