# BilingualWorldList.txt (Les't call it FileA) vriendelik aardig irriterend vervelend losieshuis pension eksamen examen goed braaf damwal dam water water rekenaar computer outoritêr outoritaire wêreld wereld alle alle word worden angesien overwegende erkenning erkenning afrigter trainer FalseFriendsList.txt (Let's call it FileB) vriendelik aardig goed braaf damwal dam bruinmens kleurling kamera fototoestel jammer sneu//spijten japon ochtendjas losieshuis pension buffer bumper bruinmens kleurling brulpadda brulkikker jammerlik zielig buffer bumper irriterend irritant//vervelend kameelperd giraf//giraffe #### #FileA irriterend vervelend #FileB irriterend irritant//vervelend #### #!/usr/bin/perl-w use strict; #use warnings; use open ':utf8'; #open files open (FALSEF, ") { #assign each line to $line my $line = $_; #chomp off the new line chomp $line; #increment $line $falsef{$line}++; } #declare variables my $token; my %hash; #open output files open (OUTPUT1, ">OutputFalseFriends.txt"); open (OUTPUT2, ">OutputUnsortedWordList.txt"); #while input is received while () { #assign each line to $line my $line = $_; #chomp off the new line chomp $line; #assign $line to the array my @wordlist = split/\t/,$line; #a for-loop to 'clean up' the words, to get rid of all the commas, full stops, etc, except the apstrophes and hyphens for (my $x = 0; $x <= $#wordlist; $x++) { my $token = $wordlist[$x]; if ($token =~ /(['\-\w]+)/) { #$word is now clean my $searchword = $1; #checks to see whether the word exists in the false friends list if (exists $hash{$searchword} || exists $falsef{$searchword}) { $existingfalsefriend{$searchword}++; } else { #print to unsorted.txt print OUTPUT2 "$searchword\n"; } } } } my $searchword; foreach my $searchword(sort keys %existingfalsefriend) { #sorts the matched words alphabetically my $value = $existingfalsefriend{$searchword}; print OUTPUT1 "$searchword\t $value\n"; } #### #OutputFalseFriends.txt vriendelik aardig losieshuis pension goed braaf damwal dam irriterend irritant//vervelend #OutputUnsortedWorldList.txt eksamen examen water water rekenaar computer outoritêr outoritaire wêreld wereld alle alle word worden angesien overwegende erkenning erkenning afrigter trainer #### goed braaf naak bloot damwal dam kombers deken homoseksueel flikker bronstig geil munisipaliteit gemeente #### #!/usr/bin/perl-w use strict; use warnings; use open ':utf8'; use autodie; #open FILE B open (FALSEFRIENDINPUT, ") { #chomp off the new line chomp $line; # split the line on tab my ($filebkeys, $filebvalues) = split /\t/, $line; $fileb{$filebkeys} = $filebvalues; #open output files open (OUTPUT1, ">OutputMatchedFalseFriends.txt"); open (OUTPUT2, ">OutputNonMatchedWords.txt"); #open FILE A open (BILINGUALWL, " ) { chomp $line; #split the line on tab my ($fileakeys, $fileavalues) = split /\t/, $line; #do first columns match? if ($fileb{$fileakeys}) { #does the second column value contain the other as a substring? if ($fileb{$fileakeys} =~ /$fileavalues/ or $fileavalues =~ /$fileb{$fileakeys}/) { #if yes, print it to OutputMatchedFalseFriends.txt print OUTPUT1 "$line\n"; #loop to the next line next; } } else { #if not, print it to OutputNonMatchedWords.txt print OUTPUT2 "$line\n"; } } } #### #OutputMatchedFalseFriends.txt damwal dam bitsig vinnig bot been dikwels vaak aantreklik knap bees rund baas chef bestuur directie alles alles afrigter trainer #OutputNonMatchedWords.txt (only a sample of a 73 line output) vriendelik aardig polisieman agent net-net amper gedierte beest goed braaf naak bloot kombers deken homoseksueel flikker bronstig geil munisipaliteit gemeente menskop hoofd toedraai inpakken kiestand kies dierekop kop #### Can't open '>MatchedFalseFriends.txt' for writing: 'Invalid argument' at Script.ExtractionofCognates.1.0.5.2012.06.28.pl line 25 #and Can't open '>OutputNonMatchedWords.txt' for writing: 'Invalid argument' at Script.ExtractionofCognates.1.0.5.2012.06.28.pl line 25