# BilingualWorldList.txt (Let's call it FileA)
vriendelik aardig
irriterend vervelend
losieshuis pension
eksamen examen
goed braaf
damwal dam
water water
rekenaar computer
outoritêr outoritaire
wêreld wereld
alle alle
word worden
angesien overwegende
erkenning erkenning
afrigter trainer
# FalseFriendsList.txt (Let's call it FileB)
vriendelik aardig
goed braaf
damwal dam
bruinmens kleurling
kamera fototoestel
jammer sneu//spijten
japon ochtendjas
losieshuis pension
buffer bumper
bruinmens kleurling
brulpadda brulkikker
jammerlik zielig
buffer bumper
irriterend irritant//vervelend
kameelperd giraf//giraffe
####
#FileA
irriterend vervelend
#FileB
irriterend irritant//vervelend
####
#!/usr/bin/perl
# Sorts the words of a tab-separated word list into two output files:
#   OutputFalseFriends.txt     - every distinct word that is also an entry of
#                                the false-friends list, with the number of
#                                times it was seen, sorted alphabetically
#   OutputUnsortedWordList.txt - every other word, in input order
use strict;
use warnings;
use open ':utf8';

# NOTE(review): the name of the false-friends input file was lost from the
# original listing (the text between '<' and '>' was stripped); this name is
# an assumption - confirm it.
my $false_friends_file = 'FalseFriendsList.txt';

# Load the false-friends list. Each complete line (minus its newline) is one
# key, so a later lookup only succeeds for lines that hold a single word.
my %falsef;
open my $falsef_fh, '<', $false_friends_file
    or die "Can't open '$false_friends_file' for reading: $!";
while (my $line = <$falsef_fh>) {
    chomp $line;
    $falsef{$line}++;
}
close $falsef_fh;

# Open the output files.
open my $matched_fh, '>', 'OutputFalseFriends.txt'
    or die "Can't open 'OutputFalseFriends.txt' for writing: $!";
open my $unsorted_fh, '>', 'OutputUnsortedWordList.txt'
    or die "Can't open 'OutputUnsortedWordList.txt' for writing: $!";

# Number of times each false friend was found in the input.
my %existingfalsefriend;

# NOTE(review): the input handle of this loop was also stripped from the
# original listing; reading the files named on the command line (or STDIN when
# none are given) is an assumption - confirm it.
while (my $line = <>) {
    chomp $line;

    for my $token (split /\t/, $line) {
        # 'Clean up' the token: keep only its first run of word characters,
        # apostrophes and hyphens, dropping commas, full stops, etc.
        next unless $token =~ /(['\-\w]+)/;
        my $searchword = $1;

        if (exists $falsef{$searchword}) {
            $existingfalsefriend{$searchword}++;
        }
        else {
            print {$unsorted_fh} "$searchword\n";
        }
    }
}

# Write the matched words alphabetically, each followed by its count.
for my $searchword (sort keys %existingfalsefriend) {
    my $value = $existingfalsefriend{$searchword};
    print {$matched_fh} "$searchword\t $value\n";
}

# Buffered write errors only surface when the handle is closed.
close $matched_fh
    or die "Can't close 'OutputFalseFriends.txt': $!";
close $unsorted_fh
    or die "Can't close 'OutputUnsortedWordList.txt': $!";
####
#OutputFalseFriends.txt
vriendelik aardig
losieshuis pension
goed braaf
damwal dam
irriterend irritant//vervelend
#OutputUnsortedWordList.txt
eksamen examen
water water
rekenaar computer
outoritêr outoritaire
wêreld wereld
alle alle
word worden
angesien overwegende
erkenning erkenning
afrigter trainer
####
goed
braaf
naak
bloot
damwal
dam
kombers
deken
homoseksueel
flikker
bronstig
geil
munisipaliteit
gemeente
####
#!/usr/bin/perl
# Compares a bilingual word list (FILE A) with a false-friends list (FILE B).
# Both files hold one tab-separated pair per line. A FILE A line is written to
# OutputMatchedFalseFriends.txt when its first column is a first column of
# FILE B and one of the two second columns contains the other as a substring;
# every other FILE A line is written to OutputNonMatchedWords.txt.
use strict;
use warnings;
use open ':utf8';
use autodie;    # open/close throw on failure, so no explicit "or die" below

# NOTE(review): both input file names were lost from the original listing (the
# text between '<' and '>' was stripped); these names are assumptions taken
# from the sample-data headings - confirm them.
my $false_friends_file = 'FalseFriendsList.txt';
my $bilingual_file     = 'BilingualWorldList.txt';

# Load FILE B completely before FILE A is looked at: first column => second
# column. When a key occurs more than once the last line wins.
my %fileb;
open my $fileb_fh, '<', $false_friends_file;
while (my $line = <$fileb_fh>) {
    chomp $line;
    # split the line on tab
    my ($filebkeys, $filebvalues) = split /\t/, $line;
    # A line without two non-empty columns cannot take part in a comparison.
    next unless defined $filebkeys && defined $filebvalues && length $filebvalues;
    $fileb{$filebkeys} = $filebvalues;
}
close $fileb_fh;

# Open the output files once, outside of any loop, so that they are not
# truncated again while results are being written.
open my $matched_fh,    '>', 'OutputMatchedFalseFriends.txt';
open my $nonmatched_fh, '>', 'OutputNonMatchedWords.txt';

# Walk through FILE A and send every line to exactly one output file.
open my $filea_fh, '<', $bilingual_file;
while (my $line = <$filea_fh>) {
    chomp $line;
    # split the line on tab
    my ($fileakeys, $fileavalues) = split /\t/, $line;

    my $is_match = 0;
    # do the first columns match?
    if (   defined $fileakeys
        && defined $fileavalues
        && length $fileavalues
        && exists $fileb{$fileakeys})
    {
        my $filebvalues = $fileb{$fileakeys};
        # Does one second-column value contain the other as a substring?
        # index() compares literally, so characters such as '.', '(' or '/'
        # in the data are not treated as regex syntax.
        $is_match = index($filebvalues, $fileavalues) >= 0
                 || index($fileavalues, $filebvalues) >= 0;
    }

    if ($is_match) {
        print {$matched_fh} "$line\n";
    }
    else {
        # Includes lines whose first column is known but whose second columns
        # do not overlap, so that no FILE A line is silently dropped.
        print {$nonmatched_fh} "$line\n";
    }
}
close $filea_fh;

# Buffered write errors only surface when the handle is closed.
close $matched_fh;
close $nonmatched_fh;
####
#OutputMatchedFalseFriends.txt
damwal dam
bitsig vinnig
bot been
dikwels vaak
aantreklik knap
bees rund
baas chef
bestuur directie
alles alles
afrigter trainer
#OutputNonMatchedWords.txt (only a sample of a 73 line output)
vriendelik aardig
polisieman agent
net-net amper
gedierte beest
goed braaf
naak bloot
kombers deken
homoseksueel flikker
bronstig geil
munisipaliteit gemeente
menskop hoofd
toedraai inpakken
kiestand kies
dierekop kop
####
Can't open '>MatchedFalseFriends.txt' for writing: 'Invalid argument' at Script.ExtractionofCognates.1.0.5.2012.06.28.pl line 25
#and
Can't open '>OutputNonMatchedWords.txt' for writing: 'Invalid argument' at Script.ExtractionofCognates.1.0.5.2012.06.28.pl line 25