#!/usr/bin/perl
#
# Extract readable text from a binary document and print it to STDOUT.
# (The input handle was called WORD in the original script, so the input
# is presumably an MS Word file -- confirm against actual usage.)
#
# Usage: <this script> FILE
#
# The process forks into two halves joined by a pipe:
#
#   parent - reads FILE in fixed-size chunks and looks for runs of
#            two-byte characters: either a byte 0x10-0x4F followed by 0x04
#            (captured as "russian"), or a byte 0x08, 0x0A or 0x20-0x7F
#            followed by 0x00 (captured as "english").  The second byte of
#            every pair is dropped, the "russian" bytes are shifted to
#            0xC0-0xFF, and the result is written to the pipe.
#   child  - reads the pipe line by line and prints only lines that are
#            longer than three characters and do not contain the same
#            character three times in a row.
use strict;
use warnings;

my $START      = 0;              # byte offset where scanning begins
my $BLOCK      = 100_000_000;    # stop once the read position passes $START + $BLOCK
my $CHUNK_SIZE = 10_240;         # bytes requested per sysread

# Validate the command line before forking so a missing argument does not
# leave a child process behind.
die "Usage: $0 FILE\n" unless @ARGV;
my $file = $ARGV[0];

# Forking open: the parent receives the child's pid and a write handle,
# the child receives 0 and sees the other end of the pipe as its STDIN.
my $pid = open(my $filter, '|-');
die "Can't fork: $!\n" unless defined $pid;

if ($pid) {
    # ---- parent: scan the file and feed candidate text to the child ----

    # Three-argument open so the file name can never be taken as a mode.
    open(my $word, '<', $file) or die "Can't read $file\n";
    binmode $word;               # the input is raw bytes, not text

    sysseek($word, $START, 0)
        or die "Can't seek to offset $START in $file: $!\n";

    my $chunk;
    CHUNK:
    while (1) {
        my $got = sysread($word, $chunk, $CHUNK_SIZE);
        die "Error reading $file: $!\n" unless defined $got;
        last CHUNK if $got == 0;                 # end of file

        # Runs are matched within a single chunk only; a run that straddles
        # a chunk boundary is seen as two separate runs.
        while (
            $chunk =~ m{
                \G (.*?)                         # bytes skipped before the run
                (?:
                    ((?:[\020-\117]\04)+)        # pairs: 0x10-0x4F then 0x04
                  |
                    ((?:[\10\12\040-\177]\00)+)  # pairs: BS, LF or 0x20-0x7F then 0x00
                )
            }xgs
        ) {
            my $junk = $1;
            # Only one of the two alternatives matches; default the other
            # to an empty string so it prints as nothing.
            my $russian = defined $2 ? $2 : '';
            my $english = defined $3 ? $3 : '';

            $russian =~ s/\04//g;                # drop the second byte of each pair
            $russian =~ tr/\020-\117/\300-\377/; # shift 0x10-0x4F up to 0xC0-0xFF
            $english =~ s/\00//g;                # drop the second byte of each pair

            print {$filter} $russian, $english;

            # NOTE(review): the line break is written after the run that
            # follows a gap, not before it.  Kept exactly as the original
            # behaves; confirm whether that ordering is intended.
            print {$filter} "\n" if length $junk;
        }

        # sysseek(..., 0, 1) reports the current read position.
        last CHUNK if sysseek($word, 0, 1) > $START + $BLOCK;
    }

    close $word;

    # Closing the pipe waits for the child, so all filtered output has been
    # written before the parent exits.
    close $filter
        or die $! ? "Error closing filter pipe: $!\n"
                  : "Filter process exited with status $?\n";
}
else {
    # ---- child: drop lines that do not look like real text ----

    # Read STDIN explicitly: that is the pipe from the parent.  A bare <>
    # would open the file named in @ARGV instead.
    while (my $line = <STDIN>) {
        next if $line =~ /^.{0,3}$/;    # three characters or fewer
        next if $line =~ /(.)\1\1/;     # same character three times in a row
        print $line;
    }
}