#!/usr/bin/perl

use strict;
use warnings;
use Argdom;


#    Settings shared by all the subs. setGlobalVars overwrites the three
#    entries with what it gets from the Argdom object.
my $globalVars = {
    conf  => '',
    lang  => '',
    paths => [],
};

#    Forward declarations: they let the subs be called without parentheses
#    before their definitions have been seen.
sub extractRelations;
sub isOneOf;
sub isValidLine;
sub keyChooser;
sub printHelp;
sub replaceNotations;
sub replaceSafely;
sub setGlobalVars;
sub singleChooser;
sub splitTwo;

#    Output contains accented characters; especially useful during debugging.
binmode STDOUT, ':utf8';

#    Entry point, kept in a sub to make the script more organised.
#    Parameters: the command-line arguments.
#    Returns:    the exit status for the process. It is 0 on every normal
#                path (help included); failures die inside the helpers.
sub lesmetsMain
{
    my $relations = {};

    #    Direct method call rather than the ambiguous indirect-object
    #    form `new Argdom(...)'.
    my $ad = Argdom->new(\@_);
    $ad->setKeyChooser(\&keyChooser);
    $ad->setSingleChooser(\&singleChooser);

    #    --help or -h anywhere in the list returned by getArbs means: print
    #    the help and do nothing else.
    my $arbs = $ad->getArbs;
    return printHelp() if isOneOf('--help', $arbs) or isOneOf('-h', $arbs);

    setGlobalVars($ad);
    extractRelations($relations);    #    Fills $relations from the config file.
    replaceNotations($relations);    #    Applies them to every input.
    print "\n";
    return 0;
}
exit lesmetsMain(@ARGV);

#    Reads the config file and stores every `code replacement' pair it
#    holds in the hash ref passed as the only parameter.
#    The file is the first -conf value when there is one; otherwise it is
#    ~/.lesmets.<lang>, where <lang> is the first -lang value, or `def'.
#    Dies if the file cannot be opened.
sub extractRelations
{
    my ($toWhere) = @_;
    my $confFile  = $globalVars->{'conf'}->[0];
    unless ($confFile)
    {
        my $lang  = $globalVars->{'lang'};
        $confFile = $ENV{'HOME'} . '/.lesmets.' . ($lang ? $lang->[0] : 'def');
    }

    #    A lexical handle cannot clash with another CONF elsewhere, and it is
    #    closed by itself if something dies part-way through.
    open my $conf, '<:utf8', $confFile or die "Could not open $confFile: $!";
    while (my $line = <$conf>)
    {
        chomp $line;
        next unless isValidLine($line);    #    Comments and blank lines.
        my ($code, $replacement) = splitTwo($line);
        $toWhere->{$code} = $replacement;
    }
    close $conf;
    return;
}

#    Callback handed to Argdom's setKeyChooser.
#    Returns 1 when its first parameter is -conf or -lang, and 0 otherwise.
#    (Presumably this marks the options that take a value -- confirm in
#    Argdom.pm.)
sub keyChooser
{
    my ($candidate) = @_;
    return isOneOf($candidate, ['-conf', '-lang']);
}

#    Membership test by string equality.
#    Parameters: the value to look for, and an array ref to look in.
#    Returns:    1 if some element of the array is `eq' to the value, else 0.
#    Dies if there are fewer than two parameters, or if parameter two is
#    not an array ref.
sub isOneOf
{
    die 'Insufficient parameters to isOneOf sub' unless @_ > 1;
    die 'Parameter two to isOneOf should be an array ref' unless ref $_[1] eq 'ARRAY';
    my ($wanted, $choices) = @_;
    foreach my $choice (@{$choices})
    {
        return 1 if $choice eq $wanted;
    }
    return 0;
}

#    Callback handed to Argdom's setSingleChooser.
#    Ignores its parameters and always returns 0.
#    (Presumably that means no argument is a stand-alone switch -- confirm
#    in Argdom.pm.)
sub singleChooser
{
    return 0;
}

#    Copies the parsed command line out of an Argdom object into $globalVars:
#    the -conf and -lang entries of what getKeys returns, and the result of
#    getArbs as the paths.
#    Dies if called without a parameter, or if the parameter is not an
#    Argdom object.
sub setGlobalVars
{
    die 'setGlobalvars takes one parameter' unless @_;
    die 'Parameter one to setGlobalVars must be an Argdom object' unless ref $_[0] eq 'Argdom';
    my ($ad) = @_;
    my %keys = %{$ad->getKeys};
    $globalVars->{'conf'}  = $keys{'-conf'};
    $globalVars->{'lang'}  = $keys{'-lang'};
    $globalVars->{'paths'} = $ad->getArbs;
}

#    Uses things that are less efficient than regular expressions, but more
#    intelligible -- to me, at least.
#    Tells whether a config-file line holds a code.
#    Parameter: the line, already chomped.
#    Returns:   0 if the line is empty, has only spaces and tabs, or has a
#               hash (#) as its first non-blank character; 1 otherwise.
sub isValidLine
{
    my ($line) = @_;
    foreach my $pos (0 .. length($line) - 1)
    {
        my $char = substr $line, $pos, 1;
        next if $char eq ' ' or $char eq "\t";    #    Still in the leading blanks.

        #    First real character: it alone decides.
        return $char eq '#' ? 0 : 1;
    }
    return 0;    #    Nothing but blanks (or nothing at all).
}

#    Splits a config-file line into its two columns.
#    Parameter: the line (chomped, and already accepted by isValidLine).
#    Returns:   a two-element list, (code, replacement).
#    Leading spaces and tabs are dropped, so an indented line cannot give an
#    empty code. The columns are separated by the first space or tab; only
#    that one character is consumed, and the rest of the line is the
#    replacement, kept as it is.
#    A line with no separator has no replacement column; it is returned as
#    (line, line), a rule that changes nothing, instead of a mangled code.
sub splitTwo
{
    my ($line) = @_;

    #    Step over the leading blanks.
    my $start = 0;
    while ($start < length($line))
    {
        my $char = substr $line, $start, 1;
        last unless $char eq ' ' or $char eq "\t";
        ++$start;
    }
    my $entry = substr $line, $start;

    #    The separator is whichever of space and tab comes first.
    my $space = index $entry, ' ';
    my $tab   = index $entry, "\t";
    my $cut   = $space == -1  ? $tab
              : $tab   == -1  ? $space
              : $space < $tab ? $space
              :                 $tab;

    return ($entry, $entry) if $cut == -1;
    return (substr($entry, 0, $cut), substr($entry, $cut + 1));
}

#    Applies the replacement rules to every input.
#    Parameter: a hash ref, code => replacement.
#    The inputs are the paths in $globalVars; if there are none, `-' is
#    used. The path `-' means: read STDIN, write STDOUT. Any other path is
#    read, and the result is written to the same path with `.out' added.
#    Dies if a file cannot be opened, or if an output file cannot be closed
#    cleanly.
sub replaceNotations
{
    my ($rels) = @_;

    #    Work on a copy, and test the list itself: an array ref is true even
    #    when the array is empty, so testing the ref never gave the default.
    my @paths = @{ $globalVars->{'paths'} || [] };
    @paths = ('-') unless @paths;

    #    A fixed order (longest code first) instead of hash order, so that
    #    overlapping codes give the same result on every run.
    my @codes = sort { length($b) <=> length($a) or $a cmp $b } keys %{$rels};

    foreach my $path (@paths)
    {
        #    Decided afresh for each path, so a file that comes after `-' is
        #    still processed as a file.
        my $interactive = $path eq '-';
        my ($in, $out);
        if ($interactive)
        {
            binmode STDOUT, ':utf8';
            binmode STDIN,  ':utf8';
            ($in, $out) = (\*STDIN, \*STDOUT);
        }
        else
        {
            open $in,  '<:utf8', $path          or die "Could not open $path for reading: $!";
            open $out, '>:utf8', $path . '.out' or die "Could not open $path.out for writing: $!";
        }

        #    defined(), not truth: a last line holding just `0' is still a line.
        while (defined(my $line = <$in>))
        {
            foreach my $code (@codes)
            {
                $line = replaceSafely($line, $code, $rels->{$code});
            }
            print {$out} $line;
        }

        next if $interactive;
        #    Buffered write errors only show up when the handle is closed.
        close $out or die "Could not finish writing $path.out: $!";
        close $in;
    }
    return;
}

#    Literal search-and-replace, without regular expressions.
#    Parameters: the text, the cue to look for, the replacement.
#    Returns:    a copy of the text with every occurrence of the cue
#                replaced. The text is scanned once, left to right, so what
#                has been put in is never looked at again.
#    An empty or undefined cue gives the text back unchanged.
sub replaceSafely
{
    my ($text, $cue, $replacement) = @_;

    #    index() finds the empty string at every offset and the scan would
    #    never advance: without this guard the loop below never ends.
    return $text unless defined $cue and length $cue;

    my ($result, $from, $cueLength) = ('', 0, length $cue);
    while ((my $hit = index($text, $cue, $from)) != -1)
    {
        $result .= substr($text, $from, $hit - $from) . $replacement;
        $from    = $hit + $cueLength;
    }
    return $result . substr($text, $from);    #    What is left after the last hit.
}

#    Prints the usage message to the currently selected output handle.
#    Returns 0, which lesmetsMain passes on as the exit status.
#    The text is a here-document: $0 is interpolated, and the slashes in the
#    paths need no escaping.
sub printHelp
{
    print <<"END_OF_HELP";
Usage:
$0 [-h | --help] [-lang xy] [-conf conf_path] [file [file [...]]] [-]

-h | --help     Prints this help message and quits.
-lang xy        xy should be a language code (depends on you). If it is provided, the config file
                that is used to guide $0 will have to be ~/.lesmets.xy.
                If you invoke thus: $0 -lang fr
                then the config file will have to be ~/.lesmets.fr
-conf conf_path Sets the config file's path. If this is provided, the -lang value is not used as explained above.
                The default config file should be at ~/.lesmets.def (like passing -lang def).
file            These are the files to process. They are treated one by one, in that order.
                A file is written out for each, with the same name, but with a `.out' added to the end. This is the file
                that contains the edited data.
-               That tells the program to get input via STDIN, and print to STDOUT. It is considered, in the internal
                logic, as the path to STDIN, and `-.out' as the path to STDOUT.
Information on how to write the config file is in the POD documentation, which you can get out by doing this:
    pod2html --outfile lesmets.html lesmets.pl
and then reading the lesmets.html file in Unicode-good browser. Try:
    w3m lesmets.html
END_OF_HELP
    return 0;
}

__DATA__

=pod

=head1 NAME

lesmets.pl - Accents-on-my-ASCII script

=head1 SYNOPSIS

Sample usage:

    lesmets.pl -
    Franc>ais: Les boeufs n'ont pas mange/ les cadeaux de Caesar.
    Français: Les bœufs n'ont pas mangé les cadeaux de Cæsar.
    
That run used the default config file, which is written out in here.

=head1 DESCRIPTION

Helps you put accents on characters that should have them (for the Romance languages, for example), with a keyboard
that doesn't have those characters. That calls for writing with the available chars to indicate where you want accents to be.
It goes beyond putting accents, but that's what I use it for. It can change stuff from any format to any format. All
you have to do is put it all in a config file.

=head1 USAGE

To get information about how to use it (and it is very simple), run:

    lesmets.pl --help

You can use your shell's rc (C<.bashrc>, usually) to make it easier to use. This line is in my C<.bashrc>:

    alias lesmets='lesmets.pl'

So that I don't have to remember to put the C<.pl> extension. Also, I have it in one of my PATH directories.
Makes it all much easier.

Don't forget to read through again, after processing. Some character sequences, that you may have meant in good faith, may
have been understood to be codes for the program. B<I<You should read through.>>

The code requires the C<Argdom.pm> module, which should also be available where you got this file.

=head1 CONFIGURATION FILES

=head2 Sample Config File

    #    This is the default config file for lesmets. It is good enough.
    #    You can save it at ~/.lesmets.def
    #    What you should put there, for ease, should be the config file you use most. This happens to be the one,
    #    for me.
    #    By the way, I don't remember where I got this idea, but I must have been reading about one of those
    #    made-for-computer cross-language dictionaries. I'll put the credits here when I find it again.
    #    And I got the accented characters from Open Office (Menu: Insert -> Special Character)
    
    A/ Á
    A\ À
    A^ Â
    A: Ä
    AE Æ
    C> Ç
    E\ È
    E/ É
    E^ Ê
    E: Ë
    I\ Ì
    I/ Í
    I^ Î
    I: Ï
    O\ Ò
    O/ Ó
    O^ Ô
    O: Ö
    U\ Ù
    U/ Ú
    U^ Û
    U: Ü
    Y/ Ý
    a\ à
    a/ á
    a^ â
    a: ä
    ae æ
    c> ç
    e\ è
    e/ é
    e^ ê
    e: ë
    i\ ì
    i/ í
    i^ î
    i: ï
    o\ ò
    o/ ó
    o^ ô
    o: ö
    u\ ù
    u/ ú
    u^ û
    u: ü
    oe œ

=head2 How to Write a Config File

Any line whose first non-whitespace character is a hash (#) is a comment, and is skipped over. Same for lines with no
non-whitespace characters.
A # does not always show the beginning of a comment. It only does that if it is the first non-whitespace char in the line. In other
cases, it is part of the codes.

So, the config file codes are two columns of characters. The columns are separated by a single space only. Tabs work,
but don't use them. The Left is the series of characters that you will type out. They are usually what you have on your
keyboard. The Right is the sequence (usually one character) that will replace the Left.

So, with that config file, you have all occurrences of o: being replaced with o with a diaeresis at the top.
Because all these sequences may occur without your intention to make them codes, you should read through, after processing
the file. I didn't feel like putting logic to skip over some bits, because it is too rare a case to make me add 1000 lines
for. Also, I'm not planning to use this to write a book E<8212> just a bit of correspondence. Same should apply to you.
And, if you really want that, you can add it. See the L<"COPYRIGHT"> section.

=head1 EXTENDING NAUTILUS

You can use this script to extend Nautilus, the GNOME File manager. It is very nifty. You will only have to right-click a file icon, and then go to Scripts on the menu that pops up, and click lesmets.pl. The processed file will show up in the same folder.

Copy lesmets.pl into Nautilus' script directory. It is usually ~/.gnome2/nautilus-scripts. Then, visit that directory with Nautilus, so it can know there is a script there (if it does not show ``Scripts'' when you right click a file).
Now, Nautilus is ready to use lesmets.pl! That easy!

=head1 CREDITS

I was checking for an English-French dictionary for my phone, and I stumbled on some project to create a translation dictionary for computers. I think that is where I checked and found that they were representing the characters that had accents on them the way I have done it in that config file. All I remember is the stuff of the / and \ accents. The rest, I had to get adventurous.

=head1 TO-DO

=over

=item 0. Add a GTK+ front-end.

This should be so simple as to discourage me from trying it.

=item 1. Find out how to extend Konq (I don't use KDE as of now), and add it in here.

=back

=head1 BUGS

None that I know of. But one feature is missing: indication of a section of the source file to skip over.

=head1 AUTHOR

Revence XXVII <revence27@praize.com>

=head1 COPYRIGHT

No copyright, no licence.

This code, algorithms and all the ideas pertaining to this intellectual property, I hereby place in the Public Domain.

=cut
[download]
In reply to lesmets.pl by revence27
Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
Read Where should I post X? if you're not absolutely sure you're posting in the right place.
Please read these before you post! —
Posts may use any of the Perl Monks Approved HTML tags:
a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
	For:		Use:
	&		`&`
	<		`<`
	>		`>`
	[		`[`
	]		`]`
Link using PerlMonks shortcuts! What shortcuts can I use for linking?
See Writeup Formatting Tips and other pages linked from there for more info.