#! /usr/bin/perl
use strict;
use warnings;

use Argdom;

# Run-time settings shared by the subs in this file; filled in by
# setGlobalVars from the parsed command line.
#   conf  - what Argdom stored for the -conf key (read below as an array ref)
#   lang  - what Argdom stored for the -lang key (read below as an array ref)
#   paths - the remaining command-line arguments: the files to process
my $globalVars = { conf => '', lang => '', paths => [] };

# Forward declarations, so that the subs can be called without parentheses
# before their definitions have been seen.
sub extractRelations;
sub keyChooser;
sub isOneOf;
sub singleChooser;
sub setGlobalVars;
sub isValidLine;
sub splitTwo;
sub replaceNotations;
sub replaceSafely;
sub printHelp;

binmode STDOUT, ':utf8';    # Especially during debugging

# Entry point, to make the script more organised.
#
# Parameters: the command-line arguments (normally @ARGV).
# Returns:    the process exit status (0 on success).
sub lesmetsMain {
    my $relations = {};

    my $ad = Argdom->new(\@_);
    $ad->setKeyChooser(\&keyChooser);
    $ad->setSingleChooser(\&singleChooser);

    # NOTE(review): replaceNotations treats a false "paths" value as
    # possible, so getArbs is presumably able to return one; isOneOf would
    # die on it, hence the fallback to an empty list.
    my $arbs = $ad->getArbs || [];
    return printHelp() if isOneOf('--help', $arbs) or isOneOf('-h', $arbs);

    setGlobalVars($ad);
    extractRelations($relations);
    replaceNotations($relations);
    print "\n";
    return 0;
}

exit lesmetsMain(@ARGV);

# Reads the config file and stores every "code replacement" pair in the
# hash ref given as the only parameter (code => replacement).
#
# The file is the -conf value when one was given; otherwise it is
# ~/.lesmets.<lang>, where <lang> is the -lang value or 'def'.
# Dies when the file cannot be located or opened.
sub extractRelations {
    my $toWhere = shift;

    my $conf = $globalVars->{'conf'};
    my $lang = $globalVars->{'lang'};

    # Only dereference what really is an array ref: the initial values are
    # plain strings, and a missing option leaves the slot undefined.
    my $confFile = ref($conf) eq 'ARRAY' ? $conf->[0] : undef;
    unless ($confFile) {
        my $suffix = (ref($lang) eq 'ARRAY' and $lang->[0]) ? $lang->[0] : 'def';
        my $home   = $ENV{'HOME'};
        die 'Cannot locate the config file: HOME is not set (use -conf)'
            unless defined $home;
        $confFile = $home . '/.lesmets.' . $suffix;
    }

    open my $confHandle, '<:utf8', $confFile
        or die "Could not open $confFile: $!";
    while (defined(my $line = <$confHandle>)) {
        chomp $line;
        next if not isValidLine($line);    # comments and blank lines
        my ($code, $replacement) = splitTwo($line);

        # An empty code can never be searched for meaningfully; skip it.
        next unless defined $code and length $code;
        $toWhere->{$code} = defined $replacement ? $replacement : '';
    }
    close $confHandle;
    return;
}

# Callback handed to Argdom via setKeyChooser: true (1) when the argument is
# one of the options this script knows as keys (-conf, -lang), 0 otherwise.
sub keyChooser {
    my $arg = shift;
    return isOneOf($arg, ['-conf', '-lang']);
}

# Returns 1 when the first parameter is string-equal to an element of the
# array ref given as the second parameter, 0 otherwise.
# Dies when fewer than two parameters are given, or when the second one is
# not an array ref.
sub isOneOf {
    die 'Insufficient parameters to isOneOf sub' unless @_ > 1;
    die 'Parameter two to isOneOf should be an array ref'
        unless ref($_[1]) eq 'ARRAY';

    my ($wanted, $candidates) = @_;
    foreach my $candidate (@{$candidates}) {
        return 1 if $candidate eq $wanted;
    }
    return 0;
}

# Callback handed to Argdom via setSingleChooser: always answers 0.
sub singleChooser {
    return 0;
}

# Copies the parsed command line from the Argdom object given as the only
# parameter into $globalVars (conf, lang and paths).
# Dies when the parameter is missing or is not an Argdom object.
sub setGlobalVars {
    die 'setGlobalvars takes one parameter' unless @_;
    die 'Parameter one to setGlobalVars must be an Argdom object'
        unless ref($_[0]) eq 'Argdom';

    my $ad = shift;
    my %ks = %{ $ad->getKeys };
    $globalVars->{'conf'}  = $ks{'-conf'};
    $globalVars->{'lang'}  = $ks{'-lang'};
    $globalVars->{'paths'} = $ad->getArbs;
    return;
}

# Uses things that are less efficient than regular expressions, but more
# intelligible -- to me, at least.
# Tells whether a config-file line carries a "code replacement" pair.
#
# Parameter: the line, already chomped.
# Returns:   0 for lines that are empty, contain only spaces and tabs, or
#            whose first non-blank character is a hash (#); 1 otherwise.
sub isValidLine {
    my $line = shift;
    return 0 unless defined $line;

    foreach my $i (0 .. length($line) - 1) {
        my $char = substr $line, $i, 1;
        next if _isBlank($char);

        # The first non-blank character decides: '#' starts a comment.
        return $char eq '#' ? 0 : 1;
    }
    return 0;
}

# Splits a config-file line into its two columns.
#
# Parameter: the line, already chomped.
# Returns:   a two-element list (code, replacement). The code is the first
#            run of non-blank characters (leading blanks are skipped); the
#            replacement is everything after the single space or tab that
#            follows the code. A line without a code or without a separator
#            yields ('', ''), so that callers can recognise and skip it.
sub splitTwo {
    my $line = shift;
    return ('', '') unless defined $line;

    my $length = length $line;

    # Skip leading blanks, which isValidLine lets through.
    my $start = 0;
    ++$start while $start < $length and _isBlank(substr $line, $start, 1);

    # Walk to the first blank after the code: that is the separator.
    my $sep = $start;
    ++$sep while $sep < $length and not _isBlank(substr $line, $sep, 1);

    # No code at all, or no separator before the end of the line.
    return ('', '') if $sep == $start or $sep >= $length;

    return (substr($line, $start, $sep - $start), substr($line, $sep + 1));
}

# True when the one-character string given is a space or a tab.
sub _isBlank {
    my $char = shift;
    return ($char eq ' ' or $char eq "\t") ? 1 : 0;
}

# Applies every "code => replacement" pair to each input.
#
# Parameter: a hash ref mapping codes to their replacements.
# Reads the list of paths from $globalVars->{'paths'}; when there is none,
# the single path '-' is used. The path '-' means: read STDIN, write STDOUT.
# Any other path is read, and the result is written to "<path>.out".
# Dies when a file cannot be opened, or when an output file cannot be
# written out completely.
sub replaceNotations {
    my $rels = shift;

    # An empty list must fall back to STDIN too, not only a missing one.
    my $paths = $globalVars->{'paths'};
    $paths = ['-'] unless ref($paths) eq 'ARRAY' and @{$paths};

    # Fixed order (longest code first, then alphabetical), so that
    # overlapping codes give the same result on every run instead of
    # depending on hash order. Empty codes cannot be searched for.
    # The pairs are applied one after the other, so a replacement that
    # produces another code will be replaced in turn.
    my @codes = sort { length($b) <=> length($a) or $a cmp $b }
                grep { length } keys %{$rels};

    foreach my $path (@{$paths}) {
        # Decided per path, so that files listed after '-' are still read.
        my $interactive = $path eq '-';
        my ($in, $out);
        if ($interactive) {
            binmode STDOUT, ':utf8';
            binmode STDIN,  ':utf8';
            ($in, $out) = (\*STDIN, \*STDOUT);
        }
        else {
            open $in, '<:utf8', $path
                or die "Could not open $path for reading: $!";
            open $out, '>:utf8', $path . '.out'
                or die "Could not open $path.out for writing: $!";
        }

        # defined(), so that a last line consisting of "0" is not lost.
        while (defined(my $line = <$in>)) {
            foreach my $code (@codes) {
                $line = replaceSafely($line, $code, $rels->{$code});
            }
            print {$out} $line;
        }

        unless ($interactive) {
            # Buffered write errors only show up when the file is closed.
            close $out or die "Could not finish writing $path.out: $!";
            close $in;
        }
    }
    return;
}

# Replaces every occurrence of a literal string; no pattern matching.
#
# Parameters: the text to work on, the string to look for, its replacement.
# Returns:    the text with each occurrence replaced, scanning from left to
#             right without re-reading replaced parts. The text comes back
#             unchanged when the string to look for is empty or undefined
#             (an empty string matches everywhere and would never finish).
sub replaceSafely {
    my ($cur, $cue, $rep) = @_;
    return '' unless defined $cur;
    return $cur unless defined $cue and length $cue;
    $rep = '' unless defined $rep;

    my $rez    = '';
    my $at     = 0;
    my $cueLen = length $cue;
    while ((my $pos = index $cur, $cue, $at) != -1) {
        $rez .= substr($cur, $at, $pos - $at) . $rep;
        $at = $pos + $cueLen;
    }
    return $rez . substr($cur, $at);
}

# Prints the usage message on STDOUT.
# Returns 0, which lesmetsMain uses as the exit status.
sub printHelp {
    print <<"END_HELP";
Usage: $0 [-h | --help] [-lang xy] [-conf conf_path] [file [file [...]]] [-]

    -h | --help         Prints this help message and quits.
    -lang xy            xy should be a language code (depends on you). If it is provided, the config file that
                        is used to guide $0 will have to be ~/.lesmets.xy. If you invoke thus:
                            $0 -lang fr
                        then the config file will have to be ~/.lesmets.fr
    -conf conf_path     Sets the config file's path. If this is provided, the -lang value is not used as explained above.
                        The default config file should be at ~/.lesmets.def (like passing -lang def).
    file                These are the files to process. They are treated one by one, in that order.
                        A file is written out for each, with the same name, but with a `.out' added to the end.
                        This is the file that contains the edited data.
    -                   That tells the program to get input via STDIN, and print to STDOUT. It is considered, in the
                        internal logic, as the path to STDIN, and `-.out' as the path to STDOUT.

Information on how to write the config file is in the POD documentation, which you can get out by doing this:
    pod2html --outfile lesmets.html lesmets.pl
and then reading the lesmets.html file in Unicode-good browser. Try:
    w3m lesmets.html
END_HELP
    return 0;
}

__DATA__

=pod

=head1 NAME

lesmets.pl - Accents-on-my-ASCII script

=head1 SYNOPSIS

Sample usage:

    lesmets.pl -
    Franc>ais: Les boeufs n'ont pas mange/ les cadeaux de Caesar.
    Français: Les bœufs n'ont pas mangé les cadeaux de Cæsar.

That run used the default config file, which is written out in here.

=head1 DESCRIPTION

Helps you put accents on characters that should have them (for the Romance
languages, for example), with a keyboard that doesn't have those characters.
That calls for writing with the available chars to indicate where you want
accents to be.

It goes beyond putting accents, but that's what I use it for. It can change
stuff from any format to any format. All you have to do is put it all in a
config file.

=head1 USAGE

To get information about how to use it (and it is very simple), run:

    lesmets.pl --help

You can use your shell's rc (C<.bashrc>, usually) to make it easier to use.
This line is in my C<.bashrc>:

    alias lesmets='lesmets.pl'

So that I don't have to remember to put the C<.pl> extension. Also, I have it
in one of my PATH directories. Makes it all much easier.

Don't forget to read through again, after processing. Some character
sequences, that you may have meant in good faith, may have been understood to
be codes for the program.
B< I<You should read through.> >

The code requires the C<Argdom.pm> module, which should also be available
where you got this file.

=head1 CONFIGURATION FILES

=head2 Sample Config File

    # This is the default config file for lesmets. It is good enough.
    # You can save it at ~/.lesmets.def
    # What you should put there, for ease, should be the config file you use most. This happens to be the one,
    # for me.
    # By the way, I don't remember where I got this idea, but I must have been reading about one of those
    # made-for-computer cross-language dictionaries. I'll put the credits here when I find it again.
    # And I got the accented characters from Open Office (Menu: Insert -> Special Character)
    A/ Á
    A\ À
    A^ Â
    A: Ä
    AE Æ
    C> Ç
    E\ È
    E/ É
    E^ Ê
    E: Ë
    I\ Ì
    I/ Í
    I^ Î
    I: Ï
    O\ Ò
    O/ Ó
    O^ Ô
    O: Ö
    U\ Ù
    U/ Ú
    U^ Û
    U: Ü
    Y/ Ý
    a\ à
    a/ á
    a^ â
    a: ä
    ae æ
    c> ç
    e\ è
    e/ é
    e^ ê
    e: ë
    i\ ì
    i/ í
    i^ î
    i: ï
    o\ ò
    o/ ó
    o^ ô
    o: ö
    u\ ù
    u/ ú
    u^ û
    u: ü
    oe œ

=head2 How to Write a Config File

Any line whose first non-whitespace character is a hash (#) is a comment, and
is skipped over. Same for lines with no non-whitespace characters.

A # does not, by itself, show the beginning of a comment. It only does so if
it is the first non-whitespace char in the line. In other cases, it is part of
the codes.

So, the config file codes are two columns of characters. The columns are
separated by a single space only. Tabs work, but don't use them. The Left is
the series of characters that you will type out. They are usually what you
have on your keyboard. The Right is the sequence (usually one character) that
will replace the Left.

So, with that config file, you have all occurrences of o: being replaced with
o with a diaeresis at the top.

Because all these sequences may occur without your intention to make them
codes, you should read through, after processing the file. I didn't feel like
putting logic to skip over some bits, because it is too rare a case to make me
add 1000 lines for.
Also, I'm not planning to use this to write a book E<8212> just a bit of
correspondence. Same should apply to you.

And, if you really want that, you can add it. See the L</COPYRIGHT> section.

=head1 EXTENDING NAUTILUS

You can use this script to extend Nautilus, the GNOME File manager. It is very
nifty. You will only have to right-click a file icon, and then go to Scripts
on the menu that pops up, and click lesmets.pl. The processed file will show
up in the same folder.

Copy lesmets.pl into Nautilus' script directory. It is usually
~/.gnome2/nautilus-scripts. Then, visit that directory with Nautilus, so it
can know there is a script there (if it does not show ``Scripts'' when you
right click a file).

Now, Nautilus is ready to use lesmets.pl! That easy!

=head1 CREDITS

I was checking for an English-French dictionary for my phone, and I stumbled
on some project to create a translation dictionary for computers. I think that
is where I checked and found that they were representing the characters that
had accents on them the way I have done it in that config file. All I remember
is the stuff of the / and \ accents. The rest, I had to get adventurous.

=head1 TO-DO

=over

=item 0. Add a GTK+ front-end.

This should be so simple as to discourage me from trying it.

=item 1. Find out how to extend Konq (I don't use KDE as of now), and add it in here.

=back

=head1 BUGS

None that I know of. But one feature is missing: indication of a section of
the source file to skip over.

=head1 AUTHOR

Revence XXVII <revence27@praize.com>

=head1 COPYRIGHT

No copyright, no licence.

This code, algorithms and all the ideas pertaining to this intellectual
property, I hereby place in the Public Domain.

=cut

In reply to lesmets.pl by revence27

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post, it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.