#! /usr/bin/perl

use strict;
use warnings;

use Argdom;

# Script-wide settings, filled in by setGlobalVars from the command line:
#   conf  - value(s) given to -conf (array ref, or undef when absent)
#   lang  - value(s) given to -lang (array ref, or undef when absent)
#   paths - the remaining arguments: the files to process
my $globalVars = {conf => '', lang => '', paths => []};

# Forward declarations, so the subs can be called without parentheses
# before their definitions appear.
sub extractRelations;
sub keyChooser;
sub isOneOf;
sub singleChooser;
sub setGlobalVars;
sub isValidLine;
sub splitTwo;
sub replaceNotations;
sub replaceSafely;
sub printHelp;

binmode STDOUT, ':utf8';    # Especially during debugging

# To make the script more organised ...
#
# Entry point. Takes the command-line arguments, prints the help text if
# -h or --help is among them, otherwise loads the code table from the
# config file and rewrites every requested input.
# Returns the process exit status (0 on success).
sub lesmetsMain {
    my $relations = {};

    my $ad = Argdom->new(\@_);
    $ad->setKeyChooser(\&keyChooser);
    $ad->setSingleChooser(\&singleChooser);

    my $arbs = $ad->getArbs;
    return printHelp if isOneOf('--help', $arbs) or isOneOf('-h', $arbs);

    setGlobalVars $ad;
    extractRelations $relations;
    replaceNotations $relations;

    print "\n";
    return 0;
}

exit lesmetsMain @ARGV;

# Reads the config file and stores every "code replacement" pair in the
# hash ref passed as the only parameter (code => replacement).
#
# The file is the -conf path when one was given; otherwise it is
# ~/.lesmets.<lang>, where <lang> is the -lang value or 'def'.
# Dies when the file cannot be opened.
sub extractRelations {
    my $toWhere = shift;

    my $conf = $globalVars->{'conf'};
    my $lang = $globalVars->{'lang'};

    # Both options are expected to arrive as array refs; anything else is
    # treated as "not given" instead of being dereferenced blindly.
    my $confFile = ref $conf eq 'ARRAY' ? $conf->[0] : undef;
    my $langCode = (ref $lang eq 'ARRAY' and defined $lang->[0])
                 ? $lang->[0]
                 : 'def';

    $confFile = $ENV{'HOME'} . '/.lesmets.' . $langCode unless $confFile;

    open my $confHandle, '<:utf8', $confFile
        or die "Could not open $confFile: $!";

    while (my $line = <$confHandle>) {
        chomp $line;
        next if not isValidLine $line;

        # Because I hate regular expressions. Too difficult.
        my @kv = splitTwo $line;

        # An empty code would match at every position of the text and
        # can never be a useful entry, so it is dropped here.
        next unless defined $kv[0] and length $kv[0];

        $toWhere->{$kv[0]} = $kv[1];
    }

    close $confHandle;
}

# Callback handed to Argdom->setKeyChooser. Returns 1 for '-conf' and
# '-lang', 0 for anything else.
# NOTE(review): presumably this marks the options that take a value --
# confirm against the Argdom module.
sub keyChooser {
    return isOneOf((shift), ['-conf', '-lang']);
}

# Returns 1 when the first parameter is string-equal to an element of the
# array ref given as the second parameter, 0 otherwise.
# Dies when called with fewer than two parameters, or when the second is
# not an array ref.
sub isOneOf {
    die 'Insufficient parameters to isOneOf sub' unless @_ > 1;
    die 'Parameter two to isOneOf should be an array ref'
        unless ref $_[1] eq 'ARRAY';

    my ($needle, $haystack) = @_;
    foreach my $candidate (@{$haystack}) {
        return 1 if $candidate eq $needle;
    }
    return 0;
}

# Callback handed to Argdom->setSingleChooser. Always returns 0.
# NOTE(review): presumably means "no argument is a stand-alone flag" --
# confirm against the Argdom module.
sub singleChooser {
    return 0;
}

# Copies the parsed command line out of the Argdom object given as the
# only parameter into $globalVars (conf, lang and paths).
# Dies when the parameter is missing or is not an Argdom object.
sub setGlobalVars {
    die 'setGlobalvars takes one parameter' unless @_;
    die 'Parameter one to setGlobalVars must be an Argdom object'
        unless ref $_[0] eq 'Argdom';

    my $ad = shift;
    my %ks = %{$ad->getKeys};

    $globalVars->{'conf'}  = $ks{'-conf'};
    $globalVars->{'lang'}  = $ks{'-lang'};
    $globalVars->{'paths'} = $ad->getArbs;
}

# Uses things that are less efficient than regular expressions, but more intelligible -- to me, at least.
# Tells whether a config line carries a code/replacement pair.
# Returns 0 for a line with no non-blank character and for a line whose
# first non-blank character is '#' (a comment); returns 1 otherwise.
# Only the space and the tab count as blanks.
sub isValidLine {
    my $line = shift;

    for my $pos (0 .. length($line) - 1) {
        my $char = substr $line, $pos, 1;
        next if $char eq ' ' or $char eq "\t";

        # First non-blank character: it alone decides.
        return $char eq '#' ? 0 : 1;
    }

    return 0;
}

# Splits a config line into (code, replacement).
#
# Leading blanks are skipped, the code runs up to the first space or tab,
# and the replacement is everything after that one separator character.
# A line without any separator yields the whole line as the code and an
# empty replacement.
sub splitTwo {
    my $line = shift;
    my $len  = length $line;

    # Skip leading blanks, so an indented line does not yield an empty code.
    my $start = 0;
    while ($start < $len) {
        my $char = substr $line, $start, 1;
        last unless $char eq ' ' or $char eq "\t";
        ++$start;
    }

    # Find the first separator after the code.
    my $sep = $start;
    while ($sep < $len) {
        my $char = substr $line, $sep, 1;
        last if $char eq ' ' or $char eq "\t";
        ++$sep;
    }

    my @rez = $sep < $len
        ? (substr($line, $start, $sep - $start), substr($line, $sep + 1))
        : (substr($line, $start), '');
    return @rez;
}

# Applies every code => replacement pair of the hash ref given as the only
# parameter to each input named in $globalVars->{'paths'}.
#
# The path '-' means: read STDIN and print to STDOUT. Any other path is
# read, and the result is written to "<path>.out". When no path was given
# at all, '-' is assumed. Dies when a file cannot be opened, or when an
# output file cannot be closed.
sub replaceNotations {
    my $rels = shift;

    my $paths = $globalVars->{'paths'};
    my @todo  = (ref $paths eq 'ARRAY' and @{$paths}) ? @{$paths} : ('-');

    # Hash order changes from run to run; applying the longest codes first
    # (ties broken alphabetically) makes the result reproducible when one
    # code overlaps another.
    my @codes = sort { length($b) <=> length($a) or $a cmp $b } keys %{$rels};

    foreach my $path (@todo) {
        # Decided afresh for every path, so that a file named after a '-'
        # is still read from disk.
        my $interactive = $path eq '-';
        my ($in, $out);

        if ($interactive) {
            binmode STDOUT, ':utf8';
            binmode STDIN,  ':utf8';
            ($in, $out) = (\*STDIN, \*STDOUT);
        }
        else {
            open $in, '<:utf8', $path
                or die "Could not open $path for reading: $!";
            open $out, '>:utf8', $path . '.out'
                or die "Could not open $path.out for writing: $!";
        }

        # defined(), not truth: a last line holding just "0" is still data.
        while (defined(my $line = <$in>)) {
            foreach my $code (@codes) {
                $line = replaceSafely($line, $code, $rels->{$code});
            }
            print {$out} $line;
        }

        unless ($interactive) {
            # Buffered write errors only show up when the handle is closed.
            close $out or die "Could not close $path.out: $!";
            close $in;
        }
    }
}

# Returns a copy of the first parameter in which every occurrence of the
# second parameter (the code) is replaced by the third (the replacement).
# Matching is literal and goes from left to right; replaced text is not
# scanned again. An empty code leaves the text unchanged.
sub replaceSafely {
    my ($cur, $cue, $rep) = @_;

    # An empty code matches everywhere without ever advancing.
    my $cueLen = length $cue;
    return $cur unless $cueLen;

    my ($rez, $at) = ('', 0);
    while ((my $pos = index($cur, $cue, $at)) != -1) {
        $rez .= substr($cur, $at, $pos - $at) . $rep;
        $at = $pos + $cueLen;
    }

    return $rez . substr($cur, $at);
}

# Prints the usage message to STDOUT. Returns 0, which lesmetsMain passes
# on as the exit status.
sub printHelp {
    print <<"END_OF_HELP";
Usage: $0 [-h | --help] [-lang xy] [-conf conf_path] [file [file [...]]] [-]

    -h | --help
        Prints this help message and quits.

    -lang xy
        xy should be a language code (depends on you). If it provided, the
        config file that is used to giude $0 will have to be ~/.lesmets.xy.
        If you invoke thus:
            $0 -lang fr
        then the config file will have to be ~/.lesmets.fr

    -conf conf_path
        Sets the config file's path. If this is provided, the -lang value is
        not used as explained above. The default config file should be at
        ~/.lesmets.def (like passing -lang def).

    file
        These are the files to process. They are treated one by one, in that
        order. A file is written out for each, with the same name, but with
        a `.out' added to the end. This is the file that contains the edited
        data.

    -
        That tells the program to get input via STDIN, and print to STDOUT.
        It is considered, in the internal logic, as the path to STDIN, and
        `-.out' as the path to STDOUT.

Information on how to write the config file is in the POD documentation,
which you can get out by doing this:
    pod2html --outfile lesmets.html lesmets.pl
and then reading the lesmets.html file in Unicode-good browser. Try:
    w3m lesmets.html
END_OF_HELP

    return 0;
}

__DATA__

=pod

=encoding utf8

=head1 NAME

lesmets.pl - Accents-on-my-ASCII script

=head1 SYNOPSIS

Sample usage:

    lesmets.pl -
    Franc>ais: Les boeufs n'ont pas mange/ les cadeaux de Caesar.
    Français: Les bœufs n'ont pas mangé les cadeaux de Cæsar.

That run used the default config file, which is written out in here.

=head1 DESCRIPTION

Helps you put accents on characters that should have them (for the Romance
languages, for example), with a keyboard that doesn't have those characters.
That calls for writing with the available chars to indicate where you want
accents to be.

It goes beyond putting accents, but that's what I use it for. It can change
stuff from any format to any format. All you have to do is put it all in a
config file.

=head1 USAGE

To get information about how to use it (and it is very simple), run:

    lesmets.pl --help

You can use your shell's rc (C<.bashrc>, usually) to make it easier to use.
This line is in my C<.bashrc>:

    alias lesmets='lesmets.pl'

So that I don't have to remember to put the C<.pl> extension. Also, I have it
in one of my PATH directories. Makes it all much easier.

Don't forget to read through again, after processing. Some character
sequences, that you may have meant in good faith, may have been understood to
be codes for the program.

B<Note:> The code requires the C<Argdom> module, which should also be
available where you got this file.

=head1 CONFIGURATION FILES

=head2 Sample Config File

    # This is the default config file for lesmets. It is good enough.
    # You can save it at ~/.lesmets.def
    # What you should put there, for ease, should be the config file you use most. This happens to be the one,
    # for me.
    # By the way, I don't remember where I got this idea, but I must have been reading about one of those
    # made-for-computer cross-language dictionaries. I'll put the credits here when I find it again.
    # And I got the accented characters from Open Office (Menu: Insert -> Special Character)
    A/ Á
    A\ À
    A^ Â
    A: Ä
    AE Æ
    C> Ç
    E\ È
    E/ É
    E^ Ê
    E: Ë
    I\ Ì
    I/ Í
    I^ Î
    I: Ï
    O\ Ò
    O/ Ó
    O^ Ô
    O: Ö
    U\ Ù
    U/ Ú
    U^ Û
    U: Ü
    Y/ Ý
    a\ à
    a/ á
    a^ â
    a: ä
    ae æ
    c> ç
    e\ è
    e/ é
    e^ ê
    e: ë
    i\ ì
    i/ í
    i^ î
    i: ï
    o\ ò
    o/ ó
    o^ ô
    o: ö
    u\ ù
    u/ ú
    u^ û
    u: ü
    oe œ

=head2 How to Write a Config File

Any line whose first non-whitespace character is a hash (#) is a comment, and
is skipped over. Same for lines with no non-whitespace characters. Anywhere
else in a line, # does not start a comment: it is a comment marker only when
it is the first non-whitespace character in the line. In other cases, it is
part of the codes.

So, the config file codes are two columns of characters. The columns are
separated by a single space only. Tabs work, but don't use them. The Left is
the series of characters that you will type out. They are usually what you
have on your keyboard. The Right is the sequence (usually one character) that
will replace the Left. So, with that config file, you have all occurrences of
o: being replaced with o with a diaeresis at the top.

Because all these sequences may occur without your intention to make them
codes, you should read through, after processing the file. I didn't feel like
putting logic to skip over some bits, because it is too rare a case to make
me add 1000 lines for. Also, I'm not planning to use this to write a book
E<mdash> just a bit of correspondence. Same should apply to you. And, if you
really want that, you can add it. See the L</COPYRIGHT> section.

=head1 EXTENDING NAUTILUS

You can use this script to extend Nautilus, the GNOME file manager. It is
very nifty. You will only have to right-click a file icon, and then go to
Scripts on the menu that pops up, and click lesmets.pl. The processed file
will show up in the same folder.

Copy lesmets.pl into Nautilus' script directory. It is usually
~/.gnome2/nautilus-scripts. Then, visit that directory with Nautilus, so it
can know there is a script there (if it does not show ``Scripts'' when you
right-click a file). Now, Nautilus is ready to use lesmets.pl! That easy!

=head1 CREDITS

I was checking for an English-French dictionary for my phone, and I stumbled
on some project to create a translation dictionary for computers. I think
that is where I checked and found that they were representing the characters
that had accents on them the way I have done it in that config file. All I
remember is the stuff of the / and \ accents. For the rest, I had to get
adventurous.

=head1 TO-DO

=over

=item 0.

Add a GTK+ front-end. This should be so simple as to discourage me from
trying it.

=item 1.

Find out how to extend Konq (I don't use KDE as of now), and add it in here.

=back

=head1 BUGS

None that I know of. But one feature is missing: indication of a section of
the source file to skip over.

=head1 AUTHOR

Revence XXVII

=head1 COPYRIGHT

No copyright, no licence. This code, algorithms and all the ideas pertaining
to this intellectual property, I hereby place in the Public Domain.

=cut