#Last Updated 05.09.09
####
if ( $file[$i] =~ m/(\\(gll|[abcdef(exg.)]g\.)|textsc\/)/ ) {
##
##
... Note: The script and the TeX file have to be in the same directory. ...
##
##
N-=non- (e.g. NSG nonsingular, NPST nonpast)
##
##
#!/usr/bin/perl
=head1 NAME

name-of-script - list the abbreviations used in a LaTeX file

=head1 SYNOPSIS

  name-of-script [-l] filename.tex

=head1 DESCRIPTION

This script reads a given LaTeX file, finds everything in the text
that looks like an abbreviation, and then creates a new file in the
same directory (called "filename-abbrev.txt") that lists them all.

In this process, an abbreviation in LaTeX is defined as:

=over 4

=item *

this...

=item *

that...

=item *

whatever else...

=back

If your LaTeX file uses abbreviations that are specified in the
'Leipzig Glossing Rules' (LGR), you can use the '-l' option to have
these abbreviations listed with the full terms that they represent.
In this case, the output file will list the non-LGR abbreviations
first, and then the LGR ones are given with their meanings.

=cut

use strict;
use warnings;
use Getopt::Long;

# ---------------------------------------------------------------------------
# Collect the glossing abbreviations used in a LaTeX file and write them to
# "<basename>-abbrev.txt" next to the input file.
#
# Usage:  script [-l] filename.tex
#   -l    separate the abbreviations found in the Leipzig Glossing Rules
#         table (the __DATA__ section) and print those as \item[ABBR] 'term'
#         lines after the unknown ones.
#
# Dies with the usage text on bad options or a missing input file, and with
# "<script>: <file>: <OS error>" if a file cannot be opened or written.
# ---------------------------------------------------------------------------

# Leipzig Glossing Rules table: abbreviation => full term, read from the
# "ABBR=term" lines that follow the __DATA__ marker.
my %lgr;
while ( my $entry = <DATA> ) {
    chomp $entry;
    next unless $entry =~ /=/;    # ignore blank or malformed lines
    my ( $abbr, $term ) = split /=/, $entry, 2;
    $lgr{$abbr} = $term;
}

my $Usage = "$0 [-l] filename.tex\n (run 'perldoc $0' for help)\n";
my $opt_lgr;
my $opt_ok = GetOptions( 'l' => \$opt_lgr );
my $arg_ok = ( @ARGV == 1 and -f $ARGV[0] );
die $Usage unless ( $opt_ok and $arg_ok );

# Slurp the whole TeX file; the scan below needs to look one line ahead.
my $filename = shift;
open( my $tex_fh, "<:utf8", $filename ) or die "$0: $filename: $!\n";
my @texlines = <$tex_fh>;
close $tex_fh;
chomp @texlines;

# A line containing one of these macros announces that the NEXT line is the
# one to scan for abbreviations.
my $marker_re = qr/\\(?:gll|[abcdef]g\.|exg\.?)/;

# What counts as an abbreviation on the scanned line.  Exactly one of the
# three groups is set per match.  The delimiters are checked with lookaround
# instead of being consumed, so adjacent items such as NOM-ACC-DAT are all
# found; the negated form also accepts the start and end of the line, whose
# newline (a whitespace delimiter) was removed by chomp.
my $abbr_re = qr{
      (?<![^-=\s.:]) ([A-Z]+) (?![^-=\s.:])   # capitals between delimiters
    | (SG|DU|PL)                              # number markers, anywhere
    | ([123])                                 # person digits, anywhere
}x;

my %abbr_seen;
for my $ln ( 0 .. $#texlines - 1 ) {
    next unless $texlines[$ln] =~ $marker_re;
    my $gloss = $texlines[ $ln + 1 ];
    while ( $gloss =~ /$abbr_re/g ) {
        # Record whichever alternative matched, not just the first group.
        my ($abbr) = grep { defined } ( $1, $2, $3 );
        $abbr_seen{$abbr}++;
    }
}

# Derive the output name by replacing only a trailing ".tex", so that dots
# elsewhere in the path (e.g. "my.textbook.tex") are left intact.
( my $outname = $filename ) =~ s/\.tex\z//;
$outname .= '-abbrev.txt';
open( my $abbr_fh, ">:utf8", $outname ) or die "$0: $outname: $!\n";

# First section: every abbreviation, or only the non-LGR ones with -l.
for my $abbr ( sort keys %abbr_seen ) {
    next if ( $opt_lgr and exists $lgr{$abbr} );
    print {$abbr_fh} "$abbr\n";
}

# Second section (-l only): LGR abbreviations with their meanings, one
# \item per line.
if ($opt_lgr) {
    print {$abbr_fh} "\n";
    for my $abbr ( sort keys %abbr_seen ) {
        next unless exists $lgr{$abbr};
        print {$abbr_fh} "\\item[$abbr] '$lgr{$abbr}'\n";
    }
}

# Buffered write errors only surface at close, so check it.
close $abbr_fh or die "$0: $outname: $!\n";
__DATA__
1=first person
2=second person
3=third person
A=agent-like argument of canonical transitive verb
ABL=ablative
ABS=absolutive
ACC=accusative
ADJ=adjective
ADV=adverb(ial)
AGR=agreement
ALL=allative
ANTIP=antipassive
APPL=applicative
ART=article
AUX=auxiliary
BEN=benefactive
CAUS=causative
CLF=classifier
COM=comitative
COMP=complementizer
COMPL=completive
COND=conditional
COP=copula
CVB=converb
DAT=dative
DECL=declarative
DEF=definite
DEM=demonstrative
DET=determiner
DIST=distal
DISTR=distributive
DU=dual
DUR=durative
ERG=ergative
EXCL=exclusive
F=feminine
FOC=focus
FUT=future
GEN=genitive
IMP=imperative
INCL=inclusive
IND=indicative
INDF=indefinite
INF=infinitive
INS=instrumental
INTR=intransitive
IPFV=imperfective
IRR=irrealis
LOC=locative
M=masculine
N=neuter
N-=non- (e.g. NSG nonsingular, NPST nonpast)
NEG=negation, negative
NMLZ=nominalizer/nominalization
NOM=nominative
OBJ=object
OBL=oblique
P=patient-like argument of canonical transitive verb
PASS=passive
PFV=perfective
PL=plural
POSS=possessive
PRED=predicative
PRF=perfect
PRS=present
PROG=progressive
PROH=prohibitive
PROX=proximal/proximate
PST=past
PTCP=participle
PURP=purposive
Q=question particle/marker
QUOT=quotative
RECP=reciprocal
REFL=reflexive
REL=relative
RES=resultative
S=single argument of canonical intransitive verb
SBJ=subject
SBJV=subjunctive
SG=singular
TOP=topic
TR=transitive
VOC=vocative