1
0
Fork 0
arangodb/3rdParty/snowball/libstemmer/mkmodules.pl

259 lines
5.8 KiB
Perl
Executable File

#!/usr/bin/env perl
use strict;
use 5.006;
use warnings;
# Command line:
#   <outfile> <C source directory> <modules description file>
#   <source list file> [<extn>]
# Any argument count other than four or five prints the usage line and
# exits with status 1.
my $progname = $0;
if (@ARGV != 4 && @ARGV != 5) {
    print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<extn>]\n";
    exit 1;
}

# Consume the four mandatory arguments, in order, from the front of @ARGV.
my ($outname, $c_src_dir, $descfile, $srclistfile) = splice(@ARGV, 0, 4);

# The optional fifth argument is turned into a "_<extn>" suffix that is
# inserted into the libstemmer/modules file names written by printsrclist().
my $extn = @ARGV ? '_' . shift(@ARGV) : '';

# Tables filled in by readinput() and addalgenc().
my %aliases;         # alias name     => algorithm name
my %algorithms;      # algorithm name => 1
my %algorithm_encs;  # algorithm name => { encoding name => 1 }
my %encs;            # encoding name  => 1
# addalgenc($alg, $enc)
#
# Record that algorithm $alg is provided in encoding $enc: the encoding is
# added to the per-algorithm set in %algorithm_encs and to the set of all
# encodings seen, %encs.
sub addalgenc($$) {
    my ($algorithm, $encoding) = @_;

    # Autovivification creates the per-algorithm hash the first time an
    # algorithm is seen; later calls just add another key to it.
    $algorithm_encs{$algorithm}{$encoding} = 1;

    $encs{$encoding} = 1;
}
# readinput()
#
# Parse the modules description file named by $descfile and fill in
# %algorithms, %aliases, %algorithm_encs and %encs.
#
# Lines that are blank or whose first non-blank character is '#' are
# skipped.  Every other line consists of three whitespace-separated fields:
#
#   <algorithm> <encoding>[,<encoding>...] <alias>[,<alias>...]
#
# Dies if the file cannot be opened or closed, or if a line has fewer than
# three fields.
sub readinput()
{
    # Three-argument open with a lexical handle; a missing or unreadable
    # description file is reported instead of silently yielding no modules.
    open(my $desc_fh, '<', $descfile)
        or die "Can't open modules description file `$descfile': $!\n";

    while (my $line = <$desc_fh>)
    {
        next if $line =~ m/^\s*#/;
        next if $line =~ m/^\s*$/;

        # split ' ' ignores leading whitespace, so an indented line yields
        # the same three fields as an unindented one (split /\s+/ would
        # produce an empty first field and shift everything by one).
        my ($alg, $encstr, $aliasstr) = split(' ', $line);
        die "$descfile:$.: expected `<algorithm> <encodings> <aliases>'\n"
            unless defined $aliasstr;

        $algorithms{$alg} = 1;
        foreach my $alias (split(/,/, $aliasstr)) {
            foreach my $enc (split(/,/, $encstr)) {
                # print "$alias, $enc\n";
                $aliases{$alias} = $alg;
                addalgenc($alg, $enc);
            }
        }
    }

    close($desc_fh) or die "Can't close ${descfile}: $!\n";
}
# printoutput()
#
# Write the generated C header to the file named by $outname.  In order,
# the header contains:
#   - a comment listing the algorithm names from %algorithms,
#   - one #include per algorithm/encoding pair,
#   - the stemmer_encoding_t enum and the encodings[] table built from %encs,
#   - the modules[] table with one row per alias/encoding pair,
#   - the algorithm_names[] table.
#
# Dies if the output file cannot be opened or closed.
sub printoutput()
{
    # Three-argument open: $outname is always taken as a literal file name
    # and can never be interpreted as part of the open mode.
    open(my $out, '>', $outname) or die "Can't open output file `$outname': $!\n";

    print {$out} <<EOS;
/* $outname: List of stemming modules.
*
* This file is generated by mkmodules.pl from a list of module names.
* Do not edit manually.
*
EOS

    # Comma-separated list of algorithm names inside the header comment,
    # broken onto a new comment line before a name would pass column 77.
    my $line = " * Modules included by this file are: ";
    print {$out} $line;
    my $linelen = length($line);
    my $need_sep = 0;
    my @algorithms = sort keys(%algorithms);
    foreach my $lang (@algorithms) {
        if ($need_sep) {
            if (($linelen + 2 + length($lang)) > 77) {
                print {$out} ",\n * ";
                $linelen = 3;
            } else {
                print {$out} ', ';
                $linelen += 2;
            }
        }
        print {$out} $lang;
        $linelen += length($lang);
        $need_sep = 1;
    }
    print {$out} "\n */\n\n";

    # One #include for every algorithm/encoding combination.
    foreach my $lang (@algorithms) {
        my $hashref = $algorithm_encs{$lang};
        foreach my $enc (sort keys (%$hashref)) {
            print {$out} "#include \"../$c_src_dir/stem_${enc}_$lang.h\"\n";
        }
    }

    print {$out} <<EOS;
typedef enum {
ENC_UNKNOWN=0,
EOS

    # Enumerators are separated by ",\n"; no separator follows the last one.
    my $neednl = 0;
    for my $enc (sort keys %encs) {
        print {$out} ",\n" if $neednl;
        print {$out} " ENC_${enc}";
        $neednl = 1;
    }

    print {$out} <<EOS;
} stemmer_encoding_t;
struct stemmer_encoding {
const char * name;
stemmer_encoding_t enc;
};
static struct stemmer_encoding encodings[] = {
EOS

    for my $enc (sort keys %encs) {
        print {$out} " {\"${enc}\", ENC_${enc}},\n";
    }

    print {$out} <<EOS;
{0,ENC_UNKNOWN}
};
struct stemmer_modules {
const char * name;
stemmer_encoding_t enc;
struct SN_env * (*create)(void);
void (*close)(struct SN_env *);
int (*stem)(struct SN_env *);
};
static struct stemmer_modules modules[] = {
EOS

    # One row per alias and encoding; the C function names are built from
    # the algorithm the alias maps to, not from the alias itself.
    for my $alias (sort keys %aliases) {
        my $alg = $aliases{$alias};
        my $hashref = $algorithm_encs{$alg};
        foreach my $enc (sort keys (%$hashref)) {
            my $p = "${alg}_${enc}";
            print {$out} " {\"$alias\", ENC_$enc, ${p}_create_env, ${p}_close_env, ${p}_stem},\n";
        }
    }

    print {$out} <<EOS;
{0,ENC_UNKNOWN,0,0,0}
};
EOS
    print {$out} <<EOS;
static const char * algorithm_names[] = {
EOS

    for my $lang (@algorithms) {
        print {$out} " \"$lang\", \n";
    }

    print {$out} <<EOS;
0
};
EOS

    # Buffered write errors only surface at close, so it is checked as well.
    close($out) or die "Can't close ${outname}: $!\n";
}
# printsrclist()
#
# Write the makefile fragment named by $srclistfile.  It contains a comment
# listing the algorithm names, then two variables:
#   snowball_sources - src_c/stem_<enc>_<alg>.c for every algorithm/encoding
#                      pair, followed by the runtime and libstemmer sources;
#   snowball_headers - the matching src_c/stem_<enc>_<alg>.h files, followed
#                      by the fixed set of public/runtime headers.
# $extn (empty or "_<extn>") is inserted into the libstemmer/modules names.
#
# Dies if the output file cannot be opened or closed.
sub printsrclist()
{
    # Three-argument open: $srclistfile is always taken as a literal file
    # name and can never be interpreted as part of the open mode.
    open(my $out, '>', $srclistfile) or die "Can't open output file `$srclistfile': $!\n";

    print {$out} <<EOS;
# $srclistfile: List of stemming module source files
#
# This file is generated by mkmodules.pl from a list of module names.
# Do not edit manually.
#
EOS

    # Comma-separated list of algorithm names inside the comment, broken
    # onto a new comment line before a name would pass column 77.
    my $line = "# Modules included by this file are: ";
    print {$out} $line;
    my $linelen = length($line);
    my $need_sep = 0;
    my @algorithms = sort keys(%algorithms);
    foreach my $lang (@algorithms) {
        if ($need_sep) {
            if (($linelen + 2 + length($lang)) > 77) {
                print {$out} ",\n# ";
                $linelen = 3;
            } else {
                print {$out} ', ';
                $linelen += 2;
            }
        }
        print {$out} $lang;
        $linelen += length($lang);
        $need_sep = 1;
    }

    print {$out} "\n\nsnowball_sources= \\\n";
    # %algorithm_encs is keyed by algorithm name, so iterate over the
    # algorithms themselves.  Iterating over the alias names only works for
    # algorithms that happen to be listed as one of their own aliases and
    # silently drops the files of any algorithm that is not.
    for my $lang (@algorithms) {
        my $hashref = $algorithm_encs{$lang};
        foreach my $enc (sort keys (%$hashref)) {
            print {$out} " src_c/stem_${enc}_${lang}.c \\\n";
        }
    }

    # Fixed sources; entries are joined with " \\\n" and the last one is
    # left without a continuation backslash.
    $need_sep = 0;
    for my $srcfile ('runtime/api.c',
                     'runtime/utilities.c',
                     "libstemmer/libstemmer${extn}.c") {
        print {$out} " \\\n" if $need_sep;
        print {$out} " $srcfile";
        $need_sep = 1;
    }

    print {$out} "\n\nsnowball_headers= \\\n";
    # Same algorithm-keyed iteration as for the sources above.
    for my $lang (@algorithms) {
        my $hashref = $algorithm_encs{$lang};
        foreach my $enc (sort keys (%$hashref)) {
            print {$out} " src_c/stem_${enc}_${lang}.h \\\n";
        }
    }

    $need_sep = 0;
    for my $srcfile ('include/libstemmer.h',
                     "libstemmer/modules${extn}.h",
                     'runtime/api.h',
                     'runtime/header.h') {
        print {$out} " \\\n" if $need_sep;
        print {$out} " $srcfile";
        $need_sep = 1;
    }
    print {$out} "\n\n";

    # Buffered write errors only surface at close, so it is checked as well.
    close($out) or die "Can't close ${srclistfile}: $!\n";
}
# Run the generator.  The description file is parsed first because both
# writers read the tables that readinput() fills in; the header is written
# before the source list.
for my $step (\&readinput, \&printoutput, \&printsrclist) {
    $step->();
}