mirror of https://gitee.com/bigwinds/arangodb
231 lines
5.7 KiB
Plaintext
231 lines
5.7 KiB
Plaintext
routines (
|
|
postlude mark_regions
|
|
RV R1 R2
|
|
attached_pronoun
|
|
standard_suffix
|
|
y_verb_suffix
|
|
verb_suffix
|
|
residual_suffix
|
|
)
|
|
|
|
externals ( stem )
|
|
|
|
integers ( pV p1 p2 )
|
|
|
|
groupings ( v )
|
|
|
|
stringescapes {}
|
|
|
|
/* special characters (in ISO Latin I) */
|
|
|
|
stringdef a' hex 'E1' // a-acute
|
|
stringdef e' hex 'E9' // e-acute
|
|
stringdef i' hex 'ED' // i-acute
|
|
stringdef o' hex 'F3' // o-acute
|
|
stringdef u' hex 'FA' // u-acute
|
|
stringdef u" hex 'FC' // u-diaeresis
|
|
stringdef n~ hex 'F1' // n-tilde
|
|
|
|
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'
|
|
|
|
define mark_regions as (
|
|
|
|
$pV = limit
|
|
$p1 = limit
|
|
$p2 = limit // defaults
|
|
|
|
do (
|
|
( v (non-v gopast v) or (v gopast non-v) )
|
|
or
|
|
( non-v (non-v gopast v) or (v next) )
|
|
setmark pV
|
|
)
|
|
do (
|
|
gopast v gopast non-v setmark p1
|
|
gopast v gopast non-v setmark p2
|
|
)
|
|
)
|
|
|
|
define postlude as repeat (
|
|
[substring] among(
|
|
'{a'}' (<- 'a')
|
|
'{e'}' (<- 'e')
|
|
'{i'}' (<- 'i')
|
|
'{o'}' (<- 'o')
|
|
'{u'}' (<- 'u')
|
|
// and possibly {u"}->u here, or in prelude
|
|
'' (next)
|
|
) //or next
|
|
)
|
|
|
|
backwardmode (
|
|
|
|
define RV as $pV <= cursor
|
|
define R1 as $p1 <= cursor
|
|
define R2 as $p2 <= cursor
|
|
|
|
define attached_pronoun as (
|
|
[substring] among(
|
|
'me' 'se' 'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
|
|
'las' 'les' 'los' 'nos'
|
|
)
|
|
substring RV among(
|
|
'i{e'}ndo' (] <- 'iendo')
|
|
'{a'}ndo' (] <- 'ando')
|
|
'{a'}r' (] <- 'ar')
|
|
'{e'}r' (] <- 'er')
|
|
'{i'}r' (] <- 'ir')
|
|
'ando'
|
|
'iendo'
|
|
'ar' 'er' 'ir'
|
|
(delete)
|
|
'yendo' ('u' delete)
|
|
)
|
|
)
|
|
|
|
define standard_suffix as (
|
|
[substring] among(
|
|
|
|
'anza' 'anzas'
|
|
'ico' 'ica' 'icos' 'icas'
|
|
'ismo' 'ismos'
|
|
'able' 'ables'
|
|
'ible' 'ibles'
|
|
'ista' 'istas'
|
|
'oso' 'osa' 'osos' 'osas'
|
|
'amiento' 'amientos'
|
|
'imiento' 'imientos'
|
|
(
|
|
R2 delete
|
|
)
|
|
'adora' 'ador' 'aci{o'}n'
|
|
'adoras' 'adores' 'aciones'
|
|
'ante' 'antes' 'ancia' 'ancias'// Note 1
|
|
(
|
|
R2 delete
|
|
try ( ['ic'] R2 delete )
|
|
)
|
|
'log{i'}a'
|
|
'log{i'}as'
|
|
(
|
|
R2 <- 'log'
|
|
)
|
|
'uci{o'}n' 'uciones'
|
|
(
|
|
R2 <- 'u'
|
|
)
|
|
'encia' 'encias'
|
|
(
|
|
R2 <- 'ente'
|
|
)
|
|
'amente'
|
|
(
|
|
R1 delete
|
|
try (
|
|
[substring] R2 delete among(
|
|
'iv' (['at'] R2 delete)
|
|
'os'
|
|
'ic'
|
|
'ad'
|
|
)
|
|
)
|
|
)
|
|
'mente'
|
|
(
|
|
R2 delete
|
|
try (
|
|
[substring] among(
|
|
'ante' // Note 1
|
|
'able'
|
|
'ible' (R2 delete)
|
|
)
|
|
)
|
|
)
|
|
'idad'
|
|
'idades'
|
|
(
|
|
R2 delete
|
|
try (
|
|
[substring] among(
|
|
'abil'
|
|
'ic'
|
|
'iv' (R2 delete)
|
|
)
|
|
)
|
|
)
|
|
'iva' 'ivo'
|
|
'ivas' 'ivos'
|
|
(
|
|
R2 delete
|
|
try (
|
|
['at'] R2 delete // but not a further ['ic'] R2 delete
|
|
)
|
|
)
|
|
)
|
|
)
|
|
|
|
define y_verb_suffix as (
|
|
setlimit tomark pV for ([substring]) among(
|
|
'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
|
|
'yas' 'yes' 'yais' 'yamos'
|
|
('u' delete)
|
|
)
|
|
)
|
|
|
|
define verb_suffix as (
|
|
setlimit tomark pV for ([substring]) among(
|
|
|
|
'en' 'es' '{e'}is' 'emos'
|
|
(try ('u' test 'g') ] delete)
|
|
|
|
'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
|
|
'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
|
|
'ar{e'}'
|
|
'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
|
|
'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
|
|
'er{e'}'
|
|
'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
|
|
'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
|
|
'ir{e'}'
|
|
|
|
'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
|
|
'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
|
|
'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
|
|
'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
|
|
'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
|
|
'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
|
|
'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
|
|
'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
|
|
'{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
|
|
(delete)
|
|
)
|
|
)
|
|
|
|
define residual_suffix as (
|
|
[substring] among(
|
|
'os'
|
|
'a' 'o' '{a'}' '{i'}' '{o'}'
|
|
( RV delete )
|
|
'e' '{e'}'
|
|
( RV delete try( ['u'] test 'g' RV delete ) )
|
|
)
|
|
)
|
|
)
|
|
|
|
define stem as (
|
|
do mark_regions
|
|
backwards (
|
|
do attached_pronoun
|
|
do ( standard_suffix or
|
|
y_verb_suffix or
|
|
verb_suffix
|
|
)
|
|
do residual_suffix
|
|
)
|
|
do postlude
|
|
)
|
|
|
|
/*
|
|
Note 1: additions of 15 Jun 2005
|
|
*/
|