// Mirror of https://gitee.com/bigwinds/arangodb
// (viewer metadata: 216 lines, 5.9 KiB, plain text)
stringescapes {}
|
|
|
|
/* the 32 Cyrillic letters in Unicode */
|
|
|
|
stringdef a hex '430'
|
|
stringdef b hex '431'
|
|
stringdef v hex '432'
|
|
stringdef g hex '433'
|
|
stringdef d hex '434'
|
|
stringdef e hex '435'
|
|
stringdef zh hex '436'
|
|
stringdef z hex '437'
|
|
stringdef i hex '438'
|
|
stringdef i` hex '439'
|
|
stringdef k hex '43A'
|
|
stringdef l hex '43B'
|
|
stringdef m hex '43C'
|
|
stringdef n hex '43D'
|
|
stringdef o hex '43E'
|
|
stringdef p hex '43F'
|
|
stringdef r hex '440'
|
|
stringdef s hex '441'
|
|
stringdef t hex '442'
|
|
stringdef u hex '443'
|
|
stringdef f hex '444'
|
|
stringdef kh hex '445'
|
|
stringdef ts hex '446'
|
|
stringdef ch hex '447'
|
|
stringdef sh hex '448'
|
|
stringdef shch hex '449'
|
|
stringdef " hex '44A'
|
|
stringdef y hex '44B'
|
|
stringdef ' hex '44C'
|
|
stringdef e` hex '44D'
|
|
stringdef iu hex '44E'
|
|
stringdef ia hex '44F'
|
|
|
|
routines ( mark_regions R2
|
|
perfective_gerund
|
|
adjective
|
|
adjectival
|
|
reflexive
|
|
verb
|
|
noun
|
|
derivational
|
|
tidy_up
|
|
)
|
|
|
|
externals ( stem )
|
|
|
|
integers ( pV p2 )
|
|
|
|
groupings ( v )
|
|
|
|
define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
|
|
|
|
define mark_regions as (
|
|
|
|
$pV = limit
|
|
$p2 = limit
|
|
do (
|
|
gopast v setmark pV gopast non-v
|
|
gopast v gopast non-v setmark p2
|
|
)
|
|
)
|
|
|
|
backwardmode (
|
|
|
|
define R2 as $p2 <= cursor
|
|
|
|
define perfective_gerund as (
|
|
[substring] among (
|
|
'{v}'
|
|
'{v}{sh}{i}'
|
|
'{v}{sh}{i}{s}{'}'
|
|
('{a}' or '{ia}' delete)
|
|
'{i}{v}'
|
|
'{i}{v}{sh}{i}'
|
|
'{i}{v}{sh}{i}{s}{'}'
|
|
'{y}{v}'
|
|
'{y}{v}{sh}{i}'
|
|
'{y}{v}{sh}{i}{s}{'}'
|
|
(delete)
|
|
)
|
|
)
|
|
|
|
define adjective as (
|
|
[substring] among (
|
|
'{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
|
|
'{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
|
|
'{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
|
|
'{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
|
|
'{ia}{ia}'
|
|
// and -
|
|
'{o}{iu}' // - which is somewhat archaic
|
|
'{e}{iu}' // - soft form of {o}{iu}
|
|
(delete)
|
|
)
|
|
)
|
|
|
|
define adjectival as (
|
|
adjective
|
|
|
|
/* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
|
|
nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
|
|
errors. Removing im, uem, enn creates too many errors.
|
|
*/
|
|
|
|
try (
|
|
[substring] among (
|
|
'{e}{m}' // present passive participle
|
|
'{n}{n}' // adjective from past passive participle
|
|
'{v}{sh}' // past active participle
|
|
'{iu}{shch}' '{shch}' // present active participle
|
|
('{a}' or '{ia}' delete)
|
|
|
|
//but not '{i}{m}' '{u}{e}{m}' // present passive participle
|
|
//or '{e}{n}{n}' // adjective from past passive participle
|
|
|
|
'{i}{v}{sh}' '{y}{v}{sh}'// past active participle
|
|
'{u}{iu}{shch}' // present active participle
|
|
(delete)
|
|
)
|
|
)
|
|
|
|
)
|
|
|
|
define reflexive as (
|
|
[substring] among (
|
|
'{s}{ia}'
|
|
'{s}{'}'
|
|
(delete)
|
|
)
|
|
)
|
|
|
|
define verb as (
|
|
[substring] among (
|
|
'{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
|
|
'{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
|
|
'{n}{y}' '{t}{'}' '{e}{sh}{'}'
|
|
|
|
'{n}{n}{o}'
|
|
('{a}' or '{ia}' delete)
|
|
|
|
'{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
|
|
'{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
|
|
'{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
|
|
'{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
|
|
'{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
|
|
'{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
|
|
(delete)
|
|
/* note the short passive participle tests:
|
|
'{n}{a}' '{n}' '{n}{o}' '{n}{y}'
|
|
'{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
|
|
*/
|
|
)
|
|
)
|
|
|
|
define noun as (
|
|
[substring] among (
|
|
'{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
|
|
'{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
|
|
'{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
|
|
'{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
|
|
'{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
|
|
'{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
|
|
(delete)
|
|
/* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
|
|
'{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{x}'
|
|
omitted - they only occur on 12 words.
|
|
*/
|
|
)
|
|
)
|
|
|
|
define derivational as (
|
|
[substring] R2 among (
|
|
'{o}{s}{t}'
|
|
'{o}{s}{t}{'}'
|
|
(delete)
|
|
)
|
|
)
|
|
|
|
define tidy_up as (
|
|
[substring] among (
|
|
|
|
'{e}{i`}{sh}'
|
|
'{e}{i`}{sh}{e}' // superlative forms
|
|
(delete
|
|
['{n}'] '{n}' delete
|
|
)
|
|
'{n}'
|
|
('{n}' delete) // e.g. -nno endings
|
|
'{'}'
|
|
(delete) // with some slight false conflations
|
|
)
|
|
)
|
|
)
|
|
|
|
define stem as (
|
|
|
|
do mark_regions
|
|
backwards setlimit tomark pV for (
|
|
do (
|
|
perfective_gerund or
|
|
( try reflexive
|
|
adjectival or verb or noun
|
|
)
|
|
)
|
|
try([ '{i}' ] delete)
|
|
// because noun ending -i{iu} is being treated as verb ending -{iu}
|
|
|
|
do derivational
|
|
do tidy_up
|
|
)
|
|
)
|