1
0
Fork 0

Update CMakeLists

Fix and add compiler generators, add algorithms, and remove the modules_utf8 handling (Snowball 2 has only a single modules.txt).

TODO: Should the fallback in the gen_stem macro be removed?
This commit is contained in:
Simran Brucherseifer 2019-11-22 18:12:26 +01:00
parent 511d78ea59
commit 36f86a7f9f
1 changed file with 27 additions and 24 deletions

View File

@ -2,7 +2,7 @@ PROJECT(snowball C)
cmake_minimum_required(VERSION 2.8) cmake_minimum_required(VERSION 2.8)
SET(SNOWBALL_VERSION_MAJOR 1) SET(SNOWBALL_VERSION_MAJOR 2)
SET(SNOWBALL_VERSION_MINOR 0) SET(SNOWBALL_VERSION_MINOR 0)
SET(SNOWBALL_VERSION_PATCH 0) SET(SNOWBALL_VERSION_PATCH 0)
@ -141,19 +141,24 @@ ENDIF(ENABLE_STATIC MATCHES "ON")
# End of configuration # End of configuration
SET(LIBSTEM_ALGORITHMS SET(LIBSTEM_ALGORITHMS
arabic arabic basque catalan danish dutch english
danish dutch english finnish french german hungarian finnish french german hungarian indonesian
irish italian irish italian norwegian porter portuguese
norwegian porter portuguese romanian romanian russian spanish swedish tamil turkish
russian spanish swedish tamil turkish
) )
SET(KOI8_ALGORITHMS russian) SET(KOI8_ALGORITHMS russian)
SET(ISO_8859_1_ALGORITHMS SET(ISO_8859_1_ALGORITHMS
danish dutch english finnish french german irish basque catalan danish dutch english finnish
italian norwegian porter portuguese spanish swedish french german indonesian irish italian
norwegian porter portuguese spanish swedish
)
SET(ISO_8859_2_ALGORITHMS
hungarian romanian
)
SET(OTHER_ALGORITHMS
german2 greek hindi kraaij_pohlmann
lithuanian lovins nepali serbian
) )
SET(ISO_8859_2_ALGORITHMS hungarian romanian)
SET(OTHER_ALGORITHMS german2 kraaij_pohlmann lovins)
SET(ALL_ALGORITHMS ${LIBSTEM_ALGORITHMS} ${OTHER_ALGORITHMS}) SET(ALL_ALGORITHMS ${LIBSTEM_ALGORITHMS} ${OTHER_ALGORITHMS})
SET(COMPILER_SOURCES SET(COMPILER_SOURCES
@ -162,9 +167,11 @@ SET(COMPILER_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/compiler/analyser.c ${CMAKE_CURRENT_SOURCE_DIR}/compiler/analyser.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator.c ${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/driver.c ${CMAKE_CURRENT_SOURCE_DIR}/compiler/driver.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_csharp.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_go.c ${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_go.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_java.c ${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_java.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_jsx.c ${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_js.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_pascal.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_python.c ${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_python.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_rust.c ${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_rust.c
) )
@ -175,11 +182,9 @@ SET(SNOWBALL_RUNTIME
${CMAKE_CURRENT_SOURCE_DIR}/runtime/api.c ${CMAKE_CURRENT_SOURCE_DIR}/runtime/api.c
${CMAKE_CURRENT_SOURCE_DIR}/runtime/utilities.c ${CMAKE_CURRENT_SOURCE_DIR}/runtime/utilities.c
) )
SET(LIBSTEMMER_SOURCES libstemmer/libstemmer.c) #SET(LIBSTEMMER_SOURCES libstemmer/libstemmer.c)
SET(LIBSTEMMER_UTF8_SOURCES libstemmer/libstemmer_utf8.c) #SET(LIBSTEMMER_HEADERS include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h)
#LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c #SET(LIBSTEMMER_EXTRA libstemmer/modules.txt libstemmer/libstemmer_c.in)
#LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
#LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/modules_utf8.txt libstemmer/libstemmer_c.in
SET(STEMWORDS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/examples/stemwords.c) SET(STEMWORDS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/examples/stemwords.c)
SET(MODULES_H "modules.h") SET(MODULES_H "modules.h")
@ -191,10 +196,11 @@ MACRO(gen_stem IN ENCODING)
SET(_header "${_base}.h") SET(_header "${_base}.h")
SET(_source "${_base}.c") SET(_source "${_base}.c")
STRING(REPLACE "UTF_8" "Unicode" _in_enc "${ENCODING}") STRING(REPLACE "UTF_8" "Unicode" _in_enc "${ENCODING}")
SET(_input "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}/stem_${_in_enc}.sbl") SET(_input "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}.sbl")
IF(${_in_enc} STREQUAL "Unicode" AND NOT EXISTS ${_input}) IF(${_in_enc} STREQUAL "Unicode" AND NOT EXISTS ${_input})
ADD_CUSTOM_COMMAND(OUTPUT ${_source} ADD_CUSTOM_COMMAND(OUTPUT ${_source}
COMMAND $<TARGET_FILE:snowball> "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}/stem_ISO_8859_1.sbl" -o ${_base} -eprefix ${_it}_${ENCODING}_ -r ${CMAKE_CURRENT_SOURCE_DIR}/runtime -u # TODO: in Snowball 2 the stemmers are organized like algorithms/german.sbl, what is this fallback for?!
COMMAND $<TARGET_FILE:snowball> "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}.sbl" -o ${_base} -eprefix ${_it}_${ENCODING}_ -r ${CMAKE_CURRENT_SOURCE_DIR}/runtime -u
DEPENDS snowball) DEPENDS snowball)
LIST(APPEND STEMMER_SOURCES ${_source}) LIST(APPEND STEMMER_SOURCES ${_source})
@ -212,16 +218,12 @@ ENDMACRO()
INCLUDE_DIRECTORIES("include") INCLUDE_DIRECTORIES("include")
INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}/libstemmer") INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}/libstemmer")
# NOTE: modules.h gets overwritten by static file from iresearch.build folder
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/libstemmer COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/libstemmer
COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/mkmodules.pl ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h "libstemmer" ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/modules.txt ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/mkinc.mak COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/mkmodules.pl ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h "libstemmer" ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/modules.txt ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/mkinc.mak
) )
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules_utf8.h
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/libstemmer
COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/mkmodules.pl ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h "libstemmer" ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/modules_utf8.txt ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/mkinc_utf8.mak utf8
)
ADD_CUSTOM_TARGET(modules DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h") ADD_CUSTOM_TARGET(modules DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h")
SET(STEMMER_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/libstemmer.c") SET(STEMMER_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/libstemmer.c")
@ -233,6 +235,7 @@ gen_stem("${LIBSTEM_ALGORITHMS}" "UTF_8")
gen_stem("${KOI8_ALGORITHMS}" "KOI8_R") gen_stem("${KOI8_ALGORITHMS}" "KOI8_R")
gen_stem("${ISO_8859_1_ALGORITHMS}" "ISO_8859_1") gen_stem("${ISO_8859_1_ALGORITHMS}" "ISO_8859_1")
gen_stem("${ISO_8859_2_ALGORITHMS}" "ISO_8859_2") gen_stem("${ISO_8859_2_ALGORITHMS}" "ISO_8859_2")
gen_stem("${OTHER_ALGORITHMS}" "UTF_8")
INCLUDE_DIRECTORIES( INCLUDE_DIRECTORIES(
${CMAKE_CURRENT_SOURCE_DIR}/libstemmer ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer