diff --git a/INSTALL b/INSTALL index cdb52b38537dfab88ce14d39a38efc06ba3dee81..b00bc9b779ca6a9153cbd9bf74d860ab12b1309d 100644 --- a/INSTALL +++ b/INSTALL @@ -28,6 +28,9 @@ The basic procedure to build and install macaon from sources is the following. or sudo apt-get install openjdk-7-jdk + Attention: if you have libpython2.7-dev and libpython3.X-dev installed, cmake will choose the latter + in this case the generated libraries for Python will only work with Python 3 + - Build the sources with: make - Install macaon diff --git a/build/debug.sh b/build/debug.sh deleted file mode 100755 index ec037e3c185c39ac81f861094f0c7bb033df1a78..0000000000000000000000000000000000000000 --- a/build/debug.sh +++ /dev/null @@ -1,3 +0,0 @@ -cmake -DCMAKE_BUILD_TYPE=Debug .. -make -sudo make install diff --git a/build/script.sh b/build/script.sh deleted file mode 100755 index 9485f62e027a988d84b15033b788b4b2d5d1970a..0000000000000000000000000000000000000000 --- a/build/script.sh +++ /dev/null @@ -1,3 +0,0 @@ -cmake .. -make -sudo make install diff --git a/maca_export/CMakeLists.txt b/maca_export/CMakeLists.txt index 8afb0b1aac07e64cf394c666fa848d67066a419e..e8246aa8b574e0fdeab7e8e2d2ecceb40ac6567c 100644 --- a/maca_export/CMakeLists.txt +++ b/maca_export/CMakeLists.txt @@ -1,124 +1,31 @@ if(MACA_EXPORT) - FIND_PACKAGE(SWIG) - if (SWIG_FOUND) - FIND_PACKAGE(PythonLibs) - - if(PYTHONLIBS_FOUND) - set(SOURCES src/maca_export.i) - set(PYTHON_MODULE_NAME Macaon) - - include_directories(${PYTHON_INCLUDE_PATH}) - include_directories(../maca_trans_parser/src) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) - - #message(eeeeeeeeeeee ${CMAKE_CURRENT_SOURCE_DIR}) - add_custom_command( - DEPENDS ${SOURCES} - OUTPUT maca_export_py.cc - COMMAND swig -python -c++ -o maca_export_py.cc ../../maca_export/src/maca_export.i - ) - - # Specify the lib - add_library(${PYTHON_MODULE_NAME} SHARED - maca_export_py.cc - src/maca_trans_tagger_export.cc - src/maca_lemmatizer_export.cc - src/maca_trans_parser_export.cc - ) - - set_target_properties(${PYTHON_MODULE_NAME} PROPERTIES PREFIX _) - target_link_libraries(${PYTHON_MODULE_NAME} transparse maca_common perceptron) - else() - message("pythonlibs not installed on your system") - endif() - - - - FIND_PACKAGE(Java 1.7) - FIND_PACKAGE(JNI) - if (JNI_FOUND AND (Java_FOUND OR JAVA_FOUND)) - set(JAVA_MODULE_NAME Macaon) - set(JAVA_LIBRARY MacaonJava) - set(JAVA_CLASS_TAGGER MacaonTransTagger) - set(JAVA_CLASS_LEMMATIZER MacaonTransLemmatizer) - set(JAVA_CLASS_TRANSPARSER MacaonTransParser) - set(JAVA_PACKAGE lif) - set(JAR_FILENAME macaon) - - #set(ADDITIONNAL_JAVA_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/MacaonTransParserWrapper.java) - - # Add include directories - include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2}) - - # Run swig executable to generate java code - add_custom_command( - DEPENDS ${SOURCES} - OUTPUT maca_export_java.cc - COMMAND swig -java -package ${JAVA_PACKAGE} -c++ -o maca_export_java.cc ../../maca_export/src/maca_export.i - ) - - add_library(${JAVA_LIBRARY} SHARED - maca_export_java.cc - src/maca_trans_tagger_export.cc - src/maca_lemmatizer_export.cc - src/maca_trans_parser_export.cc - ) - - target_link_libraries(${JAVA_LIBRARY} transparse maca_common perceptron) - - string(REGEX REPLACE "[.]" "/" JAVA_PACKAGE_DIR ${JAVA_PACKAGE}) - add_custom_command( - TARGET ${JAVA_LIBRARY} POST_BUILD - COMMAND mkdir -p ${JAVA_PACKAGE_DIR} - COMMAND ${Java_JAVAC_EXECUTABLE} -encoding utf8 -d ${CMAKE_CURRENT_BINARY_DIR} #${JAVA_PACKAGE_DIR} - ${JAVA_MODULE_NAME}JNI.java - ${JAVA_CLASS_TAGGER}.java - ${JAVA_CLASS_LEMMATIZER}.java - ${JAVA_CLASS_TRANSPARSER}.java - ${JAVA_MODULE_NAME}.java - ${ADDITIONNAL_JAVA_FILES} - DEPENDS ${JAVA_MODULE_NAME}JNI.java ${JAVA_CLASS_TRANSPARSER}.java ${JAVA_CLASS_TAGGER}.java ${JAVA_CLASS_LEMMATIZER}.java ${JAVA_MODULE_NAME}.java ${ADDITIONNAL_JAVA_FILES} - ) - add_custom_command( - TARGET ${JAVA_LIBRARY} POST_BUILD - COMMAND ${Java_JAR_EXECUTABLE} -cvf ${JAR_FILENAME}.jar -C ${CMAKE_CURRENT_BINARY_DIR} ${JAVA_PACKAGE_DIR} - DEPENDS ${JAVA_MODULE_NAME}JNI.java ${JAVA_CLASS_TRANSPARSER}.java ${JAVA_CLASS_TAGGER}.java ${JAVA_CLASS_LEMMATIZER}.java ${JAVA_MODULE_NAME}.java ${ADDITIONNAL_JAVA_FILES} - #WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Building ${JAR_FILENAME}" - VERBATIM - ) - else() - message("Java JNI support not installed") - endif() - else() - message("swig >= 3.0 not installed on your system") - endif() - - - - - set(TESTSOURCES - src/maca_lemmatizer_export.cc - src/maca_trans_parser_export.cc - src/maca_trans_tagger_export.cc - ) - -# #compiling library -# include_directories(src) -# add_library(swigtransparse ${TESOURCES}) -#target_link_libraries(transparse perceptron) - - - - add_executable(testcxx ./src/test.cc ${TESTSOURCES}) - target_link_libraries(testcxx perceptron) - target_link_libraries(testcxx transparse) - target_link_libraries(testcxx maca_common) -# install (TARGETS testcxx DESTINATION bin) - - - + FIND_PACKAGE(SWIG) + if (SWIG_FOUND) + FIND_PACKAGE(PythonLibs) + if(PYTHONLIBS_FOUND) + message(STATUS "OK: PythonLibs ${PYTHONLIBS_VERSION_STRING} found. Run your python scripts with python ${PYTHONLIBS_VERSION_STRING}!") + endif() + + FIND_PACKAGE(Java 1.7) +# if (Java_FOUND OR JAVA_FOUND) +# message(STATUS "OK: Java ${Java_VERSION_STRING} found") + FIND_PACKAGE(JNI) +# if (JNI_FOUND) +# message(STATUS "OK: JNI found") +# endif() +# endif() + + add_subdirectory(tagger) + add_subdirectory(lemmatizer) + add_subdirectory(parser) + + else() + message("swig >= 3.0 not installed on your system") + endif() + + + add_subdirectory(src) endif() diff --git a/maca_export/README.txt b/maca_export/README.txt new file mode 100644 index 0000000000000000000000000000000000000000..1ebc2d28da74ce716eabe32f3580562e7de8a6b8 --- /dev/null +++ b/maca_export/README.txt @@ -0,0 +1,24 @@ +Using the libraries for python and java + +Attention: if you have libpython2.7-dev and libpython3.X-dev installed, cmake will choose the latter +in this case the generated libraries for Python will only work with Python 3 + + +set environment variable MACAON_DIR + export MACAON_DIR=/home/jeuh6401/SemanticData/macaon/maca_data2 + + +Adapt the lines like mt = MacaonTagger.MacaonTransTagger("fr", "fr/eval/wplgfs.mcd") +then: + + cd maca_data2 + ../macaon2/maca_export/example/example.py + + + + +another test programme: + cd maca_data2 + ../macaon2/build/maca_export/src/testcxx jh-seq jh-seq/eval/wplgfs.mcd ../macaon2/maca_export/src/test-0.mcf + + diff --git a/maca_export/example/example.java b/maca_export/example/example.java index 25816d5a00dac7c03acc2ec6be4ea03f30df66e7..b51763a63a83b278d2649c687691a19919ce6525 100644 --- a/maca_export/example/example.java +++ b/maca_export/example/example.java @@ -1,14 +1,35 @@ import lif.*; /** example to use the macaon parser with java compile (in maca_data2) - javac -cp ../macaon2/build_debug/maca_export/macaon.jar ../macaon2/maca_export/example/example.java + javac -cp ../macaon2/build/maca_export/tagger/macaontagger.jar:../macaon2/build/maca_export/lemmatizer/macaonlemmatizer.jar:../macaon2/build/maca_export/parser/macaonparser.jar ../macaon2/maca_export/example/example.java + run - java -cp ../macaon2/build_debug/maca_export/macaon.jar:../macaon2/maca_export/example -Djava.library.path=../macaon2/build_debug/maca_export/ example + java -cp ../macaon2/build/maca_export/tagger/macaontagger.jar:../macaon2/build/maca_export/lemmatizer/macaonlemmatizer.jar:../macaon2/build/maca_export/parser/macaonparser.jar:../macaon2/maca_export/example -Djava.library.path=../macaon2/build/maca_export/tagger:../macaon2/build/maca_export/lemmatizer:../macaon2/build/maca_export/parser example + + make jar + jar -cf ../macaon2/maca_export/example/macaon.jar -C ../macaon2/maca_export/example example.class + + */ public class example { - public static void main(String []args) { - System.loadLibrary("MacaonJava"); // use libMacaonExport.so + MacaonTransTagger mt; + MacaonTransLemmatizer ml; + MacaonTransParser mp; + + public example(String lg, String mcd, boolean loadlibraries) { + if (loadlibraries) { + System.loadLibrary("MacaonTransTaggerJava"); // use libMacaonTransTaggerJava.so + System.loadLibrary("MacaonTransLemmatizerJava"); + System.loadLibrary("MacaonTransParserJava"); + } + + mt = new MacaonTransTagger(lg, mcd); + ml = new MacaonTransLemmatizer(lg, mcd); + mp = new MacaonTransParser(lg, mcd); + } + + public void test1() { /* StringBuilder mcf1 = new StringBuilder("La D le\n"); mcf1.append("pose N pose\n"); mcf1.append("d' P de\n"); @@ -52,14 +73,35 @@ public class example { mcf1.append("usagers\n"); mcf1.append(".\n"); - MacaonTransTagger mt = new MacaonTransTagger("jh-seq", "jh-seq/eval/wplgfs.mcd"); - MacaonTransLemmatizer ml = new MacaonTransLemmatizer("jh-seq", "jh-seq/eval/wplgfs.mcd"); - MacaonTransParser mp = new MacaonTransParser("jh-seq", "jh-seq/eval/wplgfs.mcd"); String tags = mt.tagmcf(mcf1.toString()); System.out.println(tags); String lemmas = ml.lemmatizemcf(tags); System.out.println(lemmas); System.out.println(mp.parsemcf(lemmas)); + + } + + public String tag(String text) { + String t = text.replaceAll("([\\.,\\?;:!/])", " $1 "); + + String [] elems = t.split("[\\s]+"); + StringBuilder mcf1 = new StringBuilder(); + for (String elem : elems) { + mcf1.append(elem).append('\n'); + } + //System.out.println(mcf1); + String tags = mt.tagmcf(mcf1.toString()); + //System.out.println(tags); + String lemmas = ml.lemmatizemcf(tags); + + return lemmas; + } + + public static void main(String []args) { + example ex = new example("jh-seq", "jh-seq/eval/wplgfs.mcd", true); + ex.test1(); + //String res = ex.tag("la souris mange."); + //System.out.println(res); } } diff --git a/maca_export/example/example.py b/maca_export/example/example.py index 53216d40d64f6d9932d47c5fb9cea98e279c47af..03f7aa43df1f2dcc950c64cec5c0c352e3647b11 100755 --- a/maca_export/example/example.py +++ b/maca_export/example/example.py @@ -6,15 +6,20 @@ import os currentdir = os.path.dirname(os.path.abspath(__file__)) import sys -sys.path.append(currentdir + "/../../build_debug/maca_export") +sys.path.append(currentdir + "/../../build/maca_export/tagger") +sys.path.append(currentdir + "/../../build/maca_export/lemmatizer") +sys.path.append(currentdir + "/../../build/maca_export/parser") -import Macaon + +import MacaonTagger +import MacaonLemmatizer +import MacaonParser # for this example you should be in maca_data2 -mt = Macaon.MacaonTransTagger("jh-seq", "jh-seq/eval/wplgfs.mcd") -ml = Macaon.MacaonTransLemmatizer("jh-seq", "jh-seq/eval/wplgfs.mcd") -mp = Macaon.MacaonTransParser("jh-seq", "jh-seq/eval/wplgfs.mcd") +mt = MacaonTagger.MacaonTransTagger("jh-seq", "jh-seq/eval/wplgfs.mcd") +ml = MacaonLemmatizer.MacaonTransLemmatizer("jh-seq", "jh-seq/eval/wplgfs.mcd") +mp = MacaonParser.MacaonTransParser("jh-seq", "jh-seq/eval/wplgfs.mcd") mcf="""La grosse @@ -92,8 +97,8 @@ lemmas = ml.lemmatizemcf(tags) print lemmas print mp.parsemcf(lemmas) -del mp -del ml del mt +del ml +del mp diff --git a/maca_export/lemmatizer/CMakeLists.txt b/maca_export/lemmatizer/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c803e69dc9bddbeeb0eee385cf2b9b3a5fbe34fb --- /dev/null +++ b/maca_export/lemmatizer/CMakeLists.txt @@ -0,0 +1,81 @@ +set(SOURCES maca_export_lemmatizer.i) + +if(PYTHONLIBS_FOUND) + + set(PYTHON_LEMMATIZER_NAME MacaonLemmatizer) + + + include_directories(${PYTHON_INCLUDE_PATH}) + include_directories(../../maca_lemmatizer/src) + include_directories(../../maca_common/include) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + + #message(eeeeeeeeeeee ${CMAKE_CURRENT_SOURCE_DIR}) + add_custom_command( + DEPENDS ${SOURCES} + OUTPUT maca_export_lemmatizer_py.cc + COMMAND swig -python -c++ -o maca_export_lemmatizer_py.cc ../../../maca_export/lemmatizer/maca_export_lemmatizer.i + ) + + # Specify the lib + add_library(${PYTHON_LEMMATIZER_NAME} SHARED + maca_export_lemmatizer_py.cc + maca_lemmatizer_export.cc + ../../maca_lemmatizer/src/context.c + ) + + set_target_properties(${PYTHON_LEMMATIZER_NAME} PROPERTIES PREFIX _) + target_link_libraries(${PYTHON_LEMMATIZER_NAME} transparse maca_common) +else() + message("pythonlibs not installed on your system") +endif() + + +if (JNI_FOUND AND (Java_FOUND OR JAVA_FOUND)) + set(JAVA_MODULE_NAME MacaonLemmatizer) + set(JAVA_LIBRARY MacaonTransLemmatizerJava) + set(JAVA_CLASS MacaonTransLemmatizer) + set(JAVA_PACKAGE lif) + set(JAR_FILENAME macaonlemmatizer) + + + # Add include directories + include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2}) + + # Run swig executable to generate java code + add_custom_command( + DEPENDS ${SOURCES} + OUTPUT maca_export_lemmatizer_java.cc + COMMAND swig -java -package ${JAVA_PACKAGE} -c++ -o maca_export_lemmatizer_java.cc ../../../maca_export/lemmatizer/maca_export_lemmatizer.i + ) + + add_library(${JAVA_LIBRARY} SHARED + maca_export_lemmatizer_java.cc + maca_lemmatizer_export.cc + ../../maca_lemmatizer/src/context.c + ) + + target_link_libraries(${JAVA_LIBRARY} transparse maca_common) + + string(REGEX REPLACE "[.]" "/" JAVA_PACKAGE_DIR ${JAVA_PACKAGE}) + add_custom_command( + TARGET ${JAVA_LIBRARY} POST_BUILD + COMMAND mkdir -p ${JAVA_PACKAGE_DIR} + COMMAND ${Java_JAVAC_EXECUTABLE} -encoding utf8 -d ${CMAKE_CURRENT_BINARY_DIR} #${JAVA_PACKAGE_DIR} + ${JAVA_MODULE_NAME}JNI.java + ${JAVA_CLASS}.java + ${JAVA_MODULE_NAME}.java + ${ADDITIONNAL_JAVA_FILES} + DEPENDS ${JAVA_MODULE_NAME}JNI.java ${JAVA_CLASS}.java ${JAVA_MODULE_NAME}.java ${ADDITIONNAL_JAVA_FILES} + ) + add_custom_command( + TARGET ${JAVA_LIBRARY} POST_BUILD + COMMAND ${Java_JAR_EXECUTABLE} -cvf ${JAR_FILENAME}.jar -C ${CMAKE_CURRENT_BINARY_DIR} ${JAVA_PACKAGE_DIR} + DEPENDS ${JAVA_MODULE_NAME}JNI.java ${JAVA_CLASS}.java ${JAVA_MODULE_NAME}.java ${ADDITIONNAL_JAVA_FILES} + #WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Building ${JAR_FILENAME}" + VERBATIM + ) + else() + message("Java JNI support not installed") + endif() diff --git a/maca_export/lemmatizer/maca_export_lemmatizer.i b/maca_export/lemmatizer/maca_export_lemmatizer.i new file mode 100644 index 0000000000000000000000000000000000000000..65ab38e3465d121bc8227af49cce11979962aa55 --- /dev/null +++ b/maca_export/lemmatizer/maca_export_lemmatizer.i @@ -0,0 +1,13 @@ +%module MacaonLemmatizer + +%{ +#include <maca_lemmatizer_export.h> +%} + +class MacaonTransLemmatizer { + public: + MacaonTransLemmatizer(char *lg, char *mcd); + ~MacaonTransLemmatizer(); + const char *lemmatizemcf(const char *mcf); +}; + diff --git a/maca_export/src/maca_lemmatizer_export.cc b/maca_export/lemmatizer/maca_lemmatizer_export.cc similarity index 96% rename from maca_export/src/maca_lemmatizer_export.cc rename to maca_export/lemmatizer/maca_lemmatizer_export.cc index af4ee502b7fa7448bd08d05829a0045de8663d32..9acf43119736ccfa3a72b1a85e5ac5149b3fb0c0 100644 --- a/maca_export/src/maca_lemmatizer_export.cc +++ b/maca_export/lemmatizer/maca_lemmatizer_export.cc @@ -1,38 +1,37 @@ -#include <stdio.h> -#include <string.h> +#include <cstdio> +#include <cstring> +#include <iostream> -#ifdef __cplusplus -extern "C"{ -#endif +//#ifdef __cplusplus +//extern "C"{ +//#endif -#include "context.h" -#include "feat_fct.h" -#include "config2feat_vec.h" -#include "feature_table.h" -#include "dico.h" + #include "dico.h" + #include "word.h" + #include "util.h" -#ifdef __cplusplus -} -#endif +//#ifdef __cplusplus +//} +//#endif #include "maca_lemmatizer_export.h" + MacaonTransLemmatizer::MacaonTransLemmatizer(char *lg, char *mcd) { char * argv[] = { (char *)"initParser", (char *)"-L", lg, (char *)"-C", mcd, 0 }; - ctx = context_read_options(5, argv); form_pos_ht = hash_new(1000000); //maca_lemmatizer_check_options(ctx); maca_lemmatizer_set_linguistic_resources_filenames(ctx); - lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht, ctx->debug_mode, &lemma_array_size); resultstring = NULL; + } MacaonTransLemmatizer::~MacaonTransLemmatizer() { diff --git a/maca_export/src/maca_lemmatizer_export.h b/maca_export/lemmatizer/maca_lemmatizer_export.h similarity index 89% rename from maca_export/src/maca_lemmatizer_export.h rename to maca_export/lemmatizer/maca_lemmatizer_export.h index afa63bc1071a69a8e52df08ba1985cb540ef83c7..b4a183c12550746af6aa5036b1c981214571e6a6 100644 --- a/maca_export/src/maca_lemmatizer_export.h +++ b/maca_export/lemmatizer/maca_lemmatizer_export.h @@ -1,4 +1,15 @@ -#include "context.h" +//#ifdef __cplusplus +//extern "C"{ +//#endif + + #include "context.h" + #include "../../../maca_trans_parser/src/config.h" + #include "word.h" + +//#ifdef __cplusplus +//} +//#endif + class MacaonTransLemmatizer { public: diff --git a/maca_export/parser/CMakeLists.txt b/maca_export/parser/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..37a5e8b39fdcd61f1040580efb23ecc7252885fe --- /dev/null +++ b/maca_export/parser/CMakeLists.txt @@ -0,0 +1,75 @@ +if(PYTHONLIBS_FOUND) + set(SOURCES maca_export_parser.i) + set(PYTHON_PARSER_NAME MacaonParser) + + include_directories(${PYTHON_INCLUDE_PATH}) + include_directories(../../maca_trans_parser/src) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + + #message(eeeeeeeeeeee ${CMAKE_CURRENT_SOURCE_DIR}) + add_custom_command( + DEPENDS ${SOURCES} + OUTPUT maca_export_parser_py.cc + COMMAND swig -python -c++ -o maca_export_parser_py.cc ../../../maca_export/parser/maca_export_parser.i + ) + + # Specify the lib + add_library(${PYTHON_PARSER_NAME} SHARED + maca_export_parser_py.cc + maca_trans_parser_export.cc + ) + + set_target_properties(${PYTHON_PARSER_NAME} PROPERTIES PREFIX _) + target_link_libraries(${PYTHON_PARSER_NAME} transparse maca_common perceptron) +else() + message("pythonlibs not installed on your system") +endif() + + +if (JNI_FOUND AND (Java_FOUND OR JAVA_FOUND)) + set(JAVA_MODULE_NAME MacaonParser) + set(JAVA_LIBRARY MacaonTransParserJava) + set(JAVA_CLASS MacaonTransParser) + set(JAVA_PACKAGE lif) + set(JAR_FILENAME macaonparser) + + + # Add include directories + include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2}) + + # Run swig executable to generate java code + add_custom_command( + DEPENDS ${SOURCES} + OUTPUT maca_export_parser_java.cc + COMMAND swig -java -package ${JAVA_PACKAGE} -c++ -o maca_export_parser_java.cc ../../../maca_export/parser/maca_export_parser.i + ) + + add_library(${JAVA_LIBRARY} SHARED + maca_export_parser_java.cc + maca_trans_parser_export.cc + ) + + target_link_libraries(${JAVA_LIBRARY} transparse maca_common perceptron) + + string(REGEX REPLACE "[.]" "/" JAVA_PACKAGE_DIR ${JAVA_PACKAGE}) + add_custom_command( + TARGET ${JAVA_LIBRARY} POST_BUILD + COMMAND mkdir -p ${JAVA_PACKAGE_DIR} + COMMAND ${Java_JAVAC_EXECUTABLE} -encoding utf8 -d ${CMAKE_CURRENT_BINARY_DIR} #${JAVA_PACKAGE_DIR} + ${JAVA_MODULE_NAME}JNI.java + ${JAVA_CLASS}.java + ${JAVA_MODULE_NAME}.java + ${ADDITIONNAL_JAVA_FILES} + DEPENDS ${JAVA_MODULE_NAME}JNI.java ${JAVA_CLASS}.java ${JAVA_MODULE_NAME}.java ${ADDITIONNAL_JAVA_FILES} + ) + add_custom_command( + TARGET ${JAVA_LIBRARY} POST_BUILD + COMMAND ${Java_JAR_EXECUTABLE} -cvf ${JAR_FILENAME}.jar -C ${CMAKE_CURRENT_BINARY_DIR} ${JAVA_PACKAGE_DIR} + DEPENDS ${JAVA_MODULE_NAME}JNI.java ${JAVA_CLASS}.java ${JAVA_MODULE_NAME}.java ${ADDITIONNAL_JAVA_FILES} + #WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Building ${JAR_FILENAME}" + VERBATIM + ) +else() + message("Java JNI support not installed") +endif() diff --git a/maca_export/parser/maca_export_parser.i b/maca_export/parser/maca_export_parser.i new file mode 100644 index 0000000000000000000000000000000000000000..3fb4280cf55590e6a53b94b1b3ec0ed11ae4d61e --- /dev/null +++ b/maca_export/parser/maca_export_parser.i @@ -0,0 +1,14 @@ +%module MacaonParser + +%{ +#include <maca_trans_parser_export.h> +%} + + + +class MacaonTransParser { + public: + MacaonTransParser(char *lg, char *mcd); + ~MacaonTransParser(); + const char *parsemcf(const char *mcf); +}; diff --git a/maca_export/src/maca_trans_parser_export.cc b/maca_export/parser/maca_trans_parser_export.cc similarity index 98% rename from maca_export/src/maca_trans_parser_export.cc rename to maca_export/parser/maca_trans_parser_export.cc index 7749f7033456741d6655966048904231010294a9..03150189967d3536d577cec08e41301407cb23f9 100644 --- a/maca_export/src/maca_trans_parser_export.cc +++ b/maca_export/parser/maca_trans_parser_export.cc @@ -1,22 +1,22 @@ -#include <stdio.h> -#include <string.h> +#include <cstdio> +#include <cstring> -#ifdef __cplusplus -extern "C"{ -#endif +//#ifdef __cplusplus +//extern "C"{ +//#endif -#include "context.h" #include "simple_decoder_parser_arc_eager.h" #include "movement_parser_arc_eager.h" #include "feat_fct.h" #include "config2feat_vec.h" + #include "feature_table.h" #include "dico.h" -#ifdef __cplusplus -} -#endif +//#ifdef __cplusplus +//} +//#endif #include "maca_trans_parser_export.h" @@ -37,7 +37,7 @@ MacaonTransParser::MacaonTransParser(char *lg, char *mcd) { set_linguistic_resources_filenames_parser(ctx); - ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose); + ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); diff --git a/maca_export/src/maca_trans_parser_export.h b/maca_export/parser/maca_trans_parser_export.h similarity index 88% rename from maca_export/src/maca_trans_parser_export.h rename to maca_export/parser/maca_trans_parser_export.h index ddf0c1b41692c3fd5721ef74c2d8764419a00244..403f704681d4e1a3d55987a75b5c87cda60b6c44 100644 --- a/maca_export/src/maca_trans_parser_export.h +++ b/maca_export/parser/maca_trans_parser_export.h @@ -1,4 +1,14 @@ -#include "context.h" + +//#ifdef __cplusplus +//extern "C"{ +//#endif + + #include "context.h" + #include "config.h" + +//#ifdef __cplusplus +//} +//#endif class MacaonTransParser { diff --git a/maca_export/src/CMakeLists.txt b/maca_export/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c48caa26e1388f488dfff21941a7aa369733404e --- /dev/null +++ b/maca_export/src/CMakeLists.txt @@ -0,0 +1,20 @@ + +include_directories(../../maca_trans_parser/src) +include_directories(../../maca_lemmatizer/src) +include_directories(../../maca_common/include) + +include_directories(../tagger) +include_directories(../lemmatizer) +include_directories(../parser) + + + + +add_executable(testcxx + test.cc + ../tagger/maca_trans_tagger_export.cc + ../lemmatizer/maca_lemmatizer_export.cc + ../parser/maca_trans_parser_export.cc) +target_link_libraries(testcxx transparse maca_common perceptron openblas) + +## install (TARGETS testcxx DESTINATION bin) diff --git a/maca_export/src/maca_export.i b/maca_export/src/maca_export.i deleted file mode 100644 index 0e425ac6e17b5d2d13232a6e1bffe2a9197921b1..0000000000000000000000000000000000000000 --- a/maca_export/src/maca_export.i +++ /dev/null @@ -1,30 +0,0 @@ -%module Macaon - -%{ -#include <maca_trans_tagger_export.h> -#include <maca_lemmatizer_export.h> -#include <maca_trans_parser_export.h> -%} - -class MacaonTransTagger { - public: - MacaonTransTagger(char *lg, char *mcd); - ~MacaonTransTagger(); - const char *tagmcf(const char *mcf); -}; - - -class MacaonTransLemmatizer { - public: - MacaonTransLemmatizer(char *lg, char *mcd); - ~MacaonTransLemmatizer(); - const char *lemmatizemcf(const char *mcf); -}; - - -class MacaonTransParser { - public: - MacaonTransParser(char *lg, char *mcd); - ~MacaonTransParser(); - const char *parsemcf(const char *mcf); -}; diff --git a/maca_export/src/test.cc b/maca_export/src/test.cc index f8208ee3595cc16e053d5c29059d17fa672290a5..e7e9c8ac289ee76689f4dbd97968df471374f85d 100644 --- a/maca_export/src/test.cc +++ b/maca_export/src/test.cc @@ -6,6 +6,10 @@ #include "maca_trans_tagger_export.h" // test programme to be able using valgrind on the exported part of macaon +// run (in maca_data2) +/** + ../macaon2/build/maca_export/src/testcxx jh-seq jh-seq/eval/wplgfs.mcd ../macaon2/maca_export/src/test.mcf + */ using namespace std ; @@ -15,8 +19,8 @@ int main(int argc, char *argv[]) { return 1; } - //MacaonTransTagger *mt = new MacaonTransTagger(argv[1], argv[2]); - //MacaonTransLemmatizer *ml = new MacaonTransLemmatizer(argv[1], argv[2]); + MacaonTransTagger *mt = new MacaonTransTagger(argv[1], argv[2]); + MacaonTransLemmatizer *ml = new MacaonTransLemmatizer(argv[1], argv[2]); MacaonTransParser *mp = new MacaonTransParser(argv[1], argv[2]); ifstream ifp(argv[3]); @@ -27,13 +31,15 @@ int main(int argc, char *argv[]) { text += line + "\n"; } - //const char *tags = mt->tagmcf(text.c_str()); - //const char *lemmas = ml->lemmatizemcf(tags); - //const char *deps = mp->parsemcf(lemmas); - const char *deps = mp->parsemcf(text.c_str()); - + const char *tags = mt->tagmcf(text.c_str()); + //cout << tags << endl; + const char *lemmas = ml->lemmatizemcf(tags); + const char *deps = mp->parsemcf(lemmas); + //const char *deps = mp->parsemcf(text.c_str()); cout << deps << endl; + + + delete mt; + delete ml; delete mp; - //delete ml; - //delete mt; } diff --git a/maca_export/src/test.mcf b/maca_export/src/test.mcf new file mode 100644 index 0000000000000000000000000000000000000000..4e180beb73e54d912f9181eb134699554c5b2dbf --- /dev/null +++ b/maca_export/src/test.mcf @@ -0,0 +1,20 @@ +La D le +pose N pos +d' P de +un D un +panneau N panneau +stop N stop +paraît V paraître +être V être +la D le +formule N formule +la D le +mieux ADV mieux +adaptée A adapté +pour P pour +assurer V assurer +la D le +sécurité N sécurité +des P+D de +usagers N usager +. PONCT . \ No newline at end of file diff --git a/maca_export/tagger/CMakeLists.txt b/maca_export/tagger/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5982e84e244f8a45f94dcbbffcf53b3d7fd1ffb9 --- /dev/null +++ b/maca_export/tagger/CMakeLists.txt @@ -0,0 +1,82 @@ +set(SOURCES maca_export_tagger.i) + + + + +if(PYTHONLIBS_FOUND) + + set(PYTHON_TAGGER_NAME MacaonTagger) + + include_directories(${PYTHON_INCLUDE_PATH}) + include_directories(../../maca_trans_parser/src) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + + #message(eeeeeeeeeeee ${CMAKE_CURRENT_SOURCE_DIR}) + add_custom_command( + DEPENDS ${SOURCES} + OUTPUT maca_export_tagger_py.cc + COMMAND swig -python -c++ -o maca_export_tagger_py.cc ../../../maca_export/tagger/maca_export_tagger.i + ) + + # Specify the lib + add_library(${PYTHON_TAGGER_NAME} SHARED + maca_export_tagger_py.cc + maca_trans_tagger_export.cc + ) + + set_target_properties(${PYTHON_TAGGER_NAME} PROPERTIES PREFIX _) + target_link_libraries(${PYTHON_TAGGER_NAME} transparse maca_common perceptron) +else() + message("pythonlibs not installed on your system") + +endif() + + + +if (JNI_FOUND AND (Java_FOUND OR JAVA_FOUND)) + set(JAVA_MODULE_NAME MacaonTagger) + set(JAVA_LIBRARY MacaonTransTaggerJava) + set(JAVA_CLASS MacaonTransTagger) + set(JAVA_PACKAGE lif) + set(JAR_FILENAME macaontagger) + + + # Add include directories + include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2}) + + # Run swig executable to generate java code + add_custom_command( + DEPENDS ${SOURCES} + OUTPUT maca_export_tagger_java.cc + COMMAND swig -java -package ${JAVA_PACKAGE} -c++ -o maca_export_tagger_java.cc ../../../maca_export/tagger/maca_export_tagger.i + ) + + add_library(${JAVA_LIBRARY} SHARED + maca_export_tagger_java.cc + maca_trans_tagger_export.cc + ) + + target_link_libraries(${JAVA_LIBRARY} transparse maca_common perceptron) + + string(REGEX REPLACE "[.]" "/" JAVA_PACKAGE_DIR ${JAVA_PACKAGE}) + add_custom_command( + TARGET ${JAVA_LIBRARY} POST_BUILD + COMMAND mkdir -p ${JAVA_PACKAGE_DIR} + COMMAND ${Java_JAVAC_EXECUTABLE} -encoding utf8 -d ${CMAKE_CURRENT_BINARY_DIR} #${JAVA_PACKAGE_DIR} + ${JAVA_MODULE_NAME}JNI.java + ${JAVA_CLASS}.java + ${JAVA_MODULE_NAME}.java + ${ADDITIONNAL_JAVA_FILES} + DEPENDS ${JAVA_MODULE_NAME}JNI.java ${JAVA_CLASS}.java ${JAVA_MODULE_NAME}.java ${ADDITIONNAL_JAVA_FILES} + ) + add_custom_command( + TARGET ${JAVA_LIBRARY} POST_BUILD + COMMAND ${Java_JAR_EXECUTABLE} -cvf ${JAR_FILENAME}.jar -C ${CMAKE_CURRENT_BINARY_DIR} ${JAVA_PACKAGE_DIR} + DEPENDS ${JAVA_MODULE_NAME}JNI.java ${JAVA_CLASS}.java ${JAVA_MODULE_NAME}.java ${ADDITIONNAL_JAVA_FILES} + #WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Building ${JAR_FILENAME}" + VERBATIM + ) + else() + message("Java JNI support not installed") + endif() diff --git a/maca_export/tagger/maca_export_tagger.i b/maca_export/tagger/maca_export_tagger.i new file mode 100644 index 0000000000000000000000000000000000000000..1f1b1e643a6d7d6cd67e2376106657a4486bce30 --- /dev/null +++ b/maca_export/tagger/maca_export_tagger.i @@ -0,0 +1,12 @@ +%module MacaonTagger + +%{ +#include <maca_trans_tagger_export.h> +%} + +class MacaonTransTagger { + public: + MacaonTransTagger(char *lg, char *mcd); + ~MacaonTransTagger(); + const char *tagmcf(const char *mcf); +}; diff --git a/maca_export/src/maca_trans_tagger_export.cc b/maca_export/tagger/maca_trans_tagger_export.cc similarity index 93% rename from maca_export/src/maca_trans_tagger_export.cc rename to maca_export/tagger/maca_trans_tagger_export.cc index a791372ddf7d6977ed6b34418e0a7fa84514ac5b..a4cb81be84ec48a1cbaf1685aea3452210c96f87 100644 --- a/maca_export/src/maca_trans_tagger_export.cc +++ b/maca_export/tagger/maca_trans_tagger_export.cc @@ -1,18 +1,18 @@ -#include <stdio.h> -#include <string.h> +#include <cstdio> +#include <cstring> -#ifdef __cplusplus -extern "C"{ -#endif +//#ifdef __cplusplus +//extern "C"{ +//#endif -#include "context.h" -#include "config2feat_vec.h" -#include "movement_tagger.h" -#include "simple_decoder_tagger.h" -#ifdef __cplusplus -} -#endif + #include "config2feat_vec.h" + #include "movement_tagger.h" + #include "simple_decoder_tagger.h" + +//#ifdef __cplusplus +//} +//#endif #include "maca_trans_tagger_export.h" @@ -21,13 +21,15 @@ MacaonTransTagger::MacaonTransTagger(char *lg, char *mcd) { char * argv[] = { (char *)"initParser", (char *)"-L", lg, (char *)"-C", mcd, + //(char *)"--debug", + //(char *)"-v", 0 }; ctx = context_read_options(5, argv); decode_tagger_set_linguistic_resources_filenames(ctx); - ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose); + ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); @@ -126,7 +128,7 @@ void MacaonTransTagger::simple_decoder_tagger(context *ctx, const char *mcfStrin add_signature_to_words_in_word_buffer(c->bf, ctx->f2p); b0 = word_buffer_b0(c->bf); - postag = word_get_pos(b0); + postag = -1; //word_get_pos(b0); if(ctx->debug_mode){ fprintf(stderr, "***********************************\n"); @@ -179,7 +181,7 @@ void MacaonTransTagger::print_word(word *w, mcd *mcd_struct, dico *dico_pos, int token = strtok(buffer, "\t"); col_nb = 0; while(token){ - if(col_nb != 0) printf("\t"); + if(col_nb != 0) fprintf(outstream, "\t"); if(col_nb == mcd_get_pos_col(mcd_struct)) fprintf(outstream, "%s", dico_int2string(dico_pos, postag)); else diff --git a/maca_export/src/maca_trans_tagger_export.h b/maca_export/tagger/maca_trans_tagger_export.h similarity index 82% rename from maca_export/src/maca_trans_tagger_export.h rename to maca_export/tagger/maca_trans_tagger_export.h index 1d3aefe500aa178d27b5e7c6c543de92dde1714e..1e234a885d383510cb41ff1b2d0a3059d74a0087 100644 --- a/maca_export/src/maca_trans_tagger_export.h +++ b/maca_export/tagger/maca_trans_tagger_export.h @@ -1,4 +1,17 @@ -#include "context.h" +//#include "maca_trans_parser/src/context.h" +//#include "maca_common/include/word.h" + +//#ifdef __cplusplus +//extern "C"{ +//#endif + + #include "context.h" + #include "../../maca_common/include/word.h" + +//#ifdef __cplusplus +//} +//#endif + class MacaonTransTagger { public: diff --git a/maca_trans_parser/src/simple_decoder_tagger.h b/maca_trans_parser/src/simple_decoder_tagger.h index b5d5b2aa019629a560011a9b7f575228a7cb3353..3856be1616714451ddb62379f6c415522e46e5b5 100644 --- a/maca_trans_parser/src/simple_decoder_tagger.h +++ b/maca_trans_parser/src/simple_decoder_tagger.h @@ -1,5 +1,7 @@ #ifndef __SIMPLE_DECODER_TAGGER__ #define __SIMPLE_DECODER_TAGGER__ +#include "context.h" + void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p); void simple_decoder_tagger(context *ctx);