diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ac8c72e27aa29647ace39d85fc06131ac057b5a..cf3f0d2370576c9bbe83851a3d2f14f3a5e99ee0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,9 @@ project(macaon2) find_package(FLEX) add_definitions("-Wall" ) +SET(CMAKE_C_COMPILER g++) +SET(CMAKE_CXX_COMPILER g++) + if (${CMAKE_C_COMPILER_VERSION} VERSION_LESS 5.3) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11") @@ -33,7 +36,7 @@ add_subdirectory(maca_tokenizer) add_subdirectory(maca_lexer) add_subdirectory(maca_trans_parser) add_subdirectory(maca_crf_tagger) -add_subdirectory(maca_graph_parser) +#add_subdirectory(maca_graph_parser) if(MACA_EXPORT) add_subdirectory(maca_export) diff --git a/maca_common/include/feat_model.h b/maca_common/include/feat_model.h index f7c234cd0ea14ca4015cfa93f1826ecc2e7acc00..3febe35a4ee7c07a77dbc05ebe97a64badbb72d6 100644 --- a/maca_common/include/feat_model.h +++ b/maca_common/include/feat_model.h @@ -24,5 +24,5 @@ feat_desc *feat_model_add(feat_model *fm, feat_desc *fd); feat_model *feat_model_read(char *filename, feat_lib *fl, int verbose); void feat_model_compute_ranges(feat_model *fm, mcd *m, int mvt_nb); int feat_model_get_type_feat_n(feat_model *fm, int n); - +void catenate_int(char *string, int val); #endif diff --git a/maca_common/src/char16.c b/maca_common/src/char16.c index b07d59423b73ac83263c467d4152430cbed9aeb9..311e618726db9c7e060513b47b67d91eb2323e5a 100644 --- a/maca_common/src/char16.c +++ b/maca_common/src/char16.c @@ -56,7 +56,7 @@ char16 *utf8tochar16(char *utf8_string) for(i=0; i < utf8_length; i++) char16_length += length(utf8_string[i]); - char16_string = malloc((char16_length + 1)* sizeof(char16)); + char16_string = (char16*) malloc((char16_length + 1)* sizeof(char16)); for(i=0, j=0; i < utf8_length; i++, j++){ if(length(utf8_string[i]) == 1){ char16_string[j] = (char16)utf8_string[i]; diff --git a/maca_common/src/form2pos.c b/maca_common/src/form2pos.c index 
b200c780e12371d4473561e2690c44fc481e6c5c..853d06108167523713cd1f8a16d0b5d55b50b78b 100644 --- a/maca_common/src/form2pos.c +++ b/maca_common/src/form2pos.c @@ -6,13 +6,13 @@ form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list) { - form2pos *f2p = memalloc(sizeof(form2pos)); + form2pos *f2p = (form2pos *)memalloc(sizeof(form2pos)); char *token; f2p->nbelem = nbelem; f2p->pos_nb = pos_nb; - f2p->d_pos = dico_new("d_pos", pos_nb * 10); - f2p->d_signature = dico_new("d_signature", pos_nb * 10); + f2p->d_pos = dico_new((char *)"d_pos", pos_nb * 10); + f2p->d_signature = dico_new((char *)"d_signature", pos_nb * 10); f2p->h_form2signature = hash_new(nbelem * 4); token = strtok(pos_list, "\t"); do{ diff --git a/maca_common/src/trie.c b/maca_common/src/trie.c index b25bca3df0e2b0a87abe36d74c5c6ed692db65b6..13412c7b552f617dacbb14dce755bc549317f74d 100644 --- a/maca_common/src/trie.c +++ b/maca_common/src/trie.c @@ -7,7 +7,7 @@ trie_state *trie_state_new(trie_trans *transitions, int is_accept) { - trie_state *state = memalloc(sizeof(trie_state)); + trie_state *state = (trie_state *) memalloc(sizeof(trie_state)); state->transitions = transitions; state->is_accept = is_accept; state->fail = 0; @@ -24,7 +24,7 @@ void trie_state_free(trie_state *state) trie *trie_new(void) { - trie *t = memalloc(sizeof(trie)); + trie *t = (trie *) memalloc(sizeof(trie)); t->states = NULL; t->size = 0; t->states_nb = 0; @@ -45,7 +45,7 @@ void trie_free(trie *t) trie_trans *trie_trans_new(int destination, int symbol, trie_trans *next) { - trie_trans *trans = memalloc(sizeof(trie_trans)); + trie_trans *trans = (trie_trans *)memalloc(sizeof(trie_trans)); trans->destination = destination; trans->symbol = symbol; trans->next = next; diff --git a/maca_graph_parser/array.c b/maca_graph_parser/array.c index d7c27c11c14b9a3b552692b00fcb4055c9db3d21..50f2ff6e2ab24f3bc401535c1811ce91de337a4b 100644 --- a/maca_graph_parser/array.c +++ b/maca_graph_parser/array.c @@ -2,7 +2,7 @@ #include "array.h" 
array_t* array_new() { - array_t* array = malloc(sizeof(array_t)); + array_t* array = (array_t *)malloc(sizeof(array_t)); array->num_elements = 0; array->data = NULL; return array; @@ -18,7 +18,7 @@ ARRAY_TYPE array_get(array_t* array, int element) { } void array_push(array_t* array, ARRAY_TYPE value) { - array->data = realloc(array->data, sizeof(ARRAY_TYPE) * (array->num_elements + 1)); + array->data = (ARRAY_TYPE *)realloc(array->data, sizeof(ARRAY_TYPE) * (array->num_elements + 1)); array->data[array->num_elements] = value; array->num_elements++; } diff --git a/maca_graph_parser/maca_graph_parser.c b/maca_graph_parser/maca_graph_parser.c index bc066ca3d83aac65d7c9b9176b8ec966ca6c8359..2b0d888900b94e7f7f1a0e44462f9a40715c2871 100644 --- a/maca_graph_parser/maca_graph_parser.c +++ b/maca_graph_parser/maca_graph_parser.c @@ -75,7 +75,7 @@ void maca_graph_parser_print_ctx(maca_graph_parser_ctx *ctx) maca_graph_parser_ctx * maca_graph_parser_InitCTX() { - maca_graph_parser_ctx * ctx = calloc(sizeof(maca_graph_parser_ctx), 1); + maca_graph_parser_ctx * ctx = (maca_graph_parser_ctx *)calloc(sizeof(maca_graph_parser_ctx), 1); ctx->cfg=MACA_DEFAULT_CFG; ctx->verbose_flag = maca_verbose; diff --git a/maca_graph_parser/maca_graph_parser_alphabet.c b/maca_graph_parser/maca_graph_parser_alphabet.c index c162196e3ab8237c457b76531e96c4d2a635739a..2f503486a1647a0e6b77dd290745b1e5cfd97f29 100644 --- a/maca_graph_parser/maca_graph_parser_alphabet.c +++ b/maca_graph_parser/maca_graph_parser_alphabet.c @@ -36,7 +36,7 @@ void maca_graph_parser_alphabet_free(maca_graph_parser_alphabet *a) maca_graph_parser_alphabet *maca_graph_parser_alphabet_new(char *name) { - maca_graph_parser_alphabet *a = malloc(sizeof(maca_graph_parser_alphabet)); + maca_graph_parser_alphabet *a = (maca_graph_parser_alphabet *)malloc(sizeof(maca_graph_parser_alphabet)); if(a == NULL){ fprintf(stderr, "memory allocation error\n"); exit(1); @@ -153,7 +153,7 @@ maca_graph_parser_alphabet 
**maca_graph_parser_alphabet_load4(char *filename) int i = 0; char symbol[1000]; maca_graph_parser_alphabet *a = NULL; - maca_graph_parser_alphabet **alpha_array = malloc(4 * sizeof(maca_graph_parser_alphabet*)); + maca_graph_parser_alphabet **alpha_array = (maca_graph_parser_alphabet **)malloc(4 * sizeof(maca_graph_parser_alphabet*)); for(i=0; i < 4; i++) alpha_array[i] = NULL; @@ -182,7 +182,7 @@ maca_graph_parser_alphabet **maca_graph_parser_alphabet_load5(char *filename) int i = 0; char symbol[1000]; maca_graph_parser_alphabet *a = NULL; - maca_graph_parser_alphabet **alpha_array = malloc(5 * sizeof(maca_graph_parser_alphabet*)); + maca_graph_parser_alphabet **alpha_array = (maca_graph_parser_alphabet **)malloc(5 * sizeof(maca_graph_parser_alphabet*)); for(i=0; i < 5; i++) alpha_array[i] = NULL; diff --git a/maca_lexer/src/extract_mwe_from_fplm.c b/maca_lexer/src/extract_mwe_from_fplm.c index 800bed0478d786d5df37bd72d3e0562a0b12e1ef..b09defd06f557b016d2390d8326d8ccfb8310bc5 100644 --- a/maca_lexer/src/extract_mwe_from_fplm.c +++ b/maca_lexer/src/extract_mwe_from_fplm.c @@ -29,7 +29,7 @@ dico *decompose_mwe_in_fplm_file(char *fplm_filename, FILE *output_file, int deb char token[1000]; int l; int i, j; - dico *d_tokens = dico_new("TOKENS", 100000); + dico *d_tokens = dico_new((char *)"TOKENS", 100000); int token_code; while(fgets(buffer, 10000, f)){ fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho); @@ -71,6 +71,6 @@ int main(int argc, char *argv[]) dico *d_tokens; d_tokens = decompose_mwe_in_fplm_file(argv[1], stdout, 1); - dico_print("d_tokens.dico", d_tokens); + dico_print((char *)"d_tokens.dico", d_tokens); dico_free(d_tokens); } diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index 91bb573ac43bf7c7ec2123bf18e15414f91c4e82..be494e50df9fd828c3b9040d4816077bc9e3d343 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -17,14 +17,13 @@ set(SOURCES src/context.c # 
src/simple_decoder_tagger_bt.c src/stack.c src/config2feat_vec.c - src/depset.c +# src/depset.c src/config.c # src/queue.c # src/beam.c src/feat_types.c src/mvt.c src/mvt_stack.c - ) #compiling library @@ -33,6 +32,11 @@ add_library(transparse STATIC ${SOURCES}) target_link_libraries(transparse perceptron) #compiling, linking and installing executables +add_executable(maca_trans_parser_nn ./src/maca_trans_parser_nn.cc) +target_link_libraries(maca_trans_parser_nn perceptron) +target_link_libraries(maca_trans_parser_nn transparse) +target_link_libraries(maca_trans_parser_nn maca_common) +install (TARGETS maca_trans_parser_nn DESTINATION bin) add_executable(maca_trans_tagger_mcf2cff ./src/maca_trans_tagger_mcf2cff.c) target_link_libraries(maca_trans_tagger_mcf2cff perceptron) diff --git a/maca_trans_parser/src/cff2fann.c b/maca_trans_parser/src/cff2fann.c index 4a39ebc56b9f437a45376986758f270ce60321a2..294d8215d3e3de25bd16503dc6f65dcefa637c4d 100644 --- a/maca_trans_parser/src/cff2fann.c +++ b/maca_trans_parser/src/cff2fann.c @@ -128,7 +128,7 @@ void cff2fann(context *ctx) int feature_valindex; int count = 0; - vocab = dico_vec_get_dico(ctx->vocabs, "d_perceptron_features"); + vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); /* printf("%d %d\n", 1, ctx->features_model->nbelem); */ diff --git a/maca_trans_parser/src/compare_traces.c b/maca_trans_parser/src/compare_traces.c index f6567c3df39b0938ad658283241576387d3914b7..baea6fbbaf88b70e1f42302b93b07a26e6bb5299 100644 --- a/maca_trans_parser/src/compare_traces.c +++ b/maca_trans_parser/src/compare_traces.c @@ -26,7 +26,7 @@ int configuration_equal(configuration *c1, configuration *c2) configuration *configuration_new(int index, char *stack, char *movement, float score) { - configuration *c = malloc(sizeof(configuration)); + configuration *c = (configuration *)malloc(sizeof(configuration)); if(c == NULL) return NULL; c->index = index; c->stack = stack; diff --git a/maca_trans_parser/src/context.c 
b/maca_trans_parser/src/context.c index bc75f11401ab10345e8aeb4a3e493ea0c30fbd8e..2b8b165bd14e28565a1efe46fce3be0500e6f6e0 100644 --- a/maca_trans_parser/src/context.c +++ b/maca_trans_parser/src/context.c @@ -25,6 +25,10 @@ void context_free(context *ctx) if(ctx->vocabs_filename) free(ctx->vocabs_filename); if(ctx->fplm_filename) free(ctx->fplm_filename); + if(ctx->json_filename) free(ctx->json_filename); + if(ctx->dnn_model_filename) free(ctx->dnn_model_filename); + + if (ctx->mcd_struct) mcd_free(ctx->mcd_struct); @@ -44,6 +48,7 @@ void context_free(context *ctx) if(ctx->f2p) form2pos_free(ctx->f2p); + free(ctx); } @@ -92,6 +97,11 @@ context *context_new(void) ctx->ifpls = 1; ctx->trace_mode = 0; + + + ctx->json_filename = NULL; + ctx->dnn_model_filename = NULL; + return ctx; } @@ -167,6 +177,14 @@ void context_trace_mode_help_message(context *ctx){ void context_debug_help_message(context *ctx){ fprintf(stderr, "\t-d --debug : activate debug mode (default is false)\n"); } +void context_json_help_message(context *ctx){ + fprintf(stderr, "\t-J --json : json description of keras model\n"); +} +void context_dnn_model_help_message(context *ctx){ + fprintf(stderr, "\t-N --dnn_model : weight file for dnn\n"); +} + + context *context_read_options(int argc, char *argv[]) { @@ -176,7 +194,7 @@ context *context_read_options(int argc, char *argv[]) ctx->program_name = strdup(argv[0]); - static struct option long_options[22] = + static struct option long_options[24] = { {"help", no_argument, 0, 'h'}, {"verbose", no_argument, 0, 'v'}, @@ -199,13 +217,15 @@ context *context_read_options(int argc, char *argv[]) {"maca_data_path", required_argument, 0, 'D'}, {"root_label", required_argument, 0, 'R'}, {"f2p", required_argument, 0, 'P'}, - {"traces", required_argument, 0, 'T'} + {"traces", required_argument, 0, 'T'}, + {"json", required_argument, 0, 'J'}, + {"dnn_model", required_argument, 0, 'N'} }; optind = 0; opterr = 0; - while ((c = getopt_long (argc, argv, 
"hvdcSTm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:", long_options, &option_index)) != -1){ + while ((c = getopt_long (argc, argv, "hvdcSTm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:J:N:", long_options, &option_index)) != -1){ switch (c) { case 'h': @@ -277,6 +297,12 @@ context *context_read_options(int argc, char *argv[]) ctx->f2p_filename = strdup(optarg); ctx->f2p = form2pos_read(ctx->f2p_filename); break; + case 'N': + ctx->dnn_model_filename = strdup(optarg); + break; + case 'J': + ctx->json_filename = strdup(optarg); + break; } } diff --git a/maca_trans_parser/src/context.h b/maca_trans_parser/src/context.h index 82db602d38516ac109daee70c0f6d9ae7c9f8819..a204a3e36deb03cd71c1d51b8fa81f16ab3996e2 100644 --- a/maca_trans_parser/src/context.h +++ b/maca_trans_parser/src/context.h @@ -38,7 +38,6 @@ typedef struct { char *program_name; char *input_filename; char *perc_model_filename; - char *dnn_model_filename; char *cff_filename; char *fann_filename; char *stag_desc_filename; @@ -72,6 +71,10 @@ typedef struct { int conll; int ifpls; int trace_mode; + + char *json_filename; + char *dnn_model_filename; + } context; context *context_new(void); @@ -108,6 +111,7 @@ void context_input_help_message(context *ctx); void context_root_label_help_message(context *ctx); void context_debug_help_message(context *ctx); - +void context_json_help_message(context *ctx); +void context_dnn_model_help_message(context *ctx); #endif diff --git a/maca_trans_parser/src/feat_fct.c b/maca_trans_parser/src/feat_fct.c index b65b2dd240d1bdff404e0b6c2ae54cb0d95be66b..e4e02de4707a7fac892195ded4ae4907929efcf3 100644 --- a/maca_trans_parser/src/feat_fct.c +++ b/maca_trans_parser/src/feat_fct.c @@ -430,7 +430,7 @@ int bm3Z(void *c) {return word_get_Z(word_buffer_bm3(config_get_buffer((config * /* structural features */ int ldep_s0r(void *input){ - config *c = input; + config *c = (config *)input; word *gov = stack_s0(config_get_stack((config *) c)); int i; word *dep; @@ -450,7 +450,7 @@ int ldep_s0r(void *input){ } int 
ldep_s0p(void *input){ - config *c = input; + config *c = (config *)input; word *gov = stack_s0(config_get_stack((config *) c)); int i; word *dep; @@ -470,7 +470,7 @@ int ldep_s0p(void *input){ } int ldep_s1r(void *input){ - config *c = input; + config *c = (config *)input; word *gov = stack_s1(config_get_stack((config *) c)); int i; word *dep; @@ -490,7 +490,7 @@ int ldep_s1r(void *input){ } int ldep_s1p(void *input){ - config *c = input; + config *c = (config *)input; word *gov = stack_s1(config_get_stack((config *) c)); int i; word *dep; @@ -510,7 +510,7 @@ int ldep_s1p(void *input){ } int ldep_b0r(void *input){ - config *c = input; + config *c = (config *)input; word *gov = word_buffer_b0(config_get_buffer((config *) c)); int i; word *dep; @@ -530,7 +530,7 @@ int ldep_b0r(void *input){ } int ldep_b0p(void *input){ - config *c = input; + config *c = (config *)input; word *gov = word_buffer_b0(config_get_buffer((config *) c)); int i; word *dep; @@ -550,7 +550,7 @@ int ldep_b0p(void *input){ } int rdep_s0r(void *input){ - config *c = input; + config *c = (config *)input; word *gov = stack_s0(config_get_stack((config *) c)); int i; word *dep; @@ -570,7 +570,7 @@ int rdep_s0r(void *input){ } int rdep_s0p(void *input){ - config *c = input; + config *c = (config *)input; word *gov = stack_s0(config_get_stack((config *) c)); int i; word *dep; @@ -590,7 +590,7 @@ int rdep_s0p(void *input){ } int rdep_s1p(void *input){ - config *c = input; + config *c = (config *)input; word *gov = stack_s1(config_get_stack((config *) c)); int i; word *dep; @@ -610,7 +610,7 @@ int rdep_s1p(void *input){ } int rdep_s1r(void *input){ - config *c = input; + config *c = (config *)input; word *gov = stack_s1(config_get_stack((config *) c)); int i; word *dep; @@ -630,7 +630,7 @@ int rdep_s1r(void *input){ } int rdep_b0r(void *input){ - config *c = input; + config *c = (config *)input; word *gov = word_buffer_b0(config_get_buffer((config *) c)); int i; word *dep; @@ -650,7 +650,7 @@ int 
rdep_b0r(void *input){ } int rdep_b0p(void *input){ - config *c = input; + config *c = (config *)input; word *gov = word_buffer_b0(config_get_buffer((config *) c)); int i; word *dep; @@ -671,7 +671,7 @@ int rdep_b0p(void *input){ int ndep_b0(void *input){ - config *c = input; + config *c = (config *)input; word *gov = word_buffer_b0(config_get_buffer((config *) c)); int i; int n = 0; @@ -698,7 +698,7 @@ int ndep_b0(void *input){ } int ndep_s0(void *input){ - config *c = input; + config *c = (config *)input; word *gov = stack_s0(config_get_stack((config *) c)); int i; int n = 0; @@ -728,7 +728,7 @@ int ndep_s0(void *input){ /* distance features */ int dist_s0_b0(void *input){ - config *c = input; + config *c = (config *)input; int dist; if(stack_is_empty(config_get_stack((config *) c)) || word_buffer_is_empty(config_get_buffer((config *) c))) @@ -743,7 +743,7 @@ int dist_s0_b0(void *input){ /* stack height */ int sh(void *input) { -config *c = input; +config *c = (config *)input; return (config_get_stack((config *) c)->top > 7)? 7 : config_get_stack((config *) c)->top; /* return (stack_nbelem(config_get_stack((config *) c)) > 0)? 1 : 0; */ @@ -751,7 +751,7 @@ config *c = input; /* buffer size */ int bh(void *input) { -config *c = input; +config *c = (config *)input; return (config_get_buffer((config *) c)->size > 7)? 7 : config_get_buffer((config *) c)->size; } @@ -793,21 +793,21 @@ int t4(void *c) int mvt0(void *input) { - config *c = input; + config *c = (config *)input; if(c->vcode_array == NULL) return -1; return c->vcode_array[0].class_code; } int mvt1(void *input) { - config *c = input; + config *c = (config *)input; if(c->vcode_array == NULL) return -1; return c->vcode_array[1].class_code; } int delta1(void *input) { - config *c = input; + config *c = (config *)input; if(c->vcode_array == NULL) return -1; int delta = (int) (c->vcode_array[0].score - c->vcode_array[1].score); return (delta >= 10)? 
10: delta; @@ -815,14 +815,14 @@ int delta1(void *input) int mvt2(void *input) { - config *c = input; + config *c = (config *)input; if(c->vcode_array == NULL) return -1; return c->vcode_array[2].class_code; } int delta2(void *input) { - config *c = input; + config *c = (config *)input; if(c->vcode_array == NULL) return -1; int delta = (int) (c->vcode_array[0].score - c->vcode_array[2].score); return (delta >= 10)? 10: delta; @@ -830,14 +830,14 @@ int delta2(void *input) int mvt3(void *input) { - config *c = input; + config *c = (config *)input; if(c->vcode_array == NULL) return -1; return c->vcode_array[3].class_code; } int delta3(void *input) { - config *c = input; + config *c = (config *)input; if(c->vcode_array == NULL) return -1; int delta = (int) (c->vcode_array[0].score - c->vcode_array[3].score); return (delta >= 10)? 10: delta; diff --git a/maca_trans_parser/src/global_feat_vec.c b/maca_trans_parser/src/global_feat_vec.c index c8eebb332e99437de68562d10d95b97a15c93895..94ecffee668b6fab8081d39a427baf3edab78363 100644 --- a/maca_trans_parser/src/global_feat_vec.c +++ b/maca_trans_parser/src/global_feat_vec.c @@ -16,7 +16,7 @@ void global_feat_vec_print(global_feat_vec *gfv) global_feat_vec *global_feat_vec_new(void) { - global_feat_vec *gfv = memalloc(sizeof(global_feat_vec)); + global_feat_vec *gfv = (global_feat_vec *)memalloc(sizeof(global_feat_vec)); gfv->nbelem = 0; gfv->array = NULL; return gfv; @@ -24,7 +24,7 @@ global_feat_vec *global_feat_vec_new(void) void global_feat_vec_add(global_feat_vec *gfv, int pred_mvt, feat_vec *fv) { - global_feat_vec_elt *elt = memalloc(sizeof(global_feat_vec_elt)); + global_feat_vec_elt *elt = (global_feat_vec_elt *) memalloc(sizeof(global_feat_vec_elt)); elt->pred_mvt = pred_mvt; /* elt->oracle_mvt = oracle_mvt; */ elt->fv = fv; diff --git a/maca_trans_parser/src/json.h b/maca_trans_parser/src/json.h new file mode 100644 index 0000000000000000000000000000000000000000..b4087ac609aa79ef290496c952da93a7818d916a --- 
/dev/null +++ b/maca_trans_parser/src/json.h @@ -0,0 +1,243 @@ +#pragma once + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#include <iostream> +#include <string> +#include <sstream> +#include <vector> +#include <map> + +namespace json { + + typedef enum { Null, True, False, Number, String, List, Object } Type; + + std::string replace_all(std::string subject, const std::string& search, const std::string& replace) { + size_t pos = 0; + while ((pos = subject.find(search, pos)) != std::string::npos) { + subject.replace(pos, search.length(), replace); + pos += replace.length(); + } + return subject; + } + + class Value { + public: + Type type; + double number; + std::string string; + std::vector<Value> list; + std::map<std::string, Value> object; + public: + Value() : type(Null) { } + Value(Type _type) : type(_type) { } + Value(const std::string& text) : type(String), string(text) { } + Value(double _number) : type(Number), number(_number) { } + void append(const Value& v) { list.push_back(v); } + Value& operator[](int index) { return list[index]; } + Value& operator[](const std::string& key) { + return object[key]; } + int length() const { + if(type == List) { return list.size(); } + if(type == Object) { return object.size(); } + if(type == String) { return string.length(); } + return 1; + } + std::string to_json() const { + std::stringstream out; + if(type == String) out << "\"" << replace_all(string, "\"", "\\\"") << "\""; + else if(type == Null) out << "null"; + else if(type == True) out << "true"; + else if(type == False) out << "false"; + else if(type == Number) out << number; + else if(type == List) { + out << "["; + for(int i = 0; i < length(); i++) { + if(i > 0) out << ","; + out << list[i].to_json(); + } + out << "]"; + } else if(type == Object) { + out << "{"; + for(std::map<std::string, Value>::const_iterator i = begin(); i != end(); i++) { + if(i != begin()) out << ","; + out << "\"" << i->first << "\":" << i->second.to_json(); + } + 
out << "}"; + } else { + out << "error"; + } + return out.str(); + } + std::string to_string() const { return string; } + int to_int() const { return (int) number; } + double to_double() const { return (double) number; } + std::map<std::string, Value>::const_iterator begin() const { return object.begin(); } + std::map<std::string, Value>::const_iterator end() const { return object.end(); } + }; + + class Iterator { + const Value& backend; + std::map<std::string, Value>::const_iterator iterator; + public: + Iterator(const Value& target) : backend(target), iterator(target.begin()) { } + const std::string key() { return iterator->first; } + const Value value() { return iterator->second; } + bool hasNext() { return iterator != backend.end(); } + void next() { iterator++; } + }; + + class Parser { + const char* input; + + void error() { + throw std::string("error at \"" + std::string(input) + "\""); + } + + void space() { + while(*input != '\0' && (*input == ' ' || *input == '\t' || *input == '\n' || *input == '\r')) input++; + } + + Value null() { + input += 4; + return Value(Null); + } + + Value _false() { + input += 5; + return Value(False); + } + + Value _true() { + input += 4; + return Value(True); + } + + Value number() { + char* end = NULL; + double value = strtod(input, &end); + input = end; + return Value(value); + } + + Value string() { + input++; + char prev = '"'; + const char* start = input; + while(*input != '\0') { + if(*input == '"' && prev != '\\') { + char text[input - start + 1]; + strncpy(text, start, input - start); + text[input - start] = '\0'; + input++; + return Value(replace_all(text, "\\\"", "\"")); + } + prev = *input; + input++; + } + error(); + return Value(Null); + } + + Value list() { + input++; + Value l(List); + while(*input != '\0') { + space(); + if(*input == ']') { + input++; + return l; + } + l.append(value()); + space(); + if(*input == ']') { + input++; + return l; + } else if(*input == ',') { + input++; + } else { + error(); + } + 
} + error(); + return Value(Null); + } + + Value object() { + input++; + Value o(Object); + while(*input != '\0') { + space(); + if(*input == '}') { + input++; + return o; + } + std::string key = string().to_string(); + space(); + if(*input != ':') error(); + else { + input++; + space(); + o[key] = value(); + space(); + } + if(*input == '}') { + input++; + return o; + } else if(*input == ',') { + input++; + } else { + error(); + return Value(Null); + } + } + error(); + return Value(Null); + } + + Value value() { + if(*input == '{') return object(); + else if(*input == '[') return list(); + else if(*input == '"') return string(); + else if(!strncmp(input, "true", 4)) return _true(); + else if(!strncmp(input, "false", 5)) return _false(); + else if(!strncmp(input, "null", 4)) return null(); + else return number(); + } + + public: + Value parse(const char* p) { + input = p; + space(); + Value v = value(); + space(); + if(*input != '\0') error(); + return v; + } + }; + + Value parse(const std::string& str) { + return Parser().parse(str.c_str()); + } + Value parse(const char* input) { + return Parser().parse(input); + } + Value parse_file(const char* filename) { + FILE* fp = fopen(filename, "r"); + if(!fp) { std::cerr << "ERROR: cannot load file \"" << filename << "\"\n"; exit(1); } + fseek(fp, 0, SEEK_END); + off_t length = ftell(fp); + char* content = new char[length + 1]; + fseek(fp, 0, SEEK_SET); + size_t read = fread(content, length, 1, fp); + if(read != 1) { std::cerr << "ERROR: could not read content of \"" << filename << "\"\n"; exit(1); } + fclose(fp); + content[length] = '\0'; + Value v = parse(content); + delete[] content; + return v; + } + +} diff --git a/maca_trans_parser/src/keras.h b/maca_trans_parser/src/keras.h new file mode 100644 index 0000000000000000000000000000000000000000..bd18939cd0ff994e59134fc4350afd5807509f15 --- /dev/null +++ b/maca_trans_parser/src/keras.h @@ -0,0 +1,208 @@ +#include <iostream> +#include <cassert> + +#include "json.h" 
+#include "matrix.h" + +class Node { + protected: + json::Value config; + std::vector<Node*> inbound; + std::string name; + public: + Node() { name = "NONE"; } + virtual ~Node() { } + Node(const json::Value& _config) : config(_config) { name = config["name"].to_string();} + void setup(std::map<std::string, Node*>& nodes) { + json::Value inbound_nodes = config["inbound_nodes"]; + if(inbound_nodes.length() > 0) { + for(int j = 0; j < inbound_nodes[0].length(); j++) { + std::string node_name = inbound_nodes[0][j][0].to_string(); + //std::cerr << node_name << "->" << name << "\n"; + if(nodes.find(node_name) != nodes.end()) { + inbound.push_back(nodes[node_name]); + } else { + std::cerr << "ERROR: cannot find inbound layer \"" << node_name << "\" when setting up layer \"" << name << "\"\n"; + exit(1); + } + } + } + } + + virtual void set_input(const Matrix<float> & x) { } + virtual Matrix<float> get_output() { + assert(inbound.size() == 1); + assert(inbound[0] != NULL); + return forward(inbound[0]->get_output()); + } + virtual Matrix<float> forward(const Matrix<float> & x) { return x; } +}; + +class Embedding : public Node { + protected: + Matrix<float> W; + public: + Embedding(const json::Value& _config, FILE* storage) : Node(_config) { + fseek(storage, config["weights"]["W"].to_int(), SEEK_SET); + W.load(storage); + //W.print("W"); + } + virtual Matrix<float> forward(const Matrix<float> & x) { + Matrix<float> output(x.rows, x.cols * W.cols); + for(int i = 0; i < x.rows; i++) { + for(int j = 0; j < x.cols; j++) { + int id = (int)x.at(i, j); + if(id < 0 || id >= W.rows) { + std::cerr << "WARNING: unexpected embedding id " << id << " for row " << i << " in layer " << name << ", mapping to 0\n"; + id = 0; + } + //assert(id >= 0 && id < W.rows); + output[i].slice(j * W.cols, W.cols) = W[id]; + } + } + return output; + } +}; + +class Dense : public Node { + protected: + Matrix<float> W, b; + Matrix<float> (*activation)(const Matrix<float>&); + public: + Dense(const 
json::Value& _config, FILE* storage) : Node(_config) { + fseek(storage, config["weights"]["W"].to_int(), SEEK_SET); + W.load(storage); + fseek(storage, config["weights"]["b"].to_int(), SEEK_SET); + b.load(storage); + std::string function = config["config"]["activation"].to_string(); + if(function == "linear") activation = Matrix<float>::identity; + else if(function == "tanh") activation = Matrix<float>::tanh; + else if(function == "sigmoid") activation = Matrix<float>::sigmoid; + else if(function == "relu") activation = Matrix<float>::relu; + else if(function == "softmax") activation = Matrix<float>::softmax; + else { + std::cerr << "ERROR: unsupported activation function \"" << function << "\"\n"; + exit(1); + } + } + virtual Matrix<float> forward(const Matrix<float> & x) { + return activation(x.dot(W) + b); + } +}; + +class Input : public Node { + Matrix<float> input; + public: + Input(const json::Value& _config) : Node(_config) { } + void set_input(const Matrix<float> &x) { input = x; } + Matrix<float> get_output() { return input; } +}; + +class Merge : public Node { + public: + Merge(const json::Value& _config) : Node(_config) { } + Matrix<float> get_output() { + int cols = 0; + int rows = 0; + std::vector<Matrix<float> > inputs(inbound.size()); + for(size_t i = 0; i < inbound.size(); i++) { + inputs[i] = inbound[i]->get_output(); + cols += inputs[i].cols; + if(rows < inputs[i].rows) rows = inputs[i].rows; + } + //std::cerr << "Merge: " << rows << "x" << cols << "\n"; + Matrix<float> output(rows, cols); + int offset = 0; + for(size_t i = 0; i < inputs.size(); i++) { + for(int j = 0; j < inputs[i].rows; j++) { + output[j].slice(offset, inputs[i].cols) = inputs[i][j]; + } + offset += inputs[i].cols; + } + return output; + } +}; + +class Identity : public Node { + public: + Identity(const json::Value& _config) : Node(_config) { } + Matrix<float> get_output() { + assert(inbound.size() == 1); + return inbound[0]->get_output(); + } +}; + +class Model { + 
std::map<std::string, Node*> nodes; + std::vector<Node*> output_nodes; + std::vector<Node*> input_nodes; + + public: + + ~Model() { + for(std::map<std::string, Node*>::iterator i = nodes.begin(); i != nodes.end(); i++) { + delete i->second; + } + } + + int num_inputs() { return input_nodes.size(); } + int num_outputs() { return output_nodes.size(); } + + std::vector<Matrix<float> > forward(std::vector<Matrix<float> > input) { + assert(input_nodes.size() == input.size()); + for(size_t i = 0; i < input.size(); i++) { + input_nodes[i]->set_input(input[i]); + } + std::vector<Matrix<float> > output(output_nodes.size()); + for(size_t i = 0; i < output_nodes.size(); i++) { + output[i] = output_nodes[i]->get_output(); + } + return output; + } + Matrix<float> forward(const Matrix<float>& input) { + assert(input_nodes.size() == 1); + assert(output_nodes.size() == 1); + input_nodes[0]->set_input(input); + return output_nodes[0]->get_output(); + } + + static Model load(const char* json_filename, const char* storage_filename) { + Model model; + + json::Value config = json::parse_file(json_filename); + FILE* storage = fopen(storage_filename, "r"); + + for(int i = 0; i < config["config"]["layers"].length(); i++) { + json::Value layer = config["config"]["layers"][i]; + std::string name = layer["name"].to_string(); + std::string class_name = layer["class_name"].to_string(); + std::vector<std::string> inbound; + if(class_name == "Dense") model.nodes[name] = new Dense(layer, storage); + else if(class_name == "Embedding") model.nodes[name] = new Embedding(layer, storage); + else if(class_name == "Merge") model.nodes[name] = new Merge(layer); + else if(class_name == "Dropout") model.nodes[name] = new Identity(layer); + else if(class_name == "InputLayer") model.nodes[name] = new Input(layer); + else { + std::cerr << "ERROR: unsupported layer class \"" << class_name << "\"\n"; + exit(1); + } + } + for(std::map<std::string, Node*>::iterator i = model.nodes.begin(); i != model.nodes.end(); 
i++) { + i->second->setup(model.nodes); + } + + for(int i = 0; i < config["config"]["input_layers"].length(); i++) { + std::string name = config["config"]["input_layers"][i][0].to_string(); + model.input_nodes.push_back(model.nodes[name]); + } + for(int i = 0; i < config["config"]["output_layers"].length(); i++) { + std::string name = config["config"]["output_layers"][i][0].to_string(); + model.output_nodes.push_back(model.nodes[name]); + } + + fclose(storage); + return model; + } + +}; + diff --git a/maca_trans_parser/src/maca_trans_lemmatizer.c b/maca_trans_parser/src/maca_trans_lemmatizer.c index ee38e7a5f61eb9ea37286efe68df59aa20a9ef67..f0c86e5e8eedc7fb7f208e497c7fe199cb8cd800 100644 --- a/maca_trans_parser/src/maca_trans_lemmatizer.c +++ b/maca_trans_parser/src/maca_trans_lemmatizer.c @@ -1,3 +1,4 @@ + #include<stdio.h> #include<stdlib.h> #include<string.h> @@ -80,7 +81,7 @@ char **read_fplm_file(char *fplm_filename, hash *form_pos_ht, int debug_mode, in if(num >= *lemma_array_size){ *lemma_array_size = 2 * (*lemma_array_size) + 1; - lemma_array = realloc(lemma_array, (*lemma_array_size) * sizeof(char *)); + lemma_array = (char **)realloc(lemma_array, (*lemma_array_size) * sizeof(char *)); // initialize in order to be able to free correctly and the end for(int i=num; i<*lemma_array_size; ++i) { lemma_array[i] = NULL; diff --git a/maca_trans_parser/src/maca_trans_parser_nn.cc b/maca_trans_parser/src/maca_trans_parser_nn.cc new file mode 100644 index 0000000000000000000000000000000000000000..89322ea78045642ecef37ca8ace8b82744290728 --- /dev/null +++ b/maca_trans_parser/src/maca_trans_parser_nn.cc @@ -0,0 +1,306 @@ +#include<stdio.h> +#include<stdlib.h> +#include<string.h> +#include<unistd.h> +#include<getopt.h> +#include"context.h" +#include"movement_parser.h" +#include"oracle_parser_arc_eager.h" +#include"feat_fct.h" +#include"feature_table.h" +#include"dico.h" +#include "keras.h" +#include"movement_parser_arc_eager.h" +#include"feat_fct.h" 
+#include"feature_table.h" + + + + +void maca_trans_parser_nn_help_message(context *ctx) +{ + context_general_help_message(ctx); + context_debug_help_message(ctx); + fprintf(stderr, "INPUT\n"); + context_input_help_message(ctx); + context_mcd_help_message(ctx); + context_vocabs_help_message(ctx); + context_features_model_help_message(ctx); + context_root_label_help_message(ctx); + context_json_help_message(ctx); + context_dnn_model_help_message(ctx); +} + +void maca_trans_parser_nn_check_options(context *ctx){ + if(ctx->help + /*!ctx->conll_filename*/ + /* || !ctx->perc_model_filename + || !ctx->mcd_filename + || !ctx->vocabs_filename + || !ctx->features_model_filename*/ + ){ + maca_trans_parser_nn_help_message(ctx); + exit(1); + } +} + + + +void set_linguistic_resources_filenames_parser(context *ctx) +{ + char absolute_filename[500]; + + if(!ctx->perc_model_filename){ + strcpy(absolute_filename, ctx->maca_data_path); + strcat(absolute_filename, DEFAULT_MODEL_FILENAME); + ctx->perc_model_filename = strdup(absolute_filename); + } + + if(!ctx->vocabs_filename){ + strcpy(absolute_filename, ctx->maca_data_path); + strcat(absolute_filename, DEFAULT_VOCABS_FILENAME); + ctx->vocabs_filename = strdup(absolute_filename); + } + + /* if(!ctx->mcd_filename){ + strcpy(absolute_filename, ctx->maca_data_path); + strcat(absolute_filename, DEFAULT_MULTI_COL_DESC_FILENAME); + ctx->mcd_filename = strdup(absolute_filename); + }*/ + + if(!ctx->features_model_filename){ + strcpy(absolute_filename, ctx->maca_data_path); + strcat(absolute_filename, DEFAULT_FEATURES_MODEL_FILENAME); + ctx->features_model_filename = strdup(absolute_filename); + } + + if(ctx->verbose){ + fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename); + fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename); + fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename); + fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename); + } +} + + + +void 
print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct) +{ + int i; + word *w; + char *label; + char *buffer = NULL; + char *token = NULL; + int col_nb = 0; + + + for(i=0; i < config_get_buffer(c)->nbelem; i++){ + w = word_buffer_get_word_n(config_get_buffer(c), i); + + if((mcd_get_gov_col(mcd_struct) == -1) + && (mcd_get_label_col(mcd_struct) == -1) + && (mcd_get_sent_seg_col(mcd_struct) == -1)){ + printf("%s\t", word_get_input(w)); + printf("%d\t", word_get_gov(w)); + label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w)); + if(label != NULL) + printf("%s\t", label) ; + else + printf("_\t"); + if(word_get_sent_seg(w) == 1) + printf("1\n") ; + else + printf("0\n"); + } + else{ + buffer = strdup(w->input); + token = strtok(buffer, "\t"); + col_nb = 0; + while(token){ + if(col_nb != 0) printf("\t"); + if(col_nb == mcd_get_gov_col(mcd_struct)){ + printf("%d", word_get_gov(w)); + } + else + if(col_nb == mcd_get_label_col(mcd_struct)){ + label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w)); + if(label != NULL) + printf("%s", label) ; + else + printf("_"); + } + else + if(col_nb == mcd_get_sent_seg_col(mcd_struct)){ + if(word_get_sent_seg(w) == 1) + printf("1") ; + else + printf("0"); + } + else{ + word_print_col_n(stdout, w, col_nb); + } + col_nb++; + token = strtok(NULL, "\t"); + } + if((col_nb <= mcd_get_gov_col(mcd_struct)) || (mcd_get_gov_col(mcd_struct) == -1)){ + printf("\t%d", word_get_gov(w)); + } + if((col_nb <= mcd_get_label_col(mcd_struct)) || (mcd_get_label_col(mcd_struct) == -1)){ + label = (word_get_label(w) == -1)? 
NULL : dico_int2string(dico_labels, word_get_label(w)); + if(label != NULL) + printf("\t%s", label) ; + else + printf("\t_"); + } + if((col_nb <= mcd_get_sent_seg_col(mcd_struct)) || (mcd_get_sent_seg_col(mcd_struct) == -1)){ + if(word_get_sent_seg(w) == 1) + printf("\t1") ; + else + printf("\t0"); + } + printf("\n"); + free(buffer); + } + } +} + +std::vector<Matrix<float> > config2keras_vec(feat_model *fm, config *c) +{ + int i; + feat_desc *fd; + int feat_value; + std::vector<Matrix<float> > keras_vec(fm->nbelem, Matrix<float>(1, 1)); + for(i=0; i < fm->nbelem; i++){ + /* fm must be exclusively composed of simple features */ + /* if this is not the case, the first feature of a complex feature is take into account */ + fd = fm->array[i]; + feat_value = fd->array[0]->fct(c); + keras_vec[i][0][0] = feat_value + 1; + // printf("feature %d = %d\n", i, (int)keras_vec[i][0][0]); + } + return keras_vec; +} + +void simple_decoder_parser_arc_eager_nn(context *ctx, Model &model) +{ + FILE *f = (ctx->input_filename)? 
myfopen(ctx->input_filename, "r") : stdin; + feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose); + int root_label; + int mvt_code; + int mvt_type; + int mvt_label; + feat_vec *fv = feat_vec_new(10); + config *c = NULL; + int result; + std::vector<Matrix<float> > keras_vec; + + root_label = dico_string2int(ctx->dico_labels, ctx->root_label); + if(root_label == -1) root_label = 0; + + c = config_new(f, ctx->mcd_struct, 5); + while(!config_is_terminal(c)){ + + if(ctx->debug_mode){ + fprintf(stdout, "***********************************\n"); + config_print(stdout, c); + } + /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */ + /* which means that the top of the stack got its eos status from input */ + /* force the parser to finish parsing the sentence (perform all pending reduce actions) and determine root of the sentence */ + + if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){ + word_set_sent_seg(stack_top(config_get_stack(c)), -1); + movement_parser_eos(c); + while(movement_parser_reduce(c)); + while(movement_parser_root(c, root_label)); + if(ctx->debug_mode) printf("force EOS\n"); + } + + /* normal behaviour, ask classifier what is the next movement to do and do it */ + else{ + keras_vec = config2keras_vec(ctx->features_model, c); + std::vector<Matrix<float> > y = model.forward(keras_vec); + Matrix<float> argmax = y[0].argmax(); + mvt_code = argmax.at(0, 0); + + // printf("mvt code = %d\n", mvt_code); + + /* config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); */ + /* mvt_code = feature_table_argmax(fv, ft, &max); */ + + mvt_type = movement_parser_type(mvt_code); + mvt_label = movement_parser_label(mvt_code); + + result = 0; + switch(mvt_type){ + case MVT_PARSER_LEFT : + result = movement_parser_left_arc(c, mvt_label); + break; + case MVT_PARSER_RIGHT: + result 
= movement_parser_right_arc(c, mvt_label); + break; + case MVT_PARSER_REDUCE: + result = movement_parser_reduce(c); + break; + case MVT_PARSER_ROOT: + result = movement_parser_root(c, root_label); + break; + case MVT_PARSER_EOS: + result = movement_parser_eos(c); + break; + case MVT_PARSER_SHIFT: + result = movement_parser_shift(c); + } + + if(result == 0){ + if(ctx->debug_mode) fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); + result = movement_parser_shift(c); + if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */ + if(ctx->debug_mode) fprintf(stdout, "WARNING : cannot exectue a SHIFT emptying stack !\n"); + while(!stack_is_empty(config_get_stack(c))) + movement_parser_root(c, root_label); + } + } + } + } + + if(!ctx->trace_mode) + print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct); + + config_free(c); + feat_vec_free(fv); + feature_table_free(ft); + if(ctx->input_filename) + fclose(f); +} + + + +int main(int argc, char *argv[]) +{ + context *ctx; + + ctx = context_read_options(argc, argv); + maca_trans_parser_nn_check_options(ctx); + + set_linguistic_resources_filenames_parser(ctx); + Model model = Model::load(ctx->json_filename, ctx->dnn_model_filename); + + ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); + ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); + + mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose); + + ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); + + if(ctx->dico_labels == NULL){ + fprintf(stderr, "cannot find label names\n"); + return 1; + } + + simple_decoder_parser_arc_eager_nn(ctx, model); + + context_free(ctx); + return 0; +} + diff --git a/maca_trans_parser/src/matrix.h b/maca_trans_parser/src/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..67c839344bcbf2df29cb89524a58ac714ae4f1c1 --- /dev/null +++ b/maca_trans_parser/src/matrix.h @@ -0,0 
#pragma once

//#define USE_CBLAS
#include <cstdio>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <iostream>

#ifdef USE_CBLAS
extern "C" {
#include <cblas.h>
}
#endif

// Prints a printf-style diagnostic on stderr. The expansion carries its own
// trailing semicolon (kept as is for existing callers), so do not use it as
// the only statement of an `if` that has an `else` branch.
#define error(...) fprintf(stderr, __VA_ARGS__);

// Axis along which a reduction is performed:
//   BOTH reduces the whole matrix to 1x1,
//   ROWS collapses the rows    (result is 1 x cols),
//   COLS collapses the columns (result is rows x 1).
typedef enum {
    BOTH, ROWS, COLS
} Axis;

// Non-owning view on `cols` consecutive values of a matrix row.
// Copying a view copies the pointer; assigning a view copies the values.
template <class T>
class MatrixRow {
    private:
        T* values;
        int cols;
    public:
        MatrixRow(T* _values, int _cols) : values(_values), cols(_cols) { }
        MatrixRow(const MatrixRow& other) : values(other.values), cols(other.cols) { }
        T& operator[](int y) {
            return *(values + y);
        }
        // read access through a const view (what Matrix::operator[] const returns)
        const T& operator[](int y) const {
            return *(values + y);
        }
        // Copies the values of `other` into this row; both views must have the
        // same length. memmove: two views of one row may overlap.
        MatrixRow& operator=(const MatrixRow& other) {
            assert(cols == other.cols);
            if(cols > 0) memmove(values, other.values, cols * sizeof(T));
            return *this;
        }
        // View on `length` values starting at column `offset`.
        MatrixRow slice(int offset, int length) const {
            assert(offset >= 0 && length >= 0 && offset + length <= cols);
            return MatrixRow(values + offset, length);
        }

};

// Dense row-major matrix owning its storage. An empty matrix (rows * cols
// == 0) owns no storage (values == NULL).
// Values are copied with memcpy, so T must be trivially copyable.
template <class T>
class Matrix {
    public:
        int rows;
        int cols;
    private:
        T* values;

        // Storage for `count` values, NULL when there is nothing to store.
        static T* allocate(int count) {
            return count > 0 ? new T[count] : NULL;
        }

        // Shared implementation of max() and min(): every cell of the result
        // takes the first value of its group, then any value that beats it.
        // `caller` is only used in the error message.
        Matrix<T> extreme(const Axis axis, const bool want_max, const char* caller) const {
            if(axis != BOTH && axis != ROWS && axis != COLS) {
                std::cerr << "ERROR: unsupported axis for Matrix::" << caller << "\n";
                exit(1);
            }
            Matrix<T> result = zeros(axis == COLS ? rows : 1, axis == ROWS ? cols : 1);
            for(int i = 0; i < rows; i++) {
                for(int j = 0; j < cols; j++) {
                    const T& candidate = values[i * cols + j];
                    T& best = result.values[axis == ROWS ? j : (axis == COLS ? i : 0)];
                    // first value of the group: first row of a column (ROWS),
                    // first column of a row (COLS), first cell (BOTH)
                    const bool first = axis == ROWS ? (i == 0) : (axis == COLS ? (j == 0) : (i == 0 && j == 0));
                    if(first || (want_max ? best < candidate : best > candidate)) best = candidate;
                }
            }
            return result;
        }
    public:
        Matrix() : rows(0), cols(0), values(NULL) { }
        // rows x cols matrix whose values are NOT initialized.
        Matrix(int _rows, int _cols) : rows(_rows), cols(_cols), values(allocate(_rows * _cols)) { }
        ~Matrix() {
            delete[] values;
        }
        Matrix(const Matrix& other) : rows(other.rows), cols(other.cols), values(allocate(other.rows * other.cols)) {
            if(values != NULL) memcpy(values, other.values, sizeof(T) * rows * cols);
        }

        // Prints "name: RxC" on stdout.
        void info(const char* name) const {
            printf("%s: %dx%d\n", name, rows, cols);
        }

        // Prints the matrix on stdout, one row per line, each value with
        // `format`; preceded by info(name) when a name is given.
        void print(const char* name = NULL, const char* format = "%9f") const {
            if(name != NULL) info(name);
            for(int i = 0; i < rows; i++) {
                for(int j = 0; j < cols; j++) {
                    printf(format, at(i, j));
                    printf(" ");
                }
                printf("\n");
            }
        }

        static Matrix zeros(int rows, int cols) {
            Matrix result(rows, cols);
            // explicit loop: needs no <strings.h> and is safe on an empty matrix
            for(int i = 0; i < rows * cols; i++) result.values[i] = T();
            return result;
        }

        static Matrix ones(int rows, int cols) {
            Matrix result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = 1;
            return result;
        }

        // Values drawn with std::rand() and scaled to [-1, 1].
        static Matrix rand(int rows, int cols) {
            Matrix result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = (2.0f * std::rand()) / RAND_MAX - 1.0f;
            return result;
        }

        static Matrix zeros_like(const Matrix<T>& other) {
            return zeros(other.rows, other.cols);
        }

        static Matrix ones_like(const Matrix<T>& other) {
            return ones(other.rows, other.cols);
        }

        // Tiles this matrix to the shape of `other`, whose dimensions must be
        // multiples of ours. Returns a plain copy when the shapes are equal.
        Matrix<T> broadcast(const Matrix<T>& other) const {
            if(rows == other.rows && cols == other.cols) return *this;
            assert(rows > 0 && cols > 0); // the modulo below needs non zero dimensions
            assert(other.cols % cols == 0 && other.rows % rows == 0);
            Matrix result(other.rows, other.cols);
            for(int i = 0; i < other.rows; i++) {
                for(int j = 0; j < other.cols; j++) {
                    result.at(i, j) = at(i % rows, j % cols);
                }
            }
            return result;
        }

        // Copy of rows [start_row, end_row), all columns.
        Matrix<T> slice(int start_row, int end_row) const {
            return slice(start_row, end_row, 0, cols);
        }
        // Copy of rows [start_row, end_row) x columns [start_col, end_col).
        Matrix<T> slice(int start_row, int end_row, int start_col, int end_col) const {
            assert(0 <= start_row && start_row <= end_row && end_row <= rows);
            assert(0 <= start_col && start_col <= end_col && end_col <= cols);
            Matrix<T> result(end_row - start_row, end_col - start_col);
            for(int i = 0; i < result.rows; i++) {
                for(int j = 0; j < result.cols; j++) {
                    result.at(i, j) = at(i + start_row, j + start_col);
                }
            }
            return result;
        }

        /*Matrix<T>& operator=(const T& value) {
            for(int i = 0; i < rows * cols; i++) values[i] = value;
        }*/

        // Deep copy; storage is reallocated only when the shape changes.
        const Matrix<T>& operator=(const Matrix<T>& other) {
            if(this == &other) return *this;
            if(cols != other.cols || rows != other.rows) {
                // allocate first: if it throws, this matrix is left untouched
                T* fresh = allocate(other.rows * other.cols);
                delete[] values;
                values = fresh;
                cols = other.cols;
                rows = other.rows;
            }
            if(values != NULL) memcpy(values, other.values, sizeof(T) * rows * cols);
            return *this;
        }

        MatrixRow<T> operator[](int x) {
            return MatrixRow<T>(values + x * cols, cols);
        }
        const MatrixRow<T> operator[](int x) const {
            return MatrixRow<T>(values + x * cols, cols);
        }
        // Value at row x, column y (no bounds check).
        T& at(int x, int y) {
            return values[x * cols + y];
        }
        const T& at(int x, int y) const {
            return values[x * cols + y];
        }
        // matrix-scalar operations
        Matrix<T> operator-(T a) const {
            Matrix<T> result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = values[i] - a;
            return result;
        }
        Matrix<T> operator+(T a) const {
            Matrix<T> result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = a + values[i];
            return result;
        }
        Matrix<T> operator*(T a) const {
            Matrix<T> result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = a * values[i];
            return result;
        }
        Matrix<T> operator/(T a) const {
            Matrix<T> result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = values[i] / a;
            return result;
        }
        // matrix-matrix operations (element-wise, `other` is broadcast to our shape)
        Matrix<T> operator-(const Matrix<T>& other) const {
            Matrix<T> a = other.broadcast(*this);
            Matrix<T> result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = values[i] - a.values[i];
            return result;
        }
        Matrix<T> operator+(const Matrix<T>& other) const {
            Matrix<T> a = other.broadcast(*this);
            Matrix<T> result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = a.values[i] + values[i];
            return result;
        }
        Matrix<T> operator*(const Matrix<T>& other) const {
            Matrix<T> a = other.broadcast(*this);
            Matrix<T> result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = a.values[i] * values[i];
            return result;
        }
        Matrix<T> operator/(const Matrix<T>& other) const {
            Matrix<T> a = other.broadcast(*this);
            Matrix<T> result(rows, cols);
            for(int i = 0; i < rows * cols; i++) result.values[i] = values[i] / a.values[i];
            return result;
        }

        // Matrix product this (rows x cols) . a (cols x a.cols).
        Matrix<T> dot(const Matrix<T>& a) const {
            assert(a.rows == cols);
            Matrix<T> result = zeros(rows, a.cols);
#ifdef USE_CBLAS
            cblas_sgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, rows, a.cols, cols, 1, values, cols, a.values, a.cols, 1, result.values, result.cols);
#else
            for(int i = 0; i < rows; i++)
                for(int j = 0; j < a.cols; j++)
                    for(int k = 0; k < cols; k++)
                        result.at(i, j) += at(i, k) * a.at(k, j);
#endif
            return result;
        }
        Matrix<T> transpose() const {
            Matrix<T> result(cols, rows);
            // at() applies the stride of each matrix, which differ when the
            // matrix is not square
            for(int i = 0; i < cols; i++)
                for(int j = 0; j < rows; j++)
                    result.at(i, j) = at(j, i);
            return result;
        }

        Matrix<T> sum(const Axis axis=BOTH) const {
            if(axis == BOTH) {
                Matrix<T> result = zeros(1, 1);
                for(int i = 0; i < rows * cols; i++) result[0][0] += values[i];
                return result;
            } else if(axis == ROWS) {
                Matrix<T> result = zeros(1, cols);
                for(int i = 0; i < rows; i++)
                    for(int j = 0; j < cols; j++)
                        result.values[j] += values[i * cols + j];
                return result;
            } else if(axis == COLS) {
                Matrix<T> result = zeros(rows, 1);
                for(int i = 0; i < rows; i++)
                    for(int j = 0; j < cols; j++)
                        result.values[i] += values[i * cols + j];
                return result;
            }
            std::cerr << "ERROR: unsupported axis for Matrix::sum\n";
            exit(1);
            return zeros(0, 0);
        }
        // Largest value along `axis` (zero for an empty group).
        Matrix<T> max(const Axis axis=BOTH) const {
            return extreme(axis, true, "max");
        }
        // Smallest value along `axis` (zero for an empty group).
        Matrix<T> min(const Axis axis=BOTH) const {
            return extreme(axis, false, "min");
        }
        // Index of the first largest value along `axis` (ROWS or COLS only),
        // stored as a T. Exits with status 1 on any other axis, like
        // sum/max/min.
        Matrix<T> argmax(const Axis axis=COLS) const {
            if(axis == ROWS) {
                Matrix<T> result = zeros(1, cols);
                Matrix<T> best = zeros(1, cols);
                for(int i = 0; i < rows; i++) {
                    for(int j = 0; j < cols; j++) {
                        if(i == 0 || best.at(0, j) < at(i, j)) {
                            best.at(0, j) = at(i, j);
                            result.at(0, j) = i;
                        }
                    }
                }
                return result;
            } else if(axis == COLS) {
                Matrix<T> result = zeros(rows, 1);
                for(int i = 0; i < rows; i++) {
                    double best = 0;
                    for(int j = 0; j < cols; j++) {
                        if(j == 0 || best < at(i, j)) {
                            best = at(i, j);
                            result.at(i, 0) = j;
                        }
                    }
                }
                return result;
            }
            std::cerr << "ERROR: unsupported axis for Matrix::argmax\n";
            exit(1);
            return zeros(0, 0);
        }

        // Replaces this matrix with the one stored at the current position of
        // fp: two ints (rows, cols) followed by rows * cols values of type T,
        // as written by save(). Failures are reported on stderr; the matrix is
        // left empty when its size cannot be read or is negative.
        void load(FILE* fp) {
            int new_rows = 0;
            int new_cols = 0;
            bool size_ok = true;
            if(fread(&new_rows, sizeof(int), 1, fp) != 1) {
                error("loading number of rows from fp\n");
                size_ok = false;
            }
            if(size_ok && fread(&new_cols, sizeof(int), 1, fp) != 1) {
                error("loading number of cols from fp\n");
                size_ok = false;
            }
            if(size_ok && (new_rows < 0 || new_cols < 0)) {
                error("loading matrix with invalid size %dx%d from fp\n", new_rows, new_cols);
                size_ok = false;
            }

            // drop the previous content before taking the new one (no leak)
            delete[] values;
            values = NULL;
            rows = 0;
            cols = 0;
            if(!size_ok) return;

            values = allocate(new_rows * new_cols);
            rows = new_rows;
            cols = new_cols;
            const size_t count = (size_t) rows * cols;
            if(count > 0 && fread(values, sizeof(T), count, fp) != count) {
                error("loading %dx%d matrix from fp\n", rows, cols);
            }
        }

        // Writes the matrix to fp in the format read by load(); failures are
        // reported on stderr.
        void save(FILE* fp) const {
            const size_t count = (size_t) rows * cols;
            const bool header_ok = fwrite(&rows, sizeof(int), 1, fp) == 1
                                && fwrite(&cols, sizeof(int), 1, fp) == 1;
            if(!header_ok || (count > 0 && fwrite(values, sizeof(T), count, fp) != count)) {
                error("saving %dx%d matrix to fp\n", rows, cols);
            }
        }

        // New matrix holding function(value) for every value.
        Matrix<T> apply(T (*function)(T)) const {
            Matrix<T> output(rows, cols);
            for(int i = 0; i < rows * cols; i++) output.values[i] = function(values[i]);
            return output;
        }

        // Scalar functions usable with apply().
        class Function {
            public:
                static T sigmoid(const T a) { return std::tanh(a * 0.5) * 0.5 + 0.5; }
                static T hard_sigmoid(const T a) { T tmp = a * 0.2 + 0.5; return tmp < 0 ? 0 : tmp > 1 ? 1 : tmp; }
                static T identity(const T a) { return a; }
                static T tanh(const T a) { return std::tanh(a); }
                static T exp(const T a) { return std::exp(a); }
                static T log(const T a) { return std::log(a); }
                static T relu(const T a) { return a > 0 ? a : 0; }
        };

        static Matrix<T> sigmoid(const Matrix<T>& x) { return x.apply(Function::sigmoid); }
        static Matrix<T> hard_sigmoid(const Matrix<T>& x) { return x.apply(Function::hard_sigmoid); }
        static Matrix<T> identity(const Matrix<T>& x) { return x.apply(Function::identity); }
        static Matrix<T> tanh(const Matrix<T>& x) { return x.apply(Function::tanh); }
        static Matrix<T> exp(const Matrix<T>& x) { return x.apply(Function::exp); }
        static Matrix<T> log(const Matrix<T>& x) { return x.apply(Function::log); }
        static Matrix<T> relu(const Matrix<T>& x) { return x.apply(Function::relu); }
        // Row-wise softmax; the maximum of each row is subtracted before the
        // exponential to avoid overflow.
        static Matrix<T> softmax(const Matrix<T>& x) {
            Matrix<T> r = Matrix::exp(x - x.max(COLS));
            return r / r.sum(COLS);
        }
        /*Matrix<T> operator=(Matrix<float> x) {
            Matrix<T> result(x.rows, x.cols);
            for(int i = 0; i < x.rows; i++)
                for(int j = 0; j < y.cols; j++)
                    result.at(i, j) = (T) x.at(i, j);
            return result;
        }*/
};
a/maca_trans_parser/src/test_mvt_stack.c b/maca_trans_parser/src/test_mvt_stack.c index 711a3a49ffba93a1461c18af5ee6acf36ac63dee..110fbd914e476c0937813f0c42181ff7772951dd 100644 --- a/maca_trans_parser/src/test_mvt_stack.c +++ b/maca_trans_parser/src/test_mvt_stack.c @@ -11,9 +11,9 @@ int main(int argc, char *argv[]) int i; for(i=0; i < 10; i++){ - w1 = word_new("1"); + w1 = word_new((char *)"1"); word_set_index(w1, i + 1); - w2 = word_new("1"); + w2 = word_new((char *)"1"); word_set_index(w2, i + 2); mvt_stack_push(ms, mvt_new(i, w1, w2)); } diff --git a/perceptron/lib/src/cf_file.c b/perceptron/lib/src/cf_file.c index 9afcf344fe2a8934f5dd0b71f52d968552a1c0ac..0114a3dce03c55e2329a0ad6f75f1a77a16bf424 100644 --- a/perceptron/lib/src/cf_file.c +++ b/perceptron/lib/src/cf_file.c @@ -10,7 +10,7 @@ int *cff_max_value_per_column(char *cff_filename, int n) char *token; int i; int col; - int *max_array = memalloc(n * sizeof(int)); + int *max_array = (int *)memalloc(n * sizeof(int)); for(i = 0; i < n; i++){ max_array[i] = 0; }