diff --git a/CMakeLists.txt b/CMakeLists.txt index bbac66d12c06123bca8c9e1ae63fbefb5128f1de..903cc582a74196aabbb3bc58fbf6b9d50fbffb1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ SET( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lm -lopenblas" ) if (${CMAKE_C_COMPILER_VERSION} VERSION_LESS 5.3) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11 -ggdb") # better, but needs CMake >= 3.0 #set_property(GLOBAL PROPERTY CXX_STANDARD 11) #set_property(GLOBAL PROPERTY C_STANDARD 11) diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h index 9efbaf09f8e90422907c4a85954fcc6cd62fb0ec..f26081b9c7d4de3f0bf15a9a03cb5ee69c937511 100644 --- a/maca_common/include/word_buffer.h +++ b/maca_common/include/word_buffer.h @@ -45,6 +45,8 @@ typedef struct { word_buffer *word_buffer_new(FILE *input_file, mcd *mcd_struct, int lookahead); void word_buffer_free(word_buffer *wb); int word_buffer_add(word_buffer *wb, word *w); +void word_buffer_insert(word_buffer *wb, word *w, int index); +void word_buffer_rm(word_buffer *wb, int index); word* word_buffer_get_word_relative(word_buffer *wb, int dist); word* word_buffer_get_word_n(word_buffer *wb, int n); int word_buffer_read_next_word(word_buffer *wb); diff --git a/maca_common/src/json_parser.c b/maca_common/src/json_parser.c index f1b7fd4dd7805950826aecb5162ac66c86ed2459..046b401f7b0772ec73d65c40bc1b332b7eadf8cf 100644 --- a/maca_common/src/json_parser.c +++ b/maca_common/src/json_parser.c @@ -207,7 +207,7 @@ void consommer(json_parser_ctx *ctx, int c ) { ctx->uc = yylex(ctx); /* consommer le caractère */ } else - erreur(ctx, (char *) "erreure lexicale" ); + erreur(ctx, (char *) "erreur lexicale" ); } /*-------------------------------------------------------------------------*/ diff --git a/maca_common/src/word_buffer.c b/maca_common/src/word_buffer.c index 971c03781657755f150f63e9e3b6e15b1336558a..4a20ea662d286ed922bbefd4f955db320b0c4d3a 
100644
--- a/maca_common/src/word_buffer.c
+++ b/maca_common/src/word_buffer.c
@@ -72,6 +72,107 @@ void word_buffer_free(word_buffer *wb)
   free(wb);
 }
 
+/*
+ * Remove the word at position index from wb and free it.
+ *
+ * Nothing is removed (a message is printed on stderr) when index is out of
+ * range or when some word of the buffer has the word at index as governor.
+ * Dependencies whose dependent and governor lie on opposite sides of index
+ * are shortened by one.
+ *
+ * NOTE(review): assumes word_get_gov() is a signed offset relative to the
+ * dependent (negative = governor on the left) and that 0 means "no
+ * governor" -- confirm against word.h.
+ */
+void word_buffer_rm(word_buffer *wb, int index)
+{
+  int i;
+
+  if((index < 0) || (index >= wb->nbelem)){
+    fprintf(stderr, "cannot remove word %d, index out of range\n", index);
+    return;
+  }
+  /* a word that still governs another word cannot be removed */
+  for(i=0; i < wb->nbelem; i++){
+    if(word_get_gov_index(word_buffer_get_word_n(wb, i)) == index){
+      fprintf(stderr, "cannot remove word %d, it has at least one daughter\n", index);
+      return;
+    }
+  }
+  /* shorten dependencies that span the removed position */
+  for(i=0; i < wb->nbelem; i++){
+    word *dep = word_buffer_get_word_n(wb, i);
+    int gov = word_get_gov(dep);
+    int gov_index = word_get_gov_index(dep);
+    if(gov == 0) continue; /* no dependency to adjust */
+    if((i < index && gov_index > index) || (i > index && gov_index < index)){
+      /* move the offset towards zero, whatever its sign */
+      int new_gov = (gov > 0) ? gov - 1 : gov + 1;
+      word_set_gov(dep, new_gov);
+    }
+  }
+  word_free(wb->array[index]);
+  /* close the gap left by the removed word */
+  for(i=index+1; i < wb->nbelem; i++){
+    wb->array[i-1] = wb->array[i];
+  }
+  wb->nbelem--;
+  wb->array[wb->nbelem] = NULL; /* do not keep a stale duplicate pointer */
+}
+
+/*
+ * Insert word w at position index of wb (0 <= index <= wb->nbelem).
+ *
+ * Words previously at index and beyond move one slot to the right, and
+ * dependencies that span the insertion point are lengthened by one.  On
+ * an out-of-range index or an allocation failure a message is printed on
+ * stderr and the buffer is left unchanged; w is not freed in that case.
+ *
+ * NOTE(review): assumes word_get_gov() is a signed offset relative to the
+ * dependent and that 0 means "no governor" -- confirm against word.h.
+ */
+void word_buffer_insert(word_buffer *wb, word *w, int index)
+{
+  int i;
+
+  if((index < 0) || (index > wb->nbelem)){
+    fprintf(stderr, "cannot insert word at position %d, index out of range\n", index);
+    return;
+  }
+  if(wb->nbelem == wb->size -1){
+    /* grow through a temporary so the old array survives a failed realloc */
+    word **bigger = (word **)realloc(wb->array, 2 * (wb->size + 1) * sizeof(word *));
+    if(bigger == NULL){
+      fprintf(stderr, "cannot insert word at position %d, out of memory\n", index);
+      return;
+    }
+    wb->array = bigger;
+    wb->size = 2 * (wb->size + 1);
+  }
+
+  /* lengthen dependencies that span the insertion point */
+  for(i=0; i < wb->nbelem; i++){
+    word *dep = word_buffer_get_word_n(wb, i);
+    int gov = word_get_gov(dep);
+    int gov_index = word_get_gov_index(dep);
+    if(gov == 0) continue; /* no dependency to adjust */
+    if((i < index && gov_index >= index) || (i >= index && gov_index < index)){
+      /* move the offset away from zero, whatever its sign */
+      int new_gov = (gov > 0) ? gov + 1 : gov - 1;
+      word_set_gov(dep, new_gov);
+    }
+  }
+
+  /* shift right; stopping at i > index avoids reading array[-1] when index is 0 */
+  for(i=wb->nbelem; i > index; i--){
+    wb->array[i] = wb->array[i-1];
+  }
+  wb->array[index] = w;
+  wb->nbelem++;
+}
+
+
+
 int word_buffer_add(word_buffer *wb, word *w)
 {
if(wb->nbelem == wb->size -1){ diff --git a/maca_tools/src/json2mcf.c b/maca_tools/src/json2mcf.c index a5bf8758cb272bd96122de1ae67c5acabcd3da1a..21e481c6a8a8b27a609f8ec456c9751999831ec1 100644 --- a/maca_tools/src/json2mcf.c +++ b/maca_tools/src/json2mcf.c @@ -238,7 +238,7 @@ void update_segment(word_buffer *wb, int start, int end, char *label, char *stat mcd_struct = word_buffer_get_mcd(wb); d = mcd_struct->dico_array[mcd_get_pos_col(mcd_struct)]; - + if(status_lab && !strcmp(status_lab, "G")){ fprintf(stderr, "updating label of segment [%d-%d] with \"%s\"\n", start, end, label); w = word_buffer_get_word_n(wb, offset + start); @@ -409,14 +409,44 @@ void check_token(json_attr_val *avl, word_buffer *wb, int offset) int id; char *form_json = NULL; char *form_mcf = NULL; + char *status = NULL; json_attr_val *av; word *w = NULL; for(av = avl; av != NULL; av = av->next){ // printf("attr = %s\n", av->attr); if(!strcmp(av->attr, "id")){id = (int)(av->val->u.number); continue;} if(!strcmp(av->attr, "word")){form_json = av->val->u.string; continue;} + if(!strcmp(av->attr, "status")){status = av->val->u.string; continue;} + + } + /* ajouté le 24 juillet 2020 par Alexis */ + if(status != NULL && !strcmp(status, "inserted")){ + int form_column = wb->mcd_struct->wf2col[MCD_WF_FORM]; + fprintf(stderr, "inserting token at position %d\n", id); + word *w = word_new(NULL); + // word_set_form(w, form_json); + int code = dico_add(wb->mcd_struct->dico_array[form_column], form_json); + w->wf_array[MCD_WF_FORM] = dico_add(wb->mcd_struct->dico_array[form_column], form_json); + w->form = strdup(form_json); + word_buffer_insert(wb, w, id); + } + + /* ajouté le 24 juillet 2020 par Alexis */ + if(status != NULL && !strcmp(status, "deleted")){ + fprintf(stderr, "deleting token at position %d\n", id); + word_buffer_rm(wb, id); + } + /* ajouté le 24 juillet 2020 par Alexis */ + if(status != NULL && !strcmp(status, "modified")){ + fprintf(stderr, "modifying token at position %d\n", id); + int 
form_column = wb->mcd_struct->wf2col[MCD_WF_FORM]; + int code = dico_add(wb->mcd_struct->dico_array[form_column], form_json); + word *w = word_buffer_get_word_n(wb, id); + w->wf_array[MCD_WF_FORM] = dico_add(wb->mcd_struct->dico_array[form_column], form_json); + w->form = strdup(form_json); } + w = word_buffer_get_word_n(wb, id); form_mcf = w->form; fprintf(stderr, "id : %d \t json : %s \t mcf : %s\n", id, form_json, form_mcf);