From 0a1411113ba23741040a600aaab40087f499bdb8 Mon Sep 17 00:00:00 2001 From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr> Date: Fri, 24 Jul 2020 14:09:07 +0200 Subject: [PATCH] =?UTF-8?q?ajout=20de=20fonctions=20pour=20=C3=A9liminer?= =?UTF-8?q?=20ou=20ajouter=20un=20mot=20dans=20un=20mcf,=20utlisation=20de?= =?UTF-8?q?=20ces=20fonctions=20pour=20la=20conversion=20json2mcf?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- maca_common/include/word_buffer.h | 2 ++ maca_common/src/json_parser.c | 2 +- maca_common/src/word_buffer.c | 60 +++++++++++++++++++++++++++++++ maca_tools/src/json2mcf.c | 32 ++++++++++++++++- 5 files changed, 95 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bbac66d..903cc58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ SET( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lm -lopenblas" ) if (${CMAKE_C_COMPILER_VERSION} VERSION_LESS 5.3) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11 -ggdb") # better, but needs CMake >= 3.0 #set_property(GLOBAL PROPERTY CXX_STANDARD 11) #set_property(GLOBAL PROPERTY C_STANDARD 11) diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h index 9efbaf0..f26081b 100644 --- a/maca_common/include/word_buffer.h +++ b/maca_common/include/word_buffer.h @@ -45,6 +45,8 @@ typedef struct { word_buffer *word_buffer_new(FILE *input_file, mcd *mcd_struct, int lookahead); void word_buffer_free(word_buffer *wb); int word_buffer_add(word_buffer *wb, word *w); +void word_buffer_insert(word_buffer *wb, word *w, int index); +void word_buffer_rm(word_buffer *wb, int index); word* word_buffer_get_word_relative(word_buffer *wb, int dist); word* word_buffer_get_word_n(word_buffer *wb, int n); int word_buffer_read_next_word(word_buffer *wb); diff --git a/maca_common/src/json_parser.c b/maca_common/src/json_parser.c index f1b7fd4..046b401 100644 --- a/maca_common/src/json_parser.c +++ b/maca_common/src/json_parser.c @@ -207,7 +207,7 @@ void consommer(json_parser_ctx *ctx, int c ) { ctx->uc = yylex(ctx); /* consommer le caractère */ } else - erreur(ctx, (char *) "erreure lexicale" ); + erreur(ctx, (char *) "erreur lexicale" ); } /*-------------------------------------------------------------------------*/ diff --git a/maca_common/src/word_buffer.c b/maca_common/src/word_buffer.c index 971c037..4a20ea6 100644 --- a/maca_common/src/word_buffer.c +++ b/maca_common/src/word_buffer.c @@ -72,6 +72,66 @@ void word_buffer_free(word_buffer *wb) free(wb); } +/* remove word at position index */ +void word_buffer_rm(word_buffer *wb, int index) +{ + int i; + if((index < 0) || (index >= wb->nbelem)) { + fprintf(stderr, "cannot remove word %d, index out of range\n", index); + return; + } + /* check if word at index has daughters */ + for(i=0; i < wb->nbelem; i++){ + if(word_get_gov_index(word_buffer_get_word_n(wb, i)) == index){ + fprintf(stderr, "cannot remove word %d, it has at least one daughter", index); + return; + } + } + /* decrease dependencies length whenever gov and dep are on different sides of index */ + for(int dep_index=0; dep_index < wb->nbelem; dep_index++){ + word *dep = word_buffer_get_word_n(wb, dep_index); + int gov_index = word_get_gov_index(dep); + if((dep_index < index && gov_index > index) + || (dep_index > index && gov_index < index)){ + word_set_gov(dep, word_get_gov(dep) - 1); + } + } + word_free(wb->array[index]); + wb->array[index] = NULL; + for(i=index+1; i < wb->nbelem; i++){ + wb->array[i-1] = wb->array[i]; + } + wb->nbelem--; +} + +/* insert word w at position index */ +void word_buffer_insert(word_buffer *wb, word *w, int index) +{ + if(wb->nbelem == wb->size -1){ + wb->size = 2 * (wb->size + 1); + wb->array = (word **)realloc(wb->array, wb->size * sizeof(word *)); + } + + /* increase dependencies length whenever gov and dep are on different sides of index */ + for(int dep_index=0; dep_index < wb->nbelem; dep_index++){ + word *dep = word_buffer_get_word_n(wb, dep_index); + int gov_index = word_get_gov_index(dep); + if((dep_index < index && gov_index >= index) + || (dep_index >= index && gov_index < index)){ + word_set_gov(dep, word_get_gov(dep) + 1); + } + } + + for(int i=wb->nbelem; i >= index; i--){ + wb->array[i] = wb->array[i-1]; + } + wb->array[index] = w; + wb->nbelem++; + +} + + + int word_buffer_add(word_buffer *wb, word *w) { if(wb->nbelem == wb->size -1){ diff --git a/maca_tools/src/json2mcf.c b/maca_tools/src/json2mcf.c index a5bf875..21e481c 100644 --- a/maca_tools/src/json2mcf.c +++ b/maca_tools/src/json2mcf.c @@ -238,7 +238,7 @@ void update_segment(word_buffer *wb, int start, int end, char *label, char *stat mcd_struct = word_buffer_get_mcd(wb); d = mcd_struct->dico_array[mcd_get_pos_col(mcd_struct)]; - + if(status_lab && !strcmp(status_lab, "G")){ fprintf(stderr, "updating label of segment [%d-%d] with \"%s\"\n", start, end, label); w = word_buffer_get_word_n(wb, offset + start); @@ -409,14 +409,44 @@ void check_token(json_attr_val *avl, word_buffer *wb, int offset) int id; char *form_json = NULL; char *form_mcf = NULL; + char *status = NULL; json_attr_val *av; word *w = NULL; for(av = avl; av != NULL; av = av->next){ // printf("attr = %s\n", av->attr); if(!strcmp(av->attr, "id")){id = (int)(av->val->u.number); continue;} if(!strcmp(av->attr, "word")){form_json = av->val->u.string; continue;} + if(!strcmp(av->attr, "status")){status = av->val->u.string; continue;} + + } + /* ajouté le 24 juillet 2020 par Alexis */ + if(status != NULL && !strcmp(status, "inserted")){ + int form_column = wb->mcd_struct->wf2col[MCD_WF_FORM]; + fprintf(stderr, "inserting token at position %d\n", id); + word *w = word_new(NULL); + // word_set_form(w, form_json); + int code = dico_add(wb->mcd_struct->dico_array[form_column], form_json); + w->wf_array[MCD_WF_FORM] = dico_add(wb->mcd_struct->dico_array[form_column], form_json); + w->form = strdup(form_json); + word_buffer_insert(wb, w, id); + } + + /* ajouté le 24 juillet 2020 par Alexis */ + if(status != NULL && !strcmp(status, "deleted")){ + fprintf(stderr, "deleting token at position %d\n", id); + word_buffer_rm(wb, id); + } + /* ajouté le 24 juillet 2020 par Alexis */ + if(status != NULL && !strcmp(status, "modified")){ + fprintf(stderr, "modifying token at position %d\n", id); + int form_column = wb->mcd_struct->wf2col[MCD_WF_FORM]; + int code = dico_add(wb->mcd_struct->dico_array[form_column], form_json); + word *w = word_buffer_get_word_n(wb, id); + w->wf_array[MCD_WF_FORM] = dico_add(wb->mcd_struct->dico_array[form_column], form_json); + w->form = strdup(form_json); } + w = word_buffer_get_word_n(wb, id); form_mcf = w->form; fprintf(stderr, "id : %d \t json : %s \t mcf : %s\n", id, form_json, form_mcf); -- GitLab