From 0a1411113ba23741040a600aaab40087f499bdb8 Mon Sep 17 00:00:00 2001
From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr>
Date: Fri, 24 Jul 2020 14:09:07 +0200
Subject: [PATCH] =?UTF-8?q?ajout=20de=20fonctions=20pour=20=C3=A9liminer?=
 =?UTF-8?q?=20ou=20ajouter=20un=20mot=20dans=20un=20mcf,=20utilisation?=
 =?UTF-8?q?=20de=20ces=20fonctions=20pour=20la=20conversion=20json2mcf?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CMakeLists.txt                    |  2 +-
 maca_common/include/word_buffer.h |  2 ++
 maca_common/src/json_parser.c     |  2 +-
 maca_common/src/word_buffer.c     | 60 +++++++++++++++++++++++++++++++
 maca_tools/src/json2mcf.c         | 32 ++++++++++++++++-
 5 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bbac66d..903cc58 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ SET( CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} -lm -lopenblas" )
 
 
 if (${CMAKE_C_COMPILER_VERSION} VERSION_LESS 5.3)
-	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11")
+	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11 -ggdb")
 	# better, but needs CMake >= 3.0
 	#set_property(GLOBAL PROPERTY CXX_STANDARD 11)
 	#set_property(GLOBAL PROPERTY C_STANDARD 11)
diff --git a/maca_common/include/word_buffer.h b/maca_common/include/word_buffer.h
index 9efbaf0..f26081b 100644
--- a/maca_common/include/word_buffer.h
+++ b/maca_common/include/word_buffer.h
@@ -45,6 +45,8 @@ typedef struct {
 word_buffer *word_buffer_new(FILE *input_file, mcd *mcd_struct, int lookahead);
 void         word_buffer_free(word_buffer *wb);
 int          word_buffer_add(word_buffer *wb, word *w);
+void         word_buffer_insert(word_buffer *wb, word *w, int index);
+void         word_buffer_rm(word_buffer *wb, int index);
 word*        word_buffer_get_word_relative(word_buffer *wb, int dist);
 word*        word_buffer_get_word_n(word_buffer *wb, int n);
 int          word_buffer_read_next_word(word_buffer *wb);
diff --git a/maca_common/src/json_parser.c b/maca_common/src/json_parser.c
index f1b7fd4..046b401 100644
--- a/maca_common/src/json_parser.c
+++ b/maca_common/src/json_parser.c
@@ -207,7 +207,7 @@ void consommer(json_parser_ctx *ctx, int c ) {
     ctx->uc = yylex(ctx); /* consommer le caractère */
   }
   else
-    erreur(ctx,  (char *) "erreure lexicale" );
+    erreur(ctx,  (char *) "erreur lexicale" );
 }
 
 /*-------------------------------------------------------------------------*/
diff --git a/maca_common/src/word_buffer.c b/maca_common/src/word_buffer.c
index 971c037..4a20ea6 100644
--- a/maca_common/src/word_buffer.c
+++ b/maca_common/src/word_buffer.c
@@ -72,6 +72,66 @@ void word_buffer_free(word_buffer *wb)
   free(wb);
 }
 
+/* Remove and free the word at position index; refused (message on stderr) if out of range or if it has daughters. */
+void word_buffer_rm(word_buffer *wb, int index)
+{
+  if((index < 0) || (index >= wb->nbelem)) {
+    fprintf(stderr, "cannot remove word %d, index out of range\n", index);
+    return;
+  }
+  /* check if word at index has daughters */
+  for(int i=0; i < wb->nbelem; i++){
+    if(word_get_gov_index(word_buffer_get_word_n(wb, i)) == index){
+      fprintf(stderr, "cannot remove word %d, it has at least one daughter\n", index);
+      return;
+    }
+  }
+  /* decrease dependencies length whenever gov and dep are on different sides of index */
+  /* NOTE(review): gov looks like a relative offset; if it is signed, a governor left of its dependent would need +1 -- confirm */
+  for(int dep_index=0; dep_index < wb->nbelem; dep_index++){
+    word *dep = word_buffer_get_word_n(wb, dep_index);
+    int gov_index = word_get_gov_index(dep);
+    if((dep_index < index && gov_index > index) || (dep_index > index && gov_index < index)){
+      word_set_gov(dep, word_get_gov(dep) - 1);
+    }
+  }
+  word_free(wb->array[index]);
+  /* close the gap left by the removed word, then clear the last slot so no stale duplicate pointer remains */
+  for(int i=index+1; i < wb->nbelem; i++){
+    wb->array[i-1] = wb->array[i];
+  }
+  wb->nbelem--;
+  wb->array[wb->nbelem] = NULL;
+}
+
+/* Insert word w at position index (0 <= index <= nbelem), shifting later words right; out-of-range requests are refused and w stays with the caller. */
+void word_buffer_insert(word_buffer *wb, word *w, int index)
+{
+  if((index < 0) || (index > wb->nbelem)) {
+    fprintf(stderr, "cannot insert word at position %d, index out of range\n", index);
+    return;
+  }
+  if(wb->nbelem == wb->size -1){
+    wb->size = 2 * (wb->size + 1);
+    wb->array = (word **)realloc(wb->array, wb->size * sizeof(word *));
+  }
+  /* increase dependencies length whenever gov and dep are on different sides of index */
+  for(int dep_index=0; dep_index < wb->nbelem; dep_index++){
+    word *dep = word_buffer_get_word_n(wb, dep_index);
+    int gov_index = word_get_gov_index(dep);
+    if((dep_index < index && gov_index >= index) || (dep_index >= index && gov_index < index)){
+      word_set_gov(dep, word_get_gov(dep) + 1); /* NOTE(review): if gov is a signed offset, a left governor would need -1 -- confirm */
+    }
+  }
+  for(int i=wb->nbelem; i > index; i--){ /* stop at i > index: the slot at index is overwritten below, and index 0 must not read array[-1] */
+    wb->array[i] = wb->array[i-1];
+  }
+  wb->array[index] = w;
+  wb->nbelem++;
+}
+
+
+
 int word_buffer_add(word_buffer *wb, word *w)
 {
   if(wb->nbelem == wb->size -1){
diff --git a/maca_tools/src/json2mcf.c b/maca_tools/src/json2mcf.c
index a5bf875..21e481c 100644
--- a/maca_tools/src/json2mcf.c
+++ b/maca_tools/src/json2mcf.c
@@ -238,7 +238,7 @@ void update_segment(word_buffer *wb, int start, int end, char *label, char *stat
 
   mcd_struct = word_buffer_get_mcd(wb); 
   d = mcd_struct->dico_array[mcd_get_pos_col(mcd_struct)];
-  
+
   if(status_lab && !strcmp(status_lab, "G")){
     fprintf(stderr, "updating label of segment [%d-%d] with \"%s\"\n", start, end, label);
     w = word_buffer_get_word_n(wb, offset + start);
@@ -409,14 +409,44 @@ void check_token(json_attr_val *avl, word_buffer *wb, int offset)
   int id;
   char *form_json = NULL;
   char *form_mcf = NULL;
+  char *status = NULL;
   json_attr_val *av;
   word *w = NULL;
   for(av = avl; av != NULL; av = av->next){
     //      printf("attr = %s\n", av->attr);
     if(!strcmp(av->attr, "id")){id = (int)(av->val->u.number); continue;}
     if(!strcmp(av->attr, "word")){form_json = av->val->u.string; continue;}
+    if(!strcmp(av->attr, "status")){status = av->val->u.string; continue;}
+
+  }
+  /* ajouté le 24 juillet 2020 par Alexis */
+  if(status != NULL && !strcmp(status, "inserted")){
+    int form_column = wb->mcd_struct->wf2col[MCD_WF_FORM];
+    fprintf(stderr, "inserting token at position %d\n", id);
+    word *w = word_new(NULL);
+    //    word_set_form(w, form_json);
+    int code = dico_add(wb->mcd_struct->dico_array[form_column], form_json);
+    w->wf_array[MCD_WF_FORM] = dico_add(wb->mcd_struct->dico_array[form_column], form_json);
+    w->form = strdup(form_json);
+    word_buffer_insert(wb, w, id);
+  }
+
+  /* ajouté le 24 juillet 2020 par Alexis */
+  if(status != NULL && !strcmp(status, "deleted")){
+    fprintf(stderr, "deleting token at position %d\n", id);
+    word_buffer_rm(wb, id);
+  }
 
+  /* ajouté le 24 juillet 2020 par Alexis */
+  if(status != NULL && !strcmp(status, "modified")){
+    fprintf(stderr, "modifying token at position %d\n", id);
+    int form_column = wb->mcd_struct->wf2col[MCD_WF_FORM];
+    int code = dico_add(wb->mcd_struct->dico_array[form_column], form_json);
+    word *w = word_buffer_get_word_n(wb, id);
+    w->wf_array[MCD_WF_FORM] = dico_add(wb->mcd_struct->dico_array[form_column], form_json);
+    w->form = strdup(form_json);
   }
+  
   w = word_buffer_get_word_n(wb, id);
   form_mcf = w->form;
   fprintf(stderr, "id : %d \t json : %s \t mcf : %s\n", id, form_json, form_mcf);
-- 
GitLab