Skip to content
Snippets Groups Projects
Commit 71700931 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

modified the handling of end of sentence in parser

corrected bugs in word and word buffer
parent ba1bb55e
No related branches found
No related tags found
No related merge requests found
...@@ -5,86 +5,6 @@ ...@@ -5,86 +5,6 @@
#define WORD_INVALID_GOV 10000 #define WORD_INVALID_GOV 10000
/*
 * Read accessors for the fields of a word.
 *
 * Each accessor takes a pointer to a word, which may be NULL, and expands to
 * one fully parenthesized expression. The outer parentheses matter: without
 * them the conditional operator would absorb any operator written next to the
 * macro call (2 * word_get_id(w), word_get_index(w) + 1, ...).
 *
 * When the pointer is NULL the accessor yields the default visible in its
 * definition: -1 for most fields, 0 for gov and sent_seg, NULL for input.
 *
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define word_get_id(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID])
#define word_get_form(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM])
#define word_get_lemma(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA])
#define word_get_cpos(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_CPOS])
#define word_get_pos(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_POS])
#define word_get_feats(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FEATS])
#define word_get_gov(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_GOV])
#define word_get_label(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL])
#define word_get_stag(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG])
#define word_get_sent_seg(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SENT_SEG])
#define word_get_A(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A])
#define word_get_B(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B])
#define word_get_C(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C])
#define word_get_D(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_D])
#define word_get_E(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_E])
#define word_get_F(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_F])
#define word_get_G(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_G])
#define word_get_H(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_H])
#define word_get_I(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_I])
#define word_get_J(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_J])
#define word_get_K(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_K])
#define word_get_L(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_L])
#define word_get_M(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_M])
#define word_get_N(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_N])
#define word_get_O(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_O])
#define word_get_P(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_P])
#define word_get_Q(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Q])
#define word_get_R(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_R])
#define word_get_S(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_S])
#define word_get_T(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_T])
#define word_get_U(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_U])
#define word_get_V(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_V])
#define word_get_W(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_W])
#define word_get_X(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_X])
#define word_get_Y(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Y])
#define word_get_Z(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Z])
#define word_get_input(w) (((w) == NULL) ? NULL : (w)->input)
#define word_get_signature(w) (((w) == NULL) ? -1 : (w)->signature)
#define word_get_U1(w) (((w) == NULL) ? -1 : (w)->U1)
#define word_get_index(w) (((w) == NULL) ? -1 : (w)->index)
/*
 * Write accessors for the fields of a word.
 *
 * Each accessor stores val in the corresponding field of the word pointed to
 * by w and expands to one fully parenthesized assignment expression, whose
 * value is the value stored. The outer parentheses keep the assignment intact
 * when the macro call is used inside a larger expression: without them,
 * word_set_id(w, 7) == 7 would store the result of the comparison.
 *
 * Unlike the read accessors, these do not test w against NULL: the pointer
 * must be valid.
 */
#define word_set_id(w, val) ((w)->wf_array[MCD_WF_ID] = (val))
#define word_set_form(w, val) ((w)->wf_array[MCD_WF_FORM] = (val))
#define word_set_lemma(w, val) ((w)->wf_array[MCD_WF_LEMMA] = (val))
#define word_set_cpos(w, val) ((w)->wf_array[MCD_WF_CPOS] = (val))
#define word_set_pos(w, val) ((w)->wf_array[MCD_WF_POS] = (val))
#define word_set_feats(w, val) ((w)->wf_array[MCD_WF_FEATS] = (val))
#define word_set_gov(w, val) ((w)->wf_array[MCD_WF_GOV] = (val))
#define word_set_label(w, val) ((w)->wf_array[MCD_WF_LABEL] = (val))
#define word_set_stag(w, val) ((w)->wf_array[MCD_WF_STAG] = (val))
#define word_set_sent_seg(w, val) ((w)->wf_array[MCD_WF_SENT_SEG] = (val))
#define word_set_A(w, val) ((w)->wf_array[MCD_WF_A] = (val))
#define word_set_B(w, val) ((w)->wf_array[MCD_WF_B] = (val))
#define word_set_C(w, val) ((w)->wf_array[MCD_WF_C] = (val))
#define word_set_D(w, val) ((w)->wf_array[MCD_WF_D] = (val))
#define word_set_E(w, val) ((w)->wf_array[MCD_WF_E] = (val))
#define word_set_F(w, val) ((w)->wf_array[MCD_WF_F] = (val))
#define word_set_G(w, val) ((w)->wf_array[MCD_WF_G] = (val))
#define word_set_H(w, val) ((w)->wf_array[MCD_WF_H] = (val))
#define word_set_I(w, val) ((w)->wf_array[MCD_WF_I] = (val))
#define word_set_J(w, val) ((w)->wf_array[MCD_WF_J] = (val))
#define word_set_K(w, val) ((w)->wf_array[MCD_WF_K] = (val))
#define word_set_L(w, val) ((w)->wf_array[MCD_WF_L] = (val))
#define word_set_M(w, val) ((w)->wf_array[MCD_WF_M] = (val))
#define word_set_N(w, val) ((w)->wf_array[MCD_WF_N] = (val))
#define word_set_O(w, val) ((w)->wf_array[MCD_WF_O] = (val))
#define word_set_P(w, val) ((w)->wf_array[MCD_WF_P] = (val))
#define word_set_Q(w, val) ((w)->wf_array[MCD_WF_Q] = (val))
#define word_set_R(w, val) ((w)->wf_array[MCD_WF_R] = (val))
#define word_set_S(w, val) ((w)->wf_array[MCD_WF_S] = (val))
#define word_set_T(w, val) ((w)->wf_array[MCD_WF_T] = (val))
#define word_set_U(w, val) ((w)->wf_array[MCD_WF_U] = (val))
#define word_set_V(w, val) ((w)->wf_array[MCD_WF_V] = (val))
#define word_set_W(w, val) ((w)->wf_array[MCD_WF_W] = (val))
#define word_set_X(w, val) ((w)->wf_array[MCD_WF_X] = (val))
#define word_set_Y(w, val) ((w)->wf_array[MCD_WF_Y] = (val))
#define word_set_Z(w, val) ((w)->wf_array[MCD_WF_Z] = (val))
#define word_set_signature(w, val) ((w)->signature = (val))
#define word_set_index(w, val) ((w)->index = (val))
typedef struct _word { typedef struct _word {
int wf_array[MCD_WF_NB]; /* array containing the codes corresponding to the different word features */ int wf_array[MCD_WF_NB]; /* array containing the codes corresponding to the different word features */
char *input; /* the string corresponding to the actual line in the corpus file */ char *input; /* the string corresponding to the actual line in the corpus file */
...@@ -96,6 +16,86 @@ typedef struct _word { ...@@ -96,6 +16,86 @@ typedef struct _word {
int is_root; int is_root;
} word; } word;
/*
 * Read accessors for the fields of a word.
 *
 * Every accessor accepts a possibly NULL word pointer and expands to a single
 * parenthesized conditional expression: the fallback value when the pointer
 * is NULL, the requested field otherwise. The argument is evaluated more than
 * once, so it must not have side effects.
 */

/* guarded read of the feature code stored at wf_array[feat] */
#define WORD_FEAT_OR(w, feat, fallback) (((w) == NULL) ? (fallback) : (w)->wf_array[(feat)])
/* guarded read of a plain struct member */
#define WORD_MEMBER_OR(w, member, fallback) (((w) == NULL) ? (fallback) : (w)->member)

#define word_get_id(w) WORD_FEAT_OR(w, MCD_WF_ID, -1)
#define word_get_form(w) WORD_FEAT_OR(w, MCD_WF_FORM, -1)
#define word_get_lemma(w) WORD_FEAT_OR(w, MCD_WF_LEMMA, -1)
#define word_get_cpos(w) WORD_FEAT_OR(w, MCD_WF_CPOS, -1)
#define word_get_pos(w) WORD_FEAT_OR(w, MCD_WF_POS, -1)
#define word_get_feats(w) WORD_FEAT_OR(w, MCD_WF_FEATS, -1)
#define word_get_gov(w) WORD_FEAT_OR(w, MCD_WF_GOV, 0)
#define word_get_label(w) WORD_FEAT_OR(w, MCD_WF_LABEL, -1)
#define word_get_stag(w) WORD_FEAT_OR(w, MCD_WF_STAG, -1)
#define word_get_sent_seg(w) WORD_FEAT_OR(w, MCD_WF_SENT_SEG, 0)
#define word_get_A(w) WORD_FEAT_OR(w, MCD_WF_A, -1)
#define word_get_B(w) WORD_FEAT_OR(w, MCD_WF_B, -1)
#define word_get_C(w) WORD_FEAT_OR(w, MCD_WF_C, -1)
#define word_get_D(w) WORD_FEAT_OR(w, MCD_WF_D, -1)
#define word_get_E(w) WORD_FEAT_OR(w, MCD_WF_E, -1)
#define word_get_F(w) WORD_FEAT_OR(w, MCD_WF_F, -1)
#define word_get_G(w) WORD_FEAT_OR(w, MCD_WF_G, -1)
#define word_get_H(w) WORD_FEAT_OR(w, MCD_WF_H, -1)
#define word_get_I(w) WORD_FEAT_OR(w, MCD_WF_I, -1)
#define word_get_J(w) WORD_FEAT_OR(w, MCD_WF_J, -1)
#define word_get_K(w) WORD_FEAT_OR(w, MCD_WF_K, -1)
#define word_get_L(w) WORD_FEAT_OR(w, MCD_WF_L, -1)
#define word_get_M(w) WORD_FEAT_OR(w, MCD_WF_M, -1)
#define word_get_N(w) WORD_FEAT_OR(w, MCD_WF_N, -1)
#define word_get_O(w) WORD_FEAT_OR(w, MCD_WF_O, -1)
#define word_get_P(w) WORD_FEAT_OR(w, MCD_WF_P, -1)
#define word_get_Q(w) WORD_FEAT_OR(w, MCD_WF_Q, -1)
#define word_get_R(w) WORD_FEAT_OR(w, MCD_WF_R, -1)
#define word_get_S(w) WORD_FEAT_OR(w, MCD_WF_S, -1)
#define word_get_T(w) WORD_FEAT_OR(w, MCD_WF_T, -1)
#define word_get_U(w) WORD_FEAT_OR(w, MCD_WF_U, -1)
#define word_get_V(w) WORD_FEAT_OR(w, MCD_WF_V, -1)
#define word_get_W(w) WORD_FEAT_OR(w, MCD_WF_W, -1)
#define word_get_X(w) WORD_FEAT_OR(w, MCD_WF_X, -1)
#define word_get_Y(w) WORD_FEAT_OR(w, MCD_WF_Y, -1)
#define word_get_Z(w) WORD_FEAT_OR(w, MCD_WF_Z, -1)
#define word_get_input(w) WORD_MEMBER_OR(w, input, NULL)
#define word_get_signature(w) WORD_MEMBER_OR(w, signature, -1)
#define word_get_U1(w) WORD_MEMBER_OR(w, U1, -1)
#define word_get_index(w) WORD_MEMBER_OR(w, index, -1)
/*
 * Write accessors for the fields of a word.
 *
 * Every accessor stores val in the named field of the word pointed to by w
 * and expands to a single parenthesized assignment expression. No NULL test
 * is made on w.
 */

/* store a feature code at wf_array[feat] */
#define WORD_FEAT_SET(w, feat, val) ((w)->wf_array[(feat)] = (val))
/* store into a plain struct member */
#define WORD_MEMBER_SET(w, member, val) ((w)->member = (val))

#define word_set_id(w, val) WORD_FEAT_SET(w, MCD_WF_ID, val)
#define word_set_form(w, val) WORD_FEAT_SET(w, MCD_WF_FORM, val)
#define word_set_lemma(w, val) WORD_FEAT_SET(w, MCD_WF_LEMMA, val)
#define word_set_cpos(w, val) WORD_FEAT_SET(w, MCD_WF_CPOS, val)
#define word_set_pos(w, val) WORD_FEAT_SET(w, MCD_WF_POS, val)
#define word_set_feats(w, val) WORD_FEAT_SET(w, MCD_WF_FEATS, val)
#define word_set_gov(w, val) WORD_FEAT_SET(w, MCD_WF_GOV, val)
#define word_set_label(w, val) WORD_FEAT_SET(w, MCD_WF_LABEL, val)
#define word_set_stag(w, val) WORD_FEAT_SET(w, MCD_WF_STAG, val)
#define word_set_sent_seg(w, val) WORD_FEAT_SET(w, MCD_WF_SENT_SEG, val)
#define word_set_A(w, val) WORD_FEAT_SET(w, MCD_WF_A, val)
#define word_set_B(w, val) WORD_FEAT_SET(w, MCD_WF_B, val)
#define word_set_C(w, val) WORD_FEAT_SET(w, MCD_WF_C, val)
#define word_set_D(w, val) WORD_FEAT_SET(w, MCD_WF_D, val)
#define word_set_E(w, val) WORD_FEAT_SET(w, MCD_WF_E, val)
#define word_set_F(w, val) WORD_FEAT_SET(w, MCD_WF_F, val)
#define word_set_G(w, val) WORD_FEAT_SET(w, MCD_WF_G, val)
#define word_set_H(w, val) WORD_FEAT_SET(w, MCD_WF_H, val)
#define word_set_I(w, val) WORD_FEAT_SET(w, MCD_WF_I, val)
#define word_set_J(w, val) WORD_FEAT_SET(w, MCD_WF_J, val)
#define word_set_K(w, val) WORD_FEAT_SET(w, MCD_WF_K, val)
#define word_set_L(w, val) WORD_FEAT_SET(w, MCD_WF_L, val)
#define word_set_M(w, val) WORD_FEAT_SET(w, MCD_WF_M, val)
#define word_set_N(w, val) WORD_FEAT_SET(w, MCD_WF_N, val)
#define word_set_O(w, val) WORD_FEAT_SET(w, MCD_WF_O, val)
#define word_set_P(w, val) WORD_FEAT_SET(w, MCD_WF_P, val)
#define word_set_Q(w, val) WORD_FEAT_SET(w, MCD_WF_Q, val)
#define word_set_R(w, val) WORD_FEAT_SET(w, MCD_WF_R, val)
#define word_set_S(w, val) WORD_FEAT_SET(w, MCD_WF_S, val)
#define word_set_T(w, val) WORD_FEAT_SET(w, MCD_WF_T, val)
#define word_set_U(w, val) WORD_FEAT_SET(w, MCD_WF_U, val)
#define word_set_V(w, val) WORD_FEAT_SET(w, MCD_WF_V, val)
#define word_set_W(w, val) WORD_FEAT_SET(w, MCD_WF_W, val)
#define word_set_X(w, val) WORD_FEAT_SET(w, MCD_WF_X, val)
#define word_set_Y(w, val) WORD_FEAT_SET(w, MCD_WF_Y, val)
#define word_set_Z(w, val) WORD_FEAT_SET(w, MCD_WF_Z, val)
#define word_set_signature(w, val) WORD_MEMBER_SET(w, signature, val)
#define word_set_index(w, val) WORD_MEMBER_SET(w, index, val)
word *word_new(char *input); word *word_new(char *input);
word *word_create_dummy(mcd *mcd_struct); word *word_create_dummy(mcd *mcd_struct);
......
...@@ -13,7 +13,8 @@ ...@@ -13,7 +13,8 @@
#define word_buffer_get_input_file(wb) (wb)->input_file #define word_buffer_get_input_file(wb) (wb)->input_file
#define word_buffer_get_mcd(wb) (wb)->mcd_struct #define word_buffer_get_mcd(wb) (wb)->mcd_struct
#define word_buffer_b0(wb) (((wb)->nbelem == 0)? NULL : (wb)->array[(wb)->current_index]) /* #define word_buffer_b0(wb) (((wb)->nbelem == 0)? NULL : (wb)->array[(wb)->current_index]) */
#define word_buffer_b0(wb) (((wb)->current_index >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index])
#define word_buffer_b1(wb) (((wb)->current_index + 1 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 1]) #define word_buffer_b1(wb) (((wb)->current_index + 1 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 1])
#define word_buffer_b2(wb) (((wb)->current_index + 2 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 2]) #define word_buffer_b2(wb) (((wb)->current_index + 2 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 2])
#define word_buffer_b3(wb) (((wb)->current_index + 3 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 3]) #define word_buffer_b3(wb) (((wb)->current_index + 3 >= (wb)->nbelem)? NULL : (wb)->array[(wb)->current_index + 3])
...@@ -24,6 +25,15 @@ ...@@ -24,6 +25,15 @@
#define word_buffer_nb_elts_left(wb) ((wb)->current_index) #define word_buffer_nb_elts_left(wb) ((wb)->current_index)
/*
 * Cursor predicates on a word buffer. Each one expands to an int expression
 * worth 1 when the condition holds and 0 otherwise.
 *   word_buffer_end      : current_index is at or beyond nbelem
 *   word_buffer_is_last  : current_index designates the last stored word
 *   word_buffer_is_empty : the buffer stores no word at all
 */
#define word_buffer_end(wb) ((wb)->current_index >= (wb)->nbelem)
#define word_buffer_is_last(wb) ((wb)->current_index == (wb)->nbelem - 1)
#define word_buffer_is_empty(wb) ((wb)->nbelem == 0)
typedef struct { typedef struct {
int size; /* size of the array used to store words */ int size; /* size of the array used to store words */
int nbelem; /* number of words in the buffer */ int nbelem; /* number of words in the buffer */
...@@ -45,10 +55,13 @@ int word_buffer_move_right(word_buffer *wb); ...@@ -45,10 +55,13 @@ int word_buffer_move_right(word_buffer *wb);
int word_buffer_move_left(word_buffer *wb); int word_buffer_move_left(word_buffer *wb);
void word_buffer_print(FILE *f, word_buffer *wb); void word_buffer_print(FILE *f, word_buffer *wb);
void word_buffer_print_compact(FILE *f, word_buffer *wb); void word_buffer_print_compact(FILE *f, word_buffer *wb);
int word_buffer_read_sentence(word_buffer *bw);
word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct);
/*
int word_buffer_is_empty(word_buffer *wb); int word_buffer_is_empty(word_buffer *wb);
int word_buffer_is_last(word_buffer *wb); int word_buffer_is_last(word_buffer *wb);
int word_buffer_end(word_buffer *wb); int word_buffer_end(word_buffer *wb);
int word_buffer_read_sentence(word_buffer *bw); */
word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct);
#endif #endif
...@@ -92,7 +92,6 @@ word *word_buffer_get_word(word_buffer *wb, int offset) ...@@ -92,7 +92,6 @@ word *word_buffer_get_word(word_buffer *wb, int offset)
word *word_buffer_get_word_n(word_buffer *wb, int n) word *word_buffer_get_word_n(word_buffer *wb, int n)
{ {
return ((n >=0) && (n < wb->nbelem))? wb->array[n] : NULL; return ((n >=0) && (n < wb->nbelem))? wb->array[n] : NULL;
} }
word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct) word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct)
...@@ -104,7 +103,7 @@ word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct) ...@@ -104,7 +103,7 @@ word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct)
else else
f = myfopen(mcf_filename, "r"); f = myfopen(mcf_filename, "r");
word_buffer *wb = word_buffer_new(f, mcd_struct, 0); word_buffer *wb = word_buffer_new(f, mcd_struct, 0);
while(word_buffer_read_next_word(wb) != -1){ while(word_buffer_read_next_word(wb)){
/* printf("load word %d\n", wb->nbelem - 1); */ /* printf("load word %d\n", wb->nbelem - 1); */
} }
if(mcf_filename != NULL) if(mcf_filename != NULL)
...@@ -112,16 +111,12 @@ word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct) ...@@ -112,16 +111,12 @@ word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct)
return wb; return wb;
} }
int word_buffer_read_next_word(word_buffer *wb) int word_buffer_read_next_word(word_buffer *wb)
{ {
word *w = NULL; word *w = word_read(wb->input_file, wb->mcd_struct);
if(w == NULL) return 0;
w = word_read(wb->input_file, wb->mcd_struct);
/* if((w) && (w->input)) printf("## %s\n", w->input); */
if(w == NULL) return -1;
word_buffer_add(wb, w); word_buffer_add(wb, w);
return wb->nbelem - 1; return 1;
} }
int word_buffer_move_right(word_buffer *wb) int word_buffer_move_right(word_buffer *wb)
...@@ -142,23 +137,6 @@ int word_buffer_move_left(word_buffer *wb) ...@@ -142,23 +137,6 @@ int word_buffer_move_left(word_buffer *wb)
return 0; return 0;
} }
/* Returns 1 when the current index is at or beyond the number of words stored in wb, 0 otherwise. */
int word_buffer_end(word_buffer *wb)
{
  if(wb->current_index < wb->nbelem)
    return 0;
  return 1;
}
/* Returns 1 when the current index designates the last word stored in wb, 0 otherwise. */
int word_buffer_is_last(word_buffer *wb)
{
  int last_position = wb->nbelem - 1;

  if(wb->current_index == last_position)
    return 1;
  return 0;
}
/* Returns 1 when wb stores no word (nbelem is 0), 0 otherwise. */
int word_buffer_is_empty(word_buffer *wb)
{
  if(wb->nbelem != 0)
    return 0;
  return 1;
}
int word_buffer_read_sentence(word_buffer *wb) int word_buffer_read_sentence(word_buffer *wb)
{ {
char buffer[10000]; char buffer[10000];
...@@ -166,16 +144,29 @@ int word_buffer_read_sentence(word_buffer *wb) ...@@ -166,16 +144,29 @@ int word_buffer_read_sentence(word_buffer *wb)
int index = 1; int index = 1;
while(fgets(buffer, 10000, word_buffer_get_input_file(wb))){ while(fgets(buffer, 10000, word_buffer_get_input_file(wb))){
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) continue; /* ignore empty lines */
if(feof(word_buffer_get_input_file(wb))) break; if(feof(word_buffer_get_input_file(wb))) break;
/* fprintf(stderr, "%s", buffer); */
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) break; /* end of the sentence */
w = word_parse_buffer(buffer, word_buffer_get_mcd(wb)); w = word_parse_buffer(buffer, word_buffer_get_mcd(wb));
word_set_index(w, index); word_set_index(w, index);
index++; index++;
word_buffer_add(wb, w); word_buffer_add(wb, w);
if(word_is_eos(w, word_buffer_get_mcd(wb))) break; if(word_is_eos(w, word_buffer_get_mcd(wb))) break;
} }
/* return bw->nbelem - 1; */ /* because of the dummy word */
return wb->nbelem ; return wb->nbelem ;
} }
/*int word_buffer_end(word_buffer *wb)
{
return (wb->current_index >= wb->nbelem)? 1 : 0;
}
int word_buffer_is_last(word_buffer *wb)
{
return (wb->current_index == wb->nbelem - 1)? 1 : 0;
}
int word_buffer_is_empty(word_buffer *wb)
{
return (wb->nbelem == 0)? 1 : 0;
}
*/
...@@ -18,6 +18,23 @@ typedef struct { ...@@ -18,6 +18,23 @@ typedef struct {
mcd *mcd_struct; mcd *mcd_struct;
} context; } context;
/*
 * Releases a context: frees the program name, the conll, mcf and mcd file
 * names, the mcd structure (through mcd_free) and finally the context itself.
 * A NULL context is accepted and ignored.
 */
void context_free(context *ctx)
{
  if(ctx == NULL) return;

  /* free() does nothing on a NULL pointer, so the strings need no individual test */
  free(ctx->program_name);
  free(ctx->conll_filename);
  free(ctx->mcf_filename);
  free(ctx->mcd_filename);

  /* mcd_free() is defined elsewhere: keep the test, its behaviour on NULL is not visible here */
  if(ctx->mcd_struct != NULL)
    mcd_free(ctx->mcd_struct);

  free(ctx);
}
context *context_new(void) context *context_new(void)
{ {
context *ctx = (context *)memalloc(sizeof(context)); context *ctx = (context *)memalloc(sizeof(context));
...@@ -44,7 +61,6 @@ void context_general_help_message(context *ctx) ...@@ -44,7 +61,6 @@ void context_general_help_message(context *ctx)
fprintf(stderr, "\t-o --conll : conll filename (write to stdout if absent)\n"); fprintf(stderr, "\t-o --conll : conll filename (write to stdout if absent)\n");
} }
void mcf2conll_check_options(context *ctx){ void mcf2conll_check_options(context *ctx){
if(ctx->help){ if(ctx->help){
context_general_help_message(ctx); context_general_help_message(ctx);
...@@ -52,8 +68,6 @@ void mcf2conll_check_options(context *ctx){ ...@@ -52,8 +68,6 @@ void mcf2conll_check_options(context *ctx){
} }
} }
context *context_read_options(int argc, char *argv[]) context *context_read_options(int argc, char *argv[])
{ {
int c; int c;
...@@ -134,7 +148,7 @@ int main(int argc, char *argv[]) ...@@ -134,7 +148,7 @@ int main(int argc, char *argv[])
w = word_buffer_b0(wb); w = word_buffer_b0(wb);
if(w){ if(w){
printf("%d\t", index); fprintf(output_file, "%d\t", index);
if(form_col != -1) if(form_col != -1)
word_print_col_n(output_file, w, form_col); word_print_col_n(output_file, w, form_col);
...@@ -194,5 +208,6 @@ int main(int argc, char *argv[]) ...@@ -194,5 +208,6 @@ int main(int argc, char *argv[])
if(ctx->conll_filename) if(ctx->conll_filename)
fclose(output_file); fclose(output_file);
context_free(ctx);
return 0; return 0;
} }
...@@ -27,6 +27,10 @@ int config_is_terminal(config *c) ...@@ -27,6 +27,10 @@ int config_is_terminal(config *c)
return (word_buffer_end(config_get_buffer(c)) && return (word_buffer_end(config_get_buffer(c)) &&
(stack_is_empty(config_get_stack(c)) || (stack_is_empty(config_get_stack(c)) ||
(stack_top(config_get_stack(c)) == NULL))); (stack_top(config_get_stack(c)) == NULL)));
/*return (word_buffer_end(config_get_buffer(c)) &&
(stack_is_empty(config_get_stack(c)) ||
(stack_top(config_get_stack(c)) == NULL)));*/
} }
void config_push_mvt(config *c, int type, word *gov, word *dep) void config_push_mvt(config *c, int type, word *gov, word *dep)
......
...@@ -9,9 +9,10 @@ int movement_eos(config *c, int movement_code) ...@@ -9,9 +9,10 @@ int movement_eos(config *c, int movement_code)
if(stack_is_empty(config_get_stack(c))) return 0; if(stack_is_empty(config_get_stack(c))) return 0;
word *s0 = stack_top(config_get_stack(c)); word *s0 = stack_top(config_get_stack(c));
/* word on top of stack is already eos */
if(word_get_sent_seg(s0) == 1) return 0; if(word_get_sent_seg(s0) == 1) return 0;
/* set word on the top of the stack to sent_seg */ /* set word on the top of the stack to eos */
word_set_sent_seg(s0, 1); word_set_sent_seg(s0, 1);
config_push_mvt(c, movement_code, s0, NULL); config_push_mvt(c, movement_code, s0, NULL);
...@@ -22,7 +23,7 @@ int movement_eos_undo(config *c) ...@@ -22,7 +23,7 @@ int movement_eos_undo(config *c)
{ {
word *gov = mvt_get_gov(mvt_stack_top(config_get_history(c))); word *gov = mvt_get_gov(mvt_stack_top(config_get_history(c)));
/* word on the top of the stack is not sent_seg anymore */ /* word on the top of the stack is not eos anymore */
word_set_sent_seg(gov, 0); word_set_sent_seg(gov, 0);
mvt_free(config_pop_mvt(c)); mvt_free(config_pop_mvt(c));
...@@ -103,7 +104,10 @@ int movement_right_arc_undo(config *c) ...@@ -103,7 +104,10 @@ int movement_right_arc_undo(config *c)
int movement_shift(config *c, int movement_code) int movement_shift(config *c, int movement_code)
{ {
if(word_buffer_is_empty(config_get_buffer(c))) return 0; /* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */
if(word_buffer_end(config_get_buffer(c))) return 0;
word *b0 = word_buffer_b0(config_get_buffer(c)); word *b0 = word_buffer_b0(config_get_buffer(c));
stack_push(config_get_stack(c), b0); stack_push(config_get_stack(c), b0);
config_push_mvt(c, movement_code, b0, NULL); config_push_mvt(c, movement_code, b0, NULL);
......
...@@ -36,8 +36,6 @@ void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct) ...@@ -36,8 +36,6 @@ void print_word_buffer_old(config *c, dico *dico_labels, mcd *mcd_struct)
} }
} }
void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct) void print_word_buffer(config *c, dico *dico_labels, mcd *mcd_struct)
{ {
int i; int i;
...@@ -141,25 +139,28 @@ void simple_decoder_parser_arc_eager(context *ctx) ...@@ -141,25 +139,28 @@ void simple_decoder_parser_arc_eager(context *ctx)
c = config_new(f, ctx->mcd_struct, 5); c = config_new(f, ctx->mcd_struct, 5);
while(!config_is_terminal(c)){ while(!config_is_terminal(c)){
if(ctx->debug_mode){ if(ctx->debug_mode){
fprintf(stdout, "***********************************\n"); fprintf(stdout, "***********************************\n");
config_print(stdout, c); config_print(stdout, c);
} }
/* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */ /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */
/* which means that the top of the stack got its eos status from input */ /* which means that the top of the stack got its eos status from input */
/* force the parser to finish parsing the sentence (perform all pending reduce actions) and determine root of the sentence */
if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){ if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){
word_set_sent_seg(stack_top(config_get_stack(c)), -1); word_set_sent_seg(stack_top(config_get_stack(c)), -1);
movement_parser_eos(c); movement_parser_eos(c);
while(movement_parser_reduce(c)); while(movement_parser_reduce(c));
while(movement_parser_root(c, root_label)); while(movement_parser_root(c, root_label));
/* mvt_code = MVT_PARSER_EOS; */
if(ctx->debug_mode) printf("force EOS\n"); if(ctx->debug_mode) printf("force EOS\n");
} }
/* normal behaviour: ask the classifier which movement to perform next, then perform it */
else{ else{
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE); config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
mvt_code = feature_table_argmax(fv, ft, &max); mvt_code = feature_table_argmax(fv, ft, &max);
if(ctx->debug_mode){ if(ctx->debug_mode){
vcode *vcode_array = feature_table_get_vcode_array(fv, ft); vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
for(int i=0; i < 3; i++){ for(int i=0; i < 3; i++){
...@@ -181,7 +182,6 @@ void simple_decoder_parser_arc_eager(context *ctx) ...@@ -181,7 +182,6 @@ void simple_decoder_parser_arc_eager(context *ctx)
fprintf(stdout, "\t"); fprintf(stdout, "\t");
feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2); feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2);
printf("%f\n", max1 - max2); printf("%f\n", max1 - max2);
} }
mvt_type = movement_parser_type(mvt_code); mvt_type = movement_parser_type(mvt_code);
...@@ -209,20 +209,16 @@ void simple_decoder_parser_arc_eager(context *ctx) ...@@ -209,20 +209,16 @@ void simple_decoder_parser_arc_eager(context *ctx)
} }
if(result == 0){ if(result == 0){
if(ctx->debug_mode){ if(ctx->debug_mode) fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n");
fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n"); result = movement_parser_shift(c);
if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */
if(ctx->debug_mode) fprintf(stdout, "WARNING : cannot exectue a SHIFT emptying stack !\n");
while(!stack_is_empty(config_get_stack(c)))
movement_parser_root(c, root_label);
} }
movement_parser_shift(c);
} }
} }
} }
/* horrible hack: force the remaining element in the stack (if any) to be the root */
if(stack_nbelem(config_get_stack(c)) && (stack_top(config_get_stack(c)) == NULL))
stack_pop(config_get_stack(c));
while(!stack_is_empty(config_get_stack(c)) && (stack_top(config_get_stack(c)) != NULL))
movement_parser_root(c, root_label);
/* end of horrible hack */
if(!ctx->trace_mode) if(!ctx->trace_mode)
print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct); print_word_buffer(c, ctx->dico_labels, ctx->mcd_struct);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment