Commit 65441c51 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

arc eager version, still unstable

parent cd9f32dd
......@@ -44,6 +44,7 @@ int word_buffer_read_next_word(word_buffer *wb);
int word_buffer_move_right(word_buffer *wb);
int word_buffer_move_left(word_buffer *wb);
void word_buffer_print(FILE *f, word_buffer *wb);
void word_buffer_print_compact(FILE *f, word_buffer *wb);
int word_buffer_is_empty(word_buffer *wb);
int word_buffer_is_last(word_buffer *wb);
int word_buffer_end(word_buffer *wb);
......
......@@ -56,7 +56,7 @@ mcd *mcd_copy(mcd *m)
mcd *copy = mcd_new(m->nb_col);
for(i=0; i < MCD_WF_NB; i++)
copy->wf2col[i] = m->wf2col[i] = -1;
copy->wf2col[i] = m->wf2col[i];
for(i=0; i < m->nb_col; i++){
copy->representation[i] = m->representation[i];
......
......@@ -145,8 +145,8 @@ int word_get_gov_relative_index(word *w)
int index;
if(word_get_gov(w) == 0) return -1;
printf("in word_get_gov_rel_index(%d)\n", word_get_relative_index(w));
printf("gov = %d\n", word_get_gov(w));
/* printf("in word_get_gov_rel_index(%d)\n", word_get_relative_index(w));
printf("gov = %d\n", word_get_gov(w)); */
index = (word_get_relative_index(w)) + (word_get_gov(w));
/* printf("index = %d\n", index); */
......
......@@ -45,6 +45,26 @@ void word_buffer_print(FILE *f, word_buffer *wb)
if(w){ fprintf(f, "[ 3] "); word_print(f, w); fprintf(f, "\n");}
}
void word_buffer_print_compact(FILE *f, word_buffer *wb)
{
word *w;
w = word_buffer_bm3(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_bm2(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_bm1(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_b0(wb);
if(w){ fprintf(f, "[%d:%s] ", word_get_relative_index(w), w->form);}
w = word_buffer_b1(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_b2(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_b3(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
fprintf(f, "\n");
}
void word_buffer_free(word_buffer *wb)
{
int i;
......@@ -121,7 +141,7 @@ int word_buffer_move_left(word_buffer *wb)
/*
 * Tell whether the current position of wb has reached or gone past the end
 * of the buffer.
 *
 * Returns 1 when wb->current_index >= wb->nbelem, 0 otherwise.
 *
 * The comparison is ">=" rather than "==" so that an index that has moved
 * beyond nbelem is still reported as the end. The former "==" return
 * statement preceded this one and made it unreachable; it has been removed.
 */
int word_buffer_end(word_buffer *wb)
{
  return (wb->current_index >= wb->nbelem) ? 1 : 0;
}
int word_buffer_is_last(word_buffer *wb)
......
......@@ -106,14 +106,14 @@ void config_print(FILE *f, config *c)
word *s0 = NULL;
if(c){
if(!stack_is_empty(c->st))
s0 = stack_elt_n(c->st, 0);
s0 = stack_elt_n(c->st, 0);
b0 = word_buffer_b0(c->bf);
if(s0) { printf("s0 = "); word_print2(stdout, s0);}
if(b0) { printf("b0 = "); word_print2(stdout, b0);}
/* if(s0) { printf("s0 = "); word_print2(stdout, s0);} */
/* if(b0) { printf("b0 = "); word_print2(stdout, b0);} */
stack_print(f, c->st);
fprintf(f, "\n");
word_buffer_print(f, c->bf);
word_buffer_print_compact(f, c->bf);
}
}
......
......@@ -51,9 +51,11 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
feat_vec *fv = feat_vec_new(feature_types_nb);
int sentence_nb = 0;
int root_label = dico_string2int(ctx->dico_labels, (char *) ctx->root_label);
int eos_label = dico_string2int(ctx->dico_labels, "eos");
word_buffer *ref = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct);
FILE *mcf_file = myfopen(ctx->input_filename, "r");
/* create an mcd that corresponds to ctx->mcd_struct, but without gov and label */
mcd *mcd_struct_hyp = mcd_copy(ctx->mcd_struct);
mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_GOV);
mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL);
......@@ -61,10 +63,11 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
c = config_initial(mcf_file, mcd_struct_hyp, 5);
while(!word_buffer_end(ref)){
printf("************ REF ************\n");
/* printf("************ REF ************\n");
word_buffer_print(stdout, ref);
printf("*****************************\n");
printf("*****************************\n");*/
printf("*****************************\n");
config_print(stdout,c);
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
......@@ -74,32 +77,50 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
/* printf("mvt code = %d\n", mvt_code); */
/* printf("mvt code = %d\n", mvt_code);
printf("mvt type = %d\n", mvt_type); */
movement_print(stdout, mvt_code, ctx->dico_labels);
fprintf(output_file, "%d", mvt_code);
feat_vec_print(output_file, fv);
if((mvt_type == MVT_RIGHT) && (mvt_label == root_label)){ /* sentence is complete */
/* create the root arc */
movement_right_arc(c, mvt_label, 0);
/* pop root from stack */
stack_pop(config_get_stack(c));
/* printf("sentence complete config : ");
config_print(stdout,c); */
}
if(mvt_type == MVT_LEFT){
movement_left_arc(c, mvt_label, 0);
continue;
}
if(mvt_type == MVT_RIGHT){
movement_right_arc(c, mvt_label, 0);
word_buffer_move_right(ref);
if((mvt_label == eos_label)){ /* sentence is complete */
printf("sentence complete\n");
while(movement_reduce(c,0)){
printf("reduce\n");
printf("*****************************\n");
config_print(stdout,c);
}
/* pop eos from stack */
/* stack_pop(config_get_stack(c)); */
/* pop root from stack */
/* stack_pop(config_get_stack(c)); */
config_print(stdout,c);
printf("ref current index = %d\n", word_buffer_get_current_index(ref));
if(word_buffer_is_last(ref)){
printf("it is the end\n");
break;
}
/* change index of dummy word */
word_set_relative_index(stack_top(config_get_stack(c)), word_get_relative_index(word_buffer_b0(config_get_buffer(c))) - 1);
}
continue;
}
if(mvt_type == MVT_REDUCE){
......
......@@ -21,7 +21,7 @@ int movement_type(int mvt)
if(mvt == 0) return MVT_SHIFT; /* 0 is the code of shift */
if(mvt == 1) return MVT_REDUCE; /* 1 is the code of reduce */
if(mvt % 2 == 0) return MVT_LEFT; /* even movements are left movements */
return MVT_LEFT; /* odd movements are right movements */
return MVT_RIGHT; /* odd movements are right movements */
}
int movement_label(int mvt)
......@@ -40,15 +40,21 @@ int movement_left_arc(config *c, int label, float score)
if(word_buffer_is_empty(c->bf)) return 0;
/* word on top of the stack should not have a governor */
printf("word_get_gov_relative_index(stack_top(c->st)) = %d\n", word_get_gov_relative_index(stack_top(c->st)));
/* printf("index word top of stack = %d\n", word_get_relative_index(stack_top(c->st))); */
/* printf("word_get_gov_relative_index(stack_top(c->st)) = %d\n", word_get_gov_relative_index(stack_top(c->st))); */
if(word_get_gov_relative_index(stack_top(c->st)) != -1) return 0;
word *gov = word_buffer_b0(c->bf);
word *dep = stack_top(c->st);
int dist = (word_get_relative_index(gov)) - (word_get_relative_index(dep));
printf("create left arc %d <- %d dist = %d\n", word_get_relative_index(dep), word_get_relative_index(gov), dist);
/* create a new dependency */
word_set_gov(stack_top(c->st), word_get_relative_index(word_buffer_b0(c->bf)));
word_set_label(stack_top(c->st), label);
word_set_gov(dep, dist);
word_set_label(dep, label);
/* depset_add(c->ds, word_buffer_b0(c->bf), label, stack_top(c->st)); */
stack_pop(c->st);
......@@ -62,12 +68,17 @@ int movement_right_arc(config *c, int label, float score)
if(stack_is_empty(c->st)) return 0;
if(word_buffer_is_empty(c->bf)) return 0;
word *gov = stack_top(c->st);
word *dep = word_buffer_b0(c->bf);
int dist = (word_get_relative_index(gov)) - (word_get_relative_index(dep));
printf("create right arc %d -> %d dist = %d\n", word_get_relative_index(gov), word_get_relative_index(dep), dist);
/* create a new dependency */
word_set_gov(word_buffer_b0(c->bf), word_get_relative_index(stack_top(c->st)));
word_set_label(word_buffer_b0(c->bf), label);
word_set_gov(dep, dist);
word_set_label(dep, label);
/* depset_add(c->ds, stack_top(c->st), label, word_buffer_b0(c->bf)); */
stack_push(c->st, word_buffer_b0(c->bf));
word_buffer_move_right(c->bf);
......@@ -91,7 +102,7 @@ int movement_shift(config *c, int stream, float score)
/*
 * REDUCE movement: pop the word on top of the stack of c.
 *
 * c     : configuration whose stack is modified
 * score : not used by this function
 *
 * Returns 1 when the word has been popped, 0 when the movement cannot be
 * applied (empty stack, or the word on top of the stack has no governor).
 */
int movement_reduce(config *c, float score)
{
  if(stack_is_empty(c->st)) return 0;

  /* A governor value of 0 means "no governor". The governor is stored as an
     offset relative to the word, so -1 is a valid governor (the word just
     before) and must not block the reduce: the old "== -1" test is dropped. */
  if(word_get_gov(stack_top(c->st)) == 0) return 0;

  stack_pop(c->st);
  return 1;
}
......
......@@ -7,12 +7,18 @@
int check_all_dependents_of_word_in_ref_are_in_hyp(config *c, word_buffer *ref, int word_index)
{
int dep;
int gov_ref;
int gov_hyp;
for(dep=1; dep < ref->nbelem; dep++){
if(word_get_gov_relative_index(word_buffer_get_word_n(ref, dep)) == word_index){ /* found a dependent of word in ref */
gov_ref = word_get_gov_relative_index(word_buffer_get_word_n(ref, dep));
if(gov_ref == word_index){ /* found a dependent of word in ref */
/* look for a dependency in hyp such that its dependent is dep */
if(word_get_gov_relative_index(word_buffer_get_word_n(config_get_buffer(c), dep)) != word_index) return 0;
printf("found a dep of word %d in ref, it is %d\n", word_index, dep);
gov_hyp = word_get_gov_relative_index(word_buffer_get_word_n(config_get_buffer(c), dep));
printf("gov of %d in hyp is %d\n", dep,gov_hyp);
if(gov_hyp != gov_ref) return 0;
/*
if((dep >= c->ds->length)
|| (c->ds->array[dep].gov == NULL)
......@@ -29,38 +35,42 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref)
word *s0; /* word on top of stack */
word *b0; /* next word in the bufer */
int s0_index, b0_index;
int s0_gov_index, b0_gov_index;
if(!stack_is_empty(c->st) && !word_buffer_is_empty(c->bf)){
s0 = stack_top(c->st);
s0_index = word_get_relative_index(s0);
s0_gov_index = word_get_gov_relative_index(word_buffer_get_word_n(ref, s0_index));
b0 = word_buffer_b0(c->bf);
b0_index = word_get_relative_index(b0);
b0_gov_index = word_get_gov_relative_index(word_buffer_get_word_n(ref, b0_index));
printf("s0_index = %d b0_index = %d\n", s0_index, b0_index);
printf("dans ref gov de s0 (%d) = %d\n", s0_index, word_get_gov_relative_index(word_buffer_get_word_n(ref, s0_index)));
printf("dans ref gov de b0 (%d) = %d\n", b0_index, word_get_gov_relative_index(word_buffer_get_word_n(ref, b0_index)));
/* printf("s0_index = %d b0_index = %d\n", s0_index, b0_index);
printf("dans ref gov de s0 (%d) = %d\n", s0_index, s0_gov_index);
printf("dans ref gov de b0 (%d) = %d\n", b0_index, b0_gov_index);*/
/* LEFT ARC b0 is the governor and s0 the dependent */
if(word_get_gov_relative_index(word_buffer_get_word_n(ref, s0_index)) == b0_index){
if(s0_gov_index == b0_index){
printf("oracle says left\n");
/* printf("oracle says left\n"); */
return movement_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index)));
}
/* RIGHT ARC s0 is the governor and b0 the dependent */
if((word_get_gov_relative_index(word_buffer_get_word_n(ref, b0_index)) == s0_index))
if(b0_gov_index == s0_index){
/* printf("oracle says right\n"); */
return movement_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index)));
}
/* REDUCE */
if((stack_height(c->st) > 2) && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index))
/* if(word_get_gov_relative_index(stack_top(c->st)) != -1) */
return MVT_REDUCE;
/* if(check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index))
return MVT_REDUCE;*/
printf("all dep in ref are in hyp = %d\n", check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index));
if((stack_height(c->st) > 2)
&& check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)
&& (word_get_gov(stack_top(c->st)) != 0)) /* word on top of the stack has a goveror */
{
return MVT_REDUCE;
}
/* SHIFT */
return MVT_SHIFT;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment