Skip to content
Snippets Groups Projects
Commit 65441c51 authored by Alexis Nasr
Browse files

arc eager version, still unstable

parent cd9f32dd
No related branches found
No related tags found
No related merge requests found
......@@ -44,6 +44,7 @@ int word_buffer_read_next_word(word_buffer *wb);
/* Cursor movement; presumably steps the current position one word to the
   right / left -- TODO confirm against the definitions. */
int word_buffer_move_right(word_buffer *wb);
int word_buffer_move_left(word_buffer *wb);
/* Verbose dump of the buffer to f (definition not fully visible here). */
void word_buffer_print(FILE *f, word_buffer *wb);
/* One-line dump to f of the words returned by the bm3..b3 accessors, each as
   "index:form"; the b0 word is wrapped in square brackets. */
void word_buffer_print_compact(FILE *f, word_buffer *wb);
int word_buffer_is_empty(word_buffer *wb);
int word_buffer_is_last(word_buffer *wb);
/* Returns 1 once current_index has reached nbelem, 0 otherwise. */
int word_buffer_end(word_buffer *wb);
......
......@@ -56,7 +56,7 @@ mcd *mcd_copy(mcd *m)
mcd *copy = mcd_new(m->nb_col);
for(i=0; i < MCD_WF_NB; i++)
copy->wf2col[i] = m->wf2col[i] = -1;
copy->wf2col[i] = m->wf2col[i];
for(i=0; i < m->nb_col; i++){
copy->representation[i] = m->representation[i];
......
......@@ -145,8 +145,8 @@ int word_get_gov_relative_index(word *w)
int index;
if(word_get_gov(w) == 0) return -1;
printf("in word_get_gov_rel_index(%d)\n", word_get_relative_index(w));
printf("gov = %d\n", word_get_gov(w));
/* printf("in word_get_gov_rel_index(%d)\n", word_get_relative_index(w));
printf("gov = %d\n", word_get_gov(w)); */
index = (word_get_relative_index(w)) + (word_get_gov(w));
/* printf("index = %d\n", index); */
......
......@@ -45,6 +45,26 @@ void word_buffer_print(FILE *f, word_buffer *wb)
if(w){ fprintf(f, "[ 3] "); word_print(f, w); fprintf(f, "\n");}
}
void word_buffer_print_compact(FILE *f, word_buffer *wb)
{
word *w;
w = word_buffer_bm3(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_bm2(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_bm1(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_b0(wb);
if(w){ fprintf(f, "[%d:%s] ", word_get_relative_index(w), w->form);}
w = word_buffer_b1(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_b2(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
w = word_buffer_b3(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
fprintf(f, "\n");
}
void word_buffer_free(word_buffer *wb)
{
int i;
......@@ -121,7 +141,7 @@ int word_buffer_move_left(word_buffer *wb)
/* Tell whether the buffer's current position lies beyond its content.
   Returns 1 when current_index is at or past nbelem, 0 otherwise.
   The comparison is ">=" rather than "==" so that an index that has
   overshot nbelem is still reported as the end. */
int word_buffer_end(word_buffer *wb)
{
  return (wb->current_index >= wb->nbelem)? 1 : 0;
}
int word_buffer_is_last(word_buffer *wb)
......
......@@ -108,12 +108,12 @@ void config_print(FILE *f, config *c)
if(!stack_is_empty(c->st))
s0 = stack_elt_n(c->st, 0);
b0 = word_buffer_b0(c->bf);
if(s0) { printf("s0 = "); word_print2(stdout, s0);}
if(b0) { printf("b0 = "); word_print2(stdout, b0);}
/* if(s0) { printf("s0 = "); word_print2(stdout, s0);} */
/* if(b0) { printf("b0 = "); word_print2(stdout, b0);} */
stack_print(f, c->st);
fprintf(f, "\n");
word_buffer_print(f, c->bf);
word_buffer_print_compact(f, c->bf);
}
}
......
......@@ -51,9 +51,11 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
feat_vec *fv = feat_vec_new(feature_types_nb);
int sentence_nb = 0;
int root_label = dico_string2int(ctx->dico_labels, (char *) ctx->root_label);
int eos_label = dico_string2int(ctx->dico_labels, "eos");
word_buffer *ref = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct);
FILE *mcf_file = myfopen(ctx->input_filename, "r");
/* create an mcd that corresponds to ctx->mcd_struct, but without gov and label */
mcd *mcd_struct_hyp = mcd_copy(ctx->mcd_struct);
mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_GOV);
mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL);
......@@ -61,10 +63,11 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
c = config_initial(mcf_file, mcd_struct_hyp, 5);
while(!word_buffer_end(ref)){
printf("************ REF ************\n");
/* printf("************ REF ************\n");
word_buffer_print(stdout, ref);
printf("*****************************\n");
printf("*****************************\n");*/
printf("*****************************\n");
config_print(stdout,c);
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
......@@ -75,31 +78,49 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
/* printf("mvt code = %d\n", mvt_code); */
/* printf("mvt code = %d\n", mvt_code);
printf("mvt type = %d\n", mvt_type); */
movement_print(stdout, mvt_code, ctx->dico_labels);
fprintf(output_file, "%d", mvt_code);
feat_vec_print(output_file, fv);
if((mvt_type == MVT_RIGHT) && (mvt_label == root_label)){ /* sentence is complete */
/* create the root arc */
movement_right_arc(c, mvt_label, 0);
/* pop root from stack */
stack_pop(config_get_stack(c));
/* printf("sentence complete config : ");
config_print(stdout,c); */
}
if(mvt_type == MVT_LEFT){
movement_left_arc(c, mvt_label, 0);
continue;
}
if(mvt_type == MVT_RIGHT){
movement_right_arc(c, mvt_label, 0);
word_buffer_move_right(ref);
if((mvt_label == eos_label)){ /* sentence is complete */
printf("sentence complete\n");
while(movement_reduce(c,0)){
printf("reduce\n");
printf("*****************************\n");
config_print(stdout,c);
}
/* pop eos from stack */
/* stack_pop(config_get_stack(c)); */
/* pop root from stack */
/* stack_pop(config_get_stack(c)); */
config_print(stdout,c);
printf("ref current index = %d\n", word_buffer_get_current_index(ref));
if(word_buffer_is_last(ref)){
printf("it is the end\n");
break;
}
/* change index of dummy word */
word_set_relative_index(stack_top(config_get_stack(c)), word_get_relative_index(word_buffer_b0(config_get_buffer(c))) - 1);
}
continue;
}
if(mvt_type == MVT_REDUCE){
......
......@@ -21,7 +21,7 @@ int movement_type(int mvt)
if(mvt == 0) return MVT_SHIFT; /* 0 is the code of shift */
if(mvt == 1) return MVT_REDUCE; /* 1 is the code of reduce */
if(mvt % 2 == 0) return MVT_LEFT; /* even movements are left movements */
return MVT_LEFT; /* odd movements are right movements */
return MVT_RIGHT; /* odd movements are right movements */
}
int movement_label(int mvt)
......@@ -40,15 +40,21 @@ int movement_left_arc(config *c, int label, float score)
if(word_buffer_is_empty(c->bf)) return 0;
/* word on top of the stack should not have a governor */
printf("word_get_gov_relative_index(stack_top(c->st)) = %d\n", word_get_gov_relative_index(stack_top(c->st)));
/* printf("index word top of stack = %d\n", word_get_relative_index(stack_top(c->st))); */
/* printf("word_get_gov_relative_index(stack_top(c->st)) = %d\n", word_get_gov_relative_index(stack_top(c->st))); */
if(word_get_gov_relative_index(stack_top(c->st)) != -1) return 0;
word *gov = word_buffer_b0(c->bf);
word *dep = stack_top(c->st);
int dist = (word_get_relative_index(gov)) - (word_get_relative_index(dep));
printf("create left arc %d <- %d dist = %d\n", word_get_relative_index(dep), word_get_relative_index(gov), dist);
/* create a new dependency */
word_set_gov(stack_top(c->st), word_get_relative_index(word_buffer_b0(c->bf)));
word_set_label(stack_top(c->st), label);
word_set_gov(dep, dist);
word_set_label(dep, label);
/* depset_add(c->ds, word_buffer_b0(c->bf), label, stack_top(c->st)); */
stack_pop(c->st);
......@@ -62,12 +68,17 @@ int movement_right_arc(config *c, int label, float score)
if(stack_is_empty(c->st)) return 0;
if(word_buffer_is_empty(c->bf)) return 0;
word *gov = stack_top(c->st);
word *dep = word_buffer_b0(c->bf);
int dist = (word_get_relative_index(gov)) - (word_get_relative_index(dep));
printf("create right arc %d -> %d dist = %d\n", word_get_relative_index(gov), word_get_relative_index(dep), dist);
/* create a new dependency */
word_set_gov(word_buffer_b0(c->bf), word_get_relative_index(stack_top(c->st)));
word_set_label(word_buffer_b0(c->bf), label);
word_set_gov(dep, dist);
word_set_label(dep, label);
/* depset_add(c->ds, stack_top(c->st), label, word_buffer_b0(c->bf)); */
stack_push(c->st, word_buffer_b0(c->bf));
word_buffer_move_right(c->bf);
......@@ -91,7 +102,7 @@ int movement_shift(config *c, int stream, float score)
/* REDUCE transition: pop the word on top of the stack.
   c     : configuration whose stack (c->st) is modified
   score : not used by this function
   Returns 1 when the top of the stack was popped, 0 when the transition is
   not applicable (empty stack, or top word without a governor). */
int movement_reduce(config *c, float score)
{
  if(stack_is_empty(c->st)) return 0;
  /* The gov field holds an offset relative to the word itself, and 0 is the
     "no governor" value (see word_get_gov_relative_index).  -1 is therefore a
     valid governor (the word immediately to the left) and must not block
     the reduction. */
  if(word_get_gov(stack_top(c->st)) == 0) return 0; /* word on top of stack does not have a governor */
  stack_pop(c->st);
  return 1;
}
......
......@@ -7,12 +7,18 @@
int check_all_dependents_of_word_in_ref_are_in_hyp(config *c, word_buffer *ref, int word_index)
{
int dep;
int gov_ref;
int gov_hyp;
for(dep=1; dep < ref->nbelem; dep++){
if(word_get_gov_relative_index(word_buffer_get_word_n(ref, dep)) == word_index){ /* found a dependent of word in ref */
gov_ref = word_get_gov_relative_index(word_buffer_get_word_n(ref, dep));
if(gov_ref == word_index){ /* found a dependent of word in ref */
/* look for a dependency in hyp such that its dependent is dep */
printf("found a dep of word %d in ref, it is %d\n", word_index, dep);
gov_hyp = word_get_gov_relative_index(word_buffer_get_word_n(config_get_buffer(c), dep));
if(word_get_gov_relative_index(word_buffer_get_word_n(config_get_buffer(c), dep)) != word_index) return 0;
printf("gov of %d in hyp is %d\n", dep,gov_hyp);
if(gov_hyp != gov_ref) return 0;
/*
if((dep >= c->ds->length)
|| (c->ds->array[dep].gov == NULL)
......@@ -29,38 +35,42 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref)
word *s0; /* word on top of stack */
word *b0; /* next word in the bufer */
int s0_index, b0_index;
int s0_gov_index, b0_gov_index;
if(!stack_is_empty(c->st) && !word_buffer_is_empty(c->bf)){
s0 = stack_top(c->st);
s0_index = word_get_relative_index(s0);
s0_gov_index = word_get_gov_relative_index(word_buffer_get_word_n(ref, s0_index));
b0 = word_buffer_b0(c->bf);
b0_index = word_get_relative_index(b0);
b0_gov_index = word_get_gov_relative_index(word_buffer_get_word_n(ref, b0_index));
printf("s0_index = %d b0_index = %d\n", s0_index, b0_index);
printf("dans ref gov de s0 (%d) = %d\n", s0_index, word_get_gov_relative_index(word_buffer_get_word_n(ref, s0_index)));
printf("dans ref gov de b0 (%d) = %d\n", b0_index, word_get_gov_relative_index(word_buffer_get_word_n(ref, b0_index)));
/* printf("s0_index = %d b0_index = %d\n", s0_index, b0_index);
printf("dans ref gov de s0 (%d) = %d\n", s0_index, s0_gov_index);
printf("dans ref gov de b0 (%d) = %d\n", b0_index, b0_gov_index);*/
/* LEFT ARC b0 is the governor and s0 the dependent */
if(word_get_gov_relative_index(word_buffer_get_word_n(ref, s0_index)) == b0_index){
if(s0_gov_index == b0_index){
printf("oracle says left\n");
/* printf("oracle says left\n"); */
return movement_left_code(word_get_label(word_buffer_get_word_n(ref, s0_index)));
}
/* RIGHT ARC s0 is the governor and b0 the dependent */
if((word_get_gov_relative_index(word_buffer_get_word_n(ref, b0_index)) == s0_index))
if(b0_gov_index == s0_index){
/* printf("oracle says right\n"); */
return movement_right_code(word_get_label(word_buffer_get_word_n(ref, b0_index)));
}
/* REDUCE */
if((stack_height(c->st) > 2) && check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index))
/* if(word_get_gov_relative_index(stack_top(c->st)) != -1) */
printf("all dep in ref are in hyp = %d\n", check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index));
if((stack_height(c->st) > 2)
&& check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)
&& (word_get_gov(stack_top(c->st)) != 0)) /* word on top of the stack has a goveror */
{
return MVT_REDUCE;
/* if(check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index))
return MVT_REDUCE;*/
}
/* SHIFT */
return MVT_SHIFT;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment