Commit 85ee246d authored by Alexis Nasr's avatar Alexis Nasr
Browse files

still working on arc eager version, not stable yet

parent 65441c51
......@@ -10,7 +10,7 @@
#define MCD_WF_NB 36
#define MCD_WF_INDEX 0
#define MCD_WF_ID 0
#define MCD_WF_FORM 1
#define MCD_WF_LEMMA 2
#define MCD_WF_CPOS 3
......@@ -53,7 +53,7 @@
#define mcd_get_dico_label(m) (m)->dico_array[MCD_WF_LABEL]
#define mcd_get_index_col(m) (m)->wf2col[MCD_WF_INDEX]
#define mcd_get_index_col(m) (m)->wf2col[MCD_WF_ID]
#define mcd_get_form_col(m) (m)->wf2col[MCD_WF_FORM]
#define mcd_get_lemma_col(m) (m)->wf2col[MCD_WF_LEMMA]
#define mcd_get_cpos_col(m) (m)->wf2col[MCD_WF_CPOS]
......
......@@ -3,13 +3,13 @@
#include "mcd.h"
/* Read accessors for the feature codes stored in a word's wf_array.
   Each accessor yields the fallback value written in its definition when w is NULL.
   The whole expansion is wrapped in parentheses so that the conditional operator
   cannot capture neighbouring operators at the call site
   (e.g. word_get_pos(a) - word_get_pos(b), or a comparison of two accessors).
   The argument w appears twice in each expansion: do not pass an expression
   with side effects. */
#define word_get_index(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_INDEX])
#define word_get_id(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID])
#define word_get_form(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM])
#define word_get_lemma(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA])
#define word_get_cpos(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_CPOS])
#define word_get_pos(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_POS])
#define word_get_feats(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FEATS])
#define word_get_gov(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_GOV])
#define word_get_gov(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_GOV])
#define word_get_label(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL])
#define word_get_stag(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG])
#define word_get_sent_seg(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_SENT_SEG])
......@@ -39,11 +39,12 @@
/* Read accessors for the X/Y/Z feature codes and for the plain fields of a word.
   Each accessor yields the fallback value written in its definition (-1, or NULL
   for word_get_input) when w is NULL.
   The whole expansion is wrapped in parentheses so that the conditional operator
   cannot capture neighbouring operators at the call site
   (e.g. word_get_index(gov) - word_get_index(dep), or word_get_index(a) > word_get_index(b)).
   The argument w appears twice in each expansion: do not pass an expression
   with side effects. */
#define word_get_X(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_X])
#define word_get_Y(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Y])
#define word_get_Z(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Z])
#define word_get_input(w) (((w) == NULL) ? NULL : (w)->input)
#define word_get_signature(w) (((w) == NULL) ? -1 : (w)->signature)
#define word_get_U1(w) (((w) == NULL) ? -1 : (w)->U1)
#define word_get_relative_index(w) (((w) == NULL) ? -1 : (w)->relative_index)
#define word_get_index(w) (((w) == NULL) ? -1 : (w)->index)
/* Write accessors for the feature codes stored in a word's wf_array.
   Each expands to a single parenthesized assignment expression whose value is
   the value stored, so operators that follow the macro call at the call site
   are not folded into the assigned value. w must not be NULL: no check is made. */
#define word_set_index(w, val) ((w)->wf_array[MCD_WF_INDEX] = (val))
#define word_set_id(w, val) ((w)->wf_array[MCD_WF_ID] = (val))
#define word_set_form(w, val) ((w)->wf_array[MCD_WF_FORM] = (val))
#define word_set_lemma(w, val) ((w)->wf_array[MCD_WF_LEMMA] = (val))
#define word_set_cpos(w, val) ((w)->wf_array[MCD_WF_CPOS] = (val))
......@@ -80,7 +81,7 @@
/* Write accessors for the Y/Z feature codes and for the plain fields of a word.
   Each expands to a single parenthesized assignment expression whose value is
   the value stored, so operators that follow the macro call at the call site
   are not folded into the assigned value. w must not be NULL: no check is made. */
#define word_set_Y(w, val) ((w)->wf_array[MCD_WF_Y] = (val))
#define word_set_Z(w, val) ((w)->wf_array[MCD_WF_Z] = (val))
#define word_set_signature(w, val) ((w)->signature = (val))
#define word_set_relative_index(w, val) ((w)->relative_index = (val))
#define word_set_index(w, val) ((w)->index = (val))
typedef struct _word {
int wf_array[MCD_WF_NB]; /* array containing the codes corresponding to the different word features */
......@@ -89,7 +90,7 @@ typedef struct _word {
int signature; /* pos tags that this form can have (represented as a boolean string) */
int label;
char *form;
int relative_index;
int index;
} word;
word *word_new(char *input);
......@@ -104,6 +105,6 @@ word *word_read(FILE *f, mcd *mcd_struct);
word *word_parse_buffer(char *buffer, mcd *mcd_struct);
int word_is_eos(word *w, mcd *mcd_struct);
int word_get_gov_relative_index(word *w);
int word_get_gov_index(word *w);
#endif
......@@ -65,9 +65,9 @@ form2pos *form2pos_read(char *filename)
int form2pos_get_signature(form2pos *f2p, char *form)
{
/* if(form == NULL)
if(form == NULL)
return -1;
else*/
else
return hash_get_val(f2p->h_form2signature, form);
}
......
......@@ -231,11 +231,11 @@ mcd *mcd_read(char *mcd_filename, int verbose)
mcd *mcd_build_conll07(void)
{
mcd *m = mcd_new(8);
m->wf[0]=MCD_WF_INDEX;
m->wf[0]=MCD_WF_ID;
m->wf_str[0]=strdup("INDEX");
m->representation[0]= MCD_REPRESENTATION_INT;
m->filename[0] = strdup("_");
m->wf2col[MCD_WF_INDEX] = 0;
m->wf2col[MCD_WF_ID] = 0;
m->wf[1]=MCD_WF_FORM;
m->wf_str[1]=strdup("FORM");
......@@ -332,11 +332,11 @@ mcd *mcd_build_ifpls(void)
{
mcd *m = mcd_new(6);
m->wf[0]=MCD_WF_INDEX;
m->wf[0]=MCD_WF_ID;
m->wf_str[0]=strdup("INDEX");
m->representation[0]= MCD_REPRESENTATION_INT;
m->filename[0] = strdup("_");
m->wf2col[MCD_WF_INDEX] = 0;
m->wf2col[MCD_WF_ID] = 0;
m->wf[1]=MCD_WF_FORM;
m->wf_str[1]=strdup("FORM");
......@@ -389,7 +389,7 @@ dico_vec *mcd_build_dico_vec(mcd *mcd_struct)
int mcd_wf_code(char *wf)
{
if(!strcmp(wf, "INDEX")) return MCD_WF_INDEX;
if(!strcmp(wf, "INDEX")) return MCD_WF_ID;
if(!strcmp(wf, "FORM")) return MCD_WF_FORM;
if(!strcmp(wf, "LEMMA")) return MCD_WF_LEMMA;
if(!strcmp(wf, "CPOS")) return MCD_WF_CPOS;
......
......@@ -48,7 +48,7 @@ void sentence_add_word(sentence *s, word *w)
s->length++;
s->words = (word **)realloc(s->words, s->length * sizeof(word *));
s->words[s->length -1] = w;
word_set_relative_index(w, s->length -1);
word_set_index(w, s->length -1);
}
void sentence_free(sentence *s)
......
......@@ -20,7 +20,7 @@ word *word_new(char *input)
w->wf_array[MCD_WF_GOV] = 0;
w->form = NULL;
w->relative_index = -1;
w->index = -1;
w->signature = -1;
return w;
}
......@@ -102,8 +102,8 @@ word *word_create_dummy(mcd *mcd_struct)
word *w = word_new(NULL);
/* int type; */
w->wf_array[MCD_WF_INDEX] = 0;
w->relative_index = 0;
w->wf_array[MCD_WF_ID] = 0;
w->index = 0;
/* for(type = 1; type < MCD_WF_NB; type++)
w->wf_array[type] = -1;*/
/* if(mcd_struct->wf2col[type] != -1)
......@@ -120,8 +120,8 @@ void word_print2(FILE *f, word *w)
printf("form = %d\t", word_get_form(w));
printf("lemma = %d\t", word_get_lemma(w));
printf("pos = %d\t", word_get_pos(w));
printf("index = %d\t", word_get_index(w));
printf("rel index = %d\n", word_get_relative_index(w));
printf("index = %d\t", word_get_id(w));
printf("rel index = %d\n", word_get_index(w));
}
void word_print(FILE *f, word *w)
......@@ -140,16 +140,10 @@ int word_is_eos(word *w, mcd *mcd_struct)
return word_get_sent_seg(w);
}
int word_get_gov_relative_index(word *w)
int word_get_gov_index(word *w)
{
int index;
if(word_get_gov(w) == 0) return -1;
/* printf("in word_get_gov_rel_index(%d)\n", word_get_relative_index(w));
printf("gov = %d\n", word_get_gov(w)); */
index = (word_get_relative_index(w)) + (word_get_gov(w));
/* printf("index = %d\n", index); */
return index;
/* return word_get_relative_index(w) + word_get_gov(w); */
index = (word_get_index(w)) + (word_get_gov(w));
return index;
}
......@@ -49,19 +49,19 @@ void word_buffer_print_compact(FILE *f, word_buffer *wb)
{
word *w;
w = word_buffer_bm3(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
if(w){ fprintf(f, "%d:%s ", word_get_index(w), w->form);}
w = word_buffer_bm2(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
if(w){ fprintf(f, "%d:%s ", word_get_index(w), w->form);}
w = word_buffer_bm1(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
if(w){ fprintf(f, "%d:%s ", word_get_index(w), w->form);}
w = word_buffer_b0(wb);
if(w){ fprintf(f, "[%d:%s] ", word_get_relative_index(w), w->form);}
if(w){ fprintf(f, "[%d:%s] ", word_get_index(w), w->form);}
w = word_buffer_b1(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
if(w){ fprintf(f, "%d:%s ", word_get_index(w), w->form);}
w = word_buffer_b2(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
if(w){ fprintf(f, "%d:%s ", word_get_index(w), w->form);}
w = word_buffer_b3(wb);
if(w){ fprintf(f, "%d:%s ", word_get_relative_index(w), w->form);}
if(w){ fprintf(f, "%d:%s ", word_get_index(w), w->form);}
fprintf(f, "\n");
}
......@@ -83,7 +83,7 @@ int word_buffer_add(word_buffer *wb, word *w)
wb->array = (word **)realloc(wb->array, wb->size * sizeof(word *));
}
wb->array[wb->nbelem] = w;
word_set_relative_index(w, wb->nbelem);
word_set_index(w, wb->nbelem);
wb->nbelem++;
return wb->nbelem - 1;
......@@ -116,6 +116,7 @@ int word_buffer_read_next_word(word_buffer *wb)
word *w = NULL;
w = word_read(wb->input_file, wb->mcd_struct);
/* if((w) && (w->input)) printf("## %s\n", w->input); */
if(w == NULL) return -1;
word_buffer_add(wb, w);
return wb->nbelem - 1;
......@@ -141,6 +142,8 @@ int word_buffer_move_left(word_buffer *wb)
/* Tell whether the buffer's current index has reached or passed the number
   of stored elements: returns 1 in that case, 0 otherwise. */
int word_buffer_end(word_buffer *wb)
{
  if(wb->current_index < wb->nbelem)
    return 0;
  return 1;
}
......@@ -165,7 +168,7 @@ int word_buffer_read_sentence(word_buffer *wb)
/* fprintf(stderr, "%s", buffer); */
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) break; /* end of the sentence */
w = word_parse_buffer(buffer, word_buffer_get_mcd(wb));
word_set_relative_index(w, index);
word_set_index(w, index);
index++;
word_buffer_add(wb, w);
if(word_is_eos(w, word_buffer_get_mcd(wb))) break;
......
......@@ -30,7 +30,7 @@ config *config_new(FILE *f, mcd *mcd_struct, int lookahead)
w = word_read(c->f, c->mcd_struct);
if(w == NULL) return NULL;
word_set_relative_index(w, c->current_index);
word_set_index(w, c->current_index);
c->current_index++;
queue_add(c->bf, w);
return w;
......@@ -48,7 +48,7 @@ void config_free(config *c)
int config_is_terminal(config *c)
{
return word_buffer_is_last(c->bf);
return word_buffer_end(c->bf);
}
config *config_initial(FILE *f, mcd *mcd_struct, int lookahead)
......@@ -102,12 +102,12 @@ void config_add_mvt(config *c, int mvt)
void config_print(FILE *f, config *c)
{
word *b0 = NULL;
word *s0 = NULL;
/* word *b0 = NULL; */
/* word *s0 = NULL; */
if(c){
if(!stack_is_empty(c->st))
s0 = stack_elt_n(c->st, 0);
b0 = word_buffer_b0(c->bf);
/* s0 = stack_elt_n(c->st, 0); */
/* b0 = word_buffer_b0(c->bf); */
/* if(s0) { printf("s0 = "); word_print2(stdout, s0);} */
/* if(b0) { printf("b0 = "); word_print2(stdout, b0);} */
......
......@@ -44,10 +44,10 @@ void depset_add(depset *d, word *gov, int label, word *dep)
int new_length;
if(gov == NULL || dep == NULL) return;
word *max = (word_get_relative_index(gov) > word_get_relative_index(dep)) ? gov : dep;
word *max = (word_get_index(gov) > word_get_index(dep)) ? gov : dep;
if(word_get_relative_index(max) >= d->length){
new_length = word_get_relative_index(max) + 1;
if(word_get_index(max) >= d->length){
new_length = word_get_index(max) + 1;
d->array = (dependency *)realloc(d->array, new_length * sizeof(dependency));
for(i=d->length; i < new_length; i++){
d->array[i].gov = NULL;
......@@ -56,9 +56,9 @@ void depset_add(depset *d, word *gov, int label, word *dep)
}
d->length = new_length;
}
d->array[word_get_relative_index(dep)].gov = gov;
d->array[word_get_relative_index(dep)].dep = dep;
d->array[word_get_relative_index(dep)].label = label;
d->array[word_get_index(dep)].gov = gov;
d->array[word_get_index(dep)].dep = dep;
d->array[word_get_index(dep)].label = label;
}
void depset_print(FILE *f, depset *d)
......@@ -66,7 +66,7 @@ void depset_print(FILE *f, depset *d)
int i;
for(i=0; i < d->length; i++){
if((d->array[i].gov) && (d->array[i].dep))
fprintf(f, "(%d, %d, %d) ", word_get_relative_index(d->array[i].dep), d->array[i].label, word_get_relative_index(d->array[i].gov));
fprintf(f, "(%d, %d, %d) ", word_get_index(d->array[i].dep), d->array[i].label, word_get_index(d->array[i].gov));
}
fprintf(f, "\n");
}
......@@ -78,7 +78,7 @@ void depset_print2(FILE *f, depset *d, dico *dico_labels)
char *label;
for(i=1; i < d->length; i++){
if((d->array[i].gov) && (d->array[i].dep)){
distance = word_get_relative_index(d->array[i].gov) - word_get_relative_index(d->array[i].dep);
distance = word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep);
/* fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, distance, dico_int2string(dico_labels, d->array[i].label)); */
label = dico_int2string(dico_labels, d->array[i].label);
......@@ -103,10 +103,10 @@ void depset_print3(FILE *f, depset *d, dico *dico_labels)
for(i=1; i < d->length; i++){
if((d->array[i].gov) && (d->array[i].dep)){
if(d->array[i].label == root_code)
fprintf(f, "%d\t%s\t%d\t%s\n", word_get_relative_index(d->array[i].dep), d->array[i].dep->input, 0, dico_int2string(dico_labels, d->array[i].label));
fprintf(f, "%d\t%s\t%d\t%s\n", word_get_index(d->array[i].dep), d->array[i].dep->input, 0, dico_int2string(dico_labels, d->array[i].label));
else{
distance = word_get_relative_index(d->array[i].gov) - word_get_relative_index(d->array[i].dep);
fprintf(f, "%d\t%s\t%d\t%s\n", word_get_relative_index(d->array[i].dep), d->array[i].dep->input, distance, dico_int2string(dico_labels, d->array[i].label));
distance = word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep);
fprintf(f, "%d\t%s\t%d\t%s\n", word_get_index(d->array[i].dep), d->array[i].dep->input, distance, dico_int2string(dico_labels, d->array[i].label));
}
}
}
......@@ -129,9 +129,9 @@ void depset_print_new_index(FILE *f, depset *d, dico *dico_labels)
for(i=1; i < d->length; i++){
if((d->array[i].gov) && (d->array[i].dep)){
/* fprintf(f, "%d\t", word_get_relative_index(d->array[i].dep)); */
fprintf(f, "%d\t", word_get_relative_index(d->array[i].dep));
fprintf(f, "%s\t%d\t%s\n", skip_index(d->array[i].dep->input), word_get_relative_index(d->array[i].gov), dico_int2string(dico_labels, d->array[i].label));
/* fprintf(f, "%d\t", word_get_index(d->array[i].dep)); */
fprintf(f, "%d\t", word_get_index(d->array[i].dep));
fprintf(f, "%s\t%d\t%s\n", skip_index(d->array[i].dep->input), word_get_index(d->array[i].gov), dico_int2string(dico_labels, d->array[i].label));
}
}
fprintf(f, "\n");
......@@ -148,8 +148,8 @@ int depset_compare(depset *d1, depset *d2)
if(d1->length != d2->length){ fprintf(stdout, "fail\n"); return 0;}
for(i=0; i < d1->length; i++){
for(j=0; j < d2->length; j++){
if((word_get_relative_index(d1->array[i].gov) == word_get_relative_index(d2->array[j].gov))
&& (word_get_relative_index(d1->array[i].dep) == word_get_relative_index(d2->array[j].dep))
if((word_get_index(d1->array[i].gov) == word_get_index(d2->array[j].gov))
&& (word_get_index(d1->array[i].dep) == word_get_index(d2->array[j].dep))
&& (d1->array[i].label == d2->array[j].label)) break;
}
if(j == d2->length){
......
......@@ -8,6 +8,9 @@
/* word features */
/* words in the stack */
/* Feature extractors for the word returned by stack_s0() on the configuration's stack.
   Each one passes that word to a word_get_* accessor macro and returns the result;
   the accessor macros supply their own fallback value when the word is NULL. */
/* s0g: word_get_gov() of that word */
int s0g(config *c) {return word_get_gov(stack_s0(config_get_stack(c)));}
/* s0sf: word_get_label() of that word */
int s0sf(config *c) {return word_get_label(stack_s0(config_get_stack(c)));}
/* s0f: word_get_form() of that word */
int s0f(config *c) {return word_get_form(stack_s0(config_get_stack(c)));}
/* s0l: word_get_lemma() of that word */
int s0l(config *c) {return word_get_lemma(stack_s0(config_get_stack(c)));}
/* s0c: word_get_cpos() of that word */
int s0c(config *c) {return word_get_cpos(stack_s0(config_get_stack(c)));}
......@@ -44,6 +47,9 @@ int s0Z(config *c) {return word_get_Z(stack_s0(config_get_stack(c)));}
/* s0U1: word_get_U1() of the word returned by stack_s0() on the configuration's stack */
int s0U1(config *c) {return word_get_U1(stack_s0(config_get_stack(c)));}
/* s0sgn: word_get_signature() of the word returned by stack_s0() */
int s0sgn(config *c) {return word_get_signature(stack_s0(config_get_stack(c)));}
/* Feature extractors for the word returned by stack_s1() on the configuration's stack.
   Each one passes that word to a word_get_* accessor macro and returns the result;
   the accessor macros supply their own fallback value when the word is NULL. */
/* s1g: word_get_gov() of that word */
int s1g(config *c) {return word_get_gov(stack_s1(config_get_stack(c)));}
/* s1sf: word_get_label() of that word */
int s1sf(config *c) {return word_get_label(stack_s1(config_get_stack(c)));}
/* s1f: word_get_form() of that word */
int s1f(config *c) {return word_get_form(stack_s1(config_get_stack(c)));}
/* s1l: word_get_lemma() of that word */
int s1l(config *c) {return word_get_lemma(stack_s1(config_get_stack(c)));}
/* s1c: word_get_cpos() of that word */
int s1c(config *c) {return word_get_cpos(stack_s1(config_get_stack(c)));}
......@@ -400,8 +406,8 @@ int ldep_s0r(config *c){
int i;
if(top){
if(word_get_relative_index(top) >= c->ds->length) return -1;
for(i=word_get_relative_index(top); i > 0; i--)
if(word_get_index(top) >= c->ds->length) return -1;
for(i=word_get_index(top); i > 0; i--)
if(c->ds->array[i].gov == top)
return i;
}
......@@ -422,7 +428,7 @@ int rdep_s0r(config *c){
int i;
if(top)
for(i=word_get_relative_index(top); i < c->ds->length; i++)
for(i=word_get_index(top); i < c->ds->length; i++)
if(c->ds->array[i].gov == top)
return i;
return -1;
......@@ -442,8 +448,8 @@ int ldep_b0r(config *c){
int i;
if(top){
if(word_get_relative_index(top) >= c->ds->length) return -1;
for(i=word_get_relative_index(top); i > 0; i--)
if(word_get_index(top) >= c->ds->length) return -1;
for(i=word_get_index(top); i > 0; i--)
if(c->ds->array[i].gov == top)
return i;
}
......@@ -468,7 +474,7 @@ int rdep_b0r(config *c){
int i;
if(top)
for(i=word_get_relative_index(top); i < c->ds->length; i++)
for(i=word_get_index(top); i < c->ds->length; i++)
if(c->ds->array[i].gov == top)
return i;
return -1;
......@@ -545,7 +551,7 @@ int dist_s0_b0(config *c){
if(stack_is_empty(c->st) || word_buffer_is_empty(c->bf))
return 0;
dist = word_get_relative_index(word_buffer_b0(c->bf)) - word_get_relative_index(stack_top(c->st));
dist = word_get_index(word_buffer_b0(c->bf)) - word_get_index(stack_top(c->st));
return (abs(dist) > 6)? 6 : dist;
}
......
......@@ -6,6 +6,9 @@
typedef int (*feat_fct) (config *c);
/* word features */
/* Extractors for the word given by stack_s0() on the configuration's stack;
   all of them match the feat_fct signature. */
int s0g(config *c);  /* word_get_gov() of that word */
int s0sf(config *c); /* word_get_label() of that word */
int s0f(config *c);  /* word_get_form() of that word */
int s0l(config *c);  /* word_get_lemma() of that word */
int s0c(config *c);  /* word_get_cpos() of that word */
......@@ -44,6 +47,10 @@ int s0sgn(config *c);
int s0r(config *c);
/* Extractors for the word given by stack_s1() on the configuration's stack;
   all of them match the feat_fct signature. */
int s1g(config *c);  /* word_get_gov() of that word */
int s1sf(config *c); /* word_get_label() of that word */
int s1f(config *c);  /* word_get_form() of that word */
int s1l(config *c);  /* word_get_lemma() of that word */
int s1c(config *c);  /* word_get_cpos() of that word */
......
......@@ -33,6 +33,9 @@ feat_lib *feat_lib_build(void)
{
feat_lib *fl = feat_lib_new();
feat_lib_add(fl, FEAT_TYPE_LABEL, (char *)"s0sf", s0sf);
feat_lib_add(fl, FEAT_TYPE_INT , (char *)"s0g", s0g);
feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"s0f", s0f);
feat_lib_add(fl, FEAT_TYPE_LEMMA, (char *)"s0l", s0l);
feat_lib_add(fl, FEAT_TYPE_CPOS, (char *)"s0c", s0c);
......@@ -69,6 +72,9 @@ feat_lib *feat_lib_build(void)
feat_lib_add(fl, FEAT_TYPE_INT_3, (char *)"s0U1", s0U1);
feat_lib_add(fl, FEAT_TYPE_INT, (char *)"s0sgn", s0sgn);
feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"s1g", s1g);
feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"s1sf", s1sf);
feat_lib_add(fl, FEAT_TYPE_FORM, (char *)"s1f", s1f);
feat_lib_add(fl, FEAT_TYPE_LEMMA, (char *)"s1l", s1l);
feat_lib_add(fl, FEAT_TYPE_CPOS, (char *)"s1c", s1c);
......
......@@ -54,38 +54,39 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
int eos_label = dico_string2int(ctx->dico_labels, "eos");
word_buffer *ref = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct);
FILE *mcf_file = myfopen(ctx->input_filename, "r");
int start_sentence_index = 1;
/* create an mcd that corresponds to ctx->mcd_struct, but without gov and label */
/* the idea is to ignore syntax in the mcf file that will be read */
/* it is ugly !!! */
mcd *mcd_struct_hyp = mcd_copy(ctx->mcd_struct);
mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_GOV);
mcd_remove_wf_column(mcd_struct_hyp, MCD_WF_LABEL);
c = config_initial(mcf_file, mcd_struct_hyp, 5);
while(!word_buffer_end(ref)){
/* printf("************ REF ************\n");
/*printf("************ REF ************\n");
word_buffer_print(stdout, ref);
printf("*****************************\n");*/
printf("*****************************\n");*/
printf("*****************************\n");
config_print(stdout,c);
/* printf("*****************************\n"); */
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
/* feat_vec_print(stdout, fv); */
mvt_code = oracle_parser_arc_eager(c, ref);
mvt_code = oracle_parser_arc_eager(c, ref, start_sentence_index);
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
/* printf("mvt code = %d\n", mvt_code);
printf("mvt type = %d\n", mvt_type); */
movement_print(stdout, mvt_code, ctx->dico_labels);
/* config_print(stdout,c); */
/* movement_print(stdout, mvt_code, ctx->dico_labels); */
fprintf(output_file, "%d", mvt_code);
feat_vec_print(output_file, fv);
if(mvt_type == MVT_LEFT){
movement_left_arc(c, mvt_label, 0);
continue;
......@@ -94,35 +95,21 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
if(mvt_type == MVT_RIGHT){
movement_right_arc(c, mvt_label, 0);
word_buffer_move_right(ref);
if((mvt_label == eos_label)){ /* sentence is complete */
printf("sentence complete\n");
while(movement_reduce(c,0)){
printf("reduce\n");
printf("*****************************\n");
config_print(stdout,c);
}
/* pop eos from stack */
/* stack_pop(config_get_stack(c)); */
/* pop root from stack */
/* stack_pop(config_get_stack(c)); */
config_print(stdout,c);
sentence_nb++;
start_sentence_index = word_get_index(word_buffer_b0(config_get_buffer(c))) - 1;
/* printf("%d\n", start_sentence_index); */
printf("ref current index = %d\n", word_buffer_get_current_index(ref));
/* printf("*****************************\n"); */
/* config_print(stdout,c); */
if(word_buffer_is_last(ref)){
printf("it is the end\n");
/* printf("it is the end\n"); */
break;
}
/* change index of dummy word */
word_set_relative_index(stack_top(config_get_stack(c)), word_get_relative_index(word_buffer_b0(config_get_buffer(c))) - 1);
}
}
continue;
}
if(mvt_type == MVT_REDUCE){
movement_reduce(c, 0);
continue;
......@@ -133,61 +120,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
continue;
}
}
}
#if 0
void generate_training_file_buffer(FILE *output_file, context *ctx)
{
config *c;
int mvt_code;
char mvt_type;
int mvt_label;
feat_vec *fv = feat_vec_new(feature_types_nb);
sentence *ref = NULL;
int sentence_nb = 0;
FILE *conll_file = myfopen(ctx->input_filename, "r");
FILE *conll_file_ref = myfopen(ctx->input_filename, "r");
c = config_initial(conll_file, ctx->mcd_struct, 0);
while((ref = sentence_read(conll_file_ref, ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){
/* sentence_print(stdout, ref, NULL); */
word_buffer_read_sentence(c->bf);
while(!config_is_terminal(c)){
/* config_print(stdout,c); */
config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);
mvt_code = oracle_parser(c, ref);
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
/* printf("mvt type = %d mvt label = %d\n", mvt_type, mvt_label); */
fprintf(output_file, "%d", mvt_code);