/* simple_decoder_tagparser_arc_eager.c */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"movement_tagparser_arc_eager.h"
#include"feat_fct.h"
#include"config2feat_vec.h"
#include"feature_table.h"
#include"dico.h"

/*
 * Fill in the signature of the most recent words of a word buffer.
 *
 * The buffer is scanned from its last element towards its first one.
 * Every word whose signature is still -1 gets the signature that
 * form2pos_get_signature() returns for its form.  The scan ends at the
 * first word that already carries a signature, or when the beginning of
 * the buffer is reached.
 *
 * bf  : word buffer whose words are updated in place
 * f2p : form2pos table queried for the signatures
 */
void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p)
{
  int position = word_buffer_get_nbelem(bf);

  while(--position >= 0){
    word *current = word_buffer_get_word_n(bf, position);

    /* this word already has a signature: stop scanning */
    if(word_get_signature(current) != -1)
      return;

    current->signature = form2pos_get_signature(f2p, current->form);
  }
}



/*
 * Write every word of the configuration's buffer to stdout, one word
 * per line, as tab separated columns:
 *
 *   input <TAB> pos <TAB> _ <TAB> governor <TAB> label <TAB> sentence-end flag
 *
 * The pos and label columns are printed as "_" when the word has no
 * value for them (code -1) or when the dictionary lookup yields NULL.
 * The last column is "1" when word_get_sent_seg() equals 1 and "0"
 * otherwise.
 *
 * c           : configuration whose buffer is printed
 * dico_labels : dictionary used to turn label codes into strings
 * dico_pos    : dictionary used to turn pos codes into strings
 */
void print_word_buffer_tagparser(config *c, dico *dico_labels, dico *dico_pos)
{
  int n;

  for(n = 0; n < config_get_buffer(c)->nbelem; n++){
    word *w = word_buffer_get_word_n(config_get_buffer(c), n);
    char *pos_string = NULL;
    char *label_string = NULL;

    printf("%s\t", word_get_input(w));

    /* part of speech column, followed by an empty "_" column */
    if(word_get_pos(w) != -1)
      pos_string = dico_int2string(dico_pos, word_get_pos(w));
    if(pos_string == NULL)
      fputs("_\t_\t", stdout);
    else
      printf("%s\t_\t", pos_string);

    printf("%d\t", word_get_gov(w));

    /* dependency label column */
    if(word_get_label(w) != -1)
      label_string = dico_int2string(dico_labels, word_get_label(w));
    if(label_string == NULL)
      fputs("_\t", stdout);
    else
      printf("%s\t", label_string);

    /* sentence segmentation flag ends the line */
    fputs((word_get_sent_seg(w) == 1)? "1\n" : "0\n", stdout);
  }
}


/*
 * Decode the input with the tagparser arc-eager movements, always
 * applying the movement that feature_table_argmax() scores highest.
 *
 * The input is read from ctx->input_filename when it is set, from stdin
 * otherwise.  The model is loaded from ctx->perc_model_filename.  Until
 * the configuration is terminal, each step:
 *   - adds signatures to the newly read words when ctx->f2p is set,
 *   - builds the feature vector of the current configuration,
 *   - selects the best scoring movement and applies it,
 *   - falls back to a SHIFT when the selected movement could not be
 *     applied (the movement function returned 0) or is of unknown type.
 *
 * Output (all on stdout):
 *   - ctx->trace_mode : one line per step with the index of the first
 *     buffer word, the stack, the selected movement and the score
 *     difference between the two best movements; the final word buffer
 *     is not printed in this mode.
 *   - ctx->debug_mode : the configuration, the two best movements with
 *     their scores, their score difference and the entropy, per step.
 *   - otherwise       : the final word buffer, printed by
 *     print_word_buffer_tagparser().
 *
 * ctx : decoding context (file names, dictionaries, feature model, flags)
 */
void simple_decoder_tagparser_arc_eager(context *ctx)
{
  FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
  feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
  feat_vec *fv = feat_vec_new(feature_types_nb);
  config *c = NULL;
  int root_label;
  int mvt_code;
  int mvt_type;
  int mvt_label;
  int result;
  int index;
  int argmax1, argmax2;
  float max;
  float max1, max2;
  float entropy;

  /* label given to root dependencies; code 0 is used when the label is unknown */
  root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
  if(root_label == -1) root_label = 0;

  /* NOTE(review): the meaning of the constant 5 is defined by config_initial() */
  c = config_initial(f, ctx->mcd_struct, 5);
  while(!config_is_terminal(c)){
    if(ctx->f2p)
      add_signature_to_words_in_word_buffer(c->bf, ctx->f2p);

    config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
    mvt_code = feature_table_argmax(fv, ft, &max);
    mvt_type = movement_tagparse_type(mvt_code);
    mvt_label = movement_tagparse_label(mvt_code);

    if(ctx->trace_mode){
      index = word_get_index(word_buffer_b0(config_get_buffer(c)));
      fprintf(stdout, "%d\t", index);

      stack_print(stdout, c->st);
      fprintf(stdout, "\t");

      movement_tagparse_print(stdout, mvt_code, ctx->dico_labels, ctx->dico_postags);
      fprintf(stdout, "\t");
      /* margin between the best and the second best movement */
      feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2);
      printf("%f\n", max1 - max2);
    }

    if(ctx->debug_mode){
      fprintf(stdout, "***********************************\n");
      config_print(stdout, c);
      entropy = feature_table_entropy(fv, ft);
      feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2);
      movement_tagparse_print(stdout, argmax1, ctx->dico_labels, ctx->dico_postags);
      printf(":\t%f\n", max1);
      movement_tagparse_print(stdout, argmax2, ctx->dico_labels, ctx->dico_postags);
      printf(":\t%f\n", max2);
      printf("delta = %f\n", max1 - max2);
      printf("entropy = %f\n",entropy);
    }

    /* result stays 0 when the movement is refused or its type is unknown */
    result = 0;
    switch(mvt_type){
    case MVT_POSTAG :
      result = movement_tagparse_add_pos(c, max, mvt_label);
      break;
    case MVT_LEFT :
      result = movement_tagparse_left_arc(c, mvt_label, max);
      break;
    case MVT_RIGHT:
      result = movement_tagparse_right_arc(c, mvt_label, max);
      break;
    case MVT_REDUCE:
      result = movement_tagparse_reduce(c, max);
      break;
    case MVT_ROOT:
      result = movement_tagparse_root(c, max, root_label);
      break;
    case MVT_EOS:
      result = movement_tagparse_eos(c, max);
      break;
    case MVT_SHIFT:
      result = movement_tagparse_shift(c, 1, max);
      break;
    default:
      /* unknown movement type: handled by the SHIFT fallback below */
      break;
    }

    if(result == 0){
      if(ctx->debug_mode){
        fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n");
      }
      movement_tagparse_shift(c, 1, max);
    }
  }

  if(!ctx->trace_mode)
    print_word_buffer_tagparser(c, ctx->dico_labels, ctx->dico_postags);

  config_free(c);
  feat_vec_free(fv);
  feature_table_free(ft);
  /* stdin is not ours to close */
  if(ctx->input_filename)
    fclose(f);
}