Skip to content
Snippets Groups Projects
Commit ba20febe authored by Alexis Nasr's avatar Alexis Nasr
Browse files

added a trace mode for tagger and parser. added confidence measure for perceptron.

parent cdc23c53
Branches
No related tags found
No related merge requests found
......@@ -180,8 +180,12 @@ int main(int argc, char *argv[])
fprintf(output_file, "_");
fprintf(output_file, "\t");
if(gov_col)
if(gov_col){
if(word_get_gov(w) == 0)
fprintf(output_file, "0\t");
else
fprintf(output_file, "%d\t", word_get_gov(w) + index);
}
else
fprintf(output_file, "_\t");
......
......@@ -53,6 +53,12 @@ target_link_libraries(maca_trans_parser_arc_eager_mcf2cff transparse)
target_link_libraries(maca_trans_parser_arc_eager_mcf2cff maca_common)
install (TARGETS maca_trans_parser_arc_eager_mcf2cff DESTINATION bin)
add_executable(compare_traces ./src/compare_traces.c)
target_link_libraries(compare_traces perceptron)
target_link_libraries(compare_traces transparse)
target_link_libraries(compare_traces maca_common)
install (TARGETS compare_traces DESTINATION bin)
add_executable(maca_trans_parser ./src/maca_trans_parser.c)
target_link_libraries(maca_trans_parser perceptron)
target_link_libraries(maca_trans_parser transparse)
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* One entry of a trace, holding the four fields of one line of a trace file */
/* (see trace_load for the line layout). */
typedef struct {
int index; /* first field of the line; trace_compare uses it to realign two traces */
char *stack; /* second field: the stack, kept as text */
char *movement; /* third field: the movement, kept as text */
float score; /* fourth field: the score read with the movement */
} configuration;
/* Growable array of configurations, stored in the order they were added. */
typedef struct {
int size; /* number of slots allocated in array */
int nbelem; /* number of configurations currently stored in array */
configuration **array; /* array to store configurations */
} trace;
/* Tell whether two configurations have the same index and the same stack. */
/* The movement and the score are not taken into account. */
/* Returns 1 when both fields match, 0 otherwise. */
int configuration_equal(configuration *c1, configuration *c2)
{
  int same_index = (c1->index == c2->index);

  /* the stacks are only compared when the indexes already agree */
  return same_index && strcmp(c1->stack, c2->stack) == 0;
}
/* Allocate a configuration and fill it with the given values. */
/* The stack and movement pointers are stored as they are: the strings are */
/* not copied. */
/* Returns the new configuration, or NULL when the allocation fails. */
configuration *configuration_new(int index, char *stack, char *movement, float score)
{
  configuration *conf = malloc(sizeof(configuration));

  if(conf != NULL){
    conf->index = index;
    conf->stack = stack;
    conf->movement = movement;
    conf->score = score;
  }
  return conf;
}
/* Write configuration c on f as a single line made of four tab separated */
/* fields: index, stack, movement and score. */
void configuration_print(FILE *f, configuration *c)
{
  const char *stack_text = c->stack;
  const char *movement_text = c->movement;

  fprintf(f, "%d\t%s\t%s\t%f\n", c->index, stack_text, movement_text, c->score);
}
/* Create an empty trace with room for a few configurations. */
/* Every slot of the array is set to NULL, because trace_compare reads the */
/* slot that follows the last configuration to detect the end of a trace. */
/* Never returns NULL: the program is stopped when memory runs out, in the */
/* same way trace_load stops when a file cannot be opened. */
trace *trace_new(void)
{
  int i;
  trace *t = malloc(sizeof *t);

  if(t == NULL){
    fprintf(stderr, "trace_new: cannot allocate memory, aborting\n");
    exit(1);
  }
  t->size = 10;
  t->nbelem = 0;
  t->array = malloc(t->size * sizeof *t->array);
  if(t->array == NULL){
    fprintf(stderr, "trace_new: cannot allocate memory, aborting\n");
    exit(1);
  }
  for(i = 0; i < t->size; i++)
    t->array[i] = NULL;
  return t;
}
/* Append configuration c at the end of trace t, growing the array if needed. */
/* One slot is always kept free after the last configuration and is set to */
/* NULL, because trace_compare reads that slot to detect the end of a trace. */
/* For the same reason a NULL configuration is refused. */
/* Returns the position of c in the trace, or -1 when c is NULL. */
/* The program is stopped when memory runs out. */
int trace_add(trace *t, configuration *c)
{
  if(c == NULL) return -1;

  if(t->nbelem >= t->size - 1){
    int new_size = 2 * (t->size + 1);
    /* keep the old array reachable until realloc is known to have succeeded */
    configuration **new_array = realloc(t->array, new_size * sizeof *new_array);

    if(new_array == NULL){
      fprintf(stderr, "trace_add: cannot allocate memory, aborting\n");
      exit(1);
    }
    t->array = new_array;
    t->size = new_size;
  }
  t->array[t->nbelem] = c;
  t->nbelem++;
  /* the growth test above guarantees that this slot exists */
  t->array[t->nbelem] = NULL;
  return t->nbelem - 1;
}
/* Print every configuration of trace t on f, one per line, in the order */
/* they are stored. */
void trace_print(FILE *f, trace *t)
{
  int pos = 0;

  while(pos < t->nbelem){
    configuration_print(f, t->array[pos]);
    pos++;
  }
}
trace *trace_load(char *trace_filename)
{
FILE *f;
int index;
float score;
char stack[10000];
char movement[100];
char buffer[20000];
if(trace_filename == NULL)
f = stdin;
else
f = fopen(trace_filename, "r");
if(f == NULL){
fprintf(stderr, "cannot open file %s aborting\n", trace_filename);
exit(1);
}
trace *t = trace_new();
/* while(!feof(f)){ */
while(fgets(buffer, 20000, f)){
int r = sscanf(buffer, "%d\t%[^\t]\t%[^\t]\t%f\n", &index, stack, movement, &score);
if(r == 4){
/* printf("index = %d stack = %s movement = %s score = %f\n", index, stack, movement, score); */
trace_add(t, configuration_new(index, strdup(stack), strdup(movement), score));
}
}
if(trace_filename != NULL)
fclose(f);
return t;
}
/* Walk through the reference trace ref and the hypothesis trace hyp side by */
/* side. For every pair of configurations visited, print on stdout the */
/* reference configuration, the hypothesis configuration, then EQUAL or */
/* DIFFERENT according to configuration_equal. Movements and scores are */
/* printed but play no part in the comparison. */
/* After EQUAL both traces move forward. After DIFFERENT the trace whose */
/* current index is the smaller moves forward, or both when the indexes are */
/* the same. */
/* The walk stops as soon as one of the two traces is exhausted. */
void trace_compare(trace *ref, trace *hyp)
{
  int index_hyp = 0;
  int index_ref = 0;

  /* the end of a trace is found with its element count, not by reading the */
  /* slot that follows its last configuration */
  while(index_ref < ref->nbelem && index_hyp < hyp->nbelem){
    configuration *c_ref = ref->array[index_ref];
    configuration *c_hyp = hyp->array[index_hyp];

    /* a NULL entry also ends the walk */
    if(!c_hyp || !c_ref) break;

    printf("REF ");
    configuration_print(stdout, c_ref);
    printf("HYP ");
    configuration_print(stdout, c_hyp);

    if(configuration_equal(c_ref, c_hyp)){
      fprintf(stdout, "EQUAL\n");
      index_hyp++;
      index_ref++;
    }
    else{
      fprintf(stdout, "DIFFERENT\n");
      if(c_ref->index > c_hyp->index)
        index_hyp++;
      else if(c_ref->index < c_hyp->index)
        index_ref++;
      else{
        index_hyp++;
        index_ref++;
      }
    }
  }
}
/* Compare two trace files and print the comparison on stdout. */
/* usage: compare_traces ref_trace_file [hyp_trace_file] */
/* The first argument names the reference trace and is mandatory. When the */
/* second argument is missing, the hypothesis trace is read from the */
/* standard input. */
/* Returns 0 on success, 1 when the reference file name is missing. */
int main(int argc, char *argv[])
{
  char *ref_filename;
  char *hyp_filename;
  trace *t_ref;
  trace *t_hyp;

  if(argc < 2){
    fprintf(stderr, "usage: %s ref_trace_file [hyp_trace_file]\n", (argc > 0) ? argv[0] : "compare_traces");
    return 1;
  }
  ref_filename = argv[1];
  /* argv[2] is only looked at when it exists */
  hyp_filename = (argc > 2) ? argv[2] : NULL;

  fprintf(stderr, "loading file %s\n", ref_filename);
  t_ref = trace_load(ref_filename);

  fprintf(stderr, "loading file %s\n", hyp_filename ? hyp_filename : "(standard input)");
  t_hyp = trace_load(hyp_filename);

  /* trace_print(stdout, t_ref); */
  trace_compare(t_ref, t_hyp);
  return 0;
}
......@@ -111,15 +111,10 @@ void config_add_mvt(config *c, int mvt)
void config_print(FILE *f, config *c)
{
/* word *b0 = NULL; */
/* word *s0 = NULL; */
if(c){
if(!stack_is_empty(c->st))
/* s0 = stack_elt_n(c->st, 0); */
/* b0 = word_buffer_b0(c->bf); */
/* if(s0) { printf("s0 = "); word_print2(stdout, s0);} */
/* if(b0) { printf("b0 = "); word_print2(stdout, b0);} */
if(stack_is_empty(c->st))
fprintf(f, "[ ]");
else
stack_print(f, c->st);
fprintf(f, "\n");
word_buffer_print_compact(f, c->bf);
......
......@@ -81,6 +81,8 @@ context *context_new(void)
ctx->conll = 0;
ctx->ifpls = 1;
ctx->trace_mode = 0;
return ctx;
}
......@@ -149,6 +151,9 @@ void context_root_label_help_message(context *ctx){
void context_f2p_filename_help_message(context *ctx){
fprintf(stderr, "\t-P --f2p <file> : form to pos (f2p) filename\n");
}
/* Print on stderr the help line of the trace mode option (-T, --traces). */
/* The ctx argument is not used. */
void context_trace_mode_help_message(context *ctx){
  fputs("\t-T --traces : activate trace mode (default is false)\n", stderr);
}
context *context_read_options(int argc, char *argv[])
{
......@@ -180,13 +185,14 @@ context *context_read_options(int argc, char *argv[])
{"language", required_argument, 0, 'L'},
{"maca_data_path", required_argument, 0, 'D'},
{"root_label", required_argument, 0, 'R'},
{"f2p", required_argument, 0, 'P'}
{"f2p", required_argument, 0, 'P'},
{"traces", required_argument, 0, 'T'}
};
optind = 0;
opterr = 0;
while ((c = getopt_long (argc, argv, "hvdcSm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:", long_options, &option_index)) != -1){
while ((c = getopt_long (argc, argv, "hvdcSTm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:", long_options, &option_index)) != -1){
switch (c)
{
case 'h':
......@@ -201,6 +207,9 @@ context *context_read_options(int argc, char *argv[])
case 'c':
ctx->conll = 1;
break;
case 'T':
ctx->trace_mode = 1;
break;
case 'm':
ctx->perc_model_filename = strdup(optarg);
break;
......
......@@ -58,6 +58,7 @@ typedef struct {
form2pos *f2p;
int conll;
int ifpls;
int trace_mode;
} context;
context *context_new(void);
......
......@@ -80,8 +80,19 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
config_print(stdout,c);
movement_print(stdout, mvt_code, ctx->dico_labels);
}
if(ctx->trace_mode){
fprintf(output_file, "%d\t", word_get_index(word_buffer_b0(config_get_buffer(c))));
stack_print(output_file, c->st);
fprintf(output_file, "\t");
movement_print(output_file, mvt_code, ctx->dico_labels);
fprintf(output_file, "\t1\n");
}
else{
fprintf(output_file, "%d", mvt_code);
feat_vec_print(output_file, fv);
}
if(mvt_type == MVT_EOS){
movement_eos(c, 0);
......
......@@ -32,32 +32,6 @@ void print_word_buffer(config *c, dico *dico_labels)
}
}
#if 0
void print_word_buffer(config *c, dico *dico_labels)
{
int i;
word *dep;
char *label;
int root_position = 0;
for(i=0; i < config_get_buffer(c)->nbelem; i++){
dep = word_buffer_get_word_n(config_get_buffer(c), i);
if(word_get_gov(dep) == 0) root_position = i;
printf("%s\t", word_get_input(dep));
/* if(word_get_sent_seg(dep) == 1){
printf("%d\teos\t1\n", root_position - i);
}
else{*/
printf("%d\t", word_get_gov(dep));
label = (word_get_label(dep) == -1)? NULL : dico_int2string(dico_labels, word_get_label(dep));
if(label != NULL)
printf("%s\t0\n", label) ;
else
printf("_\t0\n");
/* } */
}
}
#endif
void simple_decoder_parser_arc_eager(context *ctx)
{
......@@ -75,7 +49,7 @@ void simple_decoder_parser_arc_eager(context *ctx)
float delta;
int argmax1, argmax2;
float max1, max2;
int index;
root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
if(root_label == -1) root_label = 0;
......@@ -87,6 +61,19 @@ void simple_decoder_parser_arc_eager(context *ctx)
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
if(ctx->trace_mode){
index = word_get_index(word_buffer_b0(config_get_buffer(c)));
fprintf(stdout, "%d\t", index);
stack_print(stdout, c->st);
fprintf(stdout, "\t");
movement_print(stdout, mvt_code, ctx->dico_labels);
fprintf(stdout, "\t");
feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2);
printf("%f\n", max1 - max2);
}
if(ctx->debug_mode){
fprintf(stdout, "***********************************\n");
......@@ -129,12 +116,13 @@ void simple_decoder_parser_arc_eager(context *ctx)
if(result == 0){
if(ctx->debug_mode){
fprintf(stdout, "WARNING : movement cannot be executed !\n");
fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n");
}
movement_shift(c, 1, max);
}
}
if(!ctx->trace_mode)
print_word_buffer(c, ctx->dico_labels);
config_free(c);
......@@ -143,3 +131,30 @@ void simple_decoder_parser_arc_eager(context *ctx)
if(ctx->input_filename)
fclose(f);
}
#if 0
/* Variant of print_word_buffer that is compiled out by the surrounding #if 0. */
/* For every word of the buffer of configuration c, print on stdout one line */
/* made of: the input of the word, its governor, its label and the constant 0, */
/* separated by tabs. */
/* The label is looked up in dico_labels; "_" is printed instead when the word */
/* has label -1 or when the lookup returns NULL. */
void print_word_buffer(config *c, dico *dico_labels)
{
int i;
word *dep;
char *label;
int root_position = 0;
for(i=0; i < config_get_buffer(c)->nbelem; i++){
dep = word_buffer_get_word_n(config_get_buffer(c), i);
/* root_position is only read by the commented out code below */
if(word_get_gov(dep) == 0) root_position = i;
printf("%s\t", word_get_input(dep));
/* if(word_get_sent_seg(dep) == 1){
printf("%d\teos\t1\n", root_position - i);
}
else{*/
printf("%d\t", word_get_gov(dep));
/* a label of -1 is not looked up in the dictionary */
label = (word_get_label(dep) == -1)? NULL : dico_int2string(dico_labels, word_get_label(dep));
if(label != NULL)
printf("%s\t0\n", label) ;
else
printf("_\t0\n");
/* } */
}
}
#endif
......@@ -5,10 +5,10 @@ target_link_libraries(perceptron_train perceptron)
target_link_libraries(perceptron_train maca_common)
install (TARGETS perceptron_train DESTINATION bin)
#add_executable(maca_trans_parser_cff_cutoff cff_cutoff.c)
#target_link_libraries(maca_trans_parser_cff_cutoff perceptron)
#target_link_libraries(maca_trans_parser_cff_cutoff maca_common)
#install (TARGETS maca_trans_parser_cff_cutoff DESTINATION bin)
add_executable(cff_cutoff cff_cutoff.c)
target_link_libraries(cff_cutoff perceptron)
target_link_libraries(cff_cutoff maca_common)
install (TARGETS cff_cutoff DESTINATION bin)
add_executable(perceptron_eval perceptron_eval.c)
target_link_libraries(perceptron_eval perceptron)
......
......@@ -5,25 +5,109 @@
#include<getopt.h>
#include"feature_table.h"
#include"dico.h"
#include"util.h"
#include"perceptron.h"
#include"perceptron_context.h"
#include"cf_file.h"
void cff_cutoff_help_message(perceptron_context *ctx)
/* Options and data of the cff_cutoff program. */
/* The defaults quoted below are the ones set by cff_cutoff_context_new. */
typedef struct {
int help; /* set to 1 by the -h option */
int verbose; /* set to 1 by the -v option (default 0) */
char *program_name; /* copy of argv[0], made by cff_cutoff_read_options */
char *cff_filename; /* cff file name given with -i (default NULL) */
int cutoff; /* threshold given with -c (default 1); main discards features occurring fewer times */
char *vocabs_filename; /* vocabs file name given with -V (default NULL) */
dico_vec *vocabs; /* default NULL; main fills it with dico_vec_read */
float hash_ratio; /* default 0.5; passed to dico_vec_read by main */
dico *d_perceptron_features; /* default NULL */
} cff_cutoff_context;
cff_cutoff_context *cff_cutoff_context_new(void)
{
cff_cutoff_context *ctx = (cff_cutoff_context *)memalloc(sizeof(cff_cutoff_context));
ctx->verbose = 0;
ctx->program_name = NULL;
ctx->vocabs_filename = NULL;
ctx->cff_filename = NULL;
ctx->cutoff = 1;
ctx->hash_ratio = 0.5;
ctx->vocabs = NULL;
ctx->d_perceptron_features = NULL;
return ctx;
}
void cff_cutoff_context_free(cff_cutoff_context *ctx)
{
perceptron_context_help_message(ctx);
fprintf(stderr, "INPUT\n");
perceptron_context_cutoff_help_message(ctx);
if(ctx->program_name) free(ctx->program_name);
if(ctx->cff_filename) free(ctx->cff_filename);
free(ctx);
}
/* Print on stderr the usage line of the program, built from */
/* ctx->program_name, followed by the list of its options. */
void cff_cutoff_help_message2(cff_cutoff_context *ctx)
{
  static const char *const option_lines[] = {
    "Options:\n",
    "\t-h --help : print this message\n",
    "\t-v --verbose : activate verbose mode\n",
    "\t-i --input <file> : cff file name\n",
    "\t-V --vocabs <file> : vocabs filename\n",
    "\t-c --cutoff <int> : threshold (features appearing less than the threshold are ignored\n"
  };
  size_t line;

  fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
  for(line = 0; line < sizeof option_lines / sizeof option_lines[0]; line++)
    fputs(option_lines[line], stderr);
}
void cff_cutoff_check_options(context *ctx)
/* Build a cff_cutoff_context from the command line. */
/* Recognized options: -h/--help, -v/--verbose, -i/--input <file>, */
/* -V/--vocabs <file> and -c/--cutoff <int>. Options that are not recognized */
/* are ignored silently (opterr is set to 0). When -i or -V is given several */
/* times, the last value is kept. */
/* Returns the new context; the checks on mandatory options are left to */
/* cff_cutoff_check_options. */
cff_cutoff_context *cff_cutoff_read_options(int argc, char *argv[])
{
  int c;
  int option_index = 0;
  cff_cutoff_context *ctx = cff_cutoff_context_new();

  /* getopt_long requires the last element of the array to be filled with */
  /* zeros, otherwise it reads past the end when looking up a long option */
  static struct option long_options[] =
    {
      {"help", no_argument, 0, 'h'},
      {"verbose", no_argument, 0, 'v'},
      {"input", required_argument, 0, 'i'},
      {"vocabs", required_argument, 0, 'V'},
      {"cutoff", required_argument, 0, 'c'},
      {0, 0, 0, 0}
    };

  ctx->program_name = strdup(argv[0]);

  optind = 0;
  opterr = 0;

  while ((c = getopt_long (argc, argv, "hvi:V:c:", long_options, &option_index)) != -1){
    switch (c)
      {
      case 'h':
        ctx->help = 1;
        break;
      case 'v':
        ctx->verbose = 1;
        break;
      case 'V':
        /* drop the value of an earlier -V, if any (NULL the first time) */
        free(ctx->vocabs_filename);
        ctx->vocabs_filename = strdup(optarg);
        break;
      case 'c':
        ctx->cutoff = atoi(optarg);
        break;
      case 'i':
        /* drop the value of an earlier -i, if any (NULL the first time) */
        free(ctx->cff_filename);
        ctx->cff_filename = strdup(optarg);
        break;
      default:
        /* unknown option or missing argument: ignored */
        break;
      }
  }
  return ctx;
}
void cff_cutoff_check_options(cff_cutoff_context *ctx)
{
if(ctx->help
|| !ctx->vocabs_filename
|| !ctx->cff_filename
){
cff_cutoff_help_message(ctx);
cff_cutoff_help_message2(ctx);
exit(1);
}
}
......@@ -44,9 +128,9 @@ int main(int argc, char *argv[])
dico *old_d_feat;
dico *new_d_feat;
context *ctx;
cff_cutoff_context *ctx;
ctx = context_read_options(argc, argv);
ctx = cff_cutoff_read_options(argc, argv);
cff_cutoff_check_options(ctx);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
......@@ -59,7 +143,7 @@ int main(int argc, char *argv[])
old2new = (int *)memalloc(n_feat * sizeof(int));
for(i=0; i < n_feat; i++)
if(occ_table[i] < ctx->feature_cutoff)
if(occ_table[i] < ctx->cutoff)
old2new[i] = -1;
else
old2new[i] = dico_add(new_d_feat, dico_int2string(old_d_feat, i));
......@@ -86,25 +170,20 @@ int main(int argc, char *argv[])
fprintf(stderr, "total number of features : %d\n", n_feat);
fprintf(stderr, "number of features removed : %d\n", feat_removed);
fprintf(stderr, "ratio : %.3f\n\n", (float)feat_removed / n_feat);
fprintf(stderr, "threshold : %d\n", ctx->cutoff);
fprintf(stderr, "after thresholding : %d\n", n_feat - feat_removed);
fprintf(stderr, "ratio : %.3f\n\n", (float)(n_feat - feat_removed) / n_feat);
fprintf(stderr, "total number of feature occurrences : %d\n", f_occ);
fprintf(stderr, "feature occurrences removed : %d\n", occ_removed);
fprintf(stderr, "ratio : %.3f\n", (float)occ_removed / f_occ);
fprintf(stderr, "atfer thresholding : %d\n", f_occ - occ_removed);
fprintf(stderr, "ratio : %.3f\n", (float)(f_occ - occ_removed) / f_occ);
dico_vec_replace_dico(ctx->vocabs, old_d_feat, new_d_feat);
dico_vec_print(ctx->vocabs_filename, ctx->vocabs);
/* dico_print(ctx->perceptron_features_filename, new_d_feat); */
dico_free(new_d_feat);
free(old2new);
context_free(ctx);
cff_cutoff_context_free(ctx);
return 0;
}
......@@ -230,6 +230,8 @@ float feature_table_entropy(feat_vec *fv, feature_table *ft)
return entropy;
}
int feature_table_argmax(feat_vec *fv, feature_table *ft, float *max)
{
float *classes_score = (float *)memalloc(ft->classes_nb * sizeof(float));
......@@ -266,6 +268,9 @@ int feature_table_argmax(feat_vec *fv, feature_table *ft, float *max)
return argmax;
}
/* fill an array (classes_score) with the scores of the different classes */
/* for the feature vector fv */
void feature_table_scores(feat_vec *fv, feature_table *ft, float *classes_score)
{
int cla;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment