Commit 7e34c822 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

Created a simple decoder for Forrest, to be used for testing with a DNN

parent c53ae822
......@@ -10,6 +10,7 @@ set(SOURCES src/context.c
src/oracle.c
src/oracle_tagger.c
src/simple_decoder.c
src/simple_decoder_forrest.c
src/simple_decoder_tagger.c
src/cf_file.c
src/feat_lib.c
......@@ -50,6 +51,11 @@ target_link_libraries(maca_trans_parser transparse)
target_link_libraries(maca_trans_parser maca_common)
install (TARGETS maca_trans_parser DESTINATION bin)
# Forrest test decoder: executable built from src/decode_forrest.c, linked
# against transparse and maca_common, and installed into bin.
add_executable(maca_trans_parser_forrest ./src/decode_forrest.c)
target_link_libraries(maca_trans_parser_forrest transparse)
target_link_libraries(maca_trans_parser_forrest maca_common)
install (TARGETS maca_trans_parser_forrest DESTINATION bin)
add_executable(maca_trans_tagger ./src/decode_tagger.c)
target_link_libraries(maca_trans_tagger transparse)
target_link_libraries(maca_trans_tagger maca_common)
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"movement.h"
#include"oracle.h"
#include"feat_fct.h"
#include"feature_table.h"
#include"dico.h"
#include"beam.h"
#include"simple_decoder_forrest.h"
/*#include"dnn_decoder.h"*/
#include"config2feat_vec.h"
/*
 * Print the usage text of the decoder by delegating to the help routines
 * of each option group, with an "INPUT" heading written to stderr before
 * the input-related groups.
 */
void decode_help_message(context *ctx)
{
  /* general, beam and CoNLL option groups */
  context_general_help_message(ctx);
  context_beam_help_message(ctx);
  context_conll_help_message(ctx);

  /* heading followed by the input-related option groups */
  fputs("INPUT\n", stderr);
  context_input_help_message(ctx);
  context_mcd_help_message(ctx);
  context_model_help_message(ctx);
  context_vocabs_help_message(ctx);
  context_features_model_help_message(ctx);
}
/*
 * When the help flag of ctx is set, print the usage message and terminate
 * the program with exit status 1; otherwise return without doing anything.
 * No check is performed on the filenames stored in ctx.
 */
void decode_check_options(context *ctx)
{
  if(!ctx->help)
    return;

  decode_help_message(ctx);
  exit(1);
}
/*
 * Return a newly allocated string holding prefix followed by suffix.
 * The program is terminated if the memory cannot be obtained.
 */
static char *resource_filename_concat(const char *prefix, const char *suffix)
{
  size_t size = strlen(prefix) + strlen(suffix) + 1;
  char *joined = malloc(size);

  if(joined == NULL){
    fprintf(stderr, "cannot allocate memory for a resource filename\n");
    exit(1);
  }
  snprintf(joined, size, "%s%s", prefix, suffix);
  return joined;
}

/*
 * Give a default value to the model, vocabularies and features model
 * filenames of ctx that have not been set.
 *
 * Defaults live in "<root>/<language>/bin/", where <root> is
 * ctx->maca_data_path when set and the MACAON_DIR environment variable
 * otherwise. Filenames already present in ctx are left untouched.
 *
 * The directory is only computed when at least one default is needed; in
 * that case the program exits with status 1 if neither a data path nor
 * MACAON_DIR is available, or if no language is set. Paths are built in
 * dynamically sized buffers, so their length is not limited.
 *
 * When ctx->verbose is set, the resulting filenames are written to stderr.
 */
void set_linguistic_resources_filenames_parser(context *ctx)
{
  char *bin_dir = NULL;

  if(!ctx->perc_model_filename || !ctx->vocabs_filename || !ctx->features_model_filename){
    /* getenv returns NULL when the variable is not defined */
    const char *root = ctx->maca_data_path ? ctx->maca_data_path : getenv("MACAON_DIR");
    size_t size;

    if(root == NULL){
      fprintf(stderr, "cannot locate linguistic resources: no data path given and MACAON_DIR is not set\n");
      exit(1);
    }
    if(ctx->language == NULL){
      fprintf(stderr, "cannot locate linguistic resources: no language specified\n");
      exit(1);
    }

    /* sizeof("//bin/") accounts for both separators, "bin/" and the final '\0' */
    size = strlen(root) + strlen(ctx->language) + sizeof("//bin/");
    bin_dir = malloc(size);
    if(bin_dir == NULL){
      fprintf(stderr, "cannot allocate memory for the resource directory name\n");
      exit(1);
    }
    snprintf(bin_dir, size, "%s/%s/bin/", root, ctx->language);
  }

  if(!ctx->perc_model_filename)
    ctx->perc_model_filename = resource_filename_concat(bin_dir, DEFAULT_MODEL_FILENAME);

  if(!ctx->vocabs_filename)
    ctx->vocabs_filename = resource_filename_concat(bin_dir, DEFAULT_VOCABS_FILENAME);

  /* no default is computed for mcd_filename */

  if(!ctx->features_model_filename)
    ctx->features_model_filename = resource_filename_concat(bin_dir, DEFAULT_FEATURES_MODEL_FILENAME);

  free(bin_dir);

  if(ctx->verbose){
    fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
    fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
    /* mcd_filename may legitimately be unset: never hand a null pointer to %s */
    fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
    fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
  }
}
/*
 * Entry point of the Forrest test decoder.
 *
 * Reads the options, fills in default resource filenames, loads the
 * features model and the vocabularies, then runs simple_decoder_forrest
 * when the beam width is 1. Returns 1 when the "LABEL" vocabulary cannot
 * be found, 0 otherwise.
 */
int main(int argc, char *argv[])
{
  context *ctx;

  ctx = context_read_options(argc, argv);
  decode_check_options(ctx);

  set_linguistic_resources_filenames_parser(ctx);
  ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
  ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
  mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);

  ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");

  if(ctx->dico_labels == NULL){
    fprintf(stderr, "cannot find label names\n");
    return 1;
  }

  /* two movements per label plus one extra movement */
  ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1;

  /* load models */

  ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");

  if(ctx->beam_width == 1){
    simple_decoder_forrest(ctx);
  }
  else{
    /* only the single-hypothesis decoder exists here: say so instead of
       silently producing no output (exit status is left unchanged) */
    fprintf(stderr, "only a beam width of 1 is supported by this decoder, nothing was decoded\n");
  }

  context_free(ctx);
  return 0;
}
......@@ -93,20 +93,6 @@ int queue_is_empty(queue *q)
return (q->nbelem == 0);
}
/*
 * Insert w at the front of q: the head index is moved one slot backwards
 * (wrapping to the last slot when it is 0), w is stored there and the
 * element count is incremented. A message is written to stderr when the
 * head then coincides with the tail; no other action is taken in that case.
 */
void queue_add_in_front_old(queue *q, word *w)
{
  q->head = (q->head == 0) ? q->size - 1 : q->head - 1;
  q->array[q->head] = w;
  q->nbelem++;

  if(q->head == q->tail)
    fputs("queue full !\n", stderr);
}
void queue_add_in_front(queue *q, word *w)
{
if(q->head == 0)
......@@ -138,20 +124,6 @@ void queue_double_size(queue *q)
free(q2);
}
/*
 * Append w to q: w is stored at the tail slot, the tail index is advanced
 * by one (wrapping to 0 after the last slot) and the element count is
 * incremented. A message is written to stderr when the tail then coincides
 * with the head; no other action is taken in that case.
 */
void queue_add_old(queue *q, word *w)
{
  q->array[q->tail] = w;
  q->tail = (q->tail == q->size-1) ? 0 : q->tail + 1;
  q->nbelem++;

  if(q->head == q->tail)
    fputs("queue full !\n", stderr);
}
void queue_add(queue *q, word *w)
{
q->array[q->tail] = w;
......
......@@ -26,8 +26,6 @@ void queue_free(queue *q);
/* Non-zero when q holds no element (its element count is 0). */
int queue_is_empty(queue *q);
/* Store w at the tail slot of q. */
void queue_add(queue *q, word *w);
/* Insert w at the head side of q. */
void queue_add_in_front(queue *q, word *w);
/* NOTE(review): definition not visible here; presumably a variant of queue_add - confirm. */
void queue_add2(queue *q, word *w);
/* NOTE(review): definition not visible here; presumably a variant of queue_add_in_front - confirm. */
void queue_add_in_front2(queue *q, word *w);
/* NOTE(review): presumably removes an element from q and returns it - confirm against the definition. */
word *queue_remove(queue *q);
/* NOTE(review): presumably writes the content of q to f - confirm against the definition. */
void queue_print(FILE *f, queue *q);
/* NOTE(review): presumably returns the element of q at position n - confirm the indexing origin. */
word *queue_elt_n(queue *q, int n);
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"movement.h"
#include"oracle.h"
#include"feat_fct.h"
#include"config2feat_vec.h"
#include"feature_table.h"
#include"dico.h"
/*
 * Decode every sentence read from f, one configuration per sentence.
 *
 * For each sentence, movements are applied until the configuration is
 * terminal: features are extracted from the configuration, the feature
 * table ft selects a movement, and that movement is applied. A shift is
 * performed whenever no arc movement has been applied. Remaining subtrees
 * are then attached with root_label and the dependencies are printed on
 * stdout using the labels of ctx->dico_labels.
 *
 * NOTE(review): configurations are never released here (the original
 * config_free call is disabled); this looks like a leak - confirm whether
 * config_free can safely be called after printing.
 */
void simple_decoder_buffer_forrest(context *ctx, FILE *f, feature_table *ft, int root_label)
{
  int code;
  int type;
  int label;
  float score;
  feat_vec *features = feat_vec_new(feature_types_nb);
  config *cfg = config_initial(f, ctx->mcd_struct, 0);

  /* load the next sentence into the buffer of the configuration */
  while(queue_read_sentence(cfg->bf, f, ctx->mcd_struct)){

    while(!config_is_terminal(cfg)){
      config2feat_vec_cff(ctx->features_model, cfg, ctx->d_perceptron_features, features, LOOKUP_MODE);

      /* FORREST : the DNN should be called at this point.                   */
      /* features holds the values of the features extracted from cfg; the   */
      /* call yields the code of a movement, which is then applied to obtain */
      /* a new configuration.                                                */
      code = feature_table_argmax(features, ft, &score);
      type = movement_type(code);
      label = movement_label(code);

      /* an arc movement reporting a non-zero result ends this step */
      if(type == MVT_LEFT && movement_left_arc(cfg, label, score))
        continue;

      if(type == MVT_RIGHT && movement_right_arc(cfg, label, score))
        continue;

      /* otherwise fall back on a shift */
      movement_shift(cfg, 0, score);
    }

    config_connect_subtrees(cfg, root_label);
    depset_print2(stdout, cfg->ds, ctx->dico_labels);

    /* start the next sentence from a fresh configuration */
    cfg = config_initial(f, ctx->mcd_struct, 0);
  }

  feat_vec_free(features);
}
/*
 * Run the Forrest test decoder on the input described by ctx.
 *
 * Sentences are read from ctx->input_filename when it is set and from
 * stdin otherwise. The feature table is loaded from
 * ctx->perc_model_filename and released once decoding is over. The root
 * label code is looked up in ctx->dico_labels; 0 is used when the lookup
 * yields -1.
 */
void simple_decoder_forrest(context *ctx)
{
  FILE *input = stdin;
  feature_table *table;
  int root_code;

  if(ctx->input_filename)
    input = myfopen(ctx->input_filename, "r");

  table = feature_table_load(ctx->perc_model_filename, ctx->verbose);

  root_code = dico_string2int(ctx->dico_labels, ctx->root_label);
  if(root_code == -1)
    root_code = 0;

  simple_decoder_buffer_forrest(ctx, input, table, root_code);

  feature_table_free(table);

  /* stdin is not ours to close */
  if(ctx->input_filename)
    fclose(input);
}
/* Include guard; the name avoids the double underscore, which is reserved
   for the implementation. */
#ifndef SIMPLE_DECODER_FORREST_H
#define SIMPLE_DECODER_FORREST_H
#include"context.h"
/*
 * Run the Forrest test decoder on the input described by ctx: sentences are
 * read from ctx->input_filename (stdin when it is not set) and the resulting
 * dependencies are printed on stdout.
 */
void simple_decoder_forrest(context *ctx);
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment