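/*
 * maca_trans_morpho.c
 *
 * (Residue of the repository web view this file was extracted from:
 *  "Select Git revision", "decode_forrest.c", "maca_trans_morpho.c 4.96 KiB".)
 */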
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"feat_fct.h"
#include"feature_table.h"
#include"dico.h"
#include"config2feat_vec.h"
/* Prototype kept next to the definition so the function has a visible declaration. */
void decode_morpho_help_message(context *ctx);

/*
 * Print the help text of this program.
 *
 * The text is assembled by calling the context_*_help_message() helpers in a
 * fixed order; the literal heading "INPUT" is written to stderr right before
 * the input-related helpers.
 *
 * ctx  option context, forwarded unchanged to every helper.
 */
void decode_morpho_help_message(context *ctx)
{
  /* Helpers called before the "INPUT" heading. */
  context_general_help_message(ctx);
  context_beam_help_message(ctx);
  context_conll_help_message(ctx);

  fputs("INPUT\n", stderr);

  /* Helpers called after the "INPUT" heading. */
  context_input_help_message(ctx);
  context_mcd_help_message(ctx);
  context_model_help_message(ctx);
  context_vocabs_help_message(ctx);
  context_features_model_help_message(ctx);
  context_f2p_filename_help_message(ctx);
}
/*
 * Check the options stored in ctx.
 *
 * The only condition enforced is the help flag: when ctx->help is set, the
 * help message is printed and the process exits with status 1.
 * Missing conll / model / mcd / vocabs / features-model filenames are
 * deliberately not rejected here (those tests are disabled).
 */
void decode_morpho_check_options(context *ctx)
{
  if(!ctx->help)
    return;

  decode_morpho_help_message(ctx);
  exit(1);
}
/*
 * Return a newly allocated string made of dir immediately followed by name
 * (no separator is inserted). The caller owns the result.
 *
 * Exits with status 1 when dir is NULL or when memory cannot be allocated.
 */
static char *decode_morpho_default_path(const char *dir, const char *name)
{
  size_t dir_len;
  size_t name_len;
  char *path;

  if(!dir){
    fprintf(stderr, "cannot build the default path of %s: data path is not set\n", name);
    exit(1);
  }

  dir_len = strlen(dir);
  name_len = strlen(name);

  /* Sized from the actual inputs: no fixed-size buffer that a long path could overflow. */
  path = malloc(dir_len + name_len + 1);
  if(!path){
    fprintf(stderr, "memory allocation error\n");
    exit(1);
  }

  memcpy(path, dir, dir_len);
  memcpy(path + dir_len, name, name_len + 1); /* +1 copies the terminating '\0' */
  return path;
}

/*
 * Fill in the resource filenames that were not given on the command line.
 *
 * For each of perc_model_filename, vocabs_filename and
 * features_model_filename that is still NULL, the field is set to
 * ctx->maca_data_path concatenated with the matching DEFAULT_*_MORPHO_FILENAME
 * constant. Fields that are already set are left untouched.
 * mcd_filename is never defaulted here.
 *
 * When ctx->verbose is set, the resulting filenames are written to stderr.
 */
void decode_morpho_set_linguistic_resources_filenames(context *ctx)
{
  if(!ctx->perc_model_filename)
    ctx->perc_model_filename =
      decode_morpho_default_path(ctx->maca_data_path, DEFAULT_MODEL_MORPHO_FILENAME);

  if(!ctx->vocabs_filename)
    ctx->vocabs_filename =
      decode_morpho_default_path(ctx->maca_data_path, DEFAULT_VOCABS_MORPHO_FILENAME);

  if(!ctx->features_model_filename)
    ctx->features_model_filename =
      decode_morpho_default_path(ctx->maca_data_path, DEFAULT_FEATURES_MODEL_MORPHO_FILENAME);

  if(ctx->verbose){
    fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
    fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
    /* mcd_filename gets no default above, so it may still be NULL; passing NULL to %s is undefined. */
    fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
    fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
  }
}
/*
 * Print one word on stdout, followed by a newline, with its feats label.
 *
 * w           word to print; w->input is treated as a tab-separated line.
 * mcd_struct  column description, queried for the index of the FEATS column.
 * dico_morph  dictionary used to turn the label code into a string.
 * postag      label code to print. Despite the name, the caller in this file
 *             passes the morphological-features code.
 *
 * If the mcd declares no FEATS column (index -1), the raw input is printed
 * followed by a tab and the label. Otherwise every column of the input is
 * printed through word_print_col_n(), except the FEATS column which is
 * replaced by the label; if the input has too few columns to reach the FEATS
 * column, the label is appended as one extra column.
 */
void print_word(word *w, mcd *mcd_struct, dico *dico_morph, int postag)
{
  const int feats_col = mcd_get_feats_col(mcd_struct);
  const char *label = dico_int2string(dico_morph, postag);
  const char *cursor;
  int col_nb = 0;

  /* The column that matters is the one being written (FEATS), not the POS
     column: testing the POS column silently dropped the label whenever a POS
     column was declared without a FEATS column. */
  if(feats_col == -1){
    printf("%s\t%s\n", w->input, label);
    return;
  }

  /* Columns are only counted, so scan the line in place instead of copying
     it and running strtok(). As with strtok(), runs of tabs count as one
     separator and leading tabs are skipped. */
  cursor = w->input + strspn(w->input, "\t");
  while(*cursor){
    if(col_nb != 0)
      printf("\t");
    if(col_nb == feats_col)
      printf("%s", label);
    else
      word_print_col_n(stdout, w, col_nb);
    col_nb++;
    cursor += strcspn(cursor, "\t"); /* skip the current column */
    cursor += strspn(cursor, "\t");  /* skip the separator(s) after it */
  }

  /* Input ended before the FEATS column was reached: append the label. */
  if(col_nb <= feats_col)
    printf("\t%s", label);
  printf("\n");
}
/*
 * Apply one decoding step to configuration c: store the label code feats on
 * the word returned by word_buffer_b0(c->bf), then advance the buffer with
 * word_buffer_move_right().
 *
 * Always returns 1.
 */
int movement_morpho(config *c, int feats)
{
  word *current = word_buffer_b0(c->bf);

  word_set_feats(current, feats);
  word_buffer_move_right(c->bf);

  return 1;
}
/*
 * Decode the whole input one word at a time and print every word on stdout.
 *
 * The input stream is ctx->input_filename when set, stdin otherwise. For each
 * word at the front of the buffer: if its feats code is -1 (not given in the
 * input) a feature vector is built from the configuration and the best class
 * of the model loaded from ctx->perc_model_filename becomes its feats code.
 * The word is then printed with print_word() and the configuration advanced
 * with movement_morpho().
 *
 * With ctx->debug_mode set, the configuration and the first classes of the
 * array returned by feature_table_get_vcode_array() are dumped on stderr.
 *
 * Exits with status 1 if the model or the "FEATS" dictionary is unavailable.
 */
void simple_decoder_morpho(context *ctx)
{
  /* Number of entries of the scored-class array listed in debug mode. */
  enum { DEBUG_CLASSES_NB = 3 };

  config *c;
  feat_vec *fv = feat_vec_new(feature_types_nb);
  FILE *f = (ctx->input_filename)? myfopen(ctx->input_filename, "r") : stdin;
  feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
  dico *dico_feats = dico_vec_get_dico(ctx->vocabs, (char *)"FEATS");
  float max;

  /* Both objects are dereferenced by the helpers below: fail with a message
     instead of crashing on a NULL pointer. */
  if(!ft){
    fprintf(stderr, "cannot load the model file\n");
    exit(1);
  }
  if(!dico_feats){
    fprintf(stderr, "cannot find dictionary FEATS in the vocabularies\n");
    exit(1);
  }

  /* NOTE(review): the meaning of the literal 5 is defined by config_new(),
     which is not visible from this file. */
  c = config_new(f, ctx->mcd_struct, 5);

  while(!config_is_terminal(c)){
    word *b0 = word_buffer_b0(c->bf);
    int feats = word_get_feats(b0);

    if(ctx->debug_mode){
      fprintf(stderr, "***********************************\n");
      config_print(stderr, c);
    }

    /* if feats is not specified in input it is predicted */
    if(feats == -1){
      config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
      feats = feature_table_argmax(fv, ft, &max);

      if(ctx->debug_mode){
        vcode *vcode_array = feature_table_get_vcode_array(fv, ft);

        /* NOTE(review): assumes the array holds at least DEBUG_CLASSES_NB
           entries; the class count is not accessible from here - confirm. */
        if(vcode_array){
          for(int i=0; i < DEBUG_CLASSES_NB; i++){
            fprintf(stderr, "%d\t", i);
            fprintf(stderr, "%s\t%.4f\n", dico_int2string(dico_feats, vcode_array[i].class_code), vcode_array[i].score);
          }
          free(vcode_array);
        }
      }
    }

    print_word(b0, ctx->mcd_struct, dico_feats, feats);
    movement_morpho(c, feats);
  }

  feat_vec_free(fv);
  feature_table_free(ft);
  config_free(c);
  /* stdin is not ours to close. */
  if (ctx->input_filename) fclose(f);
}
/*
 * Program entry point.
 *
 * Reads the command-line options, fills in default resource filenames, loads
 * the features model and the vocabularies, links the mcd to the vocabularies
 * and runs the simple decoder.
 *
 * Only a beam width of 1 is handled by this program. Any other value used to
 * make it exit successfully without producing any output; it is now reported
 * on stderr and the exit status is 1.
 *
 * Returns 0 on success, 1 when the requested beam width is not supported.
 */
int main(int argc, char *argv[])
{
  int status = 0;
  context *ctx = context_read_options(argc, argv);

  decode_morpho_check_options(ctx);
  decode_morpho_set_linguistic_resources_filenames(ctx);

  ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
  ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
  mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
  ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");

  if(ctx->beam_width == 1){
    simple_decoder_morpho(ctx);
  }
  else{
    /* No beam decoder exists in this program: say so instead of doing nothing. */
    fprintf(stderr, "beam width %d is not supported, only beam width 1 is available\n", (int)ctx->beam_width);
    status = 1;
  }

  context_free(ctx);
  return status;
}