Commit 18b96fe3 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

added file maca_trans_parser_conll2cff.c; transform_treebank is now obsolete

parent 01502e5d
......@@ -15,6 +15,9 @@
#include "word_emb.h"
#include "dico_vec.h"
#define mcd_get_dico_label(m) (m)->dico_array[FEAT_TYPE_LABEL]
typedef struct {
int nb_col;
int type2col[FEAT_TYPE_NB];
......
......@@ -81,7 +81,7 @@ context *context_read_options(int argc, char *argv[])
ctx->program_name = strdup(argv[0]);
static struct option long_options[8] =
static struct option long_options[10] =
{
{"help", no_argument, 0, 'h'},
{"verbose", no_argument, 0, 'v'},
......
......@@ -32,7 +32,7 @@ target_link_libraries(maca_trans_parser_conll2fann transparse)
target_link_libraries(maca_trans_parser_conll2fann maca_common)
install (TARGETS maca_trans_parser_conll2fann DESTINATION bin)
add_executable(maca_trans_parser_conll2cff ./src/transform_treebank.c)
add_executable(maca_trans_parser_conll2cff ./src/maca_trans_parser_conll2cff.c)
target_link_libraries(maca_trans_parser_conll2cff transparse)
target_link_libraries(maca_trans_parser_conll2cff maca_common)
install (TARGETS maca_trans_parser_conll2cff DESTINATION bin)
......
......@@ -12,19 +12,20 @@ void context_set_linguistic_resources_filenames(context *ctx);
void context_free(context *ctx)
{
if(ctx->program_name) free(ctx->program_name);
if(ctx->conll_filename) free(ctx->conll_filename);
if(ctx->perc_model_filename) free(ctx->perc_model_filename);
if(ctx->dnn_model_filename) free(ctx->dnn_model_filename);
if(ctx->dico_features_filename) free(ctx->dico_features_filename);
if(ctx->dico_classes_filename) free(ctx->dico_classes_filename);
if(ctx->cff_filename) free(ctx->cff_filename);
if(ctx->fann_filename) free(ctx->fann_filename);
if(ctx->mcd_filename) free(ctx->mcd_filename);
if(ctx->stag_desc_filename) free(ctx->stag_desc_filename);
if(ctx->program_name) free(ctx->program_name);
if(ctx->conll_filename) free(ctx->conll_filename);
if(ctx->perc_model_filename) free(ctx->perc_model_filename);
if(ctx->dnn_model_filename) free(ctx->dnn_model_filename);
if(ctx->dico_features_filename) free(ctx->dico_features_filename);
if(ctx->dico_classes_filename) free(ctx->dico_classes_filename);
if(ctx->cff_filename) free(ctx->cff_filename);
if(ctx->fann_filename) free(ctx->fann_filename);
if(ctx->mcd_filename) free(ctx->mcd_filename);
if(ctx->stag_desc_filename) free(ctx->stag_desc_filename);
if(ctx->features_model_filename) free(ctx->features_model_filename);
if(ctx->maca_data_path) free(ctx->maca_data_path);
if(ctx->language) free(ctx->language);
if(ctx->maca_data_path) free(ctx->maca_data_path);
if(ctx->language) free(ctx->language);
if(ctx->root_label) free(ctx->root_label);
if(ctx->d_perceptron_features)
dico_free(ctx->d_perceptron_features);
......@@ -59,6 +60,7 @@ context *context_new(void)
c->maca_data_path = NULL;
c->language = strdup("fr");
c->root_label = strdup("root");
c->d_perceptron_features = NULL;
c->mcd_struct = NULL;
c->features_model = NULL;
......@@ -154,6 +156,9 @@ void context_maca_data_path_help_message(context *ctx){
fprintf(stderr, "\t-Y --maca_data_path : path to the maca_data directory\n");
}
/* Print the one-line usage text of the -R / --root_label option on stderr.
 * ctx is accepted for signature uniformity with the other help printers
 * and is not read here. */
void context_root_label_help_message(context *ctx){
  static const char usage_line[] =
    "\t-R --root_label : name of the root label (default is \"root\")\n";

  fputs(usage_line, stderr);
}
context *context_read_options(int argc, char *argv[])
{
......@@ -163,7 +168,7 @@ context *context_read_options(int argc, char *argv[])
ctx->program_name = strdup(argv[0]);
static struct option long_options[25] =
static struct option long_options[26] =
{
{"help", no_argument, 0, 'h'},
{"verbose", no_argument, 0, 'v'},
......@@ -189,12 +194,13 @@ context *context_read_options(int argc, char *argv[])
{"vocabs", required_argument, 0, 'V'},
{"stream", required_argument, 0, 'T'},
{"language", required_argument, 0, 'X'},
{"maca_data_path", required_argument, 0, 'Y'}
{"maca_data_path", required_argument, 0, 'Y'},
{"root_label", required_argument, 0, 'R'}
};
optind = 0;
opterr = 0;
while ((c = getopt_long (argc, argv, "dhvT:m:f:c:i:n:x:u:r:o:b:y:s:M:H:S:C:F:V:X:Y:", long_options, &option_index)) != -1){
while ((c = getopt_long (argc, argv, "dhvT:m:f:c:i:n:x:u:r:o:b:y:s:M:H:S:C:F:V:X:Y:R:", long_options, &option_index)) != -1){
switch (c)
{
case 'd':
......@@ -270,6 +276,9 @@ context *context_read_options(int argc, char *argv[])
case 'Y':
ctx->maca_data_path = strdup(optarg);
break;
case 'R':
ctx->root_label = strdup(optarg);
break;
}
}
......
......@@ -47,6 +47,7 @@ typedef struct {
dico *dico_labels;
char *maca_data_path;
char *language;
char *root_label;
} context;
context *context_new(void);
......
......@@ -56,7 +56,6 @@ int main(int argc, char *argv[])
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
if(ctx->dico_labels == NULL){
......@@ -65,7 +64,7 @@ int main(int argc, char *argv[])
}
ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1;
root_label = dico_string2int(ctx->dico_labels, (char *)"root");
root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
if(root_label == -1) root_label = 0;
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"movement.h"
#include"oracle.h"
#include"feat_fct.h"
#include"context.h"
#include"feat_vec.h"
#include"dico_vec.h"
#include"corpus.h"
#include"word_emb.h"
#include"config2feat_vec.h"
/*
 * Print the usage text of maca_trans_parser_conll2cff.
 *
 * The section titles are written to stderr; the option descriptions are
 * delegated to the context_*_help_message() helpers, called in a fixed order.
 */
void maca_trans_parser_conll2cff_help_message(context *ctx)
{
  /* general options */
  context_general_help_message(ctx);
  context_mode_help_message(ctx);
  context_sent_nb_help_message(ctx);

  /* input section */
  fputs("INPUT\n", stderr);
  context_conll_help_message(ctx);
  fputs("IN TEST MODE\n", stderr);
  context_alphabet_help_message(ctx);

  /* output section */
  fputs("OUTPUT\n", stderr);
  context_cff_help_message(ctx);
  fputs("IN TRAIN MODE\n", stderr);
  context_alphabet_help_message(ctx);
}
/*
 * Validate the command line options stored in ctx.
 *
 * The options are accepted when a CoNLL input file name is set, help was
 * not requested, and at least one of the cff / fann output file names is
 * set. Otherwise the help message is printed and the process exits with
 * status 1. The mcd file name is not required.
 */
void maca_trans_parser_conll2cff_check_options(context *ctx)
{
  int has_input  = (ctx->conll_filename != NULL);
  int has_output = (ctx->cff_filename != NULL) || (ctx->fann_filename != NULL);

  if(has_input && !ctx->help && has_output)
    return;

  maca_trans_parser_conll2cff_help_message(ctx);
  exit(1);
}
/*
 * Write training examples to output_file, reading the corpus as a stream.
 *
 * The CoNLL file named by ctx->conll_filename is opened twice: once to feed
 * the parser configuration and once to read the reference sentences that
 * drive the oracle. For every configuration visited, the oracle movement
 * code is printed, followed by the feature vector of that configuration.
 * Processing stops after ctx->sent_nb sentences or when no more reference
 * sentence can be read.
 *
 * output_file is not closed here; the caller owns it.
 */
void generate_training_file_stream(FILE *output_file, context *ctx)
{
  config *c;
  int mvt_code;
  char mvt_type;
  int mvt_label;
  feat_vec *fv = feat_vec_new(feature_types_nb);
  sentence *ref = NULL;
  int sentence_nb = 0;
  int root_label = dico_string2int(mcd_get_dico_label(ctx->mcd_struct), ctx->root_label);
  FILE *conll_file = myfopen(ctx->conll_filename, "r");
  FILE *conll_file_ref = myfopen(ctx->conll_filename, "r");

  c = config_initial(conll_file, ctx->mcd_struct, 10, 5);

  /* The sentence limit is tested before reading, so that no reference
     sentence is read (and lost) once the limit has been reached. */
  while((sentence_nb < ctx->sent_nb) && (ref = sentence_read(conll_file_ref, ctx->mcd_struct))){
    while(1){
      config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);

      mvt_code = oracle(c, ref);
      mvt_type = movement_type(mvt_code);
      mvt_label = movement_label(mvt_code);

      /* one example: movement code followed by the feature vector */
      fprintf(output_file, "%d", mvt_code);
      feat_vec_print(output_file, fv);

      /* nothing left in the buffer: give up on this reference sentence */
      if(queue_is_empty(c->bf)) break;

      if((mvt_type == MVT_RIGHT) && (mvt_label == root_label)){ /* sentence is complete */
        /* create the root arc */
        movement_right_arc(c, mvt_label, 0);
        /* shift dummy word in stack */
        movement_shift(c, 1, 0);
        /* empty depset */
        depset_free(c->ds);
        c->ds = depset_new();
        sentence_nb++;
        break;
      }

      if(mvt_type == MVT_LEFT){
        movement_left_arc(c, mvt_label, 0);
        continue;
      }
      if(mvt_type == MVT_RIGHT){
        movement_right_arc(c, mvt_label, 0);
        continue;
      }
      if(mvt_type == MVT_SHIFT){
        movement_shift(c, 1, 0);
        continue;
      }
    }
    /* the reference sentence is released on every exit path of the inner loop */
    sentence_free(ref);
  }

  /* release what this function acquired */
  config_free(c);
  fclose(conll_file);
  fclose(conll_file_ref);
  /* NOTE(review): fv is not released here because no feat_vec destructor is
     visible from this file - confirm and free it if one exists. */
}
/*
 * Write training examples to output_file, processing one sentence at a time.
 *
 * The CoNLL file named by ctx->conll_filename is opened twice: once to fill
 * the buffer of the parser configuration and once to read the reference
 * sentences that drive the oracle. For every configuration visited until
 * the configuration is terminal, the oracle movement code is printed,
 * followed by the feature vector of that configuration. A fresh
 * configuration is created for each sentence. Processing stops after
 * ctx->sent_nb sentences or when no more reference sentence can be read.
 *
 * output_file is not closed here; the caller owns it.
 */
void generate_training_file_buffer(FILE *output_file, context *ctx)
{
  config *c;
  int mvt_code;
  char mvt_type;
  int mvt_label;
  feat_vec *fv = feat_vec_new(feature_types_nb);
  sentence *ref = NULL;
  int sentence_nb = 0;
  FILE *conll_file = myfopen(ctx->conll_filename, "r");
  FILE *conll_file_ref = myfopen(ctx->conll_filename, "r");

  c = config_initial(conll_file, ctx->mcd_struct, 1000, 0);

  /* The sentence limit is tested before reading, so that no reference
     sentence is read (and lost) once the limit has been reached. */
  while((sentence_nb < ctx->sent_nb) && (ref = sentence_read(conll_file_ref, ctx->mcd_struct))){
    queue_read_sentence(c->bf, conll_file, ctx->mcd_struct);

    while(!config_is_terminal(c)){
      config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode);

      mvt_code = oracle(c, ref);
      mvt_type = movement_type(mvt_code);
      mvt_label = movement_label(mvt_code);

      /* one example: movement code followed by the feature vector */
      fprintf(output_file, "%d", mvt_code);
      feat_vec_print(output_file, fv);

      if(mvt_type == MVT_LEFT){
        movement_left_arc(c, mvt_label, 0);
        continue;
      }
      if(mvt_type == MVT_RIGHT){
        movement_right_arc(c, mvt_label, 0);
        continue;
      }
      if(mvt_type == MVT_SHIFT){
        movement_shift(c, 0, 0);
        continue;
      }
    }

    /* the reference sentence is no longer needed once the oracle is done */
    sentence_free(ref);

    /* start the next sentence from a fresh configuration */
    config_free(c);
    c = config_initial(conll_file, ctx->mcd_struct, 1000, 0);
    sentence_nb++;
  }

  /* release the last configuration and the two input streams */
  config_free(c);
  fclose(conll_file);
  fclose(conll_file_ref);
  /* NOTE(review): fv is not released here because no feat_vec destructor is
     visible from this file - confirm and free it if one exists. */
}
/*
 * Entry point of maca_trans_parser_conll2cff.
 *
 * Reads and checks the options, prepares the vocabularies (extracted from
 * the corpus in train mode, read from ctx->vocabs_filename in test mode),
 * generates the examples in stream or buffer mode, and in train mode writes
 * the vocabularies to ctx->vocabs_filename.
 *
 * Returns 0 on success and 1 when the "LABEL" dictionary cannot be found.
 */
int main(int argc, char *argv[])
{
  context *ctx = context_read_options(argc, argv);
  FILE *output_file;

  maca_trans_parser_conll2cff_check_options(ctx);

  /* build the vocabularies (train) or load them (test) */
  if(ctx->mode == TRAIN_MODE){
    mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
    ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
  }
  else if(ctx->mode == TEST_MODE){
    ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
    mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
  }

  ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
  if(!ctx->dico_labels){
    fprintf(stderr, "cannot find label names\n");
    return 1;
  }

  /* two movements per label plus one extra movement */
  ctx->mvt_nb = 2 * ctx->dico_labels->nbelem + 1;

  feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);

  /* perceptron feature dictionary: created in train mode, looked up in test mode */
  if(ctx->mode == TRAIN_MODE)
    ctx->d_perceptron_features = dico_new((char *)"d_perceptron_features", 10000000);
  else if(ctx->mode == TEST_MODE)
    ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");

  /* add the feature dictionary to the dico vector */
  /* NOTE(review): in test mode the dictionary was just fetched from this
     same vector - confirm that adding it again is intended. */
  dico_vec_add(ctx->vocabs, ctx->d_perceptron_features);

  /* examples go to the cff file when one is given, to stdout otherwise */
  output_file = ctx->cff_filename ? myfopen(ctx->cff_filename, "w") : stdout;

  if(!ctx->stream_mode)
    generate_training_file_buffer(output_file, ctx);
  else
    generate_training_file_stream(output_file, ctx);

  /* only train mode writes the vocabularies back */
  if(ctx->mode == TRAIN_MODE)
    dico_vec_print(ctx->vocabs_filename, ctx->vocabs);

  /* stdout is left open; only a file opened here is closed */
  if(ctx->cff_filename)
    fclose(output_file);

  context_free(ctx);
  return 0;
}
......@@ -176,4 +176,4 @@ int main(int argc, char *argv[])
context_free(ctx);
return 0;
}
k
......@@ -64,7 +64,7 @@ void sentence_free(sentence *s)
sentence *sentence_read(FILE *f, mcd *mcd_struct)
{
sentence *s = sentence_init(mcd_struct, f);
sentence *s = sentence_init(mcd_struct, f);
char buffer[1000];
word *w = NULL;
......
......@@ -3,7 +3,6 @@
#include"word.h"
#include"util.h"
#include"depset.h"
#include"mcd.h"
typedef struct {
......
......@@ -76,10 +76,14 @@ void simple_decoder_stream(FILE *f, mcd *mcd_struct, dico *dico_features, dico *
c = config_initial(f, mcd_struct, 10, 5);
while(!config_is_terminal(c)){
config_print(stdout, c);
config2feat_vec_cff(fm, c, dico_features, fv, LOOKUP_MODE);
/* feat_vec_print_string(fv, dico_features); */
mvt_code = feature_table_argmax(fv, ft, &max);
mvt_type = movement_type(mvt_code);
mvt_label = movement_label(mvt_code);
/* printf("code predicted = %d\n", mvt_code); */
if((stack_height(c->st)==1) && (mvt_type == MVT_RIGHT) && (mvt_label == root_label)){ /* sentence is complete */
......@@ -94,7 +98,8 @@ void simple_decoder_stream(FILE *f, mcd *mcd_struct, dico *dico_features, dico *
/* config_print(stdout, c); */
config_connect_subtrees(c, root_label);
depset_print_new_index(stdout, c->ds, dico_labels);
/* depset_print_new_index(stdout, c->ds, dico_labels);*/
depset_print2(stdout, c->ds, dico_labels);
/* pop the dummy word */
stack_pop(c->st);
......
......@@ -70,7 +70,7 @@ int generate_training_file_stream(FILE *output_file, context *ctx)
sentence *ref = NULL;
int nb_trans = 0;
int sentence_nb = 0;
int root_label = dico_string2int(ctx->mcd_struct->dico_array[FEAT_TYPE_LABEL], (char *)"root");
int root_label = dico_string2int(ctx->mcd_struct->dico_array[FEAT_TYPE_LABEL], ctx->root_label);
FILE *conll_file = myfopen(ctx->conll_filename, "r");
FILE *conll_file_ref = myfopen(ctx->conll_filename, "r");
word *b0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment