Skip to content
Snippets Groups Projects
Commit 5591defd authored by Franck Dary
Browse files

tape_alphabets is now created using a TM

parent aef0880f
Branches
No related tags found
No related merge requests found
......@@ -107,8 +107,8 @@ word_buffer *word_buffer_load_mcf(char *mcf_filename, mcd *mcd_struct)
while(word_buffer_read_next_word(wb)){
/* printf("load word %d\n", wb->nbelem - 1); */
}
//if(mcf_filename != NULL)
// fclose(f);
if(mcf_filename != NULL)
fclose(f);
return wb;
}
......
......@@ -3,7 +3,6 @@
#include<string.h>
#include<unistd.h>
#include<getopt.h>
/* #include"movement_parser_arc_eager.h" */
#include"movements.h"
#include"oracle_parser_arc_eager.h"
#include"oracle_tagger.h"
......@@ -15,7 +14,6 @@
#include"feat_types.h"
#include"dico.h"
#include"dico_vec.h"
#include"word_emb.h"
#include"config2feat_vec.h"
#include"classifier.h"
......@@ -44,7 +42,7 @@ void add_signature_to_words_in_word_buffer(word_buffer *bf, form2pos *f2p)
}
}
void maca_tm_mcf2scf_help_message(context *ctx)
void maca_tm_extract_tape_alphabets_help_message(context *ctx)
{
context_general_help_message(ctx);
context_mode_help_message(ctx);
......@@ -52,7 +50,6 @@ void maca_tm_mcf2scf_help_message(context *ctx)
context_tm_help_message(ctx);
fprintf(stderr, "INPUT\n");
context_conll_help_message(ctx);
fprintf(stderr, "IN TEST MODE\n");
......@@ -62,22 +59,20 @@ void maca_tm_mcf2scf_help_message(context *ctx)
context_cff_help_message(ctx);
fprintf(stderr, "IN TRAIN MODE\n");
context_vocabs_help_message(ctx);
}
void maca_tm_mcf2scf_check_options(context *ctx)
void maca_tm_extract_tape_alphabets_check_options(context *ctx)
{
if(!ctx->input_filename
|| ctx->help
/* || !ctx->mcd_filename */
/* || !(ctx->cff_filename || ctx->fann_filename) */
|| !ctx->mcd_filename
){
maca_tm_mcf2scf_help_message(ctx);
maca_tm_extract_tape_alphabets_help_message(ctx);
exit(1);
}
}
void generate_scf_file(context *ctx)
void generate_tape_alphabets(context *ctx)
{
config *c;
int mvt_code;
......@@ -123,18 +118,22 @@ void generate_scf_file(context *ctx)
dico *d_lemma = dico_vec_get_dico(tm_get_d_tapes(ctx->machine), (char *)"LEMMA");
dico *d_rules = dico_vec_get_dico(tm_get_d_tapes(ctx->machine), (char *)"d_rules");
dico *d_pos = dico_vec_get_dico(tm_get_d_tapes(ctx->machine), (char *)"POS");
int word_nb = 0;
while(!word_buffer_end(config_get_buffer(c)) && (sentence_nb < ctx->sent_nb)){
current_state = machine->state_array[c->current_state_nb];
classif = current_state->classif;
mvt_tagset *tagset = classifier_get_output_tagset(classif);
mvt_tagset_update(tagset);
/*
fprintf(stderr, "Classif=%s\n", classif->name);
for(tm_transition *trans = current_state->trans_list; trans; trans = trans->next)
fprintf(stderr, "%d %d\n", trans->label, trans->destination);
if (!classif){
fprintf(stderr, "ERROR %s : classifier is NULL\n", __func__);
exit(1);
}
*/
if(ctx->f2p)
add_signature_to_words_in_word_buffer(c->bf, ctx->f2p);
......@@ -178,7 +177,10 @@ void generate_scf_file(context *ctx)
mvt_type = mvt_tagset_get_type(tagset, mvt_code);
if(classif->type == classifier::Type::Classifier)
{
config2feat_vec_fann(classif->fm, c, classif->d_features, classif->fv, ctx->mode);
feat_vec_add_values_to_dicos(classif->fv, classif->fm, machine->d_tapes, ctx->mcd_struct);
}
if(ctx->debug_mode){
config_print(stdout,c);
......@@ -191,13 +193,15 @@ void generate_scf_file(context *ctx)
/* advance head in ref word buffer */
if((mvt_type == MVT_RIGHT) || (mvt_type == MVT_SHIFT) || (mvt_type == MVT_FWD)){
word_buffer_move_right(ref);
if((++word_nb % 1000) == 0)
fprintf(stderr, "\rword %d", word_nb);
}
else if (mvt_type == MVT_BKWD){
word_buffer_move_left(ref);
}
if(mvt_type == MVT_EOS)
if((++sentence_nb % 100) == 0) fprintf(stderr, "\rsentence %d", sentence_nb);
sentence_nb++;
}
fprintf(stderr, "\n");
......@@ -208,7 +212,7 @@ void generate_scf_file(context *ctx)
int main(int argc, char *argv[])
{
context *ctx = context_read_options(argc, argv);
maca_tm_mcf2scf_check_options(ctx);
maca_tm_extract_tape_alphabets_check_options(ctx);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, 0.5);
for(unsigned int i = 0; i <= FEAT_TYPE_NB; i++){
......@@ -217,6 +221,11 @@ int main(int argc, char *argv[])
dico_vec_add(ctx->vocabs, dico_new(name,1000));
}
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
dico_set_add_unknown_strings();
word_buffer *ref = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct);
word_buffer_free(ref);
dico_vec_print(ctx->vocabs_filename, ctx->vocabs);
context_free(ctx);
......@@ -226,8 +235,7 @@ int main(int argc, char *argv[])
ctx->machine = machine;
mcd_link_to_dico(ctx->mcd_struct, machine->d_tapes, ctx->verbose);
dico_set_add_unknown_strings();
generate_scf_file(ctx);
generate_tape_alphabets(ctx);
dico_unset_add_unknown_strings();
tm_save_d_tapes(machine);
......
......@@ -234,9 +234,8 @@ int main(int argc, char *argv[])
classifier_set_d_features(classif, dico_new(string, 1000000));
}
dico_set_add_unknown_strings();
generate_scf_file(ctx);
dico_unset_add_unknown_strings();
generate_scf_file(ctx);
/* in train mode print all feature dictionnaries that have been created as well as classifiers descriptions */
if(ctx->mode == TRAIN_MODE){
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment