Skip to content
Snippets Groups Projects
Commit 3ccdd755 authored by Franck Dary
Browse files

Finished integration of lookup lemma classifier

parent 66b28aac
No related branches found
No related tags found
No related merge requests found
#include<string.h>
#include"classifier.h"
#include"config2feat_vec.h"
#include"oracle_lemmatizer_lookup.h"
classifier *classifier_new(char *name)
{
......@@ -21,9 +22,7 @@ classifier *classifier_new(char *name)
classif->mlp_struct_filename = NULL;
classif->mlp = NULL;
classif->fplm = NULL;
classif->d_lookup = NULL;
classif->fplm_filename = NULL;
classif->d_lookup_filename = NULL;
classif->d_features_filename = NULL;
classif->d_tapes_filename = NULL;
......@@ -87,8 +86,14 @@ int classifier_argmax(classifier *classif, config *c, word_emb *emb, mcd *m)
return classifier_argmax_perceptron(classif, c);
}
else if(classif->type == classifier::Type::Lookup){
//TODO faire ca bien
return 0;
if(!strcmp("LEMMATIZER_LOOKUP", classifier_get_oracle_name(classif))){
dico *d_form = dico_vec_get_dico(classif->d_tapes, (char*)"FORM");
dico *d_pos = dico_vec_get_dico(classif->d_tapes, (char*)"POS");
dico *d_lemma = dico_vec_get_dico(classif->d_tapes, (char*)"LEMMA");
return oracle_lemmatizer_lookup(c, classifier_get_output_tagset(classif), d_form, d_lemma, d_pos, classif->fplm);
}
fprintf(stderr, "do not know which oracle to use for classifier %s, oracle_name = %s\n", classif->name, classifier_get_oracle_name(classif));
exit(1);
}
else if(classif->type == classifier::Type::Forced){
return 0;
......@@ -168,9 +173,6 @@ void classifier_print_desc_file(char *filename, classifier *classif)
if(classif->fplm)
fprintf(f,"%%FPLM %s\n", classif->fplm_filename);
if(classif->d_lookup)
fprintf(f,"%%D_LOOKUP %s\n", classif->d_lookup_filename);
if(classif->oracle_name)
fprintf(f,"%%ORACLE_TYPE %s\n", classif->oracle_name);
}
......@@ -240,7 +242,7 @@ classifier *classifier_read_full(char *filename, char *absolute_path, dico_vec *
}
if(sscanf(buffer, "%%FEAT_MODEL %s", name)){
if(classif->type == classifier::Type::Forced){
if(classif->type != classifier::Type::Classifier){
fprintf(stderr, "ERROR %s : classifier type '%s' must not use FEAT_MODEL\n", __func__, type2string(classif->type));
exit(1);
}
......@@ -289,20 +291,6 @@ classifier *classifier_read_full(char *filename, char *absolute_path, dico_vec *
continue;
}
if(sscanf(buffer, "%%D_LOOKUP %s", name)){
if(verbose)
fprintf(stderr, "D_LOOKUP = %s\n", name);
if(classif->type != classifier::Type::Lookup){
fprintf(stderr, "ERROR %s : classifier type '%s' must not use D_LOOKUP\n", __func__, type2string(classif->type));
exit(1);
}
classif->d_lookup = dico_read(name, 0.5);
classif->d_lookup_filename = strdup(name);
continue;
}
if(sscanf(buffer, "%%MLP_MODEL %s", name)){
if(verbose)
fprintf(stderr, "MLP_MODEL = %s\n", name);
......@@ -445,14 +433,9 @@ classifier *classifier_read_full(char *filename, char *absolute_path, dico_vec *
fprintf(stderr, "ERROR (%s) : classifier type 'LOOKUP' require fplm\n", __func__);
exit(1);
}
if(!classif->d_lookup){
classif->d_lookup = dico_new("d_lookup", 20000);
}
if(!classif->d_lookup_filename){
char d_name[1024];
strcpy(d_name, classif->name);
strcat(d_name, "_lookup.dic");
classif->d_lookup_filename = strdup(d_name);
if(!classif->oracle_name){
fprintf(stderr, "ERROR (%s) : classifier type 'LOOKUP' require oracle type\n", __func__);
exit(1);
}
}
......
......@@ -31,8 +31,6 @@ struct classifier{
Mlp *mlp; /* Multi Layers Perceptron */
fplm_struct *fplm; /* Used for training lookup classifiers */
char *fplm_filename; /* Its filename */
dico *d_lookup; /* The dictionnary used by Lookup classifiers */
char *d_lookup_filename; /* Its filename */
char *output_tagset_name; /* name of the movement tagset */
char *d_features_filename; /* name of the file that stores the feature dictionnary */
char *d_tapes_filename; /* name of the file that stores the tape alphabets */
......
......@@ -173,7 +173,7 @@ void generate_scf_file(context *ctx)
mvt_type = mvt_tagset_get_type(classifier_get_output_tagset(classif), mvt_code);
if(classif->type == classifier::Type::Classifier || classif->type == classifier::Type::Lookup)
if(classif->type == classifier::Type::Classifier)
config2feat_vec_cff(classif->fm, c, classif->d_features, classif->fv, ctx->mode);
if(ctx->debug_mode){
......@@ -192,7 +192,7 @@ void generate_scf_file(context *ctx)
word_buffer_move_left(ref);
}
if(classif->type == classifier::Type::Classifier || classif->type == classifier::Type::Lookup){
if(classif->type == classifier::Type::Classifier){
fprintf(output_file, "%d", current_state->classifier_nb);
fprintf(output_file, "\t%d", mvt_code);
feat_vec_print(output_file, classif->fv);
......
......@@ -51,7 +51,7 @@ void print_header(mcd *m, context *ctx)
for(int i = 0; i < machine->classif_vec->nb; i++){
classifier *classif = machine->classif_vec->array[i];
if(classif->type == classifier::Type::Forced)
if(classif->type != classifier::Type::Classifier)
continue;
feat_model *fm = classif->fm;
......@@ -155,7 +155,7 @@ int main(int argc, char *argv[])
for(int i = 0; i < machine->classif_vec->nb; i++){
classifier *classif = machine->classif_vec->array[i];
if(classif->type != classifier::Type::Forced){
if(classif->type == classifier::Type::Classifier){
if(classif->fm)
check_feature_model(classif->fm);
else{
......
......@@ -47,9 +47,8 @@ void train_perceptron(context * ctx){
fprintf(stderr, "training classifier %d / %d : %s\n", classif_nb + 1, classif_vec->nb, classif->name);
if(classif->type == classifier::Type::Lookup){
//TODO ici finir
classifier_print_desc_file(classif->filename, classif);
dico_print(classif->d_lookup_filename, classif->d_lookup);
fprintf(stderr, "OK size=%d\n", classif->fplm->nbelem);
continue;
}
......
......@@ -46,6 +46,13 @@ int movement_apply(config *c, int mvt_code, mvt_tagset *tagset, int root_code, T
case MVT_MORPHO :
result = movement_add_morpho(c, mvt_label);
break;
case MVT_LOOKUP_FOUND :
if(!strcmp("LEMMA", tagset->d_labels->name))
result = movement_add_lemma(c, mvt_label);
break;
case MVT_LOOKUP_NOTFOUND :
result = 1;
break;
case MVT_LEMMATIZER_RULES :
char *mvt_label_str = dico_int2string(tagset->d_labels, mvt_label);
dico *d_form = dico_vec_get_dico(tm_get_d_tapes(machine), (char *)"FORM");
......@@ -256,6 +263,25 @@ int movement_add_pos_undo(config *c)
return 1;
}
/*
 * Apply a "lookup found" lemma movement to the configuration.
 *
 * c     : configuration whose buffer holds the word to annotate
 * lemma : lemma code passed to word_set_lemma() for the current buffer word (b0)
 *
 * Returns 0 without touching the configuration when the b0 test below
 * reports NULL; otherwise sets the lemma on b0, pushes a MVT_LOOKUP_FOUND
 * movement (carrying the lemma code and b0) onto the configuration, and
 * returns 1.
 */
int movement_add_lemma(config *c, int lemma)
{
/* No current word available: the movement cannot be applied. */
if(word_buffer_b0(config_get_buffer(c)) == NULL) return 0;

word *current_word = word_buffer_b0(config_get_buffer(c));

word_set_lemma(current_word, lemma);
/* Record the movement together with the word it modified. */
config_push_mvt(c, MVT_LOOKUP_FOUND, lemma, current_word, NULL);

return 1;
}
/*
 * Undo the movement on top of the configuration history as a lemma movement.
 *
 * The word returned by mvt_get_gov() for the top-of-history movement has its
 * lemma set back to -1, then the movement is popped from the configuration
 * and freed. Always returns 1.
 *
 * NOTE(review): presumably the word stored as "gov" is the b0 passed to
 * config_push_mvt() by movement_add_lemma() -- confirm against config_push_mvt.
 * The history is not checked for emptiness here.
 */
int movement_add_lemma_undo(config *c)
{
/* Reset the lemma of the word attached to the most recent movement. */
word_set_lemma(mvt_get_gov(mvt_stack_top(config_get_history(c))), -1);

/* Drop that movement from the history and release it. */
mvt_free(config_pop_mvt(c));

return 1;
}
int movement_add_morpho(config *c, int morpho)
{
if(word_buffer_b0(config_get_buffer(c)) == NULL) return 0;
......
......@@ -24,6 +24,8 @@ int movement_root_undo (config *c);
int movement_undo (config *c);
int movement_add_pos (config *c, int pos);
int movement_add_pos_undo (config *c);
int movement_add_lemma (config *c, int lemma);
int movement_add_lemma_undo (config *c);
int movement_add_cpos (config *c, int cpos);
int movement_add_cpos_undo (config *c);
int movement_add_morpho (config *c, int morpho);
......
......@@ -39,7 +39,7 @@ int mvt_tagset_get_type(mvt_tagset *t, int code)
int mvt_tagset_get_code(mvt_tagset *t, int type, int label)
{
if(t->start[type] == -1){
fprintf(stderr, "cannot find a code for movement %d %d\n", type, label);
fprintf(stderr, "cannot find a code for movement type=%d label=%d\n", type, label);
mvt_tagset_print(stderr, t);
exit(1);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment