Skip to content
Snippets Groups Projects
Commit fa380ae0 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

Merge branch 'master' of gitlab.lif.univ-mrs.fr:alexis.nasr/macaon2

parents fcfe6c1c f50e7d93
Branches
No related tags found
No related merge requests found
#ifndef __WORD__ #ifndef __WORD__
#define __WORD__ #define __WORD__
#include<ctype.h>
#include "mcd.h" #include "mcd.h"
#include "char16.h" #include "char16.h"
...@@ -28,6 +28,7 @@ typedef struct _word { ...@@ -28,6 +28,7 @@ typedef struct _word {
#define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5]) #define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5])
#define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6]) #define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6])
*/ */
#define word_get_s1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 1]) #define word_get_s1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 1])
#define word_get_s2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 2]) #define word_get_s2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 2])
#define word_get_s3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 3]) #define word_get_s3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 3])
......
...@@ -210,9 +210,10 @@ install (TARGETS maca_trans_lemmatizer DESTINATION bin) ...@@ -210,9 +210,10 @@ install (TARGETS maca_trans_lemmatizer DESTINATION bin)
#target_link_libraries(test_w2v transparse) #target_link_libraries(test_w2v transparse)
#install (TARGETS test_w2v DESTINATION bin) #install (TARGETS test_w2v DESTINATION bin)
#add_executable(w2v_filter ./src/w2v_filter.c) add_executable(w2v_filter ./src/w2v_filter.c)
#target_link_libraries(w2v_filter transparse) target_link_libraries(w2v_filter transparse)
#install (TARGETS w2v_filter DESTINATION bin) target_link_libraries(w2v_filter maca_common)
install (TARGETS w2v_filter DESTINATION bin)
#add_executable(test_word_emb ./src/test_word_emb.c) #add_executable(test_word_emb ./src/test_word_emb.c)
#target_link_libraries(test_word_emb transparse) #target_link_libraries(test_word_emb transparse)
......
...@@ -43,6 +43,20 @@ void one_hot_print(FILE *f, int val, int dim) ...@@ -43,6 +43,20 @@ void one_hot_print(FILE *f, int val, int dim)
fprintf(f, "%d ", (i == val)? 1 : 0); fprintf(f, "%d ", (i == val)? 1 : 0);
} }
/*
 * Walk every feature descriptor stored in fm->array (fm->nbelem entries).
 * As soon as a descriptor holding more than one element is met, report
 * its index on stderr and terminate the program with exit status 1.
 * Returns normally only when no descriptor has nbelem > 1.
 */
void check_feature_model(feat_model *fm)
{
  int idx = 0;

  while(idx < fm->nbelem){
    feat_desc *desc = fm->array[idx];

    /* a descriptor with several elements is reported as a "complex feature" */
    if(desc->nbelem > 1){
      fprintf(stderr, "feature %d is a complex feature, aborting\n", idx);
      exit(1);
    }
    idx++;
  }
}
void print_header(mcd *m, feat_model *fm) void print_header(mcd *m, feat_model *fm)
{ {
int i; int i;
...@@ -53,23 +67,14 @@ void print_header(mcd *m, feat_model *fm) ...@@ -53,23 +67,14 @@ void print_header(mcd *m, feat_model *fm)
for(i=0; i <fm->nbelem; i++){ for(i=0; i <fm->nbelem; i++){
fd = fm->array[i]; fd = fm->array[i];
if(fd->nbelem > 1){
fprintf(stderr, "feature %d is a complex feature, skipping it\n", i);
}
else{
sfd = fd->array[0]; sfd = fd->array[0];
printf("\t%s", sfd->name); printf("\t%s", sfd->name);
} }
}
printf("\n"); printf("\n");
printf("OUT"); printf("OUT");
for(i=0; i <fm->nbelem; i++){ for(i=0; i <fm->nbelem; i++){
fd = fm->array[i]; fd = fm->array[i];
if(fd->nbelem > 1){
fprintf(stderr, "feature %d is a complex feature, skipping it\n", i);
}
else{
sfd = fd->array[0]; sfd = fd->array[0];
if(sfd->type == FEAT_TYPE_FORM){printf("\tFORM");continue;} if(sfd->type == FEAT_TYPE_FORM){printf("\tFORM");continue;}
if(sfd->type == FEAT_TYPE_LEMMA){printf("\tLEMMA");continue;} if(sfd->type == FEAT_TYPE_LEMMA){printf("\tLEMMA");continue;}
...@@ -79,7 +84,7 @@ void print_header(mcd *m, feat_model *fm) ...@@ -79,7 +84,7 @@ void print_header(mcd *m, feat_model *fm)
if(sfd->type == FEAT_TYPE_INT){printf("\tINT");continue;} if(sfd->type == FEAT_TYPE_INT){printf("\tINT");continue;}
printf("\tUNK"); printf("\tUNK");
} }
}
printf("\n"); printf("\n");
/* /*
for(i=0; i < m->nb_col; i++){ for(i=0; i < m->nb_col; i++){
...@@ -118,6 +123,7 @@ void cff2fann(context *ctx) ...@@ -118,6 +123,7 @@ void cff2fann(context *ctx)
char feature_type[64]; char feature_type[64];
int feature_valindex; int feature_valindex;
int count = 0; int count = 0;
char *feat_str = NULL;
vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
...@@ -133,19 +139,23 @@ void cff2fann(context *ctx) ...@@ -133,19 +139,23 @@ void cff2fann(context *ctx)
if (count % 100 == 0) if (count % 100 == 0)
fprintf(stderr, "%d\r", count); fprintf(stderr, "%d\r", count);
while(token){ while(token){
/* printf("col = %d token = %s max = %d\n", col_nb, token, max_array[col_nb]); */ /* printf("col = %d token = %s\n", col_nb, token); */
val = atoi(token); val = atoi(token);
if(col_nb == 0){ if(col_nb == 0){
/* one_hot_print(stdout, val, ctx->mvt_nb); */ /* one_hot_print(stdout, val, ctx->mvt_nb); */
/* printf("\n"); */ /* printf("\n"); */
printf("%d", val); printf("%d", val);
} else { } else {
sscanf(dico_int2string(vocab, val), "%[^==]==%d", feature_type, &feature_valindex); feat_str = dico_int2string(vocab, val);
if(feat_str){
/* printf("feat str = %s\n", feat_str); */
sscanf(feat_str, "%[^==]==%d", feature_type, &feature_valindex);
/* printf("feature_type = %s\n", feature_type); */ /* printf("feature_type = %s\n", feature_type); */
feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1); feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1);
/* printf("feat_type = %d\n", feat_type); */ /* printf("feat_type = %d\n", feat_type); */
/* printf("%d: ", col_nb); */ /* printf("%d: ", col_nb); */
int mcd_col = m->wf2col[feat_type]; int mcd_col = m->wf2col[feat_type];
/* printf("representation = %d\n", m->representation[mcd_col]); */ /* printf("representation = %d\n", m->representation[mcd_col]); */
if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){ if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
/* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */ /* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */
...@@ -162,6 +172,14 @@ void cff2fann(context *ctx) ...@@ -162,6 +172,14 @@ void cff2fann(context *ctx)
printf("\t%d", feature_valindex); printf("\t%d", feature_valindex);
} }
} }
else{
fprintf(stderr, "WARNING cannot find the description of feature : %d\n", val);
feature_valindex = -1;
printf("\t%d", feature_valindex);
}
}
col_nb++; col_nb++;
token = strtok(NULL , "\t"); token = strtok(NULL , "\t");
} }
...@@ -184,6 +202,9 @@ int main(int argc, char *argv[]) ...@@ -184,6 +202,9 @@ int main(int argc, char *argv[])
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
check_feature_model(ctx->features_model);
look_for_number_of_features_and_classes(ctx->cff_filename, &nb_feat, &nb_class); look_for_number_of_features_and_classes(ctx->cff_filename, &nb_feat, &nb_class);
ctx->mvt_nb = nb_class; ctx->mvt_nb = nb_class;
......
...@@ -158,6 +158,7 @@ int main(int argc, char *argv[]) ...@@ -158,6 +158,7 @@ int main(int argc, char *argv[])
lemma_from_fplm = fplm_lookup_lemma(exceptions, form, pos, ctx->verbose); lemma_from_fplm = fplm_lookup_lemma(exceptions, form, pos, ctx->verbose);
if(lemma_from_fplm){ if(lemma_from_fplm){
// printf("lemma %s found in exceptions file\n", lemma_from_fplm); // printf("lemma %s found in exceptions file\n", lemma_from_fplm);
// print_word(b0, ctx->mcd_struct, to_lower_string(lemma_from_fplm));
print_word(b0, ctx->mcd_struct, lemma_from_fplm); print_word(b0, ctx->mcd_struct, lemma_from_fplm);
} }
// if lemma is not found in exception file, predict an l_rule // if lemma is not found in exception file, predict an l_rule
...@@ -193,6 +194,7 @@ int main(int argc, char *argv[]) ...@@ -193,6 +194,7 @@ int main(int argc, char *argv[])
} }
/* no rule applied */ /* no rule applied */
if(i == 10){ if(i == 10){
// print_word(b0, ctx->mcd_struct, to_lower_string(form));
print_word(b0, ctx->mcd_struct, form); print_word(b0, ctx->mcd_struct, form);
} }
free(vcode_array); free(vcode_array);
......
...@@ -7,9 +7,9 @@ ...@@ -7,9 +7,9 @@
#define MVT_PARSER_SHIFT 0 #define MVT_PARSER_SHIFT 0
#define MVT_PARSER_REDUCE 1 #define MVT_PARSER_REDUCE 1
#define MVT_PARSER_ROOT 2 #define MVT_PARSER_ROOT 2
#define MVT_PARSER_EOS -1 #define MVT_PARSER_EOS 3
#define MVT_PARSER_LEFT 3 #define MVT_PARSER_LEFT 4
#define MVT_PARSER_RIGHT 4 #define MVT_PARSER_RIGHT 5
/* even movements are left movements (except 0, which is shift and 2 which is root) */ /* even movements are left movements (except 0, which is shift and 2 which is root) */
#define movement_parser_left_code(label) (2 * (label) + 4) #define movement_parser_left_code(label) (2 * (label) + 4)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment