Skip to content
Snippets Groups Projects
Commit f50e7d93 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

Fixed a few little bugs

parent 61309227
No related branches found
No related tags found
No related merge requests found
#ifndef __WORD__ #ifndef __WORD__
#define __WORD__ #define __WORD__
#include<ctype.h>
#include "mcd.h" #include "mcd.h"
#include "char16.h" #include "char16.h"
...@@ -28,6 +28,7 @@ typedef struct _word { ...@@ -28,6 +28,7 @@ typedef struct _word {
#define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5]) #define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5])
#define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6]) #define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6])
*/ */
#define word_get_s1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 1]) #define word_get_s1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 1])
#define word_get_s2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 2]) #define word_get_s2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 2])
#define word_get_s3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 3]) #define word_get_s3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 3])
......
...@@ -210,9 +210,10 @@ install (TARGETS maca_trans_lemmatizer DESTINATION bin) ...@@ -210,9 +210,10 @@ install (TARGETS maca_trans_lemmatizer DESTINATION bin)
#target_link_libraries(test_w2v transparse) #target_link_libraries(test_w2v transparse)
#install (TARGETS test_w2v DESTINATION bin) #install (TARGETS test_w2v DESTINATION bin)
#add_executable(w2v_filter ./src/w2v_filter.c) add_executable(w2v_filter ./src/w2v_filter.c)
#target_link_libraries(w2v_filter transparse) target_link_libraries(w2v_filter transparse)
#install (TARGETS w2v_filter DESTINATION bin) target_link_libraries(w2v_filter maca_common)
install (TARGETS w2v_filter DESTINATION bin)
#add_executable(test_word_emb ./src/test_word_emb.c) #add_executable(test_word_emb ./src/test_word_emb.c)
#target_link_libraries(test_word_emb transparse) #target_link_libraries(test_word_emb transparse)
......
...@@ -52,6 +52,20 @@ void one_hot_print(FILE *f, int val, int dim) ...@@ -52,6 +52,20 @@ void one_hot_print(FILE *f, int val, int dim)
fprintf(f, "%d ", (i == val)? 1 : 0); fprintf(f, "%d ", (i == val)? 1 : 0);
} }
/*
 * Validate a feature model before conversion.
 *
 * Walks the fm->nbelem entries of fm->array in order. The first entry whose
 * own nbelem is greater than 1 is reported on stderr as a complex feature
 * and the process is terminated with exit status 1. If no such entry is
 * found the function returns normally and has no other effect.
 */
void check_feature_model(feat_model *fm)
{
  int idx = 0;

  while (idx < fm->nbelem) {
    feat_desc *desc = fm->array[idx];

    /* More than one element in a descriptor: treated as a complex feature. */
    if (desc->nbelem > 1) {
      fprintf(stderr, "feature %d is a complex feature, aborting\n", idx);
      exit(1);
    }
    idx++;
  }
}
void print_header(mcd *m, feat_model *fm) void print_header(mcd *m, feat_model *fm)
{ {
int i; int i;
...@@ -62,23 +76,14 @@ void print_header(mcd *m, feat_model *fm) ...@@ -62,23 +76,14 @@ void print_header(mcd *m, feat_model *fm)
for(i=0; i <fm->nbelem; i++){ for(i=0; i <fm->nbelem; i++){
fd = fm->array[i]; fd = fm->array[i];
if(fd->nbelem > 1){
printf("feature %d is a complex feature, skipping it\n", i);
}
else{
sfd = fd->array[0]; sfd = fd->array[0];
printf("\t%s", sfd->name); printf("\t%s", sfd->name);
} }
}
printf("\n"); printf("\n");
printf("OUT"); printf("OUT");
for(i=0; i <fm->nbelem; i++){ for(i=0; i <fm->nbelem; i++){
fd = fm->array[i]; fd = fm->array[i];
if(fd->nbelem > 1){
printf("feature %d is a complex feature, skipping it\n", i);
}
else{
sfd = fd->array[0]; sfd = fd->array[0];
if(sfd->type == FEAT_TYPE_FORM){printf("\tFORM");continue;} if(sfd->type == FEAT_TYPE_FORM){printf("\tFORM");continue;}
if(sfd->type == FEAT_TYPE_LEMMA){printf("\tLEMMA");continue;} if(sfd->type == FEAT_TYPE_LEMMA){printf("\tLEMMA");continue;}
...@@ -88,7 +93,7 @@ void print_header(mcd *m, feat_model *fm) ...@@ -88,7 +93,7 @@ void print_header(mcd *m, feat_model *fm)
if(sfd->type == FEAT_TYPE_INT){printf("\tINT");continue;} if(sfd->type == FEAT_TYPE_INT){printf("\tINT");continue;}
printf("\tUNK"); printf("\tUNK");
} }
}
printf("\n"); printf("\n");
/* /*
for(i=0; i < m->nb_col; i++){ for(i=0; i < m->nb_col; i++){
...@@ -127,6 +132,7 @@ void cff2fann(context *ctx) ...@@ -127,6 +132,7 @@ void cff2fann(context *ctx)
char feature_type[64]; char feature_type[64];
int feature_valindex; int feature_valindex;
int count = 0; int count = 0;
char *feat_str = NULL;
vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features"); vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
...@@ -142,19 +148,23 @@ void cff2fann(context *ctx) ...@@ -142,19 +148,23 @@ void cff2fann(context *ctx)
if (count % 100 == 0) if (count % 100 == 0)
fprintf(stderr, "%d\r", count); fprintf(stderr, "%d\r", count);
while(token){ while(token){
/* printf("col = %d token = %s max = %d\n", col_nb, token, max_array[col_nb]); */ /* printf("col = %d token = %s\n", col_nb, token); */
val = atoi(token); val = atoi(token);
if(col_nb == 0){ if(col_nb == 0){
/* one_hot_print(stdout, val, ctx->mvt_nb); */ /* one_hot_print(stdout, val, ctx->mvt_nb); */
/* printf("\n"); */ /* printf("\n"); */
printf("%d", val); printf("%d", val);
} else { } else {
sscanf(dico_int2string(vocab, val), "%[^==]==%d", feature_type, &feature_valindex); feat_str = dico_int2string(vocab, val);
if(feat_str){
/* printf("feat str = %s\n", feat_str); */
sscanf(feat_str, "%[^==]==%d", feature_type, &feature_valindex);
/* printf("feature_type = %s\n", feature_type); */ /* printf("feature_type = %s\n", feature_type); */
feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1); feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1);
/* printf("feat_type = %d\n", feat_type); */ /* printf("feat_type = %d\n", feat_type); */
/* printf("%d: ", col_nb); */ /* printf("%d: ", col_nb); */
int mcd_col = m->wf2col[feat_type]; int mcd_col = m->wf2col[feat_type];
/* printf("representation = %d\n", m->representation[mcd_col]); */ /* printf("representation = %d\n", m->representation[mcd_col]); */
if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){ if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
/* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */ /* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */
...@@ -171,6 +181,14 @@ void cff2fann(context *ctx) ...@@ -171,6 +181,14 @@ void cff2fann(context *ctx)
printf("\t%d", feature_valindex); printf("\t%d", feature_valindex);
} }
} }
else{
fprintf(stderr, "WARNING cannot find the description of feature : %d\n", val);
feature_valindex = -1;
printf("\t%d", feature_valindex);
}
}
col_nb++; col_nb++;
token = strtok(NULL , "\t"); token = strtok(NULL , "\t");
} }
...@@ -193,6 +211,9 @@ int main(int argc, char *argv[]) ...@@ -193,6 +211,9 @@ int main(int argc, char *argv[])
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose); ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
check_feature_model(ctx->features_model);
look_for_number_of_features_and_classes(ctx->cff_filename, &nb_feat, &nb_class); look_for_number_of_features_and_classes(ctx->cff_filename, &nb_feat, &nb_class);
ctx->mvt_nb = nb_class; ctx->mvt_nb = nb_class;
......
...@@ -158,6 +158,7 @@ int main(int argc, char *argv[]) ...@@ -158,6 +158,7 @@ int main(int argc, char *argv[])
lemma_from_fplm = fplm_lookup_lemma(exceptions, form, pos, ctx->verbose); lemma_from_fplm = fplm_lookup_lemma(exceptions, form, pos, ctx->verbose);
if(lemma_from_fplm){ if(lemma_from_fplm){
// printf("lemma %s found in exceptions file\n", lemma_from_fplm); // printf("lemma %s found in exceptions file\n", lemma_from_fplm);
// print_word(b0, ctx->mcd_struct, to_lower_string(lemma_from_fplm));
print_word(b0, ctx->mcd_struct, lemma_from_fplm); print_word(b0, ctx->mcd_struct, lemma_from_fplm);
} }
// if lemma is not found in exception file, predict an l_rule // if lemma is not found in exception file, predict an l_rule
...@@ -193,6 +194,7 @@ int main(int argc, char *argv[]) ...@@ -193,6 +194,7 @@ int main(int argc, char *argv[])
} }
/* no rule applied */ /* no rule applied */
if(i == 10){ if(i == 10){
// print_word(b0, ctx->mcd_struct, to_lower_string(form));
print_word(b0, ctx->mcd_struct, form); print_word(b0, ctx->mcd_struct, form);
} }
free(vcode_array); free(vcode_array);
......
...@@ -7,9 +7,9 @@ ...@@ -7,9 +7,9 @@
#define MVT_PARSER_SHIFT 0 #define MVT_PARSER_SHIFT 0
#define MVT_PARSER_REDUCE 1 #define MVT_PARSER_REDUCE 1
#define MVT_PARSER_ROOT 2 #define MVT_PARSER_ROOT 2
#define MVT_PARSER_EOS -1 #define MVT_PARSER_EOS 3
#define MVT_PARSER_LEFT 3 #define MVT_PARSER_LEFT 4
#define MVT_PARSER_RIGHT 4 #define MVT_PARSER_RIGHT 5
/* even movements are left movements (except 0, which is shift and 2 which is root) */ /* even movements are left movements (except 0, which is shift and 2 which is root) */
#define movement_parser_left_code(label) (2 * (label) + 4) #define movement_parser_left_code(label) (2 * (label) + 4)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment