Skip to content
Snippets Groups Projects
Commit fa380ae0 authored by Alexis Nasr
Browse files

Merge branch 'master' of gitlab.lif.univ-mrs.fr:alexis.nasr/macaon2

parents fcfe6c1c f50e7d93
Branches
No related tags found
No related merge requests found
#ifndef __WORD__
#define __WORD__
#include<ctype.h>
#include "mcd.h"
#include "char16.h"
......@@ -28,6 +28,7 @@ typedef struct _word {
#define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5])
#define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6])
*/
/*
 * Suffix accessors on the char16 form of a word.
 *
 * word_get_sN(w) evaluates to the N-th element counted from the end of
 * (w)->form_char16 (N = 1 is the last one).  It evaluates to -1 when w is
 * NULL, when (w)->form_char16 is NULL, or when char16_strlen() reports fewer
 * than N elements.  The argument w is expanded several times, so it must not
 * carry side effects.
 */
#define word_get_s1(w) \
  ((((w) != NULL) && ((w)->form_char16 != NULL) && (char16_strlen((w)->form_char16) >= 1)) \
   ? (w)->form_char16[char16_strlen((w)->form_char16) - 1] \
   : -1)
#define word_get_s2(w) \
  ((((w) != NULL) && ((w)->form_char16 != NULL) && (char16_strlen((w)->form_char16) >= 2)) \
   ? (w)->form_char16[char16_strlen((w)->form_char16) - 2] \
   : -1)
#define word_get_s3(w) \
  ((((w) != NULL) && ((w)->form_char16 != NULL) && (char16_strlen((w)->form_char16) >= 3)) \
   ? (w)->form_char16[char16_strlen((w)->form_char16) - 3] \
   : -1)
......
......@@ -210,9 +210,10 @@ install (TARGETS maca_trans_lemmatizer DESTINATION bin)
#target_link_libraries(test_w2v transparse)
#install (TARGETS test_w2v DESTINATION bin)
#add_executable(w2v_filter ./src/w2v_filter.c)
#target_link_libraries(w2v_filter transparse)
#install (TARGETS w2v_filter DESTINATION bin)
# w2v_filter executable: built from src/w2v_filter.c, linked against the
# transparse and maca_common libraries (in that order), installed into bin.
add_executable(w2v_filter ./src/w2v_filter.c)
target_link_libraries(w2v_filter transparse maca_common)
install(TARGETS w2v_filter DESTINATION bin)
#add_executable(test_word_emb ./src/test_word_emb.c)
#target_link_libraries(test_word_emb transparse)
......
......@@ -43,6 +43,20 @@ void one_hot_print(FILE *f, int val, int dim)
fprintf(f, "%d ", (i == val)? 1 : 0);
}
/*
 * Verify that a feature model only contains simple features.
 *
 * Walks the fm->nbelem descriptors stored in fm->array.  As soon as one of
 * them has more than one element (nbelem > 1) a diagnostic naming its index
 * is written to stderr and the program exits with status 1.
 *
 * A NULL model, or a NULL descriptor slot, is reported on stderr and also
 * terminates the program with status 1 instead of being dereferenced.
 *
 * fm: feature model to validate.
 * Returns normally only when every descriptor has at most one element.
 */
void check_feature_model(feat_model *fm)
{
  int i;
  feat_desc *fd;

  /* Nothing can be validated without a model: fail loudly, not with a segfault. */
  if(fm == NULL){
    fprintf(stderr, "no feature model to check, aborting\n");
    exit(1);
  }

  for(i = 0; i < fm->nbelem; i++){
    fd = fm->array[i];
    if(fd == NULL){
      fprintf(stderr, "feature %d has no description, aborting\n", i);
      exit(1);
    }
    if(fd->nbelem > 1){
      fprintf(stderr, "feature %d is a complex feature, aborting\n", i);
      exit(1);
    }
  }
}
void print_header(mcd *m, feat_model *fm)
{
int i;
......@@ -53,23 +67,14 @@ void print_header(mcd *m, feat_model *fm)
for(i=0; i <fm->nbelem; i++){
fd = fm->array[i];
if(fd->nbelem > 1){
fprintf(stderr, "feature %d is a complex feature, skipping it\n", i);
}
else{
sfd = fd->array[0];
printf("\t%s", sfd->name);
}
}
printf("\n");
printf("OUT");
for(i=0; i <fm->nbelem; i++){
fd = fm->array[i];
if(fd->nbelem > 1){
fprintf(stderr, "feature %d is a complex feature, skipping it\n", i);
}
else{
sfd = fd->array[0];
if(sfd->type == FEAT_TYPE_FORM){printf("\tFORM");continue;}
if(sfd->type == FEAT_TYPE_LEMMA){printf("\tLEMMA");continue;}
......@@ -79,7 +84,7 @@ void print_header(mcd *m, feat_model *fm)
if(sfd->type == FEAT_TYPE_INT){printf("\tINT");continue;}
printf("\tUNK");
}
}
printf("\n");
/*
for(i=0; i < m->nb_col; i++){
......@@ -118,6 +123,7 @@ void cff2fann(context *ctx)
char feature_type[64];
int feature_valindex;
int count = 0;
char *feat_str = NULL;
vocab = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
......@@ -133,19 +139,23 @@ void cff2fann(context *ctx)
if (count % 100 == 0)
fprintf(stderr, "%d\r", count);
while(token){
/* printf("col = %d token = %s max = %d\n", col_nb, token, max_array[col_nb]); */
/* printf("col = %d token = %s\n", col_nb, token); */
val = atoi(token);
if(col_nb == 0){
/* one_hot_print(stdout, val, ctx->mvt_nb); */
/* printf("\n"); */
printf("%d", val);
} else {
sscanf(dico_int2string(vocab, val), "%[^==]==%d", feature_type, &feature_valindex);
feat_str = dico_int2string(vocab, val);
if(feat_str){
/* printf("feat str = %s\n", feat_str); */
sscanf(feat_str, "%[^==]==%d", feature_type, &feature_valindex);
/* printf("feature_type = %s\n", feature_type); */
feat_type = feat_model_get_type_feat_n(ctx->features_model, col_nb - 1);
/* printf("feat_type = %d\n", feat_type); */
/* printf("%d: ", col_nb); */
int mcd_col = m->wf2col[feat_type];
/* printf("representation = %d\n", m->representation[mcd_col]); */
if(m->representation[mcd_col] == MCD_REPRESENTATION_EMB){
/* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */
......@@ -162,6 +172,14 @@ void cff2fann(context *ctx)
printf("\t%d", feature_valindex);
}
}
else{
fprintf(stderr, "WARNING cannot find the description of feature : %d\n", val);
feature_valindex = -1;
printf("\t%d", feature_valindex);
}
}
col_nb++;
token = strtok(NULL , "\t");
}
......@@ -184,6 +202,9 @@ int main(int argc, char *argv[])
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
check_feature_model(ctx->features_model);
look_for_number_of_features_and_classes(ctx->cff_filename, &nb_feat, &nb_class);
ctx->mvt_nb = nb_class;
......
......@@ -158,6 +158,7 @@ int main(int argc, char *argv[])
lemma_from_fplm = fplm_lookup_lemma(exceptions, form, pos, ctx->verbose);
if(lemma_from_fplm){
// printf("lemma %s found in exceptions file\n", lemma_from_fplm);
// print_word(b0, ctx->mcd_struct, to_lower_string(lemma_from_fplm));
print_word(b0, ctx->mcd_struct, lemma_from_fplm);
}
// if lemma is not found in exception file, predict an l_rule
......@@ -193,6 +194,7 @@ int main(int argc, char *argv[])
}
/* no rule applied */
if(i == 10){
// print_word(b0, ctx->mcd_struct, to_lower_string(form));
print_word(b0, ctx->mcd_struct, form);
}
free(vcode_array);
......
......@@ -7,9 +7,9 @@
#define MVT_PARSER_SHIFT 0
#define MVT_PARSER_REDUCE 1
#define MVT_PARSER_ROOT 2
#define MVT_PARSER_EOS -1
#define MVT_PARSER_LEFT 3
#define MVT_PARSER_RIGHT 4
#define MVT_PARSER_EOS 3
#define MVT_PARSER_LEFT 4
#define MVT_PARSER_RIGHT 5
/* even movements are left movements (except 0, which is shift and 2 which is root) */
#define movement_parser_left_code(label) (2 * (label) + 4)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment