diff --git a/CMakeLists.txt b/CMakeLists.txt index 766110fb2776ae8d42ae6209ee1a24e0c8974226..85d4e4bcdce693d1da0be09721efe1118d02158e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,10 @@ project(macaon2) add_definitions("-Wall") +include_directories(maca_common/include) + +add_subdirectory(maca_common) +add_subdirectory(maca_lemmatizer) add_subdirectory(maca_trans_parser) #set(CMAKE_INSTALL_PREFIX ../) diff --git a/maca_common/CMakeLists.txt b/maca_common/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d0dbc552b3f9756143c299ffe07eb309fdbda97c --- /dev/null +++ b/maca_common/CMakeLists.txt @@ -0,0 +1,11 @@ +set(SOURCES src/util.c + src/hash.c + src/dico.c + src/word_emb.c + src/mcd.c + src/dico_vec.c + src/feat_types.c +) + +#compiling library +add_library(maca_common STATIC ${SOURCES}) diff --git a/maca_trans_parser/src/dico.h b/maca_common/include/dico.h similarity index 100% rename from maca_trans_parser/src/dico.h rename to maca_common/include/dico.h diff --git a/maca_trans_parser/src/dico_vec.h b/maca_common/include/dico_vec.h similarity index 100% rename from maca_trans_parser/src/dico_vec.h rename to maca_common/include/dico_vec.h diff --git a/maca_trans_parser/src/feat_types.h b/maca_common/include/feat_types.h similarity index 100% rename from maca_trans_parser/src/feat_types.h rename to maca_common/include/feat_types.h diff --git a/maca_trans_parser/src/hash.h b/maca_common/include/hash.h similarity index 100% rename from maca_trans_parser/src/hash.h rename to maca_common/include/hash.h diff --git a/maca_trans_parser/src/mcd.h b/maca_common/include/mcd.h similarity index 72% rename from maca_trans_parser/src/mcd.h rename to maca_common/include/mcd.h index aad932aaec50f2f4ac9346d1c6edc9783d61ae04..e759789f3667de115689dc6275f032161f3d3784 100644 --- a/maca_trans_parser/src/mcd.h +++ b/maca_common/include/mcd.h @@ -18,15 +18,19 @@ typedef struct { int nb_col; int type2col[FEAT_TYPE_NB]; - int *col2type; + /* int *col2type; */ int *type; + char **type_str; int *representation; char **filename; dico **dico_array; word_emb **word_emb_array; } mcd; -mcd *mcd_read(char *mcd_filename, char *corpus_filename, dico_vec *vocabs); +mcd *mcd_build_conll07(void); +mcd *mcd_read(char *mcd_filename); +void mcd_link_to_dico(mcd *m, dico_vec *vocabs); +void mcd_extract_dico_from_corpus(mcd *m, char *corpus_filename); void mcd_free(mcd *m); int mcd_get_code(mcd *m, char *str, int col); dico_vec *mcd_build_dico_vec(mcd *mcd_struct); diff --git a/maca_trans_parser/src/util.h b/maca_common/include/util.h similarity index 100% rename from maca_trans_parser/src/util.h rename to maca_common/include/util.h diff --git a/maca_trans_parser/src/word_emb.h b/maca_common/include/word_emb.h similarity index 100% rename from maca_trans_parser/src/word_emb.h rename to maca_common/include/word_emb.h diff --git a/maca_trans_parser/src/dico.c b/maca_common/src/dico.c similarity index 100% rename from maca_trans_parser/src/dico.c rename to maca_common/src/dico.c diff --git a/maca_trans_parser/src/dico_vec.c b/maca_common/src/dico_vec.c similarity index 100% rename from maca_trans_parser/src/dico_vec.c rename to maca_common/src/dico_vec.c diff --git a/maca_trans_parser/src/feat_types.c b/maca_common/src/feat_types.c similarity index 100% rename from maca_trans_parser/src/feat_types.c rename to maca_common/src/feat_types.c diff --git a/maca_trans_parser/src/hash.c b/maca_common/src/hash.c similarity index 100% rename from maca_trans_parser/src/hash.c rename to maca_common/src/hash.c diff --git a/maca_common/src/mcd.c b/maca_common/src/mcd.c new file mode 100644 index 0000000000000000000000000000000000000000..a11cae21294ab9a28cf5f51a558d0d1bb44e35fa --- /dev/null +++ b/maca_common/src/mcd.c @@ -0,0 +1,315 @@ +#include<stdio.h> +#include<stdlib.h> +#include<string.h> + +#include "mcd.h" +#include "util.h" +#include "dico.h" +#include "word_emb.h" + + +mcd *mcd_new(int nb_col) +{ + mcd *m = (mcd *)memalloc(sizeof(mcd)); + int i; + m->nb_col = nb_col; + + for(i=0; i < FEAT_TYPE_NB; i++) + m->type2col[i] = -1; + + m->representation = (int *)memalloc(nb_col * sizeof(int)); + m->type = (int *)memalloc(nb_col * sizeof(int)); + m->type_str = (char **)memalloc(nb_col * sizeof(char *)); + /* m->col2type = (int *)memalloc(nb_col * sizeof(int)); */ + m->filename = (char **)memalloc(nb_col * sizeof(char *)); + m->dico_array = (dico **)memalloc(nb_col * sizeof(dico *)); + m->word_emb_array = (word_emb **)memalloc(nb_col * sizeof(word_emb *)); + + for(i=0; i < nb_col; i++){ + m->representation[i] = MCD_REPRESENTATION_NULL; + m->type[i] = -1; + m->type_str[i] = NULL; + /* m->col2type[i] = -1; */ + m->filename[i] = NULL; + m->dico_array[i] = NULL; + m->word_emb_array[i] = NULL;; + } + return m; +} + +void mcd_free(mcd *m) +{ + int i; + for(i=0; i < m->nb_col; i++){ + if(m->dico_array[i]) dico_free(m->dico_array[i]); + if(m->word_emb_array[i]) word_emb_free(m->word_emb_array[i]); + if(m->type_str[i]) free(m->type_str[i]); + } + free(m->representation); + free(m->filename); + free(m->dico_array); + free(m->word_emb_array); + free(m->type_str); + free(m->type); + free(m); +} + +int mcd_get_code(mcd *m, char *str, int col){ + if(m->representation[col] == MCD_REPRESENTATION_VOCAB) + return dico_string2int(m->dico_array[col], str); + if(m->representation[col] == MCD_REPRESENTATION_EMB) + return word_emb_get_code(m->word_emb_array[col], str); + if(m->representation[col] == MCD_REPRESENTATION_INT) + return atoi(str); + return MCD_INVALID_VALUE; +} + +int mcd_max_column_index_in_file(char *mcd_filename) +{ + int max_col = -1; + FILE *f = myfopen(mcd_filename, "r"); + char buffer[1000]; /* ugly */ + int column; + char type[100]; + char representation[100]; + char filename[500]; /* ugly */ + int fields_number; + int line_number = 0; + + while(fgets(buffer, 1000, f)){ + line_number++; + if(feof(f)) break; + if((buffer[0] == '\n') || (buffer[0] == '#')) continue; + fields_number = sscanf(buffer, "%d %s %s %s", &column, type, representation, filename); + if(fields_number != 4){ + fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename); + continue; + } + if(column > max_col) max_col = column; + } + return max_col; +} + + +/* takes as argument an mcd structure (m) and the name of a corpus file (corpus_filename) */ +/* populates the vocabularies of m with values found in corpus_filename */ + +void mcd_extract_dico_from_corpus(mcd *m, char *corpus_filename) +{ + int column; + + for(column=0; column < m->nb_col; column++){ + if((m->representation[column] == MCD_REPRESENTATION_VOCAB) + /* && (strcmp(m->filename[column], "_")) */ + && (m->dico_array[column] == NULL)){ + m->dico_array[column] = dico_extract_from_corpus(corpus_filename, column, m->type_str[column]); + fprintf(stderr, "extracting dico %s from corpus\n", m->type_str[column]); + } + } +} + + +/* takes as argument an mcd structure (m) and a dictionary vector (vocabs) */ +/* links the vocabularies of m to vocabularies of vocabs (based on their names) */ + +void mcd_link_to_dico(mcd *m, dico_vec *vocabs) +{ + int column; + for(column=0; column < m->nb_col; column++){ + if((m->representation[column] == MCD_REPRESENTATION_VOCAB) + && (!strcmp(m->filename[column], "_")) + && (m->dico_array[column] == NULL)){ + m->dico_array[column] = dico_vec_get_dico(vocabs, m->type_str[column]); + fprintf(stderr, "linking to dico %s\n", m->type_str[column]); + } + } +} + +/* read an multi column description file and produces an mcd structure */ + +mcd *mcd_read(char *mcd_filename) +{ + int column; + char type[100]; + char representation[100]; + char filename[500]; /* ugly */ + int fields_number; + int line_number = 0; + char buffer[1000]; /* ugly */ + int nb_col = mcd_max_column_index_in_file(mcd_filename); + mcd *m = mcd_new(nb_col + 1); + FILE *f = myfopen(mcd_filename, "r"); + /* int first = 1; */ + + while(fgets(buffer, 1000, f)){ + line_number++; + if(feof(f)) break; + if((buffer[0] == '\n') || (buffer[0] == '#')) continue; + fields_number = sscanf(buffer, "%d %s %s %s", &column, type, representation, filename); + if(fields_number != 4){ + /* fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename); */ + continue; + } + fprintf(stderr, "column = %d type = %s representation = %s filename = %s\n", column, type, representation, filename); + m->type[column] = feat_type_string2int(type); + m->type_str[column] = strdup(type); + if(m->type[column] == -1){ + fprintf(stderr, "in line %d of mcd file %s invalid type, I'm skipping it\n", line_number, mcd_filename); + continue; + } + m->type2col[m->type[column]] = column; + + if(!strcmp(representation, "_")) m->representation[column] = MCD_REPRESENTATION_NULL; + else if(!strcmp(representation, "EMB")) m->representation[column] = MCD_REPRESENTATION_EMB; + else if(!strcmp(representation, "VOCAB")) m->representation[column] = MCD_REPRESENTATION_VOCAB; + else if(!strcmp(representation, "INT")) m->representation[column] = MCD_REPRESENTATION_INT; + else{ + fprintf(stderr, "in line %d of mcd file %s invalid mode of representation, I'm skipping it\n", line_number, mcd_filename); + m->representation[column] = MCD_REPRESENTATION_NULL; + } + if(m->representation[column] != MCD_REPRESENTATION_NULL) + m->filename[column] = strdup(filename); + + if(strcmp(m->filename[column], "_")){ + if(m->representation[column] == MCD_REPRESENTATION_EMB){ + fprintf(stderr, "loading word embedding %s\n", m->filename[column]); + m->word_emb_array[column] = word_emb_load(m->filename[column]); + } + else if(m->representation[column] == MCD_REPRESENTATION_VOCAB){ + fprintf(stderr, "loading dico %s\n", m->filename[column]); + m->dico_array[column] = dico_read(m->filename[column], 0.5); + } + } + } + fclose(f); + return m; +} + + +mcd *mcd_build_conll07(void) +{ + mcd *m = mcd_new(8); + m->type[0]=FEAT_TYPE_INDEX; + m->type_str[0]=strdup("INDEX"); + m->representation[0]= MCD_REPRESENTATION_INT; + m->type2col[FEAT_TYPE_INDEX] = 0; + + m->type[1]=FEAT_TYPE_FORM; + m->type_str[1]=strdup("FORM"); + m->representation[1]= MCD_REPRESENTATION_VOCAB; + m->type2col[FEAT_TYPE_FORM] = 1; + + m->type[2]=FEAT_TYPE_LEMMA; + m->type_str[2]=strdup("LEMMA"); + m->representation[2]= MCD_REPRESENTATION_VOCAB; + m->type2col[FEAT_TYPE_LEMMA] = 2; + + m->type[3]=FEAT_TYPE_CPOS; + m->type_str[3]=strdup("CPOS"); + m->representation[3]= MCD_REPRESENTATION_VOCAB; + m->type2col[FEAT_TYPE_CPOS] = 3; + + m->type[4]=FEAT_TYPE_POS; + m->type_str[4]=strdup("POS"); + m->representation[4]= MCD_REPRESENTATION_VOCAB; + m->type2col[FEAT_TYPE_POS] = 4; + + m->type[5]=FEAT_TYPE_FEATS; + m->type_str[5]=strdup("FEATS"); + m->representation[5]= MCD_REPRESENTATION_VOCAB; + m->type2col[FEAT_TYPE_FEATS] = 5; + + m->type[6]=FEAT_TYPE_GOV; + m->type_str[6]=strdup("GOV"); + m->representation[6]= MCD_REPRESENTATION_INT; + m->type2col[FEAT_TYPE_GOV] = 6; + + m->type[7]=FEAT_TYPE_LABEL; + m->type_str[7]=strdup("LABEL"); + m->representation[7]= MCD_REPRESENTATION_VOCAB; + m->type2col[FEAT_TYPE_LABEL] = 7; + + return m; +} + +mcd *mcd_read_old(char *mcd_filename, char *corpus_filename, dico_vec *vocabs) +{ + int column; + char type[100]; + char representation[100]; + char filename[500]; /* ugly */ + int fields_number; + int line_number = 0; + char buffer[1000]; /* ugly */ + int nb_col = mcd_max_column_index_in_file(mcd_filename); + mcd *m = mcd_new(nb_col + 1); + FILE *f = myfopen(mcd_filename, "r"); + /* int first = 1; */ + + while(fgets(buffer, 1000, f)){ + line_number++; + if(feof(f)) break; + if((buffer[0] == '\n') || (buffer[0] == '#')) continue; + fields_number = sscanf(buffer, "%d %s %s %s", &column, type, representation, filename); + if(fields_number != 4){ + /* fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename); */ + continue; + } + fprintf(stderr, "column = %d type = %s representation = %s filename = %s\n", column, type, representation, filename); + m->type[column] = feat_type_string2int(type); + if(m->type[column] == -1){ + fprintf(stderr, "in line %d of mcd file %s invalid type, I'm skipping it\n", line_number, mcd_filename); + continue; + } + m->type2col[m->type[column]] = column; + /* m->col2type[column] = m->type[column]; */ + if(!strcmp(representation, "_")) m->representation[column] = MCD_REPRESENTATION_NULL; + else if(!strcmp(representation, "EMB")) m->representation[column] = MCD_REPRESENTATION_EMB; + else if(!strcmp(representation, "VOCAB")) m->representation[column] = MCD_REPRESENTATION_VOCAB; + else if(!strcmp(representation, "INT")) m->representation[column] = MCD_REPRESENTATION_INT; + else{ + fprintf(stderr, "in line %d of mcd file %s invalid mode of representation, I'm skipping it\n", line_number, mcd_filename); + m->representation[column] = MCD_REPRESENTATION_NULL; + } + if(m->representation[column] != MCD_REPRESENTATION_NULL){ + m->filename[column] = strdup(filename); + if(m->representation[column] == MCD_REPRESENTATION_EMB){ + fprintf(stderr, "loading word embedding %s\n", m->filename[column]); + m->word_emb_array[column] = word_emb_load(m->filename[column]); + } + else if(m->representation[column] == MCD_REPRESENTATION_VOCAB){ + if(!strcmp(m->filename[column], "_")){ + if(corpus_filename){ + fprintf(stderr, "extracting dico %s from corpus\n", type); + m->dico_array[column] = dico_extract_from_corpus(corpus_filename, column, type); + } + else if(vocabs){ + fprintf(stderr, "linking to dico %s\n", type); + m->dico_array[column] = dico_vec_get_dico(vocabs, type); + } + if(m->dico_array[column] == NULL) + fprintf(stderr, "cannot find dico %s\n", type); + } + else{ + fprintf(stderr, "loading dico %s\n", m->filename[column]); + m->dico_array[column] = dico_read(m->filename[column], 0.5); + } + } + } + } + fclose(f); + return m; +} + + +dico_vec *mcd_build_dico_vec(mcd *mcd_struct) +{ + dico_vec *dv = dico_vec_new(); + int i; + for(i=0; i < mcd_struct->nb_col; i++){ + if(mcd_struct->dico_array[i]){ + dico_vec_add(dv, mcd_struct->dico_array[i]); + } + } + return dv; +} diff --git a/maca_trans_parser/src/util.c b/maca_common/src/util.c similarity index 100% rename from maca_trans_parser/src/util.c rename to maca_common/src/util.c diff --git a/maca_trans_parser/src/word_emb.c b/maca_common/src/word_emb.c similarity index 100% rename from maca_trans_parser/src/word_emb.c rename to maca_common/src/word_emb.c diff --git a/maca_lemmatizer/CMakeLists.txt b/maca_lemmatizer/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a4f7e9da1fccafb07525ef1d29add9ffeb7a5a6f --- /dev/null +++ b/maca_lemmatizer/CMakeLists.txt @@ -0,0 +1,12 @@ +set(SOURCES src/context.c) + +##compiling library +include_directories(src) +add_library(maca_lemmatizer_lib STATIC ${SOURCES}) + +#compiling, linking and installing executables + +add_executable(maca_lemmatizer ./src/maca_lemmatizer.c) +target_link_libraries(maca_lemmatizer maca_lemmatizer_lib) +target_link_libraries(maca_lemmatizer maca_common) +install (TARGETS maca_lemmatizer DESTINATION bin) diff --git a/maca_lemmatizer/src/context.c b/maca_lemmatizer/src/context.c new file mode 100644 index 0000000000000000000000000000000000000000..514fcd040e2d7f6cb2f865455f4822e236a69640 --- /dev/null +++ b/maca_lemmatizer/src/context.c @@ -0,0 +1,147 @@ +#include<stdlib.h> +#include<stdio.h> +#include<string.h> +#include<unistd.h> +#include<getopt.h> +#include "context.h" +#include "util.h" + + +void context_set_linguistic_resources_filenames(context *ctx); + +void context_free(context *ctx) +{ + if(ctx->program_name) free(ctx->program_name); + if(ctx->conll_filename) free(ctx->conll_filename); + if(ctx->fplm_filename) free(ctx->fplm_filename); + if(ctx->language) free(ctx->language); + if(ctx->maca_data_path) free(ctx->maca_data_path); + free(ctx); +} + +context *context_new(void) +{ + context *ctx = (context *)memalloc(sizeof(context)); + + ctx->help = 0; + ctx->verbose = 0; + ctx->debug_mode = 0; + ctx->program_name = NULL; + ctx->conll_filename = NULL; + ctx->fplm_filename = NULL; + ctx->mcd_filename = NULL; + ctx->mcd_struct = NULL; + ctx->language = strdup("fr"); + ctx->maca_data_path = NULL; + return ctx; +} + +void context_general_help_message(context *ctx) +{ + fprintf(stderr, "usage: %s [options]\n", ctx->program_name); + fprintf(stderr, "Options:\n"); + fprintf(stderr, "\t-h --help : print this message\n"); + fprintf(stderr, "\t-v --verbose : activate verbose mode\n"); + fprintf(stderr, "\t-r --hratio <float> : set the occupation ratio of hash tables (default is 0.5)\n"); +} + +void context_conll_help_message(context *ctx){ + fprintf(stderr, "\t-i --conll <file> : conll file name\n"); +} +void context_fplm_help_message(context *ctx){ + fprintf(stderr, "\t-f --fplm <file> : fplm (form pos lemma morpho) file\n"); +} +void context_mcd_help_message(context *ctx){ + fprintf(stderr, "\t-m --mcd <file> : multi column description file name\n"); +} +void context_language_help_message(context *ctx){ + fprintf(stderr, "\t-C --language : identifier of the language to use\n"); +} +void context_maca_data_path_help_message(context *ctx){ + fprintf(stderr, "\t-M --maca_data_path : path to maca_data directory\n"); +} + +context *context_read_options(int argc, char *argv[]) +{ + int c; + int option_index = 0; + context *ctx = context_new(); + + ctx->program_name = strdup(argv[0]); + + static struct option long_options[8] = + { + {"help", no_argument, 0, 'h'}, + {"verbose", no_argument, 0, 'v'}, + {"debug", no_argument, 0, 'd'}, + {"conll", required_argument, 0, 'i'}, + {"mcd", required_argument, 0, 'm'}, + {"language", required_argument, 0, 'C'}, + {"fplm", required_argument, 0, 'f'}, + {"maca_data_path", required_argument, 0, 'M'} + }; + optind = 0; + opterr = 0; + + while ((c = getopt_long (argc, argv, "hvdi:f:m:C:M:", long_options, &option_index)) != -1){ + switch (c) + { + case 'd': + ctx->debug_mode = 1; + break; + case 'h': + ctx->help = 1; + break; + case 'v': + ctx->verbose = 1; + break; + case 'f': + ctx->fplm_filename = strdup(optarg); + break; + case 'i': + ctx->conll_filename = strdup(optarg); + break; + case 'm': + ctx->mcd_filename = strdup(optarg); + ctx->mcd_struct = mcd_read(ctx->mcd_filename); + break; + case 'C': + ctx->language = strdup(optarg); + break; + case 'M': + ctx->maca_data_path = strdup(optarg); + break; + } + } + + context_set_linguistic_resources_filenames(ctx); + + if(ctx->mcd_filename == NULL) + ctx->mcd_struct = mcd_build_conll07(); + + return ctx; +} + +void context_set_linguistic_resources_filenames(context *ctx) +{ + char absolute_path[500]; + char absolute_filename[500]; + + absolute_path[0] = '\0'; + + if(ctx->maca_data_path) + strcat(absolute_path, ctx->maca_data_path); + else + strcat(absolute_path, getenv("MACAON_DIR")); + + strcat(absolute_path, "/"); + strcat(absolute_path, ctx->language); + strcat(absolute_path, "/bin/"); + + if(!ctx->fplm_filename){ + strcpy(absolute_filename, absolute_path); + strcat(absolute_filename, DEFAULT_FPLM_FILENAME); + ctx->fplm_filename = strdup(absolute_filename); + } + +} diff --git a/maca_lemmatizer/src/context.h b/maca_lemmatizer/src/context.h new file mode 100644 index 0000000000000000000000000000000000000000..83c607104947882237e02b92275669c76e812657 --- /dev/null +++ b/maca_lemmatizer/src/context.h @@ -0,0 +1,38 @@ +#ifndef __MACA_LEMMATIZER_CONTEXT__ +#define __MACA_LEMMATIZER_CONTEXT__ + +#include "mcd.h" +#include <stdlib.h> + +#define DEFAULT_FPLM_FILENAME "fplm" + + + +typedef struct { + int help; + int verbose; + int debug_mode; + char *program_name; + char *conll_filename; + char *fplm_filename; + char *language; + char *maca_data_path; + char *mcd_filename; + mcd *mcd_struct; +} context; + + + +context *context_new(void); +void context_free(context *ctx); + +context *context_read_options(int argc, char *argv[]); +void context_general_help_message(context *ctx); +void context_conll_help_message(context *ctx); +void context_language_help_message(context *ctx); +void context_fplm_help_message(context *ctx); +void context_maca_data_path_help_message(context *ctx); +void context_mcd_help_message(context *ctx); + + +#endif diff --git a/maca_lemmatizer/src/maca_lemmatizer.c b/maca_lemmatizer/src/maca_lemmatizer.c new file mode 100644 index 0000000000000000000000000000000000000000..6737bbb366ea379386d79a8a182a367943c3a9a1 --- /dev/null +++ b/maca_lemmatizer/src/maca_lemmatizer.c @@ -0,0 +1,154 @@ +#include<stdio.h> +#include<stdlib.h> +#include<string.h> +#include<ctype.h> + +#include"util.h" +#include"hash.h" +#include"mcd.h" +#include"context.h" + +void maca_lemmatizer_help_message(context *ctx) +{ + context_general_help_message(ctx); + fprintf(stderr, "INPUT\n"); + context_conll_help_message(ctx); + context_mcd_help_message(ctx); + context_language_help_message(ctx); + context_maca_data_path_help_message(ctx); + context_fplm_help_message(ctx); +} + + +void maca_lemmatizer_check_options(context *ctx){ + if(!ctx->conll_filename + /* || !ctx->perc_model_filename + || !ctx->mcd_filename + || !ctx->vocabs_filename + || !ctx->features_model_filename*/ + || ctx->help + ){ + maca_lemmatizer_help_message(ctx); + exit(1); + } +} + +char **read_fplm_file(char *fplm_filename, hash *form_pos_ht) +{ + char form[1000]; + char pos[1000]; + char lemma[1000]; + char morpho[1000]; + int num = 0; + char **lemma_array; + int lemma_array_size = 10000; + + FILE *f= myfopen(fplm_filename, "r"); + int fields_nb; + + lemma_array = (char **)memalloc(lemma_array_size * sizeof(char *)); + + while(!feof(f)){ + fields_nb = fscanf(f, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho); + if(fields_nb != 4){ + fprintf(stderr, "incorrect fplm entry, skipping it\n"); + continue; + } + strcat(form, "/"); + strcat(form, pos); + hash_add(form_pos_ht, strdup(form), num); + + if(num >= lemma_array_size){ + lemma_array_size = 2 * (lemma_array_size) + 1; + lemma_array = realloc(lemma_array, (lemma_array_size) * sizeof(char *)); + } + + /* if(lemma_array[num] == NULL) */ + lemma_array[num] = strdup(lemma); + num++; + } + /* fprintf(stderr, "%d entries loaded\n", num); */ + return lemma_array; +} + +char *to_lower_string(char *s) +{ + int i; + for(i=0; i < strlen(s); i++) + s[i] = tolower(s[i]); + return s; +} + + +int main(int argc, char *argv[]) +{ + hash *form_pos_ht = hash_new(1000000); + char buffer[10000]; + char *form; + char *pos; + char *token; + int column_nb; + char form_pos[500]; + char *lemma; + int index_form_pos; + char **lemma_array; + context *ctx; + + ctx = context_read_options(argc, argv); + maca_lemmatizer_check_options(ctx); + + FILE *f = myfopen(ctx->conll_filename, "r"); + + lemma_array = read_fplm_file(ctx->fplm_filename, form_pos_ht); + + /* look for a valid word */ + while(fgets(buffer, 10000, f)){ + if(feof(f)) return 0; /* no more words to read */ + if((buffer[0] == '\n') || (buffer[0] == ' ')){ + printf("\n"); + continue; + } + + buffer[strlen(buffer)-1] = '\0'; + printf("%s", buffer); + token = strtok(buffer, "\t"); + column_nb = 0; + form = NULL; + pos = NULL; + do{ + if((column_nb < ctx->mcd_struct->nb_col) && (ctx->mcd_struct->type[column_nb] == FEAT_TYPE_FORM)) + form = strdup(token); + if((column_nb < ctx->mcd_struct->nb_col) && (ctx->mcd_struct->type[column_nb] == FEAT_TYPE_POS)) + pos = strdup(token); + column_nb++; + } while((token = strtok(NULL , "\t"))); + + strcpy(form_pos, form); + strcat(form_pos, "/"); + strcat(form_pos, pos); + index_form_pos = hash_get_val(form_pos_ht, form_pos); + if(index_form_pos != HASH_INVALID_VAL){ + lemma = lemma_array[index_form_pos]; + } + else{ + to_lower_string(form_pos); + index_form_pos = hash_get_val(form_pos_ht, form_pos); + if(index_form_pos != HASH_INVALID_VAL){ + lemma = lemma_array[index_form_pos]; + } + else + lemma = form; + } + + /* printf("form = %s pos = %s (%s) lemma = %s\n", form, pos, form_pos, lemma); */ + printf("\t%s\n", lemma); + + if(pos)free(pos); + if(form)free(form); + } + free(lemma_array); + hash_free(form_pos_ht); + + return 0; +} + diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index 739776227acc569240ce915852f59890fdd24ae4..c638a2c3ad89a17f9036cc525123a67608e9d166 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -1,10 +1,8 @@ set(SOURCES src/context.c - src/dico_vec.c src/feat_desc.c src/feature_table.c src/movement.c src/sentence.c - src/util.c src/feat_fct.c src/feat_vec.c src/global_feat_vec.c @@ -12,24 +10,17 @@ set(SOURCES src/context.c src/simple_decoder.c src/cf_file.c src/feat_lib.c - src/hash.c src/perceptron.c src/stack.c src/word.c src/config2feat_vec.c src/depset.c src/feat_model.c - src/word_emb.c src/config.c - src/dico.c - src/feat_types.c - src/mcd.c src/queue.c src/beam.c ) - - #compiling library include_directories(src) add_library(transparse STATIC ${SOURCES}) @@ -38,23 +29,28 @@ add_library(transparse STATIC ${SOURCES}) add_executable(maca_trans_parser_conll2cff ./src/transform_treebank.c) target_link_libraries(maca_trans_parser_conll2cff transparse) +target_link_libraries(maca_trans_parser_conll2cff maca_common) install (TARGETS maca_trans_parser_conll2cff DESTINATION bin) add_executable(maca_trans_parser ./src/decode.c) target_link_libraries(maca_trans_parser transparse) +target_link_libraries(maca_trans_parser maca_common) install (TARGETS maca_trans_parser DESTINATION bin) add_executable(maca_trans_parser_train ./src/train_perceptron.c) target_compile_options(maca_trans_parser_train INTERFACE -Wall) target_link_libraries(maca_trans_parser_train transparse) +target_link_libraries(maca_trans_parser_train maca_common) install (TARGETS maca_trans_parser_train DESTINATION bin) add_executable(maca_trans_parser_train_from_cff ./src/train.c) target_link_libraries(maca_trans_parser_train_from_cff transparse) +target_link_libraries(maca_trans_parser_train_from_cff maca_common) install (TARGETS maca_trans_parser_train_from_cff DESTINATION bin) add_executable(maca_trans_parser_cff_cutoff ./src/cff_cutoff.c) target_link_libraries(maca_trans_parser_cff_cutoff transparse) +target_link_libraries(maca_trans_parser_cff_cutoff maca_common) install (TARGETS maca_trans_parser_cff_cutoff DESTINATION bin) #add_executable(test_w2v ./src/test_w2v.c) diff --git a/maca_trans_parser/src/beam.c b/maca_trans_parser/src/beam.c index f0ba3ceed673bcfbcb48a75f077d5801259eed7e..ecfc0ea05b75ea0f1aa2186dd532e23473e34a70 100644 --- a/maca_trans_parser/src/beam.c +++ b/maca_trans_parser/src/beam.c @@ -176,8 +176,6 @@ config *beam_decoder_sentence(config *initial_config, dico *dico_features, featu beam *next_beam= beam_new(beam_width); beam *final_beam= beam_new(beam_width); beam *tmp_beam= NULL; - int i; - float max; config *argmax; int step = 0; diff --git a/maca_trans_parser/src/context.c b/maca_trans_parser/src/context.c index 1d98c287d639216f45ea26206cd6c99850601c45..21223ad89edaf5ec867cc34e6096cb5c98af0b22 100644 --- a/maca_trans_parser/src/context.c +++ b/maca_trans_parser/src/context.c @@ -256,6 +256,7 @@ context *context_read_options(int argc, char *argv[]) break; case 'C': ctx->mcd_filename = strdup(optarg); + ctx->mcd_struct = mcd_read(ctx->mcd_filename); break; case 'F': ctx->features_model_filename = strdup(optarg); @@ -281,11 +282,15 @@ context *context_read_options(int argc, char *argv[]) ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1; }*/ + /* if(ctx->features_model && ctx->mcd_struct) feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb); - + */ context_set_linguistic_resources_filenames(ctx); + if(ctx->mcd_filename == NULL){ + ctx->mcd_struct = mcd_build_conll07(); + } return ctx; } @@ -309,25 +314,25 @@ void context_set_linguistic_resources_filenames(context *ctx) if(!ctx->perc_model_filename){ strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, STANDARD_MODEL_FILENAME); + strcat(absolute_filename, DEFAULT_MODEL_FILENAME); ctx->perc_model_filename = strdup(absolute_filename); } if(!ctx->vocabs_filename){ strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, STANDARD_VOCABS_FILENAME); + strcat(absolute_filename, DEFAULT_VOCABS_FILENAME); ctx->vocabs_filename = strdup(absolute_filename); } - if(!ctx->mcd_filename){ + /* if(!ctx->mcd_filename){ strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, STANDARD_MULTI_COL_DESC_FILENAME); + strcat(absolute_filename, DEFAULT_MULTI_COL_DESC_FILENAME); ctx->mcd_filename = strdup(absolute_filename); - } + }*/ if(!ctx->features_model_filename){ strcpy(absolute_filename, absolute_path); - strcat(absolute_filename, STANDARD_FEATURES_MODEL_FILENAME); + strcat(absolute_filename, DEFAULT_FEATURES_MODEL_FILENAME); ctx->features_model_filename = strdup(absolute_filename); } diff --git a/maca_trans_parser/src/context.h b/maca_trans_parser/src/context.h index cab2a216c7284e863306f8d8e6a82fd94b3b8a7c..222f8ad31a978ad60623f56e99fbd57545ecf46d 100644 --- a/maca_trans_parser/src/context.h +++ b/maca_trans_parser/src/context.h @@ -4,10 +4,10 @@ #define TEST_MODE 1 #define TRAIN_MODE 2 -#define STANDARD_MULTI_COL_DESC_FILENAME "maca_trans_parser.mcd" -#define STANDARD_FEATURES_MODEL_FILENAME "maca_trans_parser.fm" -#define STANDARD_VOCABS_FILENAME "maca_trans_parser.vocab" -#define STANDARD_MODEL_FILENAME "maca_trans_parser.model" +#define DEFAULT_MULTI_COL_DESC_FILENAME "maca_trans_parser.mcd" +#define DEFAULT_FEATURES_MODEL_FILENAME "maca_trans_parser.fm" +#define DEFAULT_VOCABS_FILENAME "maca_trans_parser.vocab" +#define DEFAULT_MODEL_FILENAME "maca_trans_parser.model" #include "dico_vec.h" #include "feat_model.h" diff --git a/maca_trans_parser/src/decode.c b/maca_trans_parser/src/decode.c index 909f923b47584acc679e0f0554d9cbb050e0e806..5f8679dc5eb01c010c890d89dd3f7ffa171e1735 100644 --- a/maca_trans_parser/src/decode.c +++ b/maca_trans_parser/src/decode.c @@ -54,7 +54,9 @@ int main(int argc, char *argv[]) decode_check_options(ctx); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); - ctx->mcd_struct = mcd_read(ctx->mcd_filename, NULL, ctx->vocabs); + mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs); + + ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); if(ctx->dico_labels == NULL){ @@ -69,7 +71,7 @@ int main(int argc, char *argv[]) /* when in stream mode, force to renumber the tokens (ugly !) */ if(ctx->stream_mode){ - ctx->mcd_struct->col2type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1; + ctx->mcd_struct->type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1; } diff --git a/maca_trans_parser/src/depset.c b/maca_trans_parser/src/depset.c index 11f5ef8069c0d1c228e0c9fb0ab05ef1a9c039a5..c69c3122205ffee7c790b1ac646e002d6600ad8d 100644 --- a/maca_trans_parser/src/depset.c +++ b/maca_trans_parser/src/depset.c @@ -90,6 +90,7 @@ char *skip_index(char *buffer) if(buffer[i] < '0' || buffer[i] > '9' || buffer[i] == ' ' || buffer[i] == '\t') return &buffer[i]; } + return NULL; } void depset_print_new_index(FILE *f, depset *d, dico *dico_labels) diff --git a/maca_trans_parser/src/feat_fct.c b/maca_trans_parser/src/feat_fct.c index 515116268b355db60d85ff90708e10deda35da62..190ba38f5000627f917b771e4c2c74827b312ba4 100644 --- a/maca_trans_parser/src/feat_fct.c +++ b/maca_trans_parser/src/feat_fct.c @@ -1,282 +1,282 @@ #include<stdio.h> #include<stdlib.h> #include<string.h> +#include"stack.h" #include"feat_fct.h" #include"feat_types.h" /* word features */ -int s0f(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_FORM];} -int s0l(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_LEMMA];} -int s0c(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_CPOS];} -int s0p(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_POS];} -int s0m(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_FEATS];} -int s0s(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_STAG];} -int s0A(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_A];} -int s0B(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_B];} -int s0C(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_C];} -int s0D(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_D];} -int s0E(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_E];} -int s0F(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_F];} -int s0G(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_G];} -int s0H(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_H];} -int s0I(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_I];} -int s0J(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_J];} -int s0K(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_K];} -int s0L(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_L];} -int s0M(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_M];} -int s0N(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_N];} -int s0O(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_O];} -int s0P(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_P];} -int s0Q(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_Q];} -int s0R(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_R];} -int s0S(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_S];} -int s0T(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_T];} -int s0U(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_U];} -int s0V(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_V];} -int s0W(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_W];} -int s0X(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_X];} -int s0Y(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_Y];} -int s0Z(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->feat_array[FEAT_TYPE_Z];} - -int s0U1(config *c) {return (c->st->top < 1) ? -1 : c->st->array[c->st->top - 1]->U1;} - -int s1f(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_FORM];} -int s1l(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_LEMMA];} -int s1c(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_CPOS];} -int s1p(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_POS];} -int s1m(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_FEATS];} -int s1s(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_STAG];} -int s1A(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_A];} -int s1B(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_B];} -int s1C(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_C];} -int s1D(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_D];} -int s1E(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_E];} -int s1F(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_F];} -int s1G(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_G];} -int s1H(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_H];} -int s1I(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_I];} -int s1J(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_J];} -int s1K(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_K];} -int s1L(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_L];} -int s1M(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_M];} -int s1N(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_N];} -int s1O(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_O];} -int s1P(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_P];} -int s1Q(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_Q];} -int s1R(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_R];} -int s1S(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_S];} -int s1T(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_T];} -int s1U(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_U];} -int s1V(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_V];} -int s1W(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_W];} -int s1X(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_X];} -int s1Y(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_Y];} -int s1Z(config *c) {return (c->st->top < 2) ? -1 : c->st->array[c->st->top - 2]->feat_array[FEAT_TYPE_Z];} - -int s2f(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_FORM];} -int s2l(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_LEMMA];} -int s2c(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_CPOS];} -int s2p(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_POS];} -int s2m(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_FEATS];} -int s2s(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_STAG];} -int s2A(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_A];} -int s2B(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_B];} -int s2C(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_C];} -int s2D(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_D];} -int s2E(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_E];} -int s2F(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_F];} -int s2G(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_G];} -int s2H(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_H];} -int s2I(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_I];} -int s2J(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_J];} -int s2K(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_K];} -int s2L(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_L];} -int s2M(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_M];} -int s2N(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_N];} -int s2O(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_O];} -int s2P(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_P];} -int s2Q(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_Q];} -int s2R(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_R];} -int s2S(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_S];} -int s2T(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_T];} -int s2U(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_U];} -int s2V(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_V];} -int s2W(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_W];} -int s2X(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_X];} -int s2Y(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_Y];} -int s2Z(config *c) {return (c->st->top < 3) ? -1 : c->st->array[c->st->top - 3]->feat_array[FEAT_TYPE_Z];} - -int s3f(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_FORM];} -int s3l(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_LEMMA];} -int s3c(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_CPOS];} -int s3p(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_POS];} -int s3m(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_FEATS];} -int s3s(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_STAG];} -int s3A(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_A];} -int s3B(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_B];} -int s3C(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_C];} -int s3D(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_D];} -int s3E(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_E];} -int s3F(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_F];} -int s3G(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_G];} -int s3H(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_H];} -int s3I(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_I];} -int s3J(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_J];} -int s3K(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_K];} -int s3L(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_L];} -int s3M(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_M];} -int s3N(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_N];} -int s3O(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_O];} -int s3P(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_P];} -int s3Q(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_Q];} -int s3R(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_R];} -int s3S(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_S];} -int s3T(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_T];} -int s3U(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_U];} -int s3V(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_V];} -int s3W(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_W];} -int s3X(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_X];} -int s3Y(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_Y];} -int s3Z(config *c) {return (c->st->top < 4) ? -1 : c->st->array[c->st->top - 4]->feat_array[FEAT_TYPE_Z];} - -int b0f(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_FORM];} -int b0l(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_LEMMA];} -int b0c(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_CPOS];} -int b0p(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_POS];} -int b0m(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_FEATS];} -int b0s(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_STAG];} -int b0A(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_A];} -int b0B(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_B];} -int b0C(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_C];} -int b0D(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_D];} -int b0E(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_E];} -int b0F(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_F];} -int b0G(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_G];} -int b0H(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_H];} -int b0I(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_I];} -int b0J(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_J];} -int b0K(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_K];} -int b0L(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_L];} -int b0M(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_M];} -int b0N(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_N];} -int b0O(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_O];} -int b0P(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_P];} -int b0Q(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_Q];} -int b0R(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_R];} -int b0S(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_S];} -int b0T(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_T];} -int b0U(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_U];} -int b0V(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_V];} -int b0W(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_W];} -int b0X(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_X];} -int b0Y(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_Y];} -int b0Z(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->feat_array[FEAT_TYPE_Z];} - -int b0U1(config *c) {return (c->bf->nbelem < 1) ? -1 : queue_elt_n(c->bf, 0)->U1;} - - -int b1f(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_FORM];} -int b1l(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_LEMMA];} -int b1c(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_CPOS];} -int b1p(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_POS];} -int b1m(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_FEATS];} -int b1s(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_STAG];} -int b1A(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_A];} -int b1B(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_B];} -int b1C(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_C];} -int b1D(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_D];} -int b1E(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_E];} -int b1F(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_F];} -int b1G(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_G];} -int b1H(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_H];} -int b1I(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_I];} -int b1J(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_J];} -int b1K(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_K];} -int b1L(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_L];} -int b1M(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_M];} -int b1N(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_N];} -int b1O(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_O];} -int b1P(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_P];} -int b1Q(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_Q];} -int b1R(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_R];} -int b1S(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_S];} -int b1T(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_T];} -int b1U(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_U];} -int b1V(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_V];} -int b1W(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_W];} -int b1X(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_X];} -int b1Y(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_Y];} -int b1Z(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->feat_array[FEAT_TYPE_Z];} - -int b1U1(config *c) {return (c->bf->nbelem < 2) ? -1 : queue_elt_n(c->bf, 1)->U1;} - - -int b2f(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_FORM];} -int b2l(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_LEMMA];} -int b2c(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_CPOS];} -int b2p(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_POS];} -int b2m(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_FEATS];} -int b2s(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_STAG];} -int b2A(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_A];} -int b2B(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_B];} -int b2C(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_C];} -int b2D(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_D];} -int b2E(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_E];} -int b2F(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_F];} -int b2G(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_G];} -int b2H(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_H];} -int b2I(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_I];} -int b2J(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_J];} -int b2K(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_K];} -int b2L(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_L];} -int b2M(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_M];} -int b2N(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_N];} -int b2O(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_O];} -int b2P(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_P];} -int b2Q(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_Q];} -int b2R(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_R];} -int b2S(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_S];} -int b2T(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_T];} -int b2U(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_U];} -int b2V(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_V];} -int b2W(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_W];} -int b2X(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_X];} -int b2Y(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_Y];} -int b2Z(config *c) {return (c->bf->nbelem < 3) ? -1 : queue_elt_n(c->bf, 2)->feat_array[FEAT_TYPE_Z];} - -int b3f(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_FORM];} -int b3l(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_LEMMA];} -int b3c(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_CPOS];} -int b3p(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_POS];} -int b3m(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_FEATS];} -int b3s(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_STAG];} -int b3A(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_A];} -int b3B(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_B];} -int b3C(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_C];} -int b3D(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_D];} -int b3E(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_E];} -int b3F(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_F];} -int b3G(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_G];} -int b3H(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_H];} -int b3I(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_I];} -int b3J(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_J];} -int b3K(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_K];} -int b3L(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_L];} -int b3M(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_M];} -int b3N(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_N];} -int b3O(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_O];} -int b3P(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_P];} -int b3Q(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_Q];} -int b3R(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_R];} -int b3S(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_S];} -int b3T(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_T];} -int b3U(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_U];} -int b3V(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_V];} -int b3W(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_W];} -int b3X(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_X];} -int b3Y(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_Y];} -int b3Z(config *c) {return (c->bf->nbelem < 4) ? -1 : queue_elt_n(c->bf, 3)->feat_array[FEAT_TYPE_Z];} +int s0f(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_form(stack_elt_n(c->st, 0));} +int s0l(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_lemma(stack_elt_n(c->st, 0));} +int s0c(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_cpos(stack_elt_n(c->st, 0));} +int s0p(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_pos(stack_elt_n(c->st, 0));} +int s0m(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_feats(stack_elt_n(c->st, 0));} +int s0s(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_stag(stack_elt_n(c->st, 0));} +int s0A(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_A(stack_elt_n(c->st, 0));} +int s0B(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_B(stack_elt_n(c->st, 0));} +int s0C(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_C(stack_elt_n(c->st, 0));} +int s0D(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_D(stack_elt_n(c->st, 0));} +int s0E(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_E(stack_elt_n(c->st, 0));} +int s0F(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_F(stack_elt_n(c->st, 0));} +int s0G(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_G(stack_elt_n(c->st, 0));} +int s0H(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_H(stack_elt_n(c->st, 0));} +int s0I(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_I(stack_elt_n(c->st, 0));} +int s0J(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_J(stack_elt_n(c->st, 0));} +int s0K(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_K(stack_elt_n(c->st, 0));} +int s0L(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_L(stack_elt_n(c->st, 0));} +int s0M(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_M(stack_elt_n(c->st, 0));} +int s0N(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_N(stack_elt_n(c->st, 0));} +int s0O(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_O(stack_elt_n(c->st, 0));} +int s0P(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_P(stack_elt_n(c->st, 0));} +int s0Q(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_Q(stack_elt_n(c->st, 0));} +int s0R(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_R(stack_elt_n(c->st, 0));} +int s0S(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_S(stack_elt_n(c->st, 0));} +int s0T(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_T(stack_elt_n(c->st, 0));} +int s0U(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_U(stack_elt_n(c->st, 0));} +int s0V(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_V(stack_elt_n(c->st, 0));} +int s0W(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_W(stack_elt_n(c->st, 0));} +int s0X(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_X(stack_elt_n(c->st, 0));} +int s0Y(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_Y(stack_elt_n(c->st, 0));} +int s0Z(config *c) {return (stack_height(c->st) < 1) ? -1 : word_get_Z(stack_elt_n(c->st, 0));} + +int s0U1(config *c) {return (stack_height(c->st) < 1) ? -1 : stack_elt_n(c->st, 0)->U1;} + +int s1f(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_form(stack_elt_n(c->st, 1));} +int s1l(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_lemma(stack_elt_n(c->st, 1));} +int s1c(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_cpos(stack_elt_n(c->st, 1));} +int s1p(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_pos(stack_elt_n(c->st, 1));} +int s1m(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_feats(stack_elt_n(c->st, 1));} +int s1s(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_stag(stack_elt_n(c->st, 1));} +int s1A(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_A(stack_elt_n(c->st, 1));} +int s1B(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_B(stack_elt_n(c->st, 1));} +int s1C(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_C(stack_elt_n(c->st, 1));} +int s1D(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_D(stack_elt_n(c->st, 1));} +int s1E(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_E(stack_elt_n(c->st, 1));} +int s1F(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_F(stack_elt_n(c->st, 1));} +int s1G(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_G(stack_elt_n(c->st, 1));} +int s1H(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_H(stack_elt_n(c->st, 1));} +int s1I(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_I(stack_elt_n(c->st, 1));} +int s1J(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_J(stack_elt_n(c->st, 1));} +int s1K(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_K(stack_elt_n(c->st, 1));} +int s1L(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_L(stack_elt_n(c->st, 1));} +int s1M(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_M(stack_elt_n(c->st, 1));} +int s1N(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_N(stack_elt_n(c->st, 1));} +int s1O(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_O(stack_elt_n(c->st, 1));} +int s1P(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_P(stack_elt_n(c->st, 1));} +int s1Q(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_Q(stack_elt_n(c->st, 1));} +int s1R(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_R(stack_elt_n(c->st, 1));} +int s1S(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_S(stack_elt_n(c->st, 1));} +int s1T(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_T(stack_elt_n(c->st, 1));} +int s1U(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_U(stack_elt_n(c->st, 1));} +int s1V(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_V(stack_elt_n(c->st, 1));} +int s1W(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_W(stack_elt_n(c->st, 1));} +int s1X(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_X(stack_elt_n(c->st, 1));} +int s1Y(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_Y(stack_elt_n(c->st, 1));} +int s1Z(config *c) {return (stack_height(c->st) < 2) ? -1 : word_get_Z(stack_elt_n(c->st, 1));} + +int s2f(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_form(stack_elt_n(c->st, 2));} +int s2l(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_lemma(stack_elt_n(c->st, 2));} +int s2c(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_cpos(stack_elt_n(c->st, 2));} +int s2p(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_pos(stack_elt_n(c->st, 2));} +int s2m(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_feats(stack_elt_n(c->st, 2));} +int s2s(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_stag(stack_elt_n(c->st, 2));} +int s2A(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_A(stack_elt_n(c->st, 2));} +int s2B(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_B(stack_elt_n(c->st, 2));} +int s2C(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_C(stack_elt_n(c->st, 2));} +int s2D(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_D(stack_elt_n(c->st, 2));} +int s2E(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_E(stack_elt_n(c->st, 2));} +int s2F(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_F(stack_elt_n(c->st, 2));} +int s2G(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_G(stack_elt_n(c->st, 2));} +int s2H(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_H(stack_elt_n(c->st, 2));} +int s2I(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_I(stack_elt_n(c->st, 2));} +int s2J(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_J(stack_elt_n(c->st, 2));} +int s2K(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_K(stack_elt_n(c->st, 2));} +int s2L(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_L(stack_elt_n(c->st, 2));} +int s2M(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_M(stack_elt_n(c->st, 2));} +int s2N(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_N(stack_elt_n(c->st, 2));} +int s2O(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_O(stack_elt_n(c->st, 2));} +int s2P(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_P(stack_elt_n(c->st, 2));} +int s2Q(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_Q(stack_elt_n(c->st, 2));} +int s2R(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_R(stack_elt_n(c->st, 2));} +int s2S(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_S(stack_elt_n(c->st, 2));} +int s2T(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_T(stack_elt_n(c->st, 2));} +int s2U(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_U(stack_elt_n(c->st, 2));} +int s2V(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_V(stack_elt_n(c->st, 2));} +int s2W(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_W(stack_elt_n(c->st, 2));} +int s2X(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_X(stack_elt_n(c->st, 2));} +int s2Y(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_Y(stack_elt_n(c->st, 2));} +int s2Z(config *c) {return (stack_height(c->st) < 3) ? -1 : word_get_Z(stack_elt_n(c->st, 2));} + +int s3f(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_form(stack_elt_n(c->st, 3));} +int s3l(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_lemma(stack_elt_n(c->st, 3));} +int s3c(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_cpos(stack_elt_n(c->st, 3));} +int s3p(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_pos(stack_elt_n(c->st, 3));} +int s3m(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_feats(stack_elt_n(c->st, 3));} +int s3s(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_stag(stack_elt_n(c->st, 3));} +int s3A(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_A(stack_elt_n(c->st, 3));} +int s3B(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_B(stack_elt_n(c->st, 3));} +int s3C(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_C(stack_elt_n(c->st, 3));} +int s3D(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_D(stack_elt_n(c->st, 3));} +int s3E(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_E(stack_elt_n(c->st, 3));} +int s3F(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_F(stack_elt_n(c->st, 3));} +int s3G(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_G(stack_elt_n(c->st, 3));} +int s3H(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_H(stack_elt_n(c->st, 3));} +int s3I(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_I(stack_elt_n(c->st, 3));} +int s3J(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_J(stack_elt_n(c->st, 3));} +int s3K(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_K(stack_elt_n(c->st, 3));} +int s3L(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_L(stack_elt_n(c->st, 3));} +int s3M(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_M(stack_elt_n(c->st, 3));} +int s3N(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_N(stack_elt_n(c->st, 3));} +int s3O(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_O(stack_elt_n(c->st, 3));} +int s3P(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_P(stack_elt_n(c->st, 3));} +int s3Q(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_Q(stack_elt_n(c->st, 3));} +int s3R(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_R(stack_elt_n(c->st, 3));} +int s3S(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_S(stack_elt_n(c->st, 3));} +int s3T(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_T(stack_elt_n(c->st, 3));} +int s3U(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_U(stack_elt_n(c->st, 3));} +int s3V(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_V(stack_elt_n(c->st, 3));} +int s3W(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_W(stack_elt_n(c->st, 3));} +int s3X(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_X(stack_elt_n(c->st, 3));} +int s3Y(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_Y(stack_elt_n(c->st, 3));} +int s3Z(config *c) {return (stack_height(c->st) < 4) ? -1 : word_get_Z(stack_elt_n(c->st, 3));} + +int b0f(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_form(queue_elt_n(c->bf, 0));} +int b0l(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_lemma(queue_elt_n(c->bf, 0));} +int b0c(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_cpos(queue_elt_n(c->bf, 0));} +int b0p(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_pos(queue_elt_n(c->bf, 0));} +int b0m(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_feats(queue_elt_n(c->bf, 0));} +int b0s(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_stag(queue_elt_n(c->bf, 0));} +int b0A(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_A(queue_elt_n(c->bf, 0));} +int b0B(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_B(queue_elt_n(c->bf, 0));} +int b0C(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_C(queue_elt_n(c->bf, 0));} +int b0D(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_D(queue_elt_n(c->bf, 0));} +int b0E(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_E(queue_elt_n(c->bf, 0));} +int b0F(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_F(queue_elt_n(c->bf, 0));} +int b0G(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_G(queue_elt_n(c->bf, 0));} +int b0H(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_H(queue_elt_n(c->bf, 0));} +int b0I(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_I(queue_elt_n(c->bf, 0));} +int b0J(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_J(queue_elt_n(c->bf, 0));} +int b0K(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_K(queue_elt_n(c->bf, 0));} +int b0L(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_L(queue_elt_n(c->bf, 0));} +int b0M(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_M(queue_elt_n(c->bf, 0));} +int b0N(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_N(queue_elt_n(c->bf, 0));} +int b0O(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_O(queue_elt_n(c->bf, 0));} +int b0P(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_P(queue_elt_n(c->bf, 0));} +int b0Q(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_Q(queue_elt_n(c->bf, 0));} +int b0R(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_R(queue_elt_n(c->bf, 0));} +int b0S(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_S(queue_elt_n(c->bf, 0));} +int b0T(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_T(queue_elt_n(c->bf, 0));} +int b0U(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_U(queue_elt_n(c->bf, 0));} +int b0V(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_V(queue_elt_n(c->bf, 0));} +int b0W(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_W(queue_elt_n(c->bf, 0));} +int b0X(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_X(queue_elt_n(c->bf, 0));} +int b0Y(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_Y(queue_elt_n(c->bf, 0));} +int b0Z(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : word_get_Z(queue_elt_n(c->bf, 0));} + +int b0U1(config *c) {return (queue_nbelem(c->bf) < 1) ? -1 : queue_elt_n(c->bf, 0)->U1;} + + +int b1f(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_form(queue_elt_n(c->bf, 1));} +int b1l(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_lemma(queue_elt_n(c->bf, 1));} +int b1c(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_cpos(queue_elt_n(c->bf, 1));} +int b1p(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_pos(queue_elt_n(c->bf, 1));} +int b1m(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_feats(queue_elt_n(c->bf, 1));} +int b1s(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_stag(queue_elt_n(c->bf, 1));} +int b1A(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_A(queue_elt_n(c->bf, 1));} +int b1B(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_B(queue_elt_n(c->bf, 1));} +int b1C(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_C(queue_elt_n(c->bf, 1));} +int b1D(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_D(queue_elt_n(c->bf, 1));} +int b1E(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_E(queue_elt_n(c->bf, 1));} +int b1F(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_F(queue_elt_n(c->bf, 1));} +int b1G(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_G(queue_elt_n(c->bf, 1));} +int b1H(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_H(queue_elt_n(c->bf, 1));} +int b1I(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_I(queue_elt_n(c->bf, 1));} +int b1J(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_J(queue_elt_n(c->bf, 1));} +int b1K(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_K(queue_elt_n(c->bf, 1));} +int b1L(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_L(queue_elt_n(c->bf, 1));} +int b1M(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_M(queue_elt_n(c->bf, 1));} +int b1N(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_N(queue_elt_n(c->bf, 1));} +int b1O(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_O(queue_elt_n(c->bf, 1));} +int b1P(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_P(queue_elt_n(c->bf, 1));} +int b1Q(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_Q(queue_elt_n(c->bf, 1));} +int b1R(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_R(queue_elt_n(c->bf, 1));} +int b1S(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_S(queue_elt_n(c->bf, 1));} +int b1T(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_T(queue_elt_n(c->bf, 1));} +int b1U(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_U(queue_elt_n(c->bf, 1));} +int b1V(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_V(queue_elt_n(c->bf, 1));} +int b1W(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_W(queue_elt_n(c->bf, 1));} +int b1X(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_X(queue_elt_n(c->bf, 1));} +int b1Y(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_Y(queue_elt_n(c->bf, 1));} +int b1Z(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : word_get_Z(queue_elt_n(c->bf, 1));} + +int b1U1(config *c) {return (queue_nbelem(c->bf) < 2) ? -1 : queue_elt_n(c->bf, 1)->U1;} + +int b2f(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_form(queue_elt_n(c->bf, 2));} +int b2l(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_lemma(queue_elt_n(c->bf, 2));} +int b2c(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_cpos(queue_elt_n(c->bf, 2));} +int b2p(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_pos(queue_elt_n(c->bf, 2));} +int b2m(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_feats(queue_elt_n(c->bf, 2));} +int b2s(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_stag(queue_elt_n(c->bf, 2));} +int b2A(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_A(queue_elt_n(c->bf, 2));} +int b2B(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_B(queue_elt_n(c->bf, 2));} +int b2C(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_C(queue_elt_n(c->bf, 2));} +int b2D(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_D(queue_elt_n(c->bf, 2));} +int b2E(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_E(queue_elt_n(c->bf, 2));} +int b2F(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_F(queue_elt_n(c->bf, 2));} +int b2G(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_G(queue_elt_n(c->bf, 2));} +int b2H(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_H(queue_elt_n(c->bf, 2));} +int b2I(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_I(queue_elt_n(c->bf, 2));} +int b2J(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_J(queue_elt_n(c->bf, 2));} +int b2K(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_K(queue_elt_n(c->bf, 2));} +int b2L(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_L(queue_elt_n(c->bf, 2));} +int b2M(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_M(queue_elt_n(c->bf, 2));} +int b2N(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_N(queue_elt_n(c->bf, 2));} +int b2O(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_O(queue_elt_n(c->bf, 2));} +int b2P(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_P(queue_elt_n(c->bf, 2));} +int b2Q(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_Q(queue_elt_n(c->bf, 2));} +int b2R(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_R(queue_elt_n(c->bf, 2));} +int b2S(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_S(queue_elt_n(c->bf, 2));} +int b2T(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_T(queue_elt_n(c->bf, 2));} +int b2U(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_U(queue_elt_n(c->bf, 2));} +int b2V(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_V(queue_elt_n(c->bf, 2));} +int b2W(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_W(queue_elt_n(c->bf, 2));} +int b2X(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_X(queue_elt_n(c->bf, 2));} +int b2Y(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_Y(queue_elt_n(c->bf, 2));} +int b2Z(config *c) {return (queue_nbelem(c->bf) < 3) ? -1 : word_get_Z(queue_elt_n(c->bf, 2));} + +int b3f(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_form(queue_elt_n(c->bf, 3));} +int b3l(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_lemma(queue_elt_n(c->bf, 3));} +int b3c(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_cpos(queue_elt_n(c->bf, 3));} +int b3p(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_pos(queue_elt_n(c->bf, 3));} +int b3m(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_feats(queue_elt_n(c->bf, 3));} +int b3s(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_stag(queue_elt_n(c->bf, 3));} +int b3A(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_A(queue_elt_n(c->bf, 3));} +int b3B(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_B(queue_elt_n(c->bf, 3));} +int b3C(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_C(queue_elt_n(c->bf, 3));} +int b3D(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_D(queue_elt_n(c->bf, 3));} +int b3E(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_E(queue_elt_n(c->bf, 3));} +int b3F(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_F(queue_elt_n(c->bf, 3));} +int b3G(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_G(queue_elt_n(c->bf, 3));} +int b3H(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_H(queue_elt_n(c->bf, 3));} +int b3I(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_I(queue_elt_n(c->bf, 3));} +int b3J(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_J(queue_elt_n(c->bf, 3));} +int b3K(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_K(queue_elt_n(c->bf, 3));} +int b3L(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_L(queue_elt_n(c->bf, 3));} +int b3M(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_M(queue_elt_n(c->bf, 3));} +int b3N(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_N(queue_elt_n(c->bf, 3));} +int b3O(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_O(queue_elt_n(c->bf, 3));} +int b3P(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_P(queue_elt_n(c->bf, 3));} +int b3Q(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_Q(queue_elt_n(c->bf, 3));} +int b3R(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_R(queue_elt_n(c->bf, 3));} +int b3S(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_S(queue_elt_n(c->bf, 3));} +int b3T(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_T(queue_elt_n(c->bf, 3));} +int b3U(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_U(queue_elt_n(c->bf, 3));} +int b3V(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_V(queue_elt_n(c->bf, 3));} +int b3W(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_W(queue_elt_n(c->bf, 3));} +int b3X(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_X(queue_elt_n(c->bf, 3));} +int b3Y(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_Y(queue_elt_n(c->bf, 3));} +int b3Z(config *c) {return (queue_nbelem(c->bf) < 4) ? -1 : word_get_Z(queue_elt_n(c->bf, 3));} /* structural features */ diff --git a/maca_trans_parser/src/feat_vec.h b/maca_trans_parser/src/feat_vec.h index 9667ef3ede08d2226e6ef9bfbf760f5f3e10d499..4116d7652be8bdf4dd5922f936c40686cf28b67b 100644 --- a/maca_trans_parser/src/feat_vec.h +++ b/maca_trans_parser/src/feat_vec.h @@ -23,7 +23,7 @@ int feat_vec_add(feat_vec *fv, int feat); void feat_vec_empty(feat_vec *fv); void feat_vec_print_string(feat_vec *fv, dico *dico_features); void feat_vec_print(FILE *f, feat_vec *fv); -/* void feat_vec_print_dnn(FILE *f, feat_vec *fv, feat_model *fm, mcd *m); */ +/* void feat_vec_print_dnn(FILE *f, feat_vec *fv, feat_model *fm, mcd *m); */ /* void feat_vec_fill_input_array_dnn(fann_type *input_array, feat_vec *fv, feat_model *fm, mcd *m); */ /* void feat_vec_fill_input_array_dnn(float *input_array, feat_vec *fv, feat_model *fm, mcd *m); */ #endif diff --git a/maca_trans_parser/src/mcd.c b/maca_trans_parser/src/mcd.c deleted file mode 100644 index c701da7a8718f03e2772865cb4faacc0ca26f94d..0000000000000000000000000000000000000000 --- a/maca_trans_parser/src/mcd.c +++ /dev/null @@ -1,169 +0,0 @@ -#include<stdio.h> -#include<stdlib.h> -#include<string.h> - -#include "mcd.h" -#include "util.h" -#include "dico.h" -#include "word_emb.h" - - -mcd *mcd_new(int nb_col) -{ - mcd *m = (mcd *)memalloc(sizeof(mcd)); - int i; - m->nb_col = nb_col; - - for(i=0; i < FEAT_TYPE_NB; i++) - m->type2col[i] = -1; - - m->representation = (int *)memalloc(nb_col * sizeof(int)); - m->type = (int *)memalloc(nb_col * sizeof(int)); - m->col2type = (int *)memalloc(nb_col * sizeof(int)); - m->filename = (char **)memalloc(nb_col * sizeof(char *)); - m->dico_array = (dico **)memalloc(nb_col * sizeof(dico *)); - m->word_emb_array = (word_emb **)memalloc(nb_col * sizeof(word_emb *)); - - for(i=0; i < nb_col; i++){ - m->representation[i] = MCD_REPRESENTATION_NULL; - m->type[i] = -1; - m->col2type[i] = -1; - m->filename[i] = NULL; - m->dico_array[i] = NULL; - m->word_emb_array[i] = NULL;; - } - return m; -} - -void mcd_free(mcd *m) -{ - int i; - for(i=0; i < m->nb_col; i++){ - if(m->dico_array[i]) dico_free(m->dico_array[i]); - if(m->word_emb_array[i]) word_emb_free(m->word_emb_array[i]); - } - free(m->representation); - free(m->filename); - free(m->dico_array); - free(m->word_emb_array); - - free(m); -} - -int mcd_get_code(mcd *m, char *str, int col){ - if(m->representation[col] == MCD_REPRESENTATION_VOCAB) - return dico_string2int(m->dico_array[col], str); - if(m->representation[col] == MCD_REPRESENTATION_EMB) - return word_emb_get_code(m->word_emb_array[col], str); - if(m->representation[col] == MCD_REPRESENTATION_INT) - return atoi(str); - return MCD_INVALID_VALUE; -} - -int mcd_max_column_index_in_file(char *mcd_filename) -{ - int max_col = -1; - FILE *f = myfopen(mcd_filename, "r"); - char buffer[1000]; /* ugly */ - int column; - char type[100]; - char representation[100]; - char filename[500]; /* ugly */ - int fields_number; - int line_number = 0; - - while(fgets(buffer, 1000, f)){ - line_number++; - if(feof(f)) break; - if((buffer[0] == '\n') || (buffer[0] == '#')) continue; - fields_number = sscanf(buffer, "%d %s %s %s", &column, type, representation, filename); - if(fields_number != 4){ - fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename); - continue; - } - if(column > max_col) max_col = column; - } - return max_col; -} - -mcd *mcd_read(char *mcd_filename, char *corpus_filename, dico_vec *vocabs) -{ - int column; - char type[100]; - char representation[100]; - char filename[500]; /* ugly */ - int fields_number; - int line_number = 0; - char buffer[1000]; /* ugly */ - int nb_col = mcd_max_column_index_in_file(mcd_filename); - mcd *m = mcd_new(nb_col + 1); - FILE *f = myfopen(mcd_filename, "r"); - /* int first = 1; */ - - while(fgets(buffer, 1000, f)){ - line_number++; - if(feof(f)) break; - if((buffer[0] == '\n') || (buffer[0] == '#')) continue; - fields_number = sscanf(buffer, "%d %s %s %s", &column, type, representation, filename); - if(fields_number != 4){ - /* fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename); */ - continue; - } - fprintf(stderr, "column = %d type = %s representation = %s filename = %s\n", column, type, representation, filename); - m->type[column] = feat_type_string2int(type); - if(m->type[column] == -1){ - fprintf(stderr, "in line %d of mcd file %s invalid type, I'm skipping it\n", line_number, mcd_filename); - continue; - } - m->type2col[m->type[column]] = column; - m->col2type[column] = m->type[column]; - if(!strcmp(representation, "_")) m->representation[column] = MCD_REPRESENTATION_NULL; - else if(!strcmp(representation, "EMB")) m->representation[column] = MCD_REPRESENTATION_EMB; - else if(!strcmp(representation, "VOCAB")) m->representation[column] = MCD_REPRESENTATION_VOCAB; - else if(!strcmp(representation, "INT")) m->representation[column] = MCD_REPRESENTATION_INT; - else{ - fprintf(stderr, "in line %d of mcd file %s invalid mode of representation, I'm skipping it\n", line_number, mcd_filename); - m->representation[column] = MCD_REPRESENTATION_NULL; - } - if(m->representation[column] != MCD_REPRESENTATION_NULL){ - m->filename[column] = strdup(filename); - if(m->representation[column] == MCD_REPRESENTATION_EMB){ - fprintf(stderr, "loading word embedding %s\n", m->filename[column]); - m->word_emb_array[column] = word_emb_load(m->filename[column]); - } - else if(m->representation[column] == MCD_REPRESENTATION_VOCAB){ - if(!strcmp(m->filename[column], "_")){ - if(corpus_filename){ - fprintf(stderr, "extracting dico %s from corpus\n", type); - m->dico_array[column] = dico_extract_from_corpus(corpus_filename, column, type); - } - else if(vocabs){ - fprintf(stderr, "linking to dico %s\n", type); - m->dico_array[column] = dico_vec_get_dico(vocabs, type); - } - if(m->dico_array[column] == NULL) - fprintf(stderr, "cannot find dico %s\n", type); - } - else{ - fprintf(stderr, "loading dico %s\n", m->filename[column]); - m->dico_array[column] = dico_read(m->filename[column], 0.5); - } - } - } - } - fclose(f); - return m; -} - - -dico_vec *mcd_build_dico_vec(mcd *mcd_struct) -{ - dico_vec *dv = dico_vec_new(); - int i; - for(i=0; i < mcd_struct->nb_col; i++){ - if(mcd_struct->dico_array[i]){ - dico_vec_add(dv, mcd_struct->dico_array[i]); - } - } - return dv; -} diff --git a/maca_trans_parser/src/queue.c b/maca_trans_parser/src/queue.c index aaabd655d1f5cd33e5b003f002e995780dd8d710..fe21062f0e370bc0eaefa8558eac178c4a5c728e 100644 --- a/maca_trans_parser/src/queue.c +++ b/maca_trans_parser/src/queue.c @@ -51,11 +51,6 @@ void queue_print(FILE *f, queue *q) fprintf(f, ")\n"); } -int queue_nbelem(queue *q) -{ - return q->nbelem; -} - queue *queue_new(int size) { queue *q = (queue *)memalloc(sizeof(queue)); diff --git a/maca_trans_parser/src/queue.h b/maca_trans_parser/src/queue.h index d93c59ff0ef72239341400ad11c341f0c47801ff..8ecd766a7fb775f86d0a11b3db703c8bec77a650 100644 --- a/maca_trans_parser/src/queue.h +++ b/maca_trans_parser/src/queue.h @@ -5,6 +5,11 @@ #include"word.h" #include"mcd.h" +#define queue_nbelem(q) (q)->nbelem +#define queue_size(q) (q)->size +#define queue_head(q) (q)->head +#define queue_tail(q) (q)->tail + typedef struct { int size; word **array; diff --git a/maca_trans_parser/src/stack.c b/maca_trans_parser/src/stack.c index 988a5825dbfe9399a82f308ce5dc5d2bd9a077ea..2672732f260a5178cd6084ca09ddd3246803f912 100644 --- a/maca_trans_parser/src/stack.c +++ b/maca_trans_parser/src/stack.c @@ -3,6 +3,13 @@ #include"stack.h" #include"util.h" + +/*int stack_height(stack *s) +{ + return(s->top); + }*/ + + int stack_is_empty(stack *s) { return(s->top == 0); diff --git a/maca_trans_parser/src/stack.h b/maca_trans_parser/src/stack.h index 2682011abdf408003343dafd17dc0d6b860be681..4a0a42646ccca69a8d7513eff0a19d9a490be5f9 100644 --- a/maca_trans_parser/src/stack.h +++ b/maca_trans_parser/src/stack.h @@ -4,7 +4,8 @@ #include<stdio.h> #include"word.h" -#define stack_height(s) (s)->top +#define stack_height(s) (s)->top +#define stack_elt_n(s, n) (s)->array[(s)->top - (n) - 1] typedef struct { int size; @@ -20,4 +21,5 @@ word *stack_top(stack *s); void stack_print(FILE *buffer, stack *s); void stack_free(stack *s); int stack_is_empty(stack *s); +/* int stack_height(stack *s); */ #endif diff --git a/maca_trans_parser/src/train_perceptron.c b/maca_trans_parser/src/train_perceptron.c index f9c9482dabcbfa09885e9ec8e1162ff80da24b3f..54ff2990ec7318da4a84bc90dc1720b16ce77b16 100644 --- a/maca_trans_parser/src/train_perceptron.c +++ b/maca_trans_parser/src/train_perceptron.c @@ -42,7 +42,7 @@ void train_perceptron_check_options(context *ctx) { if(!ctx->conll_filename || ctx->help - || !ctx->mcd_filename + /* || !ctx->mcd_filename */ || !ctx->features_model_filename || !ctx->perc_model_filename || !ctx->vocabs_filename @@ -60,7 +60,8 @@ int main(int argc, char *argv[]) ctx = context_read_options(argc, argv); train_perceptron_check_options(ctx); - ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename, NULL); + mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename); + ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct); ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); diff --git a/maca_trans_parser/src/transform_treebank.c b/maca_trans_parser/src/transform_treebank.c index fbda610c9218d08e93298872510f4a229a08a20c..13a61c5f3f0d697f68e79493a65dc72ec004ab9f 100644 --- a/maca_trans_parser/src/transform_treebank.c +++ b/maca_trans_parser/src/transform_treebank.c @@ -36,7 +36,7 @@ void transform_treebank_check_options(context *ctx) { if(!ctx->conll_filename || ctx->help - || !ctx->mcd_filename + /* || !ctx->mcd_filename */ || !(ctx->cff_filename || ctx->fann_filename) ){ transform_treebank_help_message(ctx); @@ -117,7 +117,7 @@ int generate_training_file_stream(FILE *output_file, context *ctx) feat_vec_print(output_file, fv); } else if(ctx->fann_filename){ - feat_vec_print_dnn(output_file, fv, ctx->features_model, ctx->mcd_struct); + /* feat_vec_print_dnn(output_file, fv, ctx->features_model, ctx->mcd_struct); */ print_mvt_fann(output_file, ctx->mvt_nb, mvt_code); fprintf(output_file, "\n\n"); } @@ -218,7 +218,7 @@ int generate_training_file_buffer(FILE *output_file, context *ctx) c = config_initial(conll_file, ctx->mcd_struct, 1000, 0); while((ref = sentence_read(conll_file_ref, ctx->mcd_struct)) && (sentence_nb < ctx->sent_nb)){ - /* sentence_print(stdout, ref, NULL); */ + /* sentence_print(stdout, ref, NULL); */ queue_read_sentence(c->bf, conll_file, ctx->mcd_struct); while(!config_is_terminal(c)){ /* config_print(stdout,c); */ @@ -280,24 +280,22 @@ int main(int argc, char *argv[]) transform_treebank_check_options(ctx); if(ctx->mode == TRAIN_MODE){ - ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename, NULL); + mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename); ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct); - ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); - /* ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1; */ } else if(ctx->mode == TEST_MODE){ ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); - ctx->mcd_struct = mcd_read(ctx->mcd_filename, NULL, ctx->vocabs); - ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); + mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs); } - - + + ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); if(ctx->dico_labels == NULL){ fprintf(stderr, "cannot find label names\n"); return 1; } ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1; + feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb); diff --git a/maca_trans_parser/src/word.c b/maca_trans_parser/src/word.c index ed3912374685e646512b2abd75b64fc5f149c664..bdb63930b3fa801c4608b1529ff5ce6607f89151 100644 --- a/maca_trans_parser/src/word.c +++ b/maca_trans_parser/src/word.c @@ -48,10 +48,10 @@ word *word_parse_buffer(char *buffer, mcd *mcd_struct) w = word_new(buffer); token = strtok(buffer, "\t"); do{ - if((column_nb < mcd_struct->nb_col) && (mcd_struct->col2type[column_nb] != -1)){ - w->feat_array[mcd_struct->col2type[column_nb]] = mcd_get_code(mcd_struct, token, column_nb); + if((column_nb < mcd_struct->nb_col) && (mcd_struct->type[column_nb] != -1)){ + w->feat_array[mcd_struct->type[column_nb]] = mcd_get_code(mcd_struct, token, column_nb); } - if(mcd_struct->col2type[column_nb] == FEAT_TYPE_FORM){ + if(mcd_struct->type[column_nb] == FEAT_TYPE_FORM){ w->U1 = isupper(token[0]) ? 1 : 0; } column_nb++;