Skip to content
Snippets Groups Projects
Commit a0a86014 authored by Alexis Nasr
Browse files

Merge branch 'maca_common' into 'master'

Maca common

merging with maca_common branch, general functions are now in maca_common

See merge request !1
parents 4d2a9550 362239fe
Branches
No related tags found
1 merge request: !1 Maca common
Showing with 327 additions and 317 deletions
set(SOURCES src/context.c set(SOURCES src/context.c
src/dico_vec.c
src/feat_desc.c src/feat_desc.c
src/feature_table.c src/feature_table.c
src/movement.c src/movement.c
src/sentence.c src/sentence.c
src/util.c
src/feat_fct.c src/feat_fct.c
src/feat_vec.c src/feat_vec.c
src/global_feat_vec.c src/global_feat_vec.c
...@@ -12,24 +10,17 @@ set(SOURCES src/context.c ...@@ -12,24 +10,17 @@ set(SOURCES src/context.c
src/simple_decoder.c src/simple_decoder.c
src/cf_file.c src/cf_file.c
src/feat_lib.c src/feat_lib.c
src/hash.c
src/perceptron.c src/perceptron.c
src/stack.c src/stack.c
src/word.c src/word.c
src/config2feat_vec.c src/config2feat_vec.c
src/depset.c src/depset.c
src/feat_model.c src/feat_model.c
src/word_emb.c
src/config.c src/config.c
src/dico.c
src/feat_types.c
src/mcd.c
src/queue.c src/queue.c
src/beam.c src/beam.c
) )
#compiling library #compiling library
include_directories(src) include_directories(src)
add_library(transparse STATIC ${SOURCES}) add_library(transparse STATIC ${SOURCES})
...@@ -38,23 +29,28 @@ add_library(transparse STATIC ${SOURCES}) ...@@ -38,23 +29,28 @@ add_library(transparse STATIC ${SOURCES})
add_executable(maca_trans_parser_conll2cff ./src/transform_treebank.c) add_executable(maca_trans_parser_conll2cff ./src/transform_treebank.c)
target_link_libraries(maca_trans_parser_conll2cff transparse) target_link_libraries(maca_trans_parser_conll2cff transparse)
target_link_libraries(maca_trans_parser_conll2cff maca_common)
install (TARGETS maca_trans_parser_conll2cff DESTINATION bin) install (TARGETS maca_trans_parser_conll2cff DESTINATION bin)
add_executable(maca_trans_parser ./src/decode.c) add_executable(maca_trans_parser ./src/decode.c)
target_link_libraries(maca_trans_parser transparse) target_link_libraries(maca_trans_parser transparse)
target_link_libraries(maca_trans_parser maca_common)
install (TARGETS maca_trans_parser DESTINATION bin) install (TARGETS maca_trans_parser DESTINATION bin)
add_executable(maca_trans_parser_train ./src/train_perceptron.c) add_executable(maca_trans_parser_train ./src/train_perceptron.c)
target_compile_options(maca_trans_parser_train INTERFACE -Wall) target_compile_options(maca_trans_parser_train INTERFACE -Wall)
target_link_libraries(maca_trans_parser_train transparse) target_link_libraries(maca_trans_parser_train transparse)
target_link_libraries(maca_trans_parser_train maca_common)
install (TARGETS maca_trans_parser_train DESTINATION bin) install (TARGETS maca_trans_parser_train DESTINATION bin)
add_executable(maca_trans_parser_train_from_cff ./src/train.c) add_executable(maca_trans_parser_train_from_cff ./src/train.c)
target_link_libraries(maca_trans_parser_train_from_cff transparse) target_link_libraries(maca_trans_parser_train_from_cff transparse)
target_link_libraries(maca_trans_parser_train_from_cff maca_common)
install (TARGETS maca_trans_parser_train_from_cff DESTINATION bin) install (TARGETS maca_trans_parser_train_from_cff DESTINATION bin)
add_executable(maca_trans_parser_cff_cutoff ./src/cff_cutoff.c) add_executable(maca_trans_parser_cff_cutoff ./src/cff_cutoff.c)
target_link_libraries(maca_trans_parser_cff_cutoff transparse) target_link_libraries(maca_trans_parser_cff_cutoff transparse)
target_link_libraries(maca_trans_parser_cff_cutoff maca_common)
install (TARGETS maca_trans_parser_cff_cutoff DESTINATION bin) install (TARGETS maca_trans_parser_cff_cutoff DESTINATION bin)
#add_executable(test_w2v ./src/test_w2v.c) #add_executable(test_w2v ./src/test_w2v.c)
......
...@@ -176,8 +176,6 @@ config *beam_decoder_sentence(config *initial_config, dico *dico_features, featu ...@@ -176,8 +176,6 @@ config *beam_decoder_sentence(config *initial_config, dico *dico_features, featu
beam *next_beam= beam_new(beam_width); beam *next_beam= beam_new(beam_width);
beam *final_beam= beam_new(beam_width); beam *final_beam= beam_new(beam_width);
beam *tmp_beam= NULL; beam *tmp_beam= NULL;
int i;
float max;
config *argmax; config *argmax;
int step = 0; int step = 0;
......
...@@ -256,6 +256,7 @@ context *context_read_options(int argc, char *argv[]) ...@@ -256,6 +256,7 @@ context *context_read_options(int argc, char *argv[])
break; break;
case 'C': case 'C':
ctx->mcd_filename = strdup(optarg); ctx->mcd_filename = strdup(optarg);
ctx->mcd_struct = mcd_read(ctx->mcd_filename);
break; break;
case 'F': case 'F':
ctx->features_model_filename = strdup(optarg); ctx->features_model_filename = strdup(optarg);
...@@ -281,11 +282,15 @@ context *context_read_options(int argc, char *argv[]) ...@@ -281,11 +282,15 @@ context *context_read_options(int argc, char *argv[])
ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1; ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1;
}*/ }*/
/*
if(ctx->features_model && ctx->mcd_struct) if(ctx->features_model && ctx->mcd_struct)
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb); feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
*/
context_set_linguistic_resources_filenames(ctx); context_set_linguistic_resources_filenames(ctx);
if(ctx->mcd_filename == NULL){
ctx->mcd_struct = mcd_build_conll07();
}
return ctx; return ctx;
} }
...@@ -309,25 +314,25 @@ void context_set_linguistic_resources_filenames(context *ctx) ...@@ -309,25 +314,25 @@ void context_set_linguistic_resources_filenames(context *ctx)
if(!ctx->perc_model_filename){ if(!ctx->perc_model_filename){
strcpy(absolute_filename, absolute_path); strcpy(absolute_filename, absolute_path);
strcat(absolute_filename, STANDARD_MODEL_FILENAME); strcat(absolute_filename, DEFAULT_MODEL_FILENAME);
ctx->perc_model_filename = strdup(absolute_filename); ctx->perc_model_filename = strdup(absolute_filename);
} }
if(!ctx->vocabs_filename){ if(!ctx->vocabs_filename){
strcpy(absolute_filename, absolute_path); strcpy(absolute_filename, absolute_path);
strcat(absolute_filename, STANDARD_VOCABS_FILENAME); strcat(absolute_filename, DEFAULT_VOCABS_FILENAME);
ctx->vocabs_filename = strdup(absolute_filename); ctx->vocabs_filename = strdup(absolute_filename);
} }
if(!ctx->mcd_filename){ /* if(!ctx->mcd_filename){
strcpy(absolute_filename, absolute_path); strcpy(absolute_filename, absolute_path);
strcat(absolute_filename, STANDARD_MULTI_COL_DESC_FILENAME); strcat(absolute_filename, DEFAULT_MULTI_COL_DESC_FILENAME);
ctx->mcd_filename = strdup(absolute_filename); ctx->mcd_filename = strdup(absolute_filename);
} }*/
if(!ctx->features_model_filename){ if(!ctx->features_model_filename){
strcpy(absolute_filename, absolute_path); strcpy(absolute_filename, absolute_path);
strcat(absolute_filename, STANDARD_FEATURES_MODEL_FILENAME); strcat(absolute_filename, DEFAULT_FEATURES_MODEL_FILENAME);
ctx->features_model_filename = strdup(absolute_filename); ctx->features_model_filename = strdup(absolute_filename);
} }
......
...@@ -4,10 +4,10 @@ ...@@ -4,10 +4,10 @@
#define TEST_MODE 1 #define TEST_MODE 1
#define TRAIN_MODE 2 #define TRAIN_MODE 2
#define STANDARD_MULTI_COL_DESC_FILENAME "maca_trans_parser.mcd" #define DEFAULT_MULTI_COL_DESC_FILENAME "maca_trans_parser.mcd"
#define STANDARD_FEATURES_MODEL_FILENAME "maca_trans_parser.fm" #define DEFAULT_FEATURES_MODEL_FILENAME "maca_trans_parser.fm"
#define STANDARD_VOCABS_FILENAME "maca_trans_parser.vocab" #define DEFAULT_VOCABS_FILENAME "maca_trans_parser.vocab"
#define STANDARD_MODEL_FILENAME "maca_trans_parser.model" #define DEFAULT_MODEL_FILENAME "maca_trans_parser.model"
#include "dico_vec.h" #include "dico_vec.h"
#include "feat_model.h" #include "feat_model.h"
......
...@@ -54,7 +54,9 @@ int main(int argc, char *argv[]) ...@@ -54,7 +54,9 @@ int main(int argc, char *argv[])
decode_check_options(ctx); decode_check_options(ctx);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
ctx->mcd_struct = mcd_read(ctx->mcd_filename, NULL, ctx->vocabs); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
if(ctx->dico_labels == NULL){ if(ctx->dico_labels == NULL){
...@@ -69,7 +71,7 @@ int main(int argc, char *argv[]) ...@@ -69,7 +71,7 @@ int main(int argc, char *argv[])
/* when in stream mode, force to renumber the tokens (ugly !) */ /* when in stream mode, force to renumber the tokens (ugly !) */
if(ctx->stream_mode){ if(ctx->stream_mode){
ctx->mcd_struct->col2type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1; ctx->mcd_struct->type[ctx->mcd_struct->type2col[FEAT_TYPE_INDEX]] = -1;
} }
......
...@@ -90,6 +90,7 @@ char *skip_index(char *buffer) ...@@ -90,6 +90,7 @@ char *skip_index(char *buffer)
if(buffer[i] < '0' || buffer[i] > '9' || buffer[i] == ' ' || buffer[i] == '\t') if(buffer[i] < '0' || buffer[i] > '9' || buffer[i] == ' ' || buffer[i] == '\t')
return &buffer[i]; return &buffer[i];
} }
return NULL;
} }
void depset_print_new_index(FILE *f, depset *d, dico *dico_labels) void depset_print_new_index(FILE *f, depset *d, dico *dico_labels)
......
This diff is collapsed.
...@@ -51,11 +51,6 @@ void queue_print(FILE *f, queue *q) ...@@ -51,11 +51,6 @@ void queue_print(FILE *f, queue *q)
fprintf(f, ")\n"); fprintf(f, ")\n");
} }
int queue_nbelem(queue *q)
{
return q->nbelem;
}
queue *queue_new(int size) queue *queue_new(int size)
{ {
queue *q = (queue *)memalloc(sizeof(queue)); queue *q = (queue *)memalloc(sizeof(queue));
......
...@@ -5,6 +5,11 @@ ...@@ -5,6 +5,11 @@
#include"word.h" #include"word.h"
#include"mcd.h" #include"mcd.h"
#define queue_nbelem(q) (q)->nbelem
#define queue_size(q) (q)->size
#define queue_head(q) (q)->head
#define queue_tail(q) (q)->tail
typedef struct { typedef struct {
int size; int size;
word **array; word **array;
......
...@@ -3,6 +3,13 @@ ...@@ -3,6 +3,13 @@
#include"stack.h" #include"stack.h"
#include"util.h" #include"util.h"
/*int stack_height(stack *s)
{
return(s->top);
}*/
int stack_is_empty(stack *s) int stack_is_empty(stack *s)
{ {
return(s->top == 0); return(s->top == 0);
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include"word.h" #include"word.h"
#define stack_height(s) (s)->top #define stack_height(s) (s)->top
#define stack_elt_n(s, n) (s)->array[(s)->top - (n) - 1]
typedef struct { typedef struct {
int size; int size;
...@@ -20,4 +21,5 @@ word *stack_top(stack *s); ...@@ -20,4 +21,5 @@ word *stack_top(stack *s);
void stack_print(FILE *buffer, stack *s); void stack_print(FILE *buffer, stack *s);
void stack_free(stack *s); void stack_free(stack *s);
int stack_is_empty(stack *s); int stack_is_empty(stack *s);
/* int stack_height(stack *s); */
#endif #endif
...@@ -42,7 +42,7 @@ void train_perceptron_check_options(context *ctx) ...@@ -42,7 +42,7 @@ void train_perceptron_check_options(context *ctx)
{ {
if(!ctx->conll_filename if(!ctx->conll_filename
|| ctx->help || ctx->help
|| !ctx->mcd_filename /* || !ctx->mcd_filename */
|| !ctx->features_model_filename || !ctx->features_model_filename
|| !ctx->perc_model_filename || !ctx->perc_model_filename
|| !ctx->vocabs_filename || !ctx->vocabs_filename
...@@ -60,7 +60,8 @@ int main(int argc, char *argv[]) ...@@ -60,7 +60,8 @@ int main(int argc, char *argv[])
ctx = context_read_options(argc, argv); ctx = context_read_options(argc, argv);
train_perceptron_check_options(ctx); train_perceptron_check_options(ctx);
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename, NULL); mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct); ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL"); ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
......
...@@ -36,7 +36,7 @@ void transform_treebank_check_options(context *ctx) ...@@ -36,7 +36,7 @@ void transform_treebank_check_options(context *ctx)
{ {
if(!ctx->conll_filename if(!ctx->conll_filename
|| ctx->help || ctx->help
|| !ctx->mcd_filename /* || !ctx->mcd_filename */
|| !(ctx->cff_filename || ctx->fann_filename) || !(ctx->cff_filename || ctx->fann_filename)
){ ){
transform_treebank_help_message(ctx); transform_treebank_help_message(ctx);
...@@ -117,7 +117,7 @@ int generate_training_file_stream(FILE *output_file, context *ctx) ...@@ -117,7 +117,7 @@ int generate_training_file_stream(FILE *output_file, context *ctx)
feat_vec_print(output_file, fv); feat_vec_print(output_file, fv);
} }
else if(ctx->fann_filename){ else if(ctx->fann_filename){
feat_vec_print_dnn(output_file, fv, ctx->features_model, ctx->mcd_struct); /* feat_vec_print_dnn(output_file, fv, ctx->features_model, ctx->mcd_struct); */
print_mvt_fann(output_file, ctx->mvt_nb, mvt_code); print_mvt_fann(output_file, ctx->mvt_nb, mvt_code);
fprintf(output_file, "\n\n"); fprintf(output_file, "\n\n");
} }
...@@ -280,24 +280,22 @@ int main(int argc, char *argv[]) ...@@ -280,24 +280,22 @@ int main(int argc, char *argv[])
transform_treebank_check_options(ctx); transform_treebank_check_options(ctx);
if(ctx->mode == TRAIN_MODE){ if(ctx->mode == TRAIN_MODE){
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename, NULL); mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->conll_filename);
ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct); ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
/* ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1; */
} }
else if(ctx->mode == TEST_MODE){ else if(ctx->mode == TEST_MODE){
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio); ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
ctx->mcd_struct = mcd_read(ctx->mcd_filename, NULL, ctx->vocabs); mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs);
ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
} }
ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
if(ctx->dico_labels == NULL){ if(ctx->dico_labels == NULL){
fprintf(stderr, "cannot find label names\n"); fprintf(stderr, "cannot find label names\n");
return 1; return 1;
} }
ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1; ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 1;
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
......
...@@ -48,10 +48,10 @@ word *word_parse_buffer(char *buffer, mcd *mcd_struct) ...@@ -48,10 +48,10 @@ word *word_parse_buffer(char *buffer, mcd *mcd_struct)
w = word_new(buffer); w = word_new(buffer);
token = strtok(buffer, "\t"); token = strtok(buffer, "\t");
do{ do{
if((column_nb < mcd_struct->nb_col) && (mcd_struct->col2type[column_nb] != -1)){ if((column_nb < mcd_struct->nb_col) && (mcd_struct->type[column_nb] != -1)){
w->feat_array[mcd_struct->col2type[column_nb]] = mcd_get_code(mcd_struct, token, column_nb); w->feat_array[mcd_struct->type[column_nb]] = mcd_get_code(mcd_struct, token, column_nb);
} }
if(mcd_struct->col2type[column_nb] == FEAT_TYPE_FORM){ if(mcd_struct->type[column_nb] == FEAT_TYPE_FORM){
w->U1 = isupper(token[0]) ? 1 : 0; w->U1 = isupper(token[0]) ? 1 : 0;
} }
column_nb++; column_nb++;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment