diff --git a/maca_common/src/mcd.c b/maca_common/src/mcd.c index 79533c51ef86338b697bbd06faf46cd15c70a868..6cc0e7f2e6129ea97cacc2b405f06715e4a11a35 100644 --- a/maca_common/src/mcd.c +++ b/maca_common/src/mcd.c @@ -1,4 +1,3 @@ - #include<stdio.h> #include<stdlib.h> #include<string.h> @@ -86,7 +85,7 @@ void mcd_free(mcd *m) free(m); } -/* this function is used when reading an corpus file which structure is described in mcd m */ +/* this function is used when reading a corpus file which structure is described in mcd m */ /* it returns the code associated to string str found in column col */ /* the code depends on the way the column is represented (vocabulary, embedding or integer) */ diff --git a/maca_trans_parser/CMakeLists.txt b/maca_trans_parser/CMakeLists.txt index efe9bac9fe501f4855ed24d5a062935114587bd3..6ad73f7fd8743d5a5e9e0ce18ccce04e5ad4856f 100644 --- a/maca_trans_parser/CMakeLists.txt +++ b/maca_trans_parser/CMakeLists.txt @@ -22,7 +22,9 @@ src/movement_parser_arc_eager.c src/config.c src/queue.c # src/beam.c - src/feat_types.c +src/feat_types.c +src/mvt.c +src/mvt_stack.c ) #compiling library @@ -99,11 +101,22 @@ install (TARGETS maca_trans_tagger DESTINATION bin) #target_link_libraries(maca_trans_parser_train maca_common) #install (TARGETS maca_trans_parser_train DESTINATION bin) -add_executable(test_word_buffer ./src/test_word_buffer.c) -target_compile_options(test_word_buffer INTERFACE -Wall) -target_link_libraries(test_word_buffer transparse) -target_link_libraries(test_word_buffer maca_common) -install (TARGETS test_word_buffer DESTINATION bin) +#add_executable(test_word_buffer ./src/test_word_buffer.c) +#target_compile_options(test_word_buffer INTERFACE -Wall) +#target_link_libraries(test_word_buffer transparse) +#target_link_libraries(test_word_buffer maca_common) +#install (TARGETS test_word_buffer DESTINATION bin) + +add_executable(test_mvt_stack ./src/test_mvt_stack.c) +target_compile_options(test_mvt_stack INTERFACE -Wall) 
+target_link_libraries(test_mvt_stack transparse maca_common) +install (TARGETS test_mvt_stack DESTINATION bin) + +add_executable(cff2fann ./src/cff2fann.c) +target_compile_options(cff2fann INTERFACE -Wall) +target_link_libraries(cff2fann transparse) +target_link_libraries(cff2fann maca_common) +install (TARGETS cff2fann DESTINATION bin) #add_executable(test_w2v ./src/test_w2v.c)
diff --git a/maca_trans_parser/src/feat_model.c b/maca_trans_parser/src/feat_model.c index 93b8bf0b230c380221643140501ea437aace440e..1495776c839f5e2e2394a4ff0fa8be292efe5d86 100644 --- a/maca_trans_parser/src/feat_model.c +++ b/maca_trans_parser/src/feat_model.c @@ -8,6 +8,37 @@ #include "feat_types.h" #include "config2feat_vec.h" +/* returns type of the nth feature in fm */ +/* returns -1 if fm is NULL, if n is out of range (negative or too large) or if the feature is not made of exactly one simple feature */ +int feat_model_get_type_feat_n(feat_model *fm, int n) +{ + feat_desc *fd; + simple_feat_desc *sfd; + + if(fm == NULL || n < 0 || n >= fm->nbelem) return -1; + fd = fm->array[n]; + if(fd->nbelem != 1) return -1; + sfd = fd->array[0]; + return sfd->type; +} + +/* very basic version: prints on f one line per feature, made of the names of its simple features, each followed by a space */ +void feat_model_print(FILE *f, feat_model *fm) +{ + int i,j; + feat_desc *fd; + simple_feat_desc *sfd; + if(fm == NULL) return; + for(i=0; i < fm->nbelem; i++){ + fd = fm->array[i]; + for(j=0; j < fd->nbelem; j++){ + sfd = fd->array[j]; + fprintf(f, "%s ", sfd->name); + } + fprintf(f, "\n"); + } +} + void feat_model_free(feat_model *fm) { int i;
diff --git a/maca_trans_parser/src/feat_model.h b/maca_trans_parser/src/feat_model.h index fc7dc5908757edc3ee52286ca8fc4b3f710160f9..d371d2551fe238156485e12d80a84b6062106562 100644 --- a/maca_trans_parser/src/feat_model.h +++ b/maca_trans_parser/src/feat_model.h @@ -19,9 +19,12 @@ typedef struct { void feat_model_free(feat_model *fm); feat_model *feat_model_new(char *name); +void feat_model_print(FILE *f, feat_model *fm); feat_desc *feat_model_add(feat_model *fm, feat_desc *fd); int feat_model_get_feat_value_fann(feat_model *fm, config *c, int feat_nb);
int feat_model_get_feat_value_cff(feat_model *fm, config *c, dico *dico_features, int feat_nb, int mode); feat_model *feat_model_read(char *filename, int verbose); void feat_model_compute_ranges(feat_model *fm, mcd *m, int mvt_nb); +int feat_model_get_type_feat_n(feat_model *fm, int n); + #endif diff --git a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c index b93cd538cb8b38416aa96fc7bc8337d0251f7551..cee41b35a5221ffb747b439f563c731495442e4f 100644 --- a/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c +++ b/maca_trans_parser/src/maca_trans_parser_arc_eager_mcf2cff.c @@ -70,14 +70,12 @@ void generate_training_file_stream(FILE *output_file, context *ctx) word_buffer_print(stdout, ref); printf("*****************************\n");*/ - config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode); - mvt_code = oracle_parser_arc_eager(c, ref, root_label); mvt_type = movement_type(mvt_code); mvt_label = movement_label(mvt_code); if(ctx->debug_mode){ - config_print(stdout,c); + config_print(stdout,c); movement_print(stdout, mvt_code, ctx->dico_labels); } @@ -91,12 +89,14 @@ void generate_training_file_stream(FILE *output_file, context *ctx) } else{ fprintf(output_file, "%d", mvt_code); + config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, ctx->mode); feat_vec_print(output_file, fv); } if(mvt_type == MVT_EOS){ movement_eos(c, 0); sentence_nb++; + fprintf(stderr, "sentence %d\n", sentence_nb); if(word_buffer_is_last(ref)) break; } diff --git a/maca_trans_parser/src/mvt.c b/maca_trans_parser/src/mvt.c new file mode 100644 index 0000000000000000000000000000000000000000..228bfbf546b39a6aa1d30ecf22813bc9f565c446 --- /dev/null +++ b/maca_trans_parser/src/mvt.c @@ -0,0 +1,26 @@ +#include<stdio.h> +#include<stdlib.h> +#include"util.h" +#include"mvt.h" + +mvt *mvt_new(int type, int gov, int dep) +{ + mvt *m = memalloc(sizeof(mvt)); + m->type = type; 
+ m->gov = gov; + m->dep = dep; + return m; +} + +void mvt_free(mvt *m) +{ + if(m) + free(m); +} + +void mvt_print(FILE *f, mvt *m) +{ + /* writes on f; nothing is printed when m is NULL */ + if(m) + fprintf(f, "type = %d gov = %d dep = %d\n", mvt_get_type(m), mvt_get_gov(m), mvt_get_dep(m)); +}
diff --git a/maca_trans_parser/src/mvt.h b/maca_trans_parser/src/mvt.h new file mode 100644 index 0000000000000000000000000000000000000000..863fb0d9a9080a0139f22789b6fd3e1640d71a76 --- /dev/null +++ b/maca_trans_parser/src/mvt.h @@ -0,0 +1,26 @@ +#ifndef __MVT__ +#define __MVT__ + +#include<stdio.h> +#include<stdlib.h> + +#define mvt_get_type(m) ((m)->type) +#define mvt_set_type(m,v) ((m)->type = (v)) + +#define mvt_get_gov(m) ((m)->gov) +#define mvt_set_gov(m,v) ((m)->gov = (v)) + +#define mvt_get_dep(m) ((m)->dep) +#define mvt_set_dep(m,v) ((m)->dep = (v)) + +typedef struct { + int type; /* type of the movement */ + int gov; /* first argument of the movement (governor if it is a dependency creation movement) */ + int dep; /* second argument of the movement (dependent if it is a dependency creation movement) */ +} mvt; + +mvt *mvt_new(int type, int gov, int dep); +void mvt_free(mvt *m); +void mvt_print(FILE *f, mvt *m); + +#endif
diff --git a/maca_trans_parser/src/mvt_stack.c b/maca_trans_parser/src/mvt_stack.c new file mode 100644 index 0000000000000000000000000000000000000000..2e301ecb8a3f0144e6def1c8599d0ac6cda3986a --- /dev/null +++ b/maca_trans_parser/src/mvt_stack.c @@ -0,0 +1,59 @@ +#include<stdio.h> +#include<stdlib.h> +#include"mvt_stack.h" +#include"util.h" + +void mvt_stack_print(FILE *f, mvt_stack *ms) +{ +int i; +for(i=0; i < mvt_stack_nbelem(ms); i++) + mvt_print(f, mvt_stack_elt_n(ms, i)); +} + +void mvt_stack_free(mvt_stack *s) +{ + free(s->array); + free(s); +} + +mvt_stack *mvt_stack_new(void) +{ + mvt_stack *s = (mvt_stack *)memalloc(sizeof(mvt_stack)); + s->size = 0; + s->array = NULL; + s->top = 0; + return s; +} + +/* pushes m on top of s; the array grows by one cell when it is full */ +void mvt_stack_push(mvt_stack *s, mvt *m) +{ + if(s->top == s->size){ + /* realloc into a temporary: on failure s->array is not overwritten with NULL */ + mvt **bigger = (mvt **)realloc(s->array, (s->size + 1) * sizeof(mvt *)); + if(bigger == NULL){ + fprintf(stderr, "mvt_stack_push: out of memory\n"); + exit(1); + } + s->array = bigger; + s->size++; + } + s->array[s->top] = m; + s->top++; +} + +/* returns a new stack holding the same mvt pointers as ms, in the same order; returns NULL if ms is NULL */ +/* the mvt elements are shared with ms, not duplicated (mvt_stack_free does not free them either) */ +mvt_stack *mvt_stack_copy(mvt_stack *ms) +{ + mvt_stack *copy; + int i; + + if(ms == NULL) return NULL; + copy = mvt_stack_new(); + for(i=0; i < ms->top; i++) + mvt_stack_push(copy, ms->array[i]); + return copy; +} + +
diff --git a/maca_trans_parser/src/mvt_stack.h b/maca_trans_parser/src/mvt_stack.h new file mode 100644 index 0000000000000000000000000000000000000000..5648fc7ee0e5a9d91362bd522976504bce678110 --- /dev/null +++ b/maca_trans_parser/src/mvt_stack.h @@ -0,0 +1,34 @@ +#ifndef __MVT_STACK__ +#define __MVT_STACK__ + +#include"mvt.h" + +#define mvt_stack_nbelem(s) ((s)->top) +#define mvt_stack_elt_n(s, n) ((s)->array[(s)->top - (n) - 1]) +#define mvt_stack_pop(s) (((s)->top == 0)? NULL : (s)->array[--((s)->top)]) + + +#define mvt_stack_is_empty(s) (((s)->top == 0) ? 1 : 0) + + +#define mvt_stack_top(s) (((s)->top > 0)? (s)->array[(s)->top - 1] : NULL) +#define mvt_stack_0(s) (((s)->top > 0)? (s)->array[(s)->top - 1] : NULL) +#define mvt_stack_1(s) (((s)->top > 1)? (s)->array[(s)->top - 2] : NULL) +#define mvt_stack_2(s) (((s)->top > 2)? (s)->array[(s)->top - 3] : NULL) +#define mvt_stack_3(s) (((s)->top > 3)? (s)->array[(s)->top - 4] : NULL) +#define mvt_stack_4(s) (((s)->top > 4)? (s)->array[(s)->top - 5] : NULL) +#define mvt_stack_5(s) (((s)->top > 5)?
(s)->array[(s)->top - 6] : NULL) + +typedef struct { + int size; + mvt **array; + int top; +} mvt_stack; + +mvt_stack *mvt_stack_new(void); +mvt_stack *mvt_stack_copy(mvt_stack *ms); +void mvt_stack_push(mvt_stack *ms, mvt *m); +void mvt_stack_print(FILE *f, mvt_stack *ms); +void mvt_stack_free(mvt_stack *ms); + +#endif
diff --git a/perceptron/lib/include/cf_file.h b/perceptron/lib/include/cf_file.h index 2df7263a1e0105053320e0df3de1ea339cf85b5a..177b5e673513f5d6b577111081ab25407161cff6 100644 --- a/perceptron/lib/include/cf_file.h +++ b/perceptron/lib/include/cf_file.h @@ -4,5 +4,7 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int *max_class); int look_for_number_of_features(char *filename); int *count_occ_of_features(char *filename, int *n_feat); +int cff_look_for_number_of_columns(char *cff_filename); +int *cff_max_value_per_column(char *cff_filename, int n); #endif
diff --git a/perceptron/lib/src/cf_file.c b/perceptron/lib/src/cf_file.c index ef43d01bbc40aa5d860a76b5ec0e73c93788731f..8a463f42874d70c3c8cd65ecd789b4b2790b28f9 100644 --- a/perceptron/lib/src/cf_file.c +++ b/perceptron/lib/src/cf_file.c @@ -3,6 +3,57 @@ #include<string.h> #include"util.h" + +/* returns an array of n integers: cell i holds the largest atoi() value found in the ith tab separated column of cff_filename */ +/* every cell starts at 0; columns beyond the first n are ignored so that max_array is never written out of bounds */ +int *cff_max_value_per_column(char *cff_filename, int n) +{ + char buffer[10000]; /* ugly */ + char *token; + int i; + int col; + int *max_array = memalloc(n * sizeof(int)); + for(i = 0; i < n; i++){ + max_array[i] = 0; + } + FILE *f = myfopen(cff_filename, "r"); + + while(fgets(buffer, 10000, f)){ + token = strtok(buffer, "\t"); + col = 0; + while(token && col < n){ + if(max_array[col] < atoi(token)){ + max_array[col] = atoi(token); + } + token = strtok(NULL , "\t"); + col++; + } + } + fclose(f); + return max_array; +} + +/* returns the number of tab separated columns found in the first line of cff_filename */ +/* returns 0 if no line can be read (the buffer is emptied instead of being tokenized uninitialized) */ +int cff_look_for_number_of_columns(char *cff_filename) +{ + int nb_col = 0; + FILE *f = myfopen(cff_filename, "r"); + char buffer[10000]; /* ugly */ + char *token; + + if(fgets(buffer, 10000, f) == NULL) buffer[0] = '\0'; + token = strtok(buffer, "\t"); + while(token){ + nb_col++; + token = strtok(NULL , "\t"); + } + + fclose(f); + return nb_col; +} + + void look_for_number_of_features_and_classes(char *filename, int *max_feat, int *max_class) { char buffer[10000];