Skip to content
Snippets Groups Projects
Commit 29e914dd authored by Johannes Heinecke's avatar Johannes Heinecke
Browse files

Merge branch 'master' of https://gitlab.lif.univ-mrs.fr/alexis.nasr/macaon2 into johannes

parents 021c77a2 ebabce6b
Branches
No related tags found
1 merge request!7Johannes
This commit is part of merge request !7. Comments created here will be created in the context of that merge request.
Showing
with 187 additions and 84 deletions
...@@ -29,11 +29,6 @@ ...@@ -29,11 +29,6 @@
#define word_buffer_is_last(wb) (((wb)->current_index == (wb)->nbelem - 1)? 1 : 0) #define word_buffer_is_last(wb) (((wb)->current_index == (wb)->nbelem - 1)? 1 : 0)
#define word_buffer_is_empty(wb) (((wb)->nbelem == 0)? 1 : 0) #define word_buffer_is_empty(wb) (((wb)->nbelem == 0)? 1 : 0)
typedef struct { typedef struct {
int size; /* size of the array used to store words */ int size; /* size of the array used to store words */
int nbelem; /* number of words in the buffer */ int nbelem; /* number of words in the buffer */
......
...@@ -93,7 +93,6 @@ void trie_add_word(trie *t, int *word, int length) ...@@ -93,7 +93,6 @@ void trie_add_word(trie *t, int *word, int length)
trie_trans *current_trans = NULL; trie_trans *current_trans = NULL;
int transition_exists = 1; int transition_exists = 1;
int destination; int destination;
int i;
while((current_index < length) && transition_exists){ while((current_index < length) && transition_exists){
transition_exists = 0; transition_exists = 0;
......
...@@ -37,4 +37,9 @@ void context_mcd_help_message(context *ctx); ...@@ -37,4 +37,9 @@ void context_mcd_help_message(context *ctx);
void context_form_column_help_message(context *ctx); void context_form_column_help_message(context *ctx);
void context_pos_column_help_message(context *ctx); void context_pos_column_help_message(context *ctx);
void context_input_help_message(context *ctx);
void context_mwe_token_separator_help_message(context *ctx);
void context_mwe_filename_help_message(context *ctx);
void context_vocab_help_message(context *ctx);
#endif #endif
...@@ -23,7 +23,6 @@ dico *decompose_mwe_in_fplm_file(char *fplm_filename, FILE *output_file, int deb ...@@ -23,7 +23,6 @@ dico *decompose_mwe_in_fplm_file(char *fplm_filename, FILE *output_file, int deb
char pos[1000]; char pos[1000];
char lemma[1000]; char lemma[1000];
char morpho[1000]; char morpho[1000];
int num = 0;
char buffer[10000]; char buffer[10000];
FILE *f= myfopen(fplm_filename, "r"); FILE *f= myfopen(fplm_filename, "r");
int fields_nb; int fields_nb;
......
...@@ -42,16 +42,12 @@ int look_for_accept_state_in_path(trie *mwe_trie, int *states_array, int path_in ...@@ -42,16 +42,12 @@ int look_for_accept_state_in_path(trie *mwe_trie, int *states_array, int path_in
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
char buffer[10000]; char buffer[10000];
char *buffer_copy;
char *form;
int form_code; int form_code;
context *ctx; context *ctx;
int form_column; /* int form_column; */
FILE *f = NULL; FILE *f = NULL;
trie *mwe_trie; trie *mwe_trie;
dico *d_mwe_tokens = NULL; dico *d_mwe_tokens = NULL;
int origin_state = 0;
int destination_state = 0;
int states_array[100]; int states_array[100];
int symbols_array[100]; int symbols_array[100];
int path_index = 0; int path_index = 0;
...@@ -60,12 +56,12 @@ int main(int argc, char *argv[]) ...@@ -60,12 +56,12 @@ int main(int argc, char *argv[])
ctx = context_read_options(argc, argv); ctx = context_read_options(argc, argv);
maca_lexer_check_options(ctx); maca_lexer_check_options(ctx);
/*
if(ctx->form_column != -1) if(ctx->form_column != -1)
form_column = ctx->form_column; form_column = ctx->form_column;
else else
form_column = ctx->mcd_struct->wf2col[MCD_WF_FORM]; form_column = ctx->mcd_struct->wf2col[MCD_WF_FORM];
*/
if(ctx->input_filename == NULL) if(ctx->input_filename == NULL)
f = stdin; f = stdin;
else else
......
...@@ -4,13 +4,13 @@ FLEX_TARGET(en_tok_rules ./src/en_tok_rules.l ${CMAKE_CURRENT_BINARY_DIR}/en_lex ...@@ -4,13 +4,13 @@ FLEX_TARGET(en_tok_rules ./src/en_tok_rules.l ${CMAKE_CURRENT_BINARY_DIR}/en_lex
set(SOURCES ./src/context.c set(SOURCES ./src/context.c
${FLEX_fr_tok_rules_OUTPUTS} ${FLEX_fr_tok_rules_OUTPUTS}
${FLEX_en_tok_rules_OUTPUTS}) ${FLEX_en_tok_rules_OUTPUTS})
##compiling library ##compiling library
include_directories(./src) include_directories(./src)
add_library(maca_tokenizer_lib STATIC ${SOURCES}) add_library(maca_tokenizer_lib STATIC ${SOURCES})
#compiling, linking and installing executables
include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR})
add_executable(maca_tokenizer ./src/maca_tokenizer.c) add_executable(maca_tokenizer ./src/maca_tokenizer.c)
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#include "util.h" #include "util.h"
void context_set_linguistic_resources_filenames(context *ctx);
void context_free(context *ctx) void context_free(context *ctx)
{ {
...@@ -109,41 +108,13 @@ context *context_read_options(int argc, char *argv[]) ...@@ -109,41 +108,13 @@ context *context_read_options(int argc, char *argv[])
} }
} }
context_set_linguistic_resources_filenames(ctx);
if(ctx->mcd_filename) if(ctx->mcd_filename)
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose); ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
/*
if(ctx->mcd_filename == NULL) if(ctx->mcd_filename == NULL)
/* ctx->mcd_struct = mcd_build_conll07(); */
ctx->mcd_struct = mcd_build_wplgf(); ctx->mcd_struct = mcd_build_wplgf();
*/
return ctx; return ctx;
} }
void context_set_linguistic_resources_filenames(context *ctx)
{
char absolute_path[500];
char absolute_filename[500];
absolute_path[0] = '\0';
if(ctx->maca_data_path)
strcat(absolute_path, ctx->maca_data_path);
else {
char *e = getenv("MACAON_DIR");
if (e != NULL) {
strcat(absolute_path, e);
} else {
fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
}
}
strcat(absolute_path, "/");
strcat(absolute_path, ctx->language);
strcat(absolute_path, "/bin/");
}
...@@ -29,5 +29,6 @@ void context_conll_help_message(context *ctx); ...@@ -29,5 +29,6 @@ void context_conll_help_message(context *ctx);
void context_language_help_message(context *ctx); void context_language_help_message(context *ctx);
void context_maca_data_path_help_message(context *ctx); void context_maca_data_path_help_message(context *ctx);
void context_mcd_help_message(context *ctx); void context_mcd_help_message(context *ctx);
void context_input_help_message(context *ctx);
#endif #endif
...@@ -8,19 +8,32 @@ extern int defait_amalgames; ...@@ -8,19 +8,32 @@ extern int defait_amalgames;
%option noyywrap %option noyywrap
%s state_defait_amalgames %s state_defait_amalgames
%s state_num
%% %%
if(defait_amalgames){ if(defait_amalgames){
BEGIN(state_defait_amalgames); BEGIN(state_defait_amalgames);
} }
<state_num>[0-9]*,[0-9]* printf("%s", yytext); [0-9]+,[0-9]* printf("%s", yytext);
[ \t]+ printf("\n"); [ \t]+ printf("\n");
\. printf("\n."); \. printf("\n.");
\, printf("\n,"); \, printf("\n,");
… printf("\n…");
' printf("'\n"); ' printf("'\n");
’ printf("'\n"); ’ printf("'\n");
-je printf("\n-je");
-tu printf("\n-tu");
-on printf("\n-on");
-ce printf("\n-ce");
-t-il printf("\n-t-il");
-il printf("\n-il");
-t-ils printf("\n-t-ils");
-ils printf("\n-ils");
-t-elle printf("\n-t-elle");
-elle printf("\n-elle");
-t-elles printf("\n-t-elles");
-elles printf("\n-elles");
\n+ printf("\n"); \n+ printf("\n");
<state_defait_amalgames>{ <state_defait_amalgames>{
" du " printf("\nde\nle\n"); " du " printf("\nde\nle\n");
" des " printf("\nde\nles\n"); " des " printf("\nde\nles\n");
......
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
#include<string.h> #include<string.h>
#include"context.h" #include"context.h"
int enlex(void);
int frlex(void);
int defait_amalgames = 0; int defait_amalgames = 0;
void maca_tokenizer_help_message(context *ctx) void maca_tokenizer_help_message(context *ctx)
......
...@@ -164,6 +164,9 @@ void context_f2p_filename_help_message(context *ctx){ ...@@ -164,6 +164,9 @@ void context_f2p_filename_help_message(context *ctx){
void context_trace_mode_help_message(context *ctx){ void context_trace_mode_help_message(context *ctx){
fprintf(stderr, "\t-T --traces : activate trace mode (default is false)\n"); fprintf(stderr, "\t-T --traces : activate trace mode (default is false)\n");
} }
void context_debug_help_message(context *ctx){
fprintf(stderr, "\t-d --debug : activate debug mode (default is false)\n");
}
context *context_read_options(int argc, char *argv[]) context *context_read_options(int argc, char *argv[])
{ {
......
...@@ -102,4 +102,7 @@ void context_ifpls_help_message(context *ctx); ...@@ -102,4 +102,7 @@ void context_ifpls_help_message(context *ctx);
void context_input_help_message(context *ctx); void context_input_help_message(context *ctx);
void context_root_label_help_message(context *ctx); void context_root_label_help_message(context *ctx);
void context_debug_help_message(context *ctx);
#endif #endif
...@@ -13,11 +13,12 @@ ...@@ -13,11 +13,12 @@
/*#include"dnn_decoder.h"*/ /*#include"dnn_decoder.h"*/
#include"config2feat_vec.h" #include"config2feat_vec.h"
void decode_help_message(context *ctx) void maca_trans_parser_help_message(context *ctx)
{ {
context_general_help_message(ctx); context_general_help_message(ctx);
/* context_beam_help_message(ctx); */ /* context_beam_help_message(ctx); */
/* context_conll_help_message(ctx); */ /* context_conll_help_message(ctx); */
context_debug_help_message(ctx);
fprintf(stderr, "INPUT\n"); fprintf(stderr, "INPUT\n");
context_input_help_message(ctx); context_input_help_message(ctx);
context_mcd_help_message(ctx); context_mcd_help_message(ctx);
...@@ -27,7 +28,7 @@ void decode_help_message(context *ctx) ...@@ -27,7 +28,7 @@ void decode_help_message(context *ctx)
context_root_label_help_message(ctx); context_root_label_help_message(ctx);
} }
void decode_check_options(context *ctx){ void maca_trans_parser_check_options(context *ctx){
if(ctx->help if(ctx->help
/*!ctx->conll_filename*/ /*!ctx->conll_filename*/
/* || !ctx->perc_model_filename /* || !ctx->perc_model_filename
...@@ -35,7 +36,7 @@ void decode_check_options(context *ctx){ ...@@ -35,7 +36,7 @@ void decode_check_options(context *ctx){
|| !ctx->vocabs_filename || !ctx->vocabs_filename
|| !ctx->features_model_filename*/ || !ctx->features_model_filename*/
){ ){
decode_help_message(ctx); maca_trans_parser_help_message(ctx);
exit(1); exit(1);
} }
} }
...@@ -83,7 +84,7 @@ int main(int argc, char *argv[]) ...@@ -83,7 +84,7 @@ int main(int argc, char *argv[])
context *ctx; context *ctx;
ctx = context_read_options(argc, argv); ctx = context_read_options(argc, argv);
decode_check_options(ctx); maca_trans_parser_check_options(ctx);
set_linguistic_resources_filenames_parser(ctx); set_linguistic_resources_filenames_parser(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose); ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"feat_fct.h"
#include"feature_table.h"
#include"dico.h"
#include"beam.h"
#include"form2pos.h"
#include"simple_decoder_tagger.h"
/*#include"dnn_decoder.h"*/
#include"config2feat_vec.h"
void decode_tagger_help_message(context *ctx)
{
context_general_help_message(ctx);
context_beam_help_message(ctx);
context_conll_help_message(ctx);
fprintf(stderr, "INPUT\n");
context_input_help_message(ctx);
context_mcd_help_message(ctx);
context_model_help_message(ctx);
context_vocabs_help_message(ctx);
context_features_model_help_message(ctx);
context_f2p_filename_help_message(ctx);
}
void decode_tagger_check_options(context *ctx){
if(ctx->help
/*!ctx->conll_filename*/
/* || !ctx->perc_model_filename
|| !ctx->mcd_filename
|| !ctx->vocabs_filename
|| !ctx->features_model_filename*/
){
decode_tagger_help_message(ctx);
exit(1);
}
}
void decode_tagger_set_linguistic_resources_filenames(context *ctx)
{
char absolute_filename[500];
if(!ctx->perc_model_filename){
strcpy(absolute_filename, ctx->maca_data_path);
strcat(absolute_filename, DEFAULT_MODEL_TAGGER_FILENAME);
ctx->perc_model_filename = strdup(absolute_filename);
}
if(!ctx->vocabs_filename){
strcpy(absolute_filename, ctx->maca_data_path);
strcat(absolute_filename, DEFAULT_VOCABS_TAGGER_FILENAME);
ctx->vocabs_filename = strdup(absolute_filename);
}
/* if(!ctx->mcd_filename){
strcpy(absolute_filename, ctx->maca_data_path);
strcat(absolute_filename, DEFAULT_MULTI_COL_DESC_TAGGER_FILENAME);
ctx->mcd_filename = strdup(absolute_filename);
}*/
if(!ctx->features_model_filename){
strcpy(absolute_filename, ctx->maca_data_path);
strcat(absolute_filename, DEFAULT_FEATURES_MODEL_TAGGER_FILENAME);
ctx->features_model_filename = strdup(absolute_filename);
}
if(!ctx->f2p_filename){
strcpy(absolute_filename, ctx->maca_data_path);
strcat(absolute_filename, DEFAULT_F2P_FILENAME);
ctx->f2p_filename = strdup(absolute_filename);
ctx->f2p = form2pos_read(ctx->f2p_filename);
}
if(ctx->verbose){
fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
}
}
int main(int argc, char *argv[])
{
context *ctx = context_read_options(argc, argv);
decode_tagger_check_options(ctx);
decode_tagger_set_linguistic_resources_filenames(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
simple_decoder_tagger2(ctx);
context_free(ctx);
return 0;
}
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
void look_for_number_of_features_and_classes(char *filename, int *max_feat, int *max_class); void look_for_number_of_features_and_classes(char *filename, int *max_feat, int *max_class);
int look_for_number_of_features(char *filename); int look_for_number_of_features(char *filename);
int look_for_number_of_examples(char *filename);
int *count_occ_of_features(char *filename, int *n_feat); int *count_occ_of_features(char *filename, int *n_feat);
int cff_look_for_number_of_columns(char *cff_filename); int cff_look_for_number_of_columns(char *cff_filename);
int *cff_max_value_per_column(char *cff_filename, int n); int *cff_max_value_per_column(char *cff_filename, int n);
......
...@@ -13,10 +13,8 @@ typedef struct { ...@@ -13,10 +13,8 @@ typedef struct {
/*#include "word_emb.h"*/ /*#include "word_emb.h"*/
#include "mcd.h" #include "mcd.h"
void feat_vec_concat(feat_vec *fv1, feat_vec *fv2); void feat_vec_concat(feat_vec *fv1, feat_vec *fv2);
feat_vec *feat_vec_copy(feat_vec *fv); feat_vec *feat_vec_copy(feat_vec *fv);
feat_vec *feat_vec_new(int size); feat_vec *feat_vec_new(int size);
void feat_vec_free(feat_vec *fv); void feat_vec_free(feat_vec *fv);
int feat_vec_add(feat_vec *fv, int feat); int feat_vec_add(feat_vec *fv, int feat);
......
...@@ -75,6 +75,18 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int ...@@ -75,6 +75,18 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int
fclose(f); fclose(f);
} }
int look_for_number_of_examples(char *filename)
{
char buffer[10000];
FILE *f = fopen(filename, "r");
int number = 0;
while(fgets(buffer, 10000, f))
number ++;
fclose(f);
return number;
}
int look_for_number_of_features(char *filename) int look_for_number_of_features(char *filename)
{ {
char buffer[10000]; char buffer[10000];
......
...@@ -8,8 +8,6 @@ ...@@ -8,8 +8,6 @@
feature_table *feature_table_load(char *filename, int verbose) feature_table *feature_table_load(char *filename, int verbose)
{ {
int i; int i;
feature_table *ft = NULL; feature_table *ft = NULL;
int features_nb; int features_nb;
int classes_nb; int classes_nb;
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#include<stdlib.h> #include<stdlib.h>
#include<string.h> #include<string.h>
#include"feature_table.h" #include"feature_table.h"
#include"cf_file.h"
#include"util.h" #include"util.h"
void perceptron_avg(char *filename, feature_table *ft, int n_iter) void perceptron_avg(char *filename, feature_table *ft, int n_iter)
...@@ -36,6 +37,7 @@ void perceptron_avg(char *filename, feature_table *ft, int n_iter) ...@@ -36,6 +37,7 @@ void perceptron_avg(char *filename, feature_table *ft, int n_iter)
feat_vec_add(fv, atoi(token)); feat_vec_add(fv, atoi(token));
} }
for(cla=0; cla < classes_nb; cla++) for(cla=0; cla < classes_nb; cla++)
classes_score[cla] = 0; classes_score[cla] = 0;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment