Skip to content
Snippets Groups Projects
Commit 29e914dd authored by Johannes Heinecke
Browse files

Merge branch 'master' of https://gitlab.lif.univ-mrs.fr/alexis.nasr/macaon2 into johannes

parents 021c77a2 ebabce6b
Branches
Tags
1 merge request: !7 Johannes
Showing
with 187 additions and 84 deletions
......@@ -29,11 +29,6 @@
/* 1 when current_index equals nbelem - 1, 0 otherwise.
   wb is expanded twice: pass an expression without side effects. */
#define word_buffer_is_last(wb) ((wb)->current_index == (wb)->nbelem - 1)
/* 1 when nbelem is zero, 0 otherwise. */
#define word_buffer_is_empty(wb) (!(wb)->nbelem)
typedef struct {
int size; /* size of the array used to store words */
int nbelem; /* number of words in the buffer */
......
......@@ -93,7 +93,6 @@ void trie_add_word(trie *t, int *word, int length)
trie_trans *current_trans = NULL;
int transition_exists = 1;
int destination;
int i;
while((current_index < length) && transition_exists){
transition_exists = 0;
......
......@@ -37,4 +37,9 @@ void context_mcd_help_message(context *ctx);
void context_form_column_help_message(context *ctx);
void context_pos_column_help_message(context *ctx);
void context_input_help_message(context *ctx);
void context_mwe_token_separator_help_message(context *ctx);
void context_mwe_filename_help_message(context *ctx);
void context_vocab_help_message(context *ctx);
#endif
......@@ -23,7 +23,6 @@ dico *decompose_mwe_in_fplm_file(char *fplm_filename, FILE *output_file, int deb
char pos[1000];
char lemma[1000];
char morpho[1000];
int num = 0;
char buffer[10000];
FILE *f= myfopen(fplm_filename, "r");
int fields_nb;
......
......@@ -42,16 +42,12 @@ int look_for_accept_state_in_path(trie *mwe_trie, int *states_array, int path_in
int main(int argc, char *argv[])
{
char buffer[10000];
char *buffer_copy;
char *form;
int form_code;
context *ctx;
int form_column;
/* int form_column; */
FILE *f = NULL;
trie *mwe_trie;
dico *d_mwe_tokens = NULL;
int origin_state = 0;
int destination_state = 0;
int states_array[100];
int symbols_array[100];
int path_index = 0;
......@@ -60,12 +56,12 @@ int main(int argc, char *argv[])
ctx = context_read_options(argc, argv);
maca_lexer_check_options(ctx);
/*
if(ctx->form_column != -1)
form_column = ctx->form_column;
else
form_column = ctx->mcd_struct->wf2col[MCD_WF_FORM];
*/
if(ctx->input_filename == NULL)
f = stdin;
else
......
......@@ -4,13 +4,13 @@ FLEX_TARGET(en_tok_rules ./src/en_tok_rules.l ${CMAKE_CURRENT_BINARY_DIR}/en_lex
set(SOURCES ./src/context.c
${FLEX_fr_tok_rules_OUTPUTS}
${FLEX_en_tok_rules_OUTPUTS})
##compiling library
include_directories(./src)
add_library(maca_tokenizer_lib STATIC ${SOURCES})
#compiling, linking and installing executables
include_directories(${CMAKE_CURRENT_BINARY_DIR})
add_executable(maca_tokenizer ./src/maca_tokenizer.c)
......
......@@ -7,7 +7,6 @@
#include "util.h"
void context_set_linguistic_resources_filenames(context *ctx);
void context_free(context *ctx)
{
......@@ -109,41 +108,13 @@ context *context_read_options(int argc, char *argv[])
}
}
context_set_linguistic_resources_filenames(ctx);
if(ctx->mcd_filename)
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
/*
if(ctx->mcd_filename == NULL)
/* ctx->mcd_struct = mcd_build_conll07(); */
ctx->mcd_struct = mcd_build_wplgf();
*/
return ctx;
}
/* Assembles the directory "<root>/<language>/bin/" in a local buffer.
   <root> is ctx->maca_data_path when it is set, otherwise the value of the
   MACAON_DIR environment variable; a warning is printed on stderr when
   neither is available and the root is then left empty.
   The path is not stored anywhere: ctx is only read.
   NOTE(review): the assembled path is discarded, so the only visible
   effects are the warnings -- confirm whether this is intended. */
void context_set_linguistic_resources_filenames(context *ctx)
{
  char absolute_path[500];
  const char *root = ctx->maca_data_path;
  int written;

  if(root == NULL){
    root = getenv("MACAON_DIR");
    if(root == NULL){
      fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
      root = "";
    }
  }

  /* a single bounded snprintf replaces the strcat chain, which could write
     past the end of the buffer with a long data path or language name;
     a missing language is formatted as an empty component */
  written = snprintf(absolute_path, sizeof absolute_path, "%s/%s/bin/",
                     root, ctx->language ? ctx->language : "");
  if(written < 0 || (size_t)written >= sizeof absolute_path)
    fprintf(stderr, "ATTENTION: the linguistic resources path is too long and was truncated\n");
}
......@@ -29,5 +29,6 @@ void context_conll_help_message(context *ctx);
void context_language_help_message(context *ctx);
void context_maca_data_path_help_message(context *ctx);
void context_mcd_help_message(context *ctx);
void context_input_help_message(context *ctx);
#endif
......@@ -8,19 +8,32 @@ extern int defait_amalgames;
%option noyywrap
%s state_defait_amalgames
%s state_num
%%
if(defait_amalgames){
BEGIN(state_defait_amalgames);
}
<state_num>[0-9]*,[0-9]* printf("%s", yytext);
[0-9]+,[0-9]* printf("%s", yytext);
[ \t]+ printf("\n");
\. printf("\n.");
\, printf("\n,");
… printf("\n…");
' printf("'\n");
’ printf("'\n");
-je printf("\n-je");
-tu printf("\n-tu");
-on printf("\n-on");
-ce printf("\n-ce");
-t-il printf("\n-t-il");
-il printf("\n-il");
-t-ils printf("\n-t-ils");
-ils printf("\n-ils");
-t-elle printf("\n-t-elle");
-elle printf("\n-elle");
-t-elles printf("\n-t-elles");
-elles printf("\n-elles");
\n+ printf("\n");
<state_defait_amalgames>{
" du " printf("\nde\nle\n");
" des " printf("\nde\nles\n");
......
......@@ -3,6 +3,9 @@
#include<string.h>
#include"context.h"
int enlex(void);
int frlex(void);
int defait_amalgames = 0;
void maca_tokenizer_help_message(context *ctx)
......
......@@ -164,6 +164,9 @@ void context_f2p_filename_help_message(context *ctx){
/* Writes the usage line of the -T / --traces option on stderr.
   ctx is not consulted. */
void context_trace_mode_help_message(context *ctx){
  (void)ctx;
  fputs("\t-T --traces : activate trace mode (default is false)\n", stderr);
}
/* Writes the usage line of the -d / --debug option on stderr.
   ctx is not consulted. */
void context_debug_help_message(context *ctx){
  (void)ctx;
  fputs("\t-d --debug : activate debug mode (default is false)\n", stderr);
}
context *context_read_options(int argc, char *argv[])
{
......
......@@ -102,4 +102,7 @@ void context_ifpls_help_message(context *ctx);
void context_input_help_message(context *ctx);
void context_root_label_help_message(context *ctx);
void context_debug_help_message(context *ctx);
#endif
......@@ -13,11 +13,12 @@
/*#include"dnn_decoder.h"*/
#include"config2feat_vec.h"
void decode_help_message(context *ctx)
void maca_trans_parser_help_message(context *ctx)
{
context_general_help_message(ctx);
/* context_beam_help_message(ctx); */
/* context_conll_help_message(ctx); */
context_debug_help_message(ctx);
fprintf(stderr, "INPUT\n");
context_input_help_message(ctx);
context_mcd_help_message(ctx);
......@@ -27,7 +28,7 @@ void decode_help_message(context *ctx)
context_root_label_help_message(ctx);
}
void decode_check_options(context *ctx){
void maca_trans_parser_check_options(context *ctx){
if(ctx->help
/*!ctx->conll_filename*/
/* || !ctx->perc_model_filename
......@@ -35,7 +36,7 @@ void decode_check_options(context *ctx){
|| !ctx->vocabs_filename
|| !ctx->features_model_filename*/
){
decode_help_message(ctx);
maca_trans_parser_help_message(ctx);
exit(1);
}
}
......@@ -83,7 +84,7 @@ int main(int argc, char *argv[])
context *ctx;
ctx = context_read_options(argc, argv);
decode_check_options(ctx);
maca_trans_parser_check_options(ctx);
set_linguistic_resources_filenames_parser(ctx);
ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"feat_fct.h"
#include"feature_table.h"
#include"dico.h"
#include"beam.h"
#include"form2pos.h"
#include"simple_decoder_tagger.h"
/*#include"dnn_decoder.h"*/
#include"config2feat_vec.h"
/* Prints the usage text of the tagger decoder by calling, in order, the
   help routine of every option it accepts. An "INPUT" header line is
   written on stderr before the input related options. */
void decode_tagger_help_message(context *ctx)
{
  /* options listed before the INPUT header */
  context_general_help_message(ctx);
  context_beam_help_message(ctx);
  context_conll_help_message(ctx);

  /* options listed under the INPUT header */
  fputs("INPUT\n", stderr);
  context_input_help_message(ctx);
  context_mcd_help_message(ctx);
  context_model_help_message(ctx);
  context_vocabs_help_message(ctx);
  context_features_model_help_message(ctx);
  context_f2p_filename_help_message(ctx);
}
/* When the help flag of ctx is set, prints the usage text and terminates
   the program with exit status 1; otherwise returns without doing anything.
   Only the help flag is tested: the checks on the conll, model, mcd,
   vocabs and features model filenames are disabled. */
void decode_tagger_check_options(context *ctx){
  if(!ctx->help)
    return;

  decode_tagger_help_message(ctx);
  exit(1);
}
/* Returns a newly allocated string made of dir immediately followed by name.
   A NULL dir is treated as an empty prefix. The program is terminated with
   status 1 when the allocation fails. The caller owns the returned string. */
static char *decode_tagger_default_filename(const char *dir, const char *name)
{
  size_t size;
  char *filename;

  if(dir == NULL)
    dir = "";
  size = strlen(dir) + strlen(name) + 1;
  filename = malloc(size);
  if(filename == NULL){
    fprintf(stderr, "decode_tagger: cannot allocate memory for a default filename\n");
    exit(1);
  }
  snprintf(filename, size, "%s%s", dir, name);
  return filename;
}

/* Gives a default value, located under ctx->maca_data_path, to every
   resource filename of ctx that is still unset: perceptron model, vocabs,
   features model and form-to-pos file. No default is installed for
   mcd_filename. When ctx->verbose is set, the resulting filenames are
   printed on stderr.
   The filenames are built in exactly sized heap buffers, so a long data
   path cannot overflow a fixed size array. */
void decode_tagger_set_linguistic_resources_filenames(context *ctx)
{
  if(!ctx->perc_model_filename)
    ctx->perc_model_filename = decode_tagger_default_filename(ctx->maca_data_path, DEFAULT_MODEL_TAGGER_FILENAME);

  if(!ctx->vocabs_filename)
    ctx->vocabs_filename = decode_tagger_default_filename(ctx->maca_data_path, DEFAULT_VOCABS_TAGGER_FILENAME);

  if(!ctx->features_model_filename)
    ctx->features_model_filename = decode_tagger_default_filename(ctx->maca_data_path, DEFAULT_FEATURES_MODEL_TAGGER_FILENAME);

  if(!ctx->f2p_filename){
    ctx->f2p_filename = decode_tagger_default_filename(ctx->maca_data_path, DEFAULT_F2P_FILENAME);
    /* NOTE(review): the form2pos table is only read here when the default
       filename is used -- confirm that a user supplied f2p file is loaded
       elsewhere */
    ctx->f2p = form2pos_read(ctx->f2p_filename);
  }

  if(ctx->verbose){
    fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
    fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
    /* mcd_filename has no default and may be unset: never hand NULL to %s */
    fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
    fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
    fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
  }
}
/* Entry point of the tagger decoder: reads the options, fills in the
   default resource filenames, loads the features model and the vocabs,
   links the mcd structure to the vocabs, runs the tagger and frees the
   context. */
int main(int argc, char *argv[])
{
  context *ctx;

  /* option handling; decode_tagger_check_options exits when help is requested */
  ctx = context_read_options(argc, argv);
  decode_tagger_check_options(ctx);
  decode_tagger_set_linguistic_resources_filenames(ctx);

  /* resource loading */
  ctx->features_model = feat_model_read(ctx->features_model_filename, ctx->verbose);
  ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);
  mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);

  /* decoding, then cleanup */
  simple_decoder_tagger2(ctx);
  context_free(ctx);

  return 0;
}
......@@ -3,6 +3,7 @@
void look_for_number_of_features_and_classes(char *filename, int *max_feat, int *max_class);
int look_for_number_of_features(char *filename);
int look_for_number_of_examples(char *filename);
int *count_occ_of_features(char *filename, int *n_feat);
int cff_look_for_number_of_columns(char *cff_filename);
int *cff_max_value_per_column(char *cff_filename, int n);
......
......@@ -13,10 +13,8 @@ typedef struct {
/*#include "word_emb.h"*/
#include "mcd.h"
void feat_vec_concat(feat_vec *fv1, feat_vec *fv2);
feat_vec *feat_vec_copy(feat_vec *fv);
feat_vec *feat_vec_new(int size);
void feat_vec_free(feat_vec *fv);
int feat_vec_add(feat_vec *fv, int feat);
......
......@@ -75,6 +75,18 @@ void look_for_number_of_features_and_classes(char *filename, int *max_feat, int
fclose(f);
}
/* Counts the examples stored in the text file filename, one example per
   successful fgets() read: a line longer than the internal buffer is
   therefore counted once per chunk.
   Returns the count, or 0 (after printing a message on stderr) when
   filename is NULL or the file cannot be opened. */
int look_for_number_of_examples(char *filename)
{
  char buffer[10000];
  FILE *f;
  int number = 0;

  if(filename == NULL){
    fprintf(stderr, "look_for_number_of_examples: no filename given\n");
    return 0;
  }

  f = fopen(filename, "r");
  if(f == NULL){
    /* without this check fgets() and fclose() would be handed a NULL stream */
    fprintf(stderr, "look_for_number_of_examples: cannot open file %s\n", filename);
    return 0;
  }

  while(fgets(buffer, sizeof buffer, f))
    number++;

  fclose(f);
  return number;
}
int look_for_number_of_features(char *filename)
{
char buffer[10000];
......
......@@ -8,8 +8,6 @@
feature_table *feature_table_load(char *filename, int verbose)
{
int i;
feature_table *ft = NULL;
int features_nb;
int classes_nb;
......
......@@ -2,6 +2,7 @@
#include<stdlib.h>
#include<string.h>
#include"feature_table.h"
#include"cf_file.h"
#include"util.h"
void perceptron_avg(char *filename, feature_table *ft, int n_iter)
......@@ -36,6 +37,7 @@ void perceptron_avg(char *filename, feature_table *ft, int n_iter)
feat_vec_add(fv, atoi(token));
}
for(cla=0; cla < classes_nb; cla++)
classes_score[cla] = 0;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment