Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • AC
  • Aloui_Dary
  • alexis
  • classifier
  • error_predictor
  • fixhelp
  • ignore_punct
  • johannes
  • libmacaon2
  • maca_common
  • maca_graph_parser
  • maca_trans_frame_parser
  • master
  • morpho
  • new_config
  • partial_parser
  • perceptron
  • refactor
  • silvio
  • ssrnn
  • tagger_options
  • tagparse
  • tfparsing
  • word_buffer
24 results

Target

Select target project
  • alexis.nasr/macaon2
1 result
Select Git revision
  • AC
  • Aloui_Dary
  • alexis
  • classifier
  • error_predictor
  • fixhelp
  • ignore_punct
  • johannes
  • libmacaon2
  • maca_common
  • maca_graph_parser
  • maca_trans_frame_parser
  • master
  • morpho
  • new_config
  • partial_parser
  • perceptron
  • refactor
  • silvio
  • ssrnn
  • tagger_options
  • tagparse
  • tfparsing
  • word_buffer
24 results
Show changes
Commits on Source (16)
Showing
with 604 additions and 20 deletions
......@@ -4,6 +4,13 @@ project(macaon2)
find_package(FLEX)
add_definitions("-Wall" )
SET(CMAKE_C_COMPILER g++)
SET(CMAKE_CXX_COMPILER g++)
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Ofast -DUSE_CBLAS")
SET( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lm -lopenblas" )
if (${CMAKE_C_COMPILER_VERSION} VERSION_LESS 5.3)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11")
......@@ -28,11 +35,12 @@ add_subdirectory(maca_common)
add_subdirectory(maca_tools)
add_subdirectory(perceptron)
#add_subdirectory(maca_lemmatizer)
#add_subdirectory(maca_morpho)
add_subdirectory(maca_tokenizer)
add_subdirectory(maca_lexer)
add_subdirectory(maca_trans_parser)
add_subdirectory(maca_crf_tagger)
add_subdirectory(maca_graph_parser)
#add_subdirectory(maca_graph_parser)
if(MACA_EXPORT)
add_subdirectory(maca_export)
......
......@@ -12,6 +12,7 @@ set(SOURCES src/util.c
src/feat_desc.c
src/feat_lib.c
src/feat_model.c
src/char16.c
)
......
/* Declarations for the 16-bit character strings implemented in char16.c. */
#ifndef __CHAR16__
#define __CHAR16__
/* element type of a 16-bit string; such strings end with a 0 element */
typedef short char16;
/* returns a character count (as opposed to a byte count) for a NUL-terminated UTF-8 string */
int utf8_strlen(char *utf8_string);
/* reverse conversion, char16 string to UTF-8 string */
char *char16toutf8(char16 *char16_string);
/* number of elements that precede the terminating 0 of string */
int char16_strlen(char16 *string);
/* builds a newly allocated, 0-terminated char16 string from a NUL-terminated UTF-8 string */
char16 *utf8tochar16(char *utf8_string);
#endif
......@@ -24,5 +24,5 @@ feat_desc *feat_model_add(feat_model *fm, feat_desc *fd);
feat_model *feat_model_read(char *filename, feat_lib *fl, int verbose);
void feat_model_compute_ranges(feat_model *fm, mcd *m, int mvt_nb);
int feat_model_get_type_feat_n(feat_model *fm, int n);
void catenate_int(char *string, int val);
#endif
......@@ -8,7 +8,7 @@
#define MCD_INVALID_VALUE -1
#define MCD_WF_NB 36
#define MCD_WF_NB 47
#define MCD_WF_ID 0
#define MCD_WF_FORM 1
......@@ -47,6 +47,81 @@
#define MCD_WF_Y 34
#define MCD_WF_Z 35
#define MCD_WF_Aspect 36
#define MCD_WF_Case 37
#define MCD_WF_Clitic 38
#define MCD_WF_Definite 39
#define MCD_WF_Gender 40
#define MCD_WF_Mood 41
#define MCD_WF_NameType 42
#define MCD_WF_NounType 43
#define MCD_WF_Number 44
#define MCD_WF_Person 45
#define MCD_WF_Tense 46
/*Abbr
AdpType
AdvType
Animacy
Animacy[gram]
ConjType
Connegative
Degree
Derivation
Dialect
Echo
Evident
Foreign
Form
Gender[dat]
Gender[erg]
Gender[psor]
HebBinyan
HebExistential
HebSource
Hyph
InfForm
Number[abs]
Number[dat]
Number[erg]
Number[psed]
Number[psor]
NumForm
NumType
NumValue
PartForm
PartType
Person[abs]
Person[dat]
Person[erg]
Person[psor]
Polarity
Polite
Polite[abs]
Polite[dat]
Polite[erg]
Position
Poss
Prefix
PrepCase
PrepForm
PronType
PunctSide
PunctType
Reflex
Strength
Style
Subcat
Typo
Variant
VerbForm
VerbType
Voice
Xtra*/
#include "dico.h"
#include "word_emb.h"
#include "dico_vec.h"
......@@ -90,6 +165,9 @@
#define mcd_get_y_col(m) (m)->wf2col[MCD_WF_Y]
#define mcd_get_z_col(m) (m)->wf2col[MCD_WF_Z]
#define mcd_set_form_col(m, v) (m)->wf[MCD_WF_FORM] = (v)
......@@ -121,6 +199,7 @@ mcd *mcd_build_conll07(void);
mcd *mcd_build_ifpls(void);
mcd *mcd_build_wplgf(void);
mcd *mcd_build_wplgfs(void);
mcd *mcd_build_wpmlgfs(void);
mcd *mcd_read(char *mcd_filename, int verbose);
void mcd_link_to_dico(mcd *m, dico_vec *vocabs, int verbose);
......
......@@ -2,6 +2,7 @@
#define __WORD__
#include "mcd.h"
#include "char16.h"
#define WORD_INVALID_GOV 10000
......@@ -12,10 +13,41 @@ typedef struct _word {
int signature; /* pos tags that this form can have (represented as a boolean string) */
int label;
char *form;
char16 *form_char16;
int index;
int is_root;
} word;
/*
#define word_get_s1(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[strlen((w)->form) - 1])
#define word_get_s2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[strlen((w)->form) - 2])
#define word_get_s3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[strlen((w)->form) - 3])
#define word_get_s4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[strlen((w)->form) - 4])
#define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5])
#define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6])
*/
/* word_get_sN(w): N-th element counted from the end of w->form_char16 (s1 is the last one).
   Evaluates to -1 when w is NULL, when w->form_char16 is NULL or when the string has fewer than N elements.
   Each use calls char16_strlen() twice on the same string. */
#define word_get_s1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 1])
#define word_get_s2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 2])
#define word_get_s3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 3])
#define word_get_s4(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 4))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 4])
#define word_get_s5(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 5))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 5])
#define word_get_s6(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 6))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 6])
/*#define word_get_p1(w) ((((w) == NULL) || ((w)->form == NULL) )? -1 : (w)->form[0])
#define word_get_p2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[1])
#define word_get_p3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[2])
#define word_get_p4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[3])
#define word_get_p5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[4])
#define word_get_p6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[5])
*/
/* word_get_pN(w): N-th element counted from the start of w->form_char16 (p1 is the first one).
   Evaluates to -1 when w is NULL or w->form_char16 is NULL; p2..p6 also evaluate to -1 when the
   string has fewer than N-1 elements.
   NOTE(review): the length guards look one short. For a string of exactly N-1 elements pN reads
   index N-1, which is the terminating 0, so the result is 0 rather than -1 (p1 likewise gives 0
   for an empty string). Tightening the guards would change the values these macros produce;
   confirm with the users of these features before changing it. */
#define word_get_p1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) )? -1 : (w)->form_char16[0])
#define word_get_p2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[1])
#define word_get_p3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[2])
#define word_get_p4(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[3])
#define word_get_p5(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 4))? -1 : (w)->form_char16[4])
#define word_get_p6(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 5))? -1 : (w)->form_char16[5])
#define word_get_id(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID])
#define word_get_form(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM])
#define word_get_lemma(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA])
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* element type of the 16-bit strings handled in this file; a 0 element ends a string */
typedef short char16;
/* char_bitN(c) evaluates to bit N of c as 0 or 1; bit 1 is the least significant bit, bit 8 is mask 128 */
#define char_bit1(c) ((c) & 1)
#define char_bit2(c) (((c) & 2) >> 1)
#define char_bit3(c) (((c) & 4) >> 2)
#define char_bit4(c) (((c) & 8) >> 3)
#define char_bit5(c) (((c) & 16) >> 4)
#define char_bit6(c) (((c) & 32) >> 5)
#define char_bit7(c) (((c) & 64) >> 6)
#define char_bit8(c) (((c) & 128) >> 7)
/* 1 when bit 8 is clear, or when bit 8 is set and bit 7 is clear; 2 when bits 8 and 7 are both set.
   In UTF-8 terms: ASCII bytes and continuation bytes give 1, every lead byte of a multi-byte
   sequence gives 2, whatever the real length of that sequence is. */
#define length(c) ((!char_bit8((c)) || (char_bit8(c) && !char_bit7(c)))? 1 : 2)
/*
int length(char c)
{
if(!char_bit8(c)) return 1;
if(char_bit8(c) && !char_bit7(c)) return 1;
if(char_bit7(c)) return 2;
if(char_bit6(c)) return 3;
if(char_bit5(4)) return 4;
}
*/
/*
 * Returns the number of characters (code points) of a NUL-terminated UTF-8
 * string, or 0 when utf8_string is NULL.
 *
 * A character is counted once, on its first byte: every byte that is not a
 * continuation byte (bit pattern 10xxxxxx) starts a new character. Counting
 * this way gives 1 for 3- and 4-byte sequences as well, where counting the
 * continuation bytes themselves would give 2 and 3.
 */
int utf8_strlen(char *utf8_string)
{
  const unsigned char *byte = (const unsigned char *)utf8_string;
  int code_points = 0;

  if(byte == NULL) return 0;
  for(; *byte; byte++){
    if((*byte & 0xC0) != 0x80) /* not a continuation byte */
      code_points++;
  }
  return code_points;
}
/* Placeholder for the char16 -> UTF-8 conversion: the argument is not read
   and NULL is returned unconditionally. */
char *char16toutf8(char16 *char16_string)
{
return NULL;
}
/* Returns the number of elements found before the terminating 0 element of
   string. string must not be NULL. */
int char16_strlen(char16 *string)
{
  char16 *cursor = string;

  while(*cursor != 0)
    cursor++;
  return (int)(cursor - string);
}
char16 *utf8tochar16(char *utf8_string)
{
int i,j;
int utf8_length = strlen(utf8_string);
int char16_length = 0;
char16 *char16_string;
for(i=0; i < utf8_length; i++)
char16_length += length(utf8_string[i]);
char16_string = (char16*) malloc((char16_length + 1)* sizeof(char16));
for(i=0, j=0; i < utf8_length; i++, j++){
if(length(utf8_string[i]) == 1){
char16_string[j] = (char16)utf8_string[i];
}
if(length(utf8_string[i]) == 2){
char16_string[j] = utf8_string[i];
char16_string[j] = char16_string[j] << 8;
char16_string[j] += utf8_string[++i];
}
}
char16_string[j] = 0;
return char16_string;
}
/*
int main(void)
{
int i;
char string[200];
char16 *char16_string;
strcpy(string, "élémentaire");
printf("string = %s\n", string);
printf("length = %d\n", (int)strlen(string));
printf("utf8 length = %d\n", (int)utf8_strlen(string));
for(i=0; i < strlen(string); i++){
printf("%d\t%c\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\tl=%d\n", i, string[i], (int)string[i], char_bit1(string[i]), char_bit2(string[i]), char_bit3(string[i]), char_bit4(string[i]), char_bit5(string[i]), char_bit6(string[i]), char_bit7(string[i]), char_bit8(string[i]), length(string[i]));
}
char16_string = utf8tochar16(string);
printf("char16_strlen = %d\n", char16_strlen(char16_string));
}
*/
......@@ -131,7 +131,6 @@ void catenate_int(char *string, int val)
}
feat_model *feat_model_new(char *name)
{
feat_model *fm = (feat_model *)memalloc(sizeof(feat_model));
......
......@@ -6,13 +6,13 @@
form2pos *form2pos_new(int nbelem, int pos_nb, char *pos_list)
{
form2pos *f2p = memalloc(sizeof(form2pos));
form2pos *f2p = (form2pos *)memalloc(sizeof(form2pos));
char *token;
f2p->nbelem = nbelem;
f2p->pos_nb = pos_nb;
f2p->d_pos = dico_new("d_pos", pos_nb * 10);
f2p->d_signature = dico_new("d_signature", pos_nb * 10);
f2p->d_pos = dico_new((char *)"d_pos", pos_nb * 10);
f2p->d_signature = dico_new((char *)"d_signature", pos_nb * 10);
f2p->h_form2signature = hash_new(nbelem * 4);
token = strtok(pos_list, "\t");
do{
......
......@@ -422,6 +422,63 @@ mcd *mcd_build_wplgfs(void)
return m;
}
/*
 * Builds the seven column mcd whose columns are, in order:
 * FORM, POS, FEATS, LEMMA, GOV, LABEL and SENT_SEG.
 * GOV and SENT_SEG use the INT representation, the other columns use VOCAB;
 * the file name of every column is set to "_".
 */
mcd *mcd_build_wpmlgfs(void)
{
  static const struct {
    int wf;                /* MCD_WF_* identifier stored in the column */
    const char *wf_str;    /* name of the column */
    int representation;    /* MCD_REPRESENTATION_* of the column */
  } layout[] = {
    { MCD_WF_FORM,     "FORM",     MCD_REPRESENTATION_VOCAB },
    { MCD_WF_POS,      "POS",      MCD_REPRESENTATION_VOCAB },
    { MCD_WF_FEATS,    "FEATS",    MCD_REPRESENTATION_VOCAB },
    { MCD_WF_LEMMA,    "LEMMA",    MCD_REPRESENTATION_VOCAB },
    { MCD_WF_GOV,      "GOV",      MCD_REPRESENTATION_INT   },
    { MCD_WF_LABEL,    "LABEL",    MCD_REPRESENTATION_VOCAB },
    { MCD_WF_SENT_SEG, "SENT_SEG", MCD_REPRESENTATION_INT   }
  };
  const int column_nb = (int)(sizeof(layout) / sizeof(layout[0]));
  mcd *m = mcd_new(column_nb);
  int col;

  for(col = 0; col < column_nb; col++){
    m->wf[col] = layout[col].wf;
    m->wf_str[col] = strdup(layout[col].wf_str);
    m->representation[col] = layout[col].representation;
    m->filename[col] = strdup("_");
    m->wf2col[layout[col].wf] = col;
  }
  return m;
}
/* returns a dico_vec containing the different dictionnaries found in an mcd structure */
......
......@@ -7,7 +7,7 @@
trie_state *trie_state_new(trie_trans *transitions, int is_accept)
{
trie_state *state = memalloc(sizeof(trie_state));
trie_state *state = (trie_state *) memalloc(sizeof(trie_state));
state->transitions = transitions;
state->is_accept = is_accept;
state->fail = 0;
......@@ -24,7 +24,7 @@ void trie_state_free(trie_state *state)
trie *trie_new(void)
{
trie *t = memalloc(sizeof(trie));
trie *t = (trie *) memalloc(sizeof(trie));
t->states = NULL;
t->size = 0;
t->states_nb = 0;
......@@ -45,7 +45,7 @@ void trie_free(trie *t)
trie_trans *trie_trans_new(int destination, int symbol, trie_trans *next)
{
trie_trans *trans = memalloc(sizeof(trie_trans));
trie_trans *trans = (trie_trans *)memalloc(sizeof(trie_trans));
trans->destination = destination;
trans->symbol = symbol;
trans->next = next;
......
......@@ -19,6 +19,7 @@ word *word_new(char *input)
w->wf_array[MCD_WF_GOV] = WORD_INVALID_GOV;
w->form = NULL;
w->form_char16 = NULL;
w->index = -1;
w->signature = -1;
......@@ -59,11 +60,13 @@ word *word_parse_buffer(char *buffer, mcd *mcd_struct)
w = word_new(buffer);
token = strtok(buffer, "\t");
do{
if((col < mcd_struct->nb_col) && (mcd_struct->wf[col] != -1) && (strcmp(token, "_"))){
/* if((col < mcd_struct->nb_col) && (mcd_struct->wf[col] != -1) && (strcmp(token, "_"))){ */
if((col < mcd_struct->nb_col) && (mcd_struct->wf[col] != -1)){
w->wf_array[mcd_struct->wf[col]] = mcd_get_code(mcd_struct, token, col);
}
if(mcd_struct->wf[col] == MCD_WF_FORM){
w->form = strdup(token);
w->form_char16 = utf8tochar16(w->form);
w->U1 = isupper(token[0]) ? 1 : 0;
}
col++;
......@@ -95,6 +98,7 @@ void word_free(word *w)
if(w == NULL) return;
if(w->input) free(w->input);
if(w->form) free(w->form);
if(w->form_char16) free(w->form_char16);
free(w);
}
......
......@@ -2,7 +2,7 @@
#include "array.h"
array_t* array_new() {
array_t* array = malloc(sizeof(array_t));
array_t* array = (array_t *)malloc(sizeof(array_t));
array->num_elements = 0;
array->data = NULL;
return array;
......@@ -18,7 +18,7 @@ ARRAY_TYPE array_get(array_t* array, int element) {
}
void array_push(array_t* array, ARRAY_TYPE value) {
array->data = realloc(array->data, sizeof(ARRAY_TYPE) * (array->num_elements + 1));
array->data = (ARRAY_TYPE *)realloc(array->data, sizeof(ARRAY_TYPE) * (array->num_elements + 1));
array->data[array->num_elements] = value;
array->num_elements++;
}
......
......@@ -75,7 +75,7 @@ void maca_graph_parser_print_ctx(maca_graph_parser_ctx *ctx)
maca_graph_parser_ctx * maca_graph_parser_InitCTX()
{
maca_graph_parser_ctx * ctx = calloc(sizeof(maca_graph_parser_ctx), 1);
maca_graph_parser_ctx * ctx = (maca_graph_parser_ctx *)calloc(sizeof(maca_graph_parser_ctx), 1);
ctx->cfg=MACA_DEFAULT_CFG;
ctx->verbose_flag = maca_verbose;
......
......@@ -36,7 +36,7 @@ void maca_graph_parser_alphabet_free(maca_graph_parser_alphabet *a)
maca_graph_parser_alphabet *maca_graph_parser_alphabet_new(char *name)
{
maca_graph_parser_alphabet *a = malloc(sizeof(maca_graph_parser_alphabet));
maca_graph_parser_alphabet *a = (maca_graph_parser_alphabet *)malloc(sizeof(maca_graph_parser_alphabet));
if(a == NULL){
fprintf(stderr, "memory allocation error\n");
exit(1);
......@@ -153,7 +153,7 @@ maca_graph_parser_alphabet **maca_graph_parser_alphabet_load4(char *filename)
int i = 0;
char symbol[1000];
maca_graph_parser_alphabet *a = NULL;
maca_graph_parser_alphabet **alpha_array = malloc(4 * sizeof(maca_graph_parser_alphabet*));
maca_graph_parser_alphabet **alpha_array = (maca_graph_parser_alphabet **)malloc(4 * sizeof(maca_graph_parser_alphabet*));
for(i=0; i < 4; i++)
alpha_array[i] = NULL;
......@@ -182,7 +182,7 @@ maca_graph_parser_alphabet **maca_graph_parser_alphabet_load5(char *filename)
int i = 0;
char symbol[1000];
maca_graph_parser_alphabet *a = NULL;
maca_graph_parser_alphabet **alpha_array = malloc(5 * sizeof(maca_graph_parser_alphabet*));
maca_graph_parser_alphabet **alpha_array = (maca_graph_parser_alphabet **)malloc(5 * sizeof(maca_graph_parser_alphabet*));
for(i=0; i < 5; i++)
alpha_array[i] = NULL;
......
......@@ -125,6 +125,7 @@ int main(int argc, char *argv[])
char *buffer_copy;
char *form;
char *pos;
char *feats;
char *token;
int column_nb;
......@@ -136,11 +137,16 @@ int main(int argc, char *argv[])
int form_column;
int pos_column;
int lemma_column;
int feats_column;
FILE *f = NULL;
ctx = context_read_options(argc, argv);
maca_lemmatizer_check_options(ctx);
feats_column = ctx->mcd_struct->wf2col[MCD_WF_FEATS];
if(ctx->pos_column != -1)
pos_column = ctx->pos_column;
else
......@@ -177,6 +183,7 @@ int main(int argc, char *argv[])
form = NULL;
pos = NULL;
lemma = NULL;
feats = NULL;
do{
if(column_nb == lemma_column) /* lemma is present in the input file */
if(strcmp(token, "_")) /* and it is not an underscore */
......@@ -188,6 +195,9 @@ int main(int argc, char *argv[])
if(column_nb == pos_column){
pos = strdup(token);
}
if(column_nb == feats_column){
feats = strdup(token);
}
column_nb++;
} while((token = strtok(NULL , "\t")));
......@@ -215,11 +225,13 @@ int main(int argc, char *argv[])
/* print_word(buffer, ctx->mcd_struct, lemma); */
/* printf("form = %s pos = %s (%s) lemma = %s\n", form, pos, form_pos, lemma); */
printf("form = %s pos = %s (%s) feats = %s lemma = %s\n", form, pos, form_pos, feats, lemma);
printf("form = %s pos = %s (%s) feats = %s lemma = %s\n", form, pos, form_pos, feats, lemma);
printf("\t%s\n", lemma);
if(pos)free(pos);
if(form)free(form);
if(feats)free(feats);
}
free(buffer_copy);
free(lemma_array);
......
......@@ -29,7 +29,7 @@ dico *decompose_mwe_in_fplm_file(char *fplm_filename, FILE *output_file, int deb
char token[1000];
int l;
int i, j;
dico *d_tokens = dico_new("TOKENS", 100000);
dico *d_tokens = dico_new((char *)"TOKENS", 100000);
int token_code;
while(fgets(buffer, 10000, f)){
fields_nb = sscanf(buffer, "%[^\t]\t%s\t%[^\t]\t%s\n", form, pos, lemma, morpho);
......@@ -71,6 +71,6 @@ int main(int argc, char *argv[])
dico *d_tokens;
d_tokens = decompose_mwe_in_fplm_file(argv[1], stdout, 1);
dico_print("d_tokens.dico", d_tokens);
dico_print((char *)"d_tokens.dico", d_tokens);
dico_free(d_tokens);
}
# Build rules of the maca_morpho component: one static library and the two
# command line tools (fplm2cff, predict) that are built on top of it.

set(SOURCES
  src/maca_morpho_feat_fct.c
  src/maca_morpho_context.c
  src/vectorize.c
)

# compiling library
add_library(maca_morpho STATIC ${SOURCES})
# The headers live in src/. PUBLIC makes them visible to the library itself
# and to every target linking against it, without touching directory scope.
target_include_directories(maca_morpho PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/src")
# PUBLIC: consumers of the static library must link its dependencies as well.
target_link_libraries(maca_morpho PUBLIC perceptron maca_common)

# compiling, linking and installing executables
add_executable(fplm2cff src/fplm2cff.c)
target_link_libraries(fplm2cff PRIVATE maca_morpho perceptron maca_common)
install(TARGETS fplm2cff DESTINATION bin)

add_executable(predict src/predict.c)
target_link_libraries(predict PRIVATE maca_morpho perceptron maca_common)
install(TARGETS predict DESTINATION bin)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "maca_morpho_context.h"
#include "feat_model.h"
#include "feat_vec.h"
#include "dico.h"
#include "util.h"
#include "vectorize.h"
/*
 * Splits a "feature=value" string at its first '='.
 *
 * feature receives the text found before the first '=', value the text found
 * after it. When feature_value contains no '=', feature receives the whole
 * string and value the empty string. Both output buffers are supplied by the
 * caller and must be large enough: no size is checked here.
 */
void decompose_feature_value(char *feature_value, char *feature, char *value)
{
  const char *separator = strchr(feature_value, '=');
  size_t feature_length;

  if(separator == NULL){
    strcpy(feature, feature_value);
    value[0] = '\0';
    return;
  }
  feature_length = (size_t)(separator - feature_value);
  memcpy(feature, feature_value, feature_length);
  feature[feature_length] = '\0';
  strcpy(value, separator + 1);
}
int main(int argc, char *argv[])
{
context *ctx = context_read_options(argc, argv);
if(ctx->help){
context_general_help_message(ctx);
context_language_help_message(ctx);
context_fplm_help_message(ctx);
context_maca_data_path_help_message(ctx);
context_features_filename_help_message(ctx);
context_features_model_help_message(ctx);
exit(1);
}
feat_vec *fv = feat_vec_new(10);
dico *dico_features = dico_new("dico_features", 1000);
/* feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); */
char form[100];
char pos[100];
char lemma[100];
char morpho[100];
FILE *F_fplm = NULL;
char buffer[1000];
char feature_value[100];
char feature[100];
char value[100];
char *token;
F_fplm = myfopen(ctx->fplm_filename, "r");
while(fgets(buffer, 1000, F_fplm)){
if(feof(F_fplm))
break;
// printf("%s", buffer);
buffer[strlen(buffer) - 1] = '\0';
sscanf(buffer, "%[^\t]\t%[^\t]\t%[^\t]\t%[^\n]\n", form, pos, lemma, morpho);
//printf("form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho);
token = strtok(morpho, "|");
do{
//printf("token = %s\n", token);
decompose_feature_value(token, feature, value);
//printf("feature = %s value = %s\n", feature, value);
}while((token = strtok(NULL, "|")));
}
fclose(F_fplm);
}
/*
while(strcmp(form, "end")){
fscanf(stdin, "%s", form);
printf("form = %s\n", form);
form2fv(form, fv, fm, dico_features, ADD_MODE);
//void feat_vec_print_string(feat_vec *fv, dico *dico_features);
feat_vec_print(stdout, fv);
}
//dico_print_fh(stdout, dico_features);
if(ctx->features_filename)
dico_print(ctx->features_filename, dico_features);
*/
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include "maca_morpho_context.h"
#include "util.h"
void context_set_linguistic_resources_filenames(context *ctx);
void context_free(context *ctx)
{
if(ctx->program_name) free(ctx->program_name);
if(ctx->fplm_filename) free(ctx->fplm_filename);
if(ctx->cfw_filename) free(ctx->cfw_filename);
if(ctx->language) free(ctx->language);
if(ctx->maca_data_path) free(ctx->maca_data_path);
free(ctx);
}
context *context_new(void)
{
context *ctx = (context *)memalloc(sizeof(context));
ctx->help = 0;
ctx->verbose = 0;
ctx->debug_mode = 0;
ctx->program_name = NULL;
ctx->fplm_filename = NULL;
ctx->language = strdup("fr");
ctx->maca_data_path = NULL;
ctx->features_filename = NULL;
ctx->cfw_filename = NULL;
return ctx;
}
/*
 * Prints the usage line and the options that take no argument on stderr.
 * Only options that context_read_options() actually parses are listed:
 * -d/--debug is parsed and therefore documented, -r/--hratio is not parsed
 * and is therefore no longer advertised.
 */
void context_general_help_message(context *ctx)
{
  fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
  fprintf(stderr, "Options:\n");
  fprintf(stderr, "\t-h --help : print this message\n");
  fprintf(stderr, "\t-v --verbose : activate verbose mode\n");
  fprintf(stderr, "\t-d --debug : activate debug mode\n");
}
/* Prints the help line of the -f / --fplm option on stderr. */
void context_fplm_help_message(context *ctx)
{
  (void)ctx; /* the text does not depend on the context */
  fputs("\t-f --fplm <file> : fplm (form pos lemma morpho) file\n", stderr);
}
/* Prints the help line of the -L / --language option on stderr. */
void context_language_help_message(context *ctx)
{
  (void)ctx; /* the text does not depend on the context */
  fputs("\t-L --language : identifier of the language to use\n", stderr);
}
/*
 * Prints the help line of the maca_data_path option on stderr.
 * The short option shown is -D: that is the letter bound to --maca_data_path
 * in the long option table and the one context_read_options() acts upon.
 */
void context_maca_data_path_help_message(context *ctx){
  fprintf(stderr, "\t-D --maca_data_path : path to maca_data directory\n");
}
/* Prints the help line of the -F / --fm option on stderr. */
void context_fm_help_message(context *ctx)
{
  (void)ctx; /* the text does not depend on the context */
  fputs("\t-F --fm <file> : feature model file name\n", stderr);
}
/* Prints the help line of the -x / --feat option on stderr. */
void context_features_filename_help_message(context *ctx)
{
  (void)ctx; /* the text does not depend on the context */
  fputs("\t-x --feat <file> : features dictionary file name\n", stderr);
}
/* Prints the help line of the -w / --weights option on stderr. */
void context_weights_matrix_filename_help_message(context *ctx)
{
  (void)ctx; /* the text does not depend on the context */
  fputs("\t-w --weights <file> : weight matrix (cfw) filename\n", stderr);
}
/*
 * Prints the help line of the feature model option on stderr.
 * The long name shown is --fm, the spelling bound to -F in the long option
 * table of context_read_options().
 */
void context_features_model_help_message(context *ctx){
  fprintf(stderr, "\t-F --fm <file> : feature model file name\n");
}
/* Stores a copy of value in *field after releasing the string it held, so
   that an option given twice, or one overriding a default, does not leak. */
static void context_replace_string(char **field, const char *value)
{
  free(*field);
  *field = strdup(value);
}

/*
 * Builds a context from the command line.
 *
 * Recognised options: -h/--help, -v/--verbose, -d/--debug, -f/--fplm,
 * -L/--language, -D or -M/--maca_data_path, -F/--fm (also --feat_model),
 * -x/--feat and -w/--weights. --mcd is accepted together with its argument
 * and ignored. Unknown options are ignored silently (opterr is set to 0).
 *
 * Once the options are read, the default resource file names are filled in
 * by context_set_linguistic_resources_filenames(). The returned context is
 * owned by the caller.
 */
context *context_read_options(int argc, char *argv[])
{
  int c;
  int option_index = 0;
  context *ctx = context_new();

  /* make sure fm_filename holds a defined value before it is released or
     tested; it is only assigned below when -F is given */
  ctx->fm_filename = NULL;
  context_replace_string(&ctx->program_name, argv[0]);

  /* getopt_long() requires the table to end with an all-zero element */
  static struct option long_options[] =
    {
      {"help",           no_argument,       0, 'h'},
      {"verbose",        no_argument,       0, 'v'},
      {"debug",          no_argument,       0, 'd'},
      {"mcd",            required_argument, 0, 'C'},
      {"language",       required_argument, 0, 'L'},
      {"fplm",           required_argument, 0, 'f'},
      {"maca_data_path", required_argument, 0, 'D'},
      {"fm",             required_argument, 0, 'F'},
      {"feat_model",     required_argument, 0, 'F'},
      {"feat",           required_argument, 0, 'x'},
      {"weights",        required_argument, 0, 'w'},
      {0, 0, 0, 0}
    };

  optind = 0;
  opterr = 0;

  while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:x:w:", long_options, &option_index)) != -1){
    switch (c)
      {
      case 'd':
        ctx->debug_mode = 1;
        break;
      case 'h':
        ctx->help = 1;
        break;
      case 'v':
        ctx->verbose = 1;
        break;
      case 'f':
        context_replace_string(&ctx->fplm_filename, optarg);
        break;
      case 'L':
        /* replaces the default language set by context_new() */
        context_replace_string(&ctx->language, optarg);
        break;
      case 'M': /* accepted by the option string; treated as a synonym of -D */
      case 'D':
        context_replace_string(&ctx->maca_data_path, optarg);
        break;
      case 'F':
        context_replace_string(&ctx->fm_filename, optarg);
        break;
      case 'x':
        context_replace_string(&ctx->features_filename, optarg);
        break;
      case 'w':
        context_replace_string(&ctx->cfw_filename, optarg);
        break;
      default:
        /* 'C' (--mcd) and unknown options: nothing to store */
        break;
      }
  }

  context_set_linguistic_resources_filenames(ctx);
  return ctx;
}
/*
 * Fills in the resource file names that were not given on the command line.
 *
 * The resource directory is <data path>/<language>/bin/, where <data path>
 * is ctx->maca_data_path when it is set and the MACAON_DIR environment
 * variable otherwise. When neither is available a warning is printed on
 * stderr and an empty data path is used. ctx->fplm_filename, when it is
 * still NULL, becomes DEFAULT_FPLM_FILENAME inside that directory.
 *
 * The file name is built in a buffer allocated to its exact length, so the
 * length of the data path and of the language identifier is not limited.
 * The program stops with status 1 when that buffer cannot be allocated.
 */
void context_set_linguistic_resources_filenames(context *ctx)
{
  const char *data_path = ctx->maca_data_path;
  size_t filename_size;
  char *filename;

  if(data_path == NULL){
    data_path = getenv("MACAON_DIR");
    if(data_path == NULL){
      fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
      data_path = "";
    }
  }

  if(ctx->fplm_filename) return;

  /* data_path + "/" + language + "/bin/" + default name + terminator */
  filename_size = strlen(data_path) + 1 + strlen(ctx->language) + 5 + strlen(DEFAULT_FPLM_FILENAME) + 1;
  filename = (char *)malloc(filename_size);
  if(filename == NULL){
    fprintf(stderr, "memory allocation error\n");
    exit(1);
  }
  snprintf(filename, filename_size, "%s/%s/bin/%s", data_path, ctx->language, DEFAULT_FPLM_FILENAME);
  ctx->fplm_filename = filename;
}