Skip to content
Snippets Groups Projects
Commit da28bdb4 authored by Alexis Nasr
Browse files

added a morphological analyzer (not operational yet)

parent d45d8cbf
No related branches found
No related tags found
No related merge requests found
# Sources compiled into the maca_morpho static library.
set(SOURCES
  src/maca_morpho_feat_fct.c
  src/maca_morpho_context.c
  src/vectorize.c
)

# Static library, linked against perceptron and maca_common.
include_directories(src)
add_library(maca_morpho STATIC ${SOURCES})
target_link_libraries(maca_morpho perceptron maca_common)

# Command-line tools: each one is built from src/<name>.c, linked against the
# same three libraries and installed in bin.
foreach(tool fplm2cff predict)
  add_executable(${tool} ./src/${tool}.c)
  target_link_libraries(${tool} perceptron maca_common maca_morpho)
  install(TARGETS ${tool} DESTINATION bin)
endforeach()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "maca_morpho_context.h"
#include "feat_model.h"
#include "feat_vec.h"
#include "dico.h"
#include "util.h"
#include "vectorize.h"
/*
 * Splits a "feature=value" string at its first '=' character.
 *
 * feature_value : NUL-terminated input string (not modified)
 * feature       : receives the text located before the first '='
 * value         : receives the text located after the first '='
 *
 * When the input holds no '=', the whole input is copied to feature and value
 * is set to the empty string. Both output buffers must be large enough for the
 * input; no bound is checked here.
 */
void decompose_feature_value(char *feature_value, char *feature, char *value)
{
  const char *separator = strchr(feature_value, '=');

  if(separator == NULL){
    strcpy(feature, feature_value);
    value[0] = '\0';
    return;
  }

  size_t name_length = (size_t)(separator - feature_value);
  memcpy(feature, feature_value, name_length);
  feature[name_length] = '\0';
  strcpy(value, separator + 1);
}
int main(int argc, char *argv[])
{
context *ctx = context_read_options(argc, argv);
if(ctx->help){
context_general_help_message(ctx);
context_language_help_message(ctx);
context_fplm_help_message(ctx);
context_maca_data_path_help_message(ctx);
context_features_filename_help_message(ctx);
context_features_model_help_message(ctx);
exit(1);
}
feat_vec *fv = feat_vec_new(10);
dico *dico_features = dico_new("dico_features", 1000);
/* feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose); */
char form[100];
char pos[100];
char lemma[100];
char morpho[100];
FILE *F_fplm = NULL;
char buffer[1000];
char feature_value[100];
char feature[100];
char value[100];
char *token;
F_fplm = myfopen(ctx->fplm_filename, "r");
while(fgets(buffer, 1000, F_fplm)){
if(feof(F_fplm))
break;
// printf("%s", buffer);
buffer[strlen(buffer) - 1] = '\0';
sscanf(buffer, "%[^\t]\t%[^\t]\t%[^\t]\t%[^\n]\n", form, pos, lemma, morpho);
//printf("form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho);
token = strtok(morpho, "|");
do{
//printf("token = %s\n", token);
decompose_feature_value(token, feature, value);
//printf("feature = %s value = %s\n", feature, value);
}while((token = strtok(NULL, "|")));
}
fclose(F_fplm);
}
/*
while(strcmp(form, "end")){
fscanf(stdin, "%s", form);
printf("form = %s\n", form);
form2fv(form, fv, fm, dico_features, ADD_MODE);
//void feat_vec_print_string(feat_vec *fv, dico *dico_features);
feat_vec_print(stdout, fv);
}
//dico_print_fh(stdout, dico_features);
if(ctx->features_filename)
dico_print(ctx->features_filename, dico_features);
*/
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include "maca_morpho_context.h"
#include "util.h"
void context_set_linguistic_resources_filenames(context *ctx);
void context_free(context *ctx)
{
if(ctx->program_name) free(ctx->program_name);
if(ctx->fplm_filename) free(ctx->fplm_filename);
if(ctx->cfw_filename) free(ctx->cfw_filename);
if(ctx->language) free(ctx->language);
if(ctx->maca_data_path) free(ctx->maca_data_path);
free(ctx);
}
context *context_new(void)
{
context *ctx = (context *)memalloc(sizeof(context));
ctx->help = 0;
ctx->verbose = 0;
ctx->debug_mode = 0;
ctx->program_name = NULL;
ctx->fplm_filename = NULL;
ctx->language = strdup("fr");
ctx->maca_data_path = NULL;
ctx->features_filename = NULL;
ctx->cfw_filename = NULL;
return ctx;
}
/*
 * Prints the usage line and the help lines of the general options on stderr.
 *
 * ctx : context whose program_name is shown in the usage line.
 *
 * Only options handled by context_read_options() are listed: -d/--debug is
 * parsed there, while no -r/--hratio option exists in its option tables.
 */
void context_general_help_message(context *ctx)
{
  fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
  fprintf(stderr, "Options:\n");
  fprintf(stderr, "\t-h --help : print this message\n");
  fprintf(stderr, "\t-v --verbose : activate verbose mode\n");
  fprintf(stderr, "\t-d --debug : activate debug mode\n");
}
/* Prints the help line of the fplm file option on stderr; ctx is not used. */
void context_fplm_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-f --fplm <file> : fplm (form pos lemma morpho) file\n", stderr);
}
/* Prints the help line of the language option on stderr; ctx is not used. */
void context_language_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-L --language : identifier of the language to use\n", stderr);
}
/* Prints the help line of the maca_data path option on stderr; ctx is not used. */
void context_maca_data_path_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-M --maca_data_path : path to maca_data directory\n", stderr);
}
/* Prints the help line of the feature model option on stderr; ctx is not used. */
void context_fm_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-F --fm <file> : feature model file name\n", stderr);
}
/* Prints the help line of the features dictionary option on stderr; ctx is not used. */
void context_features_filename_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-x --feat <file> : features dictionary file name\n", stderr);
}
/* Prints the help line of the weight matrix option on stderr; ctx is not used. */
void context_weights_matrix_filename_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-w --weights <file> : weight matrix (cfw) filename\n", stderr);
}
/*
 * Prints the help line of the feature model option on stderr; ctx is not used.
 *
 * The long spelling shown is --fm, which is the name registered for 'F' in the
 * long option table of context_read_options().
 */
void context_features_model_help_message(context *ctx)
{
  (void)ctx;
  fprintf(stderr, "\t-F --fm <file> : feature model file name\n");
}
/* Stores a private copy of value in *slot, releasing the string the slot held
   before (the slot must hold NULL or a heap-allocated string). */
static void context_set_string(char **slot, const char *value)
{
  free(*slot);
  *slot = strdup(value);
}

/*
 * Builds a context from the command line.
 *
 * argc, argv : program arguments, as received by main().
 *
 * Recognised options: -h/--help, -v/--verbose, -d/--debug, -f/--fplm,
 * -L/--language, -D or -M/--maca_data_path, -F/--fm, -x/--feat, -w/--weights.
 * Unknown options are ignored silently (opterr is cleared). Once the options
 * are read, the default resource file names are filled in.
 *
 * Returns the new context; release it with context_free().
 */
context *context_read_options(int argc, char *argv[])
{
  int c;
  int option_index = 0;
  context *ctx = context_new();

  /* fm_filename must hold a defined value before an option may replace it. */
  ctx->fm_filename = NULL;
  ctx->program_name = strdup(argv[0]);

  /* getopt_long() requires the table to end with an all-zero element. */
  static struct option long_options[] =
    {
      {"help",           no_argument,       0, 'h'},
      {"verbose",        no_argument,       0, 'v'},
      {"debug",          no_argument,       0, 'd'},
      {"mcd",            required_argument, 0, 'C'},
      {"language",       required_argument, 0, 'L'},
      {"fplm",           required_argument, 0, 'f'},
      {"maca_data_path", required_argument, 0, 'D'},
      {"fm",             required_argument, 0, 'F'},
      {"feat",           required_argument, 0, 'x'},
      {"weights",        required_argument, 0, 'w'},
      {0, 0, 0, 0}
    };

  optind = 0;
  opterr = 0;

  while ((c = getopt_long (argc, argv, "hvdf:L:M:D:F:x:w:", long_options, &option_index)) != -1){
    switch (c)
      {
      case 'd':
	ctx->debug_mode = 1;
	break;
      case 'h':
	ctx->help = 1;
	break;
      case 'v':
	ctx->verbose = 1;
	break;
      case 'f':
	context_set_string(&ctx->fplm_filename, optarg);
	break;
      case 'L':
	/* replaces (and releases) the default language set by context_new() */
	context_set_string(&ctx->language, optarg);
	break;
      case 'M': /* short spelling shown in the help message */
      case 'D':
	context_set_string(&ctx->maca_data_path, optarg);
	break;
      case 'F':
	context_set_string(&ctx->fm_filename, optarg);
	break;
      case 'x':
	context_set_string(&ctx->features_filename, optarg);
	break;
      case 'w':
	context_set_string(&ctx->cfw_filename, optarg);
	break;
      default:
	/* unknown or unhandled option (e.g. --mcd): ignored */
	break;
      }
  }

  context_set_linguistic_resources_filenames(ctx);

  return ctx;
}
/*
 * Fills in the default fplm file name when none was given on the command line.
 *
 * ctx : context to complete; ctx->language is read, ctx->fplm_filename may be
 *       set.
 *
 * The default is <base>/<language>/bin/<DEFAULT_FPLM_FILENAME>, where <base>
 * is ctx->maca_data_path if set, else the MACAON_DIR environment variable,
 * else the empty string (a warning is then printed on stderr). The name is
 * built in a heap buffer sized for it, so paths of any length are handled.
 * Exits with status 1 if that buffer cannot be allocated.
 */
void context_set_linguistic_resources_filenames(context *ctx)
{
  const char *base_dir = ctx->maca_data_path;

  if(base_dir == NULL){
    base_dir = getenv("MACAON_DIR");
    if(base_dir == NULL){
      fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
      base_dir = "";
    }
  }

  /* An explicit fplm file name always wins over the default location. */
  if(ctx->fplm_filename)
    return;

  size_t length = strlen(base_dir) + strlen("/") + strlen(ctx->language)
    + strlen("/bin/") + strlen(DEFAULT_FPLM_FILENAME) + 1;
  char *filename = malloc(length);
  if(filename == NULL){
    fprintf(stderr, "cannot allocate memory for the fplm file name\n");
    exit(1);
  }
  snprintf(filename, length, "%s/%s/bin/%s", base_dir, ctx->language, DEFAULT_FPLM_FILENAME);
  ctx->fplm_filename = filename;
}
#ifndef __MACA_MORPHO_CONTEXT__
#define __MACA_MORPHO_CONTEXT__
#include "mcd.h"
#include <stdlib.h>
#define DEFAULT_FPLM_FILENAME "fplm"
typedef struct {
int help;
int verbose;
int debug_mode;
char *program_name;
char *fplm_filename;
char *language;
char *maca_data_path;
char *fm_filename;
char *features_filename;
char *cfw_filename;
} context;
context *context_new(void);
void context_free(context *ctx);
context *context_read_options(int argc, char *argv[]);
void context_general_help_message(context *ctx);
void context_language_help_message(context *ctx);
void context_fplm_help_message(context *ctx);
void context_maca_data_path_help_message(context *ctx);
void context_features_filename_help_message(context *ctx);
void context_weights_matrix_filename_help_message(context *ctx);
void context_features_model_help_message(context *ctx);
#endif
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"feat_lib.h"
/*
 * Feature function: last character of a form.
 *
 * input : NUL-terminated string, or NULL.
 *
 * Returns the last character of the string, or -1 when input is NULL or the
 * string is empty (an empty string has no last character to read).
 */
int s1(void *input)
{
  if(input == NULL) return -1;
  const char *form = (const char *)input;
  size_t length = strlen(form);
  return (length >= 1) ? form[length - 1] : -1;
}
/*
 * Feature function: penultimate character of a form.
 *
 * input : NUL-terminated string, or NULL.
 *
 * Returns the character located just before the last one, or -1 when input is
 * NULL or the string has fewer than two characters.
 */
int s2(void *input)
{
  if(input == NULL) return -1;
  const char *form = (const char *)input;
  size_t length = strlen(form);
  return (length >= 2) ? form[length - 2] : -1;
}
/*
 * Creates a feature library and registers the feature functions of this file
 * ("s1" and "s2", in that order).
 *
 * Returns the library created by feat_lib_new().
 */
feat_lib *feat_lib_build(void)
{
  static const struct {
    const char *name;
    int (*fct)(void *);
  } features[] = {
    {"s1", s1},
    {"s2", s2}
  };
  feat_lib *library = feat_lib_new();
  size_t k;

  for(k = 0; k < sizeof features / sizeof features[0]; k++)
    feat_lib_add(library, 1, (char *)features[k].name, features[k].fct);

  return library;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "maca_morpho_context.h"
#include "feat_model.h"
#include "feat_vec.h"
#include "dico.h"
#include "util.h"
#include "vectorize.h"
#include "feature_table.h"
/*
 * Prints every help section relevant to the predict program on stderr, then
 * terminates the process with exit status 1.
 *
 * ctx : context handed to each help printer.
 */
void predict_help_message(context *ctx)
{
  /* help printers, in display order */
  void (*const sections[])(context *) = {
    context_general_help_message,
    context_language_help_message,
    context_fplm_help_message,
    context_maca_data_path_help_message,
    context_features_filename_help_message,
    context_weights_matrix_filename_help_message,
    context_features_model_help_message
  };
  size_t k;

  for(k = 0; k < sizeof sections / sizeof sections[0]; k++)
    sections[k](ctx);

  exit(1);
}
/*
 * predict entry point.
 *
 * Loads the weight table, the features dictionary and the feature model named
 * in the context, then reads whitespace-separated forms on stdin. For each
 * form it prints the form, its feature vector and the class returned by
 * feature_table_argmax(). Reading stops at end of input or after the form
 * "end" (which is itself processed, as before).
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
  context *ctx = context_read_options(argc, argv);
  if(ctx->help) predict_help_message(ctx);

  feature_table *cfw = feature_table_load(ctx->cfw_filename, ctx->verbose);
  feat_vec *fv = feat_vec_new(10);
  dico *dico_features = dico_read(ctx->features_filename, 0.5);
  feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);
  char form[100];
  int class;
  float max;

  /* "%99s" keeps the token inside form[]; testing the fscanf() result stops
     the loop at end of input and guarantees form is set before it is used. */
  while(fscanf(stdin, "%99s", form) == 1){
    printf("form = %s\n", form);
    form2fv(form, fv, fm, dico_features, LOOKUP_MODE);
    class = feature_table_argmax(fv, cfw, &max);
    feat_vec_print(stdout, fv);
    printf("class = %d\n", class);
    if(strcmp(form, "end") == 0)
      break;
  }

  if(ctx->features_filename)
    dico_print(ctx->features_filename, dico_features);

  return 0;
}
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"vectorize.h"
/*
 * Computes the dictionary code of one feature of a form.
 *
 * fm            : feature model; fm->string is overwritten with the feature string
 * form          : form handed to every elementary feature function
 * dico_features : dictionary mapping feature strings to integer codes
 * feat_nb       : index of the feature descriptor in fm->array
 * mode          : LOOKUP_MODE to look the string up with dico_string2int();
 *                 any other value adds it with dico_add()
 *
 * The feature string is the concatenation, for each element of the descriptor,
 * of the element name followed by the decimal value returned by its function.
 *
 * Returns the value returned by dico_string2int() or dico_add().
 */
int get_feat_value(feat_model *fm, char *form, dico *dico_features, int feat_nb, int mode)
{
  feat_desc *fd = fm->array[feat_nb];
  int i;
  int feat_val;
  /* roomy enough for any int printed with "%d", sign and terminator included */
  char str[32];

  fm->string[0] = '\0';
  for(i=0; i < fd->nbelem; i++){
    strcat(fm->string, fd->array[i]->name);
    feat_val = fd->array[i]->fct(form);
    snprintf(str, sizeof str, "%d", feat_val);
    strcat(fm->string, str);
  }

  /* fm->string has just been written to, so it needs no NULL test here. */
  if(mode == LOOKUP_MODE)
    return dico_string2int(dico_features, fm->string);
  return dico_add(dico_features, fm->string);
}
/*
 * Rebuilds a feature vector for a form.
 *
 * form          : form to vectorize
 * fv            : feature vector, emptied then filled with one code per
 *                 feature of the model
 * fm            : feature model listing the features to compute
 * dico_features : dictionary handed to get_feat_value()
 * mode          : LOOKUP_MODE or ADD_MODE, handed to get_feat_value()
 *
 * Returns fv.
 */
feat_vec *form2fv(char *form, feat_vec *fv, feat_model *fm, dico *dico_features, int mode)
{
  int feat_nb = 0;

  feat_vec_empty(fv);
  while(feat_nb < fm->nbelem){
    int code = get_feat_value(fm, form, dico_features, feat_nb, mode);
    feat_vec_add(fv, code);
    feat_nb++;
  }
  return fv;
}
#ifndef __VECTORIZE__
#define __VECTORIZE__
#include"dico.h"
#include"feat_model.h"
#include"feat_vec.h"
#define LOOKUP_MODE 1
#define ADD_MODE 2
feat_vec *form2fv(char *form, feat_vec *fv, feat_model *fm, dico *dico_features, int mode);
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment