Skip to content
Snippets Groups Projects
Commit 1b0ec83f authored by Alexis Nasr's avatar Alexis Nasr
Browse files

added maca_morpho tools for morphological analysis

parent b938cd32
Branches
No related tags found
No related merge requests found
...@@ -33,6 +33,7 @@ add_subdirectory(maca_lexer) ...@@ -33,6 +33,7 @@ add_subdirectory(maca_lexer)
add_subdirectory(maca_trans_parser) add_subdirectory(maca_trans_parser)
add_subdirectory(maca_crf_tagger) add_subdirectory(maca_crf_tagger)
add_subdirectory(maca_graph_parser) add_subdirectory(maca_graph_parser)
add_subdirectory(maca_morpho)
if(MACA_EXPORT) if(MACA_EXPORT)
add_subdirectory(maca_export) add_subdirectory(maca_export)
......
# Source files that make up the maca_morpho static library.
set(SOURCES
  src/feat_fct.c
  src/context.c
)

# Build the static library and link it against perceptron and maca_common.
include_directories(src)
add_library(maca_morpho STATIC ${SOURCES})
target_link_libraries(maca_morpho perceptron maca_common)

# Build the fplm2cff executable, link it, and install it under bin.
add_executable(fplm2cff ./src/fplm2cff.c)
target_link_libraries(fplm2cff perceptron maca_common maca_morpho)
install(TARGETS fplm2cff DESTINATION bin)
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include "context.h"
#include "util.h"
void context_set_linguistic_resources_filenames(context *ctx);
void context_free(context *ctx)
{
if(ctx->program_name) free(ctx->program_name);
if(ctx->fplm_filename) free(ctx->fplm_filename);
if(ctx->language) free(ctx->language);
if(ctx->maca_data_path) free(ctx->maca_data_path);
free(ctx);
}
context *context_new(void)
{
context *ctx = (context *)memalloc(sizeof(context));
ctx->help = 0;
ctx->verbose = 0;
ctx->debug_mode = 0;
ctx->program_name = NULL;
ctx->fplm_filename = NULL;
ctx->language = strdup("fr");
ctx->maca_data_path = NULL;
return ctx;
}
/*
 * Print the usage line and the options common to every invocation on stderr.
 *
 * Only options that the command-line parser of this module accepts are
 * listed (-h, -v, -d).
 *
 * ctx: context whose program_name is shown in the usage line.
 */
void context_general_help_message(context *ctx)
{
  fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
  fprintf(stderr, "Options:\n");
  fprintf(stderr, "\t-h --help             : print this message\n");
  fprintf(stderr, "\t-v --verbose          : activate verbose mode\n");
  fprintf(stderr, "\t-d --debug            : activate debug mode\n");
}
/* Print the help line describing the fplm file option on stderr. */
void context_fplm_help_message(context *ctx)
{
  (void)ctx; /* not needed to print this fixed line */
  fputs("\t-f --fplm  <file> : fplm (form pos lemma morpho) file\n", stderr);
}
/* Print the help line describing the language option on stderr. */
void context_language_help_message(context *ctx)
{
  (void)ctx; /* not needed to print this fixed line */
  fputs("\t-L --language  : identifier of the language to use\n", stderr);
}
/* Print the help line describing the maca_data directory option on stderr. */
void context_maca_data_path_help_message(context *ctx)
{
  (void)ctx; /* not needed to print this fixed line */
  fputs("\t-M --maca_data_path  : path to maca_data directory\n", stderr);
}
/* Print the help line describing the feature model file option on stderr. */
void context_fm_help_message(context *ctx)
{
  (void)ctx; /* not needed to print this fixed line */
  fputs("\t-F --fm <file> : feature model file name\n", stderr);
}
/*
 * Store a heap copy of value in *slot, releasing whatever string was there.
 * *slot must be NULL or a pointer previously returned by malloc/strdup.
 */
static void context_replace_string(char **slot, const char *value)
{
  char *copy = strdup(value);

  free(*slot);
  *slot = copy;
}

/*
 * Build a context from the command line.
 *
 * Recognised options: -h/--help, -v/--verbose, -d/--debug, -f/--fplm <file>,
 * -L/--language <id>, -M or -D/--maca_data_path <dir>, -F/--fm <file>.
 * --mcd <arg> is accepted for compatibility but has no effect here.
 * Unknown options are silently ignored (opterr is 0).
 * When an option is given several times the last occurrence wins.
 *
 * After parsing, default resource file names are filled in by
 * context_set_linguistic_resources_filenames().
 *
 * Returns a new context that the caller releases with context_free().
 */
context *context_read_options(int argc, char *argv[])
{
  /* getopt_long() requires the table to end with an all-zero element. */
  static const struct option long_options[] = {
    {"help",           no_argument,       0, 'h'},
    {"verbose",        no_argument,       0, 'v'},
    {"debug",          no_argument,       0, 'd'},
    {"mcd",            required_argument, 0, 'C'},
    {"language",       required_argument, 0, 'L'},
    {"fplm",           required_argument, 0, 'f'},
    {"maca_data_path", required_argument, 0, 'D'},
    {"fm",             required_argument, 0, 'F'},
    {0, 0, 0, 0}
  };
  int option_index = 0;
  int c;
  context *ctx = context_new();

  /* Guarantee a defined value even when -F is absent, however the
     allocator initialised the field. */
  ctx->fm_filename = NULL;
  context_replace_string(&ctx->program_name, (argc > 0 && argv[0]) ? argv[0] : "");

  optind = 0; /* restart the scan (glibc: 0 forces re-initialisation) */
  opterr = 0; /* no diagnostics from getopt for unknown options */

  while ((c = getopt_long(argc, argv, "hvdf:L:M:D:F:", long_options, &option_index)) != -1) {
    switch (c) {
    case 'd':
      ctx->debug_mode = 1;
      break;
    case 'h':
      ctx->help = 1;
      break;
    case 'v':
      ctx->verbose = 1;
      break;
    case 'f':
      context_replace_string(&ctx->fplm_filename, optarg);
      break;
    case 'L':
      /* Replaces the default language without leaking it. */
      context_replace_string(&ctx->language, optarg);
      break;
    case 'M': /* short form shown in the help text; same meaning as -D */
    case 'D':
      context_replace_string(&ctx->maca_data_path, optarg);
      break;
    case 'F':
      context_replace_string(&ctx->fm_filename, optarg);
      break;
    default:
      /* 'C' (--mcd) and unknown options ('?') are ignored. */
      break;
    }
  }

  context_set_linguistic_resources_filenames(ctx);
  return ctx;
}
/*
 * Fill in the default fplm file name when none was given explicitly.
 *
 * The default is "<base>/<language>/bin/" followed by DEFAULT_FPLM_FILENAME,
 * where <base> is ctx->maca_data_path if set, otherwise the value of the
 * MACAON_DIR environment variable. When neither is available a warning is
 * printed on stderr and <base> is empty.
 *
 * The path is built in a buffer of exactly the required size, so arbitrarily
 * long directory names or language identifiers cannot overflow anything.
 * On allocation failure fplm_filename is left NULL.
 */
void context_set_linguistic_resources_filenames(context *ctx)
{
  const char *base = ctx->maca_data_path;
  const char *language = ctx->language ? ctx->language : "";
  char *filename;
  int length;

  if (base == NULL) {
    base = getenv("MACAON_DIR");
    if (base == NULL) {
      fprintf(stderr, "ATTENTION: the environment variable MACAON_DIR is not defined\n");
      base = "";
    }
  }

  /* An explicitly supplied file name always wins over the default. */
  if (ctx->fplm_filename != NULL)
    return;

  /* First pass measures the path, second pass writes it. */
  length = snprintf(NULL, 0, "%s/%s/bin/%s", base, language, DEFAULT_FPLM_FILENAME);
  if (length < 0)
    return;

  filename = malloc((size_t)length + 1);
  if (filename == NULL)
    return;

  snprintf(filename, (size_t)length + 1, "%s/%s/bin/%s", base, language, DEFAULT_FPLM_FILENAME);
  ctx->fplm_filename = filename;
}
#ifndef __MACA_MORPHO_CONTEXT__
#define __MACA_MORPHO_CONTEXT__
#include "mcd.h"
#include <stdlib.h>
#define DEFAULT_FPLM_FILENAME "fplm"
typedef struct {
int help;
int verbose;
int debug_mode;
char *program_name;
char *fplm_filename;
char *language;
char *maca_data_path;
char *fm_filename;
} context;
/* Life cycle: allocate, release, and build a context from argv. */
context *context_new(void);
void context_free(context *ctx);
context *context_read_options(int argc, char *argv[]);

/* Help printers; each writes its usage line(s) on stderr. */
void context_general_help_message(context *ctx);
void context_language_help_message(context *ctx);
void context_fplm_help_message(context *ctx);
void context_maca_data_path_help_message(context *ctx);
void context_fm_help_message(context *ctx);

/* NOTE(review): no definition of the four functions below is visible next to
   the ones above - confirm they are implemented somewhere before calling them. */
void context_conll_help_message(context *ctx);
void context_mcd_help_message(context *ctx);
void context_form_column_help_message(context *ctx);
void context_pos_column_help_message(context *ctx);
#endif
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"feat_lib.h"
/*
 * Feature function: last character of a form.
 *
 * input: NUL-terminated string passed as void *; may be NULL.
 * Returns the character as an int, or -1 when input is NULL or empty.
 */
int p1(void *input)
{
  const char *form = (const char *)input;
  size_t length;

  if (form == NULL)
    return -1;

  length = strlen(form);
  /* An empty form has no last character; form[length - 1] would read
     before the start of the buffer. */
  if (length < 1)
    return -1;

  return form[length - 1];
}
/*
 * Feature function: second-to-last character of a form.
 *
 * input: NUL-terminated string passed as void *; may be NULL.
 * Returns the character as an int, or -1 when input is NULL or has fewer
 * than two characters.
 */
int p2(void *input)
{
  const char *form = (const char *)input;
  size_t length;

  if (form == NULL)
    return -1;

  length = strlen(form);
  /* Forms shorter than two characters have no such position;
     form[length - 2] would read before the start of the buffer. */
  if (length < 2)
    return -1;

  return form[length - 2];
}
/*
 * Create the feature library used for morphological analysis and register
 * the feature functions of this file in it, under the names "p1" and "p2".
 *
 * Returns the library obtained from feat_lib_new().
 * NOTE(review): the meaning of the constant 1 passed to feat_lib_add() is
 * defined by feat_lib.h and is not visible here.
 */
feat_lib *feat_lib_build(void)
{
  static const struct {
    const char *name;
    int (*fct)(void *);
  } registered[] = {
    {"p1", p1},
    {"p2", p2},
  };
  feat_lib *lib = feat_lib_new();
  size_t k;

  for (k = 0; k < sizeof registered / sizeof registered[0]; k++)
    feat_lib_add(lib, 1, (char *)registered[k].name, registered[k].fct);

  return lib;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "context.h"
#include "feat_model.h"
#include "feat_vec.h"
#include "dico.h"
#include "util.h"
#define LOOKUP_MODE 1
#define TRAIN_MODE 2
/*
 * Compute the integer code of one feature of the model for a given form.
 *
 * The feature name is built in fm->string by concatenating, for every simple
 * feature of descriptor feat_nb, its name followed by the decimal value its
 * function returns for form. The name is then mapped to an integer through
 * dico_features.
 *
 * fm            : feature model; fm->string is used as scratch space.
 * form          : word form handed to each feature function.
 * dico_features : dictionary mapping feature names to integer codes.
 * feat_nb       : index of the feature descriptor in fm->array.
 * mode          : LOOKUP_MODE only looks the name up (dico_string2int);
 *                 any other value adds it to the dictionary (dico_add).
 *
 * Returns the value produced by dico_string2int() or dico_add().
 * NOTE(review): the capacity of fm->string is not visible here, so the
 * concatenation below cannot be bounded from this function.
 */
int get_feat_value(feat_model *fm, char *form, dico *dico_features, int feat_nb, int mode)
{
  feat_desc *fd = fm->array[feat_nb];
  char value_str[32]; /* large enough for any int, sign and terminator included */
  int i;

  fm->string[0] = '\0';
  for (i = 0; i < fd->nbelem; i++) {
    int feat_val;

    strcat(fm->string, fd->array[i]->name);
    feat_val = fd->array[i]->fct(form);
    snprintf(value_str, sizeof value_str, "%d", feat_val);
    strcat(fm->string, value_str);
  }

  if (mode == LOOKUP_MODE)
    return dico_string2int(dico_features, fm->string);

  return dico_add(dico_features, fm->string);
}
/*
 * Fill fv with the feature codes of form, one per feature of the model.
 *
 * fv is emptied first, then receives the result of get_feat_value() for each
 * feature index in order. Features are always computed in TRAIN_MODE, so
 * names not yet known are added to dico_features.
 *
 * Returns fv.
 */
feat_vec *form2fv(char *form, feat_vec *fv, feat_model *fm, dico *dico_features)
{
  int feat_nb = 0;

  feat_vec_empty(fv);
  while (feat_nb < fm->nbelem) {
    int code = get_feat_value(fm, form, dico_features, feat_nb, TRAIN_MODE);

    feat_vec_add(fv, code);
    feat_nb++;
  }
  return fv;
}
int main(int argc, char *argv[])
{
context *ctx = context_read_options(argc, argv);
feat_vec *fv = feat_vec_new(10);
dico *dico_features = dico_new("dico_features", 1000);
feat_model *fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);
char form[100];
while(strcmp(form, "end")){
fscanf(stdin, "%s", form);
printf("form = %s\n", form);
form2fv(form, fv, fm, dico_features);
/* void feat_vec_print_string(feat_vec *fv, dico *dico_features); */
feat_vec_print(stdout, fv);
}
/* dico_print_fh(stdout, dico_features); */
dico_print("xx", dico_features);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment