Skip to content
Snippets Groups Projects
Commit 54a43e6b authored by Cindy Aloui's avatar Cindy Aloui
Browse files

Added missing tool

parent 8d9978b2
Branches
No related tags found
No related merge requests found
...@@ -25,7 +25,7 @@ dev.mcf: $(DEV) ...@@ -25,7 +25,7 @@ dev.mcf: $(DEV)
$(TOOLS)/ftb2fr_stream -f $< | sed 's/_-_/-/g' > $@ $(TOOLS)/ftb2fr_stream -f $< | sed 's/_-_/-/g' > $@
$(RULES_FILENAME): $(FPLM_FILENAME) $(RULES_FILENAME): $(FPLM_FILENAME)
maca_compute_l_rules -f $(FPLM_FILENAME) -e $(EXCEPTIONS_FPLM_FILENAME) -r $(RULES_FILENAME) $(STRICT) -t $(THRESHOLD) $(TOOLS)/maca_compute_l_rules -f $(FPLM_FILENAME) -e $(EXCEPTIONS_FPLM_FILENAME) -r $(RULES_FILENAME) $(STRICT) -t $(THRESHOLD)
fplm: $(TOOLS)/lefff2fplm.pl $(TOOLS)/fplm_change_pos_fr.pl $(LEFFF_DIR)/*.lex fplm_add fplm: $(TOOLS)/lefff2fplm.pl $(TOOLS)/fplm_change_pos_fr.pl $(LEFFF_DIR)/*.lex fplm_add
$(TOOLS)/lefff2fplm.pl $(LEFFF_DIR) | $(TOOLS)/fplm_change_pos_fr.pl > tmp $(TOOLS)/lefff2fplm.pl $(LEFFF_DIR) | $(TOOLS)/fplm_change_pos_fr.pl > tmp
......
...@@ -7,9 +7,9 @@ CFLAGS = -Wall -ggdb -I. ...@@ -7,9 +7,9 @@ CFLAGS = -Wall -ggdb -I.
LIBS = -lm LIBS = -lm
#LIBS += $(shell pkg-config --libs glib-2.0) #LIBS += $(shell pkg-config --libs glib-2.0)
OBJ = conll_lib.o orfeo.o util.o hash_str.o ftb_lib.o OBJ = conll_lib.o orfeo.o util.o hash_str.o ftb_lib.o l_rule.o dico.o hash.o
ALL= ftb_tokenize_point ftb_remove_punct ftb2datcha decoda2orfeo ftb2orfeo process_decoda_tsv conll07_renumber_tokens ftb2fr_stream conll2mcf conllu2mcf ALL= ftb_tokenize_point ftb_remove_punct ftb2datcha decoda2orfeo ftb2orfeo process_decoda_tsv conll07_renumber_tokens ftb2fr_stream conll2mcf conllu2mcf maca_compute_l_rules
all: $(ALL) all: $(ALL)
...@@ -17,6 +17,9 @@ all: $(ALL) ...@@ -17,6 +17,9 @@ all: $(ALL)
conll2mcf : conll2mcf.c $(OBJ) conll2mcf : conll2mcf.c $(OBJ)
$(CC) $(CFLAGS) -o $@ $< $(OBJ) $(LIBS) $(CC) $(CFLAGS) -o $@ $< $(OBJ) $(LIBS)
maca_compute_l_rules : maca_compute_l_rules.c $(OBJ)
$(CC) $(CFLAGS) -o $@ $< $(OBJ) $(LIBS)
conllu2mcf : conllu2mcf.c $(OBJ) conllu2mcf : conllu2mcf.c $(OBJ)
$(CC) $(CFLAGS) -o $@ $< $(OBJ) $(LIBS) $(CC) $(CFLAGS) -o $@ $< $(OBJ) $(LIBS)
......
...@@ -68,7 +68,10 @@ def generateMCF(filename) : ...@@ -68,7 +68,10 @@ def generateMCF(filename) :
output[mcdFormat.pos] = split[format.pos] output[mcdFormat.pos] = split[format.pos]
output[mcdFormat.morpho] = split[format.morpho] output[mcdFormat.morpho] = split[format.morpho]
output[mcdFormat.lemma] = split[format.lemma] output[mcdFormat.lemma] = split[format.lemma]
output[mcdFormat.governor] = int(split[format.governor]) gov = split[format.governor].split('|')[0]
if gov == "" or gov == "_" :
gov = 0
output[mcdFormat.governor] = int(gov)
if not output[mcdFormat.governor] == 0 : if not output[mcdFormat.governor] == 0 :
output[mcdFormat.governor] -= id output[mcdFormat.governor] -= id
output[mcdFormat.label] = split[format.label] output[mcdFormat.label] = split[format.label]
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include"hash_str.h" #include"hash_str.h"
#include"util.h" #include"util.h"
cell *cell_new(char *key, char *val, cell *next) cell *str_cell_new(char *key, char *val, cell *next)
{ {
cell *c = (cell *)memalloc(sizeof(cell)); cell *c = (cell *)memalloc(sizeof(cell));
c->val = val; c->val = val;
...@@ -13,10 +13,10 @@ cell *cell_new(char *key, char *val, cell *next) ...@@ -13,10 +13,10 @@ cell *cell_new(char *key, char *val, cell *next)
return c; return c;
} }
void cell_free(cell *c) void str_cell_free(cell *c)
{ {
if(c == NULL) return; if(c == NULL) return;
cell_free(c->next); str_cell_free(c->next);
free(c->key); free(c->key);
free(c->val); free(c->val);
free(c); free(c);
...@@ -39,7 +39,7 @@ void hash_str_free(hash_str *h) ...@@ -39,7 +39,7 @@ void hash_str_free(hash_str *h)
{ {
int i; int i;
for(i=0; i < h->size; i++) for(i=0; i < h->size; i++)
cell_free(h->array[i]); str_cell_free(h->array[i]);
free(h); free(h);
} }
...@@ -84,14 +84,14 @@ void hash_str_add(hash_str *h, char *key, char *val) ...@@ -84,14 +84,14 @@ void hash_str_add(hash_str *h, char *key, char *val)
if(hash_str_lookup(h, key)) return; if(hash_str_lookup(h, key)) return;
index = hash_str_func(key, h->size); index = hash_str_func(key, h->size);
h->array[index] = cell_new(key, val, h->array[index]); h->array[index] = str_cell_new(key, val, h->array[index]);
h->nbelem++; h->nbelem++;
} }
int cell_nb(cell *c) int str_cell_nb(cell *c)
{ {
if(c == NULL) return 0; if(c == NULL) return 0;
return 1 + cell_nb(c->next); return 1 + str_cell_nb(c->next);
} }
void hash_str_stats(hash_str *h) void hash_str_stats(hash_str *h)
...@@ -102,14 +102,14 @@ void hash_str_stats(hash_str *h) ...@@ -102,14 +102,14 @@ void hash_str_stats(hash_str *h)
int nb; int nb;
for(i=0; i < h->size; i++) for(i=0; i < h->size; i++)
if((l = cell_nb(h->array[i])) > max) if((l = str_cell_nb(h->array[i])) > max)
max = l; max = l;
nb = max + 1; nb = max + 1;
table = (int *)memalloc(nb * sizeof(int)); table = (int *)memalloc(nb * sizeof(int));
for(i=0; i < nb; i++) for(i=0; i < nb; i++)
table[i] = 0; table[i] = 0;
for(i=0; i < h->size; i++) for(i=0; i < h->size; i++)
table[cell_nb(h->array[i])]++; table[str_cell_nb(h->array[i])]++;
for(i=0; i < nb; i++) for(i=0; i < nb; i++)
printf("%d %d\n", i, table[i]); printf("%d %d\n", i, table[i]);
......
...@@ -18,8 +18,8 @@ typedef struct ...@@ -18,8 +18,8 @@ typedef struct
} hash_str; } hash_str;
/* The prototypes must carry the same names as the str_cell_new() / str_cell_free() definitions. */
cell *cell_new(char *key, char *val, cell *next); cell *str_cell_new(char *key, char *val, cell *next);
void cell_free(cell *c); void str_cell_free(cell *c);
hash_str *hash_str_new(int size); hash_str *hash_str_new(int size);
void hash_str_free(hash_str *h); void hash_str_free(hash_str *h);
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<getopt.h>
#include"l_rule.h"
#include"util.h"
#include"dico.h"
/* Command-line configuration of the program, filled in by context_read_options(). */
typedef struct {
/* set by -h/--help; makes check_options() print the usage text and exit */
int help;
/* set by -v/--verbose */
int verbose;
/* set by -d/--debug */
int debug_mode;
/* set by -s/--strict; forwarded to compute_l_rule() */
int strict_mode;
/* set by -t/--threshold (100 by default); main() compares rule values against it */
int threshold;
/* heap copy of argv[0], shown in the usage text */
char *program_name;
/* -f/--fplm: input file, read by main() */
char *fplm_filename;
/* -r/--l_rules: file name handed to dico_print() */
char *l_rules_filename;
/* -e/--exceptions: file opened for writing by main() */
char *exceptions_filename;
} context;
/*
 * Release a context together with every string it owns.
 *
 * All four string members are heap copies made with strdup() while the
 * command line is parsed, so each of them is released here.  free(NULL) is a
 * no-op, which makes per-member NULL checks unnecessary.
 *
 * ctx may be NULL, in which case nothing happens.
 */
void context_free(context *ctx)
{
    if (ctx == NULL)
        return;

    free(ctx->program_name);
    free(ctx->fplm_filename);
    free(ctx->l_rules_filename);
    free(ctx->exceptions_filename);
    free(ctx);
}
context *context_new(void)
{
context *ctx = (context *)memalloc(sizeof(context));
ctx->help = 0;
ctx->verbose = 0;
ctx->debug_mode = 0;
ctx->threshold = 100;
ctx->strict_mode = 0;
ctx->program_name = NULL;
ctx->fplm_filename = NULL;
ctx->l_rules_filename = NULL;
ctx->exceptions_filename = NULL;
return ctx;
}
/*
 * Write the usage text to stderr: one header line naming the program,
 * followed by the list of supported options.
 */
void help_message(context *ctx)
{
    static const char *const option_lines[] = {
        "Options:\n",
        "\t-h --help : print this message\n",
        "\t-v --verbose : activate verbose mode\n",
        "\t-f --fplm <filename> : fplm filename\n",
        "\t-s --strict : generate strict l_rules\n",
        "\t-t --threshold <int> : threshold\n",
        "\t-r --l_rules <filename> : file to stock l_rules\n",
        "\t-e --exceptions <filename> : exceptions filename (fplm format)\n",
    };
    size_t k;

    fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
    for (k = 0; k < sizeof option_lines / sizeof option_lines[0]; k++)
        fputs(option_lines[k], stderr);
}
/*
 * Validate the parsed command line.
 *
 * The program can only run when help was not requested and the fplm,
 * l_rules and exceptions file names were all supplied.  Otherwise the usage
 * text is printed and the process exits with status 1.
 */
void check_options(context *ctx)
{
    int have_all_files = ctx->fplm_filename != NULL
                      && ctx->l_rules_filename != NULL
                      && ctx->exceptions_filename != NULL;

    if (!ctx->help && have_all_files)
        return;

    help_message(ctx);
    exit(1);
}
/*
 * Build a context from the command line.
 *
 * Recognised options: -h/--help, -v/--verbose, -d/--debug, -s/--strict,
 * -f/--fplm <file>, -t/--threshold <int>, -r/--l_rules <file> and
 * -e/--exceptions <file>.  When a file option is given several times the
 * last occurrence wins.  Unrecognised options are skipped without a message
 * (opterr is cleared).  A threshold that is not a plain decimal integer
 * raises the help flag, so that check_options() prints the usage text
 * instead of the program running with a meaningless value.
 *
 * Returns a newly allocated context owned by the caller.
 */
context *context_read_options(int argc, char *argv[])
{
    /*
     * getopt_long() scans this table until it reaches an all-zero entry, so
     * the terminator is mandatory: without it an unknown long option makes
     * the scan run past the end of the array.
     */
    static const struct option long_options[] = {
        {"help",       no_argument,       0, 'h'},
        {"verbose",    no_argument,       0, 'v'},
        {"debug",      no_argument,       0, 'd'},
        {"strict",     no_argument,       0, 's'},
        {"fplm",       required_argument, 0, 'f'},
        {"threshold",  required_argument, 0, 't'},
        {"l_rules",    required_argument, 0, 'r'},
        {"exceptions", required_argument, 0, 'e'},
        {0, 0, 0, 0}
    };
    int c;
    int option_index = 0;
    context *ctx = context_new();

    ctx->program_name = strdup(argv[0]);

    optind = 0;
    opterr = 0;
    while ((c = getopt_long(argc, argv, "hvdsf:t:r:e:", long_options, &option_index)) != -1) {
        switch (c) {
        case 'h':
            ctx->help = 1;
            break;
        case 'v':
            ctx->verbose = 1;
            break;
        case 'd':
            ctx->debug_mode = 1;
            break;
        case 's':
            ctx->strict_mode = 1;
            break;
        case 'f':
            free(ctx->fplm_filename); /* drop the copy made by an earlier -f */
            ctx->fplm_filename = strdup(optarg);
            break;
        case 'r':
            free(ctx->l_rules_filename);
            ctx->l_rules_filename = strdup(optarg);
            break;
        case 'e':
            free(ctx->exceptions_filename);
            ctx->exceptions_filename = strdup(optarg);
            break;
        case 't': {
            char *end;
            long value = strtol(optarg, &end, 10);

            /* Reject empty or partly numeric arguments such as "" or "12x". */
            if (end == optarg || *end != '\0')
                ctx->help = 1;
            else
                ctx->threshold = (int)value;
            break;
        }
        default:
            /* '?': unknown option or missing argument, ignored as before. */
            break;
        }
    }
    return ctx;
}
/* Capacity of one fplm field, terminator included.  The %99 widths in
 * read_fplm_entry() must stay equal to FPLM_FIELD_SIZE - 1. */
enum { FPLM_FIELD_SIZE = 100 };

/*
 * Read the next usable entry of an fplm stream.
 *
 * An entry is one line made of tab-separated fields: form, pos, lemma and
 * morpho.  Each destination buffer must hold FPLM_FIELD_SIZE bytes; longer
 * fields are truncated by the scan widths instead of overflowing.  A line
 * without morpho yields an empty morpho string.  Lines on which form, pos
 * and lemma cannot all be read (blank lines, for instance) are skipped
 * rather than processed with leftovers from the previous line.
 *
 * Returns 1 when an entry was stored, 0 once the stream is exhausted.
 */
static int read_fplm_entry(FILE *f, char *form, char *pos, char *lemma, char *morpho)
{
    char buffer[1000];

    while (fgets(buffer, sizeof buffer, f)) {
        /* Cut at the newline if there is one; a last line without a
         * trailing newline keeps all of its characters. */
        buffer[strcspn(buffer, "\n")] = '\0';
        morpho[0] = '\0';
        if (sscanf(buffer, "%99[^\t]\t%99[^\t]\t%99[^\t]\t%99[^\n]",
                   form, pos, lemma, morpho) >= 3)
            return 1;
    }
    return 0;
}

/*
 * Entry point.
 *
 * Pass 1 reads the fplm file, computes the l_rule of every (lemma, form)
 * pair and records it in a hash through hash_inc_val() (presumably an
 * occurrence count per rule -- confirm against hash.h).  Every rule whose
 * stored value reaches the threshold is added to a dictionary.
 * Pass 2 reads the fplm file again and writes to the exceptions file each
 * entry whose rule is absent from that dictionary.  Finally the dictionary
 * is written with dico_print() under the l_rules file name.
 *
 * Returns 0 on success, 1 when the exceptions file could not be flushed.
 */
int main(int argc, char *argv[])
{
    context *ctx = context_read_options(argc, argv);
    char form[FPLM_FIELD_SIZE];
    char pos[FPLM_FIELD_SIZE];
    char lemma[FPLM_FIELD_SIZE];
    char morpho[FPLM_FIELD_SIZE];
    FILE *F_fplm = NULL;
    FILE *F_exceptions = NULL;
    char *l_rule;
    cell *c;
    int i;
    int status = 0;

    /* Validate first: nothing needs to be allocated just to print the usage text. */
    check_options(ctx);

    dico *d_rules = dico_new((char *)"d_rules", 100);
    hash *h_rules = hash_new(10000);

    /* Pass 1: tally the rules. */
    F_fplm = myfopen(ctx->fplm_filename, "r");
    while (read_fplm_entry(F_fplm, form, pos, lemma, morpho)) {
        l_rule = compute_l_rule(lemma, form, ctx->strict_mode);
        hash_inc_val(h_rules, l_rule, 1);
        free(l_rule);
    }
    fclose(F_fplm);

    /* Keep the rules whose stored value reaches the threshold. */
    for (i = 0; i < h_rules->size; i++) {
        for (c = h_rules->array[i]; c; c = c->next) {
            if (c->val >= ctx->threshold)
                dico_add(d_rules, c->key);
        }
    }

    /* Pass 2: list the entries that no kept rule accounts for. */
    F_fplm = myfopen(ctx->fplm_filename, "r");
    F_exceptions = myfopen(ctx->exceptions_filename, "w");
    while (read_fplm_entry(F_fplm, form, pos, lemma, morpho)) {
        l_rule = compute_l_rule(lemma, form, ctx->strict_mode);
        if (dico_string2int(d_rules, l_rule) == -1)
            fprintf(F_exceptions, "%s\t%s\t%s\t%s\t%s\n", form, pos, lemma, morpho, l_rule);
        free(l_rule);
    }
    fclose(F_fplm);

    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fclose(F_exceptions) != 0) {
        perror(ctx->exceptions_filename);
        status = 1;
    }

    dico_print(ctx->l_rules_filename, d_rules);

    context_free(ctx);
    return status;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment