Skip to content
Snippets Groups Projects
Commit 5c81f3ea authored by Alexis Nasr's avatar Alexis Nasr
Browse files
parents 77550dc6 8dbc7fef
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,12 @@ target_link_libraries(mcf2conll transparse)
target_link_libraries(mcf2conll maca_common)
install (TARGETS mcf2conll DESTINATION bin)
# mcf2orfeo: built from src/mcf2orfeo.c, linked against the perceptron,
# transparse and maca_common libraries, and installed into bin.
add_executable(mcf2orfeo ./src/mcf2orfeo.c)
target_link_libraries(mcf2orfeo perceptron)
target_link_libraries(mcf2orfeo transparse)
target_link_libraries(mcf2orfeo maca_common)
install (TARGETS mcf2orfeo DESTINATION bin)
# maca_compute_l_rules: built from src/maca_compute_l_rules.c, linked against
# maca_common, and installed into bin.
add_executable(maca_compute_l_rules ./src/maca_compute_l_rules.c)
target_link_libraries(maca_compute_l_rules maca_common)
install (TARGETS maca_compute_l_rules DESTINATION bin)
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<getopt.h>
#include"mcd.h"
#include"util.h"
#include"word_buffer.h"
/*
 * Run-time configuration of the program. It is allocated by context_new(),
 * filled in from the command line by context_read_options() and released by
 * context_free(), which frees every pointer member.
 */
typedef struct {
/* non-zero when -h/--help was given; checked by mcf2conll_check_options() */
int help;
/* non-zero when -v/--verbose was given; forwarded to mcd_read() */
int verbose;
/* non-zero when -d/--debug was given; not read anywhere in the visible code */
int debug_mode;
/* heap copy of argv[0], used in the usage message */
char *program_name;
/* heap copy of the -o/--conll argument; NULL means write to stdout */
char *conll_filename;
/* heap copy of the -i/--mcf argument; passed to word_buffer_load_mcf() */
char *mcf_filename;
/* heap copy of the -C/--mcd argument; NULL selects mcd_build_wpmlgfs() */
char *mcd_filename;
/* column description obtained from mcd_read() or mcd_build_wpmlgfs() */
mcd *mcd_struct;
} context;
/*
 * Release a context together with everything it owns.
 *
 * ctx: context to destroy; NULL is accepted and ignored.
 *
 * The string members are handed straight to free(), which is defined to do
 * nothing for a NULL pointer, so no per-member guard is needed.
 */
void context_free(context *ctx)
{
    if (ctx == NULL)
        return;

    free(ctx->program_name);
    free(ctx->conll_filename);
    free(ctx->mcf_filename);
    free(ctx->mcd_filename);

    /* mcd_free() is defined elsewhere and its handling of NULL is not
       visible from here, so the guard stays. */
    if (ctx->mcd_struct)
        mcd_free(ctx->mcd_struct);

    free(ctx);
}
context *context_new(void)
{
context *ctx = (context *)memalloc(sizeof(context));
ctx->help = 0;
ctx->verbose = 0;
ctx->debug_mode = 0;
ctx->program_name = NULL;
ctx->conll_filename = NULL;
ctx->mcf_filename = NULL;
ctx->mcd_filename = NULL;
ctx->mcd_struct = NULL;
return ctx;
}
/*
 * Print the usage summary on stderr.
 *
 * ctx: supplies program_name for the first line of the message.
 *
 * NOTE(review): context_read_options() also accepts -d/--debug, which is not
 * listed here.
 */
void context_general_help_message(context *ctx)
{
    static const char *const option_lines[] = {
        "Options:\n",
        "\t-h --help : print this message\n",
        "\t-v --verbose : activate verbose mode\n",
        "\t-C --mcd : mcd filename\n",
        "\t-i --mcf : mcf filename (read from stdin if absent)\n",
        "\t-o --conll : conll filename (write to stdout if absent)\n",
    };
    size_t n_lines = sizeof option_lines / sizeof option_lines[0];
    size_t k;

    fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
    for (k = 0; k < n_lines; k++)
        fputs(option_lines[k], stderr);
}
/*
 * Act on the parsed options: when help was requested, print the usage
 * message and terminate the process with exit status 1. Otherwise return
 * without doing anything.
 */
void mcf2conll_check_options(context *ctx){
    if(!ctx->help)
        return;

    context_general_help_message(ctx);
    exit(1);
}
/*
 * Build a context from the command line.
 *
 * argc, argv: the arguments received by main().
 *
 * Recognised options: -h/--help, -v/--verbose, -d/--debug, -o/--conll FILE,
 * -C/--mcd FILE, -i/--mcf FILE. Unrecognised options are ignored silently
 * (opterr is cleared). When an option taking a filename is repeated, the last
 * occurrence wins.
 *
 * After parsing, mcd_struct is loaded with mcd_read() when an mcd filename
 * was given, and built with mcd_build_wpmlgfs() otherwise.
 *
 * Returns a newly allocated context; release it with context_free().
 */
context *context_read_options(int argc, char *argv[])
{
    /* getopt_long() requires the table to end with an all-zero element;
       without it the scan for a long option can run past the array. */
    static struct option long_options[] = {
        {"help",    no_argument,       0, 'h'},
        {"verbose", no_argument,       0, 'v'},
        {"debug",   no_argument,       0, 'd'},
        {"conll",   required_argument, 0, 'o'},
        {"mcd",     required_argument, 0, 'C'},
        {"mcf",     required_argument, 0, 'i'},
        {0,         0,                 0, 0}
    };
    int c;
    int option_index = 0;
    context *ctx = context_new();

    ctx->program_name = strdup(argv[0]);

    /* restart option scanning and keep getopt from printing its own
       diagnostics */
    optind = 0;
    opterr = 0;

    while ((c = getopt_long(argc, argv, "hvdo:C:i:", long_options, &option_index)) != -1) {
        switch (c) {
        case 'd':
            ctx->debug_mode = 1;
            break;
        case 'h':
            ctx->help = 1;
            break;
        case 'v':
            ctx->verbose = 1;
            break;
        case 'o':
            /* drop the value of an earlier -o so it is not leaked */
            free(ctx->conll_filename);
            ctx->conll_filename = strdup(optarg);
            break;
        case 'i':
            free(ctx->mcf_filename);
            ctx->mcf_filename = strdup(optarg);
            break;
        case 'C':
            free(ctx->mcd_filename);
            ctx->mcd_filename = strdup(optarg);
            break;
        default:
            /* unknown option or missing argument: ignored */
            break;
        }
    }

    if (ctx->mcd_filename) {
        ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
    }
    else {
        ctx->mcd_struct = mcd_build_wpmlgfs();
    }

    return ctx;
}
int main(int argc, char *argv[])
{
FILE *output_file;
context *ctx = context_read_options(argc, argv);
mcf2conll_check_options(ctx);
word_buffer *wb = word_buffer_load_mcf(ctx->mcf_filename, ctx->mcd_struct);
word *w = NULL;
int form_col = mcd_get_form_col(ctx->mcd_struct);
int pos_col = mcd_get_pos_col(ctx->mcd_struct);
int cpos_col = mcd_get_cpos_col(ctx->mcd_struct);
int lemma_col = mcd_get_lemma_col(ctx->mcd_struct);
int gov_col = mcd_get_gov_col(ctx->mcd_struct);
int label_col = mcd_get_label_col(ctx->mcd_struct);
int feats_col = mcd_get_feats_col(ctx->mcd_struct);
int sent_seg_col = mcd_get_sent_seg_col(ctx->mcd_struct);
int spkr_col = mcd_get_a_col(ctx->mcd_struct);
int start_col = mcd_get_b_col(ctx->mcd_struct);
int end_col = mcd_get_c_col(ctx->mcd_struct);
int index = 1;
output_file = (ctx->conll_filename)? myfopen_no_exit(ctx->conll_filename, "w"): stdout;
do{
w = word_buffer_b0(wb);
if(w){
fprintf(output_file, "%d\t", index);
if(form_col != -1)
word_print_col_n(output_file, w, form_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
if(lemma_col != -1)
word_print_col_n(output_file, w, lemma_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
/*
if(cpos_col != -1)
word_print_col_n(output_file, w, cpos_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
*/
if(pos_col != -1)
word_print_col_n(output_file, w, pos_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
if(pos_col != -1)
word_print_col_n(output_file, w, pos_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
/*
if(feats_col != -1)
word_print_col_n(output_file, w, feats_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
*/
fprintf(output_file, "_");
fprintf(output_file, "\t");
if(gov_col){
if((word_get_gov(w) == 0) || ((word_get_gov(w) + index) < 0))
fprintf(output_file, "0\t");
else
fprintf(output_file, "%d\t", word_get_gov(w) + index);
}
else
fprintf(output_file, "_\t");
if(label_col != -1)
word_print_col_n(output_file, w, label_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
fprintf(output_file, "_");
fprintf(output_file, "\t");
fprintf(output_file, "_");
fprintf(output_file, "\t");
if(start_col != -1)
word_print_col_n(output_file, w, start_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
if(end_col != -1)
word_print_col_n(output_file, w, end_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
if(spkr_col != -1)
word_print_col_n(output_file, w, spkr_col);
else
fprintf(output_file, "_");
fprintf(output_file, "\t");
/* fprintf(output_file, "\t_\t\n"); */
/* fprintf(output_file, "_\t_\n"); */
fprintf(output_file, "\n");
if((sent_seg_col) && (word_get_sent_seg(w))){
fprintf(output_file, "\n");
index = 0;
}
index ++;
}
} while(word_buffer_move_right(wb));
if (wb->input_file != stdin)
fclose(wb->input_file);
word_buffer_free(wb);
if(ctx->conll_filename)
fclose(output_file);
context_free(ctx);
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment