Commit d1b2e386 authored by Alexis Nasr
Browse files

added a verbose mode to maca_check_projectivity

parent 7af41f41
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"feat_vec.h"
#include"dico_vec.h"
#include"word_emb.h"
#include"config2feat_vec.h"
/*
 * Writes the usage text of maca_check_projectivity.
 *
 * The section titles are written to stderr from this function; the
 * description of each option is delegated to the context_*_help_message
 * helpers, called in the order in which the sections must appear.
 */
void maca_check_projectivity_help_message(context *ctx)
{
  /* options that are not tied to a section */
  context_general_help_message(ctx);
  context_mode_help_message(ctx);
  context_sent_nb_help_message(ctx);

  fputs("INPUT\n", stderr);
  context_conll_help_message(ctx);

  fputs("IN TEST MODE\n", stderr);
  context_vocabs_help_message(ctx);

  fputs("OUTPUT\n", stderr);
  context_cff_help_message(ctx);

  /* the vocabularies are described a second time, under the train mode title */
  fputs("IN TRAIN MODE\n", stderr);
  context_vocabs_help_message(ctx);
  context_root_label_help_message(ctx);
}
/*
 * Validates the command line options stored in ctx.
 *
 * The options are accepted when an input filename is set and help was not
 * requested. Otherwise the help message is printed and the program exits
 * with status 1.
 */
void maca_check_projectivity_check_options(context *ctx)
{
  int options_ok = ctx->input_filename && !ctx->help;

  if(options_ok){
    return;
  }

  maca_check_projectivity_help_message(ctx);
  exit(1);
}
/*
 * Entry point: counts the non projective dependencies of the input corpus.
 *
 * For every word of the buffer, the span between the word and its governor
 * is scanned. The dependency is counted as non projective as soon as one word
 * strictly inside the span has its own governor outside the span. The count
 * is also kept per label of the dependent word, and printed in verbose mode.
 *
 * Governors are stored as offsets relative to the dependent, hence the
 * "gov + index" computations below.
 *
 * Returns 0 on success. Exits with status 1 when the LABEL dictionary or the
 * word buffer is unavailable, or when memory cannot be allocated.
 */
int main(int argc, char *argv[])
{
  context *ctx;
  word_buffer *wb = NULL;
  word *w;
  int dep_index, gov_index, min_index, max_index, w_index;
  int inner_gov_index;
  int label;
  int label_nb;
  int word_nb = 0;
  int word_non_proj = 0;
  int *non_proj_array = NULL;
  dico *dico_labels;

  ctx = context_read_options(argc, argv);
  maca_check_projectivity_check_options(ctx);

  mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->input_filename);
  ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);

  dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
  /* NOTE(review): assumes a missing dictionary is reported as NULL - confirm */
  if(dico_labels == NULL){
    fprintf(stderr, "maca_check_projectivity: no LABEL dictionary found\n");
    exit(1);
  }
  label_nb = dico_labels->nbelem;

  /* one zero-initialised counter per label; at least one element so that
     a NULL result always means an allocation failure */
  non_proj_array = calloc(label_nb > 0 ? (size_t)label_nb : 1, sizeof *non_proj_array);
  if(non_proj_array == NULL){
    fprintf(stderr, "maca_check_projectivity: memory allocation failed\n");
    exit(1);
  }

  wb = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct);
  /* NOTE(review): assumes a load failure is reported as NULL - confirm */
  if(wb == NULL){
    fprintf(stderr, "maca_check_projectivity: cannot load the input file\n");
    free(non_proj_array);
    exit(1);
  }

  while(!word_buffer_end(wb)){
    dep_index = word_get_index(word_buffer_b0(wb));
    gov_index = word_get_gov(word_buffer_b0(wb)) + dep_index;

    /* order the two ends of the dependency */
    if(gov_index < dep_index){
      min_index = gov_index;
      max_index = dep_index;
    }
    else{
      min_index = dep_index;
      max_index = gov_index;
    }

    for(w_index = min_index + 1; w_index < max_index; w_index++){
      w = word_buffer_get_word_n(wb, w_index);
      /* the governor offset may point outside the buffer: nothing to check there */
      if(w == NULL){
        continue;
      }
      inner_gov_index = word_get_gov(w) + w_index;
      if(inner_gov_index > max_index || inner_gov_index < min_index){
        word_non_proj++;
        label = word_get_label(word_buffer_b0(wb));
        /* a label outside the dictionary must not index the counters */
        if(label >= 0 && label < label_nb){
          non_proj_array[label]++;
        }
        break; /* one crossing is enough, the dependency is counted once */
      }
    }
    word_buffer_move_right(wb);
    word_nb++;
  }

  if(ctx->verbose){
    for(int i = 0; i < label_nb; i++){
      printf("%d\t%s\n", non_proj_array[i], dico_int2string(dico_labels, i));
    }
  }
  printf("number of dependencies = %d\n", word_nb);
  printf("number of non proj dependencies = %d\n", word_non_proj);
  /* an empty corpus would otherwise print the result of 0/0 */
  printf("non projectivity ratio = %.2f\n", word_nb ? (float) word_non_proj / word_nb : 0.0f);

  free(non_proj_array);
  context_free(ctx);
  return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment