Skip to content
Snippets Groups Projects
Commit ad724099 authored by Alexis Nasr
Browse files

reconciling with master

parents d1795272 d1b2e386
Branches
No related tags found
No related merge requests found
...@@ -49,6 +49,13 @@ target_link_libraries(maca_trans_attach_punct transparse) ...@@ -49,6 +49,13 @@ target_link_libraries(maca_trans_attach_punct transparse)
target_link_libraries(maca_trans_attach_punct maca_common) target_link_libraries(maca_trans_attach_punct maca_common)
install (TARGETS maca_trans_attach_punct DESTINATION bin) install (TARGETS maca_trans_attach_punct DESTINATION bin)
add_executable(maca_check_projectivity ./src/maca_check_projectivity.c)
target_link_libraries(maca_check_projectivity perceptron)
target_link_libraries(maca_check_projectivity transparse)
target_link_libraries(maca_check_projectivity maca_common)
install (TARGETS maca_check_projectivity DESTINATION bin)
add_executable(maca_trans_lemmatizer_mcf2cff ./src/maca_trans_lemmatizer_mcf2cff.c) add_executable(maca_trans_lemmatizer_mcf2cff ./src/maca_trans_lemmatizer_mcf2cff.c)
target_link_libraries(maca_trans_lemmatizer_mcf2cff perceptron) target_link_libraries(maca_trans_lemmatizer_mcf2cff perceptron)
target_link_libraries(maca_trans_lemmatizer_mcf2cff transparse) target_link_libraries(maca_trans_lemmatizer_mcf2cff transparse)
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<getopt.h>
#include"context.h"
#include"feat_vec.h"
#include"dico_vec.h"
#include"word_emb.h"
#include"config2feat_vec.h"
/*
 * Emit the usage text of maca_check_projectivity.
 *
 * The section headers ("INPUT", "IN TEST MODE", "OUTPUT", "IN TRAIN MODE")
 * are written to stderr by this function; the description of each option
 * group is delegated to the matching context_*_help_message() helper.
 *
 * ctx: option context handed over to every helper.
 */
void maca_check_projectivity_help_message(context *ctx)
{
  /* Options that are not tied to a particular section. */
  context_general_help_message(ctx);
  context_mode_help_message(ctx);
  context_sent_nb_help_message(ctx);

  fputs("INPUT\n", stderr);
  context_conll_help_message(ctx);

  fputs("IN TEST MODE\n", stderr);
  context_vocabs_help_message(ctx);

  fputs("OUTPUT\n", stderr);
  context_cff_help_message(ctx);

  fputs("IN TRAIN MODE\n", stderr);
  context_vocabs_help_message(ctx);
  context_root_label_help_message(ctx);
}
/*
 * Validate the command line options.
 *
 * Returns normally when an input file name was supplied and help was not
 * requested. Otherwise the usage text is printed and the process exits
 * with status 1.
 *
 * ctx: option context whose input_filename and help fields are inspected.
 */
void maca_check_projectivity_check_options(context *ctx)
{
  /*
   * Two further conditions are disabled in this tool:
   *   !ctx->mcd_filename
   *   !(ctx->cff_filename || ctx->fann_filename)
   */
  if(ctx->input_filename && !ctx->help){
    return;
  }

  maca_check_projectivity_help_message(ctx);
  exit(1);
}
/*
 * maca_check_projectivity: count the non projective dependencies of a corpus.
 *
 * Every word of the input file is visited once. For each word, the span
 * between the word and its governor is scanned; the dependency is counted
 * as non projective as soon as one word strictly inside the span has its
 * own governor outside of the span. A per-label tally is kept and printed
 * in verbose mode, followed by the global counts and the ratio.
 *
 * Returns 0 on success, 1 when the label dictionary, the counter array or
 * the word buffer cannot be obtained.
 */
int main(int argc, char *argv[])
{
  context *ctx;
  word_buffer *wb = NULL;
  word *w;
  int dep_index, gov_index, min_index, max_index, w_index;
  int w_gov_index;
  int label;
  int label_nb;
  int word_nb = 0;
  int word_non_proj = 0;
  int *non_proj_array = NULL;
  dico *dico_labels;
  int status = 1;

  ctx = context_read_options(argc, argv);
  maca_check_projectivity_check_options(ctx);

  mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->input_filename);
  ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);

  dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");
  if(dico_labels == NULL){
    fprintf(stderr, "maca_check_projectivity: no LABEL dictionary available\n");
    goto cleanup;
  }
  label_nb = dico_labels->nbelem;

  /* calloc zero-fills the counters; at least one element is requested so
     that a NULL result always means an allocation failure. */
  non_proj_array = calloc(label_nb > 0 ? (size_t)label_nb : 1, sizeof *non_proj_array);
  if(non_proj_array == NULL){
    fprintf(stderr, "maca_check_projectivity: cannot allocate label counters\n");
    goto cleanup;
  }

  wb = word_buffer_load_mcf(ctx->input_filename, ctx->mcd_struct);
  if(wb == NULL){
    fprintf(stderr, "maca_check_projectivity: cannot load %s\n", ctx->input_filename);
    goto cleanup;
  }

  while(!word_buffer_end(wb)){
    dep_index = word_get_index(word_buffer_b0(wb));
    /* word_get_gov() is added to the word index to obtain the governor
       index (presumably a relative offset -- confirm in word.h). */
    gov_index = word_get_gov(word_buffer_b0(wb)) + dep_index;

    if(gov_index < dep_index){
      min_index = gov_index;
      max_index = dep_index;
    }
    else{
      min_index = dep_index;
      max_index = gov_index;
    }

    /* Look for a word strictly inside the span whose governor lies outside
       [min_index, max_index]. */
    for(w_index = min_index + 1; w_index < max_index; w_index++){
      w = word_buffer_get_word_n(wb, w_index);
      if(w == NULL){
        /* No word could be fetched at this position: nothing to test. */
        continue;
      }
      w_gov_index = word_get_gov(w) + w_index;
      if(w_gov_index > max_index || w_gov_index < min_index){
        word_non_proj++;
        /* Labels may be unset (e.g. -1): only tally labels that index
           inside the counter array. */
        label = word_get_label(word_buffer_b0(wb));
        if(label >= 0 && label < label_nb){
          non_proj_array[label]++;
        }
        break;
      }
    }

    word_buffer_move_right(wb);
    word_nb++;
  }

  if(ctx->verbose){
    for(int i = 0; i < label_nb; i++){
      printf("%d\t%s\n", non_proj_array[i], dico_int2string(dico_labels, i));
    }
  }

  printf("number of dependencies = %d\n", word_nb);
  printf("number of non proj dependencies = %d\n", word_non_proj);
  /* An empty corpus yields a ratio of 0 instead of 0/0. */
  printf("non projectivity ratio = %.2f\n",
         word_nb > 0 ? (float) word_non_proj / word_nb : 0.0f);

  status = 0;

cleanup:
  free(non_proj_array);
  context_free(ctx);
  return status;
}
...@@ -85,8 +85,10 @@ int movement_ignore(config *c, int movement_code) ...@@ -85,8 +85,10 @@ int movement_ignore(config *c, int movement_code)
{ {
if(word_buffer_end(config_get_buffer(c))) return 0; if(word_buffer_end(config_get_buffer(c))) return 0;
word *b0 = word_buffer_b0(config_get_buffer(c)); word *b0 = word_buffer_b0(config_get_buffer(c));
word_set_gov(b0, WORD_INVALID_GOV); word_set_gov(b0, WORD_INVALID_GOV);
word_set_label(b0, -1); word_set_label(b0, -1);
config_push_mvt(c, movement_code, b0, NULL); config_push_mvt(c, movement_code, b0, NULL);
word_buffer_move_right(config_get_buffer(c)); word_buffer_move_right(config_get_buffer(c));
// fprintf(stderr, "IGNORE\n"); // fprintf(stderr, "IGNORE\n");
......
...@@ -51,12 +51,26 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref, int root_label, int pun ...@@ -51,12 +51,26 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref, int root_label, int pun
/* if(!stack_is_empty(config_get_stack(c)) && !word_buffer_is_empty(config_get_buffer(c))){ */ /* if(!stack_is_empty(config_get_stack(c)) && !word_buffer_is_empty(config_get_buffer(c))){ */
b0 = word_buffer_b0(config_get_buffer(c)); b0 = word_buffer_b0(config_get_buffer(c));
b0_index = word_get_index(b0); b0_index = word_get_index(b0);
b0_gov_index = word_get_gov_index(word_buffer_get_word_n(ref, b0_index)); b0_gov_index = word_get_gov_index(word_buffer_get_word_n(ref, b0_index));
b0_label = word_get_label(word_buffer_get_word_n(ref, b0_index)); b0_label = word_get_label(word_buffer_get_word_n(ref, b0_index));
/* printf("s0_index = %d b0_index = %d\n", s0_index, b0_index);
printf("dans ref gov de s0 (%d) = %d\n", s0_index, s0_gov_index);
printf("dans ref gov de b0 (%d) = %d\n", b0_index, b0_gov_index);*/
/* s0 is the root of the sentence */
if((s0_label == root_label)
// && (word_get_label(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != root_label)
&& check_all_dependents_of_word_in_ref_are_in_hyp(c, ref, s0_index)
){
return MVT_PARSER_ROOT;
}
/* word in front of the buffer is an end of sentence marker */ /* word in front of the buffer is an end of sentence marker */
if(word_get_sent_seg(word_buffer_get_word_n(ref, b0_index)) == 1) return MVT_PARSER_EOS; if(word_get_sent_seg(word_buffer_get_word_n(ref, b0_index)) == 1) return MVT_PARSER_EOS;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment