Skip to content
Snippets Groups Projects
Select Git revision
  • b2d601a28bdd87a340b56b2b4fe5b8de42557234
  • master default protected
  • johannes
  • partial_parser
  • Aloui_Dary
  • ignore_punct
  • AC
  • classifier
  • fixhelp
  • libmacaon2
  • error_predictor
  • morpho
  • ssrnn
  • tfparsing
  • silvio
  • tagger_options
  • maca_trans_frame_parser
  • alexis
  • new_config
  • tagparse
  • maca_graph_parser
21 results

maca_trans_parser_export.cc

Blame
  • maca_trans_parser_export.cc 9.56 KiB
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <string>
    
    
    //#ifdef __cplusplus
    //extern "C"{
    //#endif 
    
    #include "simple_decoder_parser_arc_eager.h"
    #include "movement_parser_arc_eager.h"
    #include "feat_fct.h"
    #include "config2feat_vec.h"
    
    #include "feature_table.h"
    #include "dico.h"
    
    //#ifdef __cplusplus
    //}
    //#endif
    
    #include "maca_trans_parser_export.h"
    
    /**
     * Creates the parser context and loads the resources used for decoding.
     *
     * Initialises the class variables:
     *   resultstring: keeps the last result (NULL until something was parsed)
     *   ctx: current context, built by context_read_options()
     *   initOK: 1 on success, 0 when the "LABEL" dictionary cannot be found
     *
     * @param lg   value handed to the option reader as the argument of "-L"
     * @param mcd  value handed to the option reader as the argument of "-C"
     */
    MacaonTransParser::MacaonTransParser(char *lg, char *mcd) {
        resultstring = NULL;
        initOK = 1;

        /* Emulate a command line: program name followed by two option/value pairs. */
        char * argv[] = { (char *)"initParser",
                          (char *)"-L", lg,
                          (char *)"-C", mcd,
                          0
        };

        ctx = context_read_options(5, argv);

        set_linguistic_resources_filenames_parser(ctx);
        ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
        ctx->vocabs = dico_vec_read(ctx->vocabs_filename, ctx->hash_ratio);

        mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
        ctx->dico_labels = dico_vec_get_dico(ctx->vocabs, (char *)"LABEL");

        if(ctx->dico_labels == NULL){
            fprintf(stderr, "cannot find label names\n");
            initOK = 0;
        }
        else{
            /* Only touch the label dictionary when it exists: the number of
               movements is derived from the number of labels. */
            ctx->mvt_nb = ctx->dico_labels->nbelem * 2 + 3;
        }

        /* load models */
        ctx->d_perceptron_features = dico_vec_get_dico(ctx->vocabs, (char *)"d_perceptron_features");
    }
    
    /** Releases the last result string (if there is one) and then the parser context. */
    MacaonTransParser::~MacaonTransParser() {
        /* free() accepts NULL, so no separate emptiness test is needed. */
        free(resultstring);
        resultstring = NULL;

        context_free(ctx);
    }
    
    
    const char *MacaonTransParser::parsemcf(const char *mcf) {
        simple_decoder_parser_arc_eager_str(ctx, mcf);
        //printf("rrr %s\n", resultstring);
        //return "abcdef";
        return resultstring;
    }
    
    
    /** Concatenates `dir` and `default_name` and returns the result as a strdup()'ed C string. */
    static char *macaon_export_default_path(const std::string &dir, const char *default_name) {
        const std::string full_path = dir + default_name;
        return strdup(full_path.c_str());
    }

    /**
     * Gives a default value to every resource filename of `ctx` that is still
     * unset: the perceptron model, the vocabularies and the features model.
     * Each default is the data path of the context (when set) followed by the
     * corresponding DEFAULT_* filename.
     *
     * Adapted from maca_trans_parser.c, since this function is not in the
     * libtransparse.a library. Paths are built with std::string so that a long
     * data path cannot overflow a fixed-size buffer.
     *
     * @param ctx  context whose filename fields are completed in place
     */
    void MacaonTransParser::set_linguistic_resources_filenames_parser(context *ctx) {
        const std::string data_dir = ctx->maca_data_path ? ctx->maca_data_path : "";

        if(!ctx->perc_model_filename){
            ctx->perc_model_filename = macaon_export_default_path(data_dir, DEFAULT_MODEL_FILENAME);
        }

        if(!ctx->vocabs_filename){
            ctx->vocabs_filename = macaon_export_default_path(data_dir, DEFAULT_VOCABS_FILENAME);
        }

        /* No default is applied to mcd_filename here: the corresponding code
           (based on DEFAULT_MULTI_COL_DESC_FILENAME) is disabled in this file. */

        if(!ctx->features_model_filename){
            ctx->features_model_filename = macaon_export_default_path(data_dir, DEFAULT_FEATURES_MODEL_FILENAME);
        }

        if(ctx->verbose){
            fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
            fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
            /* mcd_filename gets no default above, so it may still be NULL. */
            fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
            fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
        }
    }
    
    
    /**
     * Arc-eager decoding of an input given as a string.
     *
     * Taken from simple_decode_parser_arc_eager.c and modified in order to take
     * an input string (in mcf format), which is read through a FILE * obtained
     * with fmemopen() instead of reading a file or stdin. The result is written
     * to a FILE * opened with open_memstream() in order to get it as a char *,
     * stored in `resultstring`.
     *
     * On failure (NULL input, or one of the memory streams cannot be opened)
     * `resultstring` is left NULL, so a stale result is never reported.
     *
     * @param ctx        parser context (models, dictionaries, options)
     * @param mcfString  NUL-terminated input in mcf format
     */
    void MacaonTransParser::simple_decoder_parser_arc_eager_str(context *ctx, const char *mcfString) {
        if(mcfString == NULL){
            free(resultstring);
            resultstring = NULL;
            return;
        }

        /* fmemopen() takes a non-const buffer even for read-only streams. */
        FILE *f = fmemopen(const_cast<char *>(mcfString), strlen(mcfString), "r");
        if(f == NULL){
            fprintf(stderr, "cannot open the input string as a stream\n");
            free(resultstring);
            resultstring = NULL;
            return;
        }

        feature_table *ft = feature_table_load(ctx->perc_model_filename, ctx->verbose);
        int root_label;
        int mvt_code;
        int mvt_type;
        int mvt_label;
        float max;
        feat_vec *fv = feat_vec_new(feature_types_nb);
        config *c = NULL;
        int result;
        int argmax1, argmax2;
        float max1, max2;
        int index;

        /* Fall back on label 0 when the root label is unknown to the dictionary. */
        root_label = dico_string2int(ctx->dico_labels, ctx->root_label);
        if(root_label == -1) root_label = 0;

        c = config_new(f, ctx->mcd_struct, 5);
        while(!config_is_terminal(c)){

            if(ctx->debug_mode){
                fprintf(stdout, "***********************************\n");
                config_print(stdout, c);
            }
            /* forced EOS (the element on the top of the stack is eos, but the preceding movement is not MVT_PARSER_EOS */
            /* which means that the top of the stack got its eos status from input */
            /* force the parser to finish parsing the sentence (perform all pending reduce actions) and determine root of the sentence */

            if((word_get_sent_seg(stack_top(config_get_stack(c))) == 1) && (mvt_get_type(mvt_stack_top(config_get_history(c))) != MVT_PARSER_EOS)){
                word_set_sent_seg(stack_top(config_get_stack(c)), -1);
                movement_parser_eos(c);
                while(movement_parser_reduce(c));
                while(movement_parser_root(c, root_label));
                if(ctx->debug_mode) printf("force EOS\n");
            }

            /* normal behaviour: ask the classifier what the next movement is and do it */
            else{
                config2feat_vec_cff(ctx->features_model, c, ctx->d_perceptron_features, fv, LOOKUP_MODE);
                mvt_code = feature_table_argmax(fv, ft, &max);

                if(ctx->debug_mode){
                    /* print the first three entries of the scored class array */
                    vcode *vcode_array = feature_table_get_vcode_array(fv, ft);
                    for(int i=0; i < 3; i++){
                        printf("%d\t", i);
                        movement_parser_print(stdout, vcode_array[i].class_code, ctx->dico_labels);
                        printf("\t%.4f\n", vcode_array[i].score);
                    }
                    free(vcode_array);
                }

                if(ctx->trace_mode){
                    index = word_get_index(word_buffer_b0(config_get_buffer(c)));
                    fprintf(stdout, "%d\t", index);

                    stack_print(stdout, c->st);
                    fprintf(stdout, "\t");

                    movement_parser_print(stdout, mvt_code, ctx->dico_labels);
                    fprintf(stdout, "\t");
                    /* margin between the two best scoring movements */
                    feature_table_argmax_1_2(fv, ft, &argmax1, &max1, &argmax2, &max2);
                    printf("%f\n", max1 - max2);
                }

                mvt_type = movement_parser_type(mvt_code);
                mvt_label = movement_parser_label(mvt_code);

                result = 0;
                switch(mvt_type){
                case MVT_PARSER_LEFT :
                    result = movement_parser_left_arc(c, mvt_label);
                    break;
                case MVT_PARSER_RIGHT:
                    result = movement_parser_right_arc(c, mvt_label);
                    break;
                case MVT_PARSER_REDUCE:
                    result = movement_parser_reduce(c);
                    break;
                case MVT_PARSER_ROOT:
                    result = movement_parser_root(c, root_label);
                    break;
                case MVT_PARSER_EOS:
                    result = movement_parser_eos(c);
                    break;
                case MVT_PARSER_SHIFT:
                    result = movement_parser_shift(c);
                }

                if(result == 0){
                    if(ctx->debug_mode) fprintf(stdout, "WARNING : movement cannot be executed doing a SHIFT instead !\n");
                    result = movement_parser_shift(c);
                    if(result == 0){ /* SHIFT failed no more words to read, let's get out of here ! */
                        if(ctx->debug_mode) fprintf(stdout, "WARNING : cannot exectue a SHIFT emptying stack !\n");
                        while(!stack_is_empty(config_get_stack(c)))
                            movement_parser_root(c, root_label);
                    }
                }
            }
        }

        /* The previous result is released only now, after decoding, so that an
           input pointing into the previous result stays readable while parsed. */
        free(resultstring);
        resultstring = NULL;

        size_t size;
        FILE *outstream = open_memstream(&resultstring, &size);
        if(outstream != NULL){
            print_word_buffer_fp(c, ctx->dico_labels, ctx->mcd_struct, outstream);
            /* closing the stream finalises the buffer stored in resultstring */
            fclose(outstream);
        }
        else{
            fprintf(stderr, "cannot open the output memory stream\n");
            resultstring = NULL;
        }

        config_free(c);
        feat_vec_free(fv);
        feature_table_free(ft);
        fclose(f);
    }
    
    
    /** taken from simple_decode_parser_arc_eager.c and modified in order to write to any FILE* not only stdout */
    void MacaonTransParser::print_word_buffer_fp(config *c, dico *dico_labels, mcd *mcd_struct, FILE *out) {
        int i;
        word *w;
        char *label;
        char *buffer = NULL;
        char *token = NULL;
        int col_nb = 0;
    
      
        for(i=0; i < config_get_buffer(c)->nbelem; i++){
    	w = word_buffer_get_word_n(config_get_buffer(c), i);
    
    	if((mcd_get_gov_col(mcd_struct) == -1)
    	   && (mcd_get_label_col(mcd_struct) == -1)
    	   && (mcd_get_sent_seg_col(mcd_struct) == -1)){
    	    fprintf(out, "%s\t", word_get_input(w));
    	    fprintf(out, "%d\t", word_get_gov(w));
    	    label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w));
    	    if(label != NULL)
    		fprintf(out, "%s\t", label) ;
    	    else
    		fprintf(out, "_\t");
    	    if(word_get_sent_seg(w) == 1)
    		fprintf(out, "1\n") ;
    	    else
    		fprintf(out, "0\n");
    	}
    	else{
    	    buffer = strdup(w->input);
    	    token = strtok(buffer, "\t");
    	    col_nb = 0;
    	    while(token){
    		if(col_nb != 0) fprintf(out, "\t");
    		if(col_nb == mcd_get_gov_col(mcd_struct)){
    		    fprintf(out, "%d", word_get_gov(w));
    		}
    		else
    		    if(col_nb == mcd_get_label_col(mcd_struct)){
    			label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w));
    			if(label != NULL)
    			    fprintf(out, "%s", label) ;
    			else
    			    fprintf(out, "_");
    		    }
    		    else
    			if(col_nb == mcd_get_sent_seg_col(mcd_struct)){
    			    if(word_get_sent_seg(w) == 1)
    				fprintf(out, "1") ;
    			    else
    				fprintf(out, "0");
    			}
    			else{
    			    word_print_col_n(out, w, col_nb);
    			}
    		col_nb++;
    		token = strtok(NULL, "\t");
    	    }
    	    if((col_nb <= mcd_get_gov_col(mcd_struct)) || (mcd_get_gov_col(mcd_struct) == -1)){
    		fprintf(out, "\t%d", word_get_gov(w));
    	    }
    	    if((col_nb <= mcd_get_label_col(mcd_struct)) || (mcd_get_label_col(mcd_struct) == -1)){
    		label = (word_get_label(w) == -1)? NULL : dico_int2string(dico_labels, word_get_label(w));
    		if(label != NULL)
    		    fprintf(out, "\t%s", label) ;
    		else
    		    fprintf(out, "\t_");
    	    }
    	    if((col_nb <= mcd_get_sent_seg_col(mcd_struct)) || (mcd_get_sent_seg_col(mcd_struct) == -1)){
    		if(word_get_sent_seg(w) == 1)
    		    fprintf(out, "\t1") ;
    		else
    		    fprintf(out, "\t0");
    	    }
    	    fprintf(out, "\n");
    	    free(buffer);
    	}
        }
    }