From 6b4c301612f718f6333483e24795d6d649e186f0 Mon Sep 17 00:00:00 2001
From: Marjorie Armando <marjorie.armando.1@etu.univ-amu.fr>
Date: Mon, 17 Apr 2017 23:17:30 +0200
Subject: [PATCH] generate train and test files, generate cff, predict test's
 forms' classes

---
 maca_morpho/src/fplm2cff_fct.c | 116 +++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 maca_morpho/src/fplm2cff_fct.c

diff --git a/maca_morpho/src/fplm2cff_fct.c b/maca_morpho/src/fplm2cff_fct.c
new file mode 100644
index 0000000..b2b6e2a
--- /dev/null
+++ b/maca_morpho/src/fplm2cff_fct.c
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fplm2cff.h"
+
+void fplm2cff_help_message(context *ctx) /* calls every help helper below on ctx, in this order */
+{
+	context_general_help_message(ctx);
+	context_language_help_message(ctx);
+	context_fplm_help_message(ctx);
+	context_maca_data_path_help_message(ctx);
+	context_features_filename_help_message(ctx);
+	context_features_model_help_message(ctx);
+	context_class_help_message(ctx);
+	exit(1); /* ends the process with status 1, so this function never returns */
+}
+
+/*
+ * Writes ../../Files/cff.txt from the fplm file ctx->fplm_filename: entries for
+ * which read_line_fplm reports 4 fields go to write_cff, the others are logged
+ * on stderr and skipped. Prints the feature dictionary to ctx->features_filename
+ * when set. Exits with EXIT_FAILURE if fopen fails or closing the output fails.
+ */
+void create_cff(context* ctx)
+{
+	FILE *fplm = NULL, *cff = NULL;
+	feat_vec *fv = NULL;
+	dico *dico_features = NULL;
+	feat_model *fm = NULL;
+	int fields_nb;
+	/* start empty so the skip diagnostic never prints indeterminate bytes */
+	char form[100] = "", pos[50] = "", lemma[100] = "", morpho[50] = "";
+	char classes_array[100] = "0";
+	CLASS class = choose_class(ctx->class_name);
+	fplm = fopen(ctx->fplm_filename,"r");
+	if(fplm == NULL)
+	{
+		fprintf(stderr,"Could not open input file.\nThe fplm file is in the Files directory.\n");
+		exit(EXIT_FAILURE);
+	}
+	fv = feat_vec_new(10);
+	dico_features = dico_new("dico_features", 1000);
+	fm = feat_model_read(ctx->fm_filename, feat_lib_build(), ctx->verbose);
+	cff = fopen("../../Files/cff.txt","w");
+	if(cff == NULL)
+	{
+		fprintf(stderr,"Problem with the cff file.\n");
+		exit(EXIT_FAILURE);
+	}
+	while((fields_nb = read_line_fplm(fplm, form, pos, lemma, morpho)) != -1)
+	{
+		if(fields_nb != 4)
+		{
+			fprintf(stderr, "form = %s pos = %s lemma = %s morpho = %s\n", form, pos, lemma, morpho);
+			fprintf(stderr, "incorrect fplm entry, skipping it\n");
+			continue;
+		}
+		write_cff(cff, class, classes_array, form, morpho, fv, fm, dico_features);
+	}
+	if(ctx->features_filename)
+		dico_print(ctx->features_filename, dico_features);
+	fclose(fplm);
+	if(fclose(cff) != 0) /* the final flush failed: cff.txt is incomplete */
+	{
+		fprintf(stderr,"Problem with the cff file.\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * Writes to cff the class label(s) of one fplm entry, each followed by the
+ * feature vector that form2fv builds for form (printed with feat_vec_print).
+ * The label depends on what extract_morpho_feature keeps of morpho for class:
+ * 0 when nothing is kept, each character in turn for PERSON, otherwise the
+ * number given by associate_number_to_classes.
+ */
+void write_cff(FILE *cff, CLASS class, char* classes_array, char* form, char* morpho, feat_vec *fv, feat_model *fm, dico *dico_features)
+{
+	size_t i, len, used = 0;
+	int code = 0, label = 0;
+	char digits[64]; /* room for 9 characters of at most 4 bytes ("-128") each */
+	char morpho_feature[10];
+	extract_morpho_feature(class, morpho_feature, morpho);
+
+	if(morpho_feature[0] == '\0')
+		label = 0; /* nothing extracted: the entry is written with class 0 */
+	else if(class == GENDER || class == NUMBER)
+		label = associate_number_to_classes(classes_array, morpho_feature[0]);
+	else if(class == PERSON)
+	{
+		/* a word can have several persons: each one is written with the features' values */
+		len = strlen(morpho_feature);
+		for(i=0; i<len; i++)
+		{
+			fprintf(cff,"%c", morpho_feature[i]);
+			form2fv(form, fv, fm, dico_features, ADD_MODE);
+			feat_vec_print(cff, fv);
+		}
+		return;
+	}
+	else
+	{
+		/* e.g. tense "PS": the codes of 'P' and 'S' are concatenated ("8083") and read back */
+		len = strlen(morpho_feature);
+		for(i=0; i<len; i++)
+			used += (size_t)snprintf(digits + used, sizeof digits - used, "%d", morpho_feature[i]);
+		if(sscanf(digits, "%d", &code) != 1)
+			code = 0;
+		/* NOTE(review): associate_number_to_classes is given a single char, as for GENDER and
+		   NUMBER, so code is narrowed here; confirm that the full number is not expected. */
+		label = associate_number_to_classes(classes_array, (char)code);
+	}
+	fprintf(cff,"%d", label);
+	form2fv(form, fv, fm, dico_features, ADD_MODE);
+	feat_vec_print(cff, fv);
+}
-- 
GitLab