/* mcd (multi column description) files describe the format of corpus files */
/* mcd (multi column description) files describe the format of corpus files */
/* every line of an mcd file describes the content of a column of the corpus file */
/* every line of an mcd file describes the content of a column of the corpus file */
/* every line contains four fields separated by a space character */
/* every line contains four fields separated by a space character */
/* first field is the index of the column described (first column corresponds to index zero) */
/* first field is the index of the column described (first column corresponds to index one) */
/* second field is the name of the column. Such a name must be taken from the following list: */
/* second field is the name of the column. Such name must be taken from the following list: */
/* INDEX, FORM, LEMMA, CPOS, POS, FEAT, LABEL, STAG, INT, GOV, A ... Z */
/* INDEX, FORM, LEMMA, CPOS, POS, FEAT, LABEL, STAG, INT, GOV, A ... Z */
/* third field corresponds to the internal representation of the tokens found in the column described. Four values are possible: */
/* third field corresponds to the internal representation of the tokens found in the column described. Four values are possible : */
/* VOCAB if the internal representation is an integer code corresponding to the token */
/* VOCAB if the internal representation is an integer code corresponding to the token */
/* INT if the token is already an integer and its corresponding internal value is the same integer */
/* INT if the token is already an integer and its corresponding internal value is the same integer */
/* EMB if the internal representation of the token is a real-valued vector. */
/* EMB if the internal representation of the token is a real-valued vector (an embedding). */
/* _ if no internal representation is associated with the field */
/* _ if no internal representation is associated with the field */
/* fourth field is the name of a file in which the encoding is represented; this file can be either a dico-format file (see dico.h) or an embedding file (see word_emb.h) */
/* fourth field is the name of a file in which the encoding is represented; this file can be either a dico-format file (see dico.h) or an embedding file (see word_emb.h) */
typedefstruct{
typedefstruct{
intnb_col;/* number of columns in the mcd file */
intnb_col;/* number of columns in the mcd file */
intwf2col[MCD_WF_NB];/* in which column are the word features (MCD_WF_FORM, MCD_WF_LEMMA ...) represented */
intwf2col[MCD_WF_NB];/* in which column are the word features (MCD_WF_FORM, MCD_WF_LEMMA ... MCD_WF_A ... MCD_WF_Z) represented */
int*wf;/* array containing the word feature that correspond to each column */
int*wf;/* array containing the word feature that corresponds to each column */
char**wf_str;/* a string version of array word feature */
char**wf_str;/* a string version of array word feature */
int*representation;/* array containing the representation mode of every column (integer, vocabulary, embedding, NULL) */
int*representation;/* array containing the representation mode of every column (integer, vocabulary, embedding, NULL) */
char**filename;/* array containing the file in which the different values for a columnn is represented */
char**filename;/* array containing the file in which the different values for a columnn is represented */