code refactoring in mcd.h

06d939af · Alexis Nasr · 67ee4673 · 06d939af
Commit 06d939af authored 8 years ago by Alexis Nasr
--- a/maca_common/include/mcd.h
+++ b/maca_common/include/mcd.h
@@ -96,20 +96,20 @@
 /* mcd (multi column description) files describe the format of corpus files */
 /* every line of an mcd file describes the content of a column of the corpus file */
 /* every line contains four fields separated by a space character */
-/* first field is the index of the column described (first column corresponds to index zero) */
+/* first field is the index of the column described (first column corresponds to index one) */
-/* second field is the name of the column. Such must be taken from the following list: */
+/* second field is the name of the column. Such name must be taken from the following list: */
 /* INDEX, FORM, LEMMA, CPOS, POS, FEAT, LABEL, STAG, INT, GOV, A ... Z */
-/* third field correspond to the internal representation of the tokens found in the column described. Four values are possible : */
+/* third field corresponds to the internal representation of the tokens found in the column described. Four values are possible : */
 /* VOCAB if the internal representation is an integer code corresponding to the token */
 /* INT if the token is already an integer and its corresponding internal value is the same integer */
-/* EMB if the internal representation of the token is a real valued vector. */
+/* EMB if the internal representation of the token is a real valued vector (an embedding). */
 /* _   if no internal representation is associated with the field */
 /* fourth field is the name of a file in which the encoding is represented; this file can be either a dico format file (see dico.h) or an embedding file (see word_emb.h) */
 typedef struct {
  int nb_col;                 /* number of columns in the mcd file */
-  int wf2col[MCD_WF_NB];      /* in which column are the word features (MCD_WF_FORM, MCD_WF_LEMMA ...) represented */ 
+  int wf2col[MCD_WF_NB];      /* in which column are the word features (MCD_WF_FORM, MCD_WF_LEMMA ... MCD_WF_A ... MCD_WF_Z) represented */ 
-  int *wf;                    /* array containing the word feature that correspond to each column */
+  int *wf;                    /* array containing the word feature that corresponds to each column */
  char **wf_str;              /* a string version of array word feature */
  int *representation;        /* array containing the representation mode of every column (integer, vocabulary, embedding, NULL) */
  char **filename;            /* array containing the file in which the different values for a column are represented */