#ifndef __WORD__
#define __WORD__

#include "mcd.h"
#include "char16.h"

/* NOTE(review): presumably a sentinel governor value meaning "no valid governor"
   (cf. word_get_gov / word_set_gov); it is not referenced anywhere else in this
   header, so confirm the exact meaning against its users. */
#define WORD_INVALID_GOV 10000

/* A word record: the encoded word features plus the raw input line and a few
   derived fields.  Most fields are read and written through the word_get_ and
   word_set_ accessor macros of this header. */
typedef struct _word {
  int wf_array[MCD_WF_NB];      /* array containing the codes corresponding to the different word features */
  char *input;                  /* the string corresponding to the actual line in the corpus file */
  int U1;                       /* does the form begin with an uppercase character */
  int signature;                /* pos tags that this form can have (represented as a boolean string) */
  int label;                    /* NOTE(review): word_get_label() does NOT read this field, it reads wf_array[MCD_WF_LABEL]; purpose not visible in this header */
  char *form;                   /* presumably the form as a byte string -- TODO confirm; in this header only the commented-out legacy macros read it */
  char16 *form_char16;          /* the form as a char16 string; read by the word_get_sN and word_get_pN macros */
  int index;                    /* accessed through word_get_index() / word_set_index(); meaning of the index not visible here */
  int is_root;                  /* presumably a boolean flag marking a root word -- TODO confirm; no accessor in this header */
} word;

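/* Earlier byte-oriented versions operating on (w)->form; the active definitions below use (w)->form_char16.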
#define word_get_s1(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[strlen((w)->form) - 1])
#define word_get_s2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[strlen((w)->form) - 2])
#define word_get_s3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[strlen((w)->form) - 3])
#define word_get_s4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[strlen((w)->form) - 4])
#define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5])
#define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6])
*/
/*
 * Suffix accessors: word_get_sN(w) yields the N-th character counted from the
 * end of w->form_char16 (s1 is the last character), or -1 when w is NULL, the
 * form is NULL, or the form is shorter than N characters.
 *
 * WORD_CHAR_FROM_END is a private helper shared by the six accessors.  Being
 * macros, they evaluate their argument several times: pass no side effects.
 */
#define WORD_CHAR_FROM_END(w, k)                                   \
  ((((w) != NULL) && ((w)->form_char16 != NULL)                    \
    && (char16_strlen((w)->form_char16) >= (k)))                   \
     ? (w)->form_char16[char16_strlen((w)->form_char16) - (k)]     \
     : -1)

#define word_get_s1(w) WORD_CHAR_FROM_END(w, 1)
#define word_get_s2(w) WORD_CHAR_FROM_END(w, 2)
#define word_get_s3(w) WORD_CHAR_FROM_END(w, 3)
#define word_get_s4(w) WORD_CHAR_FROM_END(w, 4)
#define word_get_s5(w) WORD_CHAR_FROM_END(w, 5)
#define word_get_s6(w) WORD_CHAR_FROM_END(w, 6)

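/* Earlier byte-oriented versions operating on (w)->form; the active definitions below use (w)->form_char16.
#define word_get_p1(w) ((((w) == NULL) || ((w)->form == NULL)                           )? -1 : (w)->form[0])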
#define word_get_p2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[1])
#define word_get_p3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[2])
#define word_get_p4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[3])
#define word_get_p5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[4])
#define word_get_p6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[5])
*/

/*
 * Prefix accessors: word_get_pN(w) yields the N-th character (1-based) of
 * w->form_char16, or -1 when w is NULL, the form is NULL, or the form holds
 * fewer than N characters.
 *
 * The length guard is "< N" so that a too-short form gives -1, exactly like
 * the suffix accessors word_get_sN.  The previous guard ("< N-1", and no
 * length test at all for p1) let pN return the element just past the last
 * character -- the string terminator, assuming char16 strings are
 * 0-terminated -- for a form of exactly N-1 characters.
 *
 * Being macros, they evaluate their argument several times: pass no side
 * effects.
 */
#define word_get_p1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[0])
#define word_get_p2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[1])
#define word_get_p3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[2])
#define word_get_p4(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 4))? -1 : (w)->form_char16[3])
#define word_get_p5(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 5))? -1 : (w)->form_char16[4])
#define word_get_p6(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 6))? -1 : (w)->form_char16[5])

/*
 * Read accessors.  Each macro takes a word pointer and yields a fallback value
 * instead of dereferencing when that pointer is NULL: 0 for gov and sent_seg,
 * NULL for input, -1 for everything else.
 *
 * WORD_FIELD_OR and WORD_WF_OR are private helpers of this group.  Being
 * macros, the accessors evaluate their argument twice: pass no side effects.
 */
#define WORD_FIELD_OR(w, member, fallback) (((w) != NULL) ? (w)->member : (fallback))
#define WORD_WF_OR(w, column, fallback)    WORD_FIELD_OR(w, wf_array[(column)], fallback)

/* Word features stored in wf_array, addressed by their MCD_WF_ column. */
#define word_get_id(w)             WORD_WF_OR(w, MCD_WF_ID, -1)
#define word_get_form(w)           WORD_WF_OR(w, MCD_WF_FORM, -1)
#define word_get_lemma(w)          WORD_WF_OR(w, MCD_WF_LEMMA, -1)
#define word_get_cpos(w)           WORD_WF_OR(w, MCD_WF_CPOS, -1)
#define word_get_pos(w)            WORD_WF_OR(w, MCD_WF_POS, -1)
#define word_get_feats(w)          WORD_WF_OR(w, MCD_WF_FEATS, -1)
#define word_get_gov(w)            WORD_WF_OR(w, MCD_WF_GOV, 0)
#define word_get_label(w)          WORD_WF_OR(w, MCD_WF_LABEL, -1)
#define word_get_stag(w)           WORD_WF_OR(w, MCD_WF_STAG, -1)
#define word_get_sent_seg(w)       WORD_WF_OR(w, MCD_WF_SENT_SEG, 0)
#define word_get_A(w)              WORD_WF_OR(w, MCD_WF_A, -1)
#define word_get_B(w)              WORD_WF_OR(w, MCD_WF_B, -1)
#define word_get_C(w)              WORD_WF_OR(w, MCD_WF_C, -1)
#define word_get_D(w)              WORD_WF_OR(w, MCD_WF_D, -1)
#define word_get_E(w)              WORD_WF_OR(w, MCD_WF_E, -1)
#define word_get_F(w)              WORD_WF_OR(w, MCD_WF_F, -1)
#define word_get_G(w)              WORD_WF_OR(w, MCD_WF_G, -1)
#define word_get_H(w)              WORD_WF_OR(w, MCD_WF_H, -1)
#define word_get_I(w)              WORD_WF_OR(w, MCD_WF_I, -1)
#define word_get_J(w)              WORD_WF_OR(w, MCD_WF_J, -1)
#define word_get_K(w)              WORD_WF_OR(w, MCD_WF_K, -1)
#define word_get_L(w)              WORD_WF_OR(w, MCD_WF_L, -1)
#define word_get_M(w)              WORD_WF_OR(w, MCD_WF_M, -1)
#define word_get_N(w)              WORD_WF_OR(w, MCD_WF_N, -1)
#define word_get_O(w)              WORD_WF_OR(w, MCD_WF_O, -1)
#define word_get_P(w)              WORD_WF_OR(w, MCD_WF_P, -1)
#define word_get_Q(w)              WORD_WF_OR(w, MCD_WF_Q, -1)
#define word_get_R(w)              WORD_WF_OR(w, MCD_WF_R, -1)
#define word_get_S(w)              WORD_WF_OR(w, MCD_WF_S, -1)
#define word_get_T(w)              WORD_WF_OR(w, MCD_WF_T, -1)
#define word_get_U(w)              WORD_WF_OR(w, MCD_WF_U, -1)
#define word_get_V(w)              WORD_WF_OR(w, MCD_WF_V, -1)
#define word_get_W(w)              WORD_WF_OR(w, MCD_WF_W, -1)
#define word_get_X(w)              WORD_WF_OR(w, MCD_WF_X, -1)
#define word_get_Y(w)              WORD_WF_OR(w, MCD_WF_Y, -1)
#define word_get_Z(w)              WORD_WF_OR(w, MCD_WF_Z, -1)

/* Plain struct members. */
#define word_get_input(w)          WORD_FIELD_OR(w, input, NULL)
#define word_get_signature(w)      WORD_FIELD_OR(w, signature, -1)
#define word_get_U1(w)             WORD_FIELD_OR(w, U1, -1)
#define word_get_index(w)          WORD_FIELD_OR(w, index, -1)

/*
 * Write accessors.  Each macro stores val into the corresponding slot of the
 * word and, being an assignment expression, yields the stored value.
 * Unlike the read accessors there is no NULL check: w must point to a word.
 *
 * WORD_WF_ASSIGN and WORD_FIELD_ASSIGN are private helpers of this group.
 */
#define WORD_FIELD_ASSIGN(w, member, val) ((w)->member = (val))
#define WORD_WF_ASSIGN(w, column, val)    WORD_FIELD_ASSIGN(w, wf_array[(column)], val)

/* Word features stored in wf_array, addressed by their MCD_WF_ column. */
#define word_set_id(w, val)        WORD_WF_ASSIGN(w, MCD_WF_ID, val)
#define word_set_form(w, val)      WORD_WF_ASSIGN(w, MCD_WF_FORM, val)
#define word_set_lemma(w, val)     WORD_WF_ASSIGN(w, MCD_WF_LEMMA, val)
#define word_set_cpos(w, val)      WORD_WF_ASSIGN(w, MCD_WF_CPOS, val)
#define word_set_pos(w, val)       WORD_WF_ASSIGN(w, MCD_WF_POS, val)
#define word_set_feats(w, val)     WORD_WF_ASSIGN(w, MCD_WF_FEATS, val)
#define word_set_gov(w, val)       WORD_WF_ASSIGN(w, MCD_WF_GOV, val)
#define word_set_label(w, val)     WORD_WF_ASSIGN(w, MCD_WF_LABEL, val)
#define word_set_stag(w, val)      WORD_WF_ASSIGN(w, MCD_WF_STAG, val)
#define word_set_sent_seg(w, val)  WORD_WF_ASSIGN(w, MCD_WF_SENT_SEG, val)
#define word_set_A(w, val)         WORD_WF_ASSIGN(w, MCD_WF_A, val)
#define word_set_B(w, val)         WORD_WF_ASSIGN(w, MCD_WF_B, val)
#define word_set_C(w, val)         WORD_WF_ASSIGN(w, MCD_WF_C, val)
#define word_set_D(w, val)         WORD_WF_ASSIGN(w, MCD_WF_D, val)
#define word_set_E(w, val)         WORD_WF_ASSIGN(w, MCD_WF_E, val)
#define word_set_F(w, val)         WORD_WF_ASSIGN(w, MCD_WF_F, val)
#define word_set_G(w, val)         WORD_WF_ASSIGN(w, MCD_WF_G, val)
#define word_set_H(w, val)         WORD_WF_ASSIGN(w, MCD_WF_H, val)
#define word_set_I(w, val)         WORD_WF_ASSIGN(w, MCD_WF_I, val)
#define word_set_J(w, val)         WORD_WF_ASSIGN(w, MCD_WF_J, val)
#define word_set_K(w, val)         WORD_WF_ASSIGN(w, MCD_WF_K, val)
#define word_set_L(w, val)         WORD_WF_ASSIGN(w, MCD_WF_L, val)
#define word_set_M(w, val)         WORD_WF_ASSIGN(w, MCD_WF_M, val)
#define word_set_N(w, val)         WORD_WF_ASSIGN(w, MCD_WF_N, val)
#define word_set_O(w, val)         WORD_WF_ASSIGN(w, MCD_WF_O, val)
#define word_set_P(w, val)         WORD_WF_ASSIGN(w, MCD_WF_P, val)
#define word_set_Q(w, val)         WORD_WF_ASSIGN(w, MCD_WF_Q, val)
#define word_set_R(w, val)         WORD_WF_ASSIGN(w, MCD_WF_R, val)
#define word_set_S(w, val)         WORD_WF_ASSIGN(w, MCD_WF_S, val)
#define word_set_T(w, val)         WORD_WF_ASSIGN(w, MCD_WF_T, val)
#define word_set_U(w, val)         WORD_WF_ASSIGN(w, MCD_WF_U, val)
#define word_set_V(w, val)         WORD_WF_ASSIGN(w, MCD_WF_V, val)
#define word_set_W(w, val)         WORD_WF_ASSIGN(w, MCD_WF_W, val)
#define word_set_X(w, val)         WORD_WF_ASSIGN(w, MCD_WF_X, val)
#define word_set_Y(w, val)         WORD_WF_ASSIGN(w, MCD_WF_Y, val)
#define word_set_Z(w, val)         WORD_WF_ASSIGN(w, MCD_WF_Z, val)

/* Plain struct members. */
#define word_set_signature(w, val) WORD_FIELD_ASSIGN(w, signature, val)
#define word_set_index(w, val)     WORD_FIELD_ASSIGN(w, index, val)

/*
 * Function prototypes.  The implementations are not part of this header, so
 * the per-function notes below are inferred from names and signatures only --
 * confirm them against the implementation file.
 *
 * NOTE(review): FILE is used below but <stdio.h> is not included directly by
 * this header; presumably it arrives through mcd.h -- verify.
 */

/* Presumably allocates a word for the given input line -- TODO confirm who owns `input`. */
word *word_new(char *input);
/* Presumably builds a placeholder word for the given mcd description -- TODO confirm. */
word *word_create_dummy(mcd *mcd_struct);

/* Presumably duplicates w / releases w -- TODO confirm whether the copy is deep. */
word *word_copy(word *w);
void word_free(word *w);
/* Output routines writing w to stream f; the difference between the two formats is not visible here. */
void word_print2(FILE *f, word *w);
void word_print(FILE *f, word *w);

/* Presumably reads the next word from stream f according to mcd_struct -- TODO confirm the end-of-file return value. */
word *word_read(FILE *f, mcd *mcd_struct);

/* Presumably builds a word from one text line held in buffer -- TODO confirm. */
word *word_parse_buffer(char *buffer, mcd *mcd_struct);
/* Presumably tells whether w ends a sentence -- TODO confirm the return convention. */
int word_is_eos(word *w, mcd *mcd_struct);
/* Presumably derives the governor's index from w -- TODO confirm relation to word_get_gov / word_get_index. */
int word_get_gov_index(word *w);
/* Presumably output column n of w to stream f, resp. into string s -- TODO confirm; no size is passed for s. */
void word_print_col_n(FILE *f, word *w, int n);
void word_sprint_col_n(char *s, word *w, int n);

#endif