#ifndef __WORD__ #define __WORD__ #include "mcd.h" #include "char16.h" #define WORD_INVALID_GOV 10000 typedef struct _word { int wf_array[MCD_WF_NB]; /* array containing the codes corresponding to the different word features */ char *input; /* the string corresponding to the actual line in the corpus file */ int U1; /* does the form begin with an uppercase character */ int signature; /* pos tags that this form can have (represented as a boolean string) */ int label; char *form; char16 *form_char16; int index; int is_root; } word; /* #define word_get_s1(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[strlen((w)->form) - 1]) #define word_get_s2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[strlen((w)->form) - 2]) #define word_get_s3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[strlen((w)->form) - 3]) #define word_get_s4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[strlen((w)->form) - 4]) #define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5]) #define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6]) */ #define word_get_s1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 1]) #define word_get_s2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 2]) #define word_get_s3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 3]) #define word_get_s4(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 4))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 4]) #define word_get_s5(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 5))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 5]) #define word_get_s6(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 6))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 6]) /*#define word_get_p1(w) ((((w) == NULL) || ((w)->form == NULL) )? -1 : (w)->form[0]) #define word_get_p2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[1]) #define word_get_p3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[2]) #define word_get_p4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[3]) #define word_get_p5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[4]) #define word_get_p6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[5]) */ #define word_get_p1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) )? -1 : (w)->form_char16[0]) #define word_get_p2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[1]) #define word_get_p3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[2]) #define word_get_p4(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[3]) #define word_get_p5(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 4))? -1 : (w)->form_char16[4]) #define word_get_p6(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 5))? -1 : (w)->form_char16[5]) #define word_get_id(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID]) #define word_get_form(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM]) #define word_get_lemma(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA]) #define word_get_cpos(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_CPOS]) #define word_get_pos(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_POS]) #define word_get_feats(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FEATS]) #define word_get_gov(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_GOV]) #define word_get_label(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL]) #define word_get_stag(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG]) #define word_get_sent_seg(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SENT_SEG]) #define word_get_A(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A]) #define word_get_B(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B]) #define word_get_C(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C]) #define word_get_D(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_D]) #define word_get_E(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_E]) #define word_get_F(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_F]) #define word_get_G(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_G]) #define word_get_H(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_H]) #define word_get_I(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_I]) #define word_get_J(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_J]) #define word_get_K(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_K]) #define word_get_L(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_L]) #define word_get_M(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_M]) #define word_get_N(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_N]) #define word_get_O(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_O]) #define word_get_P(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_P]) #define word_get_Q(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Q]) #define word_get_R(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_R]) #define word_get_S(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_S]) #define word_get_T(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_T]) #define word_get_U(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_U]) #define word_get_V(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_V]) #define word_get_W(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_W]) #define word_get_X(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_X]) #define word_get_Y(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Y]) #define word_get_Z(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_Z]) #define word_get_input(w) (((w) == NULL) ? NULL : (w)->input) #define word_get_signature(w) (((w) == NULL) ? -1 : (w)->signature) #define word_get_U1(w) (((w) == NULL) ? -1 : (w)->U1) #define word_get_index(w) (((w) == NULL) ? -1 : (w)->index) #define word_set_id(w, val) ((w)->wf_array[MCD_WF_ID] = (val)) #define word_set_form(w, val) ((w)->wf_array[MCD_WF_FORM] = (val)) #define word_set_lemma(w, val) ((w)->wf_array[MCD_WF_LEMMA] = (val)) #define word_set_cpos(w, val) ((w)->wf_array[MCD_WF_CPOS] = (val)) #define word_set_pos(w, val) ((w)->wf_array[MCD_WF_POS] = (val)) #define word_set_feats(w, val) ((w)->wf_array[MCD_WF_FEATS] = (val)) #define word_set_gov(w, val) ((w)->wf_array[MCD_WF_GOV] = (val)) #define word_set_label(w, val) ((w)->wf_array[MCD_WF_LABEL] = (val)) #define word_set_stag(w, val) ((w)->wf_array[MCD_WF_STAG] = (val)) #define word_set_sent_seg(w, val) ((w)->wf_array[MCD_WF_SENT_SEG] = (val)) #define word_set_A(w, val) ((w)->wf_array[MCD_WF_A] = (val)) #define word_set_B(w, val) ((w)->wf_array[MCD_WF_B] = (val)) #define word_set_C(w, val) ((w)->wf_array[MCD_WF_C] = (val)) #define word_set_D(w, val) ((w)->wf_array[MCD_WF_D] = (val)) #define word_set_E(w, val) ((w)->wf_array[MCD_WF_E] = (val)) #define word_set_F(w, val) ((w)->wf_array[MCD_WF_F] = (val)) #define word_set_G(w, val) ((w)->wf_array[MCD_WF_G] = (val)) #define word_set_H(w, val) ((w)->wf_array[MCD_WF_H] = (val)) #define word_set_I(w, val) ((w)->wf_array[MCD_WF_I] = (val)) #define word_set_J(w, val) ((w)->wf_array[MCD_WF_J] = (val)) #define word_set_K(w, val) ((w)->wf_array[MCD_WF_K] = (val)) #define word_set_L(w, val) ((w)->wf_array[MCD_WF_L] = (val)) #define word_set_M(w, val) ((w)->wf_array[MCD_WF_M] = (val)) #define word_set_N(w, val) ((w)->wf_array[MCD_WF_N] = (val)) #define word_set_O(w, val) ((w)->wf_array[MCD_WF_O] = (val)) #define word_set_P(w, val) ((w)->wf_array[MCD_WF_P] = (val)) #define word_set_Q(w, val) ((w)->wf_array[MCD_WF_Q] = (val)) #define word_set_R(w, val) ((w)->wf_array[MCD_WF_R] = (val)) #define word_set_S(w, val) ((w)->wf_array[MCD_WF_S] = (val)) #define word_set_T(w, val) ((w)->wf_array[MCD_WF_T] = (val)) #define word_set_U(w, val) ((w)->wf_array[MCD_WF_U] = (val)) #define word_set_V(w, val) ((w)->wf_array[MCD_WF_V] = (val)) #define word_set_W(w, val) ((w)->wf_array[MCD_WF_W] = (val)) #define word_set_X(w, val) ((w)->wf_array[MCD_WF_X] = (val)) #define word_set_Y(w, val) ((w)->wf_array[MCD_WF_Y] = (val)) #define word_set_Z(w, val) ((w)->wf_array[MCD_WF_Z] = (val)) #define word_set_signature(w, val) ((w)->signature = (val)) #define word_set_index(w, val) ((w)->index = (val)) word *word_new(char *input); word *word_create_dummy(mcd *mcd_struct); word *word_copy(word *w); void word_free(word *w); void word_print2(FILE *f, word *w); void word_print(FILE *f, word *w); word *word_read(FILE *f, mcd *mcd_struct); word *word_parse_buffer(char *buffer, mcd *mcd_struct); int word_is_eos(word *w, mcd *mcd_struct); int word_get_gov_index(word *w); void word_print_col_n(FILE *f, word *w, int n); void word_sprint_col_n(char *s, word *w, int n); #endif