/* word.h */
#ifndef __WORD__
#define __WORD__
#include "mcd.h"
/* NOTE(review): presumably a sentinel for an invalid/unknown governor; no use of it is visible in this header -- confirm.
   Note that word_get_gov() on a NULL word yields 0, not this value. */
#define WORD_INVALID_GOV 10000
/* Per-word record: integer feature codes (indexed by the MCD_WF_* constants)
   plus a few fields kept outside the feature array. */
typedef struct _word {
int wf_array[MCD_WF_NB]; /* codes of the different word features, one slot per MCD_WF_* index */
char *input; /* the string corresponding to the actual line in the corpus file */
int U1; /* does the form begin with an uppercase character */
int signature; /* pos tags that this form can have (represented as a boolean string) */
int label; /* NOTE(review): purpose not visible here; separate from wf_array[MCD_WF_LABEL], which word_get_label() reads -- confirm */
char *form; /* NOTE(review): presumably the form as text; allocation/ownership not visible here -- confirm */
int index; /* read/written by word_get_index()/word_set_index(); NOTE(review): presumably the word's position -- confirm */
int is_root; /* NOTE(review): presumably a flag marking a root word -- confirm */
} word;
/*
 * Read accessors.
 *
 * Every getter accepts a NULL word and then yields a fallback value instead
 * of dereferencing it: -1 for most features, 0 for gov and sent_seg, NULL for
 * input. The word argument may be evaluated more than once, so it must not
 * have side effects.
 */

/* Feature code stored in wf_array[idx], or dflt when w is NULL. */
#define WORD_WF_GET_OR(w, idx, dflt) ((w) != NULL ? (w)->wf_array[(idx)] : (dflt))

/* Named features. */
#define word_get_id(w)       WORD_WF_GET_OR((w), MCD_WF_ID, -1)
#define word_get_form(w)     WORD_WF_GET_OR((w), MCD_WF_FORM, -1)
#define word_get_lemma(w)    WORD_WF_GET_OR((w), MCD_WF_LEMMA, -1)
#define word_get_cpos(w)     WORD_WF_GET_OR((w), MCD_WF_CPOS, -1)
#define word_get_pos(w)      WORD_WF_GET_OR((w), MCD_WF_POS, -1)
#define word_get_feats(w)    WORD_WF_GET_OR((w), MCD_WF_FEATS, -1)
#define word_get_gov(w)      WORD_WF_GET_OR((w), MCD_WF_GOV, 0)
#define word_get_label(w)    WORD_WF_GET_OR((w), MCD_WF_LABEL, -1)
#define word_get_stag(w)     WORD_WF_GET_OR((w), MCD_WF_STAG, -1)
#define word_get_sent_seg(w) WORD_WF_GET_OR((w), MCD_WF_SENT_SEG, 0)

/* Letter-named features: by offset L from MCD_WF_A, or by name (A..Z). */
#define word_get_letterfeat(w, L) WORD_WF_GET_OR((w), MCD_WF_A + (L), -1)
#define word_get_A(w) WORD_WF_GET_OR((w), MCD_WF_A, -1)
#define word_get_B(w) WORD_WF_GET_OR((w), MCD_WF_B, -1)
#define word_get_C(w) WORD_WF_GET_OR((w), MCD_WF_C, -1)
#define word_get_D(w) WORD_WF_GET_OR((w), MCD_WF_D, -1)
#define word_get_E(w) WORD_WF_GET_OR((w), MCD_WF_E, -1)
#define word_get_F(w) WORD_WF_GET_OR((w), MCD_WF_F, -1)
#define word_get_G(w) WORD_WF_GET_OR((w), MCD_WF_G, -1)
#define word_get_H(w) WORD_WF_GET_OR((w), MCD_WF_H, -1)
#define word_get_I(w) WORD_WF_GET_OR((w), MCD_WF_I, -1)
#define word_get_J(w) WORD_WF_GET_OR((w), MCD_WF_J, -1)
#define word_get_K(w) WORD_WF_GET_OR((w), MCD_WF_K, -1)
#define word_get_L(w) WORD_WF_GET_OR((w), MCD_WF_L, -1)
#define word_get_M(w) WORD_WF_GET_OR((w), MCD_WF_M, -1)
#define word_get_N(w) WORD_WF_GET_OR((w), MCD_WF_N, -1)
#define word_get_O(w) WORD_WF_GET_OR((w), MCD_WF_O, -1)
#define word_get_P(w) WORD_WF_GET_OR((w), MCD_WF_P, -1)
#define word_get_Q(w) WORD_WF_GET_OR((w), MCD_WF_Q, -1)
#define word_get_R(w) WORD_WF_GET_OR((w), MCD_WF_R, -1)
#define word_get_S(w) WORD_WF_GET_OR((w), MCD_WF_S, -1)
#define word_get_T(w) WORD_WF_GET_OR((w), MCD_WF_T, -1)
#define word_get_U(w) WORD_WF_GET_OR((w), MCD_WF_U, -1)
#define word_get_V(w) WORD_WF_GET_OR((w), MCD_WF_V, -1)
#define word_get_W(w) WORD_WF_GET_OR((w), MCD_WF_W, -1)
#define word_get_X(w) WORD_WF_GET_OR((w), MCD_WF_X, -1)
#define word_get_Y(w) WORD_WF_GET_OR((w), MCD_WF_Y, -1)
#define word_get_Z(w) WORD_WF_GET_OR((w), MCD_WF_Z, -1)

/* Fields stored directly in the struct rather than in wf_array. */
#define word_get_input(w)     ((w) != NULL ? (w)->input : NULL)
#define word_get_signature(w) ((w) != NULL ? (w)->signature : -1)
#define word_get_U1(w)        ((w) != NULL ? (w)->U1 : -1)
#define word_get_index(w)     ((w) != NULL ? (w)->index : -1)
/*
 * Write accessors.
 *
 * Unlike the getters, the setters perform no NULL check: the word argument is
 * dereferenced unconditionally. Each setter expands to an assignment
 * expression.
 */

/* Store val in wf_array[idx] of w. */
#define WORD_WF_SET(w, idx, val) ((w)->wf_array[(idx)] = (val))

/* Named features. */
#define word_set_id(w, val)       WORD_WF_SET((w), MCD_WF_ID, (val))
#define word_set_form(w, val)     WORD_WF_SET((w), MCD_WF_FORM, (val))
#define word_set_lemma(w, val)    WORD_WF_SET((w), MCD_WF_LEMMA, (val))
#define word_set_cpos(w, val)     WORD_WF_SET((w), MCD_WF_CPOS, (val))
#define word_set_pos(w, val)      WORD_WF_SET((w), MCD_WF_POS, (val))
#define word_set_feats(w, val)    WORD_WF_SET((w), MCD_WF_FEATS, (val))
#define word_set_gov(w, val)      WORD_WF_SET((w), MCD_WF_GOV, (val))
#define word_set_label(w, val)    WORD_WF_SET((w), MCD_WF_LABEL, (val))
#define word_set_stag(w, val)     WORD_WF_SET((w), MCD_WF_STAG, (val))
#define word_set_sent_seg(w, val) WORD_WF_SET((w), MCD_WF_SENT_SEG, (val))

/* Letter-named features A..Z. */
#define word_set_A(w, val) WORD_WF_SET((w), MCD_WF_A, (val))
#define word_set_B(w, val) WORD_WF_SET((w), MCD_WF_B, (val))
#define word_set_C(w, val) WORD_WF_SET((w), MCD_WF_C, (val))
#define word_set_D(w, val) WORD_WF_SET((w), MCD_WF_D, (val))
#define word_set_E(w, val) WORD_WF_SET((w), MCD_WF_E, (val))
#define word_set_F(w, val) WORD_WF_SET((w), MCD_WF_F, (val))
#define word_set_G(w, val) WORD_WF_SET((w), MCD_WF_G, (val))
#define word_set_H(w, val) WORD_WF_SET((w), MCD_WF_H, (val))
#define word_set_I(w, val) WORD_WF_SET((w), MCD_WF_I, (val))
#define word_set_J(w, val) WORD_WF_SET((w), MCD_WF_J, (val))
#define word_set_K(w, val) WORD_WF_SET((w), MCD_WF_K, (val))
#define word_set_L(w, val) WORD_WF_SET((w), MCD_WF_L, (val))
#define word_set_M(w, val) WORD_WF_SET((w), MCD_WF_M, (val))
#define word_set_N(w, val) WORD_WF_SET((w), MCD_WF_N, (val))
#define word_set_O(w, val) WORD_WF_SET((w), MCD_WF_O, (val))
#define word_set_P(w, val) WORD_WF_SET((w), MCD_WF_P, (val))
#define word_set_Q(w, val) WORD_WF_SET((w), MCD_WF_Q, (val))
#define word_set_R(w, val) WORD_WF_SET((w), MCD_WF_R, (val))
#define word_set_S(w, val) WORD_WF_SET((w), MCD_WF_S, (val))
#define word_set_T(w, val) WORD_WF_SET((w), MCD_WF_T, (val))
#define word_set_U(w, val) WORD_WF_SET((w), MCD_WF_U, (val))
#define word_set_V(w, val) WORD_WF_SET((w), MCD_WF_V, (val))
#define word_set_W(w, val) WORD_WF_SET((w), MCD_WF_W, (val))
#define word_set_X(w, val) WORD_WF_SET((w), MCD_WF_X, (val))
#define word_set_Y(w, val) WORD_WF_SET((w), MCD_WF_Y, (val))
#define word_set_Z(w, val) WORD_WF_SET((w), MCD_WF_Z, (val))

/* Fields stored directly in the struct rather than in wf_array. */
#define word_set_signature(w, val) ((w)->signature = (val))
#define word_set_index(w, val)     ((w)->index = (val))
/*
 * Functions operating on words; their definitions are not in this header.
 * NOTE(review): several prototypes use FILE, but this header does not include
 * <stdio.h> itself -- it must come from mcd.h or from the including file;
 * confirm.
 */

/* NOTE(review): presumably builds a word from a corpus line; whether `input` is copied or retained is not visible here -- confirm. */
word *word_new(char *input);
/* NOTE(review): presumably builds a placeholder word for the given mcd -- confirm. */
word *word_create_dummy(mcd *mcd_struct);
/* NOTE(review): presumably returns a duplicate of w; depth of the copy (input/form strings) not visible here -- confirm. */
word *word_copy(word *w);
/* NOTE(review): presumably releases a word returned by the functions above -- confirm. */
void word_free(word *w);
/* Print w to stream f. NOTE(review): how the two print variants differ is not visible here -- confirm. */
void word_print2(FILE *f, word *w);
void word_print(FILE *f, word *w);
/* NOTE(review): presumably reads the next word from f according to mcd_struct; end-of-input/error return not visible here -- confirm. */
word *word_read(FILE *f, mcd *mcd_struct);
/* NOTE(review): presumably parses one line held in buffer into a word; whether buffer is modified is not visible here -- confirm. */
word *word_parse_buffer(char *buffer, mcd *mcd_struct);
/* NOTE(review): presumably non-zero when w marks an end of sentence -- confirm. */
int word_is_eos(word *w, mcd *mcd_struct);
/* NOTE(review): presumably the index of w's governor, derived from index and gov -- confirm. */
int word_get_gov_index(word *w);
/* Column-n output of w to a stream (f) or a string (s).
   NOTE(review): no buffer size is passed for s, so the caller presumably must supply a large enough buffer -- confirm. */
void word_print_col_n(FILE *f, word *w, int n);
void word_sprint_col_n(char *s, word *w, int n);
#endif