Skip to content
Snippets Groups Projects
Select Git revision
  • 1a1934660f3a69849e837ea97a54efa94fee3e76
  • master default protected
  • johannes
  • partial_parser
  • Aloui_Dary
  • ignore_punct
  • AC
  • classifier
  • fixhelp
  • libmacaon2
  • error_predictor
  • morpho
  • ssrnn
  • tfparsing
  • silvio
  • tagger_options
  • maca_trans_frame_parser
  • alexis
  • new_config
  • tagparse
  • maca_graph_parser
21 results

word.h

Blame
  • word.h 10.05 KiB
    #ifndef __WORD__
    #define __WORD__
    
    #include "mcd.h"
    #include "char16.h"
    
    /* NOTE(review): presumably a sentinel stored in the governor slot (MCD_WF_GOV)
       to mark a governor that is invalid or unknown; its users are not visible in
       this header, so confirm against the implementation. */
    #define WORD_INVALID_GOV 10000
    
    /*
     * One word of a corpus together with its coded features.
     * Feature codes are stored in wf_array and indexed by the MCD_WF_* constants;
     * the word_get_* / word_set_* macros in this header are the intended accessors.
     */
    typedef struct _word {
      int wf_array[MCD_WF_NB];      /* array containing the codes corresponding to the different word features */
      char *input;                  /* the string corresponding to the actual line in the corpus file */
      int U1;                       /* does the form begin with an uppercase character */
      int signature;                /* pos tags that this form can have (represented as a boolean string) */
      int label;                    /* NOTE(review): purpose not visible here; this is a separate field from wf_array[MCD_WF_LABEL], which word_get_label() reads */
      char *form;                   /* NOTE(review): presumably the word form as a plain char string; only referenced by commented-out macros in this header */
      char16 *form_char16;          /* the form as a char16 string; read by the word_get_sN / word_get_pN macros */
      int index;                    /* NOTE(review): presumably the position of the word in its sequence; confirm with the code that calls word_set_index() */
      int is_root;                  /* NOTE(review): presumably non-zero when the word is a root; confirm */
    } word;
    
    /*
    #define word_get_s1(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[strlen((w)->form) - 1])
    #define word_get_s2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[strlen((w)->form) - 2])
    #define word_get_s3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[strlen((w)->form) - 3])
    #define word_get_s4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[strlen((w)->form) - 4])
    #define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5])
    #define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6])
    */
    /*
     * Suffix accessors: word_get_sK(w) yields the K-th character counted from the
     * end of w->form_char16 (s1 is the last character), or -1 when w is NULL, when
     * w->form_char16 is NULL, or when the form is shorter than K characters.
     * The argument w is evaluated several times: do not pass an expression with
     * side effects.
     */
    #define word_suffix_char16(w, k) \
      ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < (k))) \
        ? -1 \
        : (w)->form_char16[char16_strlen((w)->form_char16) - (k)])
    #define word_get_s1(w) word_suffix_char16(w, 1)
    #define word_get_s2(w) word_suffix_char16(w, 2)
    #define word_get_s3(w) word_suffix_char16(w, 3)
    #define word_get_s4(w) word_suffix_char16(w, 4)
    #define word_get_s5(w) word_suffix_char16(w, 5)
    #define word_get_s6(w) word_suffix_char16(w, 6)
    
    /*#define word_get_p1(w) ((((w) == NULL) || ((w)->form == NULL)                           )? -1 : (w)->form[0])
    #define word_get_p2(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 1))? -1 : (w)->form[1])
    #define word_get_p3(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 2))? -1 : (w)->form[2])
    #define word_get_p4(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 3))? -1 : (w)->form[3])
    #define word_get_p5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 4))? -1 : (w)->form[4])
    #define word_get_p6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[5])
    */
    
    /*
     * Prefix accessors: word_get_pK(w) yields the K-th character of
     * w->form_char16 (p1 is the first character), or -1 when w is NULL, when
     * w->form_char16 is NULL, or when the form has fewer than K characters.
     *
     * The length guard is "length < K". The earlier guard was "length < K - 1"
     * (and absent for p1), so a form of exactly K - 1 characters yielded the
     * element just past its last character (the terminator, assuming char16
     * strings are 0-terminated) instead of -1, unlike the word_get_sK macros.
     * NOTE(review): this changes the value returned for such short forms from 0
     * to -1; check any stored model or feature table that relied on the old value.
     *
     * The argument w is evaluated several times: do not pass an expression with
     * side effects.
     */
    #define word_prefix_char16(w, k) \
      ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < (k))) \
        ? -1 \
        : (w)->form_char16[(k) - 1])
    #define word_get_p1(w) word_prefix_char16(w, 1)
    #define word_get_p2(w) word_prefix_char16(w, 2)
    #define word_get_p3(w) word_prefix_char16(w, 3)
    #define word_get_p4(w) word_prefix_char16(w, 4)
    #define word_get_p5(w) word_prefix_char16(w, 5)
    #define word_get_p6(w) word_prefix_char16(w, 6)
    
    /*
     * Read accessors. Each macro tolerates a NULL word and then yields a default:
     * -1 for every feature code and integer field, except 0 for gov and sent_seg,
     * and NULL for input. Otherwise it yields the wf_array slot selected by the
     * matching MCD_WF_* index, or the named struct field.
     * The argument w is evaluated twice: do not pass an expression with side effects.
     */
    #define word_wf_or(w, wf, dflt)    (((w) == NULL) ? (dflt) : (w)->wf_array[(wf)])

    /* named features */
    #define word_get_id(w)             word_wf_or(w, MCD_WF_ID, -1)
    #define word_get_form(w)           word_wf_or(w, MCD_WF_FORM, -1)
    #define word_get_lemma(w)          word_wf_or(w, MCD_WF_LEMMA, -1)
    #define word_get_cpos(w)           word_wf_or(w, MCD_WF_CPOS, -1)
    #define word_get_pos(w)            word_wf_or(w, MCD_WF_POS, -1)
    #define word_get_feats(w)          word_wf_or(w, MCD_WF_FEATS, -1)
    #define word_get_gov(w)            word_wf_or(w, MCD_WF_GOV, 0)
    #define word_get_label(w)          word_wf_or(w, MCD_WF_LABEL, -1)
    #define word_get_stag(w)           word_wf_or(w, MCD_WF_STAG, -1)
    #define word_get_sent_seg(w)       word_wf_or(w, MCD_WF_SENT_SEG, 0)

    /* lettered features A..Z */
    #define word_get_A(w)              word_wf_or(w, MCD_WF_A, -1)
    #define word_get_B(w)              word_wf_or(w, MCD_WF_B, -1)
    #define word_get_C(w)              word_wf_or(w, MCD_WF_C, -1)
    #define word_get_D(w)              word_wf_or(w, MCD_WF_D, -1)
    #define word_get_E(w)              word_wf_or(w, MCD_WF_E, -1)
    #define word_get_F(w)              word_wf_or(w, MCD_WF_F, -1)
    #define word_get_G(w)              word_wf_or(w, MCD_WF_G, -1)
    #define word_get_H(w)              word_wf_or(w, MCD_WF_H, -1)
    #define word_get_I(w)              word_wf_or(w, MCD_WF_I, -1)
    #define word_get_J(w)              word_wf_or(w, MCD_WF_J, -1)
    #define word_get_K(w)              word_wf_or(w, MCD_WF_K, -1)
    #define word_get_L(w)              word_wf_or(w, MCD_WF_L, -1)
    #define word_get_M(w)              word_wf_or(w, MCD_WF_M, -1)
    #define word_get_N(w)              word_wf_or(w, MCD_WF_N, -1)
    #define word_get_O(w)              word_wf_or(w, MCD_WF_O, -1)
    #define word_get_P(w)              word_wf_or(w, MCD_WF_P, -1)
    #define word_get_Q(w)              word_wf_or(w, MCD_WF_Q, -1)
    #define word_get_R(w)              word_wf_or(w, MCD_WF_R, -1)
    #define word_get_S(w)              word_wf_or(w, MCD_WF_S, -1)
    #define word_get_T(w)              word_wf_or(w, MCD_WF_T, -1)
    #define word_get_U(w)              word_wf_or(w, MCD_WF_U, -1)
    #define word_get_V(w)              word_wf_or(w, MCD_WF_V, -1)
    #define word_get_W(w)              word_wf_or(w, MCD_WF_W, -1)
    #define word_get_X(w)              word_wf_or(w, MCD_WF_X, -1)
    #define word_get_Y(w)              word_wf_or(w, MCD_WF_Y, -1)
    #define word_get_Z(w)              word_wf_or(w, MCD_WF_Z, -1)

    /* plain struct fields */
    #define word_get_input(w)          (((w) != NULL) ? (w)->input : NULL)
    #define word_get_signature(w)      (((w) != NULL) ? (w)->signature : -1)
    #define word_get_U1(w)             (((w) != NULL) ? (w)->U1 : -1)
    #define word_get_index(w)          (((w) != NULL) ? (w)->index : -1)
    
    /*
     * Write accessors. Each macro stores val into the wf_array slot selected by
     * the matching MCD_WF_* index, or into the named struct field, and is itself
     * an assignment expression. Unlike the read accessors there is no NULL check:
     * w must point to a valid word.
     */
    #define word_wf_assign(w, wf, val) ((w)->wf_array[(wf)] = (val))

    /* named features */
    #define word_set_id(w, val)        word_wf_assign(w, MCD_WF_ID, val)
    #define word_set_form(w, val)      word_wf_assign(w, MCD_WF_FORM, val)
    #define word_set_lemma(w, val)     word_wf_assign(w, MCD_WF_LEMMA, val)
    #define word_set_cpos(w, val)      word_wf_assign(w, MCD_WF_CPOS, val)
    #define word_set_pos(w, val)       word_wf_assign(w, MCD_WF_POS, val)
    #define word_set_feats(w, val)     word_wf_assign(w, MCD_WF_FEATS, val)
    #define word_set_gov(w, val)       word_wf_assign(w, MCD_WF_GOV, val)
    #define word_set_label(w, val)     word_wf_assign(w, MCD_WF_LABEL, val)
    #define word_set_stag(w, val)      word_wf_assign(w, MCD_WF_STAG, val)
    #define word_set_sent_seg(w, val)  word_wf_assign(w, MCD_WF_SENT_SEG, val)

    /* lettered features A..Z */
    #define word_set_A(w, val)         word_wf_assign(w, MCD_WF_A, val)
    #define word_set_B(w, val)         word_wf_assign(w, MCD_WF_B, val)
    #define word_set_C(w, val)         word_wf_assign(w, MCD_WF_C, val)
    #define word_set_D(w, val)         word_wf_assign(w, MCD_WF_D, val)
    #define word_set_E(w, val)         word_wf_assign(w, MCD_WF_E, val)
    #define word_set_F(w, val)         word_wf_assign(w, MCD_WF_F, val)
    #define word_set_G(w, val)         word_wf_assign(w, MCD_WF_G, val)
    #define word_set_H(w, val)         word_wf_assign(w, MCD_WF_H, val)
    #define word_set_I(w, val)         word_wf_assign(w, MCD_WF_I, val)
    #define word_set_J(w, val)         word_wf_assign(w, MCD_WF_J, val)
    #define word_set_K(w, val)         word_wf_assign(w, MCD_WF_K, val)
    #define word_set_L(w, val)         word_wf_assign(w, MCD_WF_L, val)
    #define word_set_M(w, val)         word_wf_assign(w, MCD_WF_M, val)
    #define word_set_N(w, val)         word_wf_assign(w, MCD_WF_N, val)
    #define word_set_O(w, val)         word_wf_assign(w, MCD_WF_O, val)
    #define word_set_P(w, val)         word_wf_assign(w, MCD_WF_P, val)
    #define word_set_Q(w, val)         word_wf_assign(w, MCD_WF_Q, val)
    #define word_set_R(w, val)         word_wf_assign(w, MCD_WF_R, val)
    #define word_set_S(w, val)         word_wf_assign(w, MCD_WF_S, val)
    #define word_set_T(w, val)         word_wf_assign(w, MCD_WF_T, val)
    #define word_set_U(w, val)         word_wf_assign(w, MCD_WF_U, val)
    #define word_set_V(w, val)         word_wf_assign(w, MCD_WF_V, val)
    #define word_set_W(w, val)         word_wf_assign(w, MCD_WF_W, val)
    #define word_set_X(w, val)         word_wf_assign(w, MCD_WF_X, val)
    #define word_set_Y(w, val)         word_wf_assign(w, MCD_WF_Y, val)
    #define word_set_Z(w, val)         word_wf_assign(w, MCD_WF_Z, val)

    /* plain struct fields */
    #define word_set_signature(w, val) ((w)->signature = (val))
    #define word_set_index(w, val)     ((w)->index = (val))
    
    /*
     * Function prototypes of the word module. Only the signatures are visible in
     * this header; the one-line descriptions below are inferred from the names and
     * parameter types and must be confirmed against the implementation.
     * NOTE(review): FILE is used here but <stdio.h> is not included directly by
     * this header; presumably it arrives through "mcd.h" or the including file.
     * NOTE(review): ownership of returned word pointers is not stated here;
     * presumably they are released with word_free() — confirm.
     */

    /* Presumably builds a word from one input line. */
    word *word_new(char *input);
    /* Presumably builds a placeholder word for the given mcd description. */
    word *word_create_dummy(mcd *mcd_struct);

    /* Presumably returns a duplicate of w. */
    word *word_copy(word *w);
    /* Presumably releases w. */
    void word_free(word *w);
    /* Presumably two output formats for writing w to stream f. */
    void word_print2(FILE *f, word *w);
    void word_print(FILE *f, word *w);

    /* Presumably reads the next word from stream f using the column description mcd_struct. */
    word *word_read(FILE *f, mcd *mcd_struct);

    /* Presumably parses one line held in buffer into a word using mcd_struct. */
    word *word_parse_buffer(char *buffer, mcd *mcd_struct);
    /* Presumably tells whether w ends a sentence. */
    int word_is_eos(word *w, mcd *mcd_struct);
    /* Presumably returns the index of the governor of w. */
    int word_get_gov_index(word *w);
    /* Presumably write column n of w to stream f, or into the caller-provided string s.
       NOTE(review): no buffer size is passed for s; the caller must guarantee enough room. */
    void word_print_col_n(FILE *f, word *w, int n);
    void word_sprint_col_n(char *s, word *w, int n);
    
    #endif