maca_tokenizer_functions_for_lex.c

#include <stdio.h>
#include <string.h>
#include "char16.h"

/* Shared state, defined elsewhere in the tokenizer (e.g. in the flex-generated scanner). */
extern int offset;              /* character offset of the current token in the input */
extern int token_length;        /* number of bytes currently stored in token[] */
extern char token[];            /* buffer holding the token being built */
/*extern char *yytext;*/
extern int print_offset;        /* if non-zero, print the offset column */
extern int print_token_length;  /* if non-zero, print the token-length column */
    
/* Emits the token accumulated so far (if any) and then the matched separator,
   one per line, with optional offset and length columns separated by tabs.
   Finally resets the token buffer and advances the character offset past the
   matched text. */
void maca_tokenizer_segment(char *separator, char *text_matched){
  int first = 1;
  if(token_length != 0){
    if(print_offset){
      if(first == 1) first = 0; else printf("\t");
      printf("%d", offset);
    }
    if(print_token_length){
      if(first == 1) first = 0; else printf("\t");
      printf("%d", (int) utf8_strlen(token));
    }
    if(first == 1) first = 0; else printf("\t");
    printf("%s", token);
    printf("\n");
  }

  offset += utf8_strlen(token);
  token_length = 0;
  token[0] = 0;
  first = 1;
  if(strlen(separator) != 0){
    if(print_offset){
      if(first == 1) first = 0; else printf("\t");
      printf("%d", offset);
    }
    if(print_token_length){
      if(first == 1) first = 0; else printf("\t");
      printf("%d", (int) utf8_strlen(separator));
    }
    if(first == 1) first = 0; else printf("\t");
    printf("%s", separator);
    printf("\n");
  }

  offset += utf8_strlen(text_matched);
}
    
    
/* Appends c to the current token and keeps the buffer NUL-terminated.
   The caller must ensure that token[] is large enough. */
void maca_tokenizer_add_char_to_token(char c)
{
  token[token_length] = c;
  token_length++;
  token[token_length] = 0;
}
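
For context, a minimal, hypothetical driver is sketched below; it is not part of the repository. It supplies the globals that this file declares as extern (the buffer size of 1024 and the stand-alone main are assumptions for the sketch), links against maca_tokenizer_functions_for_lex.c and the project's utf8_strlen implementation from char16.h, and splits a string on spaces the way a flex rule would: characters are fed into maca_tokenizer_add_char_to_token and maca_tokenizer_segment is called at each separator.

/* Hypothetical driver, not part of the repository: compile and link together
   with maca_tokenizer_functions_for_lex.c and the implementation of
   utf8_strlen declared in char16.h. */

/* Definitions for the globals that the tokenizer functions declare as extern.
   The buffer size is an assumption made for this sketch. */
int offset = 0;
int token_length = 0;
char token[1024];
int print_offset = 1;
int print_token_length = 1;

void maca_tokenizer_segment(char *separator, char *text_matched);
void maca_tokenizer_add_char_to_token(char c);

int main(void)
{
  char input[] = "hello world";
  char space[] = " ";
  char empty[] = "";

  for (char *p = input; *p; p++) {
    if (*p == ' ')
      maca_tokenizer_segment(space, space);  /* emit pending token, then the separator */
    else
      maca_tokenizer_add_char_to_token(*p);
  }
  maca_tokenizer_segment(empty, empty);      /* flush the last token */
  return 0;
}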