Skip to content
Snippets Groups Projects
Commit b564c719 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

Modified the tokenizer so that it outputs the last token (with the help of yywrap).

parent 0ee666eb
Branches
No related tags found
No related merge requests found
...@@ -9,7 +9,7 @@ extern char *token; ...@@ -9,7 +9,7 @@ extern char *token;
%option prefix="en" %option prefix="en"
%option noyywrap /*%option noyywrap*/
%% %%
\<[^\>]*\> {maca_tokenizer_segment((char *)"", yytext);} \<[^\>]*\> {maca_tokenizer_segment((char *)"", yytext);}
...@@ -44,3 +44,9 @@ wanna printf("want\nto"); ...@@ -44,3 +44,9 @@ wanna printf("want\nto");
. {maca_tokenizer_add_char_to_token(yytext[0]);} . {maca_tokenizer_add_char_to_token(yytext[0]);}
%% %%
/*
 * End-of-input hook for the scanner generated with %option prefix="en"
 * (the prefixed form of flex's yywrap).
 *
 * Calls maca_tokenizer_segment() with an empty separator and an empty
 * matched text so that a token still being accumulated when the input
 * ends gets written out.
 *
 * Returns 1, which in flex's yywrap convention means there is no further
 * input to scan.
 */
int enwrap(void)
{
  char *nothing = (char *)"";

  maca_tokenizer_segment(nothing, nothing);
  return 1;
}
...@@ -9,6 +9,9 @@ extern int defait_amalgames; ...@@ -9,6 +9,9 @@ extern int defait_amalgames;
extern int offset; extern int offset;
extern int token_length; extern int token_length;
extern char *token; extern char *token;
%} %}
separ [ \t\n] separ [ \t\n]
nosepar [^ \t\n] nosepar [^ \t\n]
...@@ -16,7 +19,7 @@ nosepar [^ \t\n] ...@@ -16,7 +19,7 @@ nosepar [^ \t\n]
%option prefix="fr" %option prefix="fr"
/*%option outfile="fr_lex.c"*/ /*%option outfile="fr_lex.c"*/
%option noyywrap /*%option noyywrap*/
%s state_defait_amalgames %s state_defait_amalgames
%% %%
...@@ -35,7 +38,7 @@ nosepar [^ \t\n] ...@@ -35,7 +38,7 @@ nosepar [^ \t\n]
{separ}*… {maca_tokenizer_segment((char *)"…", yytext);} {separ}*… {maca_tokenizer_segment((char *)"…", yytext);}
{separ}*\) {maca_tokenizer_segment((char *)")", yytext);} {separ}*\) {maca_tokenizer_segment((char *)")", yytext);}
{separ}*» {maca_tokenizer_segment((char *)"»", yytext);} {separ}*» {maca_tokenizer_segment((char *)"»", yytext);}
\( {maca_tokenizer_segment((char *)"((", yytext);} \( {maca_tokenizer_segment((char *)"(", yytext);}
\" {maca_tokenizer_segment((char *)"\"", yytext);} \" {maca_tokenizer_segment((char *)"\"", yytext);}
« {maca_tokenizer_segment((char *)"«", yytext);} « {maca_tokenizer_segment((char *)"«", yytext);}
...@@ -66,3 +69,9 @@ nosepar [^ \t\n] ...@@ -66,3 +69,9 @@ nosepar [^ \t\n]
" aux " printf("\nà\nles\n"); " aux " printf("\nà\nles\n");
} }
%% %%
/*
 * End-of-input hook for the scanner generated with %option prefix="fr"
 * (the prefixed form of flex's yywrap).
 *
 * Calls maca_tokenizer_segment() with an empty separator and an empty
 * matched text so that a token still being accumulated when the input
 * ends gets written out.
 *
 * Returns 1, which in flex's yywrap convention means there is no further
 * input to scan.
 */
int frwrap(void)
{
  char *nothing = (char *)"";

  maca_tokenizer_segment(nothing, nothing);
  return 1;
}
...@@ -9,7 +9,7 @@ extern char token[]; ...@@ -9,7 +9,7 @@ extern char token[];
extern int print_offset; extern int print_offset;
extern int print_token_length; extern int print_token_length;
void maca_tokenizer_segment(char *separator, char *xx){ void maca_tokenizer_segment(char *separator, char *text_matched){
if(token_length != 0){ if(token_length != 0){
printf("%s", token); printf("%s", token);
if(print_offset) if(print_offset)
...@@ -21,6 +21,7 @@ void maca_tokenizer_segment(char *separator, char *xx){ ...@@ -21,6 +21,7 @@ void maca_tokenizer_segment(char *separator, char *xx){
offset += utf8_strlen(token); offset += utf8_strlen(token);
token_length = 0; token_length = 0;
token[0] = 0;
if(strlen(separator) != 0){ if(strlen(separator) != 0){
printf("%s", separator); printf("%s", separator);
...@@ -30,7 +31,7 @@ void maca_tokenizer_segment(char *separator, char *xx){ ...@@ -30,7 +31,7 @@ void maca_tokenizer_segment(char *separator, char *xx){
printf("\t%d", (int) strlen(separator)); printf("\t%d", (int) strlen(separator));
printf("\n"); printf("\n");
} }
offset += strlen(xx); offset += utf8_strlen(text_matched);
} }
...@@ -40,3 +41,4 @@ void maca_tokenizer_add_char_to_token(char c) ...@@ -40,3 +41,4 @@ void maca_tokenizer_add_char_to_token(char c)
token_length++; token_length++;
token[token_length] = 0; token[token_length] = 0;
} }
void maca_tokenizer_segment(char *separator, char *xx); void maca_tokenizer_segment(char *separator, char *xx);
void maca_tokenizer_add_char_to_token(char c); void maca_tokenizer_add_char_to_token(char c);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment