Skip to content
Snippets Groups Projects
Commit 0ee666eb authored by Alexis Nasr
Browse files

Modified some tokenization rules for French

parent d1a87444
No related branches found
No related tags found
No related merge requests found
...@@ -10,33 +10,37 @@ extern int offset; ...@@ -10,33 +10,37 @@ extern int offset;
extern int token_length; extern int token_length;
extern char *token; extern char *token;
%} %}
separ [ \t\n]
nosepar [^ \t\n]
%option prefix="fr" %option prefix="fr"
/*%option outfile="fr_lex.c"*/ /*%option outfile="fr_lex.c"*/
%option noyywrap %option noyywrap
%s state_defait_amalgames %s state_defait_amalgames
%% %%
if(defait_amalgames){ if(defait_amalgames){
BEGIN(state_defait_amalgames); BEGIN(state_defait_amalgames);
} }
\<[^\>]*\> {maca_tokenizer_segment((char *)"", yytext);} \<[^\>]*\> {maca_tokenizer_segment((char *)"", yytext);}
[ \t]+ {maca_tokenizer_segment((char *)"", yytext);} {separ}+ {maca_tokenizer_segment((char *)"", yytext);}
[ ]*\. {maca_tokenizer_segment((char *)".", yytext);} {separ}*\. {maca_tokenizer_segment((char *)".", yytext);}
[ ]*\? {maca_tokenizer_segment((char *)"?", yytext);} {separ}*\? {maca_tokenizer_segment((char *)"?", yytext);}
[ ]*\! {maca_tokenizer_segment((char *)"!", yytext);} {separ}*\! {maca_tokenizer_segment((char *)"!", yytext);}
[ ]*, {maca_tokenizer_segment((char *)",", yytext);} {separ}*, {maca_tokenizer_segment((char *)",", yytext);}
[ ]*: {maca_tokenizer_segment((char *)":", yytext);} {separ}*: {maca_tokenizer_segment((char *)":", yytext);}
[ ]*; {maca_tokenizer_segment((char *)";", yytext);} {separ}*; {maca_tokenizer_segment((char *)";", yytext);}
[ ]*… {maca_tokenizer_segment((char *)"…", yytext);} {separ}*… {maca_tokenizer_segment((char *)"…", yytext);}
[ ]*\) {maca_tokenizer_segment((char *)")", yytext);} {separ}*\) {maca_tokenizer_segment((char *)")", yytext);}
[ ]*» {maca_tokenizer_segment((char *)"»", yytext);} {separ}*» {maca_tokenizer_segment((char *)"»", yytext);}
\( {maca_tokenizer_segment((char *)"((", yytext);} \( {maca_tokenizer_segment((char *)"((", yytext);}
\" {maca_tokenizer_segment((char *)"\"", yytext);} \" {maca_tokenizer_segment((char *)"\"", yytext);}
« {maca_tokenizer_segment((char *)"«", yytext);} « {maca_tokenizer_segment((char *)"«", yytext);}
[^ ]*' {maca_tokenizer_segment((char *)yytext, yytext);} {nosepar}*' {maca_tokenizer_segment((char *)yytext, yytext);}
[^ ]*’ {maca_tokenizer_segment((char *)yytext, yytext);} {nosepar}*’ {maca_tokenizer_segment((char *)yytext, yytext);}
[0-9]+,[0-9]+ {maca_tokenizer_segment(yytext, yytext);} [0-9]+,[0-9]+ {maca_tokenizer_segment(yytext, yytext);}
...@@ -52,7 +56,7 @@ extern char *token; ...@@ -52,7 +56,7 @@ extern char *token;
-elle {maca_tokenizer_segment((char *)"-elle", yytext);} -elle {maca_tokenizer_segment((char *)"-elle", yytext);}
-t-elles {maca_tokenizer_segment((char *)"-t-elles", yytext);} -t-elles {maca_tokenizer_segment((char *)"-t-elles", yytext);}
-elles {maca_tokenizer_segment((char *)"-elles", yytext);} -elles {maca_tokenizer_segment((char *)"-elles", yytext);}
\n+ {maca_tokenizer_segment((char *)"", yytext);} -là {maca_tokenizer_segment((char *)"-là", yytext);}
. {maca_tokenizer_add_char_to_token(yytext[0]);} . {maca_tokenizer_add_char_to_token(yytext[0]);}
<state_defait_amalgames>{ <state_defait_amalgames>{
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment