Commit d5460fe9 authored by Alexis Nasr
Browse files

code refactoring in maca_lexer/src/maca_lexer.c

parent ce464596
......@@ -103,19 +103,20 @@ int main(int argc, char *argv[])
/* trie_print(stdout, mwe_trie); */
while(fgets(buffer, 10000, f)){
/* look for a valid word */
/* look for a valid form */
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')){
printf("\n");
continue;
}
buffer[strlen(buffer)-1] = '\0';
/* look for code of word read */
/* look for code of form read */
form_code = dico_string2int(d_mwe_tokens, buffer);
if(form_code == -1){
/* if form has no code, it cannot be part of a mwe, print the potential mwe discovered so far */
print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index);
path_index = 0;
/* print the current token */
/* print the current form */
if(ctx->paste)
printf("%s\n", buffer);
else
......@@ -123,82 +124,42 @@ int main(int argc, char *argv[])
continue;
}
/* look for the next state in the trie */
next_state = trie_destination_state(mwe_trie, (path_index == 0) ? 0: states_array[path_index - 1], form_code);
if(next_state != 0){
/* the path is growing */
symbols_array[path_index] = form_code;
states_array[path_index] = next_state;
path_index++;
continue;
}
/* print the potential mwe discovered so far */
print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index);
if(path_index != 0)
/* if there was a path that aborted, see if there is a valid transition from state 0 with form */
next_state = trie_destination_state(mwe_trie, 0, form_code);
path_index = 0;
if(next_state){
/* such a transition exists */
symbols_array[path_index] = form_code;
states_array[path_index] = next_state;
path_index++;
continue;
}
/* such a transition does not exist, just print the form */
if(ctx->paste)
printf("%s\n", buffer);
else
printf("%s\t1\n", buffer);
#if 0
symbols_array[path_index] = form_code;
states_array[path_index] = (form_code == -1)? 0 /* if word has invalid code, go to initial state */
: trie_destination_state(mwe_trie, (path_index == 0) ? 0 : states_array[path_index - 1], form_code); /* otherwise try to move forward in the trie */
/* printf("buffer = %s ", buffer);
printf("code = %d\n", form_code);
printf("states array :");
for(i=0; i <= path_index; i++){
printf("%d ", states_array[i]);
}
printf("\n");
printf("symbols array :");
for(i=0; i <= path_index; i++){
printf("%d ", symbols_array[i]);
}
printf("\n**********************\n");
*/
if(states_array[path_index] == 0){ /* in initial state of trie */
/* nothing has been recognized, just print current word */
if(path_index == 0)
if(ctx->paste)
printf("%s\n", buffer);
else
printf("%s\t1\n", buffer);
else{ /* there is something in the path */
print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index);
path_index = 0;
states_array[path_index] = (form_code == -1)? 0 /* if word has invalid code, go to initial state */
: trie_destination_state(mwe_trie, (path_index == 0) ? 0 : states_array[path_index - 1], form_code); /* otherwise try to move forward in the trie */
/* do not forget to print the current token */
if(ctx->paste)
printf("%s\n", buffer);
else
printf("%s\t1\n", buffer);
}
}
/* not in state 0 of trie we are processing tokens of a potential mwe */
else{
path_index++;
}
#endif
}
if(path_index != 0){ /* there is something in states array */
if(path_index != 0){
/* there is something in states array */
/* print the potential mwe discovered so far */
print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index);
path_index = 0;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment