diff --git a/maca_lexer/src/maca_lexer.c b/maca_lexer/src/maca_lexer.c index d80e7cf332474e9c2a11ba25c80f7bf1dfd2b345..ac3eaa321166f595495e8ee31a0543e627839385 100644 --- a/maca_lexer/src/maca_lexer.c +++ b/maca_lexer/src/maca_lexer.c @@ -38,7 +38,7 @@ int look_for_accept_state_in_path(trie *mwe_trie, int *states_array, int path_in return -1; } -void print_states_array(char *buffer, context *ctx, trie *mwe_trie, dico *d_mwe_tokens, int *states_array, int *symbols_array, int path_index) +void print_states_array(char *buffer, context *ctx, trie *mwe_trie, dico *d_mwe_tokens, int *states_array, int *symbols_array, int path_index, float *start_array, float *end_array, int orfeo, char *spkr) { int i; if(path_index == 0) return; @@ -54,18 +54,30 @@ void print_states_array(char *buffer, context *ctx, trie *mwe_trie, dico *d_mwe_ else printf("%s\t0\n", dico_int2string(d_mwe_tokens, symbols_array[i])); } } + if(i>0 && orfeo){ + printf("\t%f\t%f\t%s", start_array[0], end_array[accept_state_index], spkr); + } + + if(ctx->paste) if(accept_state_index != -1) printf("\n"); /* all tokens in path s.t. accept_state_index < token_index < path_index do not form an mwe, they are just printed */ for(i = accept_state_index + 1; i < path_index; i++){ - if(ctx->paste) - printf("%s\n", dico_int2string(d_mwe_tokens, symbols_array[i])); + if(ctx->paste){ + if(orfeo){ + printf("%s\t%f\t%f\t%s\n", dico_int2string(d_mwe_tokens, symbols_array[i]), start_array[i], end_array[i], spkr); + } + else{ + printf("%s\n", dico_int2string(d_mwe_tokens, symbols_array[i])); + } + } else printf("%s\t1\n", dico_int2string(d_mwe_tokens, symbols_array[i])); } } + int main(int argc, char *argv[]) { char buffer[10000]; @@ -77,12 +89,18 @@ int main(int argc, char *argv[]) dico *d_mwe_tokens = NULL; int states_array[100]; /* an array in which we store the states we have traversed in the trie */ int symbols_array[100]; + float start_array[100]; + float end_array[100]; int path_index = 0; int next_state; - + int orfeo = 1; + char form[1000]; + float start; + float end; + char spkr[1000]; ctx = context_read_options(argc, argv); maca_lexer_check_options(ctx); - + /* if(ctx->form_column != -1) form_column = ctx->form_column; @@ -109,33 +127,50 @@ int main(int argc, char *argv[]) continue; } buffer[strlen(buffer)-1] = '\0'; - /* look for code of form read */ - form_code = dico_string2int(d_mwe_tokens, buffer); + + if(orfeo){ + sscanf(buffer,"%s\t%f\t%f\t%s", form, &start, &end, spkr); + /* look for code of form read */ + form_code = dico_string2int(d_mwe_tokens, form); + + } + else{ + /* look for code of form read */ + form_code = dico_string2int(d_mwe_tokens, buffer); + } if(form_code == -1){ /* if form has no code, it cannot be part of a mwe, print the potential mwe discovered so far */ - print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index); + print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index, start_array, end_array, orfeo, spkr); path_index = 0; /* print the current form */ - if(ctx->paste) - printf("%s\n", buffer); + if(ctx->paste){ + if(orfeo) + printf("%s\t%f\t%f\t%s\n", form, start, end, spkr); + else + printf("%s\n", buffer); + } else printf("%s\t1\n", buffer); continue; } - + /* look for the next state in the trie */ next_state = trie_destination_state(mwe_trie, (path_index == 0) ? 0: states_array[path_index - 1], form_code); if(next_state != 0){ /* the path is growing */ symbols_array[path_index] = form_code; + if(orfeo){ + start_array[path_index] = start; + end_array[path_index] = end; + } states_array[path_index] = next_state; path_index++; continue; } /* print the potential mwe discovered so far */ - print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index); + print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index, start_array, end_array, orfeo, spkr); if(path_index != 0) /* if there was a path that aborted, see if there is a valid transition from state 0 with form */ @@ -145,14 +180,22 @@ int main(int argc, char *argv[]) if(next_state){ /* such a transition exists */ symbols_array[path_index] = form_code; + if(orfeo){ + start_array[path_index] = start; + end_array[path_index] = end; + } states_array[path_index] = next_state; path_index++; continue; } /* such a transition does not exist, just print the form */ - if(ctx->paste) - printf("%s\n", buffer); + if(ctx->paste){ + if(orfeo) + printf("%s\t%f\t%f\t%s\n", form, start, end, spkr); + else + printf("%s\n", buffer); + } else printf("%s\t1\n", buffer); } @@ -160,7 +203,7 @@ int main(int argc, char *argv[]) if(path_index != 0){ /* there is something in states array */ /* print the potential mwe discovered so far */ - print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index); + print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index, start_array, end_array, orfeo, spkr); path_index = 0; }