From d249131ff0554522f8a754818a8cddd8969c8cbe Mon Sep 17 00:00:00 2001 From: Alexis Nasr <alexis.nasr@lif.univ-mrs.fr> Date: Fri, 21 Jul 2017 07:52:35 +0200 Subject: [PATCH] modified maca_lexer so that it can take as input speakers and word duration (very ad hoc)! --- maca_lexer/src/maca_lexer.c | 73 +++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/maca_lexer/src/maca_lexer.c b/maca_lexer/src/maca_lexer.c index d80e7cf..ac3eaa3 100644 --- a/maca_lexer/src/maca_lexer.c +++ b/maca_lexer/src/maca_lexer.c @@ -38,7 +38,7 @@ int look_for_accept_state_in_path(trie *mwe_trie, int *states_array, int path_in return -1; } -void print_states_array(char *buffer, context *ctx, trie *mwe_trie, dico *d_mwe_tokens, int *states_array, int *symbols_array, int path_index) +void print_states_array(char *buffer, context *ctx, trie *mwe_trie, dico *d_mwe_tokens, int *states_array, int *symbols_array, int path_index, float *start_array, float *end_array, int orfeo, char *spkr) { int i; if(path_index == 0) return; @@ -54,18 +54,30 @@ void print_states_array(char *buffer, context *ctx, trie *mwe_trie, dico *d_mwe_ else printf("%s\t0\n", dico_int2string(d_mwe_tokens, symbols_array[i])); } } + if(i>0 && orfeo){ + printf("\t%f\t%f\t%s", start_array[0], end_array[accept_state_index], spkr); + } + + if(ctx->paste) if(accept_state_index != -1) printf("\n"); /* all tokens in path s.t. 
accept_state_index < token_index < path_index do not form an mwe, they are just printed */ for(i = accept_state_index + 1; i < path_index; i++){ - if(ctx->paste) - printf("%s\n", dico_int2string(d_mwe_tokens, symbols_array[i])); + if(ctx->paste){ + if(orfeo){ + printf("%s\t%f\t%f\t%s\n", dico_int2string(d_mwe_tokens, symbols_array[i]), start_array[i], end_array[i], spkr); + } + else{ + printf("%s\n", dico_int2string(d_mwe_tokens, symbols_array[i])); + } + } else printf("%s\t1\n", dico_int2string(d_mwe_tokens, symbols_array[i])); } } + int main(int argc, char *argv[]) { char buffer[10000]; @@ -77,12 +89,18 @@ int main(int argc, char *argv[]) dico *d_mwe_tokens = NULL; int states_array[100]; /* an array in which we store the states we have traversed in the trie */ int symbols_array[100]; + float start_array[100]; + float end_array[100]; int path_index = 0; int next_state; - + int orfeo = 1; + char form[1000] = ""; /* initialised: printed even when a line fails to parse */ + float start = 0.0f; + float end = 0.0f; + char spkr[1000] = ""; ctx = context_read_options(argc, argv); maca_lexer_check_options(ctx); - + /* if(ctx->form_column != -1) form_column = ctx->form_column; @@ -109,33 +127,50 @@ int main(int argc, char *argv[]) continue; } buffer[strlen(buffer)-1] = '\0'; - /* look for code of form read */ - form_code = dico_string2int(d_mwe_tokens, buffer); + + if(orfeo){ + sscanf(buffer,"%999s\t%f\t%f\t%999s", form, &start, &end, spkr); /* field widths keep each %s within form[1000] / spkr[1000] */ + /* look for code of form read */ + form_code = dico_string2int(d_mwe_tokens, form); + + } + else{ + /* look for code of form read */ + form_code = dico_string2int(d_mwe_tokens, buffer); + } if(form_code == -1){ /* if form has no code, it cannot be part of a mwe, print the potential mwe discovered so far */ - print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index); + print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index, start_array, end_array, orfeo, spkr); path_index = 0; /* print the current form */ - if(ctx->paste) - printf("%s\n", 
buffer); + if(ctx->paste){ + if(orfeo) + printf("%s\t%f\t%f\t%s\n", form, start, end, spkr); + else + printf("%s\n", buffer); + } else printf("%s\t1\n", buffer); continue; } - + /* look for the next state in the trie */ next_state = trie_destination_state(mwe_trie, (path_index == 0) ? 0: states_array[path_index - 1], form_code); if(next_state != 0){ /* the path is growing */ symbols_array[path_index] = form_code; + if(orfeo){ + start_array[path_index] = start; + end_array[path_index] = end; + } states_array[path_index] = next_state; path_index++; continue; } /* print the potential mwe discovered so far */ - print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index); + print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index, start_array, end_array, orfeo, spkr); if(path_index != 0) /* if there was a path that aborted, see if there is a valid transition from state 0 with form */ @@ -145,14 +180,22 @@ int main(int argc, char *argv[]) if(next_state){ /* such a transition exists */ symbols_array[path_index] = form_code; + if(orfeo){ + start_array[path_index] = start; + end_array[path_index] = end; + } states_array[path_index] = next_state; path_index++; continue; } /* such a transition does not exist, just print the form */ - if(ctx->paste) - printf("%s\n", buffer); + if(ctx->paste){ + if(orfeo) + printf("%s\t%f\t%f\t%s\n", form, start, end, spkr); + else + printf("%s\n", buffer); + } else printf("%s\t1\n", buffer); } @@ -160,7 +203,7 @@ int main(int argc, char *argv[]) if(path_index != 0){ /* there is something in states array */ /* print the potential mwe discovered so far */ - print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index); + print_states_array(buffer, ctx, mwe_trie, d_mwe_tokens, states_array, symbols_array, path_index, start_array, end_array, orfeo, spkr); path_index = 0; } -- GitLab