/* Process Rocio XML for SLU */
/* FRED 0215 */
/*
 * Reads an XML-like description line by line on stdin (see the "format"
 * comment below) and writes, for a given -prefix P:
 *   P_section<ID>.txt  one tab-separated arc list per <section>
 *   P_clean_tail.txt   an arc list looping over every word seen
 *   P_dico_action.txt  the "label index" table of the action labels
 *   P_dico_word.txt    the "label index" table of the words
 * The parser is line oriented: it expects one tag per input line.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/*................................................................*/

#define TailleLigne 8000

#define True 1
#define False 0

/* Print an error made of two strings on stderr and abort with a failure status. */
void ERREUR(char *ch1,char *ch2)
{
fprintf(stderr,"ERREUR : %s %s\n",ch1,ch2);
exit(EXIT_FAILURE);
}

/* Print an error made of a string and an integer on stderr and abort with a failure status. */
void ERREURd(char *ch1, int i)
{
fprintf(stderr,"ERREUR : %s %d\n",ch1,i);
exit(EXIT_FAILURE);
}

/*................................................................*/

/* FST DICO */

#define MAX_SIZE_DICO 1000
#define IGLOU 1
#define PENALGLOU 100
#define PENALEND 100
#define PENALSUB 10
#define STARTNEW 2

char *CHglouton="<joker>";
char *CHepsilon="<epsilon>";

/* Indexes 0 and 1 are reserved (written as <epsilon> and <joker> in the
   dictionary files), so real entries start at STARTNEW. */
char *T_dico_action[MAX_SIZE_DICO],*T_dico_word[MAX_SIZE_DICO];
int NbAction=STARTNEW,NbWord=STARTNEW;

/* Return a heap copy of src; aborts when memory is exhausted. */
static char *copy_string(const char *src)
{
size_t size=strlen(src)+1;
char *copy=malloc(size);
if (!copy) ERREUR("out of memory","");
memcpy(copy,src,size);
return copy;
}

/* Return the index of ch in table, appending a copy of it when it is not
   there yet. Aborts as soon as the table becomes full. */
static int dico_index(char **table, int *count, char *ch)
{
int i;
for(i=STARTNEW;(i<*count)&&(strcmp(ch,table[i]));i++);
if (i==*count) table[(*count)++]=copy_string(ch);
if (*count==MAX_SIZE_DICO) ERREUR("cste MAX_SIZE_DICO too small","");
return i;
}

/* Index of the action label ch in T_dico_action (added when unknown). */
int from_action_to_index(char *ch)
{
return dico_index(T_dico_action,&NbAction,ch);
}

/* Index of the word ch in T_dico_word (added when unknown). */
int from_word_to_index(char *ch)
{
return dico_index(T_dico_word,&NbWord,ch);
}

/*................................................................*/

/* format:
<homeostasis version="25-11-2014">
<liste_section sequences="1,2,3,4,5,6,7,8,9" ordre="variable" repetition="oui" action="exclusive">
<section id="1" action="start_scene1">
        <sequence ordre="strict" repetition="non" action="" lang="eng">
                <keyword action="start_scene1" lang="esp"> uno </keyword>
        </sequence>
</section>
<section id="2" action="close_scene1/start_scene2">
        <sequence ordre="strict" repetition="non" action="" lang="esp">
                <keyword action="open_scene2" lang="esp"> dos </keyword>
                <keyword action="open_2A" lang="eng"> open system </keyword>
        </sequence>
        <sequence ordre="strict" repetition="oui" action="" lang="eng">
                <keyword action="start_system_voice" lang="eng"> tell me </keyword>
                <keyword action="open_2C2" lang="eng"> next </keyword>
                <keyword action="open_2D" lang="eng"> install the new version of me </keyword>
                <keyword action="#end" lang="eng"> give me my data </keyword>
        </sequence>
</section>
*/

#define STRICT 0
#define VARIABLE 1

/*
 * Copy into chfield the value of the first  attribut="value"  found in ch
 * (the attribute name must be preceded by a space) and return chfield.
 * chfield is left empty when the attribute is absent; it must be able to
 * hold a string as long as ch. Aborts when the value is not enclosed in
 * double quotes.
 */
char *get_field(char *ch, char *attribut, char *chfield)
{
size_t i,j,len=strlen(attribut);

chfield[0]='\0';
for(i=0;(ch[i])&&((ch[i]!=' ')||(strncmp(ch+i+1,attribut,len))||(ch[i+1+len]!='='));i++);
if (!ch[i]) return chfield;
i+=len+2; /* skip the space, the name and '=': ch[i] must be the opening quote */
if (ch[i]!='"') ERREUR("bad format1:",ch); /* also stops us from walking past the end of ch */
for(j=0,++i;(ch[i])&&(ch[i]!='"');i++,j++) chfield[j]=ch[i];
if (ch[i]!='"') ERREUR("bad format1:",ch);
chfield[j]='\0';
return chfield;
}

/*
 * Copy into chcontent the text found between the first '>' of ch and the
 * next '<', without the surrounding spaces and tabs, and return chcontent.
 * Aborts when either delimiter is missing.
 */
char *get_content(char *ch, char *chcontent)
{
int i,j;

chcontent[0]='\0';
for(i=0;(ch[i])&&(ch[i]!='>');i++);
if (!ch[i]) ERREUR("bad format2:",ch);
for(++i;(ch[i])&&((ch[i]==' ')||(ch[i]=='\t'));i++);
for(j=0;(ch[i])&&(ch[i]!='<');i++,j++) chcontent[j]=ch[i];
if (!ch[i]) ERREUR("bad format3:",ch);
chcontent[j]='\0';
for(--j;(j>0)&&((chcontent[j]==' ')||(chcontent[j]=='\t'));j--) chcontent[j]='\0';
return chcontent;
}

/* Replace every space of ch by an underscore, in place. */
void remove_space(char *ch)
{
int i;
for(i=0;ch[i];i++) if (ch[i]==' ') ch[i]='_';
}

/*................................................................*/

#define MAX_FIELD 60000
#define MAX_END_STATE 1000

/* Build the file name  prefix+stem+id+ext  in dst; aborts when it does not fit. */
static void make_name(char *dst, size_t size, const char *prefix, const char *stem, const char *id, const char *ext)
{
int n=snprintf(dst,size,"%s%s%s%s",prefix,stem,id,ext);
if ((n<0)||((size_t)n>=size)) ERREUR("file name too long for prefix:",(char *)prefix);
}

/* Open name for writing; aborts when this is not possible. */
static FILE *open_output(char *name)
{
FILE *f=fopen(name,"wt");
if (!f) ERREUR("can't write in:",name);
return f;
}

/* Close a written file; aborts when buffered data could not be flushed. */
static void close_output(FILE *f)
{
if (fclose(f)) ERREUR("can't finish writing an output file","");
}

/* Report a "variable" sequence that has no "#end" keyword, and abort. */
static void no_exit_action(int nbsection, int nbseq)
{
fprintf(stderr,"ERREUR: no exit action on a variable section, in section %d and sequence %d\n",nbsection,nbseq);
exit(EXIT_FAILURE);
}

/*
 * Write the arcs that terminate a section: every state of t_end reaches
 * currentstate+1 with the #ENDSEQUENCE action, and currentstate+1 reaches
 * currentstate+2 with the #ENDSECTION action.
 * When the last sequence had no "#end" keyword, its current state is used
 * as the only end state (this is refused for a "variable" sequence).
 * idac_sequence and idac_section are the counters written in the two labels.
 */
static void finish_section(FILE *file, int nbsection, int nbseq, int ordre, int findend,
                           int idac_sequence, int idac_section, int *t_end, int nbend, int currentstate)
{
char label[128];
int i,actionsequence,actionsection;

if (!findend)
 {
 if (ordre==VARIABLE) no_exit_action(nbsection,nbseq);
 t_end[nbend++]=currentstate;
 }
if (nbend<=0) return;
/* emit the end-of-section actions; the sequence label is registered first so
   that it gets the lower index */
snprintf(label,sizeof(label),"action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,idac_sequence,nbseq);
actionsequence=from_action_to_index(label);
snprintf(label,sizeof(label),"action(%d,%d,%d,\"#ENDSECTION(%d)\",\"\")",nbsection,nbseq,idac_section,nbsection);
actionsection=from_action_to_index(label);
for(i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
fprintf(file,"%d\t%d\n",currentstate+1,PENALEND); /* in order to quit on ANY action */
fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate+1,currentstate+2,actionsection);
fprintf(file,"%d\n",currentstate+2);
}

/*
 * Usage: prog [-h] -prefix <string>   (the description is read on stdin)
 * Exits with status 0 on success and after -h, with a failure status on
 * any error.
 */
int main(int argc, char **argv)
{
int nbsection,i,nb,ordre,repetition,nbseq,actionsequence,currentstate,statedebseq,action,findend,t_end[MAX_END_STATE],nbend,idac,nbword;
FILE *file;
char *pt,ch[TailleLigne],*chprefix,chname[TailleLigne],chfield[TailleLigne],chcontent[TailleLigne];
char chlabel[2*TailleLigne+64]; /* holds chfield + chcontent + the fixed part of a label */
static char *t_field[MAX_FIELD]; /* words of the current keyword, NULL terminated (static: too big for a comfortable stack frame) */

chprefix=NULL;
for(nb=1;nb<argc;nb++)
 if (!strcmp(argv[nb],"-prefix"))
  {
  if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]);
  chprefix=argv[++nb];
  }
 else
 if (!strcmp(argv[nb],"-h"))
  {
  fprintf(stderr,"Syntax: %s [-h] -prefix <string>\n",argv[0]);
  exit(0);
  }
 else ERREUR("unknown option:",argv[nb]);
if (!chprefix) ERREUR("bad syntax, check '-h'","");

ordre=STRICT; repetition=False; file=NULL; findend=False; nbend=0; nbseq=0;
currentstate=0; statedebseq=0;

for(idac=nbsection=0;fgets(ch,TailleLigne,stdin);)
 {
 if (strstr(ch,"<section"))
  {
  if (file)
   {
   /* terminate the previous section; both labels use the current idac */
   if (nbseq>0) finish_section(file,nbsection,nbseq,ordre,findend,idac,idac,t_end,nbend,currentstate);
   close_output(file);
   file=NULL;
   }
  /* state numbering restarts in every section file */
  nbsection++; nbseq=0; currentstate=0; statedebseq=0; nbend=0;
  get_field(ch,"id",chfield);
  make_name(chname,sizeof(chname),chprefix,"_section",chfield,".txt");
  file=open_output(chname);
  /* NOTE(review): the "action" attribute of <section> is not used. The
     previous code looked it up in a buffer that had already been overwritten
     with a file name, so it was never found; it stays ignored here to keep
     the generated files unchanged. Confirm whether it should be honoured. */
  }
 else
 if (strstr(ch,"<sequence")) // <sequence ordre="strict" repetition="oui" action="" lang="eng">
  {
  if (!file) ERREUR("<sequence> found outside of a <section>:",ch);
  if ((nbseq!=0)&&(!findend))
   {
   /* the previous sequence had no "#end" keyword: it ends on its last state */
   if (ordre==VARIABLE) no_exit_action(nbsection,nbseq);
   t_end[nbend++]=currentstate;
   }
  if (nbend>0)
   {
   /* join every end state of the previous sequence on a new final state */
   snprintf(chlabel,sizeof(chlabel),"action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,idac,nbseq);
   actionsequence=from_action_to_index(chlabel);
   for (i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
   fprintf(file,"%d\n",currentstate+1);
   currentstate++;
   nbend=0;
   }
  statedebseq=currentstate;
  findend=False;
  nbseq++;
  get_field(ch,"ordre",chfield);
  if (!strcmp(chfield,"strict")) ordre=STRICT;
  else if (!strcmp(chfield,"variable")) ordre=VARIABLE;
  else ERREUR("wrong value for attribut ordre:",ch);
  if (!strcmp(get_field(ch,"repetition",chfield),"oui")) repetition=True; else repetition=False;
  /* NOTE(review): the "action" attribute of <sequence> is not used either,
     for the same reason as the one of <section> (it was searched in an
     overwritten buffer, built from an uninitialised string). */
  }
 else
 if (strstr(ch,"<keyword"))
  {
  // <keyword action="open_2D" lang="eng"> install the new version of me </keyword>
  // <keyword action="#end" lang="eng"> give me my data </keyword>
  get_field(ch,"action",chfield);
  if (chfield[0])
   {
   if (!file) ERREUR("<keyword> found outside of a <section>:",ch);
   get_content(ch,chcontent);
   /* the label must be built before strtok() cuts chcontent into words */
   snprintf(chlabel,sizeof(chlabel),"action(%d,%d,%d,\"%s\",\"%s\")",nbsection,nbseq,idac++,chfield,chcontent);
   remove_space(chlabel);
   action=from_action_to_index(chlabel);
   nbword=0;
   for(pt=strtok(chcontent," ");pt;pt=strtok(NULL," "))
    {
    if (nbword+1==MAX_FIELD) ERREUR("cste MAX_FIELD too small","");
    t_field[nbword++]=pt;
    }
   t_field[nbword]=NULL;
   /* one arc per word; only the first one carries the action */
   for(i=0;t_field[i];i++)
    {
    if ((ordre==STRICT)||(i>0))
     {
     fprintf(file,"%d\t%d\t%s\t%d\t0\n",currentstate+i,currentstate+1+i,t_field[i],i==0?action:0);
     /* we add a possible jump inside a multiword action, except for the first and last word */
     if ((i>0)&&(t_field[i+1])) fprintf(file,"%d\t%d\t%s\t%d\t%d\n",currentstate+i,currentstate+1+i,"<epsilon>",0,PENALSUB);
     }
    else /* variable order: every keyword starts from the first state of the sequence */
     fprintf(file,"%d\t%d\t%s\t%d\t0\n",statedebseq,currentstate+1+i,t_field[i],i==0?action:0);
    from_word_to_index(t_field[i]);
    }
   /* here i is the number of words of the keyword */
   if (ordre==STRICT)
    {
    /* a keyword without any word has nothing to repeat */
    if ((repetition)&&(t_field[0])) fprintf(file,"%d\t%d\t%s\t%d\t0\n",currentstate+i,currentstate+1,t_field[0],action);
    }
   else fprintf(file,"%d\t%d\t<epsilon>\t0\t0\n",currentstate+i,statedebseq);
   /* now we can end at any keyword, but penality if not a valid end !! */
   if (!strcmp(chfield,"#end"))
    {
    findend=True;
    t_end[nbend++]=currentstate+i;
    if (nbend==MAX_END_STATE) ERREUR("cste MAX_END_STATE too small","");
    }
   fprintf(file,"%d\t%d\n",currentstate+i,PENALEND);
   // we add glouton transition if it's a strict section
   if (ordre==STRICT) fprintf(file,"%d\t%d\t%s\t%d\t%d\n",currentstate,currentstate+i,CHglouton,action,PENALGLOU);
   currentstate+=i;
   }
  }
 }

if (file)
 {
 /* terminate the last section; here the #ENDSECTION label uses the counter
    that follows the one of the #ENDSEQUENCE label */
 if (nbseq>0) finish_section(file,nbsection,nbseq,ordre,findend,idac,idac+1,t_end,nbend,currentstate);
 close_output(file);
 }

// write tail GLOUTON eraser
make_name(chname,sizeof(chname),chprefix,"_clean_tail","",".txt");
file=open_output(chname);
for(i=STARTNEW;i<NbWord;i++) fprintf(file,"0\t0\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
for(i=STARTNEW;i<NbWord;i++) fprintf(file,"0\t1\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
fprintf(file,"0\t2\t%s\t%s\n",CHglouton,CHglouton);
fprintf(file,"1\n");
for(i=STARTNEW;i<NbWord;i++) fprintf(file,"2\t0\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
for(i=STARTNEW;i<NbWord;i++) fprintf(file,"2\t1\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
close_output(file);

// write dico action
make_name(chname,sizeof(chname),chprefix,"_dico_action","",".txt");
file=open_output(chname);
fprintf(file,"<epsilon> 0\n");
fprintf(file,"%s 1\n",CHglouton);
for(i=STARTNEW;i<NbAction;i++) fprintf(file,"%s %d\n",T_dico_action[i],i);
close_output(file);

// write dico word
make_name(chname,sizeof(chname),chprefix,"_dico_word","",".txt");
file=open_output(chname);
fprintf(file,"<epsilon> 0\n");
fprintf(file,"%s 1\n",CHglouton);
for(i=STARTNEW;i<NbWord;i++) fprintf(file,"%s %d\n",T_dico_word[i],i);
close_output(file);

exit(0);
}