Select Git revision
CMakeLists.txt
-
Franck Dary authoredFranck Dary authored
process_xml_for_slu.c 10.90 KiB
/* Process Rocio XML for SLU */
/* FRED 0215 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/*................................................................*/
/* Size, in bytes, of the character buffers declared in main() (input line,
   file name, attribute value, keyword content); also the limit given to fgets() */
#define TailleLigne 8000
/* Integer values used for the boolean flags of this file */
#define True 1
#define False 0
/* Report a fatal error and terminate the program.
   Writes "ERREUR : <ch1> <ch2>" followed by a newline on stderr.
   ch1 : message
   ch2 : complementary text (may be the empty string)
   Never returns: the process exits with EXIT_FAILURE so that the calling
   shell script or build tool can detect that the run failed. */
void ERREUR(char *ch1,char *ch2)
{
    fprintf(stderr,"ERREUR : %s %s\n",ch1,ch2);
    exit(EXIT_FAILURE);
}
/* Report a fatal error carrying an integer value and terminate the program.
   Writes "ERREUR : <ch1> <i>" followed by a newline on stderr.
   ch1 : message
   i   : integer printed after the message
   Never returns: the process exits with EXIT_FAILURE so that the calling
   shell script or build tool can detect that the run failed. */
void ERREURd(char *ch1, int i)
{
    fprintf(stderr,"ERREUR : %s %d\n",ch1,i);
    exit(EXIT_FAILURE);
}
/*................................................................*/
/* FST DICO : symbol tables (actions and words) and weights used when writing the transducers */
/* Capacity of each of the two symbol tables below */
#define MAX_SIZE_DICO 1000
/* Not referenced in the code visible in this file */
#define IGLOU 1
/* Weight written on the CHglouton (<joker>) transition added to each keyword of a strict sequence */
#define PENALGLOU 100
/* Weight written with the final states emitted after keywords and at the end of sequences/sections */
#define PENALEND 100
/* Weight written on the <epsilon> transition that allows skipping a word inside a multiword keyword */
#define PENALSUB 10
/* First index given to a new symbol: the dictionary files written by main() list <epsilon> as 0 and CHglouton as 1 */
#define STARTNEW 2
/* Symbol strings written in the output files */
char *CHglouton="<joker>";
char *CHepsilon="<epsilon>";
/* Symbol tables: entry i holds the string whose index is i (entries below STARTNEW are never filled) */
char *T_dico_action[MAX_SIZE_DICO],*T_dico_word[MAX_SIZE_DICO];
/* Number of slots in use in each table, i.e. the index the next new symbol will receive */
int NbAction=STARTNEW,NbWord=STARTNEW;
/* Return the index of the action string ch in T_dico_action, adding it first
   when it is not known yet.
   ch : action string; a private copy is stored, the caller keeps its buffer
   Returns an index >= STARTNEW.
   Aborts through ERREUR() when the copy cannot be allocated or when the table
   becomes full (the last slot is deliberately never left in use, as before). */
int from_action_to_index(char *ch)
{
    int i;
    for(i=STARTNEW;i<NbAction;i++)
        if (!strcmp(ch,T_dico_action[i])) return i;
    /* not found: i==NbAction is the index of the new entry */
    T_dico_action[NbAction]=strdup(ch);
    /* a NULL entry would crash the strcmp() of the next lookup */
    if (!T_dico_action[NbAction]) ERREUR("can't allocate memory for action:",ch);
    NbAction++;
    if (NbAction==MAX_SIZE_DICO) ERREUR("cste MAX_SIZE_DICO too small","");
    return i;
}
/* Return the index of the word ch in T_dico_word, adding it first when it is
   not known yet.
   ch : word; a private copy is stored, the caller keeps its buffer
   Returns an index >= STARTNEW.
   Aborts through ERREUR() when the copy cannot be allocated or when the table
   becomes full (the last slot is deliberately never left in use, as before). */
int from_word_to_index(char *ch)
{
    int i;
    for(i=STARTNEW;i<NbWord;i++)
        if (!strcmp(ch,T_dico_word[i])) return i;
    /* not found: i==NbWord is the index of the new entry */
    T_dico_word[NbWord]=strdup(ch);
    /* a NULL entry would crash the strcmp() of the next lookup */
    if (!T_dico_word[NbWord]) ERREUR("can't allocate memory for word:",ch);
    NbWord++;
    if (NbWord==MAX_SIZE_DICO) ERREUR("cste MAX_SIZE_DICO too small","");
    return i;
}
/*................................................................*/
/* format:
<homeostasis version="25-11-2014">
<liste_section sequences="1,2,3,4,5,6,7,8,9" ordre="variable" repetition="oui" action="exclusive">
<section id="1" action="start_scene1">
<sequence ordre="strict" repetition="non" action="" lang="eng">
<keyword action="start_scene1" lang="esp"> uno </keyword>
</sequence>
</section>
<section id="2" action="close_scene1/start_scene2">
<sequence ordre="strict" repetition="non" action="" lang="esp">
<keyword action="open_scene2" lang="esp"> dos </keyword>
<keyword action="open_2A" lang="eng"> open system </keyword>
</sequence>
<sequence ordre="strict" repetition="oui" action="" lang="eng">
<keyword action="start_system_voice" lang="eng"> tell me </keyword>
<keyword action="open_2C2" lang="eng"> next </keyword>
<keyword action="open_2D" lang="eng"> install the new version of me </keyword>
<keyword action="#end" lang="eng"> give me my data </keyword>
</sequence>
</section>
*/
/* Values stored in main() for the "ordre" attribute of a <sequence> tag:
   "strict" gives STRICT, "variable" gives VARIABLE */
#define STRICT 0
#define VARIABLE 1
/* Extract the value of an XML attribute written as  attribut="value"  in ch.
   ch       : line to search; the attribute name must be preceded by a space
   attribut : attribute name, without the '='
   chfield  : output buffer, must be at least as large as ch
   Returns chfield, which holds the value, or the empty string when the
   attribute is not present in ch.
   Aborts through ERREUR() when the '=' is not followed by an opening '"' or
   when the closing '"' is missing. */
char *get_field(char *ch, char *attribut, char *chfield)
{
    size_t len=strlen(attribut),i,j;
    chfield[0]='\0';
    /* look for " attribut="; ch[i+1+len] is only read once strncmp has matched,
       so it is at worst the terminating '\0' */
    for(i=0;ch[i];i++)
        if ((ch[i]==' ')&&(!strncmp(ch+i+1,attribut,len))&&(ch[i+1+len]=='=')) break;
    if (ch[i])
    {
        i+=1+len+1; /* first character after the '=' */
        /* check the opening quote instead of skipping it blindly: when '=' ends
           the line, skipping would start the copy after the terminating '\0' */
        if (ch[i]!='"') ERREUR("bad format1:",ch);
        for(j=0,i++;(ch[i])&&(ch[i]!='"');i++,j++) chfield[j]=ch[i];
        if (ch[i]!='"') ERREUR("bad format1:",ch);
        chfield[j]='\0';
    }
    return chfield;
}
/* Copy into chcontent the text found between the first '>' of ch and the next
   '<', without its leading and trailing spaces and tabs.
   ch        : line of the form  <tag ...> text </tag>
   chcontent : output buffer, must be at least as large as ch
   Returns chcontent.
   Aborts through ERREUR() when ch has no '>' or no '<' after the text. */
char *get_content(char *ch, char *chcontent)
{
    const char *p=ch;
    size_t n=0;
    chcontent[0]='\0';
    /* go to the end of the opening tag */
    while ((*p!='\0')&&(*p!='>')) p++;
    if (*p=='\0') ERREUR("bad format2:",ch);
    p++;
    /* skip the blanks that precede the text */
    while ((*p==' ')||(*p=='\t')) p++;
    /* copy up to the start of the closing tag */
    while ((*p!='\0')&&(*p!='<')) chcontent[n++]=*p++;
    if (*p=='\0') ERREUR("bad format3:",ch);
    chcontent[n]='\0';
    /* erase the trailing blanks; the first character is never examined */
    while ((n>1)&&((chcontent[n-1]==' ')||(chcontent[n-1]=='\t'))) chcontent[--n]='\0';
    return chcontent;
}
/* Replace, in place, every space character of ch by an underscore.
   Other characters (tabs included) are left untouched. */
void remove_space(char *ch)
{
    char *p;
    for(p=strchr(ch,' ');p!=NULL;p=strchr(p+1,' ')) *p='_';
}
/*................................................................*/
/* Number of entries of the t_field array of main(), which receives the words of one keyword */
#define MAX_FIELD 60000
/* Number of entries of the t_end array of main(), which receives the exit states of the current sequence */
#define MAX_END_STATE 1000
/* Abort with the "no exit action" diagnostic of a VARIABLE sequence. */
static void no_exit_action(int nbsection,int nbseq)
{
    fprintf(stderr,"ERREUR: no exit action on a variable section, in section %d and sequence %d\n",nbsection,nbseq);
    exit(EXIT_FAILURE);
}

/* Write the arcs that close a section: each of the nbend exit states of t_end
   goes to state currentstate+1 on <epsilon> with output actionsequence;
   currentstate+1 is final with weight PENALEND and goes to currentstate+2 on
   <epsilon> with output actionsection; currentstate+2 is final without weight. */
static void emit_section_end(FILE *file,const int *t_end,int nbend,int currentstate,int actionsequence,int actionsection)
{
    int i;
    for(i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
    fprintf(file,"%d\t%d\n",currentstate+1,PENALEND); /* in order to quit on ANY action */
    fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate+1,currentstate+2,actionsection);
    fprintf(file,"%d\n",currentstate+2);
}

/* Build the file name "<prefix>_<stem><id>.txt" in name (size bytes) and open
   it for writing. Aborts through ERREUR() when the name does not fit or when
   the file cannot be opened. */
static FILE *open_output(char *prefix,const char *stem,const char *id,char *name,size_t size)
{
    FILE *f;
    int n=snprintf(name,size,"%s_%s%s.txt",prefix,stem,id);
    if ((n<0)||((size_t)n>=size)) ERREUR("output file name too long, prefix:",prefix);
    if (!(f=fopen(name,"wt"))) ERREUR("can't write in:",name);
    return f;
}

/* Read on stdin an XML description made of <section>, <sequence> and <keyword>
   tags (one tag per line) and write:
     <prefix>_section<id>.txt   one transducer, in text form, per section
     <prefix>_clean_tail.txt    transducer over the words met in the keywords
     <prefix>_dico_action.txt   action symbol table
     <prefix>_dico_word.txt     word symbol table
   Syntax: prog [-h] -prefix <string>
   Returns 0 on success; every error terminates the process with EXIT_FAILURE. */
int main(int argc, char **argv)
{
    int nbsection,i,nb,ordre,repetition,nbseq,actionsection,actionsequence,currentstate,statedebseq,action,findend,t_end[MAX_END_STATE],nbend,idac;
    FILE *file;
    char ch[TailleLigne],*t_field[MAX_FIELD],*chprefix,chname[TailleLigne],chfield[TailleLigne],chcontent[TailleLigne];

    chprefix=NULL;
    for(nb=1;nb<argc;nb++)
    {
        if (!strcmp(argv[nb],"-prefix"))
        {
            if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]);
            chprefix=argv[++nb];
        }
        else if (!strcmp(argv[nb],"-h"))
        {
            fprintf(stderr,"Syntax: %s [-h] -prefix <string>\n",argv[0]);
            exit(0);
        }
        else ERREUR("unknown option:",argv[nb]);
    }
    if (!chprefix) ERREUR("bad syntax, check '-h'","");

    ordre=STRICT; repetition=False; file=NULL; actionsection=0; findend=False; nbend=0; nbseq=0;
    currentstate=0; statedebseq=0; /* defined values even if a tag shows up out of order */
    idac=0; nbsection=0;
    while (fgets(ch,TailleLigne,stdin))
    {
        if (strstr(ch,"<section"))
        {
            if (file)
            {
                if (nbseq>0)
                {
                    if (!findend)
                    {
                        /* without "#end" keyword the last state of a strict sequence is its exit */
                        if (ordre==VARIABLE) no_exit_action(nbsection,nbseq);
                        t_end[nbend++]=currentstate;
                    }
                    if (nbend>0) /* emit the end-of-section actions */
                    {
                        snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,idac,nbseq); actionsequence=from_action_to_index(chcontent);
                        snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSECTION(%d)\",\"\")",nbsection,nbseq,idac,nbsection); actionsection=from_action_to_index(chcontent);
                        emit_section_end(file,t_end,nbend,currentstate,actionsequence,actionsection);
                        currentstate+=2;
                        nbend=0;
                    }
                }
                /* closed even when the section held no sequence, otherwise the stream leaks */
                fclose(file); file=NULL;
            }
            nbsection++; nbseq=0; currentstate=0;
            get_field(ch,"id",chfield);
            /* the file name is built in chname: ch must keep the input line, its
               "action" attribute is read just below */
            file=open_output(chprefix,"section",chfield,chname,sizeof chname);
            get_field(ch,"action",chfield);
            if (chfield[0]) { snprintf(ch,sizeof ch,"action(\"%s\",\"\")",chfield); actionsection=from_action_to_index(ch); } else actionsection=0;
        }
        else if (strstr(ch,"<sequence")) /* <sequence ordre="strict" repetition="oui" action="" lang="eng"> */
        {
            if (!file) ERREUR("<sequence> found outside of any <section>:",ch);
            if ((nbseq!=0)&&(!findend))
            {
                if (ordre==VARIABLE) no_exit_action(nbsection,nbseq);
                t_end[nbend++]=currentstate;
            }
            if (nbend>0)
            {
                snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,idac,nbseq); actionsequence=from_action_to_index(chcontent);
                for(i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
                /* "state<TAB>weight", as for every other final state carrying
                   PENALEND; with a newline in between, PENALEND was written as
                   the number of an extra final state */
                fprintf(file,"%d\t%d\n",currentstate+1,PENALEND);
                currentstate++;
                nbend=0;
            }
            findend=False;
            nbseq++;
            get_field(ch,"ordre",chfield);
            if (!strcmp(chfield,"strict")) ordre=STRICT; else
            if (!strcmp(chfield,"variable")) ordre=VARIABLE; else ERREUR("wrong value for attribut ordre:",ch);
            if (!strcmp(get_field(ch,"repetition",chfield),"oui")) repetition=True; else repetition=False;
            /* ch still holds the input line here, so the "action" attribute of
               the tag can be found */
            if (actionsection!=0) { fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate,currentstate+1,actionsection); currentstate++; }
            get_field(ch,"action",chfield);
            if (chfield[0]) { fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate,currentstate+1,from_action_to_index(chfield)); currentstate++; }
            /* start state of the keywords of this sequence: taken after the two
               optional arcs above so that the keywords of a VARIABLE sequence
               do not bypass them.
               NOTE(review): these arcs were never emitted before, because the
               line was overwritten before the "action" lookups - confirm that
               the consumer of the transducers expects these actions */
            statedebseq=currentstate;
        }
        else if (strstr(ch,"<keyword"))
        {
            /* <keyword action="open_2D" lang="eng"> install the new version of me </keyword> */
            /* <keyword action="#end" lang="eng"> give me my data </keyword> */
            get_field(ch,"action",chfield);
            if (chfield[0])
            {
                if (!file) ERREUR("<keyword> found outside of any <section>:",ch);
                get_content(ch,chcontent);
                /* from here on ch holds the action symbol, not the input line */
                snprintf(ch,sizeof ch,"action(%d,%d,%d,\"%s\",\"%s\")",nbsection,nbseq,idac++,chfield,chcontent);
                remove_space(ch);
                action=from_action_to_index(ch);
                /* split the content in words; t_field ends with a NULL entry */
                for(i=1,t_field[0]=strtok(chcontent," ");t_field[i-1];i++) t_field[i]=strtok(NULL," ");
                for(i=0;t_field[i];i++)
                {
                    if ((ordre==STRICT)||(i>0))
                    {
                        fprintf(file,"%d\t%d\t%s\t%d\t0\n",currentstate+i,currentstate+1+i,t_field[i],i==0?action:0);
                        /* we add a possible jump inside a multiword action, except for the first and last word */
                        if ((i>0)&&(t_field[i+1])) fprintf(file,"%d\t%d\t%s\t%d\t%d\n",currentstate+i,currentstate+1+i,"<epsilon>",0,PENALSUB);
                    }
                    else fprintf(file,"%d\t%d\t%s\t%d\t0\n",statedebseq,currentstate+1+i,t_field[i],i==0?action:0);
                    from_word_to_index(t_field[i]);
                }
                /* i is now the number of words of the keyword */
                if (ordre==STRICT)
                {
                    /* i>0: an empty keyword has no first word to repeat (t_field[0] is NULL) */
                    if ((repetition)&&(i>0)) fprintf(file,"%d\t%d\t%s\t%d\t0\n",currentstate+i,currentstate+1,t_field[0],action);
                }
                else fprintf(file,"%d\t%d\t<epsilon>\t0\t0\n",currentstate+i,statedebseq);
                /* now we can end at any keyword, but penality if not a valid end !! */
                if (!strcmp(chfield,"#end")) { findend=True; t_end[nbend++]=currentstate+i; if (nbend==MAX_END_STATE) ERREUR("cste MAX_END_STATE too small",""); }
                fprintf(file,"%d\t%d\n",currentstate+i,PENALEND);
                /* we add glouton transition if it's a strict section */
                if (ordre==STRICT) fprintf(file,"%d\t%d\t%s\t%d\t%d\n",currentstate,currentstate+i,CHglouton,action,PENALGLOU);
                currentstate+=i;
            }
        }
    }

    /* end of input: close the last section */
    if ((file)&&(nbseq>0))
    {
        if (!findend)
        {
            if (ordre==VARIABLE) no_exit_action(nbsection,nbseq);
            t_end[nbend++]=currentstate;
        }
        if (nbend>0) /* emit the end-of-section actions */
        {
            snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,idac++,nbseq); actionsequence=from_action_to_index(chcontent);
            snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSECTION(%d)\",\"\")",nbsection,nbseq,idac++,nbsection); actionsection=from_action_to_index(chcontent);
            emit_section_end(file,t_end,nbend,currentstate,actionsequence,actionsection);
        }
    }
    if (file) fclose(file);

    /* write tail GLOUTON eraser */
    file=open_output(chprefix,"clean_tail","",chname,sizeof chname);
    for(i=STARTNEW;i<NbWord;i++) fprintf(file,"0\t0\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
    for(i=STARTNEW;i<NbWord;i++) fprintf(file,"0\t1\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
    fprintf(file,"0\t2\t%s\t%s\n",CHglouton,CHglouton);
    fprintf(file,"1\n");
    for(i=STARTNEW;i<NbWord;i++) fprintf(file,"2\t0\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
    for(i=STARTNEW;i<NbWord;i++) fprintf(file,"2\t1\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
    fclose(file);

    /* write dico action */
    file=open_output(chprefix,"dico_action","",chname,sizeof chname);
    fprintf(file,"<epsilon> 0\n"); fprintf(file,"%s 1\n",CHglouton);
    for(i=STARTNEW;i<NbAction;i++) fprintf(file,"%s %d\n",T_dico_action[i],i);
    fclose(file);

    /* write dico word */
    file=open_output(chprefix,"dico_word","",chname,sizeof chname);
    fprintf(file,"<epsilon> 0\n"); fprintf(file,"%s 1\n",CHglouton);
    for(i=STARTNEW;i<NbWord;i++) fprintf(file,"%s %d\n",T_dico_word[i],i);
    fclose(file);
    return 0;
}