/*  Process Rocio XML for SLU  */
/*  FRED 0215  */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/*................................................................*/

/* Size of the input line buffer and of the scratch string buffers in main(). */
#define TailleLigne     8000

/* Values stored in the int flags used as booleans. */
#define True    1
#define False   0

/*
 * Report a fatal error and terminate the program.
 *
 * Prints "ERREUR : <ch1> <ch2>" followed by a newline on stderr, then exits.
 * This function never returns.
 *
 * ch1  first part of the message (usually what went wrong)
 * ch2  second part of the message (usually the offending value, may be "")
 */
void ERREUR(char *ch1,char *ch2)
{
    fprintf(stderr,"ERREUR : %s %s\n",ch1,ch2);
    /* A fatal error must be visible to the calling shell or script:
       exit with a failure status rather than 0. */
    exit(EXIT_FAILURE);
}

/*
 * Report a fatal error carrying an integer value and terminate the program.
 *
 * Prints "ERREUR : <ch1> <i>" followed by a newline on stderr, then exits.
 * This function never returns.
 *
 * ch1  text of the message
 * i    integer appended to the message
 */
void ERREURd(char *ch1, int i)
{
    fprintf(stderr,"ERREUR : %s %d\n",ch1,i);
    /* A fatal error must be visible to the calling shell or script:
       exit with a failure status rather than 0. */
    exit(EXIT_FAILURE);
}

/*................................................................*/

/* FST DICO */

#define MAX_SIZE_DICO	1000
#define IGLOU		1
#define PENALGLOU	100
#define PENALEND	100
#define PENALSUB	10
#define STARTNEW	2

char *CHglouton="<joker>";
char *CHepsilon="<epsilon>";

char *T_dico_action[MAX_SIZE_DICO],*T_dico_word[MAX_SIZE_DICO];
int NbAction=STARTNEW,NbWord=STARTNEW;

/*
 * Return the index of the action label `ch` in T_dico_action, adding a copy
 * of it at the end of the table when it is not there yet.
 *
 * The lookup is a linear scan starting at STARTNEW.  The program is
 * terminated through ERREUR() when the copy cannot be allocated or when the
 * insertion fills the table (NbAction reaches MAX_SIZE_DICO).
 */
int from_action_to_index(char *ch)
{
    int i;

    for (i = STARTNEW; i < NbAction; i++)
        if (!strcmp(ch, T_dico_action[i])) return i;

    /* Unknown label: i == NbAction, store a private copy in that slot.
       A NULL entry would crash the next lookup, so fail right away. */
    T_dico_action[NbAction] = strdup(ch);
    if (T_dico_action[NbAction] == NULL) ERREUR("out of memory while storing:", ch);
    NbAction++;
    if (NbAction == MAX_SIZE_DICO) ERREUR("cste MAX_SIZE_DICO too small","");
    return i;
}

/*
 * Return the index of the word `ch` in T_dico_word, adding a copy of it at
 * the end of the table when it is not there yet.
 *
 * The lookup is a linear scan starting at STARTNEW.  The program is
 * terminated through ERREUR() when the copy cannot be allocated or when the
 * insertion fills the table (NbWord reaches MAX_SIZE_DICO).
 */
int from_word_to_index(char *ch)
{
    int i;

    for (i = STARTNEW; i < NbWord; i++)
        if (!strcmp(ch, T_dico_word[i])) return i;

    /* Unknown word: i == NbWord, store a private copy in that slot.
       A NULL entry would crash the next lookup, so fail right away. */
    T_dico_word[NbWord] = strdup(ch);
    if (T_dico_word[NbWord] == NULL) ERREUR("out of memory while storing:", ch);
    NbWord++;
    if (NbWord == MAX_SIZE_DICO) ERREUR("cste MAX_SIZE_DICO too small","");
    return i;
}

/*................................................................*/

/* format:
<homeostasis version="25-11-2014">
    <liste_section sequences="1,2,3,4,5,6,7,8,9" ordre="variable" repetition="oui" action="exclusive">
    <section id="1" action="start_scene1">
        <sequence ordre="strict" repetition="non" action="" lang="eng">
           <keyword action="start_scene1" lang="esp"> uno </keyword>
        </sequence>
        </section>
        <section id="2" action="close_scene1/start_scene2">
            <sequence ordre="strict" repetition="non" action="" lang="esp">
                <keyword action="open_scene2" lang="esp"> dos </keyword>
                <keyword action="open_2A" lang="eng"> open system </keyword>
            </sequence>
          <sequence ordre="strict" repetition="oui" action="" lang="eng">
            <keyword action="start_system_voice" lang="eng"> tell me </keyword>
            <keyword action="open_2C2" lang="eng"> next </keyword>
            <keyword action="open_2D" lang="eng"> install the new version of me </keyword>
            <keyword action="#end" lang="eng"> give me my data </keyword>
        </sequence>
    </section>
*/

/* Values main() stores for the `ordre` attribute of a <sequence>
   ("strict" and "variable" respectively). */
#define STRICT		0
#define VARIABLE	1

/*
 * Extract the value of an XML attribute from one line of text.
 *
 * The attribute is recognised as a space, the attribute name, '=' and a
 * double-quoted value (` name="value"`).  The value, without its quotes, is
 * copied into chfield.
 *
 * ch        line to search (not modified)
 * attribut  attribute name
 * chfield   output buffer; must be large enough for the value (at most
 *           strlen(ch) characters plus the terminator)
 *
 * Returns chfield, which is the empty string when the attribute is absent.
 * Terminates the program through ERREUR("bad format1:", ...) when the value
 * is not enclosed in double quotes.
 */
char *get_field(char *ch, char *attribut, char *chfield)
{
    size_t len = strlen(attribut);   /* loop invariant, computed once */
    size_t i, j;

    chfield[0] = '\0';

    /* Find the first " attribut=" occurrence. */
    for (i = 0; ch[i]; i++)
        if ((ch[i] == ' ') && (!strncmp(ch + i + 1, attribut, len)) && (ch[i + 1 + len] == '='))
            break;
    if (!ch[i]) return chfield;

    /* Step onto the character after '='.  It has to be the opening quote;
       checking it also stops us from walking past the end of a line that
       finishes right after the '='. */
    i += 1 + len + 1;
    if (ch[i] != '"') ERREUR("bad format1:", ch);

    for (j = 0, i++; (ch[i]) && (ch[i] != '"'); i++, j++) chfield[j] = ch[i];
    if (ch[i] != '"') ERREUR("bad format1:", ch);   /* closing quote missing */
    chfield[j] = '\0';
    return chfield;
}

/*
 * Copy the text found between the first '>' of the line and the next '<'
 * into chcontent, without the spaces and tabs surrounding it.
 *
 * ch         line to read (not modified)
 * chcontent  output buffer, large enough for the text
 *
 * Returns chcontent.  Terminates the program through ERREUR() when the line
 * has no '>' ("bad format2:") or no '<' after it ("bad format3:").
 */
char *get_content(char *ch, char *chcontent)
{
    char *start;
    size_t length;
    int last;

    chcontent[0] = '\0';

    start = strchr(ch, '>');
    if (start == NULL) ERREUR("bad format2:", ch);

    /* Skip the '>' itself, then any leading blanks. */
    start++;
    start += strspn(start, " \t");

    /* The text runs up to, but does not include, the next '<'. */
    length = strcspn(start, "<");
    if (start[length] == '\0') ERREUR("bad format3:", ch);
    memcpy(chcontent, start, length);
    chcontent[length] = '\0';

    /* Remove trailing blanks; position 0 is never examined. */
    last = (int)length - 1;
    while ((last > 0) && ((chcontent[last] == ' ') || (chcontent[last] == '\t')))
        chcontent[last--] = '\0';

    return chcontent;
}

/*
 * Replace, in place, every space character of ch with an underscore.
 * Other characters (tabs included) are left untouched.
 */
void remove_space(char *ch)
{
    char *space;

    for (space = strchr(ch, ' '); space != NULL; space = strchr(space + 1, ' '))
        *space = '_';
}

/*................................................................*/

/* Capacity of the array holding the words of one keyword in main(). */
#define MAX_FIELD	60000
/* Capacity of the array holding the pending end states in main(). */
#define MAX_END_STATE	1000

/*
 * Check the length returned by snprintf() for a file name built in `name`
 * (a buffer of `size` bytes), then open that file for writing.
 * Terminates the program through ERREUR() on truncation or open failure.
 */
static FILE *open_for_writing(char *name, int len, size_t size)
{
    FILE *f;

    if ((len < 0) || ((size_t)len >= size)) ERREUR("file name too long:", name);
    if (!(f = fopen(name, "wt"))) ERREUR("can't write in:", name);
    return f;
}

/*
 * Read the scenario XML on stdin, line by line, and write the FST text files.
 *
 * Command line: [-h] -prefix <string>
 *
 * Files written:
 *   <prefix>_section<id>.txt   one per <section>: tab-separated arc lines
 *                              "src dst input-label action-index penalty"
 *                              and final-state lines "state [penalty]"
 *   <prefix>_clean_tail.txt    arcs over every known word plus the joker
 *   <prefix>_dico_action.txt   "label index" table of the actions
 *   <prefix>_dico_word.txt     "label index" table of the words
 *
 * Only lines containing "<section", "<sequence" or "<keyword" are examined
 * (in that order of priority); a <keyword> without a non-empty `action`
 * attribute is ignored.  The counter idac keeps counting across sequences
 * and sections.
 *
 * Every error is fatal: a message is printed on stderr and the program
 * exits.  The process exits with status 0 on success and after -h.
 */
int main(int argc, char **argv)
{
    int nbsection, i, nb, len, ordre, repetition, nbseq, actionsection, actionsequence, actionend;
    int currentstate, statedebseq, action, findend, nbend, idac;
    int t_end[MAX_END_STATE];
    FILE *file;
    char ch[TailleLigne], chfield[TailleLigne], chcontent[TailleLigne];
    char *t_field[MAX_FIELD], *chprefix;

    /* ---- command line ---- */
    chprefix = NULL;
    for (nb = 1; nb < argc; nb++) {
        if (!strcmp(argv[nb], "-prefix")) {
            if (nb + 1 == argc) ERREUR("must have a value after argument;", argv[nb]);
            chprefix = argv[++nb];
        }
        else if (!strcmp(argv[nb], "-h")) {
            fprintf(stderr, "Syntax: %s [-h] -prefix <string>\n", argv[0]);
            exit(0);
        }
        else ERREUR("unknown option:", argv[nb]);
    }
    if (!chprefix) ERREUR("bad syntax, check '-h'", "");

    /* ---- XML to section files ---- */
    ordre = STRICT; repetition = False; file = NULL; actionsection = 0;
    findend = False; nbend = 0; nbseq = 0;
    currentstate = 0; statedebseq = 0;   /* defined values before the first <section> */

    for (idac = nbsection = 0; fgets(ch, TailleLigne, stdin); ) {
        if (strstr(ch, "<section")) {
            /* Close the section that was being written, if any. */
            if (file) {
                if (nbseq > 0) {
                    if (!findend) {
                        /* No "#end" keyword in the last sequence: its last state becomes an end state. */
                        if (ordre == VARIABLE) {
                            fprintf(stderr, "ERREUR: no exit action on a variable section, in section %d and sequence %d\n", nbsection, nbseq);
                            exit(EXIT_FAILURE);
                        }
                        t_end[nbend++] = currentstate;
                    }
                    if (nbend > 0) {   /* emit the end-of-section actions */
                        snprintf(chcontent, sizeof chcontent, "action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")", nbsection, nbseq, idac, nbseq);
                        actionsequence = from_action_to_index(chcontent);
                        snprintf(chcontent, sizeof chcontent, "action(%d,%d,%d,\"#ENDSECTION(%d)\",\"\")", nbsection, nbseq, idac, nbsection);
                        actionend = from_action_to_index(chcontent);
                        for (i = 0; i < nbend; i++)
                            fprintf(file, "%d\t%d\t<epsilon>\t%d\t0\n", t_end[i], currentstate + 1, actionsequence);
                        fprintf(file, "%d\t%d\n", currentstate + 1, PENALEND);   /* in order to quit on ANY action */
                        fprintf(file, "%d\t%d\t<epsilon>\t%d\t0\n", currentstate + 1, currentstate + 2, actionend);
                        fprintf(file, "%d\n", currentstate + 2);
                        currentstate += 2;
                        nbend = 0;
                    }
                }
                /* Always release the previous file, even when the section
                   had no sequence, so that the handle is not leaked. */
                fclose(file);
                file = NULL;
            }

            nbsection++; nbseq = 0; currentstate = 0;
            get_field(ch, "id", chfield);
            len = snprintf(ch, sizeof ch, "%s_section%s.txt", chprefix, chfield);
            file = open_for_writing(ch, len, sizeof ch);

            /* NOTE(review): the previous code looked up the `action`
               attribute of the section only after the line buffer had been
               overwritten with a file name, so it was never found and no
               section action was ever emitted.  That effective behaviour is
               kept here so the generated files do not change; confirm
               whether section actions are meant to be honoured. */
            actionsection = 0;
        }
        else if (strstr(ch, "<sequence")) {   /* <sequence ordre="strict" repetition="oui" action="" lang="eng"> */
            if (!file) ERREUR("<sequence> found before any <section>:", ch);

            /* Close the previous sequence of this section. */
            if ((nbseq != 0) && (!findend)) {
                if (ordre == VARIABLE) {
                    fprintf(stderr, "ERREUR: no exit action on a variable section, in section %d and sequence %d\n", nbsection, nbseq);
                    exit(EXIT_FAILURE);
                }
                t_end[nbend++] = currentstate;
            }
            if (nbend > 0) {
                snprintf(chcontent, sizeof chcontent, "action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")", nbsection, nbseq, idac, nbseq);
                actionsequence = from_action_to_index(chcontent);
                for (i = 0; i < nbend; i++)
                    fprintf(file, "%d\t%d\t<epsilon>\t%d\t0\n", t_end[i], currentstate + 1, actionsequence);
                fprintf(file, "%d\n", currentstate + 1);
                currentstate++;
                nbend = 0;
            }

            statedebseq = currentstate;
            findend = False;
            nbseq++;

            get_field(ch, "ordre", chfield);
            if (!strcmp(chfield, "strict")) ordre = STRICT;
            else if (!strcmp(chfield, "variable")) ordre = VARIABLE;
            else ERREUR("wrong value for attribut ordre:", ch);
            repetition = strcmp(get_field(ch, "repetition", chfield), "oui") ? False : True;

            if (actionsection != 0) {
                fprintf(file, "%d\t%d\t<epsilon>\t%d\t0\n", currentstate, currentstate + 1, actionsection);
                currentstate++;
            }
            /* NOTE(review): the previous code overwrote the line buffer with
               a name built from an uninitialised array (undefined behaviour)
               before looking up the `action` attribute of the sequence, so
               that attribute was never found.  The broken lookup is removed
               and the effective behaviour (no sequence action) is kept;
               confirm whether sequence actions are meant to be honoured. */
        }
        else if (strstr(ch, "<keyword")) {
            /* <keyword action="open_2D" lang="eng"> install the new version of me </keyword> */
            /* <keyword action="#end" lang="eng"> give me my data </keyword> */
            get_field(ch, "action", chfield);
            if (chfield[0]) {
                if (!file) ERREUR("<keyword> found before any <section>:", ch);

                get_content(ch, chcontent);
                len = snprintf(ch, sizeof ch, "action(%d,%d,%d,\"%s\",\"%s\")", nbsection, nbseq, idac++, chfield, chcontent);
                if ((len < 0) || ((size_t)len >= sizeof ch)) ERREUR("action label too long:", chfield);
                remove_space(ch);
                action = from_action_to_index(ch);

                /* Split the keyword text into words; t_field ends with a NULL entry. */
                for (i = 1, t_field[0] = strtok(chcontent, " "); t_field[i - 1]; i++)
                    t_field[i] = strtok(NULL, " ");

                /* One arc per word; only the arc of the first word carries the action. */
                for (i = 0; t_field[i]; i++) {
                    if ((ordre == STRICT) || (i > 0)) {
                        fprintf(file, "%d\t%d\t%s\t%d\t0\n", currentstate + i, currentstate + 1 + i, t_field[i], i == 0 ? action : 0);
                        /* we add a possible jump inside a multiword action, except for the first and last word */
                        if ((i > 0) && (t_field[i + 1]))
                            fprintf(file, "%d\t%d\t%s\t%d\t%d\n", currentstate + i, currentstate + 1 + i, "<epsilon>", 0, PENALSUB);
                    }
                    else {
                        /* first word of a keyword in a variable sequence: leave from the sequence start state */
                        fprintf(file, "%d\t%d\t%s\t%d\t0\n", statedebseq, currentstate + 1 + i, t_field[i], i == 0 ? action : 0);
                    }
                    from_word_to_index(t_field[i]);
                }
                /* Here i is the number of words of the keyword. */

                if (ordre == STRICT) {
                    /* t_field[0] is NULL for an empty keyword: never hand it to %s */
                    if ((repetition) && (t_field[0]))
                        fprintf(file, "%d\t%d\t%s\t%d\t0\n", currentstate + i, currentstate + 1, t_field[0], action);
                }
                else fprintf(file, "%d\t%d\t<epsilon>\t0\t0\n", currentstate + i, statedebseq);

                /* now we can end at any keyword, but penality if not a valid end !! */
                if (!strcmp(chfield, "#end")) {
                    findend = True;
                    t_end[nbend++] = currentstate + i;
                    if (nbend == MAX_END_STATE) ERREUR("cste MAX_END_STATE too small", "");
                }
                fprintf(file, "%d\t%d\n", currentstate + i, PENALEND);

                /* we add glouton transition if it's a strict section */
                if (ordre == STRICT)
                    fprintf(file, "%d\t%d\t%s\t%d\t%d\n", currentstate, currentstate + i, CHglouton, action, PENALGLOU);
                currentstate += i;
            }
        }
    }

    /* ---- close the last section ---- */
    if ((file) && (nbseq > 0)) {
        if (!findend) {
            if (ordre == VARIABLE) {
                fprintf(stderr, "ERREUR: no exit action on a variable section, in section %d and sequence %d\n", nbsection, nbseq);
                exit(EXIT_FAILURE);
            }
            t_end[nbend++] = currentstate;
        }
        if (nbend > 0) {   /* emit the end-of-section actions */
            /* NOTE(review): unlike the in-loop section close, idac is
               advanced between these two labels; kept as it was. */
            snprintf(chcontent, sizeof chcontent, "action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")", nbsection, nbseq, idac++, nbseq);
            actionsequence = from_action_to_index(chcontent);
            snprintf(chcontent, sizeof chcontent, "action(%d,%d,%d,\"#ENDSECTION(%d)\",\"\")", nbsection, nbseq, idac++, nbsection);
            actionend = from_action_to_index(chcontent);
            for (i = 0; i < nbend; i++)
                fprintf(file, "%d\t%d\t<epsilon>\t%d\t0\n", t_end[i], currentstate + 1, actionsequence);
            fprintf(file, "%d\t%d\n", currentstate + 1, PENALEND);   /* in order to quit on ANY action */
            fprintf(file, "%d\t%d\t<epsilon>\t%d\t0\n", currentstate + 1, currentstate + 2, actionend);
            fprintf(file, "%d\n", currentstate + 2);
        }
    }
    if (file) fclose(file);

    /* ---- write tail GLOUTON eraser ---- */
    len = snprintf(ch, sizeof ch, "%s_clean_tail.txt", chprefix);
    file = open_for_writing(ch, len, sizeof ch);
    for (i = STARTNEW; i < NbWord; i++) fprintf(file, "0\t0\t%s\t%s\n", T_dico_word[i], T_dico_word[i]);
    for (i = STARTNEW; i < NbWord; i++) fprintf(file, "0\t1\t%s\t%s\n", T_dico_word[i], T_dico_word[i]);
    fprintf(file, "0\t2\t%s\t%s\n", CHglouton, CHglouton);
    fprintf(file, "1\n");
    for (i = STARTNEW; i < NbWord; i++) fprintf(file, "2\t0\t%s\t%s\n", T_dico_word[i], T_dico_word[i]);
    for (i = STARTNEW; i < NbWord; i++) fprintf(file, "2\t1\t%s\t%s\n", T_dico_word[i], T_dico_word[i]);
    fclose(file);

    /* ---- write dico action ---- */
    len = snprintf(ch, sizeof ch, "%s_dico_action.txt", chprefix);
    file = open_for_writing(ch, len, sizeof ch);
    fprintf(file, "<epsilon> 0\n");
    fprintf(file, "%s 1\n", CHglouton);
    for (i = STARTNEW; i < NbAction; i++) fprintf(file, "%s %d\n", T_dico_action[i], i);
    fclose(file);

    /* ---- write dico word ---- */
    len = snprintf(ch, sizeof ch, "%s_dico_word.txt", chprefix);
    file = open_for_writing(ch, len, sizeof ch);
    fprintf(file, "<epsilon> 0\n");
    fprintf(file, "%s 1\n", CHglouton);
    for (i = STARTNEW; i < NbWord; i++) fprintf(file, "%s %d\n", T_dico_word[i], i);
    fclose(file);

    return 0;
}