Skip to content
Snippets Groups Projects
Select Git revision
  • 096b59d92c7784ea16d80c330c2a346bb75a9c25
  • master default protected
  • loss
  • producer
4 results

CMakeLists.txt

Blame
  • process_xml_for_slu.c 10.90 KiB
    /*  Process Rocio XML for SLU  */
    /*  FRED 0215  */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <strings.h>
    
    /*................................................................*/
    
    /* Size of the line buffer handed to fgets() and of the scratch buffers
       (ch, chname, chfield, chcontent) declared in main(). */
    #define TailleLigne     8000
    
    /* Integer booleans, used for the repetition and findend flags in main(). */
    #define True    1
    #define False   0
    
    /*
     * Fatal error with two string arguments.
     * Prints "ERREUR : <ch1> <ch2>" on stderr and terminates the program;
     * this function never returns.
     * The exit status is EXIT_FAILURE so that a calling script or build rule
     * can tell an aborted run from a successful one.
     */
    void ERREUR(char *ch1,char *ch2)
    {
        fprintf(stderr,"ERREUR : %s %s\n",ch1,ch2);
        exit(EXIT_FAILURE);
    }
    
    /*
     * Fatal error with a string and an integer argument.
     * Prints "ERREUR : <ch1> <i>" on stderr and terminates the program;
     * this function never returns.
     * The exit status is EXIT_FAILURE so that a calling script or build rule
     * can tell an aborted run from a successful one.
     */
    void ERREURd(char *ch1, int i)
    {
        fprintf(stderr,"ERREUR : %s %d\n",ch1,i);
        exit(EXIT_FAILURE);
    }
    
    /*................................................................*/
    
    /* FST DICO */
    
    #define MAX_SIZE_DICO	1000
    #define IGLOU		1
    #define PENALGLOU	100
    #define PENALEND	100
    #define PENALSUB	10
    #define STARTNEW	2
    
    char *CHglouton="<joker>";
    char *CHepsilon="<epsilon>";
    
    char *T_dico_action[MAX_SIZE_DICO],*T_dico_word[MAX_SIZE_DICO];
    int NbAction=STARTNEW,NbWord=STARTNEW;
    
    /*
     * Returns the index of the action string ch in T_dico_action, appending a
     * private copy of it when it is not there yet.
     *
     * ch      : NUL-terminated action string to look up
     * returns : index of the entry (>= STARTNEW)
     *
     * Calls ERREUR() when the copy cannot be allocated, or when the insertion
     * fills the table (NbAction reaches MAX_SIZE_DICO).
     */
    int from_action_to_index(char *ch)
    {
        int i;

        for(i=STARTNEW;i<NbAction;i++)
            if (!strcmp(ch,T_dico_action[i])) return i;

        /* Not found: i==NbAction is the slot of the new entry.  A NULL entry
           would crash the strcmp() of a later lookup, so a failed strdup() is
           reported at once. */
        if (!(T_dico_action[i]=strdup(ch))) ERREUR("out of memory, can't store action:",ch);
        NbAction++;
        if (NbAction==MAX_SIZE_DICO) ERREUR("cste MAX_SIZE_DICO too small","");
        return i;
    }
    
    /*
     * Returns the index of the word ch in T_dico_word, appending a private copy
     * of it when it is not there yet.
     *
     * ch      : NUL-terminated word to look up
     * returns : index of the entry (>= STARTNEW)
     *
     * Calls ERREUR() when the copy cannot be allocated, or when the insertion
     * fills the table (NbWord reaches MAX_SIZE_DICO).
     */
    int from_word_to_index(char *ch)
    {
        int i;

        for(i=STARTNEW;i<NbWord;i++)
            if (!strcmp(ch,T_dico_word[i])) return i;

        /* Not found: i==NbWord is the slot of the new entry.  A NULL entry
           would crash the strcmp() of a later lookup, so a failed strdup() is
           reported at once. */
        if (!(T_dico_word[i]=strdup(ch))) ERREUR("out of memory, can't store word:",ch);
        NbWord++;
        if (NbWord==MAX_SIZE_DICO) ERREUR("cste MAX_SIZE_DICO too small","");
        return i;
    }
    
    /*................................................................*/
    
    /* format:
    <homeostasis version="25-11-2014">
        <liste_section sequences="1,2,3,4,5,6,7,8,9" ordre="variable" repetition="oui" action="exclusive">
        <section id="1" action="start_scene1">
            <sequence ordre="strict" repetition="non" action="" lang="eng">
               <keyword action="start_scene1" lang="esp"> uno </keyword>
            </sequence>
            </section>
            <section id="2" action="close_scene1/start_scene2">
                <sequence ordre="strict" repetition="non" action="" lang="esp">
                    <keyword action="open_scene2" lang="esp"> dos </keyword>
                    <keyword action="open_2A" lang="eng"> open system </keyword>
                </sequence>
              <sequence ordre="strict" repetition="oui" action="" lang="eng">
                <keyword action="start_system_voice" lang="eng"> tell me </keyword>
                <keyword action="open_2C2" lang="eng"> next </keyword>
                <keyword action="open_2D" lang="eng"> install the new version of me </keyword>
                <keyword action="#end" lang="eng"> give me my data </keyword>
            </sequence>
        </section>
    */
    
    #define STRICT		0
    #define VARIABLE	1
    
    /*
     * Extracts the value of attribute `attribut` from the tag line `ch`.
     *
     * The attribute is recognised as a space character, the attribute name and
     * '=' (so ` id="3"` matches "id"; a name preceded by a tab does not).  The
     * value must be enclosed in double quotes.
     *
     * ch       : NUL-terminated line to search (not modified)
     * attribut : attribute name, without '='
     * chfield  : output buffer, which must be able to hold the value; main()
     *            passes a buffer as large as the line itself
     * returns  : chfield, holding the value, or the empty string when the
     *            attribute is not present on the line
     *
     * Calls ERREUR("bad format1:", ch) when the '=' is not followed by a double
     * quote or when the closing quote is missing.  Checking the opening quote
     * keeps the scan from stepping over the terminating NUL on a line that ends
     * right after '='.
     */
    char *get_field(char *ch, char *attribut, char *chfield)
    {
        size_t len=strlen(attribut);
        char *p;
        int j;

        chfield[0]='\0';

        /* locate " attribut=" */
        for(p=ch;*p;p++)
            if ((*p==' ')&&(!strncmp(p+1,attribut,len))&&(p[1+len]=='=')) break;
        if (!*p) return chfield;

        p+=1+len+1;                     /* first character after '=' */
        if (*p!='"') ERREUR("bad format1:",ch);

        for(j=0,p++;(*p)&&(*p!='"');p++,j++) chfield[j]=*p;
        if (*p!='"') ERREUR("bad format1:",ch);
        chfield[j]='\0';
        return chfield;
    }
    
    /*
     * Copies into chcontent the text found between the first '>' of ch and the
     * next '<', without its leading and trailing spaces and tabs.
     *
     * ch        : NUL-terminated tag line (not modified)
     * chcontent : output buffer, which must be able to hold the text
     * returns   : chcontent
     *
     * Calls ERREUR("bad format2:", ch) when the line has no '>' and
     * ERREUR("bad format3:", ch) when no '<' follows the text.
     */
    char *get_content(char *ch, char *chcontent)
    {
        char *start;
        size_t len;

        chcontent[0]='\0';

        start=strchr(ch,'>');
        if (start==NULL) ERREUR("bad format2:",ch);

        /* step over '>' and the blanks that follow it */
        start++;
        start+=strspn(start," \t");

        /* the text runs up to the next '<', which must exist */
        len=strcspn(start,"<");
        if (start[len]=='\0') ERREUR("bad format3:",ch);
        memcpy(chcontent,start,len);
        chcontent[len]='\0';

        /* drop trailing blanks; position 0 is never examined */
        while ((len>1)&&((chcontent[len-1]==' ')||(chcontent[len-1]=='\t')))
            chcontent[--len]='\0';

        return chcontent;
    }
    
    /* Replaces, in place, every space character of ch by an underscore.
       Other characters, tabs included, are left alone. */
    void remove_space(char *ch)
    {
        char *cursor=ch;

        while (*cursor!='\0')
        {
            if (*cursor==' ') *cursor='_';
            cursor++;
        }
    }
    
    /*................................................................*/
    
    /* Size of the t_field array of main(), which receives the strtok() tokens of
       one keyword's content; main() does not bounds-check it. */
    #define MAX_FIELD	60000
    /* Size of the t_end array of main() (pending end states of a section). */
    #define MAX_END_STATE	1000
    
    /*
     * Program entry point.
     *
     * Command line: [-h] -prefix <string>.  "-prefix" is mandatory and gives the
     * leading part of every output file name; "-h" prints the syntax and exits.
     *
     * The description is read from stdin one fgets() line at a time.  Only lines
     * containing "<section", "<sequence" or "<keyword" are interpreted, so one
     * tag per line is expected; every other line is skipped.  Files written:
     *   <prefix>_section<id>.txt   arc lines "src dst symbol action weight" and
     *                              final-state lines "state [weight]" for one
     *                              <section> (NOTE(review): looks like the
     *                              AT&T/OpenFst text format - confirm)
     *   <prefix>_clean_tail.txt    three-state machine over the collected words
     *                              and the joker symbol
     *   <prefix>_dico_action.txt   "<action string> <index>" table
     *   <prefix>_dico_word.txt     "<word> <index>" table
     *
     * The "no exit action" error exits with EXIT_FAILURE; the other errors go
     * through ERREUR().
     */
    int main(int argc, char **argv)
    {
        int nbsection,i,nb,ordre,repetition,nbseq,actionsection,actionsequence;
        int currentstate,statedebseq,action,findend,t_end[MAX_END_STATE],nbend,idac;
        FILE *file;
        char ch[TailleLigne],*t_field[MAX_FIELD],*chprefix;
        char chname[TailleLigne],chfield[TailleLigne],chcontent[TailleLigne];

        chprefix=NULL;
        for(nb=1;nb<argc;nb++)
        {
            if (!strcmp(argv[nb],"-prefix"))
            {
                if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]);
                chprefix=argv[++nb];
            }
            else if (!strcmp(argv[nb],"-h"))
            {
                fprintf(stderr,"Syntax: %s [-h] -prefix <string>\n",argv[0]);
                exit(0);
            }
            else ERREUR("unknown option:",argv[nb]);
        }

        if (!chprefix) ERREUR("bad syntax, check '-h'","");

        /* chname is never filled in by this program; it is kept as an empty
           string so that the name built from it in the <sequence> branch does
           not read indeterminate memory. */
        chname[0]='\0';

        ordre=STRICT; repetition=False; file=NULL; actionsection=0; findend=False; nbend=0; nbseq=0;
        /* defined values even if a tag shows up before the first <section>/<sequence> */
        currentstate=0; statedebseq=0;

        for(idac=nbsection=0;fgets(ch,TailleLigne,stdin);)
        {
            if (strstr(ch,"<section"))
            {
                /* finish the previous section, if any */
                if (file)
                {
                    if (nbseq>0)
                    {
                        if (!findend)
                        {
                            if (ordre==VARIABLE) { fprintf(stderr,"ERREUR: no exit action on a variable section, in section %d and sequence %d\n",nbsection,nbseq); exit(EXIT_FAILURE); }
                            t_end[nbend++]=currentstate;
                        }
                        if (nbend>0) /* emit the end-of-section actions */
                        {
                            snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,idac,nbseq); actionsequence=from_action_to_index(chcontent);
                            snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSECTION(%d)\",\"\")",nbsection,nbseq,idac,nbsection); actionsection=from_action_to_index(chcontent);
                            for(i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
                            fprintf(file,"%d\t%d\n",currentstate+1,PENALEND); /* in order to quit on ANY action */
                            fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate+1,currentstate+2,actionsection);
                            fprintf(file,"%d\n",currentstate+2);
                            currentstate+=2;
                            nbend=0;
                        }
                    }
                    /* closed even when the section had no sequence, so the
                       stream is not lost when file is reassigned below */
                    fclose(file); file=NULL;
                }

                nbsection++; nbseq=0; currentstate=0; //idac=0;
                get_field(ch,"id",chfield);
                /* snprintf bounds the write; an over-long prefix gives a
                   truncated name instead of overflowing ch */
                snprintf(ch,sizeof ch,"%s_section%s.txt",chprefix,chfield);
                if (!(file=fopen(ch,"wt"))) ERREUR("can't write in:",ch);
                snprintf(ch,sizeof ch,"%s_section%s_text.txt",chprefix,chfield);
                /* NOTE(review): ch no longer holds the input line here, so the
                   "action" attribute of <section> is in practice never found and
                   actionsection stays 0.  Left as is because reading the real
                   attribute would add arcs and dictionary entries to every
                   generated file - confirm the intent before changing it. */
                get_field(ch,"action",chfield);
                if (chfield[0]) { snprintf(ch,sizeof ch,"action(\"%s\",\"\")",chfield); actionsection=from_action_to_index(ch); } else actionsection=0;
            }
            else if (strstr(ch,"<sequence")) // <sequence ordre="strict" repetition="oui" action="" lang="eng">
            {
                /* the arcs below are written to the section file */
                if (!file) ERREUR("<sequence> found before any <section>:",ch);

                if ((nbseq!=0)&&(!findend))
                {
                    if (ordre==VARIABLE) { fprintf(stderr,"ERREUR: no exit action on a variable section, in section %d and sequence %d\n",nbsection,nbseq); exit(EXIT_FAILURE); }
                    t_end[nbend++]=currentstate;
                }
                if (nbend>0)
                {
                    snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,idac,nbseq); actionsequence=from_action_to_index(chcontent);
                    for (i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
                    /* final state with weight PENALEND, written "state<TAB>weight"
                       like the other final-state lines of this file; a newline
                       between the two numbers would instead declare state
                       number PENALEND as an unweighted final state */
                    fprintf(file,"%d\t%d\n",currentstate+1,PENALEND);
                    currentstate++;
                    nbend=0;
                }
                statedebseq=currentstate;
                findend=False;
                nbseq++;
                //idac=0;
                get_field(ch,"ordre",chfield);
                if (!strcmp(chfield,"strict")) ordre=STRICT; else
                if (!strcmp(chfield,"variable")) ordre=VARIABLE; else ERREUR("wrong value for attribut ordre:",ch);
                if (!strcmp(get_field(ch,"repetition",chfield),"oui")) repetition=True; else repetition=False;
                /* NOTE(review): as in the <section> branch, ch stops holding the
                   input line here, so the "action" attribute of <sequence> is
                   never found by the lookup below - confirm the intent. */
                snprintf(ch,sizeof ch,"%s_sequence%d.txt",chname,nbseq);
                if (actionsection!=0) { fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate,currentstate+1,actionsection); currentstate++; }
                get_field(ch,"action",chfield);
                if (chfield[0]) { fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate,currentstate+1,from_action_to_index(chfield)); currentstate++; }
            }
            else if (strstr(ch,"<keyword"))
            {
                // <keyword action="open_2D" lang="eng"> install the new version of me </keyword>
                // <keyword action="#end" lang="eng"> give me my data </keyword>

                /* the arcs below are written to the section file */
                if (!file) ERREUR("<keyword> found before any <section>:",ch);

                get_field(ch,"action",chfield);
                if (chfield[0])
                {
                    get_content(ch,chcontent);
                    /* idac numbers the keyword actions across the whole input */
                    snprintf(ch,sizeof ch,"action(%d,%d,%d,\"%s\",\"%s\")",nbsection,nbseq,idac++,chfield,chcontent);
                    remove_space(ch);
                    action=from_action_to_index(ch);
                    /* split the content into words; t_field ends with a NULL entry */
                    for(i=1,t_field[0]=strtok(chcontent," ");t_field[i-1];i++) t_field[i]=strtok(NULL," ");
                    for(i=0;t_field[i];i++)
                    {
                        if ((ordre==STRICT)||(i>0))
                        {
                            fprintf(file,"%d\t%d\t%s\t%d\t0\n",currentstate+i,currentstate+1+i,t_field[i],i==0?action:0);
                            /* we add a possible jump inside a multiword action, except for the first and last word */
                            if ((i>0)&&(t_field[i+1])) fprintf(file,"%d\t%d\t%s\t%d\t%d\n",currentstate+i,currentstate+1+i,"<epsilon>",0,PENALSUB);
                        }
                        else fprintf(file,"%d\t%d\t%s\t%d\t0\n",statedebseq,currentstate+1+i,t_field[i],i==0?action:0);
                        from_word_to_index(t_field[i]);
                    }
                    /* from here on i is the number of words of the keyword */
                    if (ordre==STRICT)
                    {
                        /* i>0: t_field[0] is NULL when the content is empty and
                           must not be handed to %s */
                        if ((repetition)&&(i>0)) fprintf(file,"%d\t%d\t%s\t%d\t0\n",currentstate+i,currentstate+1,t_field[0],action);
                    }
                    else fprintf(file,"%d\t%d\t<epsilon>\t0\t0\n",currentstate+i,statedebseq);
                    /* now we can end at any keyword, but penality if not a valid end !! */
                    if (!strcmp(chfield,"#end")) { findend=True; t_end[nbend++]=currentstate+i; if (nbend==MAX_END_STATE) ERREUR("cste MAX_END_STATE too small",""); }
                    fprintf(file,"%d\t%d\n",currentstate+i,PENALEND);
                    // we add glouton transition if it's a strict section
                    if (ordre==STRICT) fprintf(file,"%d\t%d\t%s\t%d\t%d\n",currentstate,currentstate+i,CHglouton,action,PENALGLOU);
                    currentstate+=i;
                }
            }
        }

        /* finish the last section */
        if ((file)&&(nbseq>0))
        {
            if (!findend)
            {
                if (ordre==VARIABLE) { fprintf(stderr,"ERREUR: no exit action on a variable section, in section %d and sequence %d\n",nbsection,nbseq); exit(EXIT_FAILURE); }
                t_end[nbend++]=currentstate;
            }
            if (nbend>0) /* emit the end-of-section actions */
            {
                /* NOTE(review): unlike the in-loop copy of this code, idac is
                   incremented between the two action strings here - kept as found. */
                snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,idac++,nbseq); actionsequence=from_action_to_index(chcontent);
                snprintf(chcontent,sizeof chcontent,"action(%d,%d,%d,\"#ENDSECTION(%d)\",\"\")",nbsection,nbseq,idac++,nbsection); actionsection=from_action_to_index(chcontent);
                for(i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
                fprintf(file,"%d\t%d\n",currentstate+1,PENALEND); /* in order to quit on ANY action */
                fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate+1,currentstate+2,actionsection);
                fprintf(file,"%d\n",currentstate+2);
            }
        }

        if (file) fclose(file);

        // write tail GLOUTON eraser
        snprintf(ch,sizeof ch,"%s_clean_tail.txt",chprefix);
        if (!(file=fopen(ch,"wt"))) ERREUR("can't write in:",ch);
        for(i=STARTNEW;i<NbWord;i++) fprintf(file,"0\t0\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
        for(i=STARTNEW;i<NbWord;i++) fprintf(file,"0\t1\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
        fprintf(file,"0\t2\t%s\t%s\n",CHglouton,CHglouton);
        fprintf(file,"1\n");
        for(i=STARTNEW;i<NbWord;i++) fprintf(file,"2\t0\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
        for(i=STARTNEW;i<NbWord;i++) fprintf(file,"2\t1\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
        fclose(file);

        // write dico action
        snprintf(ch,sizeof ch,"%s_dico_action.txt",chprefix);
        if (!(file=fopen(ch,"wt"))) ERREUR("can't write in:",ch);
        fprintf(file,"<epsilon> 0\n"); fprintf(file,"%s 1\n",CHglouton);
        for(i=STARTNEW;i<NbAction;i++) fprintf(file,"%s %d\n",T_dico_action[i],i);
        fclose(file);

        // write dico word
        snprintf(ch,sizeof ch,"%s_dico_word.txt",chprefix);
        if (!(file=fopen(ch,"wt"))) ERREUR("can't write in:",ch);
        fprintf(file,"<epsilon> 0\n"); fprintf(file,"%s 1\n",CHglouton);
        for(i=STARTNEW;i<NbWord;i++) fprintf(file,"%s %d\n",T_dico_word[i],i);
        fclose(file);

        exit(0);
    }