Skip to content
Snippets Groups Projects
Commit 0d9d0eb2 authored by Mickael Rouvier's avatar Mickael Rouvier
Browse files

Merge branch 'master' of gitlab.lif.univ-mrs.fr:benoit.favre/interface-roccio

parents 943eb17c 4d408837
No related branches found
No related tags found
No related merge requests found
/* Managing a lexicon with IDs */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/*................................................................*/
/* Boolean values used as return codes throughout this file */
#define False 0
#define True 1
/* Size of the line buffers used when reading lexicon files */
#define TailleLigne 4000
/* NOTE(review): not referenced by the code visible in this file */
#define LIA_MAX_TAILLE_MESSAGE 4000
/* When non-zero, the AVL routines write a trace into their `mesg` buffer */
#define VERBOSE 0
/* Counter incremented each time new_tree_mot_node() creates a node */
int LIA_AVL_NB_NODE;
/* Error reporter, only declared here; presumably it terminates the
 * program (the callers below do not handle a return) - TODO confirm */
void ERREUR(const char *, const char *);
/*................................................................*/
/* A lexicon entry: a word (key_string) paired with an integer code. */
typedef struct
{
char *key_string; /* private heap copy of the word, or NULL */
int code;         /* integer code attached to the word */
} type_info;
/* Create a new entry holding a private copy of `key` and the value `code`.
 * - key  : word to copy, may be NULL (the entry then has no key string)
 * - code : integer stored in the entry
 * Returns the new entry, or NULL when memory cannot be allocated.
 * The entry must be released with free_type_info(). */
type_info *new_type_info(char *key, int code)
{
type_info *pt;
size_t size;
pt=(type_info *)malloc(sizeof *pt);
if (pt==NULL) return NULL;
pt->key_string=NULL;
pt->code=code;
if (key)
 {
 /* explicit copy (ISO C only) so that an allocation failure is detected */
 size=strlen(key)+1;
 pt->key_string=(char *)malloc(size);
 if (pt->key_string==NULL) { free(pt); return NULL; }
 memcpy(pt->key_string,key,size);
 }
return pt;
}
/* Release an entry created by new_type_info(); NULL is accepted. */
void free_type_info(type_info *pt)
{
if (pt==NULL) return;
free(pt->key_string); /* free(NULL) is a no-op */
free(pt);
}
/* Write one entry on `file`:
 * - format==2 : "<code> <word>\n"
 * - otherwise : "<word>\t<code>\n" */
void print_type_info(type_info* info, int format, FILE *file)
{
if (format==2) fprintf(file,"%d %s\n",info->code,info->key_string);
else fprintf(file,"%s\t%d\n",info->key_string,info->code);
}
/*................................................................*/
/* declaration du type noeud des arbres AVL */
/* One node of the AVL tree; note that lia_avl_t is a POINTER to a node. */
typedef struct lia_avl_type
{
signed char dq; /* balance indicator: lia_insere_avl() increments it when the
                   left subtree grows and decrements it when the right one grows */
type_info *info; /* entry stored in the node (freed together with the node by lia_libere_avl) */
struct lia_avl_type *fg,*fd; /* fg = left child, fd = right child */
} *lia_avl_t;
/*................................................................*/
/* les rotations */
/* parametres :
1- lia_avl_t = racine de l'arbre */
/* retour : True=rotation effectuee / False=rotation impossible */
int lia_rotation_avl_droite(lia_avl_t);
int lia_rotation_avl_gauche(lia_avl_t);
int lia_rotation_avl_gauche_droite(lia_avl_t);
int lia_rotation_avl_droite_gauche(lia_avl_t);
/*................................................................*/
/* l'insertion d'un element */
/* parameters:
1- lia_avl_t = root of the tree
2- type_info * = entry to add to the tree
3- int = boolean (True or False), True = insert with rebalancing
4- char * = string receiving the trace of the insertion */
/* retour : lia_avl_t = racine de l'arbre modifie */
lia_avl_t lia_ajoute_element_avl(lia_avl_t, type_info *, int, char *);
/*................................................................*/
/* la liberation de la place memoire de l'arbre */
/* parametres :
1- lia_avl_t = racine de l'arbre */
void lia_libere_avl(lia_avl_t);
/*................................................................*/
/* la recherche d'un element */
/* parameters:
1- lia_avl_t = root of the tree
2- type_info * = entry to look for in the tree (compared on its key string) */
/* retour : the node containing the info or NULL */
lia_avl_t lia_recherche_avl(lia_avl_t, type_info *);
/*................................................................*/
/* l'affichage de l'arbre */
/* parametres :
1- lia_avl_t = racine de l'arbre a afficher */
void lia_affiche_avl(lia_avl_t);
void lia_affiche_avl_simple(lia_avl_t ,FILE *);
/*................................................................*/
/* copy all the nodes of a tree into an array and sort them
* according to their frequency */
/* parameters:
* 1- lia_avl_t = root of the tree to copy
* 2- int * = return value containing the size of the array */
/* return : the address of the table containing all the nodes sorted */
lia_avl_t *lia_avl_tree2table_freq(lia_avl_t , int *);
/*................................................................*/
/* binary search, according to the code (or freq) on the table of nodes:
* 1- lia_avl_t * = address of the node table (obtained with lia_avl_tree2table_freq)
* 2- int = size of the table (# of elements)
* 3- int = code or freq looked for
* 4- int * = receives the index of the matching node in the table (0 if not found) */
lia_avl_t lia_avl_code2word(lia_avl_t *, int, int, int*);
/*................................................................*/
/* Info */
/*
typedef struct
{
char *key_string,*field;
} type_info;
*/
/* Order two entries on their key strings.
 * Returns <0, 0 or >0 with the same meaning as strcmp(). */
int compare_info(type_info *pt1, type_info *pt2)
{
const char *left_key=pt1->key_string;
const char *right_key=pt2->key_string;
return strcmp(left_key,right_key);
}
/*................................................................*/
/* les rotations */
/* Right rotation around `pt`, performed in place: the node `pt` stays at the
 * same address, so the parent's link does not change. The contents (info and
 * dq) of `pt` and of its left child are exchanged, then the children are
 * re-linked.
 * Returns True when done, False when the rotation is undefined
 * (pt is NULL or has no left child). */
int lia_rotation_avl_droite(lia_avl_t pt)
{
lia_avl_t left,left_right,old_right;
type_info *saved_info;
char saved_dq;
if (pt==NULL) return False;
left=pt->fg;
if (left==NULL) return False;
/* exchange the payload of pt and of its left child */
saved_info=pt->info;
saved_dq=pt->dq;
pt->info=left->info;
pt->dq=left->dq;
left->info=saved_info;
left->dq=saved_dq;
/* the former left child becomes the right child of pt */
left_right=left->fd;
old_right=pt->fd;
pt->fg=left->fg;
pt->fd=left;
left->fg=left_right;
left->fd=old_right;
return True;
}
/* Left rotation around `pt`, performed in place: the node `pt` stays at the
 * same address, so the parent's link does not change. The contents (info and
 * dq) of `pt` and of its right child are exchanged, then the children are
 * re-linked.
 * Returns True when done, False when the rotation is undefined
 * (pt is NULL or has no right child). */
int lia_rotation_avl_gauche(lia_avl_t pt)
{
lia_avl_t right,right_left,old_left;
type_info *saved_info;
char saved_dq;
if (pt==NULL) return False;
right=pt->fd;
if (right==NULL) return False;
/* exchange the payload of pt and of its right child */
saved_info=pt->info;
saved_dq=pt->dq;
pt->info=right->info;
pt->dq=right->dq;
right->info=saved_info;
right->dq=saved_dq;
/* the former right child becomes the left child of pt */
right_left=right->fg;
old_left=pt->fg;
pt->fd=right->fd;
pt->fg=right;
right->fd=right_left;
right->fg=old_left;
return True;
}
/* Double rotation: left rotation on the left child of `pt`, then right
 * rotation on `pt` itself.
 * Returns True when both rotations were performed, False otherwise
 * (including when pt is NULL, which the single rotations cannot detect
 * because pt->fg has to be read first). */
int lia_rotation_avl_gauche_droite(lia_avl_t pt)
{
if (pt==NULL) return False;
return ((lia_rotation_avl_gauche(pt->fg))&&(lia_rotation_avl_droite(pt)))?True:False;
}
/* Double rotation: right rotation on the right child of `pt`, then left
 * rotation on `pt` itself.
 * Returns True when both rotations were performed, False otherwise
 * (including when pt is NULL, which the single rotations cannot detect
 * because pt->fd has to be read first). */
int lia_rotation_avl_droite_gauche(lia_avl_t pt)
{
if (pt==NULL) return False;
return ((lia_rotation_avl_droite(pt->fd))&&(lia_rotation_avl_gauche(pt)))?True:False;
}
/*................................................................*/
/* la creation d'un noeud */
/* Create a leaf node holding `info` (the node takes the pointer, it does not
 * copy the entry) and count it in LIA_AVL_NB_NODE.
 * An allocation failure is reported through ERREUR() instead of
 * dereferencing a NULL pointer. */
lia_avl_t new_tree_mot_node(type_info *info)
{
lia_avl_t pt;
pt=(lia_avl_t)malloc(sizeof(struct lia_avl_type));
if (pt==NULL) { ERREUR("out of memory","new_tree_mot_node"); return NULL; }
pt->dq=0;
pt->info=info;
pt->fg=pt->fd=NULL;
LIA_AVL_NB_NODE++;
return pt;
}
/*................................................................*/
/* reequilibrage */
/* Rebalance a node whose dq reached +2 (lia_insere_avl calls it in that case).
 * - racine   : node to rebalance
 * - mesg     : trace buffer, only written when VERBOSE is non-zero
 * - si_modif : receives 0 when the left child's dq was 0 before the
 *              rotation, 1 otherwise
 * Returns True when a rotation was performed, False otherwise. */
int lia_reequilibre_droite(lia_avl_t racine,char *mesg,char *si_modif) /* racine->dq=+2 */
{
char *r_noeud;
if (racine==NULL)
{ if (VERBOSE) sprintf(mesg,"ERREUR : rotation impossible : racine==NULL"); return False; }
if (racine->fg==NULL)
{ if (VERBOSE) sprintf(mesg,"ERREUR : rotation droite impossible : [%s]->fg==NULL",racine->info->key_string); return False; }
/* remember the key now: the rotations exchange the info of the nodes */
r_noeud=racine->info->key_string;
*si_modif=racine->fg->dq==0?0:1;
if (racine->fg->dq>=0) /* 0 or +1 */
{
/* single right rotation */
if (lia_rotation_avl_droite(racine))
{
if (VERBOSE) sprintf(mesg+strlen(mesg)," rotation droite sur le noeud [%s]",r_noeud);
/* after the rotation racine->dq holds the former left child's dq */
if (racine->dq==1) racine->dq=racine->fd->dq=0; else { racine->dq=-1; racine->fd->dq=1; }
return True;
}
else
if (VERBOSE) sprintf(mesg,"ERREUR : rotation droite impossible sur le noeud [%s]",racine->info->key_string);
}
else
{
/* left child's dq is negative: double rotation (left then right) */
if (lia_rotation_avl_gauche_droite(racine))
{
if (VERBOSE) sprintf(mesg+strlen(mesg)," rotation gauche-droite sur le noeud [%s]",r_noeud);
/* set the children's dq from the value now stored in racine->dq */
switch (racine->dq)
{
case 1 : racine->fg->dq=0; racine->fd->dq=-1; break;
case -1 : racine->fg->dq=1; racine->fd->dq= 0; break;
case 0 : racine->fg->dq=racine->fd->dq=0; break;
}
racine->dq=0;
return True;
}
else
if (VERBOSE) sprintf(mesg,"ERREUR : gauche-droite impossible sur le noeud [%s]",racine->info->key_string);
}
return False;
}
/* Rebalance a node whose dq reached -2 (lia_insere_avl calls it in that case).
 * - racine   : node to rebalance
 * - mesg     : trace buffer, only written when VERBOSE is non-zero
 * - si_modif : receives 0 when the right child's dq was 0 before the
 *              rotation, 1 otherwise
 * Returns True when a rotation was performed, False otherwise. */
int lia_reequilibre_gauche(lia_avl_t racine,char *mesg,char *si_modif) /* racine->dq=-2 */
{
char *r_noeud;
if (racine==NULL)
{ if (VERBOSE) sprintf(mesg,"ERREUR : rotation impossible : racine==NULL"); return False; }
if (racine->fd==NULL)
{ if (VERBOSE) sprintf(mesg,"ERREUR : rotation gauche impossible : [%s]->fd==NULL",racine->info->key_string); return False; }
/* remember the key now: the rotations exchange the info of the nodes */
r_noeud=racine->info->key_string;
*si_modif=racine->fd->dq==0?0:1;
if (racine->fd->dq<1) /* -1 or 0 */
{
/* single left rotation */
if (lia_rotation_avl_gauche(racine))
{
if (VERBOSE) sprintf(mesg+strlen(mesg)," rotation gauche sur le noeud [%s]",r_noeud);
/* after the rotation racine->dq holds the former right child's dq */
if (racine->dq==-1) racine->dq=racine->fg->dq=0; else { racine->dq=1; racine->fg->dq=-1; }
return True;
}
else
if (VERBOSE) sprintf(mesg,"ERREUR : rotation gauche impossible sur le noeud [%s]",racine->info->key_string);
}
else
{
/* right child's dq is positive: double rotation (right then left) */
if (lia_rotation_avl_droite_gauche(racine))
{
if (VERBOSE) sprintf(mesg+strlen(mesg)," rotation droite-gauche sur le noeud [%s]",r_noeud);
/* set the children's dq from the value now stored in racine->dq */
switch (racine->dq)
{
case 1 : racine->fd->dq=-1; racine->fg->dq= 0; break;
case -1 : racine->fd->dq= 0; racine->fg->dq= 1; break;
case 0 : racine->fg->dq=racine->fd->dq=0; break;
}
racine->dq=0;
return True;
}
else
if (VERBOSE) sprintf(mesg,"ERREUR : droite-gauche impossible sur le noeud [%s]",racine->info->key_string);
}
return False;
}
/*................................................................*/
/* l'insertion d'un element */
/* Recursive insertion of `info` in the subtree rooted at `racine`.
 * - si_augm            : output flag, non-zero when the subtree became taller
 * - avec_reequilibrage : when non-zero, rotations are applied as soon as a
 *                        node's dq reaches +2 or -2
 * - mesg               : trace buffer, only written when VERBOSE is non-zero
 * Returns the root of the subtree. When the key is already present the tree
 * is left untouched and `info` is NOT stored (the caller keeps ownership). */
lia_avl_t lia_insere_avl(lia_avl_t racine, type_info *info, char *si_augm,int avec_reequilibrage,char *mesg)
{
int order;
/* empty subtree: the new leaf makes it one level taller */
if (racine==NULL)
 {
 *si_augm=1;
 return new_tree_mot_node(info);
 }
order=compare_info(racine->info,info);
if (order==0)
 {
 /* the key is already in the tree */
 if (VERBOSE) sprintf(mesg,"noeud [%s] deja present",info->key_string);
 *si_augm=0;
 return racine;
 }
if (order>0)
 {
 /* the new key goes to the left child */
 racine->fg=lia_insere_avl(racine->fg,info,si_augm,avec_reequilibrage,mesg);
 if (*si_augm)
  {
  if (racine->dq<0) *si_augm=0;
  racine->dq++;
  }
 /* rotation if needed */
 if ((avec_reequilibrage)&&(racine->dq==2))
  {
  lia_reequilibre_droite(racine,mesg,si_augm);
  *si_augm=0;
  }
 return racine;
 }
/* the new key goes to the right child */
racine->fd=lia_insere_avl(racine->fd,info,si_augm,avec_reequilibrage,mesg);
if (*si_augm)
 {
 if (racine->dq>0) *si_augm=0;
 racine->dq--;
 }
/* rotation if needed */
if ((avec_reequilibrage)&&(racine->dq==-2))
 {
 lia_reequilibre_gauche(racine,mesg,si_augm);
 *si_augm=0;
 }
return racine;
}
/* Public entry point of the insertion: adds `info` to the tree `racine` and
 * returns the (unchanged) root pointer, or the new node for an empty tree.
 * `mesg` is only touched when VERBOSE is non-zero. */
lia_avl_t lia_ajoute_element_avl(lia_avl_t racine, type_info *info, int avec_reequilibrage, char *mesg)
{
char height_grew; /* filled by lia_insere_avl, not needed by the caller */
if (VERBOSE) *mesg='\0';
return lia_insere_avl(racine,info,&height_grew,avec_reequilibrage,mesg);
}
/*................................................................*/
/* la liberation de la place memoire de l'arbre */
/* Free a whole tree: both subtrees first, then the entry of the node,
 * then the node itself. A NULL tree is accepted. */
void lia_libere_avl(lia_avl_t racine)
{
if (racine==NULL) return;
lia_libere_avl(racine->fg);
lia_libere_avl(racine->fd);
free_type_info(racine->info);
free(racine);
}
/*................................................................*/
/* la recherche d'un element */
/* Look for the node whose key string equals the one of `info`.
 * Returns the node, or NULL when the key is not in the tree. */
lia_avl_t lia_recherche_avl(lia_avl_t racine, type_info *info)
{
lia_avl_t node;
int order;
node=racine;
while (node!=NULL)
 {
 order=compare_info(node->info,info);
 if (order==0) break;
 /* a stored key greater than the searched one sends us to the left */
 node=(order>0)?node->fg:node->fd;
 }
return node;
}
/*................................................................*/
/* l'affichage de l'arbre */
/* Print every entry of the tree on `file`, left subtree first, then the
 * node, then the right subtree, using the "<word>\t<code>" layout. */
void lia_affiche_avl_simple(lia_avl_t racine,FILE *file)
{
if (racine==NULL) return;
lia_affiche_avl_simple(racine->fg,file);
print_type_info(racine->info,1,file);
lia_affiche_avl_simple(racine->fd,file);
}
/*................................................................*/
/* copy all the nodes of a tree into an array and sort them
* according to their frequency */
/* qsort()/bsearch() comparator on arrays of lia_avl_t: orders the nodes by
 * DECREASING info->code.
 * The codes are compared instead of subtracted, because the difference of
 * two ints can overflow and give a result with the wrong sign. */
int compare_freq(const void *a, const void *b)
{
const lia_avl_t *c=(const lia_avl_t *)a;
const lia_avl_t *d=(const lia_avl_t *)b;
int code_c=(*c)->info->code;
int code_d=(*d)->info->code;
return (code_d>code_c)-(code_d<code_c);
}
/* Store every node of the tree in `tabl`, the node first and then its left
 * and right subtrees. `*i` is the next free slot and is advanced by one for
 * each node written. */
void copy_tree2table(lia_avl_t racine, lia_avl_t *tabl, int *i)
{
if (racine==NULL) return;
tabl[*i]=racine;
*i+=1;
copy_tree2table(racine->fg,tabl,i);
copy_tree2table(racine->fd,tabl,i);
}
/* Number of nodes of the tree (0 for an empty tree). */
int lia_avl_size(lia_avl_t racine)
{
int left_count,right_count;
if (racine==NULL) return 0;
left_count=lia_avl_size(racine->fg);
right_count=lia_avl_size(racine->fd);
return 1 + left_count + right_count;
}
/* Build a table containing every node of the tree, sorted by decreasing
 * info->code (see compare_freq).
 * - racine : root of the tree (may be NULL)
 * - nb     : receives the number of nodes stored in the table
 * Returns a heap-allocated table that the caller releases with free(); the
 * nodes themselves still belong to the tree. At least one slot is always
 * requested so that an empty tree does not rely on malloc(0), and an
 * allocation failure is reported through ERREUR(). */
lia_avl_t *lia_avl_tree2table_freq(lia_avl_t racine, int *nb)
{
lia_avl_t *tabl;
size_t nb_slot;
int i;
*nb=lia_avl_size(racine);
nb_slot=(*nb>0)?(size_t)(*nb):1;
tabl=(lia_avl_t *)malloc(sizeof(lia_avl_t)*nb_slot);
if (tabl==NULL) { ERREUR("out of memory","lia_avl_tree2table_freq"); return NULL; }
i=0;
copy_tree2table(racine,tabl,&i);
qsort(tabl,(size_t)(*nb),sizeof(lia_avl_t),compare_freq);
return tabl;
}
/* Binary search of `code` in a table built by lia_avl_tree2table_freq().
 * - tabl  : table of nodes sorted by decreasing code (may be NULL)
 * - nb    : number of nodes in the table
 * - code  : code looked for
 * - index : receives the position of the match in the table, 0 if none
 * Returns the matching node, or NULL when the code is absent or when the
 * table is missing/empty (bsearch() must not be given a NULL base). */
lia_avl_t lia_avl_code2word(lia_avl_t *tabl, int nb, int code, int *index)
{
struct lia_avl_type tkey;
lia_avl_t *resu,key;
type_info info;
*index=0;
if ((tabl==NULL)||(nb<=0)) return NULL;
/* fake node carrying only the code: compare_freq reads nothing else */
info.key_string=NULL;
info.code=code;
tkey.info=&info;
key=(lia_avl_t)(&tkey);
resu=(lia_avl_t *)bsearch(&key,tabl,(size_t)nb,sizeof(lia_avl_t),compare_freq);
if (resu==NULL) return NULL;
*index=(int)(resu-tabl);
return *resu;
}
/*................................................................*/
/* managing lexicon */
/* Maximum number of lexicons that can be created (IDs are never reused:
 * Nb_Avl_Lexicon is only ever incremented in this file) */
#define MAX_LEXICON_AVL 100
/* All the tables below are indexed by lexicon ID */
lia_avl_t T_avl_lexicon[MAX_LEXICON_AVL]; /* AVL tree of the lexicon, keyed on the word */
lia_avl_t *T_tabl_avl_lexicon[MAX_LEXICON_AVL]; /* nodes of the tree sorted by code, or NULL */
int T_tabl_avl_lexicon_size[MAX_LEXICON_AVL]; /* size reported by size_lexicon() */
int T_avl_lexicon_max_code[MAX_LEXICON_AVL]; /* largest code seen so far */
int Nb_Avl_Lexicon=0; /* number of lexicon IDs handed out so far */
int load_lexicon(char *filename)
{
FILE *file;
static char ch[TailleLigne],*pt;
int code=0;
if (Nb_Avl_Lexicon==MAX_LEXICON_AVL) ERREUR("cste 'MAX_LEXICON_AVL' too small","");
T_avl_lexicon_max_code[Nb_Avl_Lexicon]=0;
if (!(file=fopen(filename,"rt"))) ERREUR("can't open:",filename);
for (T_avl_lexicon[Nb_Avl_Lexicon]=NULL;fgets(ch,TailleLigne,file);)
{
pt=strtok(ch," \t\n"); if (pt) pt=strtok(NULL," \t\n");
if (!pt) code++; else if (sscanf(pt,"%d",&code)!=1) ERREUR("bad format in:",filename);
if (code>T_avl_lexicon_max_code[Nb_Avl_Lexicon]) T_avl_lexicon_max_code[Nb_Avl_Lexicon]=code;
T_avl_lexicon[Nb_Avl_Lexicon]=lia_ajoute_element_avl(T_avl_lexicon[Nb_Avl_Lexicon],new_type_info(ch,code),True,NULL);
}
T_tabl_avl_lexicon[Nb_Avl_Lexicon]=lia_avl_tree2table_freq(T_avl_lexicon[Nb_Avl_Lexicon],
&(T_tabl_avl_lexicon_size[Nb_Avl_Lexicon]));
fclose(file);
return Nb_Avl_Lexicon++;
}
int load_lexicon_inv(char *filename)
{
FILE *file;
static char ch[TailleLigne],*ptword,*ptcode;
int code=0;
if (Nb_Avl_Lexicon==MAX_LEXICON_AVL) ERREUR("cste 'MAX_LEXICON_AVL' too small","");
T_avl_lexicon_max_code[Nb_Avl_Lexicon]=0;
if (!(file=fopen(filename,"rt"))) ERREUR("can't open:",filename);
for (T_avl_lexicon[Nb_Avl_Lexicon]=NULL;fgets(ch,TailleLigne,file);)
{
ptcode=strtok(ch," \t\n"); if (ptcode) ptword=strtok(NULL," \t\n"); else ERREUR("bad inv (code word) format:",ch);
if (sscanf(ptcode,"%d",&code)!=1) ERREUR("bad format in:",filename);
if (code>T_avl_lexicon_max_code[Nb_Avl_Lexicon]) T_avl_lexicon_max_code[Nb_Avl_Lexicon]=code;
T_avl_lexicon[Nb_Avl_Lexicon]=lia_ajoute_element_avl(T_avl_lexicon[Nb_Avl_Lexicon],new_type_info(ptword,code),True,NULL);
}
T_tabl_avl_lexicon[Nb_Avl_Lexicon]=lia_avl_tree2table_freq(T_avl_lexicon[Nb_Avl_Lexicon],
&(T_tabl_avl_lexicon_size[Nb_Avl_Lexicon]));
fclose(file);
return Nb_Avl_Lexicon++;
}
/* Create an empty lexicon and return its lexicon ID. */
int new_lexicon()
{
int id;
if (Nb_Avl_Lexicon==MAX_LEXICON_AVL) ERREUR("cste 'MAX_LEXICON_AVL' too small","");
id=Nb_Avl_Lexicon;
/* no word, no sorted table, no code yet */
T_avl_lexicon[id]=NULL;
T_tabl_avl_lexicon[id]=NULL;
T_tabl_avl_lexicon_size[id]=0;
T_avl_lexicon_max_code[id]=0;
Nb_Avl_Lexicon=id+1;
return id;
}
int add_word_lexicon(int lexid, char *word, int code)
{
if (code>T_avl_lexicon_max_code[lexid]) T_avl_lexicon_max_code[lexid]=code;
T_avl_lexicon[lexid]=lia_ajoute_element_avl(T_avl_lexicon[lexid],new_type_info(word,code),True,NULL);
T_tabl_avl_lexicon_size[lexid]++;
return True;
}
void lexicon_sort_code(int lexid)
{
if (T_tabl_avl_lexicon[lexid]) free(T_tabl_avl_lexicon[lexid]);
T_tabl_avl_lexicon[lexid]=lia_avl_tree2table_freq(T_avl_lexicon[lexid],&(T_tabl_avl_lexicon_size[lexid]));
}
void print_lexicon_sort_code(int lexid, int format, FILE *file)
{
int i;
if (T_tabl_avl_lexicon[lexid]) free(T_tabl_avl_lexicon[lexid]);
T_tabl_avl_lexicon[lexid]=lia_avl_tree2table_freq(T_avl_lexicon[lexid],&(T_tabl_avl_lexicon_size[lexid]));
for(i=T_tabl_avl_lexicon_size[lexid]-1;i>=0;i--) print_type_info(T_tabl_avl_lexicon[lexid][i]->info,format,file);
}
void delete_lexicon(int lexid)
{
if (T_avl_lexicon[lexid]) lia_libere_avl(T_avl_lexicon[lexid]);
if (T_tabl_avl_lexicon[lexid]) free(T_tabl_avl_lexicon[lexid]);
T_avl_lexicon[lexid]=NULL;
T_tabl_avl_lexicon[lexid]=NULL;
}
/*................................................................*/
/* Find the word carrying `code` in the lexicon `lexid`.
 * Returns True and stores the address of the word (owned by the lexicon) in
 * `*word`; returns False, leaving `*word` untouched, when the code is
 * negative or absent. */
int code2word(int lexid, int code, char **word)
{
lia_avl_t node;
int position;
if (code<0) return False;
node=lia_avl_code2word(T_tabl_avl_lexicon[lexid],T_tabl_avl_lexicon_size[lexid],code,&position);
if (node==NULL) return False;
*word=node->info->key_string;
return True;
}
/* Find the position, in the table sorted by code, of the entry carrying
 * `code` in the lexicon `lexid`.
 * Returns True and stores the position in `*index`; returns False when the
 * code is negative (`*index` untouched) or absent (`*index` set to 0 by the
 * search). The disabled debug trace was unreachable and has been removed. */
int code2index(int lexid, int code, int *index)
{
lia_avl_t resu;
if (code<0) return False;
resu=lia_avl_code2word(T_tabl_avl_lexicon[lexid],T_tabl_avl_lexicon_size[lexid],code,index);
return resu?True:False;
}
int index2code(int lexid, int index, int *code)
{
if ((index<0)||(index>=T_tabl_avl_lexicon_size[lexid])) return False;
if (0) printf("index2code: index=%d code=%d\n",index,T_tabl_avl_lexicon[lexid][index]->info->code);
*code=(T_tabl_avl_lexicon[lexid][index])->info->code;
return True;
}
int index2word(int lexid, int index, char **word)
{
if ((index<0)||(index>=T_tabl_avl_lexicon_size[lexid])) return False;
*word=(T_tabl_avl_lexicon[lexid][index])->info->key_string;
return True;
}
/* Look `word` up in the lexicon `lexid`.
 * Returns True when the word is known, False otherwise. When the word is
 * known and `code` is not NULL, the code of the word is stored in `*code`. */
int word2code(int lexid, char *word, int *code)
{
type_info probe;
lia_avl_t node;
probe.key_string=word; /* only the key is read by the search */
node=lia_recherche_avl(T_avl_lexicon[lexid],&probe);
if (node==NULL) return False;
if (code!=NULL) *code=node->info->code;
return True;
}
/* Add `code` to the value currently stored for `word` in the lexicon `lexid`.
 * Returns True when the word is known, False (nothing changed) otherwise. */
int addcode2word(int lexid, char *word, int code)
{
type_info probe;
lia_avl_t node;
probe.key_string=word; /* only the key is read by the search */
node=lia_recherche_avl(T_avl_lexicon[lexid],&probe);
if (node==NULL) return False;
node->info->code+=code;
return True;
}
/* Size recorded for the lexicon `lexid`. */
int size_lexicon(int lexid)
{
const int recorded_size=T_tabl_avl_lexicon_size[lexid];
return recorded_size;
}
/* Largest code recorded for the lexicon `lexid`. */
int max_code_lexicon(int lexid)
{
const int largest_code=T_avl_lexicon_max_code[lexid];
return largest_code;
}
/* Managing a lexicon with IDs */
/* load a lexicon and return a lexicon ID
* - input = filename (char *)
* - output = lexicon ID (int) */
int load_lexicon(char *);
int load_lexicon_inv(char *);
/* delete a lexicon
* - input = lexicon ID (int)
* - output = void */
void delete_lexicon(int);
/* get a string from a code
* - input = lexicon ID (int) + code (int)
* - output = 0 if the code is missing
* 1 if the code is here
* the address of the word string in (char **) */
int code2word(int,int,char**);
int code2index(int,int,int*);
int index2code(int,int,int*);
int index2word(int,int,char**);
/* get a code from a string
* - input = lexicon ID (int) + word string (char*)
* - output = 0 if the word is not in the lexicon
* 1 if the word is in the lexicon
* the code found in (int*) */
int word2code(int,char*,int*);
int addcode2word(int , char *, int );
int size_lexicon(int);
int max_code_lexicon(int);
int new_lexicon();
int add_word_lexicon(int , char *, int );
void lexicon_sort_code(int);
void print_lexicon_sort_code(int , int , FILE *);
/* SLU for Rocio XML */
/* FRED 0215 */
#include <string>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "librocio_slu.h"
extern "C" {
#include "lia_liblex.h"
/*................................................................*/
#define TailleLigne 80000
#define True 1
#define False 0
/* Print "ERREUR : <ch1> <ch2>" on stderr and terminate the program.
 * The exit status is EXIT_FAILURE: it used to be 0, which reported a fatal
 * error as a success to the calling process. */
void ERREUR(const char *ch1, const char *ch2)
{
fprintf(stderr,"ERREUR : %s %s\n",ch1,ch2);
exit(EXIT_FAILURE);
}
/* Print "ERREUR : <ch1> <i>" on stderr and terminate the program.
 * The exit status is EXIT_FAILURE: it used to be 0, which reported a fatal
 * error as a success to the calling process. */
void ERREURd(const char *ch1, int i)
{
fprintf(stderr,"ERREUR : %s %d\n",ch1,i);
exit(EXIT_FAILURE);
}
/*................................................................*/
#define MAX_FIELD 60000
static const char *CHglouton="<joker>";
static const char *CHepsilon="<epsilon>";
#define IEPSILON 0
#define IGLOU 1
#define PENALEPSILON 50
#define PENALGLOU 100
#define WINLENGTH 30
/* One word of the sequence handled by build_fst_words()/run_process() */
typedef struct
{
int index; /* code of the word in the word lexicon */
/* status of the word, as used by run_process():
 * 0 = not emitted, 1 = emitted as is, 2 = emitted as the joker,
 * 3 = emitted with the "**" prefix */
char select;
} type_outword;
/*................................................................*/
/* Build a linear FST (one arc bundle per known word) for the word sequence
 * made of the previous words followed by the new words.
 * - slu        : SLU instance; slu->words supplies the previous words when
 *                `prevword` is NULL
 * - words      : new words, `num_words` entries
 * - lexidword  : ID of the word lexicon used to turn words into codes
 * - prevword   : previous words separated by blanks, or NULL.
 *                WARNING: the string is modified by strtok()
 * - t_outword  : receives, for each word kept, its code and status
 * - nbword     : receives the number of words kept
 * Words unknown to the lexicon are silently skipped. A previous word starting
 * with "**" is "uncertain": besides its identity arc it also gets a joker arc
 * and an epsilon arc; new words always get the three arcs.
 * Returns a FST allocated with new; the caller deletes it. */
fst::StdVectorFst *build_fst_words(slu_t* slu, char **words, int num_words, int lexidword, char *prevword, type_outword *t_outword, int *nbword)
{
int i, nb,numstate,code,uncertain,deca;
fst::StdVectorFst *input;
char *pt;
input = new fst::StdVectorFst;
input->AddState();
input->SetStart(0);
numstate=nb=0;
/* add the previous words */
if (prevword)
{
for(pt=strtok(prevword," \t\n");pt;pt=strtok(NULL," \t\n"))
{
/* deca = number of characters to skip to get past the "**" prefix */
if (!strncmp(pt,"**",2)) { uncertain=True; deca=2; t_outword[nb].select=3; } else { uncertain=False; deca=0; t_outword[nb].select=1; }
if (word2code(lexidword,pt+deca,&code))
{
t_outword[nb++].index=code; if (nb==MAX_FIELD) ERREUR("cste MAX_FIELD too small","");
input->AddState();
input->AddArc(numstate,fst::StdArc(code,code,0,numstate+1));
if (uncertain)
{
input->AddArc(numstate,fst::StdArc(code,IGLOU,PENALGLOU,numstate+1));
input->AddArc(numstate,fst::StdArc(code,IEPSILON,PENALEPSILON,numstate+1));
}
numstate++;
}
}
} else if(slu->words->size() > 0) { // add support for memorized words
/* same processing as above, on the words memorized by the previous call */
for(size_t i = 0; i < slu->words->size(); i++) {
char* pt = (*slu->words)[i];
if (!strncmp(pt,"**",2)) { uncertain=True; deca=2; t_outword[nb].select=3; } else { uncertain=False; deca=0; t_outword[nb].select=1; }
if (word2code(lexidword,pt+deca,&code))
{
t_outword[nb++].index=code; if (nb==MAX_FIELD) ERREUR("cste MAX_FIELD too small","");
input->AddState();
input->AddArc(numstate,fst::StdArc(code,code,0,numstate+1));
if (uncertain)
{
input->AddArc(numstate,fst::StdArc(code,IGLOU,PENALGLOU,numstate+1));
input->AddArc(numstate,fst::StdArc(code,IEPSILON,PENALEPSILON,numstate+1));
}
numstate++;
}
}
}
/* now the new words */
for(i = 0; i < num_words; i++) {
if (word2code(lexidword,words[i],&code))
{
t_outword[nb].select=3;
t_outword[nb++].index=code; if (nb==MAX_FIELD) ERREUR("cste MAX_FIELD too small","");
input->AddState();
input->AddArc(numstate,fst::StdArc(code,IEPSILON,PENALEPSILON,numstate+1));
input->AddArc(numstate,fst::StdArc(code,IGLOU,PENALGLOU,numstate+1));
input->AddArc(numstate,fst::StdArc(code,code,0,numstate+1));
numstate++;
}
//else fprintf(stderr,"Warning: [%s] is unknown, discared\n",words[i]);
}
/* the last state created is the only final state */
input->SetFinal(numstate,0);
*nbword=nb;
return input;
}
/* Decode one utterance: build the word FST, compose it with the cleaning and
 * model FSTs, follow the best path, then refresh slu->actions and slu->words.
 * - slu       : SLU instance (lexicons, FSTs, result vectors)
 * - words     : new words, `num_words` entries
 * - prevn     : an action is reported only when it is found on an arc whose
 *               1-based position on the best path is greater than prevn
 * - prevword  : previous words separated by blanks, or NULL (then the words
 *               memorized in slu->words are used); modified by strtok()
 * Every string pushed in slu->actions / slu->words is allocated with
 * strdup() and freed by the next call or by free_slu().
 * Fixes over the historical version:
 * - the counters are plain locals (they were static, so the arc position
 *   `nb` started from the value left by the previous call whenever the first
 *   arc carried no action);
 * - tocc is fully zero-initialised (its last cell was left uninitialised);
 * - allocations are checked;
 * - the "**" strings are built with std::string instead of variable-length
 *   arrays, which are not standard C++;
 * - a non-final state without outgoing arc ends the walk instead of reading
 *   an exhausted iterator;
 * - the overflow message names the right constant (MAX_FIELD). */
void run_process(slu_t* slu, char** words, int num_words, int prevn, char *prevword)
{
fst::StdVectorFst *input,result1,result2,result3;
char *ch;
int *tocc,i,j,nb,nbac,nbword;
type_outword *t_outword;
t_outword=(type_outword *)malloc(sizeof(type_outword)*MAX_FIELD);
/* occurrence counter of each action code, indexed from 0 to the max code */
tocc=(int*)calloc((size_t)max_code_lexicon(slu->lexidaction)+1,sizeof(int));
if ((t_outword==NULL)||(tocc==NULL)) ERREUR("out of memory","run_process");
input=build_fst_words(slu, words, num_words, slu->lexidword,prevword,t_outword,&nbword);
//fprintf(stderr, "size of input = %d\n", input->NumStates());
fst::ArcSort(input, fst::StdOLabelCompare());
fst::Compose(*input, *(slu->fstClean), &result1);
//fprintf(stderr, "size of result1 = %d\n", result1.NumStates());
fst::ArcSort(&result1, fst::StdOLabelCompare());
//fprintf(stderr, "size of model = %d\n", slu->fstModel->NumStates());
fst::Compose(result1,*(slu->fstModel),&result2);
//fprintf(stderr, "size of result2 = %d\n", result2.NumStates());
fst::ShortestPath(result2,&result3,1,false);
fst::TopSort(&result3);
delete input;
// reset actions
for(size_t i = 0; i < slu->actions->size(); i++) {
free((*slu->actions)[i]);
}
slu->actions->clear();
// process nbest
fst::StdVectorFst::StateId start = result3.Start();
if ((int)start>=0)
{
for(fst::ArcIterator<fst::StdVectorFst> aiter(result3, start);!aiter.Done(); aiter.Next())
{
const fst::StdArc arc = aiter.Value(); // first arc of a path
nbword=0;
nb=1; /* 1-based position of the current arc on the path */
nbac=0;
if (arc.ilabel>0)
{
t_outword[nbword].index=arc.ilabel;
if ((arc.olabel==0)&&(arc.weight!=0)) t_outword[nbword].select=0; else
if (arc.weight==0) t_outword[nbword].select=1; else t_outword[nbword].select=2;
nbword++;
}
if (arc.olabel>=2)
{
nbac=nb;
if (!code2word(slu->lexidaction,arc.olabel,&ch)) ERREURd("unknown action code:",arc.olabel);
tocc[arc.olabel]++;
if (nbac>prevn) {
slu->actions->push_back(strdup(ch));
//printf("%s[%d]=>%d\n",ch,tocc[arc.olabel],nbac);
}
}
fst::StdArc::StateId state = arc.nextstate;
while(result3.Final(state) == fst::StdArc::Weight::Zero())
{
fst::ArcIterator<fst::StdVectorFst> next_iter(result3, state);
if (next_iter.Done()) break; /* dead end: nothing left to follow */
const fst::StdArc nextArc = next_iter.Value();
nb++;
if (nextArc.ilabel>0)
{
t_outword[nbword].index=nextArc.ilabel;
if ((nextArc.olabel==0)&&(nextArc.weight!=0)) t_outword[nbword].select=0; else
if (nextArc.weight==0) t_outword[nbword].select=1; else t_outword[nbword].select=2;
nbword++; if (nbword==MAX_FIELD) ERREUR("cste MAX_FIELD too small","");
}
if (nextArc.olabel>=2)
{
nbac=nb;
if (!code2word(slu->lexidaction,nextArc.olabel,&ch)) ERREURd("unknown action code:",nextArc.olabel);
tocc[nextArc.olabel]++;
if (nbac>prevn) {
slu->actions->push_back(strdup(ch));
//printf("%s[%d]=>%d\n",ch,tocc[nextArc.olabel],nbac);
}
}
state = nextArc.nextstate;
}
}
}
//printf("STRING:");
for(size_t i = 0; i < slu->words->size(); i++) free((*slu->words)[i]);
slu->words->clear();
/* the trailing words with status 0 get status 3; j stops on the last word
 * that has another status (or on 0) */
for(j=nbword-1;(j>0)&&(t_outword[j].select==0);j--) t_outword[j].select=3;
for(i=0;i<=j;i++) if (t_outword[i].select!=0)
{
if (!code2word(slu->lexidword,t_outword[i].index,&ch)) ERREURd("unknown word code:",t_outword[i].index);
if (t_outword[i].select==2) {
//printf(" %s",CHglouton);
slu->words->push_back(strdup(CHglouton));
} else if (t_outword[i].select==3) {
//printf(" **%s",ch);
std::string starred("**");
starred+=ch;
slu->words->push_back(strdup(starred.c_str()));
} else {
//printf(" %s",ch);
slu->words->push_back(strdup(ch));
}
}
/* now we limit to a window of WINLENGTH unmatched words */
if (nbword-WINLENGTH>j) j=nbword-WINLENGTH; else j++;
for(;j<nbword;j++)
{
if (!code2word(slu->lexidword,t_outword[j].index,&ch)) ERREURd("unknown word code:",t_outword[j].index);
std::string starred("**");
starred+=ch;
slu->words->push_back(strdup(starred.c_str()));
//printf(" **%s",ch);
}
//printf("\n");
free(tocc); free(t_outword);
}
/*................................................................*/
/* Create a SLU instance.
 * - chfileword   : word lexicon file
 * - chfileaction : action lexicon file
 * - chfilemodel  : model FST file
 * - chfileclean  : cleaning FST file
 * Returns the new instance, to be released with free_slu().
 * Any failure (memory, unreadable lexicon or FST) is fatal and reported
 * through ERREUR(); a FST that could not be read used to be stored as a NULL
 * pointer and dereferenced by the first run_slu() call. */
slu_t* init_slu(char* chfileword, char* chfileaction, char* chfilemodel, char* chfileclean) {
    slu_t* slu = (slu_t*) malloc(sizeof(slu_t));
    if(slu == NULL) ERREUR("out of memory","init_slu");
    slu->lexidword = load_lexicon(chfileword);
    //fprintf(stderr, "lexidword = %d\n", slu->lexidword);
    slu->lexidaction = load_lexicon(chfileaction);
    //fprintf(stderr, "lexidaction = %d\n", slu->lexidaction);
    slu->fstModel = fst::StdVectorFst::Read(chfilemodel);
    if(slu->fstModel == NULL) ERREUR("can't read FST:",chfilemodel);
    slu->fstClean = fst::StdVectorFst::Read(chfileclean);
    if(slu->fstClean == NULL) ERREUR("can't read FST:",chfileclean);
    slu->actions = new std::vector<char*>();
    slu->words = new std::vector<char*>();
    return slu;
}
/* Release a SLU instance created by init_slu(): the FSTs, the action and
 * word strings with their vectors, the two lexicons loaded by init_slu()
 * (they used to be leaked) and the instance itself.
 * A NULL instance is accepted and ignored. */
void free_slu(slu_t* slu) {
    if(slu == NULL) return;
    delete slu->fstClean;
    delete slu->fstModel;
    for(size_t i = 0; i < slu->actions->size(); i++) free((*slu->actions)[i]);
    delete slu->actions;
    for(size_t i = 0; i < slu->words->size(); i++) free((*slu->words)[i]);
    delete slu->words;
    /* lexicon IDs are never reused, so releasing their content is safe */
    delete_lexicon(slu->lexidword);
    delete_lexicon(slu->lexidaction);
    free(slu);
}
/* Number of actions currently stored in the instance. */
int num_actions(slu_t* slu) {
    const std::vector<char*>& actions = *slu->actions;
    return actions.size();
}
/* Action stored at position `index`, or NULL when the index is out of range.
 * The string stays owned by the instance. */
char* get_action(slu_t* slu, size_t index) {
    std::vector<char*>& actions = *slu->actions;
    /* index is unsigned: only the upper bound can be violated */
    if(index >= actions.size()) return NULL;
    return actions[index];
}
/* Run the decoding on the given words (see run_process) and return the
 * number of actions now stored in the instance. */
int run_slu(slu_t* slu, char** words, int num_words, int prevn, char *prevword) {
    run_process(slu, words, num_words, prevn, prevword);
    const int action_count = num_actions(slu);
    return action_count;
}
}
#ifndef __ROCIO_SLU__
#define __ROCIO_SLU__
#include <vector>
#include <fst/fstlib.h>
extern "C" {
/* State of one SLU instance, created by init_slu() and released by free_slu() */
typedef struct {
int lexidword; /* lexicon ID of the word lexicon */
int lexidaction; /* lexicon ID of the action lexicon */
fst::StdVectorFst* fstModel; /* model FST read by init_slu() */
fst::StdVectorFst* fstClean; /* cleaning FST read by init_slu() */
std::vector<char*>* actions; /* actions found by the last run_slu() call (malloc'ed strings) */
std::vector<char*>* words; /* words memorized by the last run_slu() call (malloc'ed strings) */
} slu_t;
slu_t* init_slu(char* chfileword, char* chfileaction, char* chfilemodel, char* chfileclean);
int run_slu(slu_t* slu, char** words, int num_words, int prevn, char *prevword);
int num_actions(slu_t* slu);
char* get_action(slu_t* slu, size_t index);
void free_slu(slu_t* slu);
void ERREUR(const char *ch1, const char *ch2);
}
#endif
/* Process Rocio XML for SLU */
/* FRED 0215 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/*................................................................*/
#define TailleLigne 8000
#define True 1
#define False 0
/* Print "ERREUR : <ch1> <ch2>" on stderr and terminate the program.
 * The exit status is EXIT_FAILURE: it used to be 0, so a calling script
 * could not tell a failed run from a successful one. The strings are only
 * read, hence the const qualifiers (callers pass string literals). */
void ERREUR(const char *ch1,const char *ch2)
{
fprintf(stderr,"ERREUR : %s %s\n",ch1,ch2);
exit(EXIT_FAILURE);
}
/* Print "ERREUR : <ch1> <i>" on stderr and terminate the program.
 * The exit status is EXIT_FAILURE: it used to be 0, so a calling script
 * could not tell a failed run from a successful one. */
void ERREURd(const char *ch1, int i)
{
fprintf(stderr,"ERREUR : %s %d\n",ch1,i);
exit(EXIT_FAILURE);
}
/*................................................................*/
/* FST DICO */
/* Capacity of the action and word dictionaries */
#define MAX_SIZE_DICO 1000
#define IGLOU 1
#define PENALGLOU 100
#define PENALEND 100
/* First index handed out by from_action_to_index()/from_word_to_index();
 * the lower indices are never assigned by those functions (presumably they
 * are reserved for <epsilon> and <joker> - TODO confirm) */
#define STARTNEW 2
char *CHglouton="<joker>";
char *CHepsilon="<epsilon>";
/* Dictionaries: index -> string, filled on demand */
char *T_dico_action[MAX_SIZE_DICO],*T_dico_word[MAX_SIZE_DICO];
/* Next free index of each dictionary */
int NbAction=STARTNEW,NbWord=STARTNEW;
/* Return the index of the action `ch` in T_dico_action, adding a private
 * copy of the string when the action is not known yet (linear search from
 * STARTNEW). A full dictionary or a failed copy is fatal (ERREUR): a NULL
 * copy used to be stored and passed to strcmp() by the next lookup. */
int from_action_to_index(char *ch)
{
int i;
char *copy;
for(i=STARTNEW;(i<NbAction)&&(strcmp(ch,T_dico_action[i]));i++);
if (i==NbAction)
 {
 copy=strdup(ch);
 if (!copy) ERREUR("out of memory","");
 T_dico_action[NbAction++]=copy;
 }
if (NbAction==MAX_SIZE_DICO) ERREUR("cste MAX_SIZE_DICO too small","");
return i;
}
/* Return the index of the word `ch` in T_dico_word, adding a private copy of
 * the string when the word is not known yet (linear search from STARTNEW).
 * A full dictionary or a failed copy is fatal (ERREUR): a NULL copy used to
 * be stored and passed to strcmp() by the next lookup. */
int from_word_to_index(char *ch)
{
int i;
char *copy;
for(i=STARTNEW;(i<NbWord)&&(strcmp(ch,T_dico_word[i]));i++);
if (i==NbWord)
 {
 copy=strdup(ch);
 if (!copy) ERREUR("out of memory","");
 T_dico_word[NbWord++]=copy;
 }
if (NbWord==MAX_SIZE_DICO) ERREUR("cste MAX_SIZE_DICO too small","");
return i;
}
/*................................................................*/
/* format:
<homeostasis version="25-11-2014">
<liste_section sequences="1,2,3,4,5,6,7,8,9" ordre="variable" repetition="oui" action="exclusive">
<section id="1" action="start_scene1">
<sequence ordre="strict" repetition="non" action="" lang="eng">
<keyword action="start_scene1" lang="esp"> uno </keyword>
</sequence>
</section>
<section id="2" action="close_scene1/start_scene2">
<sequence ordre="strict" repetition="non" action="" lang="esp">
<keyword action="open_scene2" lang="esp"> dos </keyword>
<keyword action="open_2A" lang="eng"> open system </keyword>
</sequence>
<sequence ordre="strict" repetition="oui" action="" lang="eng">
<keyword action="start_system_voice" lang="eng"> tell me </keyword>
<keyword action="open_2C2" lang="eng"> next </keyword>
<keyword action="open_2D" lang="eng"> install the new version of me </keyword>
<keyword action="#end" lang="eng"> give me my data </keyword>
</sequence>
</section>
*/
#define STRICT 0
#define VARIABLE 1
/* Extract the value of the attribute `attribut` from the XML tag in `ch`.
 * The attribute is looked for as ' name="value"' (a space, the name, '=',
 * then the value between double quotes).
 * - chfield : receives the value; it must be large enough for it. It is set
 *             to the empty string when the attribute is absent.
 * Returns chfield.
 * A value that is not opened or not closed by a double quote is reported
 * through ERREUR(). The opening quote used to be skipped without being
 * checked, which could read past the end of the line (e.g. a line ending
 * right after '='). */
char *get_field(char *ch, char *attribut, char *chfield)
{
size_t i,j,len;
len=strlen(attribut);
chfield[0]='\0';
/* find " attribut=" */
for(i=0;ch[i];i++)
 if ((ch[i]==' ')&&(!strncmp(ch+i+1,attribut,len))&&(ch[i+1+len]=='=')) break;
if (!ch[i]) return chfield; /* attribute absent */
i+=len+2; /* character following '=' */
if (ch[i]!='"') ERREUR("bad format1:",ch);
for(j=0,i++;(ch[i])&&(ch[i]!='"');i++,j++) chfield[j]=ch[i];
if (ch[i]!='"') ERREUR("bad format1:",ch);
chfield[j]='\0';
return chfield;
}
/* Extract the text located between the first '>' of `ch` and the next '<'.
 * Leading blanks/tabs are skipped and trailing blanks/tabs are removed.
 * - chcontent : receives the text; it must be large enough for it
 * Returns chcontent. A missing '>' or '<' is reported through ERREUR(). */
char *get_content(char *ch, char *chcontent)
{
int pos,len;
chcontent[0]='\0';
/* end of the opening tag */
pos=0;
while ((ch[pos])&&(ch[pos]!='>')) pos++;
if (!ch[pos]) ERREUR("bad format2:",ch);
/* skip the blanks in front of the content */
pos++;
while ((ch[pos])&&((ch[pos]==' ')||(ch[pos]=='\t'))) pos++;
/* copy up to the start of the closing tag */
len=0;
while ((ch[pos])&&(ch[pos]!='<'))
 {
 chcontent[len]=ch[pos];
 pos++;
 len++;
 }
if (!ch[pos]) ERREUR("bad format3:",ch);
chcontent[len]='\0';
/* remove the trailing blanks (the first character is never removed) */
len--;
while ((len>0)&&((chcontent[len]==' ')||(chcontent[len]=='\t')))
 {
 chcontent[len]='\0';
 len--;
 }
return chcontent;
}
/* Replace, in place, every space of `ch` by an underscore
 * (tabs and other blanks are left untouched). */
void remove_space(char *ch)
{
char *cursor;
for(cursor=strchr(ch,' ');cursor!=NULL;cursor=strchr(cursor+1,' ')) *cursor='_';
}
/*................................................................*/
#define MAX_FIELD 60
#define MAX_END_STATE 1000
/* Read on stdin a description made of <section>, <sequence> and <keyword>
   lines and write, for the prefix given with -prefix :
     <prefix>_section<id>.txt       transitions of the section automaton
     <prefix>_section<id>_text.txt  linear automaton of the keyword words
     <prefix>_clean_tail.txt        automaton written from the word dictionary
     <prefix>_dico_action.txt       action dictionary
     <prefix>_dico_word.txt         word dictionary
   Only lines containing "<section", "<sequence" or "<keyword" are used.
   The input line is kept intact in `ch` until all its attributes have been
   read: file names are built in the separate buffer `chfile`, so that the
   "action" attribute of <section> and <sequence> is really looked up in the
   line (it used to be searched in the file name that had overwritten it). */
int main(int argc, char **argv)
{
    int nbsection,i,nb,ordre,repetition,nbseq,actionsection,actionsequence,currentstate,statedebseq,action,statetext,findend,t_end[MAX_END_STATE],nbend;
    FILE *file,*filetext;
    char *pt,ch[TailleLigne],chfile[TailleLigne],*chprefix,chfield[TailleLigne],chcontent[TailleLigne];

    chprefix=NULL;
    for(nb=1;nb<argc;nb++)
    {
        if (!strcmp(argv[nb],"-prefix"))
        {
            if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]);
            chprefix=argv[++nb];
        }
        else if (!strcmp(argv[nb],"-h"))
        {
            fprintf(stderr,"Syntax: %s [-h] -prefix <string>\n",argv[0]);
            exit(0);
        }
        else ERREUR("unknown option:",argv[nb]);
    }
    if (!chprefix) ERREUR("bad syntax, check '-h'","");

    ordre=STRICT; repetition=False; file=filetext=NULL; actionsection=0; findend=False; nbend=0; nbseq=0;
    currentstate=0; statedebseq=0; statetext=0;
    for(nbsection=0;fgets(ch,TailleLigne,stdin);)
    {
        if (strstr(ch,"<section"))
        {
            /* finish the files of the previous section, if any */
            if (filetext) { fprintf(filetext,"%d\n",statetext); fclose(filetext); filetext=NULL; }
            if ((file)&&(nbseq>0))
            {
                if (!findend)
                {
                    if (ordre==VARIABLE) { fprintf(stderr,"ERREUR: no exit action on a variable section, in section %d and sequence %d\n",nbsection,nbseq); exit(0); }
                    t_end[nbend++]=currentstate;
                }
                if (nbend>0) /* emit the end-of-sequence and end-of-section actions */
                {
                    sprintf(chcontent,"action(%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,nbseq); actionsequence=from_action_to_index(chcontent);
                    sprintf(chcontent,"action(%d,%d,\"#ENDSECTION(%d)\",\"\")",nbsection,nbseq,nbsection); actionsection=from_action_to_index(chcontent);
                    for(i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
                    fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate+1,currentstate+2,actionsection);
                    fprintf(file,"%d\n",currentstate+2);
                    currentstate+=2;
                    nbend=0;
                }
            }
            /* also close the file of a section that contained no sequence */
            if (file) { fclose(file); file=NULL; }

            nbsection++; statetext=0; nbseq=0; currentstate=0;
            get_field(ch,"id",chfield);
            if (snprintf(chfile,sizeof(chfile),"%s_section%s.txt",chprefix,chfield)>=(int)sizeof(chfile)) ERREUR("file name too long:",chprefix);
            if (!(file=fopen(chfile,"wt"))) ERREUR("can't write in:",chfile);
            if (snprintf(chfile,sizeof(chfile),"%s_section%s_text.txt",chprefix,chfield)>=(int)sizeof(chfile)) ERREUR("file name too long:",chprefix);
            if (!(filetext=fopen(chfile,"wt"))) ERREUR("can't write in",chfile);
            /* ch still holds the <section ...> line here */
            get_field(ch,"action",chfield);
            if (chfield[0])
            {
                if (snprintf(chcontent,sizeof(chcontent),"action(\"%s\",\"\")",chfield)>=(int)sizeof(chcontent)) ERREUR("line too long:",chfield);
                actionsection=from_action_to_index(chcontent);
            }
            else actionsection=0;
        }
        else if (strstr(ch,"<sequence")) /* <sequence ordre="strict" repetition="oui" action="" lang="eng"> */
        {
            if (!file) ERREUR("<sequence> found before any <section>:",ch);
            /* a strict sequence without "#end" keyword ends on its last state */
            if ((nbseq!=0)&&(!findend))
            {
                if (ordre==VARIABLE) { fprintf(stderr,"ERREUR: no exit action on a variable section, in section %d and sequence %d\n",nbsection,nbseq); exit(0); }
                t_end[nbend++]=currentstate;
            }
            if (nbend>0)
            {
                sprintf(chcontent,"action(%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,nbseq); actionsequence=from_action_to_index(chcontent);
                for(i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
                fprintf(file,"%d\n",currentstate+1);
                currentstate++;
                nbend=0;
            }
            statedebseq=currentstate;
            findend=False;
            nbseq++;
            get_field(ch,"ordre",chfield);
            if (!strcmp(chfield,"strict")) ordre=STRICT;
            else if (!strcmp(chfield,"variable")) ordre=VARIABLE;
            else ERREUR("wrong value for attribut ordre:",ch);
            if (!strcmp(get_field(ch,"repetition",chfield),"oui")) repetition=True; else repetition=False;
            if (actionsection!=0) { fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate,currentstate+1,actionsection); currentstate++; }
            /* the line is untouched, so the attribute is read from the real input */
            get_field(ch,"action",chfield);
            if (chfield[0]) { fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate,currentstate+1,from_action_to_index(chfield)); currentstate++; }
        }
        else if (strstr(ch,"<keyword"))
        {
            /* <keyword action="open_2D" lang="eng"> install the new version of me </keyword> */
            /* <keyword action="#end" lang="eng"> give me my data </keyword> */
            get_field(ch,"action",chfield);
            if (chfield[0])
            {
                if (!file) ERREUR("<keyword> found before any <section>:",ch);
                get_content(ch,chcontent);
                /* from here on ch holds the action name, not the input line */
                if (snprintf(ch,sizeof(ch),"action(%d,%d,\"%s\",\"%s\")",nbsection,nbseq,chfield,chcontent)>=(int)sizeof(ch)) ERREUR("line too long:",chcontent);
                remove_space(ch);
                action=from_action_to_index(ch);
                /* one transition per word; only the first one carries the action */
                for(i=0,pt=strtok(chcontent," ");pt;pt=strtok(NULL," "),i++)
                {
                    if ((ordre==STRICT)||(i>0)) fprintf(file,"%d\t%d\t%s\t%d\t0\n",currentstate+i,currentstate+1+i,pt,i==0?action:0);
                    else fprintf(file,"%d\t%d\t%s\t%d\t0\n",statedebseq,currentstate+1+i,pt,i==0?action:0);
                    from_word_to_index(pt);
                    fprintf(filetext,"%d\t%d\t%s\n",statetext,statetext+1,pt);
                    statetext++;
                }
                if (ordre==STRICT) { if (repetition) fprintf(file,"%d\t%d\t<epsilon>\t0\t0\n",currentstate+i,currentstate); }
                else fprintf(file,"%d\t%d\t<epsilon>\t0\t0\n",currentstate+i,statedebseq);
                /* now we can end at any keyword, but penality if not a valid end !! */
                if (!strcmp(chfield,"#end")) { findend=True; t_end[nbend++]=currentstate+i; if (nbend==MAX_END_STATE) ERREUR("cste MAX_END_STATE too small",""); }
                else fprintf(file,"%d\t%d\n",currentstate+i,PENALEND);
                /* we add glouton transition if it's a strict section */
                if (ordre==STRICT) fprintf(file,"%d\t%d\t%s\t%d\t%d\n",currentstate,currentstate+i,CHglouton,action,PENALGLOU);
                currentstate+=i;
            }
        }
    }
    /* finish the last section */
    if ((file)&&(nbseq>0))
    {
        if (!findend)
        {
            if (ordre==VARIABLE) { fprintf(stderr,"ERREUR: no exit action on a variable section, in section %d and sequence %d\n",nbsection,nbseq); exit(0); }
            t_end[nbend++]=currentstate;
        }
        if (nbend>0) /* emit the end-of-sequence and end-of-section actions */
        {
            sprintf(chcontent,"action(%d,%d,\"#ENDSEQUENCE(%d)\",\"\")",nbsection,nbseq,nbseq); actionsequence=from_action_to_index(chcontent);
            sprintf(chcontent,"action(%d,%d,\"#ENDSECTION(%d)\",\"\")",nbsection,nbseq,nbsection); actionsection=from_action_to_index(chcontent);
            for(i=0;i<nbend;i++) fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",t_end[i],currentstate+1,actionsequence);
            fprintf(file,"%d\t%d\t<epsilon>\t%d\t0\n",currentstate+1,currentstate+2,actionsection);
            fprintf(file,"%d\n",currentstate+2);
        }
    }
    if (file) fclose(file);
    if (filetext) { fprintf(filetext,"%d\n",statetext); fclose(filetext); }

    /* write tail GLOUTON eraser */
    if (snprintf(chfile,sizeof(chfile),"%s_clean_tail.txt",chprefix)>=(int)sizeof(chfile)) ERREUR("file name too long:",chprefix);
    if (!(file=fopen(chfile,"wt"))) ERREUR("can't write in:",chfile);
    for(i=STARTNEW;i<NbWord;i++) fprintf(file,"0\t1\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
    fprintf(file,"1\t1\t%s\t%s\n",CHglouton,CHglouton);
    for(i=STARTNEW;i<NbWord;i++) fprintf(file,"1\t1\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
    fprintf(file,"1\t2\t%s\t%s\n",CHglouton,CHepsilon);
    fprintf(file,"1\n");
    fprintf(file,"2\t3\t%s\t%s\n",CHepsilon,CHglouton);
    fprintf(file,"2\t2\t%s\t%s\n",CHglouton,CHepsilon);
    fprintf(file,"2\n");
    for(i=STARTNEW;i<NbWord;i++) fprintf(file,"3\t1\t%s\t%s\n",T_dico_word[i],T_dico_word[i]);
    fclose(file);

    /* write dico action */
    if (snprintf(chfile,sizeof(chfile),"%s_dico_action.txt",chprefix)>=(int)sizeof(chfile)) ERREUR("file name too long:",chprefix);
    if (!(file=fopen(chfile,"wt"))) ERREUR("can't write in:",chfile);
    fprintf(file,"<epsilon> 0\n"); fprintf(file,"%s 1\n",CHglouton);
    for(i=STARTNEW;i<NbAction;i++) fprintf(file,"%s %d\n",T_dico_action[i],i);
    fclose(file);

    /* write dico word */
    if (snprintf(chfile,sizeof(chfile),"%s_dico_word.txt",chprefix)>=(int)sizeof(chfile)) ERREUR("file name too long:",chprefix);
    if (!(file=fopen(chfile,"wt"))) ERREUR("can't write in:",chfile);
    fprintf(file,"<epsilon> 0\n"); fprintf(file,"%s 1\n",CHglouton);
    for(i=STARTNEW;i<NbWord;i++) fprintf(file,"%s %d\n",T_dico_word[i],i);
    fclose(file);
    exit(0);
}
File added
/* Produce the simplest string producing a given action */
/* FRED 0215 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/*................................................................*/
#define TailleLigne 8000
#define True 1
#define False 0
/* Print "ERREUR : <ch1> <ch2>" on stderr and terminate the program.
   The strings are only read, hence the const qualifiers (string literals
   and argv entries can be passed without a cast).
   NOTE(review): the process exits with status 0 although this is an error
   path; kept unchanged because scripts may depend on it. */
void ERREUR(const char *ch1, const char *ch2)
{
    fprintf(stderr,"ERREUR : %s %s\n",ch1,ch2);
    exit(0);
}
/* Same as ERREUR but the second item is an integer:
   print "ERREUR : <ch1> <i>" on stderr and terminate the program.
   NOTE(review): exits with status 0 like ERREUR; kept unchanged. */
void ERREURd(const char *ch1, int i)
{
    fprintf(stderr,"ERREUR : %s %d\n",ch1,i);
    exit(0);
}
/*................................................................*/
/* format:
action(1,1,"start_scene1","uno") 2
action(1,1,"#ENDSEQUENCE(1)","") 3
action(1,1,"#ENDSECTION(1)","") 4
action(2,1,"open_scene2","dos") 5
action(2,1,"open_2A","open_system") 6
action(2,1,"#ENDSEQUENCE(1)","") 7
action(2,2,"start_system_voice","tell_me") 8
action(2,2,"open_2B","open_technical_characteristics") 9
action(2,2,"open_2B1","read") 10
action(2,2,"open_2B2","next") 11
action(2,2,"open_2B3","yes") 12
action(2,2,"open_2B4","read") 13
action(2,2,"open_2B5","download") 14
action(2,2,"open_2C","open_the_terms_and_conditions_of_use_of_body_x_epsilon_system_three_point_zero") 15
action(2,2,"open_2C1","accept_terms_and_conditions_of_use") 16
action(2,2,"open_2C2","next") 17
action(2,2,"open_2D","install_the_new_version_of_me") 18
action(2,2,"#end","give_me_my_data") 19
action(2,2,"#ENDSEQUENCE(2)","") 20
action(2,2,"#ENDSECTION(2)","") 21
*/
#define MAX_ACTION 2000
typedef struct
{
char *ch;
int nbsec,code;
} type_action;
type_action T_action[MAX_ACTION];
/* Load into T_action every line of chfile that contains "action(".
   Expected line format:  action(<section>,...) <code>
   parameters :
     1- chfile = name of the action dictionary file
   The table is terminated by an entry whose ch is NULL.
   Calls ERREUR when the file cannot be opened, when a line is malformed,
   when the table is too small or when memory is exhausted. */
void load_action(char *chfile)
{
    FILE *file;
    char ch[TailleLigne],*chcode;
    int code,nb,nbsec;

    if (!(file=fopen(chfile,"rt"))) ERREUR("can't open:",chfile);
    for(nb=0;fgets(ch,TailleLigne,file);)
    {
        if (!strstr(ch,"action(")) continue;
        if (nb==MAX_ACTION) ERREUR("cste MAX_ACTION too small","");
        /* first token = action name (ch is cut there), second token = code */
        chcode=strtok(ch," \n"); if (chcode) chcode=strtok(NULL," \n"); if (!chcode) ERREUR("bad format:",ch);
        if (sscanf(ch,"action(%d,",&nbsec)!=1) ERREUR("bad format:",ch);
        if (sscanf(chcode,"%d",&code)!=1) ERREUR("bad format:",chcode);
        T_action[nb].ch=strdup(ch);
        /* a NULL name would be taken for the end-of-table marker */
        if (!T_action[nb].ch) ERREUR("not enough memory while reading:",chfile);
        T_action[nb].nbsec=nbsec;
        T_action[nb].code=code;
        nb++;
    }
    /* one slot must remain for the end marker */
    if (nb==MAX_ACTION) ERREUR("cste MAX_ACTION too small","");
    T_action[nb].ch=NULL;
    fclose(file);
}
/* Write in `file` the text description of a two-state acceptor:
   a loop on state 0 for the code of every action of section nbsec except
   the action of index nbac, one arc from state 0 to state 1 for the code of
   action nbac, and state 1 as final state.
   parameters :
     1- file  = output stream
     2- nbsec = section number
     3- nbac  = index in T_action of the selected action */
void print_fst_section(FILE *file, int nbsec, int nbac)
{
    int idx=0;

    while (T_action[idx].ch!=NULL)
    {
        int same_section=(T_action[idx].nbsec==nbsec);

        if (same_section && idx!=nbac)
        {
            fprintf(file,"0\t0\t%d\n",T_action[idx].code);
        }
        idx++;
    }
    fprintf(file,"0\t1\t%d\n",T_action[nbac].code);
    fputs("1\n",file);
}
/* For every action of section nbsec, print on stdout one line
       <action name> TAB <word> <word> ...
   where the words are read from temp.fst.txt, the file produced by the
   OpenFst command pipeline below from automate/homeostasis_25nov_section<nbsec>.fst.
   The line is always terminated, even when the pipeline produced no word
   (it used to be left open, so the next action was glued to it).
   Work files temp.tmp, temp.fst and temp.fst.txt are written in the current
   directory.
   parameters :
     1- nbsec = section number */
void process_section(int nbsec)
{
    int i;
    static char ch[TailleLigne];
    FILE *file;

    for(i=0;T_action[i].ch;i++)
    {
        if (T_action[i].nbsec!=nbsec) continue;

        if (!(file=fopen("temp.tmp","wt"))) ERREUR("can't write in:","temp.tmp");
        print_fst_section(file,nbsec,i);
        fclose(file);

        sprintf(ch,"fstcompile --acceptor temp.tmp | fstarcsort > temp.fst");
        if (system(ch)!=0) fprintf(stderr,"WARNING: command failed: %s\n",ch);
        sprintf(ch,"fstarcsort automate/homeostasis_25nov_section%d.fst | fstcompose - temp.fst | fstshortestpath | fsttopsort | fstproject | fstrmepsilon | fstprint --isymbols=automate/homeostasis_25nov_dico_word.txt | grep ' ' | cut -f3 > temp.fst.txt",nbsec);
        if (system(ch)!=0) fprintf(stderr,"WARNING: command failed: %s\n",ch);

        if (!(file=fopen("temp.fst.txt","rt"))) ERREUR("can't read:","temp.fst.txt");
        printf("%s\t",T_action[i].ch);
        if (fgets(ch,TailleLigne,file))
        {
            ch[strcspn(ch,"\n")]='\0'; /* also handles a line made of a single newline */
            printf("%s",ch);
            while (fgets(ch,TailleLigne,file))
            {
                ch[strcspn(ch,"\n")]='\0';
                printf(" %s",ch);
            }
        }
        printf("\n");
        fclose(file);
    }
}
int main(int argc, char **argv)
{
int nb;
char ch[TailleLigne],*chaction;
chaction=NULL;
if (argc>1)
for(nb=1;nb<argc;nb++)
if (!strcmp(argv[nb],"-action"))
{
if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]);
chaction=argv[++nb];
}
else
if (!strcmp(argv[nb],"-h"))
{
fprintf(stderr,"Syntax: %s [-h] \n",argv[0]);
exit(0);
}
else ERREUR("unknown option:",argv[nb]);
if (!chaction) ERREUR("bad syntax, check '-h'","");
load_action(chaction);
for(nb=0;nb<10;nb++) process_section(nb);
exit(0);
}
/* SLU for Rocio XML */
/* FRED 0215 */
#include "librocio_slu.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TailleLigne 80000
#define MAX_FIELD 60000
/* Command line:
     [-h] -word <file> -action <file> -fstmodel <file> -fstclean <file>
     [-prevn <int>] [-prevword <string>]
   Read whitespace-separated words from stdin (any number of lines), then
   pass them with prevn and prevword to run_slu on the object returned by
   init_slu.
   Every word is copied on the heap: the line buffer is reused by the next
   fgets, so pointers into it are only valid for the current line. Blank
   lines add no word. */
int main(int argc, char **argv)
{
    int nb,prevn,i;
    char ch[TailleLigne],*chfileword,*chfileaction,*chfilemodel,*chfileclean,*prevword,*pt;

    prevn=0; prevword=NULL;
    chfileword=chfileaction=chfilemodel=chfileclean=NULL;
    for(nb=1;nb<argc;nb++)
    {
        if (!strcmp(argv[nb],"-word")) { if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]); chfileword=argv[++nb]; }
        else if (!strcmp(argv[nb],"-action")) { if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]); chfileaction=argv[++nb]; }
        else if (!strcmp(argv[nb],"-fstmodel")) { if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]); chfilemodel=argv[++nb]; }
        else if (!strcmp(argv[nb],"-fstclean")) { if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]); chfileclean=argv[++nb]; }
        else if (!strcmp(argv[nb],"-prevn")) { if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]); if (sscanf(argv[++nb],"%d",&prevn)!=1) ERREUR("bad value:",argv[nb]); }
        else if (!strcmp(argv[nb],"-prevword")) { if (nb+1==argc) ERREUR("must have a value after argument;",argv[nb]); prevword=argv[++nb]; }
        else if (!strcmp(argv[nb],"-h"))
        {
            fprintf(stderr,"Syntax: %s [-h] -word <file> -action <file> -fstmodel <file> -fstclean <file> [-prevn <int>][-prevword <string>]\n",argv[0]);
            exit(0);
        }
        else ERREUR("unknown option:",argv[nb]);
    }
    if ((!chfileword)||(!chfileaction)||(!chfilemodel)||(!chfileclean)) ERREUR("bad syntax, check '-h'","");

    // init
    slu_t* slu = init_slu(chfileword, chfileaction, chfilemodel, chfileclean);

    // read words from stdin
    char* words[MAX_FIELD];
    int num_words = 0;
    words[0]=NULL;
    while (fgets(ch,TailleLigne,stdin))
    {
        for (pt=strtok(ch," \t\n\r");pt;pt=strtok(NULL," \t\n\r"))
        {
            if (!(words[num_words]=strdup(pt))) ERREUR("not enough memory","");
            num_words++;
            /* same limit as before: the table must keep one free slot */
            if (num_words==MAX_FIELD) ERREUR("cste MAX_FIELD too small","");
            words[num_words]=NULL; /* keep the list NULL-terminated, as it was after each line */
        }
    }
    fprintf(stderr, "%d words read from stdin\n", num_words);

    // run them through system
    run_slu(slu, words, num_words, prevn, prevword);
    free_slu(slu);
    for (i=0;i<num_words;i++) free(words[i]);
    exit(0);
}
#!/bin/csh
# Word-by-word test driver for rocio_slu.
# Arguments: 1 = text file, 2 = section number (see the usage message below).
# Each whitespace-separated word of the text file is sent alone to rocio_slu;
# the STRING line of its output is given back as -prevword on the next call.

# Shared libraries needed at run time (absolute paths of one particular account)
setenv LD_LIBRARY_PATH /storage/raid1/homedirs/frederic.bechet/bolt/errseg_tagger/src/crfsuite-0.12/lib/crf/.libs:/storage/raid1/homedirs/frederic.bechet/bolt/errseg_tagger/src/liblbfgs-1.10/lib/.libs:/storage/raid1/homedirs/frederic.bechet/bolt/errseg_tagger/src/crfsuite-0.12/lib/cqdb/.libs:/storage/raid1/homedirs/frederic.bechet/bolt/errseg_tagger/src/openfst-1.3.3/src/lib/.libs:./src_2015-02-22
# Exactly two arguments are required
if ( $#argv != 2 ) then
echo "Syntax: 1=fichier texte 2=numero de la section / exemple: csh try.csh homeostasis_25nov.asr/sect6.hyp 6"
exit
endif
# Location of the binary and of the data files
set DIR_SRC=./src_2015-02-22
set DIR_DATA=./automate_2015-02-22
set DICOWORD=$DIR_DATA/homeostasis_25nov_dico_word.txt
set DICOACTION=$DIR_DATA/homeostasis_25nov_dico_action.txt
# The model is the one of the section given as second argument
set FSTMODEL=$DIR_DATA/homeostasis_25nov_section$2.fst
set FSTCLEAN=$DIR_DATA/homeostasis_25nov_clean_tail.fst
# History passed with -prevword; "DEBUT" is the value used for the first word
set OPT = "DEBUT"
foreach i (`cat $1`)
echo "NEW_WORD=($i) - HISTORY=($OPT)"
# The output is shown and also saved in toto.tmp (work file in the current directory)
echo "$i" | $DIR_SRC/rocio_slu -word $DICOWORD -action $DICOACTION -fstmodel $FSTMODEL -fstclean $FSTCLEAN -prevword "$OPT" | tee toto.tmp
# New history = second ':'-separated field of the line(s) containing STRING
set OPT = `cat toto.tmp | grep STRING | cut -d':' -f2`
grep -v STRING toto.tmp
echo '****************************************************************'
end
...@@ -102,8 +102,12 @@ class Keyword(Gtk.Label): ...@@ -102,8 +102,12 @@ class Keyword(Gtk.Label):
self.get_style_context().add_class('keyword') self.get_style_context().add_class('keyword')
self.connect('activate-link', self.link_clicked) self.connect('activate-link', self.link_clicked)
def highlight(self):
self.get_style_context().remove_class('keyword')
self.get_style_context().add_class('keyword-highlighted')
def link_clicked(self, widget, uri): def link_clicked(self, widget, uri):
actions.perform_action(actions.Action(uri, keyword=widget)) actions.perform_action(actions.Action(uri, keyword=widget), False)
return True return True
class Text(Gtk.Label): class Text(Gtk.Label):
...@@ -130,6 +134,15 @@ class XmlView(Gtk.ScrolledWindow): ...@@ -130,6 +134,15 @@ class XmlView(Gtk.ScrolledWindow):
section.add_listener(self.section_clicked) section.add_listener(self.section_clicked)
self.set_section(0) self.set_section(0)
self.keywords = []
for section in self.sections:
for sequence in section.sequences:
for line in sequence.lines:
for element in line.elements:
if hasattr(element, 'action'):
self.keywords.append(element)
self.last_highlighted = -1
def get_view(self): def get_view(self):
return self return self
...@@ -143,12 +156,6 @@ class XmlView(Gtk.ScrolledWindow): ...@@ -143,12 +156,6 @@ class XmlView(Gtk.ScrolledWindow):
vbox.pack_start(self.sections[-1], True, True, 5) vbox.pack_start(self.sections[-1], True, True, 5)
return vbox return vbox
def get_line_iterator(self):
for section in self.sections:
for sequence in section.sequences:
for line in sequence.lines:
yield line
def get_section(self): def get_section(self):
return int(self.current_section.name) - 1 return int(self.current_section.name) - 1
...@@ -163,5 +170,15 @@ class XmlView(Gtk.ScrolledWindow): ...@@ -163,5 +170,15 @@ class XmlView(Gtk.ScrolledWindow):
self.set_section(int(current.name) - 1) self.set_section(int(current.name) - 1)
def highlight(self, action): def highlight(self, action):
pass if hasattr(action, 'keyword'):
action.keyword.highlight()
else:
i = self.last_highlighted + 1
while i < len(self.keywords):
if self.keywords[i].action == action.text:
self.keywords[i].highlight()
self.last_highlighted = i
break
i += 1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment