Skip to content
Snippets Groups Projects
Commit 60e30a9c authored by Alexis Nasr's avatar Alexis Nasr
Browse files

added in maca_tools json2mcf that projects annotation of the json file to the mcf file

parent cc5c1716
Branches
No related tags found
No related merge requests found
...@@ -15,6 +15,8 @@ set(SOURCES src/util.c ...@@ -15,6 +15,8 @@ set(SOURCES src/util.c
src/char16.c src/char16.c
src/l_rule.c src/l_rule.c
src/fplm.c src/fplm.c
src/json_parser.c
src/json_tree.c
) )
#compiling library #compiling library
......
#ifndef __JSON_PARSER__
#define __JSON_PARSER__

/* json_parser_ctx holds a FILE *, so pull in <stdio.h> here: the header must
   compile no matter what the including file has (or has not) included. */
#include <stdio.h>

#include "json_tree.h"

/* Size in bytes of the yytext buffer of the parser context. */
#define YYTEXT_MAX 100

#define EPSILON 0

/* Non-terminal symbols. Identifiers run from 1 to NB_NON_TERMINAUX. */
#define NB_NON_TERMINAUX 8
#define _structure_ 1
#define _list_ 2
#define _object_ 3
#define _list_structure_ 4
#define _list_structure2_ 5
#define _attr_val_ 6
#define _list_attr_val_ 7
#define _list_attr_val2_ 8

/* Terminal symbols (token codes). Identifiers run from 1 to NB_TERMINAUX. */
#define NB_TERMINAUX 10
#define CROCHET_OUVRANT 1     /* opening bracket */
#define CROCHET_FERMANT 2     /* closing bracket */
#define VIRGULE 3             /* comma */
#define ACCOLADE_OUVRANTE 4   /* opening brace */
#define ACCOLADE_FERMANTE 5   /* closing brace */
#define COLON 6
#define STRING 7
#define NUMBER 8
#define CONSTANT 9
#define FIN 10                /* end */

/* Number of entries in the keyword tables of the parser context. */
#define NB_MOTS_CLEFS 3

/* State of one JSON parser instance. */
typedef struct {
  FILE *yyin;                 /* NOTE(review): presumably the stream being parsed - confirm in json_parser.c */
  int uc;                     /* current token */
  int comment;                /* NOTE(review): purpose not visible in this header - confirm in json_parser.c */
  char yytext[YYTEXT_MAX];    /* text of the token being read */
  int yyleng;                 /* number of characters stored in yytext */
  /* Line counter, so that error messages can report a line number. */
  int nb_ligne;
  int trace_xml;              /* NOTE(review): looks like a switch for an XML trace of the parse - confirm */
  /* Tables indexed [non-terminal][terminal]; the +1 leaves room for index 0
     since both kinds of identifiers start at 1. The names translate to
     "firsts" and "followers". */
  int premiers[NB_NON_TERMINAUX+1][NB_TERMINAUX+1];
  int suivants[NB_NON_TERMINAUX+1][NB_TERMINAUX+1];
  int indent_xml;
  int indent_step;            /* set to 0 for no indentation */
  char *tableMotsClefs[NB_MOTS_CLEFS];  /* keyword spellings */
  int codeMotClefs[NB_MOTS_CLEFS];      /* token code associated with each keyword */
} json_parser_ctx;

/* Parse one JSON structure using the given context and return the tree that
   was built for it. */
json_struct *structure(json_parser_ctx *ctx);

/* Allocate and initialise a parser context for the given file name. */
json_parser_ctx *json_parser_init(char *filename);

#endif
File moved
#include<stdio.h> #include<stdio.h>
#include<stdlib.h> #include<stdlib.h>
#include<string.h> #include<string.h>
#include"json_tree.h" #include"json_parser.h"
#define YYTEXT_MAX 100
#define EPSILON 0
/* symboles non terminaux */
#define NB_NON_TERMINAUX 8
#define _structure_ 1
#define _list_ 2
#define _object_ 3
#define _list_structure_ 4
#define _list_structure2_ 5
#define _attr_val_ 6
#define _list_attr_val_ 7
#define _list_attr_val2_ 8
/* symboles terminaux */
#define NB_TERMINAUX 10
#define CROCHET_OUVRANT 1
#define CROCHET_FERMANT 2
#define VIRGULE 3
#define ACCOLADE_OUVRANTE 4
#define ACCOLADE_FERMANTE 5
#define COLON 6
#define STRING 7
#define NUMBER 8
#define CONSTANT 9
#define FIN 10
#define NB_MOTS_CLEFS 3
/* --------------------------------------------------------------------------- */ /* --------------------------------------------------------------------------- */
/* quelques macros utiles */ /* quelques macros utiles */
...@@ -45,24 +15,6 @@ ...@@ -45,24 +15,6 @@
#define is_alphanum(c)(is_num((c)) || is_alpha((c))) #define is_alphanum(c)(is_num((c)) || is_alpha((c)))
typedef struct {
FILE *yyin;
int uc; /* current token */
int comment;
char yytext[YYTEXT_MAX];
int yyleng;
/* Compter les lignes pour afficher les messages d'erreur avec numero ligne */
int nb_ligne;
int trace_xml;
int premiers[NB_NON_TERMINAUX+1][NB_TERMINAUX+1];
int suivants[NB_NON_TERMINAUX+1][NB_TERMINAUX+1];
int indent_xml;
int indent_step; // set to 0 for no indentation
char *tableMotsClefs[NB_MOTS_CLEFS];
int codeMotClefs[NB_MOTS_CLEFS];
}json_parser_ctx;
void initialise_premiers(json_parser_ctx *ctx); void initialise_premiers(json_parser_ctx *ctx);
void initialise_suivants(json_parser_ctx *ctx); void initialise_suivants(json_parser_ctx *ctx);
...@@ -70,17 +22,17 @@ int yylex(json_parser_ctx *ctx); ...@@ -70,17 +22,17 @@ int yylex(json_parser_ctx *ctx);
json_parser_ctx *json_parser_init(char *filename) json_parser_ctx *json_parser_init(char *filename)
{ {
json_parser_ctx *ctx = malloc(sizeof(json_parser_ctx)); json_parser_ctx *ctx = (json_parser_ctx *) malloc(sizeof(json_parser_ctx));
ctx->nb_ligne = 1; ctx->nb_ligne = 1;
ctx->trace_xml = 1; ctx->trace_xml = 0;
ctx->indent_xml = 0; ctx->indent_xml = 0;
ctx->indent_step = 1; ctx->indent_step = 1;
initialise_premiers(ctx); initialise_premiers(ctx);
initialise_suivants(ctx); initialise_suivants(ctx);
ctx->tableMotsClefs[0] = "true"; ctx->tableMotsClefs[0] = (char *) "true";
ctx->tableMotsClefs[1] = "false"; ctx->tableMotsClefs[1] = (char *) "false";
ctx->tableMotsClefs[2] = "null"; ctx->tableMotsClefs[2] = (char *) "null";
ctx->codeMotClefs[0] = CONSTANT; ctx->codeMotClefs[0] = CONSTANT;
ctx->codeMotClefs[1] = CONSTANT; ctx->codeMotClefs[1] = CONSTANT;
ctx->codeMotClefs[2] = CONSTANT; ctx->codeMotClefs[2] = CONSTANT;
...@@ -185,13 +137,16 @@ int yylex(json_parser_ctx *ctx) ...@@ -185,13 +137,16 @@ int yylex(json_parser_ctx *ctx)
} }
if(c == '"') { if(c == '"') {
do{ ctx->yyleng = 0;
c = lireCar(ctx); c = lireCar(ctx);
while(c != '"'){
if(ctx->yyleng >= YYTEXT_MAX){ if(ctx->yyleng >= YYTEXT_MAX){
erreur(ctx, "constante trop longue"); erreur(ctx, (char *) "constante trop longue");
} }
} while(c != '"'); c = lireCar(ctx);
/* printf("c = %c yytext = %s\n", c, ctx->yytext); */ }
ctx->yytext[--ctx->yyleng] = '\0';
// printf("c = %c yytext = %s\n", c, ctx->yytext);
return STRING; return STRING;
} }
...@@ -214,7 +169,7 @@ int yylex(json_parser_ctx *ctx) ...@@ -214,7 +169,7 @@ int yylex(json_parser_ctx *ctx)
fprintf( stderr, "Ligne %d: caractère invalide: %c\n", ctx->nb_ligne, c ); fprintf( stderr, "Ligne %d: caractère invalide: %c\n", ctx->nb_ligne, c );
exit(-1); exit(-1);
} }
return -1;
} }
/*-------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------*/
...@@ -234,7 +189,7 @@ void consommer(json_parser_ctx *ctx, int c ) { ...@@ -234,7 +189,7 @@ void consommer(json_parser_ctx *ctx, int c ) {
ctx->uc = yylex(ctx); /* consommer le caractère */ ctx->uc = yylex(ctx); /* consommer le caractère */
} }
else else
erreur(ctx, "erreure lexicale" ); erreur(ctx, (char *) "erreure lexicale" );
} }
/*-------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------*/
...@@ -462,7 +417,7 @@ json_struct *structure(json_parser_ctx *ctx) ...@@ -462,7 +417,7 @@ json_struct *structure(json_parser_ctx *ctx)
} }
if(ctx->uc == STRING){ if(ctx->uc == STRING){
string = strdup(ctx->yytext); string = (ctx->yyleng == 0)? NULL : strdup(ctx->yytext);
consommer(ctx, STRING); consommer(ctx, STRING);
affiche_balise_fermante(ctx, __FUNCTION__); affiche_balise_fermante(ctx, __FUNCTION__);
return json_new_string(string); return json_new_string(string);
...@@ -484,8 +439,8 @@ json_struct *structure(json_parser_ctx *ctx) ...@@ -484,8 +439,8 @@ json_struct *structure(json_parser_ctx *ctx)
affiche_balise_fermante(ctx, __FUNCTION__); affiche_balise_fermante(ctx, __FUNCTION__);
return json_new_constant(constant); return json_new_constant(constant);
} }
erreur(ctx, "" ); erreur(ctx, (char *) "" );
return NULL;
} }
/*---------------------------------------------------------*/ /*---------------------------------------------------------*/
...@@ -500,7 +455,7 @@ json_struct *list (json_parser_ctx *ctx) ...@@ -500,7 +455,7 @@ json_struct *list (json_parser_ctx *ctx)
consommer(ctx, CROCHET_FERMANT); consommer(ctx, CROCHET_FERMANT);
affiche_balise_fermante(ctx, __FUNCTION__); affiche_balise_fermante(ctx, __FUNCTION__);
return json_new_list(s); return json_new_list(s);
erreur(ctx, ""); erreur(ctx, (char *)"");
} }
/*---------------------------------------------------------*/ /*---------------------------------------------------------*/
...@@ -521,7 +476,8 @@ json_struct *list_structure(json_parser_ctx *ctx) ...@@ -521,7 +476,8 @@ json_struct *list_structure(json_parser_ctx *ctx)
affiche_balise_fermante(ctx, __FUNCTION__); affiche_balise_fermante(ctx, __FUNCTION__);
return NULL; return NULL;
} }
erreur(ctx, ""); erreur(ctx, (char *)"");
return NULL;
} }
/*---------------------------------------------------------*/ /*---------------------------------------------------------*/
...@@ -544,7 +500,7 @@ json_struct *list_structure2(json_parser_ctx *ctx) ...@@ -544,7 +500,7 @@ json_struct *list_structure2(json_parser_ctx *ctx)
affiche_balise_fermante(ctx, __FUNCTION__); affiche_balise_fermante(ctx, __FUNCTION__);
return NULL; return NULL;
} }
erreur(ctx, ""); erreur(ctx, (char *)"");
} }
/*---------------------------------------------------------*/ /*---------------------------------------------------------*/
...@@ -579,7 +535,8 @@ json_attr_val *list_attr_val(json_parser_ctx *ctx) ...@@ -579,7 +535,8 @@ json_attr_val *list_attr_val(json_parser_ctx *ctx)
affiche_balise_fermante(ctx, __FUNCTION__); affiche_balise_fermante(ctx, __FUNCTION__);
return NULL; return NULL;
} }
erreur(ctx, ""); erreur(ctx, (char *)"");
return NULL;
} }
/*---------------------------------------------------------*/ /*---------------------------------------------------------*/
...@@ -603,7 +560,8 @@ json_attr_val *list_attr_val2(json_parser_ctx *ctx) ...@@ -603,7 +560,8 @@ json_attr_val *list_attr_val2(json_parser_ctx *ctx)
return NULL; return NULL;
} }
erreur(ctx, ""); erreur(ctx, (char *)"");
return NULL;
} }
/*---------------------------------------------------------*/ /*---------------------------------------------------------*/
...@@ -624,10 +582,10 @@ json_attr_val *attr_val(json_parser_ctx *ctx) ...@@ -624,10 +582,10 @@ json_attr_val *attr_val(json_parser_ctx *ctx)
} }
int main(int arc, char *argv[]) /*int main(int arc, char *argv[])
{ {
json_parser_ctx *ctx = json_parser_init(argv[1]); json_parser_ctx *ctx = json_parser_init(argv[1]);
json_struct *s = structure(ctx); json_struct *s = structure(ctx);
json_print_struct(stdout, s); json_print_struct(stdout, s);
json_free_struct(s); json_free_struct(s);
} }*/
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
json_struct *json_new_struct(int type) json_struct *json_new_struct(int type)
{ {
json_struct *c = malloc(sizeof(json_struct)); json_struct *c = (json_struct *)malloc(sizeof(json_struct));
if(c == NULL){ if(c == NULL){
fprintf(stderr, "memory allocation problem !\n"); fprintf(stderr, "memory allocation problem !\n");
exit(1); exit(1);
...@@ -39,7 +39,7 @@ json_struct *json_new_constant(int constant) ...@@ -39,7 +39,7 @@ json_struct *json_new_constant(int constant)
json_attr_val *json_new_attr_val(char *attr, json_struct *s, json_attr_val *next) json_attr_val *json_new_attr_val(char *attr, json_struct *s, json_attr_val *next)
{ {
json_attr_val *av = malloc(sizeof(json_attr_val)); json_attr_val *av = (json_attr_val *)malloc(sizeof(json_attr_val));
if(av == NULL){ if(av == NULL){
fprintf(stderr, "memory allocation problem !\n"); fprintf(stderr, "memory allocation problem !\n");
exit(1); exit(1);
......
...@@ -16,6 +16,10 @@ add_executable(mcf2json ./src/mcf2json.c) ...@@ -16,6 +16,10 @@ add_executable(mcf2json ./src/mcf2json.c)
target_link_libraries(mcf2json maca_common) target_link_libraries(mcf2json maca_common)
install (TARGETS mcf2json DESTINATION bin) install (TARGETS mcf2json DESTINATION bin)
add_executable(json2mcf ./src/json2mcf.c)
target_link_libraries(json2mcf maca_common)
install (TARGETS json2mcf DESTINATION bin)
add_executable(maca_compute_l_rules ./src/maca_compute_l_rules.c) add_executable(maca_compute_l_rules ./src/maca_compute_l_rules.c)
target_link_libraries(maca_compute_l_rules maca_common) target_link_libraries(maca_compute_l_rules maca_common)
install (TARGETS maca_compute_l_rules DESTINATION bin) install (TARGETS maca_compute_l_rules DESTINATION bin)
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<getopt.h>
#include"mcd.h"
#include"util.h"
#include"word_buffer.h"
#include"json_parser.h"
/* Command-line state of the json2mcf tool, filled by
   json2mcf_context_read_options(). */
typedef struct {
int help;              /* set to 1 by -h / --help */
int verbose;           /* set to 1 by -v / --verbose */
int debug_mode;        /* set to 1 by -d / --debug */
char *program_name;    /* heap copy of argv[0] */
char *conll_filename;  /* heap copy of the -o / --conll argument, or NULL */
char *mcf_filename;    /* heap copy of the -i / --mcf argument, or NULL */
char *mcd_filename;    /* heap copy of the -C / --mcd argument, or NULL */
mcd *mcd_struct;       /* read from mcd_filename when given, otherwise built by mcd_build_wpmlgfs() */
char *json_filename;   /* heap copy of the -j / --json argument, or NULL */
} context;
/*
 * Release a context and everything it owns.
 *
 * ctx may be NULL, in which case nothing happens. All file name strings are
 * heap copies made with strdup() while reading the options, so they are all
 * freed here, including json_filename, which used to be leaked.
 */
void json2mcf_context_free(context *ctx)
{
  if(ctx == NULL)
    return;

  /* free(NULL) is a no-op, so unset strings need no guard. */
  free(ctx->program_name);
  free(ctx->conll_filename);
  free(ctx->mcf_filename);
  free(ctx->mcd_filename);
  free(ctx->json_filename);

  if(ctx->mcd_struct)
    mcd_free(ctx->mcd_struct);

  free(ctx);
}
context *json2mcf_context_new(void)
{
context *ctx = (context *)memalloc(sizeof(context));
ctx->help = 0;
ctx->verbose = 0;
ctx->debug_mode = 0;
ctx->program_name = NULL;
ctx->conll_filename = NULL;
ctx->mcf_filename = NULL;
ctx->mcd_filename = NULL;
ctx->mcd_struct = NULL;
ctx->json_filename = NULL;
return ctx;
}
/*
 * Write the usage summary to stderr. The first line carries
 * ctx->program_name; the remaining lines are fixed text.
 */
void json2mcf_context_general_help_message(context *ctx)
{
  static const char *const fixed_lines[] = {
    "Options:\n",
    "\t-h --help : print this message\n",
    "\t-v --verbose : activate verbose mode\n",
    "\t-C --mcd : mcd filename\n",
    "\t-i --mcf : mcf filename (read from stdin if absent)\n",
    "\t-j --json : json filename\n"
  };
  size_t k;

  fprintf(stderr, "usage: %s [options]\n", ctx->program_name);
  for(k = 0; k < sizeof fixed_lines / sizeof fixed_lines[0]; k++)
    fputs(fixed_lines[k], stderr);
}
/*
 * When help was requested, print the usage message and terminate the
 * process with status 1. Otherwise return without doing anything.
 */
void json2mcf_check_options(context *ctx)
{
  if(!ctx->help)
    return;

  json2mcf_context_general_help_message(ctx);
  exit(1);
}
/*
 * Build a context from the command line.
 *
 * Recognised options: -h/--help, -v/--verbose, -d/--debug, -o/--conll FILE,
 * -C/--mcd FILE, -i/--mcf FILE, -j/--json FILE. Option arguments and argv[0]
 * are copied with strdup(). Unrecognised options are ignored silently
 * (opterr is set to 0).
 *
 * After parsing, mcd_struct is read from the mcd file when one was given and
 * built with mcd_build_wpmlgfs() otherwise.
 *
 * Returns the newly allocated context; release it with
 * json2mcf_context_free().
 */
context *json2mcf_context_read_options(int argc, char *argv[])
{
  int c;
  int option_index = 0;
  context *ctx = json2mcf_context_new();

  ctx->program_name = strdup(argv[0]);

  /* getopt_long() requires the last element of the array to be all zeros;
     without it the function scans past the end of the table when a long
     option matches none of the entries. */
  static struct option long_options[] =
    {
      {"help",    no_argument,       0, 'h'},
      {"verbose", no_argument,       0, 'v'},
      {"debug",   no_argument,       0, 'd'},
      {"conll",   required_argument, 0, 'o'},
      {"mcd",     required_argument, 0, 'C'},
      {"mcf",     required_argument, 0, 'i'},
      {"json",    required_argument, 0, 'j'},
      {0, 0, 0, 0}
    };

  optind = 0;
  opterr = 0;

  while ((c = getopt_long (argc, argv, "hvdo:C:i:j:", long_options, &option_index)) != -1){
    switch (c)
      {
      case 'd':
        ctx->debug_mode = 1;
        break;
      case 'h':
        ctx->help = 1;
        break;
      case 'v':
        ctx->verbose = 1;
        break;
      case 'o':
        ctx->conll_filename = strdup(optarg);
        break;
      case 'i':
        ctx->mcf_filename = strdup(optarg);
        break;
      case 'C':
        ctx->mcd_filename = strdup(optarg);
        break;
      case 'j':
        ctx->json_filename = strdup(optarg);
        break;
      default:
        /* unknown option or missing argument: ignored, as before */
        break;
      }
  }

  if(ctx->mcd_filename){
    ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
  }
  else{
    ctx->mcd_struct = mcd_build_wpmlgfs();
  }

  return ctx;
}
/* Placeholder for applying one segment annotation to the word buffer.
   The body is empty for now: no parameter is read and wb is left untouched.
   process_segment() calls it with the values extracted from one JSON
   segment object. */
void update_segment(word_buffer *wb, int start, int end, char *label, char *status_seg, char *status_lab)
{
}
/*
 * Extract the fields of one segment object and hand them to update_segment().
 *
 * avl is the attribute/value list of the segment. The attributes looked for
 * are "start", "end", "label", "status_seg" and "status_lab"; any other
 * attribute is skipped.
 *
 * Every local now has a defined default (-1 for the two positions, NULL for
 * the strings), so a segment that lacks an attribute no longer passes
 * indeterminate values to update_segment().
 */
void process_segment(json_attr_val *avl, word_buffer *wb)
{
  int start = -1;
  int end = -1;
  char *label = NULL;
  char *status_seg = NULL;
  char *status_lab = NULL;
  json_attr_val *av;

  for(av = avl; av != NULL; av = av->next){
    if(!strcmp(av->attr, "start")){start = (int)(av->val->u.number); continue;}
    if(!strcmp(av->attr, "end")){end = (int)(av->val->u.number); continue;}
    if(!strcmp(av->attr, "label")){label = av->val->u.string; continue;}
    if(!strcmp(av->attr, "status_seg")){status_seg = av->val->u.string; continue;}
    if(!strcmp(av->attr, "status_lab")){status_lab = av->val->u.string; continue;}
  }

  update_segment(wb, start, end, label, status_seg, status_lab);
}
/*
 * Print a progress line on stdout, then call process_segment() on the
 * attribute/value list of every element chained from segments->u.first.
 */
void process_segments(json_struct *segments, word_buffer *wb)
{
  json_struct *cur = segments->u.first;

  printf("process_segments\n");
  while(cur != NULL){
    process_segment(cur->u.attr_val_list, wb);
    cur = cur->next;
  }
}
// {"orig": 1, "dest":2, "label": "suj", "status_link": "", "status_lab": "", "timestamp": "", "author": "", "target": ""},
/*
 * Walk the attribute/value list of one link object and pick out the
 * "orig", "dest", "label", "status_link" and "status_lab" attributes.
 * The extracted values are not used after the loop, and wb is not touched.
 */
void process_link(json_attr_val *avl, word_buffer *wb)
{
  int orig, dest;
  char *label, *status_link, *status_lab;
  json_attr_val *field = avl;

  while(field != NULL){
    const char *key = field->attr;

    if(strcmp(key, "orig") == 0)
      orig = (int)(field->val->u.number);
    else if(strcmp(key, "dest") == 0)
      dest = (int)(field->val->u.number);
    else if(strcmp(key, "label") == 0)
      label = field->val->u.string;
    else if(strcmp(key, "status_link") == 0)
      status_link = field->val->u.string;
    else if(strcmp(key, "status_lab") == 0)
      status_lab = field->val->u.string;

    field = field->next;
  }
}
/*
 * Print a progress line on stdout, then call process_link() on the
 * attribute/value list of every element chained from segments->u.first.
 */
void process_links(json_struct *segments, word_buffer *wb)
{
  json_struct *cur = segments->u.first;

  printf("process_links\n");
  while(cur != NULL){
    process_link(cur->u.attr_val_list, wb);
    cur = cur->next;
  }
}
/*
 * Handle one document object: print a progress line, then go through its
 * attributes. "id" is printed on stdout, "segments" is passed to
 * process_segments() and "links" to process_links(). Other attributes are
 * skipped.
 */
void process_document(json_struct *document, word_buffer *wb)
{
  json_attr_val *field = document->u.attr_val_list;

  printf("process_document\n");
  while(field != NULL){
    const char *key = field->attr;

    if(strcmp(key, "id") == 0)
      printf("id = %s\n", field->val->u.string);
    else if(strcmp(key, "segments") == 0)
      process_segments(field->val, wb);
    else if(strcmp(key, "links") == 0)
      process_links(field->val, wb);

    field = field->next;
  }
}
/*
 * Print a progress line on stdout, then call process_document() on every
 * element chained from documents->u.first.
 */
void process_documents(json_struct *documents, word_buffer *wb)
{
  json_struct *cur = documents->u.first;

  printf("process_documents\n");
  while(cur != NULL){
    process_document(cur, wb);
    cur = cur->next;
  }
}
/*
 * Entry point of json2mcf.
 *
 * Reads the options, loads the mcf file into a word buffer, parses the JSON
 * file and, for every top-level attribute named "documents", runs
 * process_documents() on its value. The name of each top-level attribute is
 * printed on stdout.
 *
 * Exits with status 1 when the parsed JSON is missing or is not an object.
 * The parsed tree, the parser context and the word buffer are not released
 * before returning.
 */
int main(int argc, char *argv[])
{
  context *ctx = json2mcf_context_read_options(argc, argv);
  word_buffer *wb = NULL;
  json_parser_ctx *parser_ctx = NULL;
  json_struct *root = NULL;
  json_attr_val *avl = NULL;

  json2mcf_check_options(ctx);

  wb = word_buffer_load_mcf(ctx->mcf_filename, ctx->mcd_struct);

  parser_ctx = json_parser_init(ctx->json_filename);
  root = structure(parser_ctx);

  /* structure() has a NULL return path, so check before reading root->type. */
  if(root == NULL || root->type != JSON_OBJECT){
    fprintf(stderr, "erreur le json doit être un objet\n");
    exit(1);
  }

  for(avl = root->u.attr_val_list; avl != NULL; avl = avl->next){
    printf("section %s\n", avl->attr);
    if(!strcmp(avl->attr, "documents")){
      process_documents(avl->val, wb);
    }
  }

  json2mcf_context_free(ctx);
  return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment