diff --git a/maca_common/CMakeLists.txt b/maca_common/CMakeLists.txt index f54ad51c84a92ad918ecc7d9e855605fa83e5238..e389629a4a755425b8b0f72d669195c2ff6e4a6d 100644 --- a/maca_common/CMakeLists.txt +++ b/maca_common/CMakeLists.txt @@ -15,6 +15,8 @@ set(SOURCES src/util.c src/char16.c src/l_rule.c src/fplm.c + src/json_parser.c + src/json_tree.c ) #compiling library diff --git a/maca_common/include/json_parser.h b/maca_common/include/json_parser.h new file mode 100644 index 0000000000000000000000000000000000000000..116c26b23838423fdc843e579903890177af9a08 --- /dev/null +++ b/maca_common/include/json_parser.h @@ -0,0 +1,62 @@ +#ifndef __JSON_PARSER__ +#define __JSON_PARSER__ + +#include <stdio.h> /* FILE, used by json_parser_ctx below */ +#include "json_tree.h" + +#define YYTEXT_MAX 100 +#define EPSILON 0 + +/* non-terminal symbols */ +#define NB_NON_TERMINAUX 8 + +#define _structure_ 1 +#define _list_ 2 +#define _object_ 3 +#define _list_structure_ 4 +#define _list_structure2_ 5 +#define _attr_val_ 6 +#define _list_attr_val_ 7 +#define _list_attr_val2_ 8 + +/* terminal symbols */ +#define NB_TERMINAUX 10 + +#define CROCHET_OUVRANT 1 +#define CROCHET_FERMANT 2 +#define VIRGULE 3 +#define ACCOLADE_OUVRANTE 4 +#define ACCOLADE_FERMANTE 5 +#define COLON 6 +#define STRING 7 +#define NUMBER 8 +#define CONSTANT 9 +#define FIN 10 + +#define NB_MOTS_CLEFS 3 + +typedef struct { + FILE *yyin; + int uc; /* current token */ + int comment; + char yytext[YYTEXT_MAX]; + int yyleng; + /* line counter, used to report errors together with their line number */ + int nb_ligne; + int trace_xml; + int premiers[NB_NON_TERMINAUX+1][NB_TERMINAUX+1]; + int suivants[NB_NON_TERMINAUX+1][NB_TERMINAUX+1]; + int indent_xml; + int indent_step; // set to 0 for no indentation + char *tableMotsClefs[NB_MOTS_CLEFS]; + int codeMotClefs[NB_MOTS_CLEFS]; +}json_parser_ctx; + + + +json_struct *structure(json_parser_ctx *ctx); /* builds the json_struct for the value at the current token (ctx->uc) */ +json_parser_ctx *json_parser_init(char *filename); /* allocates and initialises a parser context for filename */ + + + +#endif diff --git a/maca_common/src/json_tree.h 
b/maca_common/include/json_tree.h similarity index 100% rename from maca_common/src/json_tree.h rename to maca_common/include/json_tree.h diff --git a/maca_common/src/json_parser.c b/maca_common/src/json_parser.c index ecc4ffcb28539c0b244dc94882f6af984f8e0c48..26e0aebf88ea0de48b7954f7335a446d027ad48a 100644 --- a/maca_common/src/json_parser.c +++ b/maca_common/src/json_parser.c @@ -1,38 +1,8 @@ #include<stdio.h> #include<stdlib.h> #include<string.h> -#include"json_tree.h" - -#define YYTEXT_MAX 100 -#define EPSILON 0 - -/* symboles non terminaux */ -#define NB_NON_TERMINAUX 8 - -#define _structure_ 1 -#define _list_ 2 -#define _object_ 3 -#define _list_structure_ 4 -#define _list_structure2_ 5 -#define _attr_val_ 6 -#define _list_attr_val_ 7 -#define _list_attr_val2_ 8 - -/* symboles terminaux */ -#define NB_TERMINAUX 10 - -#define CROCHET_OUVRANT 1 -#define CROCHET_FERMANT 2 -#define VIRGULE 3 -#define ACCOLADE_OUVRANTE 4 -#define ACCOLADE_FERMANTE 5 -#define COLON 6 -#define STRING 7 -#define NUMBER 8 -#define CONSTANT 9 -#define FIN 10 - -#define NB_MOTS_CLEFS 3 +#include"json_parser.h" + /* --------------------------------------------------------------------------- */ /* quelques macros utiles */ @@ -45,24 +15,6 @@ #define is_alphanum(c)(is_num((c)) || is_alpha((c))) -typedef struct { - FILE *yyin; - int uc; /* current token */ - int comment; - char yytext[YYTEXT_MAX]; - int yyleng; - /* Compter les lignes pour afficher les messages d'erreur avec numero ligne */ - int nb_ligne; - int trace_xml; - int premiers[NB_NON_TERMINAUX+1][NB_TERMINAUX+1]; - int suivants[NB_NON_TERMINAUX+1][NB_TERMINAUX+1]; - int indent_xml; - int indent_step; // set to 0 for no indentation - char *tableMotsClefs[NB_MOTS_CLEFS]; - int codeMotClefs[NB_MOTS_CLEFS]; -}json_parser_ctx; - - void initialise_premiers(json_parser_ctx *ctx); void initialise_suivants(json_parser_ctx *ctx); @@ -70,17 +22,17 @@ int yylex(json_parser_ctx *ctx); json_parser_ctx *json_parser_init(char *filename) { - 
json_parser_ctx *ctx = malloc(sizeof(json_parser_ctx)); + json_parser_ctx *ctx = (json_parser_ctx *) malloc(sizeof(json_parser_ctx)); ctx->nb_ligne = 1; - ctx->trace_xml = 1; + ctx->trace_xml = 0; ctx->indent_xml = 0; ctx->indent_step = 1; initialise_premiers(ctx); initialise_suivants(ctx); - ctx->tableMotsClefs[0] = "true"; - ctx->tableMotsClefs[1] = "false"; - ctx->tableMotsClefs[2] = "null"; + ctx->tableMotsClefs[0] = (char *) "true"; + ctx->tableMotsClefs[1] = (char *) "false"; + ctx->tableMotsClefs[2] = (char *) "null"; ctx->codeMotClefs[0] = CONSTANT; ctx->codeMotClefs[1] = CONSTANT; ctx->codeMotClefs[2] = CONSTANT; @@ -185,13 +137,16 @@ int yylex(json_parser_ctx *ctx) } if(c == '"') { - do{ - c = lireCar(ctx); + ctx->yyleng = 0; + c = lireCar(ctx); + while(c != '"'){ if(ctx->yyleng >= YYTEXT_MAX){ - erreur(ctx, "constante trop longue"); + erreur(ctx, (char *) "constante trop longue"); } - } while(c != '"'); - /* printf("c = %c yytext = %s\n", c, ctx->yytext); */ + c = lireCar(ctx); + } + ctx->yytext[--ctx->yyleng] = '\0'; /* overwrite the last stored character with the terminator (presumably the closing quote stored by lireCar -- confirm) */ + // printf("c = %c yytext = %s\n", c, ctx->yytext); return STRING; } @@ -214,7 +169,7 @@ int yylex(json_parser_ctx *ctx) fprintf( stderr, "Ligne %d: caractère invalide: %c\n", ctx->nb_ligne, c ); exit(-1); } - + return -1; } /*-------------------------------------------------------------------------*/ @@ -234,7 +189,7 @@ void consommer(json_parser_ctx *ctx, int c ) { ctx->uc = yylex(ctx); /* consommer le caractère */ } else - erreur(ctx, "erreure lexicale" ); + erreur(ctx, (char *) "erreure lexicale" ); } /*-------------------------------------------------------------------------*/ @@ -462,7 +417,7 @@ json_struct *structure(json_parser_ctx *ctx) } if(ctx->uc == STRING){ - string = strdup(ctx->yytext); + string = (ctx->yyleng == 0)? 
NULL : strdup(ctx->yytext); /* a zero-length token yields a NULL string instead of a copy */ consommer(ctx, STRING); affiche_balise_fermante(ctx, __FUNCTION__); return json_new_string(string); @@ -484,8 +439,8 @@ json_struct *structure(json_parser_ctx *ctx) affiche_balise_fermante(ctx, __FUNCTION__); return json_new_constant(constant); } - erreur(ctx, "" ); - + erreur(ctx, (char *) "" ); + return NULL; } /*---------------------------------------------------------*/ @@ -500,7 +455,7 @@ json_struct *list (json_parser_ctx *ctx) consommer(ctx, CROCHET_FERMANT); affiche_balise_fermante(ctx, __FUNCTION__); return json_new_list(s); - erreur(ctx, ""); + erreur(ctx, (char *)""); } /*---------------------------------------------------------*/ @@ -521,7 +476,8 @@ json_struct *list_structure(json_parser_ctx *ctx) affiche_balise_fermante(ctx, __FUNCTION__); return NULL; } - erreur(ctx, ""); + erreur(ctx, (char *)""); + return NULL; } /*---------------------------------------------------------*/ @@ -544,7 +500,8 @@ json_struct *list_structure2(json_parser_ctx *ctx) affiche_balise_fermante(ctx, __FUNCTION__); return NULL; } - erreur(ctx, ""); + erreur(ctx, (char *)""); + return NULL; /* every path now returns a value, as in the sibling parsing functions */ } /*---------------------------------------------------------*/ @@ -579,7 +536,8 @@ json_attr_val *list_attr_val(json_parser_ctx *ctx) affiche_balise_fermante(ctx, __FUNCTION__); return NULL; } - erreur(ctx, ""); + erreur(ctx, (char *)""); + return NULL; } /*---------------------------------------------------------*/ @@ -603,7 +561,8 @@ json_attr_val *list_attr_val2(json_parser_ctx *ctx) return NULL; } - erreur(ctx, ""); + erreur(ctx, (char *)""); + return NULL; } /*---------------------------------------------------------*/ @@ -624,10 +583,10 @@ json_attr_val *attr_val(json_parser_ctx *ctx) } -int main(int arc, char *argv[]) +/*int main(int arc, char *argv[]) { json_parser_ctx *ctx = json_parser_init(argv[1]); json_struct *s = structure(ctx); json_print_struct(stdout, s); json_free_struct(s); -} + }*/ diff --git a/maca_common/src/json_tree.c b/maca_common/src/json_tree.c index 
afb5361ccd8fe62bcd8b332df1ce10f4851b3e93..74a9f46b8e131e0f871f412a85b265547e0f48ab 100644 --- a/maca_common/src/json_tree.c +++ b/maca_common/src/json_tree.c @@ -5,7 +5,7 @@ json_struct *json_new_struct(int type) { - json_struct *c = malloc(sizeof(json_struct)); + json_struct *c = (json_struct *)malloc(sizeof(json_struct)); if(c == NULL){ fprintf(stderr, "memory allocation problem !\n"); exit(1); @@ -39,7 +39,7 @@ json_struct *json_new_constant(int constant) json_attr_val *json_new_attr_val(char *attr, json_struct *s, json_attr_val *next) { - json_attr_val *av = malloc(sizeof(json_attr_val)); + json_attr_val *av = (json_attr_val *)malloc(sizeof(json_attr_val)); if(av == NULL){ fprintf(stderr, "memory allocation problem !\n"); exit(1); diff --git a/maca_tools/CMakeLists.txt b/maca_tools/CMakeLists.txt index c0b21b12795166a800df4972a9328012ada94e72..20029ff0070e7ca85663c4a9ef0745fe4c4ea1b1 100644 --- a/maca_tools/CMakeLists.txt +++ b/maca_tools/CMakeLists.txt @@ -16,6 +16,10 @@ add_executable(mcf2json ./src/mcf2json.c) target_link_libraries(mcf2json maca_common) install (TARGETS mcf2json DESTINATION bin) +add_executable(json2mcf ./src/json2mcf.c) +target_link_libraries(json2mcf maca_common) +install (TARGETS json2mcf DESTINATION bin) + add_executable(maca_compute_l_rules ./src/maca_compute_l_rules.c) target_link_libraries(maca_compute_l_rules maca_common) install (TARGETS maca_compute_l_rules DESTINATION bin) diff --git a/maca_tools/src/json2mcf.c b/maca_tools/src/json2mcf.c new file mode 100644 index 0000000000000000000000000000000000000000..e65ac93ccc5bfa27924fc632b10349a79f2c9c8f --- /dev/null +++ b/maca_tools/src/json2mcf.c @@ -0,0 +1,262 @@ +#include<stdio.h> +#include<stdlib.h> +#include<string.h> +#include<getopt.h> + +#include"mcd.h" +#include"util.h" +#include"word_buffer.h" +#include"json_parser.h" + +typedef struct { + int help; + int verbose; + int debug_mode; + char *program_name; + char *conll_filename; + char *mcf_filename; + char *mcd_filename; + mcd 
*mcd_struct; + char *json_filename; +} context; + +void json2mcf_context_free(context *ctx) +{ + if(ctx){ + if(ctx->program_name) + free(ctx->program_name); + if(ctx->conll_filename) + free(ctx->conll_filename); + if(ctx->mcf_filename) + free(ctx->mcf_filename); + if(ctx->mcd_filename) + free(ctx->mcd_filename); + if(ctx->json_filename) /* strdup'ed by the -j option */ + free(ctx->json_filename); + if(ctx->mcd_struct) + mcd_free(ctx->mcd_struct); + free(ctx); + } +} + +context *json2mcf_context_new(void) +{ + context *ctx = (context *)memalloc(sizeof(context)); + + ctx->help = 0; + ctx->verbose = 0; + ctx->debug_mode = 0; + ctx->program_name = NULL; + ctx->conll_filename = NULL; + ctx->mcf_filename = NULL; + ctx->mcd_filename = NULL; + ctx->mcd_struct = NULL; + ctx->json_filename = NULL; + return ctx; +} + +void json2mcf_context_general_help_message(context *ctx) +{ + fprintf(stderr, "usage: %s [options]\n", ctx->program_name); + fprintf(stderr, "Options:\n"); + fprintf(stderr, "\t-h --help : print this message\n"); + fprintf(stderr, "\t-v --verbose : activate verbose mode\n"); + fprintf(stderr, "\t-C --mcd : mcd filename\n"); + fprintf(stderr, "\t-i --mcf : mcf filename (read from stdin if absent)\n"); + fprintf(stderr, "\t-j --json : json filename\n"); +} + +void json2mcf_check_options(context *ctx){ + if(ctx->help){ + json2mcf_context_general_help_message(ctx); + exit(1); + } +} + +context *json2mcf_context_read_options(int argc, char *argv[]) +{ + int c; + int option_index = 0; + context *ctx = json2mcf_context_new(); + + ctx->program_name = strdup(argv[0]); + + static struct option long_options[] = + { + {"help", no_argument, 0, 'h'}, + {"verbose", no_argument, 0, 'v'}, + {"debug", no_argument, 0, 'd'}, + {"conll", required_argument, 0, 'o'}, + {"mcd", required_argument, 0, 'C'}, + {"mcf", required_argument, 0, 'i'}, + {"json", required_argument, 0, 'j'}, + {0, 0, 0, 0}, /* getopt_long requires an all-zero terminator entry */ + }; + optind = 0; + opterr = 0; + + while ((c = getopt_long (argc, argv, "hvdo:C:i:j:", long_options, &option_index)) != -1){ + switch (c) + { + case 'd': + ctx->debug_mode = 1; + break; + 
case 'h': + ctx->help = 1; + break; + case 'v': + ctx->verbose = 1; + break; + case 'o': + ctx->conll_filename = strdup(optarg); + break; + case 'i': + ctx->mcf_filename = strdup(optarg); + break; + case 'C': + ctx->mcd_filename = strdup(optarg); + break; + case 'j': + ctx->json_filename = strdup(optarg); + break; + } + } + + if(ctx->mcd_filename){ + ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose); + } + else{ + ctx->mcd_struct = mcd_build_wpmlgfs(); + } + + return ctx; +} + +void update_segment(word_buffer *wb, int start, int end, char *label, char *status_seg, char *status_lab) +{ /* TODO: not implemented yet, currently a no-op */ + +} + +void process_segment(json_attr_val *avl, word_buffer *wb) +{ + int start = -1, end = -1; /* defaults used when the attribute is absent from the object */ + char *label = NULL, *status_seg = NULL, *status_lab = NULL; + json_attr_val *av; + + for(av = avl; av != NULL; av = av->next){ + // printf("attr = %s\n", av->attr); + if(!strcmp(av->attr, "start")){start = (int)(av->val->u.number); continue;} + if(!strcmp(av->attr, "end")){end = (int)(av->val->u.number); continue;} + if(!strcmp(av->attr, "label")){label = av->val->u.string; continue;} + if(!strcmp(av->attr, "status_seg")){status_seg = av->val->u.string; continue;} + if(!strcmp(av->attr, "status_lab")){status_lab = av->val->u.string; continue;} + } + update_segment(wb, start, end, label, status_seg, status_lab); + // printf("segment : start = %d end = %d label = %s status_seg = %s status_lab = %s\n", start, end, label, status_seg, status_lab); + +} + +void process_segments(json_struct *segments, word_buffer *wb) +{ + json_struct *segment; + printf("process_segments\n"); + for(segment = segments->u.first; segment != NULL; segment = segment->next){ + process_segment(segment->u.attr_val_list, wb); + } +} + +// {"orig": 1, "dest":2, "label": "suj", "status_link": "", "status_lab": "", "timestamp": "", "author": "", "target": ""}, + + +void process_link(json_attr_val *avl, word_buffer *wb) +{ + int orig = -1, dest = -1; /* defaults used when the attribute is absent from the object */ + char *label = NULL, *status_link = NULL, *status_lab = NULL; + json_attr_val *av; + + for(av = avl; av != NULL; av = 
av->next){ + // printf("attr = %s\n", av->attr); + if(!strcmp(av->attr, "orig")){orig = (int)(av->val->u.number); continue;} + if(!strcmp(av->attr, "dest")){dest = (int)(av->val->u.number); continue;} + if(!strcmp(av->attr, "label")){label = av->val->u.string; continue;} + if(!strcmp(av->attr, "status_link")){status_link = av->val->u.string; continue;} + if(!strcmp(av->attr, "status_lab")){status_lab = av->val->u.string; continue;} + } + // printf("link : orig = %d dest = %d label = %s status_link = %s status_lab = %s\n", orig, dest, label, status_link, status_lab); +} + +void process_links(json_struct *segments, word_buffer *wb) +{ + json_struct *link; + printf("process_links\n"); + for(link = segments->u.first; link != NULL; link = link->next){ + process_link(link->u.attr_val_list, wb); + } + +} + +void process_document(json_struct *document, word_buffer *wb) +{ + json_attr_val *avl = NULL; + printf("process_document\n"); + for(avl = document->u.attr_val_list; avl != NULL; avl = avl->next){ + if(!strcmp(avl->attr, (char *)"id")) printf("id = %s\n", avl->val->u.string ? avl->val->u.string : ""); /* the parser stores NULL for an empty string */ + if(!strcmp(avl->attr, (char *)"segments")) process_segments(avl->val, wb); + if(!strcmp(avl->attr, (char *)"links")) process_links(avl->val, wb); + } +} + +void process_documents(json_struct *documents, word_buffer *wb) +{ + json_struct *document; + printf("process_documents\n"); + for(document = documents->u.first; document != NULL; document = document->next){ + process_document(document, wb); + } +} + +int main(int argc, char *argv[]) +{ + FILE *output_file; + context *ctx = json2mcf_context_read_options(argc, argv); + word_buffer *wb = NULL; + word *w = NULL; + int first_sentence = 1; + int new_sentence = 1; + int index_first_word; + int index_last_word; + int sentence_nb = 0; + json_parser_ctx *parser_ctx = NULL; + json_struct *root = NULL; + json_struct *document = NULL; + json_attr_val *avl = NULL; + + json2mcf_check_options(ctx); + wb = word_buffer_load_mcf(ctx->mcf_filename, 
ctx->mcd_struct); + + parser_ctx = json_parser_init(ctx->json_filename); /* NOTE(review): json_filename is NULL when -j is omitted -- confirm json_parser_init accepts that */ + root = structure(parser_ctx); + + if(root == NULL || root->type != JSON_OBJECT){ /* structure() can return NULL */ + fprintf(stderr, "erreur le json doit être un objet\n"); + exit(1); + } + for(avl = root->u.attr_val_list; avl != NULL; avl = avl->next){ + printf("section %s\n", avl->attr); + if(!strcmp(avl->attr, (char *)"documents")){ + process_documents(avl->val, wb); + } + } + + /*json_print_struct(stdout, root); + json_free_struct(root);*/ + + + + + json2mcf_context_free(ctx); + + + + return 0; +}