Skip to content
Snippets Groups Projects
Commit 60e30a9c authored by Alexis Nasr's avatar Alexis Nasr
Browse files

Added json2mcf to maca_tools: it projects the annotations of a JSON file onto the MCF file.

parent cc5c1716
No related branches found
No related tags found
No related merge requests found
......@@ -15,6 +15,8 @@ set(SOURCES src/util.c
src/char16.c
src/l_rule.c
src/fplm.c
src/json_parser.c
src/json_tree.c
)
#compiling library
......
#ifndef __JSON_PARSER__
#define __JSON_PARSER__

/* FILE is used by json_parser_ctx below; including <stdio.h> here makes the
   header usable on its own, whatever the includer pulled in before it. */
#include <stdio.h>

#include "json_tree.h"

/* Size of the buffer that holds the text of the token being read. */
#define YYTEXT_MAX 100

#define EPSILON 0

/* non-terminal symbols */
#define NB_NON_TERMINAUX 8

#define _structure_ 1
#define _list_ 2
#define _object_ 3
#define _list_structure_ 4
#define _list_structure2_ 5
#define _attr_val_ 6
#define _list_attr_val_ 7
#define _list_attr_val2_ 8

/* terminal symbols (token codes stored in json_parser_ctx.uc) */
#define NB_TERMINAUX 10

#define CROCHET_OUVRANT 1
#define CROCHET_FERMANT 2
#define VIRGULE 3
#define ACCOLADE_OUVRANTE 4
#define ACCOLADE_FERMANTE 5
#define COLON 6
#define STRING 7
#define NUMBER 8
#define CONSTANT 9
#define FIN 10

/* Number of entries in the keyword tables below. */
#define NB_MOTS_CLEFS 3

/* State of one JSON parsing session. */
typedef struct {
  FILE *yyin;               /* input stream */
  int uc;                   /* current token */
  int comment;              /* NOTE(review): meaning not visible from this header - confirm */
  char yytext[YYTEXT_MAX];  /* text of the token being read */
  int yyleng;               /* index/length bookkeeping for yytext */
  /* Line counter, used to report error messages with a line number */
  int nb_ligne;
  int trace_xml;
  /* Tables indexed by [non-terminal][terminal]; presumably the FIRST and
     FOLLOW sets of the grammar - TODO confirm against initialise_premiers()
     and initialise_suivants(). */
  int premiers[NB_NON_TERMINAUX+1][NB_TERMINAUX+1];
  int suivants[NB_NON_TERMINAUX+1][NB_TERMINAUX+1];
  int indent_xml;
  int indent_step; // set to 0 for no indentation
  /* Keyword spellings and the token code associated with each of them
     (same index in both arrays). */
  char *tableMotsClefs[NB_MOTS_CLEFS];
  int codeMotClefs[NB_MOTS_CLEFS];
}json_parser_ctx;

/* Parses one JSON structure with the given parser context and returns the
   tree that was built. */
json_struct *structure(json_parser_ctx *ctx);

/* Allocates and initialises a parser context for the file named filename. */
json_parser_ctx *json_parser_init(char *filename);

#endif
File moved
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"json_tree.h"
#define YYTEXT_MAX 100
#define EPSILON 0
/* symboles non terminaux */
#define NB_NON_TERMINAUX 8
#define _structure_ 1
#define _list_ 2
#define _object_ 3
#define _list_structure_ 4
#define _list_structure2_ 5
#define _attr_val_ 6
#define _list_attr_val_ 7
#define _list_attr_val2_ 8
/* symboles terminaux */
#define NB_TERMINAUX 10
#define CROCHET_OUVRANT 1
#define CROCHET_FERMANT 2
#define VIRGULE 3
#define ACCOLADE_OUVRANTE 4
#define ACCOLADE_FERMANTE 5
#define COLON 6
#define STRING 7
#define NUMBER 8
#define CONSTANT 9
#define FIN 10
#define NB_MOTS_CLEFS 3
#include"json_parser.h"
/* --------------------------------------------------------------------------- */
/* quelques macros utiles */
......@@ -45,24 +15,6 @@
#define is_alphanum(c)(is_num((c)) || is_alpha((c)))
typedef struct {
FILE *yyin;
int uc; /* current token */
int comment;
char yytext[YYTEXT_MAX];
int yyleng;
/* Compter les lignes pour afficher les messages d'erreur avec numero ligne */
int nb_ligne;
int trace_xml;
int premiers[NB_NON_TERMINAUX+1][NB_TERMINAUX+1];
int suivants[NB_NON_TERMINAUX+1][NB_TERMINAUX+1];
int indent_xml;
int indent_step; // set to 0 for no indentation
char *tableMotsClefs[NB_MOTS_CLEFS];
int codeMotClefs[NB_MOTS_CLEFS];
}json_parser_ctx;
void initialise_premiers(json_parser_ctx *ctx);
void initialise_suivants(json_parser_ctx *ctx);
......@@ -70,17 +22,17 @@ int yylex(json_parser_ctx *ctx);
json_parser_ctx *json_parser_init(char *filename)
{
json_parser_ctx *ctx = malloc(sizeof(json_parser_ctx));
json_parser_ctx *ctx = (json_parser_ctx *) malloc(sizeof(json_parser_ctx));
ctx->nb_ligne = 1;
ctx->trace_xml = 1;
ctx->trace_xml = 0;
ctx->indent_xml = 0;
ctx->indent_step = 1;
initialise_premiers(ctx);
initialise_suivants(ctx);
ctx->tableMotsClefs[0] = "true";
ctx->tableMotsClefs[1] = "false";
ctx->tableMotsClefs[2] = "null";
ctx->tableMotsClefs[0] = (char *) "true";
ctx->tableMotsClefs[1] = (char *) "false";
ctx->tableMotsClefs[2] = (char *) "null";
ctx->codeMotClefs[0] = CONSTANT;
ctx->codeMotClefs[1] = CONSTANT;
ctx->codeMotClefs[2] = CONSTANT;
......@@ -185,13 +137,16 @@ int yylex(json_parser_ctx *ctx)
}
if(c == '"') {
do{
ctx->yyleng = 0;
c = lireCar(ctx);
while(c != '"'){
if(ctx->yyleng >= YYTEXT_MAX){
erreur(ctx, "constante trop longue");
erreur(ctx, (char *) "constante trop longue");
}
} while(c != '"');
/* printf("c = %c yytext = %s\n", c, ctx->yytext); */
c = lireCar(ctx);
}
ctx->yytext[--ctx->yyleng] = '\0';
// printf("c = %c yytext = %s\n", c, ctx->yytext);
return STRING;
}
......@@ -214,7 +169,7 @@ int yylex(json_parser_ctx *ctx)
fprintf( stderr, "Ligne %d: caractère invalide: %c\n", ctx->nb_ligne, c );
exit(-1);
}
return -1;
}
/*-------------------------------------------------------------------------*/
......@@ -234,7 +189,7 @@ void consommer(json_parser_ctx *ctx, int c ) {
ctx->uc = yylex(ctx); /* consommer le caractère */
}
else
erreur(ctx, "erreure lexicale" );
erreur(ctx, (char *) "erreure lexicale" );
}
/*-------------------------------------------------------------------------*/
......@@ -462,7 +417,7 @@ json_struct *structure(json_parser_ctx *ctx)
}
if(ctx->uc == STRING){
string = strdup(ctx->yytext);
string = (ctx->yyleng == 0)? NULL : strdup(ctx->yytext);
consommer(ctx, STRING);
affiche_balise_fermante(ctx, __FUNCTION__);
return json_new_string(string);
......@@ -484,8 +439,8 @@ json_struct *structure(json_parser_ctx *ctx)
affiche_balise_fermante(ctx, __FUNCTION__);
return json_new_constant(constant);
}
erreur(ctx, "" );
erreur(ctx, (char *) "" );
return NULL;
}
/*---------------------------------------------------------*/
......@@ -500,7 +455,7 @@ json_struct *list (json_parser_ctx *ctx)
consommer(ctx, CROCHET_FERMANT);
affiche_balise_fermante(ctx, __FUNCTION__);
return json_new_list(s);
erreur(ctx, "");
erreur(ctx, (char *)"");
}
/*---------------------------------------------------------*/
......@@ -521,7 +476,8 @@ json_struct *list_structure(json_parser_ctx *ctx)
affiche_balise_fermante(ctx, __FUNCTION__);
return NULL;
}
erreur(ctx, "");
erreur(ctx, (char *)"");
return NULL;
}
/*---------------------------------------------------------*/
......@@ -544,7 +500,7 @@ json_struct *list_structure2(json_parser_ctx *ctx)
affiche_balise_fermante(ctx, __FUNCTION__);
return NULL;
}
erreur(ctx, "");
erreur(ctx, (char *)"");
}
/*---------------------------------------------------------*/
......@@ -579,7 +535,8 @@ json_attr_val *list_attr_val(json_parser_ctx *ctx)
affiche_balise_fermante(ctx, __FUNCTION__);
return NULL;
}
erreur(ctx, "");
erreur(ctx, (char *)"");
return NULL;
}
/*---------------------------------------------------------*/
......@@ -603,7 +560,8 @@ json_attr_val *list_attr_val2(json_parser_ctx *ctx)
return NULL;
}
erreur(ctx, "");
erreur(ctx, (char *)"");
return NULL;
}
/*---------------------------------------------------------*/
......@@ -624,10 +582,10 @@ json_attr_val *attr_val(json_parser_ctx *ctx)
}
int main(int arc, char *argv[])
/*int main(int arc, char *argv[])
{
json_parser_ctx *ctx = json_parser_init(argv[1]);
json_struct *s = structure(ctx);
json_print_struct(stdout, s);
json_free_struct(s);
}
}*/
......@@ -5,7 +5,7 @@
json_struct *json_new_struct(int type)
{
json_struct *c = malloc(sizeof(json_struct));
json_struct *c = (json_struct *)malloc(sizeof(json_struct));
if(c == NULL){
fprintf(stderr, "memory allocation problem !\n");
exit(1);
......@@ -39,7 +39,7 @@ json_struct *json_new_constant(int constant)
json_attr_val *json_new_attr_val(char *attr, json_struct *s, json_attr_val *next)
{
json_attr_val *av = malloc(sizeof(json_attr_val));
json_attr_val *av = (json_attr_val *)malloc(sizeof(json_attr_val));
if(av == NULL){
fprintf(stderr, "memory allocation problem !\n");
exit(1);
......
......@@ -16,6 +16,10 @@ add_executable(mcf2json ./src/mcf2json.c)
target_link_libraries(mcf2json maca_common)
install (TARGETS mcf2json DESTINATION bin)
add_executable(json2mcf ./src/json2mcf.c)
target_link_libraries(json2mcf maca_common)
install (TARGETS json2mcf DESTINATION bin)
add_executable(maca_compute_l_rules ./src/maca_compute_l_rules.c)
target_link_libraries(maca_compute_l_rules maca_common)
install (TARGETS maca_compute_l_rules DESTINATION bin)
......
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<getopt.h>
#include"mcd.h"
#include"util.h"
#include"word_buffer.h"
#include"json_parser.h"
/* Run-time configuration of the json2mcf tool, filled from the command line
   by json2mcf_context_read_options(). */
typedef struct {
int help; /* non-zero when -h / --help was given */
int verbose; /* non-zero when -v / --verbose was given */
int debug_mode; /* non-zero when -d / --debug was given */
char *program_name; /* heap copy of argv[0] */
char *conll_filename; /* heap copy of the -o / --conll argument, or NULL */
char *mcf_filename; /* heap copy of the -i / --mcf argument, or NULL */
char *mcd_filename; /* heap copy of the -C / --mcd argument, or NULL */
mcd *mcd_struct; /* read from mcd_filename, or built by mcd_build_wpmlgfs() when no mcd file is given */
char *json_filename; /* heap copy of the -j / --json argument, or NULL */
} context;
/*
 * Releases a context and everything it owns.
 *
 * ctx may be NULL, in which case nothing happens. All filename fields are
 * heap copies made with strdup() while reading the options, so each of them
 * is freed here, json_filename included.
 */
void json2mcf_context_free(context *ctx)
{
  if(ctx == NULL)
    return;

  /* free(NULL) is a no-op, so unset fields need no guard */
  free(ctx->program_name);
  free(ctx->conll_filename);
  free(ctx->mcf_filename);
  free(ctx->mcd_filename);
  free(ctx->json_filename);

  /* mcd_free() is a project function: keep the guard since its behaviour on
     NULL is not visible from here */
  if(ctx->mcd_struct)
    mcd_free(ctx->mcd_struct);

  free(ctx);
}
/*
 * Allocates a context with memalloc() and returns it with every flag
 * cleared and every pointer field set to NULL.
 */
context *json2mcf_context_new(void)
{
  context *fresh = (context *)memalloc(sizeof *fresh);

  /* flags */
  fresh->help = fresh->verbose = fresh->debug_mode = 0;

  /* file names and resources: nothing is set yet */
  fresh->program_name   = NULL;
  fresh->conll_filename = NULL;
  fresh->mcf_filename   = NULL;
  fresh->mcd_filename   = NULL;
  fresh->json_filename  = NULL;
  fresh->mcd_struct     = NULL;

  return fresh;
}
/*
 * Writes the usage line (with the program name taken from ctx) followed by
 * the list of supported options to stderr.
 */
void json2mcf_context_general_help_message(context *ctx)
{
  fprintf(stderr, "usage: %s [options]\n", ctx->program_name);

  /* the option list contains no conversion, so it is emitted in one go */
  fputs("Options:\n"
        "\t-h --help : print this message\n"
        "\t-v --verbose : activate verbose mode\n"
        "\t-C --mcd : mcd filename\n"
        "\t-i --mcf : mcf filename (read from stdin if absent)\n"
        "\t-j --json : json filename\n",
        stderr);
}
/*
 * When help was requested, prints the help message and terminates the
 * program with exit status 1; otherwise returns without doing anything.
 */
void json2mcf_check_options(context *ctx)
{
  if(!ctx->help)
    return;

  json2mcf_context_general_help_message(ctx);
  exit(1);
}
/*
 * Builds a context from the command line.
 *
 * argc, argv: the arguments received by main().
 * Returns a new context (to be released with json2mcf_context_free()) whose
 * flags and file names reflect the options found, and whose mcd_struct is
 * read from the mcd file when one was given, or built with
 * mcd_build_wpmlgfs() otherwise.
 *
 * Unknown options are ignored silently (opterr is set to 0).
 */
context *json2mcf_context_read_options(int argc, char *argv[])
{
  int c;
  int option_index = 0;
  context *ctx = json2mcf_context_new();

  /* getopt_long() requires the last element of the array to be filled with
     zeros; without it the function reads past the end of the table */
  static struct option long_options[] =
    {
      {"help",    no_argument,       0, 'h'},
      {"verbose", no_argument,       0, 'v'},
      {"debug",   no_argument,       0, 'd'},
      {"conll",   required_argument, 0, 'o'},
      {"mcd",     required_argument, 0, 'C'},
      {"mcf",     required_argument, 0, 'i'},
      {"json",    required_argument, 0, 'j'},
      {0, 0, 0, 0}
    };

  ctx->program_name = strdup(argv[0]);

  optind = 0;
  opterr = 0;

  while ((c = getopt_long (argc, argv, "hvdo:C:i:j:", long_options, &option_index)) != -1){
    switch (c)
      {
      case 'd':
        ctx->debug_mode = 1;
        break;
      case 'h':
        ctx->help = 1;
        break;
      case 'v':
        ctx->verbose = 1;
        break;
      /* for the filename options, drop the previous copy first so that an
         option given twice does not leak (free(NULL) is a no-op) */
      case 'o':
        free(ctx->conll_filename);
        ctx->conll_filename = strdup(optarg);
        break;
      case 'i':
        free(ctx->mcf_filename);
        ctx->mcf_filename = strdup(optarg);
        break;
      case 'C':
        free(ctx->mcd_filename);
        ctx->mcd_filename = strdup(optarg);
        break;
      case 'j':
        free(ctx->json_filename);
        ctx->json_filename = strdup(optarg);
        break;
      default:
        /* unknown option or missing argument: ignored, as before */
        break;
      }
  }

  if(ctx->mcd_filename){
    ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->verbose);
  }
  else{
    ctx->mcd_struct = mcd_build_wpmlgfs();
  }

  return ctx;
}
/*
 * Placeholder: receives the description of one segment read from the JSON
 * file (boundaries, label and the two status strings) together with the
 * word buffer, and currently does nothing with them.
 */
void update_segment(word_buffer *wb, int start, int end, char *label, char *status_seg, char *status_lab)
{
  /* not implemented yet: mark the parameters as intentionally unused */
  (void)wb;
  (void)start;
  (void)end;
  (void)label;
  (void)status_seg;
  (void)status_lab;
}
/*
 * Reads the attributes of one segment object and forwards them to
 * update_segment().
 *
 * avl: attribute/value list of the segment; the attributes looked for are
 *      "start", "end", "label", "status_seg" and "status_lab", any other
 *      attribute is skipped.
 * wb:  word buffer handed over to update_segment().
 *
 * An attribute that is absent from the list is reported as -1 (numbers) or
 * NULL (strings) instead of an indeterminate value.
 */
void process_segment(json_attr_val *avl, word_buffer *wb)
{
  int start = -1;
  int end = -1;
  char *label = NULL;
  char *status_seg = NULL;
  char *status_lab = NULL;
  json_attr_val *av;

  for(av = avl; av != NULL; av = av->next){
    if(!strcmp(av->attr, "start"))
      start = (int)(av->val->u.number);
    else if(!strcmp(av->attr, "end"))
      end = (int)(av->val->u.number);
    else if(!strcmp(av->attr, "label"))
      label = av->val->u.string;
    else if(!strcmp(av->attr, "status_seg"))
      status_seg = av->val->u.string;
    else if(!strcmp(av->attr, "status_lab"))
      status_lab = av->val->u.string;
  }

  update_segment(wb, start, end, label, status_seg, status_lab);
}
/*
 * Prints a trace line on stdout, then walks the elements of the segments
 * structure (from u.first, following next) and calls process_segment() on
 * the attribute/value list of each of them.
 */
void process_segments(json_struct *segments, word_buffer *wb)
{
  json_struct *cur;

  fputs("process_segments\n", stdout);

  cur = segments->u.first;
  while(cur != NULL){
    process_segment(cur->u.attr_val_list, wb);
    cur = cur->next;
  }
}
/*
 * Reads the attributes of one link object. Example of such an object:
 * {"orig": 1, "dest":2, "label": "suj", "status_link": "", "status_lab": "", "timestamp": "", "author": "", "target": ""},
 *
 * Only "orig", "dest", "label", "status_link" and "status_lab" are picked
 * up; the values are stored in locals and not used any further for now
 * (wb is not touched).
 */
void process_link(json_attr_val *avl, word_buffer *wb)
{
  int orig, dest;
  char *label, *status_link, *status_lab;
  json_attr_val *cur = avl;

  while(cur != NULL){
    if(!strcmp(cur->attr, "orig"))
      orig = (int)(cur->val->u.number);
    else if(!strcmp(cur->attr, "dest"))
      dest = (int)(cur->val->u.number);
    else if(!strcmp(cur->attr, "label"))
      label = cur->val->u.string;
    else if(!strcmp(cur->attr, "status_link"))
      status_link = cur->val->u.string;
    else if(!strcmp(cur->attr, "status_lab"))
      status_lab = cur->val->u.string;
    cur = cur->next;
  }
}
/*
 * Prints a trace line on stdout, then walks the elements of the given
 * structure (from u.first, following next) and calls process_link() on the
 * attribute/value list of each of them.
 */
void process_links(json_struct *segments, word_buffer *wb)
{
  json_struct *cur;

  fputs("process_links\n", stdout);

  cur = segments->u.first;
  while(cur != NULL){
    process_link(cur->u.attr_val_list, wb);
    cur = cur->next;
  }
}
/*
 * Processes one document object: prints its "id" on stdout and hands its
 * "segments" and "links" values to process_segments() / process_links().
 * Any other attribute is ignored.
 *
 * document: the document object (its attribute/value list is walked).
 * wb:       word buffer passed down to the segment and link handlers.
 */
void process_document(json_struct *document, word_buffer *wb)
{
  json_attr_val *avl = NULL;

  printf("process_document\n");
  for(avl = document->u.attr_val_list; avl != NULL; avl = avl->next){
    if(!strcmp(avl->attr, (char *)"id")){
      /* the JSON parser stores an empty string literal as NULL, and passing
         NULL to %s is undefined: print an empty id in that case */
      char *id = avl->val->u.string;
      printf("id = %s\n", id ? id : "");
    }
    else if(!strcmp(avl->attr, (char *)"segments"))
      process_segments(avl->val, wb);
    else if(!strcmp(avl->attr, (char *)"links"))
      process_links(avl->val, wb);
  }
}
/*
 * Prints a trace line on stdout, then calls process_document() on every
 * element of the documents structure (from u.first, following next).
 */
void process_documents(json_struct *documents, word_buffer *wb)
{
  json_struct *cur;

  fputs("process_documents\n", stdout);

  cur = documents->u.first;
  while(cur != NULL){
    process_document(cur, wb);
    cur = cur->next;
  }
}
/*
 * Entry point of json2mcf.
 *
 * Reads the options, loads the mcf file into a word buffer, parses the JSON
 * file and, for every top-level attribute named "documents", processes the
 * documents it contains. The name of each top-level attribute is printed on
 * stdout.
 *
 * Exits with status 1 when help was requested or when the JSON file does not
 * yield an object; returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
  context *ctx = json2mcf_context_read_options(argc, argv);
  word_buffer *wb = NULL;
  json_parser_ctx *parser_ctx = NULL;
  json_struct *root = NULL;
  json_attr_val *avl = NULL;

  json2mcf_check_options(ctx);

  wb = word_buffer_load_mcf(ctx->mcf_filename, ctx->mcd_struct);

  /* NOTE(review): json_filename is NULL when -j is not given; whether
     json_parser_init() accepts that is not visible from here - confirm */
  parser_ctx = json_parser_init(ctx->json_filename);
  root = structure(parser_ctx);

  /* structure() can return NULL, so test the pointer before its type */
  if(root == NULL || root->type != JSON_OBJECT){
    fprintf(stderr, "erreur le json doit être un objet\n");
    exit(1);
  }

  for(avl = root->u.attr_val_list; avl != NULL; avl = avl->next){
    printf("section %s\n", avl->attr);
    if(!strcmp(avl->attr, (char *)"documents")){
      process_documents(avl->val, wb);
    }
  }

  /* NOTE(review): the tree is neither printed nor released; the calls below
     were left disabled by the author:
     json_print_struct(stdout, root);
     json_free_struct(root); */
  json2mcf_context_free(ctx);
  return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment