Commit 35f18a2d authored by Alexis Nasr
Browse files

Modified mcf2json so that it creates a directory containing the JSON files.

parent 87fdf274
......@@ -61,9 +61,9 @@
/*
 * Numeric identifiers for word features (MCD_WF_*).
 *
 * NOTE(review): MCD_WF_FILE, MCD_WF_DIRECTORY and MCD_WF_SPEAKER each appear
 * twice below with different values (47/48/49, then 48/49/50). This looks like
 * the old and new sides of a diff shown together; only one set can remain in
 * the header - confirm which one is current.
 */
#define MCD_WF_Person 45
#define MCD_WF_Tense 46
#define MCD_WF_FILE 47
#define MCD_WF_DIRECTORY 48
#define MCD_WF_SPEAKER 49
#define MCD_WF_FILE 48
#define MCD_WF_DIRECTORY 49
#define MCD_WF_SPEAKER 50
......
......@@ -63,6 +63,11 @@ typedef struct _word {
/*
 * Accessors for individual word features.
 *
 * Each macro evaluates to the entry of w->wf_array selected by the matching
 * MCD_WF_* index. When w is NULL it evaluates to a fallback instead:
 * -1 for label, stag, A, B and C; 0 for sent_seg, file, directory and speaker.
 *
 * The argument is expanded twice (once in the NULL test, once in the access),
 * so do not pass an expression with side effects.
 */
#define word_get_label(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL])
#define word_get_stag(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG])
#define word_get_sent_seg(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SENT_SEG])
#define word_get_file(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_FILE])
#define word_get_directory(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_DIRECTORY])
#define word_get_speaker(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SPEAKER])
#define word_get_A(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A])
#define word_get_B(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B])
#define word_get_C(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C])
......
......@@ -2,6 +2,9 @@
#include<stdlib.h>
#include<string.h>
#include<getopt.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include"mcd.h"
#include"util.h"
......@@ -16,6 +19,7 @@ typedef struct {
char *mcf_filename;
char *mcd_filename;
mcd *mcd_struct;
char *root_dir;
} context;
void mcf2json_context_free(context *ctx)
......@@ -31,6 +35,8 @@ void mcf2json_context_free(context *ctx)
free(ctx->mcd_filename);
if(ctx->mcd_struct)
mcd_free(ctx->mcd_struct);
if(ctx->root_dir)
free(ctx->root_dir);
free(ctx);
}
}
......@@ -47,6 +53,7 @@ context *mcf2json_context_new(void)
ctx->mcf_filename = NULL;
ctx->mcd_filename = NULL;
ctx->mcd_struct = NULL;
ctx->root_dir = NULL;
return ctx;
}
......@@ -58,7 +65,7 @@ void mcf2json_context_general_help_message(context *ctx)
fprintf(stderr, "\t-v --verbose : activate verbose mode\n");
fprintf(stderr, "\t-C --mcd : mcd filename\n");
fprintf(stderr, "\t-i --mcf : mcf filename (read from stdin if absent)\n");
fprintf(stderr, "\t-o --conll : conll filename (write to stdout if absent)\n");
fprintf(stderr, "\t-r --root : root directory of the json files\n");
}
void mcf2json_check_options(context *ctx){
......@@ -81,14 +88,14 @@ context *mcf2json_context_read_options(int argc, char *argv[])
{"help", no_argument, 0, 'h'},
{"verbose", no_argument, 0, 'v'},
{"debug", no_argument, 0, 'd'},
{"conll", required_argument, 0, 'o'},
{"mcd", required_argument, 0, 'C'},
{"mcf", required_argument, 0, 'i'},
{"root", required_argument, 0, 'r'},
};
optind = 0;
opterr = 0;
while ((c = getopt_long (argc, argv, "hvdo:C:i:", long_options, &option_index)) != -1){
while ((c = getopt_long (argc, argv, "hvdC:i:r:", long_options, &option_index)) != -1){
switch (c)
{
case 'd':
......@@ -100,15 +107,15 @@ context *mcf2json_context_read_options(int argc, char *argv[])
case 'v':
ctx->verbose = 1;
break;
case 'o':
ctx->conll_filename = strdup(optarg);
break;
case 'i':
ctx->mcf_filename = strdup(optarg);
break;
case 'C':
ctx->mcd_filename = strdup(optarg);
break;
case 'r':
ctx->root_dir = strdup(optarg);
break;
}
}
......@@ -118,7 +125,6 @@ context *mcf2json_context_read_options(int argc, char *argv[])
else{
ctx->mcd_struct = mcd_build_wpmlgfs();
}
return ctx;
}
......@@ -317,7 +323,7 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind
/*
 * Program entry point: parses the command line into a context, loads the mcf
 * input into a word buffer and prints its sentences through print_header /
 * print_sentence / print_footer, either to per-file outputs below
 * ctx->root_dir or to stdout when no root directory was given.
 *
 * NOTE(review): this span reads like a rendered diff whose +/- markers were
 * lost, so superseded and current lines appear side by side (output_file is
 * declared twice; a sentence loop that uses ctx->conll_filename precedes the
 * root_dir branch). Confirm which lines belong to the current revision.
 */
int main(int argc, char *argv[])
{
FILE *output_file;
FILE *output_file = NULL;
context *ctx = mcf2json_context_read_options(argc, argv);
word_buffer *wb = NULL;
word *w = NULL;
......@@ -326,41 +332,101 @@ int main(int argc, char *argv[])
/* first_sentence and new_sentence are used below, but their declarations are
   not among the visible lines. */
int index_first_word;
int index_last_word;
int sentence_nb = 0;
/* Fixed 1000-byte scratch buffers; nothing in the visible code checks the
   length of what is written into them. */
char current_directory[1000];
char current_file[1000];
/* NOTE(review): previous_directory and previous_file have no initializer, yet
   the first loop iteration passes them to strcmp before anything has been
   copied into them. */
char previous_directory[1000];
char previous_file[1000];
/* root_directory is not referenced again in the visible code. */
char *root_directory = NULL;
char destination_file[1000];
char destination_dir[1000];
struct stat st = {0};
mcf2json_check_options(ctx);
mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->mcf_filename);
/* Opens the file named by ctx->conll_filename for writing, or falls back to
   stdout when that field is NULL. */
output_file = (ctx->conll_filename)? myfopen_no_exit(ctx->conll_filename, "w"): stdout;
wb = word_buffer_load_mcf(ctx->mcf_filename, ctx->mcd_struct);
print_header(output_file, ctx->mcd_struct);
do{
w = word_buffer_b0(wb);
if(new_sentence){
new_sentence = 0;
sentence_nb++;
index_first_word = word_buffer_get_current_index(wb);
}
if(word_get_sent_seg(w)){
index_last_word = word_buffer_get_current_index(wb);
new_sentence = 1;
if(first_sentence == 1)
first_sentence = 0;
else
fprintf(output_file, ",");
fprintf(output_file, "\n");
print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
/* Per-file mode: create the root directory when stat() fails on it. The
   result of mkdir() is not checked. */
if(ctx->root_dir){
if(stat(ctx->root_dir, &st) == -1) {
mkdir(ctx->root_dir, 0700);
fprintf(stderr, "creating directory %s\n", ctx->root_dir);
}
} while(word_buffer_move_right(wb));
print_footer(output_file);
if(ctx->conll_filename)
do{
w = word_buffer_b0(wb);
/* stop as soon as word_buffer_b0 yields NULL */
if(w == NULL) break;
/* presumably renders the word's directory and file columns as text into the
   two buffers - TODO confirm against word_sprint_col_n */
word_sprint_col_n(current_directory, w, ctx->mcd_struct->wf2col[MCD_WF_DIRECTORY]);
word_sprint_col_n(current_file, w, ctx->mcd_struct->wf2col[MCD_WF_FILE]);
/* Directory differs from the previous word's: build <root_dir>/<directory>
   and create it when stat() fails on it. destination_dir is only assigned
   inside this branch. */
if(strcmp(current_directory, previous_directory)){
strcpy(destination_dir, ctx->root_dir);
strcat(destination_dir, "/");
strcat(destination_dir, current_directory);
if (stat(destination_dir, &st) == -1) {
mkdir(destination_dir, 0700);
fprintf(stderr, "creating directory %s\n", destination_dir);
}
}
/* File differs from the previous word's: finish the output in progress
   (footer, then fclose) and open <destination_dir>/<file>.json with a fresh
   header.
   NOTE(review): first_sentence is not reset here in the visible code, so once
   it has been cleared the first sentence written to a later file is also
   preceded by "," - confirm. */
if(strcmp(current_file, previous_file)){
strcpy(destination_file, destination_dir);
strcat(destination_file, "/");
strcat(destination_file, current_file);
strcat(destination_file, ".json");
fprintf(stderr, "creating file %s\n", destination_file);
if(output_file){
print_footer(output_file);
fclose(output_file);
}
output_file = myfopen_no_exit(destination_file, "w");
print_header(output_file, ctx->mcd_struct);
}
/* first word of a sentence: count the sentence and remember where it starts */
if(new_sentence){
new_sentence = 0;
sentence_nb++;
index_first_word = word_buffer_get_current_index(wb);
}
/* word carries the sentence-segmentation flag: print the sentence spanning
   index_first_word..index_last_word, preceded by "," unless first_sentence
   is still 1 */
if(word_get_sent_seg(w)){
index_last_word = word_buffer_get_current_index(wb);
new_sentence = 1;
if(first_sentence == 1)
first_sentence = 0;
else
fprintf(output_file, ",");
fprintf(output_file, "\n");
print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
}
/* remember this word's file and directory for the next comparison */
strcpy(previous_file, current_file);
strcpy(previous_directory, current_directory);
} while(word_buffer_move_right(wb));
print_footer(output_file);
fclose(output_file);
/* NOTE(review): mcf2json_context_free(ctx) is called again after the if/else
   below; if both calls are live, ctx is released twice on this path -
   confirm and keep only one. */
mcf2json_context_free(ctx);
}
else{ //ctx->root_dir is NULL dump everything to stdout
output_file = stdout;
print_header(output_file, ctx->mcd_struct);
do{
w = word_buffer_b0(wb);
if(new_sentence){
new_sentence = 0;
sentence_nb++;
index_first_word = word_buffer_get_current_index(wb);
}
if(word_get_sent_seg(w)){
index_last_word = word_buffer_get_current_index(wb);
new_sentence = 1;
if(first_sentence == 1)
first_sentence = 0;
else
fprintf(output_file, ",");
fprintf(output_file, "\n");
print_sentence(output_file, sentence_nb, wb, index_first_word, index_last_word);
}
} while(word_buffer_move_right(wb));
print_footer(output_file);
}
mcf2json_context_free(ctx);
return 0;
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment