Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
macaon2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Alexis Nasr
macaon2
Commits
35f18a2d
Commit
35f18a2d
authored
7 years ago
by
Alexis Nasr
Browse files
Options
Downloads
Patches
Plain Diff
modified mcf2json so that it creates a directory with json files in it
parent
87fdf274
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
maca_common/include/mcd.h
+3
-3
3 additions, 3 deletions
maca_common/include/mcd.h
maca_common/include/word.h
+5
-0
5 additions, 0 deletions
maca_common/include/word.h
maca_tools/src/mcf2json.c
+102
-36
102 additions, 36 deletions
maca_tools/src/mcf2json.c
with
110 additions
and
39 deletions
maca_common/include/mcd.h
+
3
−
3
View file @
35f18a2d
...
...
@@ -61,9 +61,9 @@
 #define MCD_WF_Person 45
 #define MCD_WF_Tense 46
-#define MCD_WF_FILE 47
-#define MCD_WF_DIRECTORY 48
-#define MCD_WF_SPEAKER 49
+#define MCD_WF_FILE 48
+#define MCD_WF_DIRECTORY 49
+#define MCD_WF_SPEAKER 50
...
...
This diff is collapsed.
Click to expand it.
maca_common/include/word.h
+
5
−
0
View file @
35f18a2d
...
...
@@ -63,6 +63,11 @@ typedef struct _word {
#define word_get_label(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL])
#define word_get_stag(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG])
#define word_get_sent_seg(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SENT_SEG])
#define word_get_file(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_FILE])
#define word_get_directory(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_DIRECTORY])
#define word_get_speaker(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SPEAKER])
#define word_get_A(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A])
#define word_get_B(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B])
#define word_get_C(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C])
...
...
This diff is collapsed.
Click to expand it.
maca_tools/src/mcf2json.c
+
102
−
36
View file @
35f18a2d
...
...
@@ -2,6 +2,9 @@
#include
<stdlib.h>
#include
<string.h>
#include
<getopt.h>
#include
<sys/types.h>
#include
<sys/stat.h>
#include
<unistd.h>
#include
"mcd.h"
#include
"util.h"
...
...
@@ -16,6 +19,7 @@ typedef struct {
char
*
mcf_filename
;
char
*
mcd_filename
;
mcd
*
mcd_struct
;
char
*
root_dir
;
}
context
;
void
mcf2json_context_free
(
context
*
ctx
)
...
...
@@ -31,6 +35,8 @@ void mcf2json_context_free(context *ctx)
free
(
ctx
->
mcd_filename
);
if
(
ctx
->
mcd_struct
)
mcd_free
(
ctx
->
mcd_struct
);
if
(
ctx
->
root_dir
)
free
(
ctx
->
root_dir
);
free
(
ctx
);
}
}
...
...
@@ -47,6 +53,7 @@ context *mcf2json_context_new(void)
ctx
->
mcf_filename
=
NULL
;
ctx
->
mcd_filename
=
NULL
;
ctx
->
mcd_struct
=
NULL
;
ctx
->
root_dir
=
NULL
;
return
ctx
;
}
...
...
@@ -58,7 +65,7 @@ void mcf2json_context_general_help_message(context *ctx)
fprintf
(
stderr
,
"
\t
-v --verbose : activate verbose mode
\n
"
);
fprintf
(
stderr
,
"
\t
-C --mcd : mcd filename
\n
"
);
fprintf
(
stderr
,
"
\t
-i --mcf : mcf filename (read from stdin if absent)
\n
"
);
fprintf
(
stderr
,
"
\t
-
o
--
conll
:
conll filename (write to stdout if absent)
\n
"
);
fprintf
(
stderr
,
"
\t
-
r
--
root
:
root directory of the json files
\n
"
);
}
void
mcf2json_check_options
(
context
*
ctx
){
...
...
@@ -81,14 +88,14 @@ context *mcf2json_context_read_options(int argc, char *argv[])
{
"help"
,
no_argument
,
0
,
'h'
},
{
"verbose"
,
no_argument
,
0
,
'v'
},
{
"debug"
,
no_argument
,
0
,
'd'
},
{
"conll"
,
required_argument
,
0
,
'o'
},
{
"mcd"
,
required_argument
,
0
,
'C'
},
{
"mcf"
,
required_argument
,
0
,
'i'
},
{
"root"
,
required_argument
,
0
,
'r'
},
};
optind
=
0
;
opterr
=
0
;
while
((
c
=
getopt_long
(
argc
,
argv
,
"hvd
o:
C:i:"
,
long_options
,
&
option_index
))
!=
-
1
){
while
((
c
=
getopt_long
(
argc
,
argv
,
"hvdC:i:
r:
"
,
long_options
,
&
option_index
))
!=
-
1
){
switch
(
c
)
{
case
'd'
:
...
...
@@ -100,15 +107,15 @@ context *mcf2json_context_read_options(int argc, char *argv[])
case
'v'
:
ctx
->
verbose
=
1
;
break
;
case
'o'
:
ctx
->
conll_filename
=
strdup
(
optarg
);
break
;
case
'i'
:
ctx
->
mcf_filename
=
strdup
(
optarg
);
break
;
case
'C'
:
ctx
->
mcd_filename
=
strdup
(
optarg
);
break
;
case
'r'
:
ctx
->
root_dir
=
strdup
(
optarg
);
break
;
}
}
...
...
@@ -118,7 +125,6 @@ context *mcf2json_context_read_options(int argc, char *argv[])
else
{
ctx
->
mcd_struct
=
mcd_build_wpmlgfs
();
}
return
ctx
;
}
...
...
@@ -317,7 +323,7 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind
int
main
(
int
argc
,
char
*
argv
[])
{
FILE
*
output_file
;
FILE
*
output_file
=
NULL
;
context
*
ctx
=
mcf2json_context_read_options
(
argc
,
argv
);
word_buffer
*
wb
=
NULL
;
word
*
w
=
NULL
;
...
...
@@ -326,41 +332,101 @@ int main(int argc, char *argv[])
int
index_first_word
;
int
index_last_word
;
int
sentence_nb
=
0
;
char
current_directory
[
1000
];
char
current_file
[
1000
];
char
previous_directory
[
1000
];
char
previous_file
[
1000
];
char
*
root_directory
=
NULL
;
char
destination_file
[
1000
];
char
destination_dir
[
1000
];
struct
stat
st
=
{
0
};
mcf2json_check_options
(
ctx
);
mcd_extract_dico_from_corpus
(
ctx
->
mcd_struct
,
ctx
->
mcf_filename
);
output_file
=
(
ctx
->
conll_filename
)
?
myfopen_no_exit
(
ctx
->
conll_filename
,
"w"
)
:
stdout
;
wb
=
word_buffer_load_mcf
(
ctx
->
mcf_filename
,
ctx
->
mcd_struct
);
print_header
(
output_file
,
ctx
->
mcd_struct
);
do
{
w
=
word_buffer_b0
(
wb
);
if
(
new_sentence
){
new_sentence
=
0
;
sentence_nb
++
;
index_first_word
=
word_buffer_get_current_index
(
wb
);
}
if
(
word_get_sent_seg
(
w
)){
index_last_word
=
word_buffer_get_current_index
(
wb
);
new_sentence
=
1
;
if
(
first_sentence
==
1
)
first_sentence
=
0
;
else
fprintf
(
output_file
,
","
);
fprintf
(
output_file
,
"
\n
"
);
print_sentence
(
output_file
,
sentence_nb
,
wb
,
index_first_word
,
index_last_word
);
if
(
ctx
->
root_dir
){
if
(
stat
(
ctx
->
root_dir
,
&
st
)
==
-
1
)
{
mkdir
(
ctx
->
root_dir
,
0700
);
fprintf
(
stderr
,
"creating directory %s
\n
"
,
ctx
->
root_dir
);
}
}
while
(
word_buffer_move_right
(
wb
));
print_footer
(
output_file
);
if
(
ctx
->
conll_filename
)
do
{
w
=
word_buffer_b0
(
wb
);
if
(
w
==
NULL
)
break
;
word_sprint_col_n
(
current_directory
,
w
,
ctx
->
mcd_struct
->
wf2col
[
MCD_WF_DIRECTORY
]);
word_sprint_col_n
(
current_file
,
w
,
ctx
->
mcd_struct
->
wf2col
[
MCD_WF_FILE
]);
if
(
strcmp
(
current_directory
,
previous_directory
)){
strcpy
(
destination_dir
,
ctx
->
root_dir
);
strcat
(
destination_dir
,
"/"
);
strcat
(
destination_dir
,
current_directory
);
if
(
stat
(
destination_dir
,
&
st
)
==
-
1
)
{
mkdir
(
destination_dir
,
0700
);
fprintf
(
stderr
,
"creating directory %s
\n
"
,
destination_dir
);
}
}
if
(
strcmp
(
current_file
,
previous_file
)){
strcpy
(
destination_file
,
destination_dir
);
strcat
(
destination_file
,
"/"
);
strcat
(
destination_file
,
current_file
);
strcat
(
destination_file
,
".json"
);
fprintf
(
stderr
,
"creating file %s
\n
"
,
destination_file
);
if
(
output_file
){
print_footer
(
output_file
);
fclose
(
output_file
);
}
output_file
=
myfopen_no_exit
(
destination_file
,
"w"
);
print_header
(
output_file
,
ctx
->
mcd_struct
);
}
if
(
new_sentence
){
new_sentence
=
0
;
sentence_nb
++
;
index_first_word
=
word_buffer_get_current_index
(
wb
);
}
if
(
word_get_sent_seg
(
w
)){
index_last_word
=
word_buffer_get_current_index
(
wb
);
new_sentence
=
1
;
if
(
first_sentence
==
1
)
first_sentence
=
0
;
else
fprintf
(
output_file
,
","
);
fprintf
(
output_file
,
"
\n
"
);
print_sentence
(
output_file
,
sentence_nb
,
wb
,
index_first_word
,
index_last_word
);
}
strcpy
(
previous_file
,
current_file
);
strcpy
(
previous_directory
,
current_directory
);
}
while
(
word_buffer_move_right
(
wb
));
print_footer
(
output_file
);
fclose
(
output_file
);
mcf2json_context_free
(
ctx
);
}
else
{
//ctx->root_dir is NULL dump everything to stdout
output_file
=
stdout
;
print_header
(
output_file
,
ctx
->
mcd_struct
);
do
{
w
=
word_buffer_b0
(
wb
);
if
(
new_sentence
){
new_sentence
=
0
;
sentence_nb
++
;
index_first_word
=
word_buffer_get_current_index
(
wb
);
}
if
(
word_get_sent_seg
(
w
)){
index_last_word
=
word_buffer_get_current_index
(
wb
);
new_sentence
=
1
;
if
(
first_sentence
==
1
)
first_sentence
=
0
;
else
fprintf
(
output_file
,
","
);
fprintf
(
output_file
,
"
\n
"
);
print_sentence
(
output_file
,
sentence_nb
,
wb
,
index_first_word
,
index_last_word
);
}
}
while
(
word_buffer_move_right
(
wb
));
print_footer
(
output_file
);
}
mcf2json_context_free
(
ctx
);
return
0
;
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment