Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alexis Nasr
macaon2
Commits
68ee1829
Commit
68ee1829
authored
Feb 07, 2018
by
Alexis Nasr
Browse files
fixed a bug in word.h
parents
1d536e6c
ee98b081
Changes
8
Hide whitespace changes
Inline
Side-by-side
maca_common/include/mcd.h
View file @
68ee1829
...
...
@@ -8,7 +8,7 @@
#define MCD_INVALID_VALUE -1
#define MCD_WF_NB
48
#define MCD_WF_NB
51
#define MCD_WF_ID 0
#define MCD_WF_OFFSET 0
/* ID and OFFSET are synonymous */
...
...
@@ -61,6 +61,12 @@
#define MCD_WF_Person 45
#define MCD_WF_Tense 46
#define MCD_WF_FILE 48
#define MCD_WF_DIRECTORY 49
#define MCD_WF_SPEAKER 50
/*Abbr
AdpType
AdvType
...
...
maca_common/include/word.h
View file @
68ee1829
...
...
@@ -63,6 +63,11 @@ typedef struct _word {
#define word_get_label(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LABEL])
#define word_get_stag(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_STAG])
#define word_get_sent_seg(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SENT_SEG])
#define word_get_file(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_FILE])
#define word_get_directory(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_DIRECTORY])
#define word_get_speaker(w) (((w) == NULL) ? 0 : (w)->wf_array[MCD_WF_SPEAKER])
#define word_get_A(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_A])
#define word_get_B(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_B])
#define word_get_C(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_C])
...
...
maca_common/src/mcd.c
View file @
68ee1829
...
...
@@ -512,6 +512,12 @@ int mcd_wf_code(char *wf)
/* if(!strcmp(wf, "INT")) return MCD_WF_INT; */
if
(
!
strcmp
(
wf
,
"GOV"
))
return
MCD_WF_GOV
;
if
(
!
strcmp
(
wf
,
"SENT_SEG"
))
return
MCD_WF_SENT_SEG
;
if
(
!
strcmp
(
wf
,
"FILE"
))
return
MCD_WF_FILE
;
if
(
!
strcmp
(
wf
,
"DIRECTORY"
))
return
MCD_WF_DIRECTORY
;
if
(
!
strcmp
(
wf
,
"SPEAKER"
))
return
MCD_WF_SPEAKER
;
if
(
!
strcmp
(
wf
,
"A"
))
return
MCD_WF_A
;
if
(
!
strcmp
(
wf
,
"B"
))
return
MCD_WF_B
;
if
(
!
strcmp
(
wf
,
"C"
))
return
MCD_WF_C
;
...
...
maca_common/src/word.c
View file @
68ee1829
...
...
@@ -22,7 +22,6 @@ word *word_new(char *input)
w
->
wf_array
[
MCD_WF_GOV
]
=
WORD_INVALID_GOV
;
w
->
form
=
NULL
;
w
->
form_char16
=
NULL
;
w
->
index
=
-
1
;
w
->
signature
=
-
1
;
w
->
is_root
=
0
;
...
...
maca_tokenizer/src/en_tok_rules.l
View file @
68ee1829
...
...
@@ -12,6 +12,7 @@ extern char *token;
/*%option noyywrap*/
%%
#.* ECHO;
\<[^\>]*\> {maca_tokenizer_segment((char *)"", yytext);}
[ \t]+ {maca_tokenizer_segment((char *)"", yytext);}
[ ]*\. {maca_tokenizer_segment((char *)".", yytext);}
...
...
maca_tokenizer/src/fr_tok_rules.l
View file @
68ee1829
...
...
@@ -26,7 +26,7 @@ nosepar [^ \t\n]
if(defait_amalgames){
BEGIN(state_defait_amalgames);
}
#.* ECHO;
\<[^\>]*\> {maca_tokenizer_segment((char *)"", yytext);}
{separ}+ {maca_tokenizer_segment((char *)"", yytext);}
\. {maca_tokenizer_segment((char *)".", yytext);}
...
...
maca_tools/src/mcf2json.c
View file @
68ee1829
...
...
@@ -2,6 +2,9 @@
#include
<stdlib.h>
#include
<string.h>
#include
<getopt.h>
#include
<sys/types.h>
#include
<sys/stat.h>
#include
<unistd.h>
#include
"mcd.h"
#include
"util.h"
...
...
@@ -16,6 +19,7 @@ typedef struct {
char
*
mcf_filename
;
char
*
mcd_filename
;
mcd
*
mcd_struct
;
char
*
root_dir
;
}
context
;
void
mcf2json_context_free
(
context
*
ctx
)
...
...
@@ -31,6 +35,8 @@ void mcf2json_context_free(context *ctx)
free
(
ctx
->
mcd_filename
);
if
(
ctx
->
mcd_struct
)
mcd_free
(
ctx
->
mcd_struct
);
if
(
ctx
->
root_dir
)
free
(
ctx
->
root_dir
);
free
(
ctx
);
}
}
...
...
@@ -47,6 +53,7 @@ context *mcf2json_context_new(void)
ctx
->
mcf_filename
=
NULL
;
ctx
->
mcd_filename
=
NULL
;
ctx
->
mcd_struct
=
NULL
;
ctx
->
root_dir
=
NULL
;
return
ctx
;
}
...
...
@@ -58,7 +65,7 @@ void mcf2json_context_general_help_message(context *ctx)
fprintf
(
stderr
,
"
\t
-v --verbose : activate verbose mode
\n
"
);
fprintf
(
stderr
,
"
\t
-C --mcd : mcd filename
\n
"
);
fprintf
(
stderr
,
"
\t
-i --mcf : mcf filename (read from stdin if absent)
\n
"
);
fprintf
(
stderr
,
"
\t
-
o
--
conll
:
conll filename (write to stdout if absent)
\n
"
);
fprintf
(
stderr
,
"
\t
-
r
--
root
:
root directory of the json files
\n
"
);
}
void
mcf2json_check_options
(
context
*
ctx
){
...
...
@@ -81,14 +88,14 @@ context *mcf2json_context_read_options(int argc, char *argv[])
{
"help"
,
no_argument
,
0
,
'h'
},
{
"verbose"
,
no_argument
,
0
,
'v'
},
{
"debug"
,
no_argument
,
0
,
'd'
},
{
"conll"
,
required_argument
,
0
,
'o'
},
{
"mcd"
,
required_argument
,
0
,
'C'
},
{
"mcf"
,
required_argument
,
0
,
'i'
},
{
"root"
,
required_argument
,
0
,
'r'
},
};
optind
=
0
;
opterr
=
0
;
while
((
c
=
getopt_long
(
argc
,
argv
,
"hvd
o:
C:i:"
,
long_options
,
&
option_index
))
!=
-
1
){
while
((
c
=
getopt_long
(
argc
,
argv
,
"hvdC:i:
r:
"
,
long_options
,
&
option_index
))
!=
-
1
){
switch
(
c
)
{
case
'd'
:
...
...
@@ -100,15 +107,15 @@ context *mcf2json_context_read_options(int argc, char *argv[])
case
'v'
:
ctx
->
verbose
=
1
;
break
;
case
'o'
:
ctx
->
conll_filename
=
strdup
(
optarg
);
break
;
case
'i'
:
ctx
->
mcf_filename
=
strdup
(
optarg
);
break
;
case
'C'
:
ctx
->
mcd_filename
=
strdup
(
optarg
);
break
;
case
'r'
:
ctx
->
root_dir
=
strdup
(
optarg
);
break
;
}
}
...
...
@@ -118,7 +125,6 @@ context *mcf2json_context_read_options(int argc, char *argv[])
else
{
ctx
->
mcd_struct
=
mcd_build_wpmlgfs
();
}
return
ctx
;
}
...
...
@@ -129,7 +135,7 @@ void print_footer(FILE *output_file)
}
void
print_header
(
FILE
*
output_file
,
mcd
*
mcd_struct
)
void
print_header
(
FILE
*
output_file
,
mcd
*
mcd_struct
,
char
*
filename
)
{
int
pos_col
=
mcd_get_pos_col
(
mcd_struct
);
int
label_col
=
mcd_get_label_col
(
mcd_struct
);
...
...
@@ -143,6 +149,7 @@ void print_header(FILE *output_file, mcd *mcd_struct)
fprintf
(
output_file
,
"
\"
header
\"
:{
\n
"
);
fprintf
(
output_file
,
"
\"
id
\"
:
\"\"
,
\n
"
);
fprintf
(
output_file
,
"
\"
timestamp
\"
:
\"\"
,
\n
"
);
fprintf
(
output_file
,
"
\"
filename
\"
:
\"
%s
\"
,
\n
"
,
filename
);
fprintf
(
output_file
,
"
\"
labels_segment
\"
: ["
);
for
(
i
=
0
;
i
<
dico_pos
->
nbelem
;
i
++
){
...
...
@@ -160,7 +167,7 @@ void print_header(FILE *output_file, mcd *mcd_struct)
fprintf
(
output_file
,
"},
\n
"
);
fprintf
(
output_file
,
"
\"
annotation
s
\"
:{
\n
"
);
fprintf
(
output_file
,
"
\"
annotation
\"
:{
\n
"
);
fprintf
(
output_file
,
"
\"
name
\"
:
\"\"
,
\n
"
);
fprintf
(
output_file
,
"
\"
time_start
\"
:
\"\"
,
\n
"
);
fprintf
(
output_file
,
"
\"
time_end
\"
:
\"\"\n
"
);
...
...
@@ -227,16 +234,16 @@ void print_links(FILE *output_file, word_buffer *wb, int index_first_word, int i
}
void
print_segment
(
FILE
*
output_file
,
word_buffer
*
wb
,
int
index
)
void
print_segment
(
FILE
*
output_file
,
word_buffer
*
wb
,
int
index_first_word
,
int
index
)
{
int
pos_col
=
mcd_get_pos_col
(
word_buffer_get_mcd
(
wb
));
word
*
w
=
word_buffer_get_word_n
(
wb
,
index
);
fprintf
(
output_file
,
"{ "
);
/* fprintf(output_file, "\"start\": %d, ", word_get_offset(w)); */
fprintf
(
output_file
,
"
\"
start
\"
: %d, "
,
index
);
fprintf
(
output_file
,
"
\"
start
\"
: %d, "
,
index
-
index_first_word
);
/* fprintf(output_file, "\"end\": %d, ", word_get_offset(w) + word_get_length(w) - 1); */
fprintf
(
output_file
,
"
\"
end
\"
: %d, "
,
index
);
fprintf
(
output_file
,
"
\"
end
\"
: %d, "
,
index
-
index_first_word
);
fprintf
(
output_file
,
"
\"
label
\"
:
\"
"
);
if
(
pos_col
!=
-
1
)
...
...
@@ -258,12 +265,12 @@ void print_segments(FILE *output_file, word_buffer *wb, int index_first_word, in
{
int
index
;
int
first_segment
=
1
;
fprintf
(
output_file
,
"
\"
segments
\"
: ["
);
for
(
index
=
index_first_word
;
index
<=
index_last_word
;
index
++
){
if
(
first_segment
==
1
)
first_segment
=
0
;
else
fprintf
(
output_file
,
","
);
fprintf
(
output_file
,
"
\n
"
);
print_segment
(
output_file
,
wb
,
index
);
print_segment
(
output_file
,
wb
,
index_first_word
,
index
);
}
fprintf
(
output_file
,
" ],
\n
"
);
}
...
...
@@ -317,7 +324,7 @@ void print_sentence(FILE *output_file, int sentence_nb, word_buffer *wb, int ind
int
main
(
int
argc
,
char
*
argv
[])
{
FILE
*
output_file
;
FILE
*
output_file
=
NULL
;
context
*
ctx
=
mcf2json_context_read_options
(
argc
,
argv
);
word_buffer
*
wb
=
NULL
;
word
*
w
=
NULL
;
...
...
@@ -326,41 +333,107 @@ int main(int argc, char *argv[])
int
index_first_word
;
int
index_last_word
;
int
sentence_nb
=
0
;
char
current_directory
[
1000
];
char
current_file
[
1000
];
char
previous_directory
[
1000
];
char
previous_file
[
1000
];
char
filename_for_header
[
1000
];
char
*
root_directory
=
NULL
;
char
destination_file
[
1000
];
char
destination_dir
[
1000
];
struct
stat
st
=
{
0
};
mcf2json_check_options
(
ctx
);
mcd_extract_dico_from_corpus
(
ctx
->
mcd_struct
,
ctx
->
mcf_filename
);
output_file
=
(
ctx
->
conll_filename
)
?
myfopen_no_exit
(
ctx
->
conll_filename
,
"w"
)
:
stdout
;
wb
=
word_buffer_load_mcf
(
ctx
->
mcf_filename
,
ctx
->
mcd_struct
);
print_header
(
output_file
,
ctx
->
mcd_struct
);
do
{
w
=
word_buffer_b0
(
wb
);
if
(
new_sentence
){
new_sentence
=
0
;
sentence_nb
++
;
index_first_word
=
word_buffer_get_current_index
(
wb
);
}
if
(
word_get_sent_seg
(
w
)){
index_last_word
=
word_buffer_get_current_index
(
wb
);
new_sentence
=
1
;
if
(
first_sentence
==
1
)
first_sentence
=
0
;
else
fprintf
(
output_file
,
","
);
fprintf
(
output_file
,
"
\n
"
);
print_sentence
(
output_file
,
sentence_nb
,
wb
,
index_first_word
,
index_last_word
);
if
(
ctx
->
root_dir
){
if
(
stat
(
ctx
->
root_dir
,
&
st
)
==
-
1
)
{
mkdir
(
ctx
->
root_dir
,
0700
);
fprintf
(
stderr
,
"creating directory %s
\n
"
,
ctx
->
root_dir
);
}
}
while
(
word_buffer_move_right
(
wb
));
print_footer
(
output_file
);
if
(
ctx
->
conll_filename
)
do
{
w
=
word_buffer_b0
(
wb
);
if
(
w
==
NULL
)
break
;
word_sprint_col_n
(
current_directory
,
w
,
ctx
->
mcd_struct
->
wf2col
[
MCD_WF_DIRECTORY
]);
word_sprint_col_n
(
current_file
,
w
,
ctx
->
mcd_struct
->
wf2col
[
MCD_WF_FILE
]);
if
(
strcmp
(
current_directory
,
previous_directory
)){
strcpy
(
destination_dir
,
ctx
->
root_dir
);
strcat
(
destination_dir
,
"/"
);
strcat
(
destination_dir
,
current_directory
);
if
(
stat
(
destination_dir
,
&
st
)
==
-
1
)
{
mkdir
(
destination_dir
,
0700
);
fprintf
(
stderr
,
"creating directory %s
\n
"
,
destination_dir
);
}
}
if
(
strcmp
(
current_file
,
previous_file
)){
strcpy
(
destination_file
,
destination_dir
);
strcat
(
destination_file
,
"/"
);
strcat
(
destination_file
,
current_file
);
strcat
(
destination_file
,
".json"
);
fprintf
(
stderr
,
"creating file %s
\n
"
,
destination_file
);
if
(
output_file
){
print_footer
(
output_file
);
fclose
(
output_file
);
}
output_file
=
myfopen_no_exit
(
destination_file
,
"w"
);
strcpy
(
filename_for_header
,
current_directory
);
strcat
(
filename_for_header
,
"/"
);
strcat
(
filename_for_header
,
current_file
);
strcat
(
filename_for_header
,
".json"
);
print_header
(
output_file
,
ctx
->
mcd_struct
,
filename_for_header
);
first_sentence
=
1
;
}
if
(
new_sentence
){
new_sentence
=
0
;
sentence_nb
++
;
index_first_word
=
word_buffer_get_current_index
(
wb
);
}
if
(
word_get_sent_seg
(
w
)){
index_last_word
=
word_buffer_get_current_index
(
wb
);
new_sentence
=
1
;
if
(
first_sentence
==
1
)
first_sentence
=
0
;
else
fprintf
(
output_file
,
","
);
fprintf
(
output_file
,
"
\n
"
);
print_sentence
(
output_file
,
sentence_nb
,
wb
,
index_first_word
,
index_last_word
);
}
strcpy
(
previous_file
,
current_file
);
strcpy
(
previous_directory
,
current_directory
);
}
while
(
word_buffer_move_right
(
wb
));
print_footer
(
output_file
);
fclose
(
output_file
);
mcf2json_context_free
(
ctx
);
}
else
{
//ctx->root_dir is NULL dump everything to stdout
output_file
=
stdout
;
print_header
(
output_file
,
ctx
->
mcd_struct
,
""
);
do
{
w
=
word_buffer_b0
(
wb
);
if
(
new_sentence
){
new_sentence
=
0
;
sentence_nb
++
;
index_first_word
=
word_buffer_get_current_index
(
wb
);
}
if
(
word_get_sent_seg
(
w
)){
index_last_word
=
word_buffer_get_current_index
(
wb
);
new_sentence
=
1
;
if
(
first_sentence
==
1
)
first_sentence
=
0
;
else
fprintf
(
output_file
,
","
);
fprintf
(
output_file
,
"
\n
"
);
print_sentence
(
output_file
,
sentence_nb
,
wb
,
index_first_word
,
index_last_word
);
}
}
while
(
word_buffer_move_right
(
wb
));
print_footer
(
output_file
);
}
mcf2json_context_free
(
ctx
);
return
0
;
}
maca_trans_parser/src/oracle_parser_arc_eager.c
View file @
68ee1829
...
...
@@ -63,18 +63,19 @@ int oracle_parser_arc_eager(config *c, word_buffer *ref, int root_label)
/* s0 is the root of the sentence */
if
((
s0_label
==
root_label
)
// && (word_get_label(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != root_label)
&&
check_all_dependents_of_word_in_ref_are_in_hyp
(
c
,
ref
,
s0_index
)
&&
check_all_dependents_of_word_in_ref_are_in_hyp
(
c
,
ref
,
s0_index
)
){
return
MVT_PARSER_ROOT
;
}
/* word on the top of the stack is an end of sentence marker */
if
((
word_get_sent_seg
(
word_buffer_get_word_n
(
ref
,
s0_index
))
==
1
)
// && (word_get_sent_seg(word_buffer_get_word_n(config_get_buffer(c), s0_index)) != 1)
&&
check_all_dependents_of_word_in_ref_are_in_hyp
(
c
,
ref
,
s0_index
)
&&
check_all_dependents_of_word_in_ref_are_in_hyp
(
c
,
ref
,
s0_index
)
){
return
MVT_PARSER_EOS
;
}
/* LEFT ARC b0 is the governor and s0 the dependent */
if
(
s0_gov_index
==
b0_index
){
return
movement_parser_left_code
(
word_get_label
(
word_buffer_get_word_n
(
ref
,
s0_index
)));
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment