Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alexis Nasr
macaon2
Commits
56e83bcf
Commit
56e83bcf
authored
Oct 21, 2016
by
Alexis Nasr
Browse files
Added a new tagparse decoder that tags and parses the sentence at the same time
parent
1cddee84
Changes
7
Hide whitespace changes
Inline
Side-by-side
maca_common/include/word_buffer.h
View file @
56e83bcf
...
...
@@ -36,19 +36,19 @@ typedef struct {
word_buffer
*
word_buffer_new
(
FILE
*
input_file
,
mcd
*
mcd_struct
,
int
lookahead
);
void
word_buffer_free
(
word_buffer
*
wb
);
int
word_buffer_add
(
word_buffer
*
wb
,
word
*
w
);
word
*
word_buffer_get_word_relative
(
word_buffer
*
wb
,
int
dist
);
word
*
word_buffer_get_word_n
(
word_buffer
*
wb
,
int
n
);
int
word_buffer_read_next_word
(
word_buffer
*
wb
);
int
word_buffer_move_right
(
word_buffer
*
wb
);
int
word_buffer_move_left
(
word_buffer
*
wb
);
void
word_buffer_print
(
FILE
*
f
,
word_buffer
*
wb
);
void
word_buffer_print_compact
(
FILE
*
f
,
word_buffer
*
wb
);
int
word_buffer_is_empty
(
word_buffer
*
wb
);
int
word_buffer_is_last
(
word_buffer
*
wb
);
int
word_buffer_end
(
word_buffer
*
wb
);
int
word_buffer_read_sentence
(
word_buffer
*
bw
);
void
word_buffer_free
(
word_buffer
*
wb
);
int
word_buffer_add
(
word_buffer
*
wb
,
word
*
w
);
word
*
word_buffer_get_word_relative
(
word_buffer
*
wb
,
int
dist
);
word
*
word_buffer_get_word_n
(
word_buffer
*
wb
,
int
n
);
int
word_buffer_read_next_word
(
word_buffer
*
wb
);
int
word_buffer_move_right
(
word_buffer
*
wb
);
int
word_buffer_move_left
(
word_buffer
*
wb
);
void
word_buffer_print
(
FILE
*
f
,
word_buffer
*
wb
);
void
word_buffer_print_compact
(
FILE
*
f
,
word_buffer
*
wb
);
int
word_buffer_is_empty
(
word_buffer
*
wb
);
int
word_buffer_is_last
(
word_buffer
*
wb
);
int
word_buffer_end
(
word_buffer
*
wb
);
int
word_buffer_read_sentence
(
word_buffer
*
bw
);
word_buffer
*
word_buffer_load_mcf
(
char
*
mcf_filename
,
mcd
*
mcd_struct
);
#endif
maca_trans_parser/CMakeLists.txt
View file @
56e83bcf
set
(
SOURCES src/context.c
src/feat_desc.c
#
src/movement_parser_arc_eager.c
src/movement_parser_arc_eager.c
src/movement_tagparser_arc_eager.c
src/movement_tagger.c
src/feat_fct.c
src/global_feat_vec.c
# src/oracle_parser.c
#
src/oracle_parser_arc_eager.c
src/oracle_parser_arc_eager.c
src/oracle_tagparser_arc_eager.c
src/oracle_tagger.c
#
src/simple_decoder_parser.c
src/simple_decoder_parser.c
src/simple_decoder_parser_arc_eager.c
src/simple_decoder_tagparser_arc_eager.c
src/simple_decoder_forrest.c
...
...
maca_trans_parser/src/maca_trans_tagparser_arc_eager_mcf2cff.c
View file @
56e83bcf
...
...
@@ -78,12 +78,12 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
config2feat_vec_cff
(
ctx
->
features_model
,
c
,
ctx
->
d_perceptron_features
,
fv
,
ctx
->
mode
);
mvt_code
=
oracle_tagparser_arc_eager
(
c
,
ref
,
root_label
);
mvt_type
=
movement_type
(
mvt_code
);
mvt_label
=
movement_label
(
mvt_code
);
mvt_type
=
movement_
tagparse_
type
(
mvt_code
);
mvt_label
=
movement_
tagparse_
label
(
mvt_code
);
if
(
ctx
->
debug_mode
){
config_print
(
stdout
,
c
);
movement_print
(
stdout
,
mvt_code
,
ctx
->
dico_labels
,
dico_postag
);
movement_
tagparse_
print
(
stdout
,
mvt_code
,
ctx
->
dico_labels
,
dico_postag
);
fprintf
(
stdout
,
"
\n
"
);
}
...
...
@@ -92,7 +92,7 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
stack_print
(
output_file
,
c
->
st
);
fprintf
(
output_file
,
"
\t
"
);
movement_print
(
output_file
,
mvt_code
,
ctx
->
dico_labels
,
dico_postag
);
movement_
tagparse_
print
(
output_file
,
mvt_code
,
ctx
->
dico_labels
,
dico_postag
);
fprintf
(
output_file
,
"
\t
1
\n
"
);
}
else
{
...
...
@@ -101,40 +101,40 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
}
if
(
mvt_type
==
MVT_EOS
){
movement_eos
(
c
,
0
);
movement_
tagparse_
eos
(
c
,
0
);
sentence_nb
++
;
if
(
word_buffer_is_last
(
ref
))
break
;
}
if
(
mvt_type
==
MVT_POSTAG
){
movement_add_pos
(
c
,
0
,
mvt_label
);
movement_
tagparse_
add_pos
(
c
,
0
,
mvt_label
);
continue
;
}
if
(
mvt_type
==
MVT_LEFT
){
movement_left_arc
(
c
,
mvt_label
,
0
);
movement_
tagparse_
left_arc
(
c
,
mvt_label
,
0
);
continue
;
}
if
(
mvt_type
==
MVT_RIGHT
){
movement_right_arc
(
c
,
mvt_label
,
0
);
movement_
tagparse_
right_arc
(
c
,
mvt_label
,
0
);
word_buffer_move_right
(
ref
);
continue
;
}
if
(
mvt_type
==
MVT_REDUCE
){
movement_reduce
(
c
,
0
);
movement_
tagparse_
reduce
(
c
,
0
);
continue
;
}
if
(
mvt_type
==
MVT_ROOT
){
movement_root
(
c
,
0
,
root_label
);
movement_
tagparse_
root
(
c
,
0
,
root_label
);
continue
;
}
if
(
mvt_type
==
MVT_SHIFT
){
movement_shift
(
c
,
1
,
0
);
movement_
tagparse_
shift
(
c
,
1
,
0
);
word_buffer_move_right
(
ref
);
continue
;
}
...
...
maca_trans_parser/src/movement_tagparser_arc_eager.c
View file @
56e83bcf
...
...
@@ -4,11 +4,11 @@
#include
"util.h"
#include
"movement_tagparser_arc_eager.h"
void
movement_print
(
FILE
*
f
,
int
mvt_code
,
dico
*
dico_labels
,
dico
*
dico_postag
)
void
movement_
tagparse_
print
(
FILE
*
f
,
int
mvt_code
,
dico
*
dico_labels
,
dico
*
dico_postag
)
{
int
mvt_type
=
movement_type
(
mvt_code
);
int
mvt_label
=
movement_label
(
mvt_code
);
int
mvt_type
=
movement_
tagparse_
type
(
mvt_code
);
int
mvt_label
=
movement_
tagparse_
label
(
mvt_code
);
char
*
label
;
if
(
mvt_type
==
MVT_SHIFT
)
{
fprintf
(
f
,
"SHIFT"
);
return
;}
...
...
@@ -27,7 +27,7 @@ void movement_print(FILE *f, int mvt_code, dico *dico_labels, dico *dico_postag)
fprintf
(
f
,
" %s"
,
label
);
}
int
movement_type
(
int
mvt
)
int
movement_
tagparse_
type
(
int
mvt
)
{
if
(
mvt
==
MVT_SHIFT
)
return
MVT_SHIFT
;
/* 0 */
if
(
mvt
==
MVT_REDUCE
)
return
MVT_REDUCE
;
/* 1 */
...
...
@@ -38,7 +38,7 @@ int movement_type(int mvt)
/*if(mvt % 3 == 2)*/
return
MVT_LEFT
;
/* 6, 9, 12 ... */
}
int
movement_label
(
int
mvt
)
int
movement_
tagparse_
label
(
int
mvt
)
{
if
(
mvt
==
MVT_SHIFT
)
return
-
1
;
if
(
mvt
==
MVT_REDUCE
)
return
-
1
;
...
...
@@ -52,7 +52,7 @@ int movement_label(int mvt)
return
(
mvt
-
6
)
/
3
;
}
int
movement_add_pos
(
config
*
c
,
float
score
,
int
pos
)
int
movement_
tagparse_
add_pos
(
config
*
c
,
float
score
,
int
pos
)
{
if
(
word_buffer_b0
(
config_get_buffer
(
c
))
==
NULL
)
return
0
;
if
(
word_get_pos
(
word_buffer_b0
(
config_get_buffer
(
c
)))
!=
-
1
)
return
0
;
...
...
@@ -60,12 +60,12 @@ int movement_add_pos(config *c, float score, int pos)
/* stack_push(config_get_stack(c), word_buffer_b0(config_get_buffer(c)));
word_buffer_move_right(config_get_buffer(c));*/
config_add_mvt
(
c
,
movement_postag
(
pos
));
config_add_mvt
(
c
,
movement_
tagparse_
postag
(
pos
));
return
1
;
}
int
movement_eos
(
config
*
c
,
float
score
)
int
movement_
tagparse_
eos
(
config
*
c
,
float
score
)
{
if
(
stack_is_empty
(
config_get_stack
(
c
)))
return
0
;
if
(
word_get_sent_seg
(
stack_top
(
config_get_stack
(
c
)))
==
1
)
return
0
;
...
...
@@ -80,7 +80,7 @@ int movement_eos(config *c, float score)
return
1
;
}
int
movement_left_arc
(
config
*
c
,
int
label
,
float
score
)
int
movement_
tagparse_
left_arc
(
config
*
c
,
int
label
,
float
score
)
{
if
(
stack_is_empty
(
config_get_stack
(
c
)))
return
0
;
/* if(word_buffer_is_empty(config_get_buffer(c))) return 0; */
...
...
@@ -97,11 +97,11 @@ int movement_left_arc(config *c, int label, float score)
word_set_label
(
dep
,
label
);
stack_pop
(
config_get_stack
(
c
));
config_add_mvt
(
c
,
movement_left_code
(
label
));
config_add_mvt
(
c
,
movement_
tagparse_
left_code
(
label
));
return
1
;
}
int
movement_right_arc
(
config
*
c
,
int
label
,
float
score
)
int
movement_
tagparse_
right_arc
(
config
*
c
,
int
label
,
float
score
)
{
if
(
stack_is_empty
(
config_get_stack
(
c
)))
return
0
;
...
...
@@ -116,11 +116,11 @@ int movement_right_arc(config *c, int label, float score)
stack_push
(
config_get_stack
(
c
),
word_buffer_b0
(
config_get_buffer
(
c
)));
word_buffer_move_right
(
config_get_buffer
(
c
));
config_add_mvt
(
c
,
movement_right_code
(
label
));
config_add_mvt
(
c
,
movement_
tagparse_
right_code
(
label
));
return
1
;
}
int
movement_shift
(
config
*
c
,
int
stream
,
float
score
)
int
movement_
tagparse_
shift
(
config
*
c
,
int
stream
,
float
score
)
{
if
(
word_buffer_is_empty
(
config_get_buffer
(
c
)))
return
0
;
stack_push
(
config_get_stack
(
c
),
word_buffer_b0
(
config_get_buffer
(
c
)));
...
...
@@ -129,7 +129,7 @@ int movement_shift(config *c, int stream, float score)
return
1
;
}
int
movement_reduce
(
config
*
c
,
float
score
)
int
movement_
tagparse_
reduce
(
config
*
c
,
float
score
)
{
if
(
stack_nbelem
(
config_get_stack
(
c
))
<=
1
)
return
0
;
...
...
@@ -142,7 +142,7 @@ int movement_reduce(config *c, float score)
return
1
;
}
int
movement_root
(
config
*
c
,
float
score
,
int
root_code
)
int
movement_
tagparse_
root
(
config
*
c
,
float
score
,
int
root_code
)
{
word
*
s0
=
stack_top
(
config_get_stack
(
c
));
if
(
s0
==
NULL
)
return
0
;
...
...
maca_trans_parser/src/movement_tagparser_arc_eager.h
View file @
56e83bcf
...
...
@@ -13,23 +13,23 @@
#define MVT_RIGHT 5
#define MVT_POSTAG 6
#define movement_postag(postag) (3 * (postag) + 4)
#define movement_
tagparse_
postag(postag) (3 * (postag) + 4)
/* left-arc movements are encoded as 3 * label + 5, i.e. 5, 8, 11, ... for label >= 0 (codes 0 and 2 are reserved: 0 is shift, 2 is root) */
#define movement_left_code(label) (3 * (label) + 5)
#define movement_
tagparse_
left_code(label) (3 * (label) + 5)
/* right-arc movements are encoded as 3 * label + 6, i.e. 6, 9, 12, ... for label >= 0 (codes 1 and 3 are reserved: 1 is reduce, 3 is end_of_sentence) */
#define movement_right_code(label) (3 * (label) + 6)
int
movement_type
(
int
mvt
);
int
movement_label
(
int
mvt
);
int
movement_left_arc
(
config
*
c
,
int
label
,
float
score
);
int
movement_right_arc
(
config
*
c
,
int
label
,
float
score
);
int
movement_shift
(
config
*
c
,
int
stream
,
float
score
);
int
movement_reduce
(
config
*
c
,
float
score
);
int
movement_root
(
config
*
c
,
float
score
,
int
root_code
);
int
movement_eos
(
config
*
c
,
float
score
);
int
movement_add_pos
(
config
*
c
,
float
score
,
int
postag
);
void
movement_print
(
FILE
*
f
,
int
mvt_code
,
dico
*
dico_labels
,
dico
*
dico_postag
);
#define movement_
tagparse_
right_code(label) (3 * (label) + 6)
int
movement_
tagparse_
type
(
int
mvt
);
int
movement_
tagparse_
label
(
int
mvt
);
int
movement_
tagparse_
left_arc
(
config
*
c
,
int
label
,
float
score
);
int
movement_
tagparse_
right_arc
(
config
*
c
,
int
label
,
float
score
);
int
movement_
tagparse_
shift
(
config
*
c
,
int
stream
,
float
score
);
int
movement_
tagparse_
reduce
(
config
*
c
,
float
score
);
int
movement_
tagparse_
root
(
config
*
c
,
float
score
,
int
root_code
);
int
movement_
tagparse_
eos
(
config
*
c
,
float
score
);
int
movement_
tagparse_
add_pos
(
config
*
c
,
float
score
,
int
postag
);
void
movement_
tagparse_
print
(
FILE
*
f
,
int
mvt_code
,
dico
*
dico_labels
,
dico
*
dico_postag
);
#endif
maca_trans_parser/src/oracle_tagparser_arc_eager.c
View file @
56e83bcf
...
...
@@ -75,9 +75,9 @@ int oracle_tagparser_arc_eager(config *c, word_buffer *ref, int root_label)
/* give a pos to b0 if it does not have one */
if
(
word_get_pos
(
b0
)
==
-
1
){
/* word_set_pos(b0, word_get_pos(word_buffer_get_word_n(ref, b0_index))); */
/* return movement_postag(word_get_pos(b0)); */
/* return movement_
tagparse_
postag(word_get_pos(b0)); */
return
movement_postag
(
word_get_pos
(
word_buffer_get_word_n
(
ref
,
b0_index
)));
return
movement_
tagparse_
postag
(
word_get_pos
(
word_buffer_get_word_n
(
ref
,
b0_index
)));
}
...
...
@@ -113,12 +113,12 @@ int oracle_tagparser_arc_eager(config *c, word_buffer *ref, int root_label)
/* LEFT ARC b0 is the governor and s0 the dependent */
if
(
s0_gov_index
==
b0_index
){
return
movement_left_code
(
word_get_label
(
word_buffer_get_word_n
(
ref
,
s0_index
)));
return
movement_
tagparse_
left_code
(
word_get_label
(
word_buffer_get_word_n
(
ref
,
s0_index
)));
}
/* RIGHT ARC s0 is the governor and b0 the dependent */
if
(
b0_gov_index
==
s0_index
){
return
movement_right_code
(
word_get_label
(
word_buffer_get_word_n
(
ref
,
b0_index
)));
return
movement_
tagparse_
right_code
(
word_get_label
(
word_buffer_get_word_n
(
ref
,
b0_index
)));
}
/* REDUCE */
if
((
stack_nbelem
(
config_get_stack
(
c
))
>
1
)
...
...
maca_trans_parser/src/simple_decoder_tagparser_arc_eager.c
View file @
56e83bcf
...
...
@@ -82,8 +82,8 @@ void simple_decoder_tagparser_arc_eager(context *ctx)
config2feat_vec_cff
(
ctx
->
features_model
,
c
,
ctx
->
d_perceptron_features
,
fv
,
LOOKUP_MODE
);
mvt_code
=
feature_table_argmax
(
fv
,
ft
,
&
max
);
mvt_type
=
movement_type
(
mvt_code
);
mvt_label
=
movement_label
(
mvt_code
);
mvt_type
=
movement_
tagparse_
type
(
mvt_code
);
mvt_label
=
movement_
tagparse_
label
(
mvt_code
);
if
(
ctx
->
trace_mode
){
index
=
word_get_index
(
word_buffer_b0
(
config_get_buffer
(
c
)));
...
...
@@ -92,7 +92,7 @@ void simple_decoder_tagparser_arc_eager(context *ctx)
stack_print
(
stdout
,
c
->
st
);
fprintf
(
stdout
,
"
\t
"
);
movement_print
(
stdout
,
mvt_code
,
ctx
->
dico_labels
,
ctx
->
dico_postags
);
movement_
tagparse_
print
(
stdout
,
mvt_code
,
ctx
->
dico_labels
,
ctx
->
dico_postags
);
fprintf
(
stdout
,
"
\t
"
);
feature_table_argmax_1_2
(
fv
,
ft
,
&
argmax1
,
&
max1
,
&
argmax2
,
&
max2
);
printf
(
"%f
\n
"
,
max1
-
max2
);
...
...
@@ -105,9 +105,9 @@ void simple_decoder_tagparser_arc_eager(context *ctx)
entropy
=
feature_table_entropy
(
fv
,
ft
);
/* delta = feature_table_diff_scores(fv, ft); */
feature_table_argmax_1_2
(
fv
,
ft
,
&
argmax1
,
&
max1
,
&
argmax2
,
&
max2
);
movement_print
(
stdout
,
argmax1
,
ctx
->
dico_labels
,
ctx
->
dico_postags
);
movement_
tagparse_
print
(
stdout
,
argmax1
,
ctx
->
dico_labels
,
ctx
->
dico_postags
);
printf
(
":
\t
%f
\n
"
,
max1
);
movement_print
(
stdout
,
argmax2
,
ctx
->
dico_labels
,
ctx
->
dico_postags
);
movement_
tagparse_
print
(
stdout
,
argmax2
,
ctx
->
dico_labels
,
ctx
->
dico_postags
);
printf
(
":
\t
%f
\n
"
,
max2
);
printf
(
"delta = %f
\n
"
,
max1
-
max2
);
...
...
@@ -115,37 +115,37 @@ void simple_decoder_tagparser_arc_eager(context *ctx)
/* printf("entropy = %f delta = %f\n", entropy, delta); */
printf
(
"entropy = %f
\n
"
,
entropy
);
/* movement_print(stdout, mvt_code, ctx->dico_labels); */
/* movement_
tagparse_
print(stdout, mvt_code, ctx->dico_labels); */
}
result
=
0
;
switch
(
mvt_type
){
case
MVT_POSTAG
:
result
=
movement_add_pos
(
c
,
max
,
mvt_label
);
result
=
movement_
tagparse_
add_pos
(
c
,
max
,
mvt_label
);
break
;
case
MVT_LEFT
:
result
=
movement_left_arc
(
c
,
mvt_label
,
max
);
result
=
movement_
tagparse_
left_arc
(
c
,
mvt_label
,
max
);
break
;
case
MVT_RIGHT
:
result
=
movement_right_arc
(
c
,
mvt_label
,
max
);
result
=
movement_
tagparse_
right_arc
(
c
,
mvt_label
,
max
);
break
;
case
MVT_REDUCE
:
result
=
movement_reduce
(
c
,
max
);
result
=
movement_
tagparse_
reduce
(
c
,
max
);
break
;
case
MVT_ROOT
:
result
=
movement_root
(
c
,
max
,
root_label
);
result
=
movement_
tagparse_
root
(
c
,
max
,
root_label
);
break
;
case
MVT_EOS
:
result
=
movement_eos
(
c
,
max
);
result
=
movement_
tagparse_
eos
(
c
,
max
);
break
;
case
MVT_SHIFT
:
result
=
movement_shift
(
c
,
1
,
max
);
result
=
movement_
tagparse_
shift
(
c
,
1
,
max
);
}
if
(
result
==
0
){
if
(
ctx
->
debug_mode
){
fprintf
(
stdout
,
"WARNING : movement cannot be executed doing a SHIFT instead !
\n
"
);
}
movement_shift
(
c
,
1
,
max
);
movement_
tagparse_
shift
(
c
,
1
,
max
);
}
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment