Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
macaon2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Alexis Nasr
macaon2
Commits
26193d70
Commit
26193d70
authored
7 years ago
by
Mathux
Browse files
Options
Downloads
Patches
Plain Diff
maca_error_predictor_tagger works
parent
b71a2896
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
maca_trans_parser/src/context.h
+4
-1
4 additions, 1 deletion
maca_trans_parser/src/context.h
maca_trans_parser/src/maca_error_predictor_tagger.c
+98
-58
98 additions, 58 deletions
maca_trans_parser/src/maca_error_predictor_tagger.c
with
102 additions
and
59 deletions
maca_trans_parser/src/context.h
+
4
−
1
View file @
26193d70
...
...
@@ -37,7 +37,10 @@
#define DEFAULT_MODEL_PARSER_NN_FILENAME "maca_trans_parser_nn.weights"
#define DEFAULT_JSON_PARSER_NN_FILENAME "maca_trans_parser_nn.json"
#define DEFAULT_PATH_RELAT "../data/treebank/"
#define DEFAULT_CONLL07_DEV "dev.conll07"
#define DEFAULT_CONLL07_TRAIN "train.conll07"
#define DEFAULT_CONLL07_TEST "test.conll07"
#define DEFAULT_F2P_FILENAME "fP"
#define DEFAULT_FPLM_FILENAME "fplm"
...
...
This diff is collapsed.
Click to expand it.
maca_trans_parser/src/maca_error_predictor_tagger.c
+
98
−
58
View file @
26193d70
...
...
@@ -72,29 +72,24 @@ void add_signature_to_words_in_word_buffer_tagger(word_buffer *bf, form2pos *f2p
void
maca_error_predictor_help_message
(
context
*
ctx
)
{
context_general_help_message
(
ctx
);
context_mode_help_message
(
ctx
);
//
context_mode_help_message(ctx);
context_sent_nb_help_message
(
ctx
);
context_mcd_help_message
(
ctx
);
//context_mcd_help_message(ctx);
fprintf
(
stderr
,
"INPUT
\n
"
);
context_conll_help_message
(
ctx
);
fprintf
(
stderr
,
"IN TEST MODE
\n
"
);
fprintf
(
stderr
,
"
\t
-i --input <file> : input is in conll07 format (default is dev.conll07)
\n
"
);
//
fprintf(stderr, "IN TEST MODE\n");
context_vocabs_help_message
(
ctx
);
fprintf
(
stderr
,
"OUTPUT
\n
"
);
context_cff_help_message
(
ctx
);
fprintf
(
stderr
,
"IN TRAIN MODE
\n
"
);
context_vocabs_help_message
(
ctx
);
fprintf
(
stderr
,
"
\t
-x --cff <file> : CFF format file name (default is stdout)
\n
"
);
//
fprintf(stderr, "IN TRAIN MODE\n");
//
context_vocabs_help_message(ctx);
}
void
maca_error_predictor_check_options
(
context
*
ctx
)
{
if
(
0
/*!ctx->input_filename
|| ctx->help
/ || !ctx->mcd_filename /
|| !(ctx->cff_filename || ctx->fann_filename)
*/
){
if
(
ctx
->
help
){
maca_error_predictor_help_message
(
ctx
);
exit
(
1
);
}
...
...
@@ -106,55 +101,58 @@ int config_is_equal_tagger(config *c1, config *c2)
return
((
bm1p
(
c1
)
==
bm1p
(
c2
))
&&
(
bm2p
(
c1
)
==
bm2p
(
c2
))
&&
(
bm3p
(
c1
)
==
bm3p
(
c2
)));
}
void
generate_error_train
(
FILE
*
output_file
,
context
*
ctx
)
{
config
*
config_oracle
;
feature_table
*
ft
=
feature_table_load
(
ctx
->
perc_model_filename
,
ctx
->
verbose
);
feat_vec
*
fv_oracle
=
feat_vec_new
(
feature_types_nb
);
FILE
*
conll_file_oracle
=
myfopen
(
ctx
->
input_filename
,
"r"
);
int
postag_oracle
;
float
max
;
word
*
b0
;
dico
*
dico_pos
=
dico_vec_get_dico
(
ctx
->
vocabs
,
(
char
*
)
"POS"
);
config
*
config_predicted
;
feature_table
*
ft
=
feature_table_load
(
ctx
->
perc_model_filename
,
ctx
->
verbose
);
feat_vec
*
fv_predicted
=
feat_vec_new
(
feature_types_nb
);
FILE
*
conll_file_predicted
=
myfopen
(
ctx
->
input_filename
,
"r"
);
int
postag_predicted
;
float
max
;
dico
*
dico_pos
=
dico_vec_get_dico
(
ctx
->
vocabs
,
(
char
*
)
"POS"
);
config_oracle
=
config_new
(
conll_file_oracle
,
ctx
->
mcd_struct
,
5
);
config_predicted
=
config_new
(
conll_file_predicted
,
ctx
->
mcd_struct
,
5
);
while
(
!
config_is_terminal
(
config_oracle
)){
/*if(ctx->f2p){
add_signature_to_words_in_word_buffer_tagger(config_oracle->bf, ctx->f2p);
if
(
ctx
->
f2p
){
add_signature_to_words_in_word_buffer_tagger
(
config_predicted
->
bf
,
ctx
->
f2p
);
}*/
add_signature_to_words_in_word_buffer_tagger
(
config_oracle
->
bf
,
ctx
->
f2p
);
}
// oracle
config2feat_vec_cff
(
ctx
->
features_model
,
config_oracle
,
ctx
->
d_perceptron_features
,
fv_oracle
,
LOOKUP_MODE
);
postag_oracle
=
oracle_tagger
(
config_oracle
);
if
(
ctx
->
debug_mode
){
printf
(
"Oracle : "
);
print_word_simple
(
word_buffer_b0
(
config_oracle
->
bf
),
ctx
->
mcd_struct
,
dico_pos
,
postag_oracle
);
}
// predicted
b0
=
word_buffer_b0
(
config_predicted
->
bf
);
config2feat_vec_cff
(
ctx
->
features_model
,
config_predicted
,
ctx
->
d_perceptron_features
,
fv_predicted
,
LOOKUP_MODE
);
postag_predicted
=
feature_table_argmax
(
fv_predicted
,
ft
,
&
max
);
if
(
ctx
->
debug_mode
){
printf
(
"Predicted : "
);
print_word_simple
(
b0
,
ctx
->
mcd_struct
,
dico_pos
,
postag_predicted
);
}
if
(
1
){
if
(
ctx
->
debug_mode
){
vcode
*
vcode_array
=
feature_table_get_vcode_array
(
fv_predicted
,
ft
);
for
(
int
i
=
0
;
i
<
3
;
i
++
){
f
printf
(
stdout
,
"%d
\t
"
,
i
);
f
printf
(
stdout
,
"%s
\t
%.4f
\n
"
,
dico_int2string
(
dico_pos
,
vcode_array
[
i
].
class_code
),
vcode_array
[
i
].
score
);
printf
(
"%d
\t
"
,
i
);
printf
(
"%s
\t
%.4f
\n
"
,
dico_int2string
(
dico_pos
,
vcode_array
[
i
].
class_code
),
vcode_array
[
i
].
score
);
}
free
(
vcode_array
);
}
if
(
postag_oracle
!=
postag_predicted
)
fprintf
(
stdout
,
"**************** DIFFERENT CHOICE ***********
\n\n
"
);
...
...
@@ -162,16 +160,17 @@ void generate_error_train(FILE *output_file, context *ctx)
else
fprintf
(
stdout
,
"**************** EQUAL CHOICE ***********
\n\n
"
);
}
movement_tagger
(
config_oracle
,
postag_oracle
);
movement_tagger
(
config_predicted
,
postag_predicted
);
if
(
!
ctx
->
debug_mode
||
output_file
!=
stdout
)
{
fprintf
(
output_file
,
"%d"
,
((
config_is_equal_tagger
(
config_oracle
,
config_predicted
))
?
1
:
0
));
fprintf
(
output_file
,
" or : %d, pred : %d"
,
postag_oracle
,
postag_predicted
);
feat_vec_print
(
output_file
,
fv_predicted
);
//word_set_pos(word_buffer_bm1(config_predicted->bf), postag_oracle);
}
}
feat_vec_free
(
fv_oracle
);
feat_vec_free
(
fv_predicted
);
feature_table_free
(
ft
);
...
...
@@ -183,6 +182,54 @@ void generate_error_train(FILE *output_file, context *ctx)
}
/*
 * Concatenate prefix + subdir + name into a freshly allocated string.
 *
 * Every argument must be a valid NUL-terminated string (pass "" for a part
 * that is not needed). The result is sized from the actual input lengths,
 * so there is no fixed-size buffer to overflow. The caller owns the result.
 * On allocation failure an error is printed to stderr and the program exits
 * with status 1.
 */
static char *error_tagger_default_path(const char *prefix, const char *subdir, const char *name)
{
  size_t prefix_len = strlen(prefix);
  size_t subdir_len = strlen(subdir);
  size_t name_len = strlen(name);
  char *path = malloc(prefix_len + subdir_len + name_len + 1);

  if (path == NULL) {
    fprintf(stderr, "error_tagger_set_linguistic_resources_filename: out of memory\n");
    exit(1);
  }

  memcpy(path, prefix, prefix_len);
  memcpy(path + prefix_len, subdir, subdir_len);
  memcpy(path + prefix_len + subdir_len, name, name_len + 1); /* +1 copies the terminating NUL */
  return path;
}

/*
 * Fill in every resource filename of ctx that is still unset with a default
 * located under ctx->maca_data_path:
 *
 *   perc_model_filename     <- maca_data_path + DEFAULT_MODEL_TAGGER_FILENAME
 *   vocabs_filename         <- maca_data_path + DEFAULT_VOCABS_TAGGER_FILENAME
 *   input_filename          <- maca_data_path + DEFAULT_PATH_RELAT + DEFAULT_CONLL07_DEV
 *   features_model_filename <- maca_data_path + DEFAULT_FEATURES_MODEL_TAGGER_FILENAME
 *   f2p_filename            <- maca_data_path + DEFAULT_F2P_FILENAME
 *
 * Filenames that are already set are left untouched. cff_filename and
 * mcd_filename get no default here. When ctx->verbose is set, the resulting
 * filenames are printed to stderr.
 *
 * ctx->maca_data_path is read unconditionally whenever a default is needed.
 * NOTE(review): presumably it is always set by context_read_options - confirm.
 */
void error_tagger_set_linguistic_resources_filename(context *ctx)
{
  if (!ctx->perc_model_filename) {
    ctx->perc_model_filename =
      error_tagger_default_path(ctx->maca_data_path, "", DEFAULT_MODEL_TAGGER_FILENAME);
  }

  if (!ctx->vocabs_filename) {
    ctx->vocabs_filename =
      error_tagger_default_path(ctx->maca_data_path, "", DEFAULT_VOCABS_TAGGER_FILENAME);
  }

  if (!ctx->input_filename) {
    ctx->input_filename =
      error_tagger_default_path(ctx->maca_data_path, DEFAULT_PATH_RELAT, DEFAULT_CONLL07_DEV);
  }

  /* cff_filename deliberately gets no default: it is left unset here. */

  if (!ctx->features_model_filename) {
    ctx->features_model_filename =
      error_tagger_default_path(ctx->maca_data_path, "", DEFAULT_FEATURES_MODEL_TAGGER_FILENAME);
  }

  if (!ctx->f2p_filename) {
    ctx->f2p_filename =
      error_tagger_default_path(ctx->maca_data_path, "", DEFAULT_F2P_FILENAME);
  }

  if (ctx->verbose) {
    fprintf(stderr, "perc_model_filename = %s\n", ctx->perc_model_filename);
    fprintf(stderr, "vocabs_filename = %s\n", ctx->vocabs_filename);
    /* mcd_filename has no default above, so it may be NULL; passing NULL to %s is undefined. */
    fprintf(stderr, "mcd_filename = %s\n", ctx->mcd_filename ? ctx->mcd_filename : "(null)");
    fprintf(stderr, "perc_features_model_filename = %s\n", ctx->features_model_filename);
    fprintf(stderr, "f2p_filename = %s\n", ctx->f2p_filename);
    /* Fixed: this line used to print f2p_filename under the input_filename label. */
    fprintf(stderr, "input_filename = %s\n", ctx->input_filename);
  }
}
int
main
(
int
argc
,
char
*
argv
[])
...
...
@@ -191,20 +238,13 @@ int main(int argc, char *argv[])
FILE
*
output_file
;
ctx
=
context_read_options
(
argc
,
argv
);
//maca_error_predictor_check_options(ctx);
ctx
->
perc_model_filename
=
"/home/mathis/maca_data2/fr/bin/maca_trans_tagger.model"
;
ctx
->
features_model_filename
=
"/home/mathis/maca_data2/fr/bin/maca_trans_tagger.fm"
;
ctx
->
input_filename
=
"/home/mathis/maca_data2/fr/data/treebank/dev.conll07"
;
ctx
->
f2p_filename
=
"/home/mathis/maca_data2/fr/bin/fP"
;
ctx
->
vocabs_filename
=
"/home/mathis/maca_data2/fr/bin/maca_trans_tagger.vocab"
;
ctx
->
cff_filename
=
"/home/mathis/test/stage/error.cff"
;
error_tagger_set_linguistic_resources_filename
(
ctx
);
ctx
->
f2p
=
form2pos_read
(
ctx
->
f2p_filename
);
maca_error_predictor_check_options
(
ctx
);
/* load ctx */
ctx
->
mcd_struct
=
mcd_build_conll07
();
//decode_tagger_set_linguistic_resources_filenames(ctx);
ctx
->
features_model
=
feat_model_read
(
ctx
->
features_model_filename
,
feat_lib_build
(),
1
);
//ctx->verbose);
ctx
->
features_model
=
feat_model_read
(
ctx
->
features_model_filename
,
feat_lib_build
(),
ctx
->
verbose
);
ctx
->
vocabs
=
dico_vec_read
(
ctx
->
vocabs_filename
,
ctx
->
hash_ratio
);
mcd_link_to_dico
(
ctx
->
mcd_struct
,
ctx
->
vocabs
,
ctx
->
verbose
);
ctx
->
d_perceptron_features
=
dico_vec_get_dico
(
ctx
->
vocabs
,
(
char
*
)
"d_perceptron_features"
);
...
...
@@ -225,7 +265,7 @@ int main(int argc, char *argv[])
if
(
ctx
->
cff_filename
)
fclose
(
output_file
);
//
context_free(ctx);
context_free
(
ctx
);
return
0
;
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment