Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alexis Nasr
macaon2
Commits
bbb0919c
Commit
bbb0919c
authored
Jul 12, 2016
by
Alexis Nasr
Browse files
code refactoring
parent
a4db64a2
Changes
15
Hide whitespace changes
Inline
Side-by-side
INSTALL
View file @
bbb0919c
...
...
@@ -10,6 +10,9 @@ The basic procedure to build and install macaon from sources is the following.
- Launch the cmake command:
cmake ..
If you want to compile macaon with debugging options type:
cmake -DCMAKE_BUILD_TYPE=Debug ..
If you want to install macaon locally, you can specify the install path with:
cmake -DCMAKE_INSTALL_PREFIX:PATH=/absolute/path/to/macaon_install_dir ..
...
...
maca_common/include/util.h
View file @
bbb0919c
...
...
@@ -5,4 +5,5 @@
void
myfree
(
void
*
ptr
);
void
*
memalloc
(
size_t
s
);
FILE
*
myfopen
(
const
char
*
path
,
const
char
*
mode
);
FILE
*
myfopen_no_exit
(
const
char
*
path
,
const
char
*
mode
);
#endif
maca_common/src/form2pos.c
View file @
bbb0919c
...
...
@@ -31,7 +31,7 @@ void form2pos_free(form2pos *f2p)
form2pos
*
form2pos_read
(
char
*
filename
)
{
FILE
*
f
=
myfopen
(
filename
,
"r"
);
FILE
*
f
=
myfopen
_no_exit
(
filename
,
"r"
);
int
nbelem
;
int
pos_nb
;
char
pos_list
[
10000
];
...
...
@@ -39,6 +39,8 @@ form2pos *form2pos_read(char *filename)
char
signature
[
200
];
form2pos
*
f2p
=
NULL
;
if
(
f
==
NULL
)
return
NULL
;
/* read number of forms */
fscanf
(
f
,
"%d
\n
"
,
&
nbelem
);
...
...
maca_common/src/util.c
View file @
bbb0919c
...
...
@@ -25,3 +25,12 @@ FILE *myfopen(const char *path, const char *mode)
}
return
f
;
}
/*
 * Open `path` with fopen() using `mode`, without terminating the program
 * when the file cannot be opened.
 *
 * path: name of the file to open.
 * mode: fopen() mode string (e.g. "r").
 *
 * Returns the stream produced by fopen(), still open, or NULL on failure.
 * On failure a "cannot open file ..." diagnostic is written to stderr, so
 * the caller only has to test the result against NULL.
 *
 * A NULL `path` or `mode` is treated as a failed open instead of being
 * handed to fopen(), which would be undefined behavior.
 */
FILE *myfopen_no_exit(const char *path, const char *mode)
{
  FILE *f = NULL;

  /* Only call fopen() with valid arguments; otherwise fall through to the
     common failure report below. */
  if (path != NULL && mode != NULL)
    f = fopen(path, mode);

  if (f == NULL) {
    /* Never pass a NULL pointer to %s. */
    fprintf(stderr, "cannot open file %s\n", path != NULL ? path : "(null)");
  }
  return f;
}
maca_lemmatizer/src/maca_lemmatizer.c
View file @
bbb0919c
...
...
@@ -123,7 +123,7 @@ int main(int argc, char *argv[])
/* look for a valid word */
while
(
fgets
(
buffer
,
10000
,
f
)){
if
(
feof
(
f
))
return
0
;
/* no more words to read */
if
((
buffer
[
0
]
==
'\n'
)
||
(
buffer
[
0
]
==
' '
)){
if
((
buffer
[
0
]
==
'\n'
)
||
(
buffer
[
0
]
==
' '
)
||
(
buffer
[
0
]
==
'\t'
)
){
printf
(
"
\n
"
);
continue
;
}
...
...
maca_trans_parser/src/context.c
View file @
bbb0919c
...
...
@@ -7,9 +7,6 @@
#include "context.h"
#include "util.h"
void
context_set_linguistic_resources_filenames
(
context
*
ctx
);
void
context_free
(
context
*
ctx
)
{
if
(
ctx
->
program_name
)
free
(
ctx
->
program_name
);
...
...
@@ -306,13 +303,8 @@ context *context_read_options(int argc, char *argv[])
}
}
context_set_linguistic_resources_filenames
(
ctx
);
if
(
ctx
->
features_model_filename
){
ctx
->
features_model
=
feat_model_read
(
ctx
->
features_model_filename
);
}
/* if(ctx->mcd_filename && ctx->conll_filename){
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename);
ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1;
...
...
@@ -341,7 +333,7 @@ context *context_read_options(int argc, char *argv[])
return
ctx
;
}
void
context_set_linguistic_resources_filenames
(
context
*
ctx
)
void
context_set_linguistic_resources_filenames
_parser
(
context
*
ctx
)
{
char
absolute_path
[
500
];
char
absolute_filename
[
500
];
...
...
@@ -382,11 +374,10 @@ void context_set_linguistic_resources_filenames(context *ctx)
ctx
->
features_model_filename
=
strdup
(
absolute_filename
);
}
/* fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
fprintf
(
stderr
,
"perc_model_filename = %s
\n
"
,
ctx
->
perc_model_filename
);
fprintf
(
stderr
,
"vocabs_filename = %s
\n
"
,
ctx
->
vocabs_filename
);
fprintf
(
stderr
,
"mcd_filename = %s
\n
"
,
ctx
->
mcd_filename
);
fprintf
(
stderr
,
"perc_features_model_filename = %s
\n
"
,
ctx
->
features_model_filename
);
}
void
context_set_linguistic_resources_filenames_tagger
(
context
*
ctx
)
...
...
@@ -405,7 +396,7 @@ void context_set_linguistic_resources_filenames_tagger(context *ctx)
strcat
(
absolute_path
,
ctx
->
language
);
strcat
(
absolute_path
,
"/bin/"
);
if
(
!
ctx
->
perc_model_filename
){
strcpy
(
absolute_filename
,
absolute_path
);
strcat
(
absolute_filename
,
DEFAULT_MODEL_TAGGER_FILENAME
);
...
...
@@ -430,9 +421,17 @@ void context_set_linguistic_resources_filenames_tagger(context *ctx)
ctx
->
features_model_filename
=
strdup
(
absolute_filename
);
}
/* fprintf(stdout, "perc_model_filename = %s\n", ctx->perc_model_filename);
fprintf(stdout, "vocabs_filename = %s\n", ctx->vocabs_filename);
fprintf(stdout, "mcd_filename = %s\n", ctx->mcd_filename);
fprintf(stdout, "perc_features_model_filename = %s\n", ctx->features_model_filename);*/
if
(
!
ctx
->
f2p_filename
){
strcpy
(
absolute_filename
,
absolute_path
);
strcat
(
absolute_filename
,
DEFAULT_F2P_FILENAME
);
ctx
->
f2p_filename
=
strdup
(
absolute_filename
);
ctx
->
f2p
=
form2pos_read
(
ctx
->
f2p_filename
);
}
fprintf
(
stderr
,
"perc_model_filename = %s
\n
"
,
ctx
->
perc_model_filename
);
fprintf
(
stderr
,
"vocabs_filename = %s
\n
"
,
ctx
->
vocabs_filename
);
fprintf
(
stderr
,
"mcd_filename = %s
\n
"
,
ctx
->
mcd_filename
);
fprintf
(
stderr
,
"perc_features_model_filename = %s
\n
"
,
ctx
->
features_model_filename
);
fprintf
(
stderr
,
"f2p_filename = %s
\n
"
,
ctx
->
f2p_filename
);
}
maca_trans_parser/src/context.h
View file @
bbb0919c
...
...
@@ -14,6 +14,7 @@
#define DEFAULT_FEATURES_MODEL_TAGGER_FILENAME "maca_trans_tagger.fm"
#define DEFAULT_VOCABS_TAGGER_FILENAME "maca_trans_tagger.vocab"
#define DEFAULT_MODEL_TAGGER_FILENAME "maca_trans_tagger.model"
#define DEFAULT_F2P_FILENAME "fP"
#include "dico_vec.h"
#include "feat_model.h"
...
...
@@ -92,4 +93,10 @@ void context_maca_data_path_help_message(context *ctx);
void
context_f2p_filename_help_message
(
context
*
ctx
);
void
context_set_linguistic_resources_filenames_tagger
(
context
*
ctx
);
void
context_set_linguistic_resources_filenames_parser
(
context
*
ctx
);
#endif
maca_trans_parser/src/decode.c
View file @
bbb0919c
...
...
@@ -53,6 +53,9 @@ int main(int argc, char *argv[])
ctx
=
context_read_options
(
argc
,
argv
);
decode_check_options
(
ctx
);
context_set_linguistic_resources_filenames_parser
(
ctx
);
ctx
->
features_model
=
feat_model_read
(
ctx
->
features_model_filename
);
ctx
->
vocabs
=
dico_vec_read
(
ctx
->
vocabs_filename
,
ctx
->
hash_ratio
);
mcd_link_to_dico
(
ctx
->
mcd_struct
,
ctx
->
vocabs
);
...
...
maca_trans_parser/src/decode_tagger.c
View file @
bbb0919c
...
...
@@ -46,11 +46,12 @@ int main(int argc, char *argv[])
{
FILE
*
conll_file
=
NULL
;
context
*
ctx
;
/* struct fann *ann; */
ctx
=
context_read_options
(
argc
,
argv
);
decode_check_options
(
ctx
);
context_set_linguistic_resources_filenames_tagger
(
ctx
);
ctx
->
features_model
=
feat_model_read
(
ctx
->
features_model_filename
);
ctx
->
vocabs
=
dico_vec_read
(
ctx
->
vocabs_filename
,
ctx
->
hash_ratio
);
mcd_link_to_dico
(
ctx
->
mcd_struct
,
ctx
->
vocabs
);
...
...
maca_trans_parser/src/depset.c
View file @
bbb0919c
...
...
@@ -77,7 +77,8 @@ void depset_print2(FILE *f, depset *d, dico *dico_labels)
for
(
i
=
1
;
i
<
d
->
length
;
i
++
){
if
((
d
->
array
[
i
].
gov
)
&&
(
d
->
array
[
i
].
dep
)){
fprintf
(
f
,
"%s
\t
%d
\t
%s
\n
"
,
d
->
array
[
i
].
dep
->
input
,
word_get_index
(
d
->
array
[
i
].
gov
)
-
word_get_index
(
d
->
array
[
i
].
dep
),
dico_int2string
(
dico_labels
,
d
->
array
[
i
].
label
));
/* fprintf(f, "%s\t%d\t%s\n", d->array[i].dep->input, word_get_index(d->array[i].gov) - word_get_index(d->array[i].dep), dico_int2string(dico_labels, d->array[i].label ));*/
fprintf
(
f
,
"%s
\t
%d
\t
%s
\n
"
,
d
->
array
[
i
].
dep
->
input
,
word_get_index
(
d
->
array
[
i
].
gov
),
dico_int2string
(
dico_labels
,
d
->
array
[
i
].
label
));
}
}
fprintf
(
f
,
"
\n
"
);
...
...
maca_trans_parser/src/maca_trans_parser_conll2cff.c
View file @
bbb0919c
...
...
@@ -169,6 +169,10 @@ int main(int argc, char *argv[])
ctx
=
context_read_options
(
argc
,
argv
);
maca_trans_parser_conll2cff_check_options
(
ctx
);
ctx
->
features_model
=
feat_model_read
(
ctx
->
features_model_filename
);
if
(
ctx
->
mode
==
TRAIN_MODE
){
mcd_extract_dico_from_corpus
(
ctx
->
mcd_struct
,
ctx
->
conll_filename
);
...
...
maca_trans_parser/src/maca_trans_parser_conll2cff_tagger.c
View file @
bbb0919c
...
...
@@ -82,7 +82,6 @@ void generate_training_file_stream(FILE *output_file, context *ctx)
fprintf
(
output_file
,
"%d"
,
postag
);
feat_vec_print
(
output_file
,
fv
);
if
(
postag
!=
-
1
)
movement_tagger
(
c
,
postag
,
0
,
1
);
}
...
...
@@ -111,7 +110,6 @@ void generate_training_file_buffer(FILE *output_file, context *ctx)
if
(
ctx
->
f2p
)
add_signature_to_words_in_queue
(
c
->
bf
,
ctx
->
f2p
);
while
(
!
config_is_terminal
(
c
)){
/* config_print(stdout, c); */
config2feat_vec_cff
(
ctx
->
features_model
,
c
,
ctx
->
d_perceptron_features
,
fv
,
ctx
->
mode
);
...
...
@@ -136,6 +134,9 @@ int main(int argc, char *argv[])
ctx
=
context_read_options
(
argc
,
argv
);
maca_trans_parser_conll2cff_check_options
(
ctx
);
ctx
->
features_model
=
feat_model_read
(
ctx
->
features_model_filename
);
if
(
ctx
->
mode
==
TRAIN_MODE
){
mcd_extract_dico_from_corpus
(
ctx
->
mcd_struct
,
ctx
->
conll_filename
);
ctx
->
vocabs
=
mcd_build_dico_vec
(
ctx
->
mcd_struct
);
...
...
@@ -146,7 +147,6 @@ int main(int argc, char *argv[])
}
feat_model_compute_ranges
(
ctx
->
features_model
,
ctx
->
mcd_struct
,
ctx
->
mvt_nb
);
/* in train mode create feature dictionary for perceptron */
if
(
ctx
->
mode
==
TRAIN_MODE
)
...
...
maca_trans_parser/src/queue.c
View file @
bbb0919c
...
...
@@ -22,7 +22,7 @@ int queue_read_sentence(queue *bf, FILE *f, mcd *mcd_struct)
while
(
fgets
(
buffer
,
10000
,
f
)){
if
(
feof
(
f
))
break
;
/* fprintf(stderr, "%s", buffer); */
if
((
buffer
[
0
]
==
'\n'
)
||
(
buffer
[
0
]
==
' '
))
break
;
/* end of the sentence */
if
((
buffer
[
0
]
==
'\n'
)
||
(
buffer
[
0
]
==
' '
)
||
(
buffer
[
0
]
==
'\t'
)
)
break
;
/* end of the sentence */
w
=
word_parse_buffer
(
buffer
,
mcd_struct
);
if
(
word_get_index
(
w
)
==
-
1
){
w
->
feat_array
[
FEAT_TYPE_INDEX
]
=
index
++
;
...
...
maca_trans_parser/src/simple_decoder_tagger.c
View file @
bbb0919c
...
...
@@ -21,25 +21,17 @@ void add_signature_to_words_in_queue(queue *bf, form2pos *f2p)
}
}
void
simple_decoder_buffer
(
context
*
ctx
)
{
FILE
*
f
=
NULL
;
dico
*
dico_pos
=
dico_vec_get_dico
(
ctx
->
vocabs
,
(
char
*
)
"POS"
);
feature_table
*
ft
=
feature_table_load
(
ctx
->
perc_model_filename
);
config
*
c
=
NULL
;
int
postag
;
feat_vec
*
fv
=
feat_vec_new
(
feature_types_nb
);
float
max
;
int
i
;
word
*
w
;
if
(
ctx
->
conll_filename
)
f
=
myfopen
(
ctx
->
conll_filename
,
"r"
);
else
f
=
stdin
;
c
=
config_initial
(
f
,
ctx
->
mcd_struct
,
1000
,
0
);
word
*
w
=
NULL
;
FILE
*
f
=
(
ctx
->
conll_filename
)
?
myfopen
(
ctx
->
conll_filename
,
"r"
)
:
stdin
;
config
*
c
=
config_initial
(
f
,
ctx
->
mcd_struct
,
1000
,
0
);
/* read a sentence and put it in the buffer */
while
(
queue_read_sentence
(
c
->
bf
,
f
,
ctx
->
mcd_struct
)){
...
...
@@ -59,6 +51,7 @@ void simple_decoder_buffer(context *ctx)
w
=
stack_elt_n
(
c
->
st
,
i
);
printf
(
"%s
\t
%s
\n
"
,
w
->
input
,
dico_int2string
(
dico_pos
,
word_get_pos
(
w
)));
}
printf
(
"
\n
"
);
/* config_free(c); */
c
=
config_initial
(
f
,
ctx
->
mcd_struct
,
1000
,
0
);
...
...
@@ -74,7 +67,6 @@ void simple_decoder_stream(context *ctx)
feat_vec
*
fv
=
feat_vec_new
(
feature_types_nb
);
FILE
*
f
=
NULL
;
/* when in stream mode, force to renumber the tokens (ugly !) */
ctx
->
mcd_struct
->
type
[
ctx
->
mcd_struct
->
type2col
[
FEAT_TYPE_INDEX
]]
=
-
1
;
...
...
@@ -82,23 +74,14 @@ void simple_decoder_stream(context *ctx)
while
(
!
config_is_terminal
(
c
)){
config_print
(
stdout
,
c
);
config2feat_vec_cff
(
ctx
->
features_model
,
c
,
ctx
->
d_perceptron_features
,
fv
,
LOOKUP_MODE
);
}
/* config_print(stdout, c); */
/* config_free(c); */
}
void
simple_decoder_tagger
(
context
*
ctx
)
/* (FILE *f, mcd *mcd_struct, dico *d_perceptron_features, dico *dico_pos, feature_table *ft, feat_model *fm, int verbose, int stream_mode)*/
{
/*conll_file, ctx->mcd_struct, ctx->d_perceptron_features, dico_pos, ft, ctx->features_model, ctx->verbose, ctx->stream_mode);*/
ctx
->
d_perceptron_features
=
dico_vec_get_dico
(
ctx
->
vocabs
,
(
char
*
)
"d_perceptron_features"
);
if
(
ctx
->
stream_mode
)
...
...
maca_trans_parser/src/word.c
View file @
bbb0919c
...
...
@@ -31,6 +31,7 @@ word *word_read(FILE *f, mcd *mcd_struct)
while
(
fgets
(
buffer
,
10000
,
f
)){
if
(
feof
(
f
))
return
NULL
;
/* no more words to read */
if
((
buffer
[
0
]
!=
'\n'
)
&&
(
buffer
[
0
]
!=
' '
)){
/* printf("word = %s\n", buffer); */
return
word_parse_buffer
(
buffer
,
mcd_struct
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment