Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alexis Nasr
macaon2
Commits
64d47ec0
Commit
64d47ec0
authored
Jul 14, 2016
by
Alexis Nasr
Browse files
changed option names
parent
f9233876
Changes
20
Show whitespace changes
Inline
Side-by-side
maca_common/include/mcd.h
View file @
64d47ec0
...
...
@@ -33,8 +33,10 @@ typedef struct {
}
mcd
;
mcd
*
mcd_build_conll07
(
void
);
mcd
*
mcd_read
(
char
*
mcd_filename
);
void
mcd_link_to_dico
(
mcd
*
m
,
dico_vec
*
vocabs
);
mcd
*
mcd_build_ifpls
(
void
);
mcd
*
mcd_read
(
char
*
mcd_filename
,
int
verbose
);
void
mcd_link_to_dico
(
mcd
*
m
,
dico_vec
*
vocabs
,
int
verbose
);
void
mcd_extract_dico_from_corpus
(
mcd
*
m
,
char
*
corpus_filename
);
void
mcd_free
(
mcd
*
m
);
int
mcd_get_code
(
mcd
*
m
,
char
*
str
,
int
col
);
...
...
maca_common/src/form2pos.c
View file @
64d47ec0
...
...
@@ -65,6 +65,9 @@ form2pos *form2pos_read(char *filename)
int
form2pos_get_signature
(
form2pos
*
f2p
,
char
*
form
)
{
/* if(form == NULL)
return -1;
else*/
return
hash_get_val
(
f2p
->
h_form2signature
,
form
);
}
...
...
maca_common/src/mcd.c
View file @
64d47ec0
...
...
@@ -110,7 +110,7 @@ void mcd_extract_dico_from_corpus(mcd *m, char *corpus_filename)
/* takes as argument an mcd structure (m) and a dictionary vector (vocabs) */
/* links the vocabularies of m to vocabularies of vocabs (based on their names) */
void
mcd_link_to_dico
(
mcd
*
m
,
dico_vec
*
vocabs
)
void
mcd_link_to_dico
(
mcd
*
m
,
dico_vec
*
vocabs
,
int
verbose
)
{
int
column
;
for
(
column
=
0
;
column
<
m
->
nb_col
;
column
++
){
...
...
@@ -118,14 +118,14 @@ void mcd_link_to_dico(mcd *m, dico_vec *vocabs)
&&
(
!
strcmp
(
m
->
filename
[
column
],
"_"
))
&&
(
m
->
dico_array
[
column
]
==
NULL
)){
m
->
dico_array
[
column
]
=
dico_vec_get_dico
(
vocabs
,
m
->
type_str
[
column
]);
fprintf
(
stderr
,
"linking to dico %s
\n
"
,
m
->
type_str
[
column
]);
if
(
verbose
)
fprintf
(
stderr
,
"linking to dico %s
\n
"
,
m
->
type_str
[
column
]);
}
}
}
/* read an multi column description file and produces an mcd structure */
mcd
*
mcd_read
(
char
*
mcd_filename
)
mcd
*
mcd_read
(
char
*
mcd_filename
,
int
verbose
)
{
int
column
;
char
type
[
100
];
...
...
@@ -148,7 +148,7 @@ mcd *mcd_read(char *mcd_filename)
/* fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename); */
continue
;
}
fprintf
(
stderr
,
"column = %d type = %s representation = %s filename = %s
\n
"
,
column
,
type
,
representation
,
filename
);
if
(
verbose
)
fprintf
(
stderr
,
"column = %d type = %s representation = %s filename = %s
\n
"
,
column
,
type
,
representation
,
filename
);
m
->
type
[
column
]
=
feat_type_string2int
(
type
);
m
->
type_str
[
column
]
=
strdup
(
type
);
if
(
m
->
type
[
column
]
==
-
1
){
...
...
@@ -170,11 +170,11 @@ mcd *mcd_read(char *mcd_filename)
if
(
strcmp
(
m
->
filename
[
column
],
"_"
)){
if
(
m
->
representation
[
column
]
==
MCD_REPRESENTATION_EMB
){
fprintf
(
stderr
,
"loading word embedding %s
\n
"
,
m
->
filename
[
column
]);
if
(
verbose
)
fprintf
(
stderr
,
"loading word embedding %s
\n
"
,
m
->
filename
[
column
]);
m
->
word_emb_array
[
column
]
=
word_emb_load
(
m
->
filename
[
column
]);
}
else
if
(
m
->
representation
[
column
]
==
MCD_REPRESENTATION_VOCAB
){
fprintf
(
stderr
,
"loading dico %s
\n
"
,
m
->
filename
[
column
]);
if
(
verbose
)
fprintf
(
stderr
,
"loading dico %s
\n
"
,
m
->
filename
[
column
]);
m
->
dico_array
[
column
]
=
dico_read
(
m
->
filename
[
column
],
0
.
5
);
}
}
...
...
@@ -190,46 +190,97 @@ mcd *mcd_build_conll07(void)
m
->
type
[
0
]
=
FEAT_TYPE_INDEX
;
m
->
type_str
[
0
]
=
strdup
(
"INDEX"
);
m
->
representation
[
0
]
=
MCD_REPRESENTATION_INT
;
m
->
filename
[
0
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_INDEX
]
=
0
;
m
->
type
[
1
]
=
FEAT_TYPE_FORM
;
m
->
type_str
[
1
]
=
strdup
(
"FORM"
);
m
->
representation
[
1
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
1
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_FORM
]
=
1
;
m
->
type
[
2
]
=
FEAT_TYPE_LEMMA
;
m
->
type_str
[
2
]
=
strdup
(
"LEMMA"
);
m
->
representation
[
2
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
2
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_LEMMA
]
=
2
;
m
->
type
[
3
]
=
FEAT_TYPE_CPOS
;
m
->
type_str
[
3
]
=
strdup
(
"CPOS"
);
m
->
representation
[
3
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
3
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_CPOS
]
=
3
;
m
->
type
[
4
]
=
FEAT_TYPE_POS
;
m
->
type_str
[
4
]
=
strdup
(
"POS"
);
m
->
representation
[
4
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
4
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_POS
]
=
4
;
m
->
type
[
5
]
=
FEAT_TYPE_FEATS
;
m
->
type_str
[
5
]
=
strdup
(
"FEATS"
);
m
->
representation
[
5
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
5
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_FEATS
]
=
5
;
m
->
type
[
6
]
=
FEAT_TYPE_GOV
;
m
->
type_str
[
6
]
=
strdup
(
"GOV"
);
m
->
representation
[
6
]
=
MCD_REPRESENTATION_INT
;
m
->
filename
[
6
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_GOV
]
=
6
;
m
->
type
[
7
]
=
FEAT_TYPE_LABEL
;
m
->
type_str
[
7
]
=
strdup
(
"LABEL"
);
m
->
representation
[
7
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
7
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_LABEL
]
=
7
;
return
m
;
}
mcd
*
mcd_build_ifpls
(
void
)
{
mcd
*
m
=
mcd_new
(
6
);
m
->
type
[
0
]
=
FEAT_TYPE_INDEX
;
m
->
type_str
[
0
]
=
strdup
(
"INDEX"
);
m
->
representation
[
0
]
=
MCD_REPRESENTATION_INT
;
m
->
filename
[
0
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_INDEX
]
=
0
;
m
->
type
[
1
]
=
FEAT_TYPE_FORM
;
m
->
type_str
[
1
]
=
strdup
(
"FORM"
);
m
->
representation
[
1
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
1
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_FORM
]
=
1
;
m
->
type
[
2
]
=
FEAT_TYPE_POS
;
m
->
type_str
[
2
]
=
strdup
(
"POS"
);
m
->
representation
[
2
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
2
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_POS
]
=
2
;
m
->
type
[
3
]
=
FEAT_TYPE_LEMMA
;
m
->
type_str
[
3
]
=
strdup
(
"LEMMA"
);
m
->
representation
[
3
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
3
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_LEMMA
]
=
3
;
m
->
type
[
4
]
=
FEAT_TYPE_GOV
;
m
->
type_str
[
4
]
=
strdup
(
"GOV"
);
m
->
representation
[
4
]
=
MCD_REPRESENTATION_INT
;
m
->
filename
[
4
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_GOV
]
=
4
;
m
->
type
[
5
]
=
FEAT_TYPE_LABEL
;
m
->
type_str
[
5
]
=
strdup
(
"LABEL"
);
m
->
representation
[
5
]
=
MCD_REPRESENTATION_VOCAB
;
m
->
filename
[
5
]
=
strdup
(
"_"
);
m
->
type2col
[
FEAT_TYPE_LABEL
]
=
5
;
return
m
;
}
mcd
*
mcd_read_old
(
char
*
mcd_filename
,
char
*
corpus_filename
,
dico_vec
*
vocabs
)
{
int
column
;
...
...
maca_lemmatizer/src/context.c
View file @
64d47ec0
...
...
@@ -33,8 +33,8 @@ context *context_new(void)
ctx
->
mcd_struct
=
NULL
;
ctx
->
language
=
strdup
(
"fr"
);
ctx
->
maca_data_path
=
NULL
;
ctx
->
form_column
=
-
1
;
ctx
->
pos_column
=
-
1
;
ctx
->
form_column
=
0
;
ctx
->
pos_column
=
1
;
return
ctx
;
}
...
...
@@ -123,7 +123,6 @@ context *context_read_options(int argc, char *argv[])
break
;
case
'm'
:
ctx
->
mcd_filename
=
strdup
(
optarg
);
ctx
->
mcd_struct
=
mcd_read
(
ctx
->
mcd_filename
);
break
;
case
'C'
:
ctx
->
language
=
strdup
(
optarg
);
...
...
@@ -136,6 +135,11 @@ context *context_read_options(int argc, char *argv[])
context_set_linguistic_resources_filenames
(
ctx
);
if
(
ctx
->
mcd_filename
)
ctx
->
mcd_struct
=
mcd_read
(
ctx
->
mcd_filename
,
ctx
->
verbose
);
if
((
ctx
->
mcd_filename
==
NULL
)
&&
((
ctx
->
form_column
==
-
1
)
||
(
ctx
->
pos_column
==
-
1
)))
ctx
->
mcd_struct
=
mcd_build_conll07
();
...
...
maca_trans_parser/src/cff_cutoff.c
View file @
64d47ec0
...
...
@@ -18,7 +18,6 @@ void cff_cutoff_help_message(context *ctx)
context_cutoff_help_message
(
ctx
);
context_cff_help_message
(
ctx
);
fprintf
(
stderr
,
"INPUT/OUTPUT
\n
"
);
context_alphabet_help_message
(
ctx
);
}
void
cff_cutoff_check_options
(
context
*
ctx
)
...
...
maca_trans_parser/src/context.c
View file @
64d47ec0
...
...
@@ -10,11 +10,9 @@
void
context_free
(
context
*
ctx
)
{
if
(
ctx
->
program_name
)
free
(
ctx
->
program_name
);
if
(
ctx
->
conll
_filename
)
free
(
ctx
->
conll
_filename
);
if
(
ctx
->
input
_filename
)
free
(
ctx
->
input
_filename
);
if
(
ctx
->
perc_model_filename
)
free
(
ctx
->
perc_model_filename
);
if
(
ctx
->
dnn_model_filename
)
free
(
ctx
->
dnn_model_filename
);
if
(
ctx
->
dico_features_filename
)
free
(
ctx
->
dico_features_filename
);
if
(
ctx
->
dico_classes_filename
)
free
(
ctx
->
dico_classes_filename
);
if
(
ctx
->
cff_filename
)
free
(
ctx
->
cff_filename
);
if
(
ctx
->
fann_filename
)
free
(
ctx
->
fann_filename
);
if
(
ctx
->
mcd_filename
)
free
(
ctx
->
mcd_filename
);
...
...
@@ -46,11 +44,9 @@ context *context_new(void)
ctx
->
verbose
=
0
;
ctx
->
program_name
=
NULL
;
ctx
->
conll
_filename
=
NULL
;
ctx
->
input
_filename
=
NULL
;
ctx
->
perc_model_filename
=
NULL
;
ctx
->
dnn_model_filename
=
NULL
;
ctx
->
dico_features_filename
=
NULL
;
ctx
->
dico_classes_filename
=
NULL
;
ctx
->
cff_filename
=
NULL
;
ctx
->
fann_filename
=
NULL
;
ctx
->
stag_desc_filename
=
NULL
;
...
...
@@ -82,8 +78,8 @@ context *context_new(void)
ctx
->
hidden_neurons_nb
=
100
;
ctx
->
stream_mode
=
0
;
ctx
->
form_column
=
-
1
;
ctx
->
conll
=
0
;
ctx
->
ifpls
=
1
;
return
ctx
;
}
...
...
@@ -95,11 +91,16 @@ void context_general_help_message(context *ctx)
fprintf
(
stderr
,
"
\t
-h --help : print this message
\n
"
);
fprintf
(
stderr
,
"
\t
-v --verbose : activate verbose mode
\n
"
);
fprintf
(
stderr
,
"
\t
-r --hratio <float> : set the occupation ratio of hash tables (default is 0.5)
\n
"
);
fprintf
(
stderr
,
"
\t
-D --maca_data_path <str> : path to the maca_data directory
\n
"
);
fprintf
(
stderr
,
"
\t
-L --language <str> : identifier of the language to use (default is fr)
\n
"
);
}
void
context_model_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-m --model <file> : model file name
\n
"
);
}
void
context_input_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-i --input <file> : input file name
\n
"
);
}
void
context_iterations_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-n --iter <int> : number of iterations (default is 4)
\n
"
);
}
...
...
@@ -107,22 +108,16 @@ void context_cff_help_message(context *ctx){
fprintf
(
stderr
,
"
\t
-x --cff <file> : CFF format file name
\n
"
);
}
void
context_fann_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-y --fann <file> : FANN format file name
\n
"
);
}
void
context_d_features_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-f --df <file> : features dictionnary file name
\n
"
);
}
void
context_d_classes_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-c --dc <file> : classes dictionnary file name
\n
"
);
fprintf
(
stderr
,
"
\t
-f --fann <file> : FANN format file name
\n
"
);
}
void
context_conll_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-
i
--conll
<file> : conll file name
\n
"
);
fprintf
(
stderr
,
"
\t
-
c
--conll
: input is in conll07 format
\n
"
);
}
void
context_cutoff_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-u --cutoff <int> : cutoff value
\n
"
);
}
void
context_mode_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-
o
--mode TEST|TRAIN
\n
"
);
fprintf
(
stderr
,
"
\t
-
M
--mode
:
TEST|TRAIN
\n
"
);
}
void
context_beam_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-b --beam <int> : beam width (default is 1)
\n
"
);
...
...
@@ -130,18 +125,6 @@ void context_beam_help_message(context *ctx){
void
context_sent_nb_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-s --sent_nb <int> : number of sentences to process (default is 1000000)
\n
"
);
}
void
context_alphabet_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-a --alphabet <file> : name of the file containing the different dictionaries
\n
"
);
}
void
context_dnn_model_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-M --dnn_model <file> : FANN model file
\n
"
);
}
void
context_hidden_neurons_nb_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-H --hidden_neurons_nb <int> : number of neurons in the hidden layer (default is 100)
\n
"
);
}
void
context_stag_desc_filename_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-S --stag_file <file> : name of the file containing the stag description
\n
"
);
}
void
context_mcd_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-C --mcd <file> : multi column description file name
\n
"
);
}
...
...
@@ -149,27 +132,22 @@ void context_features_model_help_message(context *ctx){
fprintf
(
stderr
,
"
\t
-F --feat_model <file> : feature model file name
\n
"
);
}
void
context_stream_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-
T
--stream
(0|1)
: steam mode
\n
"
);
fprintf
(
stderr
,
"
\t
-
S
--stream
: steam mode
\n
"
);
}
void
context_vocabs_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-V --vocabs : vocabularies file
\n
"
);
fprintf
(
stderr
,
"
\t
-V --vocabs
<file>
: vocabularies file
\n
"
);
}
void
context_language_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-
X
--language : identifier of the language to use
\n
"
);
fprintf
(
stderr
,
"
\t
-
L
--language
: identifier of the language to use
\n
"
);
}
void
context_maca_data_path_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-
Y
--maca_data_path : path to
the
maca_data directory
\n
"
);
fprintf
(
stderr
,
"
\t
-
D
--maca_data_path
: path to maca_data directory
\n
"
);
}
void
context_root_label_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-R --root_label : name of the root label (default is
\"
root
\"
)
\n
"
);
}
void
context_f2p_filename_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-P --f2p : form to pos (f2p) filename
\n
"
);
fprintf
(
stderr
,
"
\t
-P --f2p
<file>
: form to pos (f2p) filename
\n
"
);
}
context
*
context_read_options
(
int
argc
,
char
*
argv
[])
...
...
@@ -180,75 +158,58 @@ context *context_read_options(int argc, char *argv[])
ctx
->
program_name
=
strdup
(
argv
[
0
]);
static
struct
option
long_options
[
2
8
]
=
static
struct
option
long_options
[
2
1
]
=
{
{
"help"
,
no_argument
,
0
,
'h'
},
{
"verbose"
,
no_argument
,
0
,
'v'
},
{
"debug"
,
no_argument
,
0
,
'd'
},
{
"conll"
,
no_argument
,
0
,
'c'
},
{
"stream"
,
no_argument
,
0
,
'S'
},
{
"model"
,
required_argument
,
0
,
'm'
},
{
"df"
,
required_argument
,
0
,
'f'
},
{
"dc"
,
required_argument
,
0
,
'c'
},
{
"conll"
,
required_argument
,
0
,
'i'
},
{
"input"
,
required_argument
,
0
,
'i'
},
{
"iter"
,
required_argument
,
0
,
'n'
},
{
"cff"
,
required_argument
,
0
,
'x'
},
{
"cutoff"
,
required_argument
,
0
,
'u'
},
{
"hratio"
,
required_argument
,
0
,
'r'
},
{
"mode"
,
required_argument
,
0
,
'
o
'
},
{
"mode"
,
required_argument
,
0
,
'
M
'
},
{
"beam"
,
required_argument
,
0
,
'b'
},
{
"fann"
,
required_argument
,
0
,
'
y
'
},
{
"fann"
,
required_argument
,
0
,
'
f
'
},
{
"sent_nb"
,
required_argument
,
0
,
's'
},
/* {"alphabet", required_argument, 0, 'a'}, */
{
"dnn_model"
,
required_argument
,
0
,
'M'
},
{
"hidden_neurons_nb"
,
required_argument
,
0
,
'H'
},
{
"stag_file"
,
required_argument
,
0
,
'S'
},
{
"mcd"
,
required_argument
,
0
,
'C'
},
{
"feat_model"
,
required_argument
,
0
,
'F'
},
{
"vocabs"
,
required_argument
,
0
,
'V'
},
{
"stream"
,
required_argument
,
0
,
'T'
},
{
"language"
,
required_argument
,
0
,
'X'
},
{
"maca_data_path"
,
required_argument
,
0
,
'Y'
},
{
"language"
,
required_argument
,
0
,
'L'
},
{
"maca_data_path"
,
required_argument
,
0
,
'D'
},
{
"root_label"
,
required_argument
,
0
,
'R'
},
{
"form_col"
,
required_argument
,
0
,
'O'
},
{
"f2p"
,
required_argument
,
0
,
'P'
}
};
optind
=
0
;
opterr
=
0
;
while
((
c
=
getopt_long
(
argc
,
argv
,
"dhvT:m:f:c:i:n:x:u:r:o:b:y:s:M:H:S:C:F:V:X:Y:R:O:P:"
,
long_options
,
&
option_index
))
!=
-
1
){
while
((
c
=
getopt_long
(
argc
,
argv
,
"hvcSm:i:n:x:u:r:M:b:f:s:C:F:V:L:D:R:P:"
,
long_options
,
&
option_index
))
!=
-
1
){
switch
(
c
)
{
case
'd'
:
ctx
->
debug_mode
=
1
;
break
;
case
'h'
:
ctx
->
help
=
1
;
break
;
case
'v'
:
ctx
->
verbose
=
1
;
break
;
case
'T'
:
ctx
->
stream_mode
=
atoi
(
optarg
);
break
;
case
'y'
:
ctx
->
fann_filename
=
strdup
(
optarg
);
break
;
case
'n'
:
ctx
->
iteration_nb
=
atoi
(
optarg
);
case
'c'
:
ctx
->
conll
=
1
;
break
;
case
'
i
'
:
ctx
->
conll_filename
=
strdup
(
optarg
)
;
case
'
S
'
:
ctx
->
stream_mode
=
1
;
break
;
case
'm'
:
ctx
->
perc_model_filename
=
strdup
(
optarg
);
break
;
case
'M'
:
ctx
->
dnn_model_filename
=
strdup
(
optarg
);
break
;
case
'f'
:
ctx
->
dico_features_filename
=
strdup
(
optarg
);
case
'i'
:
ctx
->
input_filename
=
strdup
(
optarg
);
break
;
case
'
c
'
:
ctx
->
dico_classes_filename
=
strdup
(
optarg
);
case
'
n
'
:
ctx
->
iteration_nb
=
atoi
(
optarg
);
break
;
case
'x'
:
ctx
->
cff_filename
=
strdup
(
optarg
);
...
...
@@ -256,27 +217,23 @@ context *context_read_options(int argc, char *argv[])
case
'u'
:
ctx
->
feature_cutoff
=
atoi
(
optarg
);
break
;
case
'b'
:
ctx
->
beam_width
=
atoi
(
optarg
);
break
;
case
'r'
:
ctx
->
hash_ratio
=
atof
(
optarg
);
break
;
case
'
o
'
:
case
'
M
'
:
ctx
->
mode
=
(
!
strcmp
(
optarg
,
"TEST"
))
?
TEST_MODE
:
TRAIN_MODE
;
break
;
case
'
s
'
:
ctx
->
sent_nb
=
atoi
(
optarg
);
case
'
b
'
:
ctx
->
beam_width
=
atoi
(
optarg
);
break
;
case
'
H
'
:
ctx
->
hidden_neurons_nb
=
atoi
(
optarg
);
case
'
f
'
:
ctx
->
fann_filename
=
strdup
(
optarg
);
break
;
case
'
S
'
:
ctx
->
s
tag_desc_filename
=
strdup
(
optarg
);
case
'
s
'
:
ctx
->
s
ent_nb
=
atoi
(
optarg
);
break
;
case
'C'
:
ctx
->
mcd_filename
=
strdup
(
optarg
);
ctx
->
mcd_struct
=
mcd_read
(
ctx
->
mcd_filename
);
break
;
case
'F'
:
ctx
->
features_model_filename
=
strdup
(
optarg
);
...
...
@@ -284,18 +241,15 @@ context *context_read_options(int argc, char *argv[])
case
'V'
:
ctx
->
vocabs_filename
=
strdup
(
optarg
);
break
;
case
'
X
'
:
case
'
L
'
:
ctx
->
language
=
strdup
(
optarg
);
break
;
case
'
Y
'
:
case
'
D
'
:
ctx
->
maca_data_path
=
strdup
(
optarg
);
break
;
case
'R'
:
ctx
->
root_label
=
strdup
(
optarg
);
break
;
case
'O'
:
ctx
->
form_column
=
atoi
(
optarg
);
break
;
case
'P'
:
ctx
->
f2p_filename
=
strdup
(
optarg
);
ctx
->
f2p
=
form2pos_read
(
ctx
->
f2p_filename
);
...
...
@@ -303,32 +257,13 @@ context *context_read_options(int argc, char *argv[])
}
}
/* if(ctx->mcd_filename && ctx->conll_filename){
ctx->mcd_struct = mcd_read(ctx->mcd_filename, ctx->conll_filename);
ctx->mvt_nb = ctx->mcd_struct->dico_array[ctx->mcd_struct->type2col[FEAT_TYPE_LABEL]]->nbelem * 2 + 1;
}*/
/*
if(ctx->features_model && ctx->mcd_struct)
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
*/
/* if the form column has been set by user, change it in the mcd file */
/* if(ctx->form_column != -1){
ctx->mcd_struct = mcd_new(ctx->form_column + 1);
mcd_set_form_col(ctx->mcd_struct, ctx->form_column);
ctx->mcd_struct->representation[ctx->form_column] = MCD_REPRESENTATION_VOCAB;
ctx->mcd_struct->filename[ctx->form_column] = strdup("_");
ctx->mcd_struct->dico_array[ctx->form_column] = NULL;
ctx->mcd_struct->type_str[ctx->form_column] = strdup("FORM");
}*/
if
(
ctx
->
mcd_struct
==
NULL
){
if
(
ctx
->
mcd_filename
)
ctx
->
mcd_struct
=
mcd_read
(
ctx
->
mcd_filename
,
ctx
->
verbose
);
else
if
(
ctx
->
conll
)
ctx
->
mcd_struct
=
mcd_build_conll07
();
}
else
ctx
->
mcd_struct
=
mcd_build_ifpls
();
return
ctx
;
}
...
...
maca_trans_parser/src/context.h
View file @
64d47ec0
...
...
@@ -25,11 +25,9 @@
typedef
struct
{
int
help
;
char
*
program_name
;
char
*
conll
_filename
;
char
*
input
_filename
;
char
*
perc_model_filename
;
char
*
dnn_model_filename
;
char
*
dico_features_filename
;
char
*
dico_classes_filename
;
char
*
cff_filename
;
char
*
fann_filename
;
char
*
stag_desc_filename
;
...
...
@@ -57,8 +55,9 @@ typedef struct {
char
*
maca_data_path
;
char
*
language
;
char
*
root_label
;
int
form_column
;
form2pos
*
f2p
;
int
conll
;
int
ifpls
;
}
context
;
context
*
context_new
(
void
);
...
...
@@ -76,21 +75,21 @@ void context_cutoff_help_message(context *ctx);
void
context_mode_help_message
(
context
*
ctx
);
void
context_beam_help_message
(
context
*
ctx
);
void
context_sent_nb_help_message
(
context
*
ctx
);
void
context_alphabet_help_message
(
context
*
ctx
);
void
context_dnn_model_help_message
(
context
*
ctx
);
void
context_hidden_neurons_nb_help_message
(
context
*
ctx
);
void
context_stag_desc_filename_help_message
(
context
*
ctx
);
void
context_input_filename_help_message
(
context
*
ctx
);
void
context_mcd_help_message
(
context
*
ctx
);
void
context_features_model_help_message
(
context
*
ctx
);
void
context_vocabs_help_message
(
context
*
ctx
);
void
context_load_alphabets
(
context
*
ctx
);
void
context_print_alphabets
(
context
*
ctx
);
void
context_language_help_message
(
context
*
ctx
);
void
context_maca_data_path_help_message
(
context
*
ctx
);
void
context_f2p_filename_help_message
(
context
*
ctx
);
void
context_conll_help_message
(
context
*
ctx
);
void
context_ifpls_help_message
(
context
*
ctx
);
void
context_input_help_message
(
context
*
ctx
);
#endif
maca_trans_parser/src/decode.c
View file @
64d47ec0
...
...
@@ -18,15 +18,13 @@ void decode_help_message(context *ctx)
{
context_general_help_message
(
ctx
);
context_beam_help_message
(
ctx
);
fprintf
(
stderr
,
"INPUT
\n
"
);
context_conll_help_message
(
ctx
);
fprintf
(
stderr
,
"INPUT
\n
"
);
context_input_help_message
(
ctx
);
context_mcd_help_message
(
ctx
);
context_model_help_message
(
ctx
);
context_vocabs_help_message
(
ctx
);
context_features_model_help_message
(
ctx
);
context_language_help_message
(
ctx
);
context_maca_data_path_help_message
(
ctx
);
}
void
decode_check_options
(
context
*
ctx
){
...
...
@@ -83,29 +81,25 @@ void set_linguistic_resources_filenames_parser(context *ctx)
ctx
->
features_model_filename
=
strdup
(
absolute_filename
);
}
if
(
ctx
->
verbose
){
fprintf
(
stderr
,
"perc_model_filename = %s
\n
"
,
ctx