Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alexis Nasr
macaon2
Commits
fa380ae0
Commit
fa380ae0
authored
Jan 11, 2018
by
Alexis Nasr
Browse files
Merge branch 'master' of gitlab.lif.univ-mrs.fr:alexis.nasr/macaon2
parents
fcfe6c1c
f50e7d93
Changes
6
Hide whitespace changes
Inline
Side-by-side
maca_common/include/word.h
View file @
fa380ae0
#ifndef __WORD__
#define __WORD__
#include
<ctype.h>
#include
"mcd.h"
#include
"char16.h"
...
...
@@ -28,6 +28,7 @@ typedef struct _word {
#define word_get_s5(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 5))? -1 : (w)->form[strlen((w)->form) - 5])
#define word_get_s6(w) ((((w) == NULL) || ((w)->form == NULL) || (strlen((w)->form) < 6))? -1 : (w)->form[strlen((w)->form) - 6])
*/
/*
 * word_get_sN(w): N-th char16 unit counted from the end of w->form_char16
 * (s1 is the last unit, s2 the one before it, ...).
 * Evaluates to -1 when w is NULL, when w->form_char16 is NULL, or when
 * char16_strlen() reports fewer than N units.
 * The argument is expanded several times and char16_strlen() is called
 * twice per use, so pass an expression without side effects.
 */
#define word_get_s1(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 1))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 1])
#define word_get_s2(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 2))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 2])
#define word_get_s3(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 3))? -1 : (w)->form_char16[char16_strlen((w)->form_char16) - 3])
...
...
@@ -51,8 +52,8 @@ typedef struct _word {
/*
 * word_get_p6(w): sixth char16 unit (index 5) of w->form_char16.
 * Evaluates to -1 when w is NULL, when w->form_char16 is NULL, or when the
 * form holds fewer than 6 units. The bound is 6, not 5: with exactly 5
 * units, index 5 is the terminator, which is not a character of the form.
 * The argument is expanded several times; pass an expression without side
 * effects.
 */
#define word_get_p6(w) ((((w) == NULL) || ((w)->form_char16 == NULL) || (char16_strlen((w)->form_char16) < 6))? -1 : (w)->form_char16[5])
/*
 * Accessors for the per-word feature array: each one evaluates to
 * w->wf_array[MCD_WF_*], or to -1 when w is NULL.
 * The argument is expanded twice; pass an expression without side effects.
 * word_get_offset and word_get_length are defined exactly once each (the
 * earlier split-line duplicates were left-overs and did not form valid
 * definitions).
 */
#define word_get_id(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_ID])
#define word_get_offset(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_OFFSET])
#define word_get_length(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LENGTH])
#define word_get_form(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_FORM])
#define word_get_lemma(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_LEMMA])
#define word_get_cpos(w) (((w) == NULL) ? -1 : (w)->wf_array[MCD_WF_CPOS])
...
...
maca_common/include/word_buffer.h
View file @
fa380ae0
...
...
@@ -32,13 +32,13 @@
/* Evaluates to 1 when the buffer's nbelem is 0, and to 0 otherwise. */
#define word_buffer_is_empty(wb) ((wb)->nbelem == 0)
/*
 * Buffer of words read from an input file whose format is described by an mcd.
 * Each member is declared exactly once (the previous text listed every
 * member twice, which is not a valid struct definition).
 */
typedef struct {
  int size;           /* size of the array used to store words */
  int nbelem;         /* number of words in the buffer */
  int lookahead;      /* number of words between the current word and the last word of the buffer */
  int current_index;  /* position of the current word */
  word **array;       /* array to store words */
  FILE *input_file;   /* file to read the words from */
  mcd *mcd_struct;    /* mcd describing the format of input_file */
} word_buffer;
...
...
maca_trans_parser/CMakeLists.txt
View file @
fa380ae0
...
...
@@ -210,9 +210,10 @@ install (TARGETS maca_trans_lemmatizer DESTINATION bin)
#target_link_libraries(test_w2v transparse)
#install (TARGETS test_w2v DESTINATION bin)
#add_executable(w2v_filter ./src/w2v_filter.c)
#target_link_libraries(w2v_filter transparse)
#install (TARGETS w2v_filter DESTINATION bin)
# Build the w2v_filter tool from src/w2v_filter.c, link it against the
# transparse and maca_common targets, and install it into bin.
add_executable(w2v_filter ./src/w2v_filter.c)
target_link_libraries(w2v_filter transparse)
target_link_libraries(w2v_filter maca_common)
install(TARGETS w2v_filter DESTINATION bin)
#add_executable(test_word_emb ./src/test_word_emb.c)
#target_link_libraries(test_word_emb transparse)
...
...
maca_trans_parser/src/cff2fann.c
View file @
fa380ae0
...
...
@@ -43,6 +43,20 @@ void one_hot_print(FILE *f, int val, int dim)
fprintf
(
f
,
"%d "
,
(
i
==
val
)
?
1
:
0
);
}
/*
 * Check that no feature of the model is a complex feature.
 *
 * Walks fm->array[0 .. fm->nbelem - 1]; the first descriptor whose nbelem
 * is greater than 1 is reported on stderr with its index and the process
 * exits with status 1. Returns normally when every descriptor has at most
 * one element.
 */
void check_feature_model(feat_model *fm)
{
  int idx = 0;

  while (idx < fm->nbelem) {
    feat_desc *desc = fm->array[idx];

    if (desc->nbelem > 1) {
      fprintf(stderr, "feature %d is a complex feature, aborting\n", idx);
      exit(1);
    }
    idx++;
  }
}
void
print_header
(
mcd
*
m
,
feat_model
*
fm
)
{
int
i
;
...
...
@@ -53,33 +67,24 @@ void print_header(mcd *m, feat_model *fm)
for
(
i
=
0
;
i
<
fm
->
nbelem
;
i
++
){
fd
=
fm
->
array
[
i
];
if
(
fd
->
nbelem
>
1
){
fprintf
(
stderr
,
"feature %d is a complex feature, skipping it
\n
"
,
i
);
}
else
{
sfd
=
fd
->
array
[
0
];
printf
(
"
\t
%s"
,
sfd
->
name
);
}
sfd
=
fd
->
array
[
0
];
printf
(
"
\t
%s"
,
sfd
->
name
);
}
printf
(
"
\n
"
);
printf
(
"OUT"
);
for
(
i
=
0
;
i
<
fm
->
nbelem
;
i
++
){
fd
=
fm
->
array
[
i
];
if
(
fd
->
nbelem
>
1
){
fprintf
(
stderr
,
"feature %d is a complex feature, skipping it
\n
"
,
i
);
}
else
{
sfd
=
fd
->
array
[
0
];
if
(
sfd
->
type
==
FEAT_TYPE_FORM
){
printf
(
"
\t
FORM"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_LEMMA
){
printf
(
"
\t
LEMMA"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_CPOS
){
printf
(
"
\t
CPOS"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_POS
){
printf
(
"
\t
POS"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_LABEL
){
printf
(
"
\t
LABEL"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_INT
){
printf
(
"
\t
INT"
);
continue
;}
printf
(
"
\t
UNK"
);
}
sfd
=
fd
->
array
[
0
];
if
(
sfd
->
type
==
FEAT_TYPE_FORM
){
printf
(
"
\t
FORM"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_LEMMA
){
printf
(
"
\t
LEMMA"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_CPOS
){
printf
(
"
\t
CPOS"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_POS
){
printf
(
"
\t
POS"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_LABEL
){
printf
(
"
\t
LABEL"
);
continue
;}
if
(
sfd
->
type
==
FEAT_TYPE_INT
){
printf
(
"
\t
INT"
);
continue
;}
printf
(
"
\t
UNK"
);
}
printf
(
"
\n
"
);
/*
for(i=0; i < m->nb_col; i++){
...
...
@@ -118,6 +123,7 @@ void cff2fann(context *ctx)
char
feature_type
[
64
];
int
feature_valindex
;
int
count
=
0
;
char
*
feat_str
=
NULL
;
vocab
=
dico_vec_get_dico
(
ctx
->
vocabs
,
(
char
*
)
"d_perceptron_features"
);
...
...
@@ -133,34 +139,46 @@ void cff2fann(context *ctx)
if
(
count
%
100
==
0
)
fprintf
(
stderr
,
"%d
\r
"
,
count
);
while
(
token
){
/* printf("col = %d token = %s
max = %d
\n", col_nb, token
, max_array[col_nb]
);
*/
/* printf("col = %d token = %s\n", col_nb, token); */
val
=
atoi
(
token
);
if
(
col_nb
==
0
){
/* one_hot_print(stdout, val, ctx->mvt_nb); */
/* printf("\n"); */
printf
(
"%d"
,
val
);
}
else
{
sscanf
(
dico_int2string
(
vocab
,
val
),
"%[^==]==%d"
,
feature_type
,
&
feature_valindex
);
/* printf("feature_type = %s\n", feature_type); */
feat_type
=
feat_model_get_type_feat_n
(
ctx
->
features_model
,
col_nb
-
1
);
/* printf("feat_type = %d\n", feat_type); */
/* printf("%d: ", col_nb); */
int
mcd_col
=
m
->
wf2col
[
feat_type
];
/* printf("representation = %d\n", m->representation[mcd_col]); */
if
(
m
->
representation
[
mcd_col
]
==
MCD_REPRESENTATION_EMB
){
/* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */
/* word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex); */
/* printf("\n"); */
printf
(
"
\t
%d"
,
feature_valindex
);
}
else
if
(
m
->
representation
[
mcd_col
]
==
MCD_REPRESENTATION_VOCAB
){
/* printf("it is a vocab\n"); */
/* one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem); */
/* printf("\n"); */
feat_str
=
dico_int2string
(
vocab
,
val
);
if
(
feat_str
){
/* printf("feat str = %s\n", feat_str); */
sscanf
(
feat_str
,
"%[^==]==%d"
,
feature_type
,
&
feature_valindex
);
/* printf("feature_type = %s\n", feature_type); */
feat_type
=
feat_model_get_type_feat_n
(
ctx
->
features_model
,
col_nb
-
1
);
/* printf("feat_type = %d\n", feat_type); */
/* printf("%d: ", col_nb); */
int
mcd_col
=
m
->
wf2col
[
feat_type
];
/* printf("representation = %d\n", m->representation[mcd_col]); */
if
(
m
->
representation
[
mcd_col
]
==
MCD_REPRESENTATION_EMB
){
/* printf("it is an embedding val = %d, file = %s\n", val, m->filename[mcd_col]); */
/* word_emb_print(stdout, m->word_emb_array[mcd_col], feature_valindex); */
/* printf("\n"); */
printf
(
"
\t
%d"
,
feature_valindex
);
}
else
if
(
m
->
representation
[
mcd_col
]
==
MCD_REPRESENTATION_VOCAB
){
/* printf("it is a vocab\n"); */
/* one_hot_print(stdout, feature_valindex, m->dico_array[mcd_col]->nbelem); */
/* printf("\n"); */
printf
(
"
\t
%d"
,
feature_valindex
);
}
else
{
printf
(
"
\t
%d"
,
feature_valindex
);
}
}
else
{
fprintf
(
stderr
,
"WARNING cannot find the description of feature : %d
\n
"
,
val
);
feature_valindex
=
-
1
;
printf
(
"
\t
%d"
,
feature_valindex
);
}
else
{
printf
(
"
\t
%d"
,
feature_valindex
);
}
}
}
col_nb
++
;
token
=
strtok
(
NULL
,
"
\t
"
);
...
...
@@ -184,6 +202,9 @@ int main(int argc, char *argv[])
ctx
->
features_model
=
feat_model_read
(
ctx
->
features_model_filename
,
feat_lib_build
(),
ctx
->
verbose
);
check_feature_model
(
ctx
->
features_model
);
look_for_number_of_features_and_classes
(
ctx
->
cff_filename
,
&
nb_feat
,
&
nb_class
);
ctx
->
mvt_nb
=
nb_class
;
...
...
maca_trans_parser/src/maca_trans_lemmatizer.c
View file @
fa380ae0
...
...
@@ -158,7 +158,8 @@ int main(int argc, char *argv[])
lemma_from_fplm
=
fplm_lookup_lemma
(
exceptions
,
form
,
pos
,
ctx
->
verbose
);
if
(
lemma_from_fplm
){
// printf("lemma %s found in exceptions file\n", lemma_from_fplm);
print_word
(
b0
,
ctx
->
mcd_struct
,
lemma_from_fplm
);
// print_word(b0, ctx->mcd_struct, to_lower_string(lemma_from_fplm));
print_word
(
b0
,
ctx
->
mcd_struct
,
lemma_from_fplm
);
}
// if lemma is not found in exception file, predict an l_rule
else
{
...
...
@@ -185,15 +186,16 @@ int main(int argc, char *argv[])
if
(
l_rule_is_applicable
(
form
,
l_rule
)){
char
*
transformed_lemma
=
apply_l_rule
(
form
,
l_rule
);
// printf("transformed_lemma = %s\n", transformed_lemma);
// print_word(b0, ctx->mcd_struct, to_lower_string(transformed_lemma));
print_word
(
b0
,
ctx
->
mcd_struct
,
transformed_lemma
);
//
print_word(b0, ctx->mcd_struct, to_lower_string(transformed_lemma));
print_word
(
b0
,
ctx
->
mcd_struct
,
transformed_lemma
);
free
(
transformed_lemma
);
break
;
}
}
/* no rule applied */
if
(
i
==
10
){
print_word
(
b0
,
ctx
->
mcd_struct
,
form
);
// print_word(b0, ctx->mcd_struct, to_lower_string(form));
print_word
(
b0
,
ctx
->
mcd_struct
,
form
);
}
free
(
vcode_array
);
}
...
...
maca_trans_parser/src/movement_parser_arc_eager.h
View file @
fa380ae0
...
...
@@ -7,9 +7,9 @@
/*
 * Movement codes of the arc-eager parser.
 * Each code is defined exactly once. EOS/LEFT/RIGHT use the values 3/4/5,
 * which agree with movement_parser_left_code(0) == MVT_PARSER_LEFT below
 * (the previous text also carried a second, conflicting set of split-line
 * definitions: -1/3/4).
 */
#define MVT_PARSER_SHIFT 0
#define MVT_PARSER_REDUCE 1
#define MVT_PARSER_ROOT 2
#define MVT_PARSER_EOS 3
#define MVT_PARSER_LEFT 4
#define MVT_PARSER_RIGHT 5
/* even movements are left movements (except 0, which is shift and 2 which is root) */
#define movement_parser_left_code(label) (2 * (label) + 4)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment