Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
macaon2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Alexis Nasr
macaon2
Commits
df494a92
Commit
df494a92
authored
8 years ago
by
Alexis Nasr
Browse files
Options
Downloads
Patches
Plain Diff
modified several little details in maca_lexer
parent
ad8f50f8
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
maca_lexer/src/context.c
+21
-13
21 additions, 13 deletions
maca_lexer/src/context.c
maca_lexer/src/context.h
+2
-3
2 additions, 3 deletions
maca_lexer/src/context.h
maca_lexer/src/maca_lexer.c
+55
-18
55 additions, 18 deletions
maca_lexer/src/maca_lexer.c
with
78 additions
and
34 deletions
maca_lexer/src/context.c
+
21
−
13
View file @
df494a92
...
@@ -14,11 +14,11 @@ void context_free(context *ctx)
...
@@ -14,11 +14,11 @@ void context_free(context *ctx)
if
(
ctx
->
program_name
)
free
(
ctx
->
program_name
);
if
(
ctx
->
program_name
)
free
(
ctx
->
program_name
);
if
(
ctx
->
input_filename
)
free
(
ctx
->
input_filename
);
if
(
ctx
->
input_filename
)
free
(
ctx
->
input_filename
);
if
(
ctx
->
output_filename
)
free
(
ctx
->
output_filename
);
if
(
ctx
->
output_filename
)
free
(
ctx
->
output_filename
);
if
(
ctx
->
fplm_filename
)
free
(
ctx
->
fplm_filename
);
if
(
ctx
->
language
)
free
(
ctx
->
language
);
if
(
ctx
->
language
)
free
(
ctx
->
language
);
if
(
ctx
->
maca_data_path
)
free
(
ctx
->
maca_data_path
);
if
(
ctx
->
maca_data_path
)
free
(
ctx
->
maca_data_path
);
if
(
ctx
->
mwe_filename
)
free
(
ctx
->
mwe_filename
);
if
(
ctx
->
mwe_filename
)
free
(
ctx
->
mwe_filename
);
if
(
ctx
->
mwe_tokens_dico_filename
)
free
(
ctx
->
mwe_tokens_dico_filename
);
if
(
ctx
->
mwe_tokens_dico_filename
)
free
(
ctx
->
mwe_tokens_dico_filename
);
if
(
ctx
->
mwe_tokens_separator
)
free
(
ctx
->
mwe_tokens_separator
);
free
(
ctx
);
free
(
ctx
);
}
}
...
@@ -30,7 +30,6 @@ context *context_new(void)
...
@@ -30,7 +30,6 @@ context *context_new(void)
ctx
->
verbose
=
0
;
ctx
->
verbose
=
0
;
ctx
->
debug_mode
=
0
;
ctx
->
debug_mode
=
0
;
ctx
->
program_name
=
NULL
;
ctx
->
program_name
=
NULL
;
ctx
->
fplm_filename
=
NULL
;
ctx
->
mcd_filename
=
NULL
;
ctx
->
mcd_filename
=
NULL
;
ctx
->
mcd_struct
=
NULL
;
ctx
->
mcd_struct
=
NULL
;
ctx
->
language
=
strdup
(
"fr"
);
ctx
->
language
=
strdup
(
"fr"
);
...
@@ -40,6 +39,7 @@ context *context_new(void)
...
@@ -40,6 +39,7 @@ context *context_new(void)
ctx
->
output_filename
=
NULL
;
ctx
->
output_filename
=
NULL
;
ctx
->
mwe_filename
=
NULL
;
ctx
->
mwe_filename
=
NULL
;
ctx
->
mwe_tokens_dico_filename
=
NULL
;
ctx
->
mwe_tokens_dico_filename
=
NULL
;
ctx
->
mwe_tokens_separator
=
strdup
(
" "
);
return
ctx
;
return
ctx
;
}
}
...
@@ -60,10 +60,6 @@ void context_form_column_help_message(context *ctx){
...
@@ -60,10 +60,6 @@ void context_form_column_help_message(context *ctx){
fprintf
(
stderr
,
"
\t
-F --form_column <int> : column containing form
\n
"
);
fprintf
(
stderr
,
"
\t
-F --form_column <int> : column containing form
\n
"
);
}
}
void
context_fplm_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-f --fplm <file> : fplm (form pos lemma morpho) file
\n
"
);
}
void
context_mcd_help_message
(
context
*
ctx
){
void
context_mcd_help_message
(
context
*
ctx
){
fprintf
(
stderr
,
"
\t
-C --mcd <file> : multi column description file name
\n
"
);
fprintf
(
stderr
,
"
\t
-C --mcd <file> : multi column description file name
\n
"
);
}
}
...
@@ -76,6 +72,18 @@ void context_maca_data_path_help_message(context *ctx){
...
@@ -76,6 +72,18 @@ void context_maca_data_path_help_message(context *ctx){
fprintf
(
stderr
,
"
\t
-M --maca_data_path : path to maca_data directory
\n
"
);
fprintf
(
stderr
,
"
\t
-M --maca_data_path : path to maca_data directory
\n
"
);
}
}
/*
 * Write the usage line for the -s / --mwe_sep option to stderr.
 *
 * ctx is accepted for signature uniformity with the other help-message
 * functions; it is not read here.
 */
void context_mwe_token_separator_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-s --mwe_sep <string> : multi word expression tokens separator (default is space character)\n",
        stderr);
}
/*
 * Write the usage line for the -M / --mwe option to stderr.
 *
 * ctx is accepted for signature uniformity with the other help-message
 * functions; it is not read here.
 */
void context_mwe_filename_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-M --mwe <filename> : multi word expression file\n", stderr);
}
/*
 * Write the usage line for the -V / --vocab option to stderr.
 *
 * ctx is accepted for signature uniformity with the other help-message
 * functions; it is not read here.
 */
void context_vocab_help_message(context *ctx)
{
  (void)ctx;
  fputs("\t-V --vocab <filename> : multi word expression tokens vocabulary file\n", stderr);
}
context
*
context_read_options
(
int
argc
,
char
*
argv
[])
context
*
context_read_options
(
int
argc
,
char
*
argv
[])
{
{
int
c
;
int
c
;
...
@@ -84,7 +92,7 @@ context *context_read_options(int argc, char *argv[])
...
@@ -84,7 +92,7 @@ context *context_read_options(int argc, char *argv[])
ctx
->
program_name
=
strdup
(
argv
[
0
]);
ctx
->
program_name
=
strdup
(
argv
[
0
]);
static
struct
option
long_options
[
1
2
]
=
static
struct
option
long_options
[
1
3
]
=
{
{
{
"help"
,
no_argument
,
0
,
'h'
},
{
"help"
,
no_argument
,
0
,
'h'
},
{
"verbose"
,
no_argument
,
0
,
'v'
},
{
"verbose"
,
no_argument
,
0
,
'v'
},
...
@@ -93,16 +101,16 @@ context *context_read_options(int argc, char *argv[])
...
@@ -93,16 +101,16 @@ context *context_read_options(int argc, char *argv[])
{
"output"
,
required_argument
,
0
,
'o'
},
{
"output"
,
required_argument
,
0
,
'o'
},
{
"mcd"
,
required_argument
,
0
,
'C'
},
{
"mcd"
,
required_argument
,
0
,
'C'
},
{
"language"
,
required_argument
,
0
,
'L'
},
{
"language"
,
required_argument
,
0
,
'L'
},
{
"fplm"
,
required_argument
,
0
,
'f'
},
{
"form_column"
,
required_argument
,
0
,
'F'
},
{
"form_column"
,
required_argument
,
0
,
'F'
},
{
"maca_data_path"
,
required_argument
,
0
,
'D'
},
{
"maca_data_path"
,
required_argument
,
0
,
'D'
},
{
"mwe"
,
required_argument
,
0
,
'M'
},
{
"mwe"
,
required_argument
,
0
,
'M'
},
{
"vocab"
,
required_argument
,
0
,
'V'
}
{
"vocab"
,
required_argument
,
0
,
'V'
},
{
"mwe_sep"
,
required_argument
,
0
,
's'
}
};
};
optind
=
0
;
optind
=
0
;
opterr
=
0
;
opterr
=
0
;
while
((
c
=
getopt_long
(
argc
,
argv
,
"hvdi:o:
f:
C:L:M:F:D:V:"
,
long_options
,
&
option_index
))
!=
-
1
){
while
((
c
=
getopt_long
(
argc
,
argv
,
"hvdi:o:C:L:M:F:D:V:
s:
"
,
long_options
,
&
option_index
))
!=
-
1
){
switch
(
c
)
switch
(
c
)
{
{
case
'd'
:
case
'd'
:
...
@@ -117,9 +125,6 @@ context *context_read_options(int argc, char *argv[])
...
@@ -117,9 +125,6 @@ context *context_read_options(int argc, char *argv[])
case
'F'
:
case
'F'
:
ctx
->
form_column
=
atoi
(
optarg
)
-
1
;
ctx
->
form_column
=
atoi
(
optarg
)
-
1
;
break
;
break
;
case
'f'
:
ctx
->
fplm_filename
=
strdup
(
optarg
);
break
;
case
'i'
:
case
'i'
:
ctx
->
input_filename
=
strdup
(
optarg
);
ctx
->
input_filename
=
strdup
(
optarg
);
break
;
break
;
...
@@ -141,6 +146,9 @@ context *context_read_options(int argc, char *argv[])
...
@@ -141,6 +146,9 @@ context *context_read_options(int argc, char *argv[])
case
'M'
:
case
'M'
:
ctx
->
mwe_filename
=
strdup
(
optarg
);
ctx
->
mwe_filename
=
strdup
(
optarg
);
break
;
break
;
case
's'
:
ctx
->
mwe_tokens_separator
=
strdup
(
optarg
);
break
;
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
maca_lexer/src/context.h
+
2
−
3
View file @
df494a92
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
#include
"mcd.h"
#include
"mcd.h"
#include
<stdlib.h>
#include
<stdlib.h>
#define DEFAULT_MWE_TOKENS_DICO_FILENAME "
mwe
_tokens"
#define DEFAULT_MWE_TOKENS_DICO_FILENAME "
d
_tokens
.dico
"
#define DEFAULT_MWE_FILENAME "mwe"
#define DEFAULT_MWE_FILENAME "mwe"
typedef
struct
{
typedef
struct
{
...
@@ -12,7 +12,6 @@ typedef struct {
...
@@ -12,7 +12,6 @@ typedef struct {
int
verbose
;
int
verbose
;
int
debug_mode
;
int
debug_mode
;
char
*
program_name
;
char
*
program_name
;
char
*
fplm_filename
;
char
*
language
;
char
*
language
;
char
*
maca_data_path
;
char
*
maca_data_path
;
char
*
mcd_filename
;
char
*
mcd_filename
;
...
@@ -22,6 +21,7 @@ typedef struct {
...
@@ -22,6 +21,7 @@ typedef struct {
char
*
output_filename
;
char
*
output_filename
;
char
*
mwe_filename
;
char
*
mwe_filename
;
char
*
mwe_tokens_dico_filename
;
char
*
mwe_tokens_dico_filename
;
char
*
mwe_tokens_separator
;
}
context
;
}
context
;
context
*
context_new
(
void
);
context
*
context_new
(
void
);
...
@@ -31,7 +31,6 @@ context *context_read_options(int argc, char *argv[]);
...
@@ -31,7 +31,6 @@ context *context_read_options(int argc, char *argv[]);
void
context_general_help_message
(
context
*
ctx
);
void
context_general_help_message
(
context
*
ctx
);
void
context_conll_help_message
(
context
*
ctx
);
void
context_conll_help_message
(
context
*
ctx
);
void
context_language_help_message
(
context
*
ctx
);
void
context_language_help_message
(
context
*
ctx
);
void
context_fplm_help_message
(
context
*
ctx
);
void
context_maca_data_path_help_message
(
context
*
ctx
);
void
context_maca_data_path_help_message
(
context
*
ctx
);
void
context_mcd_help_message
(
context
*
ctx
);
void
context_mcd_help_message
(
context
*
ctx
);
void
context_form_column_help_message
(
context
*
ctx
);
void
context_form_column_help_message
(
context
*
ctx
);
...
...
This diff is collapsed.
Click to expand it.
maca_lexer/src/maca_lexer.c
+
55
−
18
View file @
df494a92
...
@@ -7,6 +7,39 @@
...
@@ -7,6 +7,39 @@
#include
"util.h"
#include
"util.h"
#include
"context.h"
#include
"context.h"
/*
 * Print the complete usage text of maca_lexer.
 *
 * Emits the general help first, then an "INPUT" heading on stderr followed
 * by one line per option, each produced by the matching context_*_help_message
 * function. The call order below is the order in which the options are listed.
 *
 * NOTE(review): confirm that the mwe_token_separator / mwe_filename / vocab
 * help functions are prototyped in context.h; no declaration is visible here.
 */
void maca_lexer_help_message(context *ctx)
{
  context_general_help_message(ctx);

  fputs("INPUT\n", stderr);

  /* input description options */
  context_input_help_message(ctx);
  context_mcd_help_message(ctx);
  context_language_help_message(ctx);
  context_maca_data_path_help_message(ctx);
  context_form_column_help_message(ctx);

  /* multi word expression options */
  context_mwe_token_separator_help_message(ctx);
  context_mwe_filename_help_message(ctx);
  context_vocab_help_message(ctx);
}
/*
 * Act on the parsed command-line options.
 *
 * When ctx->help is set, the usage text is printed and the process
 * terminates with exit status 1; otherwise the function returns
 * without doing anything.
 */
void maca_lexer_check_options(context *ctx)
{
  if (!ctx->help)
    return;

  maca_lexer_help_message(ctx);
  exit(1);
}
/*
 * Find the last position in a path whose trie state is accepting.
 *
 * states_array[0 .. path_index-1] holds trie state indices; the entries are
 * examined from position path_index-1 down to 0 and each is looked up in
 * mwe_trie->states.
 *
 * Returns the highest position whose state has is_accept set, or -1 when
 * no examined state is accepting (including when path_index <= 0).
 */
int look_for_accept_state_in_path(trie *mwe_trie, int *states_array, int path_index)
{
  int pos = path_index;

  /* pos is decremented before the body runs, so the scan starts at path_index-1 */
  while (pos-- > 0) {
    if (mwe_trie->states[states_array[pos]]->is_accept)
      return pos;
  }
  return -1;
}
int
main
(
int
argc
,
char
*
argv
[])
int
main
(
int
argc
,
char
*
argv
[])
{
{
char
buffer
[
10000
];
char
buffer
[
10000
];
...
@@ -26,7 +59,7 @@ int main(int argc, char *argv[])
...
@@ -26,7 +59,7 @@ int main(int argc, char *argv[])
int
i
;
int
i
;
ctx
=
context_read_options
(
argc
,
argv
);
ctx
=
context_read_options
(
argc
,
argv
);
/*
maca_lexer_check_options(ctx);
*/
maca_lexer_check_options
(
ctx
);
if
(
ctx
->
form_column
!=
-
1
)
if
(
ctx
->
form_column
!=
-
1
)
...
@@ -39,7 +72,10 @@ int main(int argc, char *argv[])
...
@@ -39,7 +72,10 @@ int main(int argc, char *argv[])
else
else
f
=
myfopen
(
ctx
->
input_filename
,
"r"
);
f
=
myfopen
(
ctx
->
input_filename
,
"r"
);
if
(
ctx
->
verbose
)
fprintf
(
stderr
,
"reading mwe list from file : %s
\n
"
,
ctx
->
mwe_filename
);
mwe_trie
=
trie_build_from_collection
(
ctx
->
mwe_filename
);
mwe_trie
=
trie_build_from_collection
(
ctx
->
mwe_filename
);
if
(
ctx
->
verbose
)
fprintf
(
stderr
,
"reading mwe tokens vocabulary from file : %s
\n
"
,
ctx
->
mwe_tokens_dico_filename
);
d_mwe_tokens
=
dico_read
(
ctx
->
mwe_tokens_dico_filename
,
0
.
5
);
d_mwe_tokens
=
dico_read
(
ctx
->
mwe_tokens_dico_filename
,
0
.
5
);
/* trie_print(stdout, mwe_trie); */
/* trie_print(stdout, mwe_trie); */
...
@@ -58,7 +94,8 @@ int main(int argc, char *argv[])
...
@@ -58,7 +94,8 @@ int main(int argc, char *argv[])
symbols_array
[
path_index
]
=
form_code
;
symbols_array
[
path_index
]
=
form_code
;
states_array
[
path_index
]
=
(
form_code
==
-
1
)
?
0
states_array
[
path_index
]
=
(
form_code
==
-
1
)
?
0
:
trie_destination_state
(
mwe_trie
,
(
path_index
==
0
)
?
0
:
states_array
[
path_index
-
1
],
form_code
);
:
trie_destination_state
(
mwe_trie
,
(
path_index
==
0
)
?
0
:
states_array
[
path_index
-
1
],
form_code
);
/* printf("buffer = %s ", buffer);
/*
printf("buffer = %s ", buffer);
printf("code = %d\n", form_code);
printf("code = %d\n", form_code);
...
@@ -72,26 +109,27 @@ int main(int argc, char *argv[])
...
@@ -72,26 +109,27 @@ int main(int argc, char *argv[])
printf("\n");
printf("\n");
*/
*/
if
(
states_array
[
path_index
]
==
0
){
/* in initial state of trie */
if
(
states_array
[
path_index
]
==
0
){
/* in initial state of trie */
if
(
path_index
==
0
){
/* nothing has been recognized */
/* nothing has been recognized */
if
(
path_index
==
0
)
printf
(
"%s
\n
"
,
buffer
);
printf
(
"%s
\n
"
,
buffer
);
}
else
{
/* there is something in the path */
else
{
int
accept_state_index
=
look_for_accept_state_in_path
(
mwe_trie
,
states_array
,
path_index
);
if
(
mwe_trie
->
states
[
states_array
[
p
at
h
_index
-
1
]]
->
is_accept
){
/* all tokens in path s.t. 0 <= token_index <= accept_st
at
e
_index
form an mwe */
for
(
i
=
0
;
i
<
p
at
h
_index
;
i
++
){
for
(
i
=
0
;
i
<
=
accept_st
at
e
_index
;
i
++
){
if
(
i
>
0
)
printf
(
"
#"
);
if
(
i
>
0
)
printf
(
"
%s"
,
ctx
->
mwe_tokens_separator
);
printf
(
"%s"
,
dico_int2string
(
d_mwe_tokens
,
symbols_array
[
i
]));
printf
(
"%s"
,
dico_int2string
(
d_mwe_tokens
,
symbols_array
[
i
]));
}
}
printf
(
"
\n
"
);
if
(
accept_state_index
!=
-
1
)
printf
(
"
\n
"
);
}
/* all tokens in path s.t. accept_state_index < token_index < path_index do not form an mwe */
else
{
for
(
i
=
accept_state_index
+
1
;
i
<
path_index
;
i
++
){
for
(
i
=
0
;
i
<
path_index
;
i
++
){
printf
(
"%s
\n
"
,
dico_int2string
(
d_mwe_tokens
,
symbols_array
[
i
]));
printf
(
"%s
\n
"
,
dico_int2string
(
d_mwe_tokens
,
symbols_array
[
i
]));
}
}
}
/* do not forget to print the current token */
printf
(
"%s
\n
"
,
buffer
);
printf
(
"%s
\n
"
,
buffer
);
}
path_index
=
0
;
path_index
=
0
;
}
}
}
/* not in state 0 of trie */
else
{
else
{
path_index
++
;
path_index
++
;
}
}
...
@@ -99,4 +137,3 @@ int main(int argc, char *argv[])
...
@@ -99,4 +137,3 @@ int main(int argc, char *argv[])
}
}
return
0
;
return
0
;
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment