Skip to content
Snippets Groups Projects
Commit 4fc0c025 authored by Alexis Nasr's avatar Alexis Nasr
Browse files

new structure called word buffer to store words

parent 49dee1d0
Branches
No related tags found
No related merge requests found
......@@ -118,6 +118,7 @@ typedef struct {
mcd *mcd_build_conll07(void);
mcd *mcd_build_ifpls(void);
mcd *mcd_build_wplgf(void);
mcd *mcd_read(char *mcd_filename, int verbose);
void mcd_link_to_dico(mcd *m, dico_vec *vocabs, int verbose);
......
......@@ -101,7 +101,7 @@ word *word_create_dummy(mcd *mcd_struct);
word *word_copy(word *w);
void word_free(word *w);
void word_print2(FILE *f, word *w);
void word_print(FILE *f, word *w, mcd *mcd_struct, dico *dico_labels);
void word_print(FILE *f, word *w);
word *word_read(FILE *f, mcd *mcd_struct);
......
......@@ -250,6 +250,50 @@ mcd *mcd_build_conll07(void)
/* builds an mcd describing a five column layout whose columns are, in order:
   FORM, POS, LEMMA, GOV, LABEL.
   GOV is stored as an integer, the four other columns use a vocabulary.
   Every column gets the placeholder filename "_".
   Returns the newly created mcd. */
mcd *mcd_build_wplgf(void)
{
  /* the three tables below are parallel: entry k describes column k */
  const int features[] = {MCD_WF_FORM, MCD_WF_POS, MCD_WF_LEMMA, MCD_WF_GOV, MCD_WF_LABEL};
  const char *const names[] = {"FORM", "POS", "LEMMA", "GOV", "LABEL"};
  const int representations[] = {
    MCD_REPRESENTATION_VOCAB,
    MCD_REPRESENTATION_VOCAB,
    MCD_REPRESENTATION_VOCAB,
    MCD_REPRESENTATION_INT,
    MCD_REPRESENTATION_VOCAB
  };
  const int nb_columns = (int)(sizeof(features) / sizeof(features[0]));
  mcd *m = mcd_new(nb_columns);
  int k;

  for(k = 0; k < nb_columns; k++){
    m->wf[k] = features[k];
    m->wf_str[k] = strdup(names[k]);
    m->representation[k] = representations[k];
    m->filename[k] = strdup("_");
    /* reverse mapping: feature identifier -> column number */
    m->wf2col[features[k]] = k;
  }
  return m;
}
/* builds an mcd corresponding to the ifpls (index, form, pos, lemma, syntax) format */
mcd *mcd_build_ifpls(void)
{
mcd *m = mcd_new(6);
......
......@@ -37,7 +37,7 @@ void sentence_print(FILE *f, sentence *s, dico *dico_labels)
for(i=1; i < s->length; i++){
fprintf(f, "%d\t", i);
word_print(f, s->words[i], s->mcd_struct, dico_labels);
word_print(f, s->words[i]);
fprintf(f, "\n");
}
fprintf(f, "\n");
......
......@@ -30,6 +30,7 @@ word *word_read(FILE *f, mcd *mcd_struct)
/* look for a valid word */
while(fgets(buffer, 10000, f)){
/* printf("buffer = %s\n", buffer); */
/* ignore empty lines */
if((buffer[0] == '\n')) continue;
/* lines beginning with ## are comments */
......@@ -120,7 +121,7 @@ void word_print2(FILE *f, word *w)
printf("rel index = %d\n", word_get_relative_index(w));
}
void word_print(FILE *f, word *w, mcd *mcd_struct, dico *dico_labels)
void word_print(FILE *f, word *w)
{
if(w == NULL) return;
......
......@@ -141,7 +141,8 @@ context *context_read_options(int argc, char *argv[])
if((ctx->mcd_filename == NULL) && ((ctx->form_column == -1) || (ctx->pos_column == -1)))
ctx->mcd_struct = mcd_build_conll07();
/* ctx->mcd_struct = mcd_build_conll07(); */
ctx->mcd_struct = mcd_build_wplgf();
return ctx;
}
......
......@@ -18,6 +18,7 @@ set(SOURCES src/context.c
src/queue.c
src/beam.c
src/feat_types.c
src/word_buffer.c
)
#compiling library
......@@ -70,6 +71,12 @@ target_link_libraries(maca_trans_parser_train transparse)
target_link_libraries(maca_trans_parser_train maca_common)
install (TARGETS maca_trans_parser_train DESTINATION bin)
# test program for the word_buffer structure
add_executable(test_word_buffer ./src/test_word_buffer.c)
# PRIVATE so that -Wall is used when compiling test_word_buffer itself;
# INTERFACE only propagates the option to targets that link against this one
target_compile_options(test_word_buffer PRIVATE -Wall)
target_link_libraries(test_word_buffer transparse)
target_link_libraries(test_word_buffer maca_common)
install (TARGETS test_word_buffer DESTINATION bin)
#add_executable(test_w2v ./src/test_w2v.c)
#target_link_libraries(test_w2v transparse)
......
......@@ -265,7 +265,8 @@ context *context_read_options(int argc, char *argv[])
if(ctx->conll)
ctx->mcd_struct = mcd_build_conll07();
else
ctx->mcd_struct = mcd_build_ifpls();
ctx->mcd_struct = mcd_build_wplgf();
/* ctx->mcd_struct = mcd_build_ifpls(); */
return ctx;
}
......
#include<stdio.h>
#include<stdlib.h>
#include"word_buffer.h"
#include"util.h"
/* Test program for word_buffer.
   usage: test_word_buffer mcf mcd
     argv[1] : file the words are read from
     argv[2] : mcd file describing the format of argv[1]
   Prints the buffer at its initial position, then after every move to the
   right until no move is possible, then after every move to the left until
   no move is possible.
   Exits with status 1 when an argument is missing, returns 0 otherwise. */
int main(int argc, char *argv[])
{
  mcd *mcd_struct;
  FILE *mcf;
  word_buffer *wb;

  /* argv[1] and argv[2] are both read below, so two arguments are required */
  if(argc < 3){
    fprintf(stderr, "usage %s mcf mcd\n", argv[0]);
    exit(1);
  }

  mcd_struct = mcd_read(argv[2], 1);
  mcf = myfopen(argv[1], "r");
  wb = word_buffer_new(mcf, mcd_struct, 0);

  word_buffer_print(stdout, wb);
  printf("\n");
  while(word_buffer_move_right(wb)){
    word_buffer_print(stdout, wb);
    printf("\n");
  }

  printf("=================== CHANGE DIRECTION =====================\n");
  while(word_buffer_move_left(wb)){
    word_buffer_print(stdout, wb);
    printf("\n");
  }

  word_buffer_free(wb);
  /* word_buffer_free does not close the file it reads from */
  fclose(mcf);
  return 0;
}
#include<stdio.h>
#include<stdlib.h>
#include"word_buffer.h"
#include"util.h"
/* Allocates a word buffer reading its words from input_file, whose format is
   described by mcd_struct.
   The buffer starts with room for 10 word pointers and with its current
   position at index 0; lookahead + 1 reads are then attempted so that the
   current word and its lookahead are loaded when the file is long enough.
   Returns the new buffer. */
word_buffer *word_buffer_new(FILE *input_file, mcd *mcd_struct, int lookahead)
{
  word_buffer *buffer = (word_buffer *)memalloc(sizeof(word_buffer));
  int nb_reads = 0;

  buffer->input_file = input_file;
  buffer->mcd_struct = mcd_struct;
  buffer->lookahead = lookahead;
  buffer->current_index = 0;
  buffer->nbelem = 0;
  buffer->size = 10;
  buffer->array = (word **)memalloc(buffer->size * sizeof(word *));

  /* a failed read (end of input) is not treated as an error here */
  while(nb_reads <= lookahead){
    word_buffer_read_next_word(buffer);
    nb_reads++;
  }
  return buffer;
}
/* Prints on f the words located from three positions before to three
   positions after the current word, one per line, each preceded by its
   offset between square brackets.
   Positions for which no word is available are skipped. */
void word_buffer_print(FILE *f, word_buffer *wb)
{
  /* labels[k] is the prefix printed for the word returned by accessors[k] */
  static const char *const labels[] = {
    "[-3] ", "[-2] ", "[-1] ", "[ 0] ", "[ 1] ", "[ 2] ", "[ 3] "
  };
  word *(*const accessors[])(word_buffer *) = {
    word_buffer_bm3, word_buffer_bm2, word_buffer_bm1,
    word_buffer_b0,
    word_buffer_b1, word_buffer_b2, word_buffer_b3
  };
  const int nb_positions = (int)(sizeof(labels) / sizeof(labels[0]));
  int k;

  for(k = 0; k < nb_positions; k++){
    word *current = accessors[k](wb);
    if(current == NULL) continue;
    fprintf(f, "%s", labels[k]);
    word_print(f, current);
    fprintf(f, "\n");
  }
}
/* Frees a word buffer: every word it stores, the array holding them and the
   buffer itself.
   The input file and the mcd referenced by the buffer are left untouched.
   Does nothing when wb is NULL. */
void word_buffer_free(word_buffer *wb)
{
  int i;

  if(wb == NULL) return;

  /* the words must be released first: the loop reads wb->nbelem and
     wb->array, which are no longer valid once the array and wb are freed */
  for(i=0; i < wb->nbelem; i++){
    if(wb->array[i])
      word_free(wb->array[i]);
  }
  free(wb->array);
  free(wb);
}
/* Appends w at the end of the buffer, enlarging the array when only one free
   slot is left.
   Returns the index at which w has been stored.
   Prints a message on stderr and exits with status 1 if the array cannot be
   enlarged. */
int word_buffer_add(word_buffer *wb, word *w)
{
  if(wb->nbelem == wb->size -1){
    int new_size = 2 * (wb->size + 1);
    /* keep wb->array valid until realloc is known to have succeeded */
    word **enlarged = (word **)realloc(wb->array, (size_t)new_size * sizeof(word *));
    if(enlarged == NULL){
      fprintf(stderr, "word_buffer_add: cannot allocate memory for %d words\n", new_size);
      exit(1);
    }
    wb->array = enlarged;
    wb->size = new_size;
  }
  wb->array[wb->nbelem] = w;
  wb->nbelem++;
  return wb->nbelem - 1;
}
/* Returns the word located offset positions away from the current word
   (negative offsets look to the left, positive ones to the right), or NULL
   when that position is outside the words stored in the buffer. */
word *word_buffer_get_word(word_buffer *wb, int offset)
{
  int index = wb->current_index + offset;

  /* valid indices are 0 .. nbelem - 1: slot nbelem has not been filled */
  if((index < 0) || (index >= wb->nbelem)) return NULL;
  return wb->array[index];
}
/* Returns the current word, or NULL when the buffer holds no word. */
word *word_buffer_b0(word_buffer *wb)
{
  if(wb->nbelem == 0) return NULL;
  return wb->array[wb->current_index];
}
/* Returns the word one position to the right of the current word, or NULL
   when the buffer holds no word at that position. */
word *word_buffer_b1(word_buffer *wb)
{
  const int target = wb->current_index + 1;

  if(target < wb->nbelem) return wb->array[target];
  return NULL;
}
/* Returns the word two positions to the right of the current word, or NULL
   when the buffer holds no word at that position. */
word *word_buffer_b2(word_buffer *wb)
{
  const int target = wb->current_index + 2;

  if(target < wb->nbelem) return wb->array[target];
  return NULL;
}
/* Returns the word three positions to the right of the current word, or NULL
   when the buffer holds no word at that position. */
word *word_buffer_b3(word_buffer *wb)
{
  const int target = wb->current_index + 3;

  if(target < wb->nbelem) return wb->array[target];
  return NULL;
}
/* Returns the word one position to the left of the current word, or NULL
   when that position is before the beginning of the buffer. */
word *word_buffer_bm1(word_buffer *wb)
{
  const int target = wb->current_index - 1;

  if(target >= 0) return wb->array[target];
  return NULL;
}
/* Returns the word two positions to the left of the current word, or NULL
   when that position is before the beginning of the buffer. */
word *word_buffer_bm2(word_buffer *wb)
{
  const int target = wb->current_index - 2;

  if(target >= 0) return wb->array[target];
  return NULL;
}
/* Returns the word three positions to the left of the current word, or NULL
   when that position is before the beginning of the buffer. */
word *word_buffer_bm3(word_buffer *wb)
{
  const int target = wb->current_index - 3;

  if(target >= 0) return wb->array[target];
  return NULL;
}
/* Reads one word from the input file of the buffer and appends it to the
   buffer; the relative index of the word is set to its position in the buffer.
   Returns that position, or -1 when no word could be read. */
int word_buffer_read_next_word(word_buffer *wb)
{
  int position;
  word *next = word_read(wb->input_file, wb->mcd_struct);

  if(!next) return -1;

  position = word_buffer_add(wb, next);
  word_set_relative_index(next, position);
  return position;
}
/* Moves the current position one word to the right.
   A new word is first read from the input file when the number of words
   stored to the right of the current word does not exceed the lookahead.
   Returns 1 when the position has been moved, 0 when there is no word to the
   right of the current position (including when the buffer is empty). */
int word_buffer_move_right(word_buffer *wb)
{
  if((wb->nbelem - 1 - wb->current_index) <= wb->lookahead)
    word_buffer_read_next_word(wb);

  /* >= rather than ==: with an empty buffer nbelem - 1 is -1, which the
     current index never equals, so == would let the index grow forever */
  if(wb->current_index >= wb->nbelem - 1) return 0;

  wb->current_index++;
  return 1;
}
/* Moves the current position one word to the left.
   Returns 1 when the position has been moved, 0 when the current position is
   already the first one. */
int word_buffer_move_left(word_buffer *wb)
{
  if(wb->current_index <= 0) return 0;

  wb->current_index--;
  return 1;
}
#ifndef __WORD_BUFFER__
#define __WORD_BUFFER__
#include<stdio.h>
#include"word.h"
#include"mcd.h"
/* A growable array of words read on demand from a file, together with a
   current position that can be moved left and right inside the array. */
typedef struct {
int size; /* number of word pointers the array can currently hold */
int nbelem; /* number of words stored in the buffer */
int lookahead; /* number of words that should be loaded to the right of the current word; more are read from input_file when moving right */
int current_index; /* index, in array, of the current word */
word **array; /* array storing the words, valid indices are 0 .. nbelem - 1 */
FILE *input_file; /* file to read the words from */
mcd *mcd_struct; /* mcd describing the format of input_file */
} word_buffer;
/* creation and destruction */
word_buffer *word_buffer_new(FILE *input_file, mcd *mcd_struct, int lookahead);
void word_buffer_free(word_buffer *wb);

/* appends w to the buffer, returns the index at which it is stored */
int word_buffer_add(word_buffer *wb, word *w);

/* word located offset positions away from the current word, NULL if none */
word *word_buffer_get_word(word_buffer *wb, int offset);
/* NOTE(review): no definition of this function appears in word_buffer.c as
   reviewed, which defines word_buffer_get_word instead; the declaration is
   kept for compatibility, confirm whether it is still needed */
word *word_buffer_get_word_relative(word_buffer *wb, int dist);

/* current word (b0), the three words to its right (b1, b2, b3) and the three
   words to its left (bm1, bm2, bm3); NULL when there is no such word */
word *word_buffer_b0(word_buffer *wb);
word *word_buffer_b1(word_buffer *wb);
word *word_buffer_b2(word_buffer *wb);
word *word_buffer_b3(word_buffer *wb);
word *word_buffer_bm1(word_buffer *wb);
word *word_buffer_bm2(word_buffer *wb);
word *word_buffer_bm3(word_buffer *wb);

/* reads one word from the input file, returns its index or -1 */
int word_buffer_read_next_word(word_buffer *wb);

/* move the current position, return 1 if it has been moved, 0 otherwise */
int word_buffer_move_right(word_buffer *wb);
int word_buffer_move_left(word_buffer *wb);

void word_buffer_print(FILE *f, word_buffer *wb);
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment