Skip to content
Snippets Groups Projects
Commit 33d5af49 authored by robin.perrotin's avatar robin.perrotin
Browse files

Fixed handling of '\r' end-of-line characters (CRLF line endings) when running on Windows

parent 71c198d9
Branches
No related tags found
No related merge requests found
......@@ -157,8 +157,8 @@ dico *dico_extract_from_corpus(char *filename, int column, char *dico_name)
while(fgets(buffer, 10000, f)){
if(feof(f)) return NULL; /* no more words to read */
if((buffer[0] == '\n') || (buffer[0] == ' ')) continue;
if(buffer[strlen(buffer) - 1] == '\n')
if((buffer[0] == '\n') || (buffer[0] == '\r') ||(buffer[0] == ' ')) continue;
if(buffer[strlen(buffer) - 1] == '\n' || buffer[strlen(buffer) - 1] == '\r')
buffer[strlen(buffer) - 1] = '\0';
token = strtok(buffer, "\t"); /* get index */
column_nb = 0;
......
......@@ -67,16 +67,16 @@ feat_model *feat_model_read(char *filename, feat_lib *fl, int verbose)
while(fgets(buffer, 1000, f)){
if(feof(f)) break;
if((buffer[0] == '\n') || (buffer[0] == '#')) continue;
if((buffer[0] == '\n') || (buffer[0] == '\r') || (buffer[0] == '#')) continue;
if(verbose) fprintf(stderr, "%d", feature_number + 1);
fd = feat_desc_new();
feat_name = strtok(buffer, " \n");
feat_name = strtok(buffer, " \n\r");
do{
if(verbose) fprintf(stderr, "\t%s", feat_name);
sfd = feat_lib_get_simple_feat_desc(fm->fl, feat_name);
if(sfd)
feat_desc_add(fd, sfd);
}while((feat_name = strtok(NULL, " \n")));
}while((feat_name = strtok(NULL, " \n\r")));
if(verbose) fprintf(stderr, "\n");
feat_model_add(fm, fd);
feature_number++;
......
......@@ -60,6 +60,9 @@ cell *hash_lookup(hash *h, char *key)
for(c=h->array[index]; c; c = c->next)
if(!strcmp(key, c->key))
return c;
//printf("<key:%s>\n",key);
return NULL;
}
......
......@@ -117,7 +117,7 @@ int mcd_max_column_index_in_file(char *mcd_filename)
while(fgets(buffer, 1000, f)){
line_number++;
if(feof(f)) break;
if((buffer[0] == '\n') || (buffer[0] == '#')) continue;
if((buffer[0] == '\n') || (buffer[0] == '\r') ||(buffer[0] == '#')) continue;
fields_number = sscanf(buffer, "%d %s %s %s", &column, wf, representation, filename);
if(fields_number != 4){
fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename);
......@@ -183,7 +183,7 @@ mcd *mcd_read(char *mcd_filename, int verbose)
while(fgets(buffer, 1000, f)){
line_number++;
if(feof(f)) break;
if((buffer[0] == '\n') || (buffer[0] == '#')) continue;
if((buffer[0] == '\n') || (buffer[0] == '\r') ||(buffer[0] == '#')) continue;
fields_number = sscanf(buffer, "%d %s %s %s", &column, wf, representation, filename);
if(fields_number != 4){
/* fprintf(stderr, "line %d of mcd file %s ill formed, I'm skipping it\n", line_number, mcd_filename); */
......
......@@ -73,7 +73,7 @@ sentence *sentence_read(FILE *f, mcd *mcd_struct)
while(fgets(buffer, 1000, f)){
/* printf("buffer = %s\n", buffer); */
if(feof(f)) break;
if((buffer[0] == '\n') || (buffer[0] == ' ')) break; /* end of the sentence indicated by empty line */
if((buffer[0] == '\n') || (buffer[0] == '\r') || (buffer[0] == ' ')) break; /* end of the sentence indicated by empty line */
w = word_parse_buffer(buffer, mcd_struct);
if(w) sentence_add_word(s, w);
if(word_is_eos(w, mcd_struct)) break;
......@@ -95,7 +95,7 @@ sentence *sentence_read_no_dummy_word(FILE *f, mcd *mcd_struct)
while(fgets(buffer, 1000, f)){
if(feof(f)) break;
if((buffer[0] == '\n') || (buffer[0] == ' ')) break; /* end of the sentence */
if((buffer[0] == '\n') || (buffer[0] == '\r') || (buffer[0] == ' ')) break; /* end of the sentence */
w = word_parse_buffer(buffer, mcd_struct);
sentence_add_word(s, w);
}
......
......@@ -57,7 +57,8 @@ word *word_parse_buffer(char *buffer, mcd *mcd_struct)
int col = 0;
/* remove newline from buffer */
if(buffer[strlen(buffer)-1] == '\n') buffer[strlen(buffer)-1] = '\0';
if(buffer[strlen(buffer)-1] == '\n' || buffer[strlen(buffer)-1] == '\r') buffer[strlen(buffer)-1] = '\0';
if(buffer[strlen(buffer)-2] == '\r') buffer[strlen(buffer)-2] = '\0';
w = word_new(buffer);
token = strtok(buffer, "\t");
......
......@@ -145,7 +145,7 @@ int word_buffer_read_sentence(word_buffer *wb)
int index = 1;
while(fgets(buffer, 10000, word_buffer_get_input_file(wb))){
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) continue; /* ignore empty lines */
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t') || (buffer[0] == '\r')) continue; /* ignore empty lines */
if(feof(word_buffer_get_input_file(wb))) break;
w = word_parse_buffer(buffer, word_buffer_get_mcd(wb));
word_set_index(w, index);
......
......@@ -69,7 +69,7 @@ word_emb *word_emb_load_w2v_file(char *file_name)
while (1) {
word[a] = fgetc(f);
if (feof(f) || (word[a] == ' ')) break;
if ((a < w2v_max_w) && (word[a] != '\n')) a++;
if ((a < w2v_max_w) && (word[a] != '\n') && (word[a] != '\r')) a++;
}
word[a] = 0;
hash_add(we->htable, word, word_nb++);
......@@ -126,7 +126,7 @@ int word_emb_number_of_columns_in_file(char *filename)
fgets(buffer, 10000, f);
token = strtok(buffer, " ");
column_nb = 1;
while((token = strtok(NULL , " \n")))
while((token = strtok(NULL , " \n\r")))
column_nb++;
fclose(f);
return column_nb;
......
......@@ -120,7 +120,6 @@ int main(int argc, char *argv[])
ctx->features_model = feat_model_read(ctx->features_model_filename, feat_lib_build(), ctx->verbose);
if(ctx->mode == TRAIN_MODE){
mcd_extract_dico_from_corpus(ctx->mcd_struct, ctx->input_filename);
ctx->vocabs = mcd_build_dico_vec(ctx->mcd_struct);
......@@ -130,6 +129,7 @@ int main(int argc, char *argv[])
mcd_link_to_dico(ctx->mcd_struct, ctx->vocabs, ctx->verbose);
}
feat_model_compute_ranges(ctx->features_model, ctx->mcd_struct, ctx->mvt_nb);
/* in train mode, create the feature dictionary for the perceptron */
......
......@@ -21,7 +21,7 @@ int queue_read_sentence(queue *bf, FILE *f, mcd *mcd_struct)
while(fgets(buffer, 10000, f)){
if(feof(f)) break;
/* fprintf(stderr, "%s", buffer); */
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t')) break; /* end of the sentence */
if((buffer[0] == '\n') || (buffer[0] == ' ') || (buffer[0] == '\t') || (buffer[0] == '\t')) break; /* end of the sentence */
w = word_parse_buffer(buffer, mcd_struct);
word_set_index(w, index);
index++;
......
......@@ -27,6 +27,7 @@ cff_cutoff_context *cff_cutoff_context_new(void)
{
cff_cutoff_context *ctx = (cff_cutoff_context *)memalloc(sizeof(cff_cutoff_context));
ctx->help = 0;
ctx->verbose = 0;
ctx->program_name = NULL;
ctx->vocabs_filename = NULL;
......
......@@ -19,6 +19,7 @@ perceptron_context *perceptron_context_new(void)
{
perceptron_context *ctx = (perceptron_context *)memalloc(sizeof(perceptron_context));
ctx->help = 0;
ctx->verbose = 0;
ctx->program_name = NULL;
ctx->perc_model_filename = NULL;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment