Skip to content
Snippets Groups Projects
Commit 5969a7c5 authored by Franck Dary's avatar Franck Dary
Browse files

Added project structure and classes Dict, File and util

parents
No related branches found
No related tags found
No related merge requests found
build
# Top-level build script of the macaon project.
cmake_minimum_required(VERSION 2.8.7)
project(macaon)
# Turn off verbose makefile output.
set(CMAKE_VERBOSE_MAKEFILE 0)
# Request C++11.
# NOTE(review): CMAKE_CXX_STANDARD is documented as introduced in CMake 3.1, which is
# newer than the minimum version requested above - confirm older CMake is not needed.
set(CMAKE_CXX_STANDARD 11)
# Make the find-modules stored under cmake/Modules available to find_package().
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules")
# Default to a Release build when no build type was given.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
# Warning flags for every build type, then per-build-type debug/optimisation flags.
# NOTE(review): this assignment overwrites CMAKE_CXX_FLAGS rather than appending to it.
set(CMAKE_CXX_FLAGS "-Wall -Wextra")
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
# Expose the maca_common headers, then build the maca_common sub-project.
include_directories(maca_common/include)
add_subdirectory(maca_common)
# - Try to find dynet
# Once done this will define
# DYNET_FOUND - System has dynet
# DYNET_INCLUDE_DIRS - The dynet include directories
# DYNET_LIBRARIES - The libraries needed to use dynet
# DYNET_DEFINITIONS - Compiler switches required for using dynet
# NOTE(review): DYNET_DEFINITIONS is listed above but never set in this module.
# Ask pkg-config (when available) for hints; QUIET suppresses its output.
find_package(PkgConfig)
pkg_check_modules(PC_DYNET QUIET dynet)
# Locate the directory containing dynet/model.h, trying the pkg-config hints first.
find_path(DYNET_INCLUDE_DIR dynet/model.h
HINTS ${PC_DYNET_INCLUDEDIR} ${PC_DYNET_INCLUDE_DIRS}
PATH_SUFFIXES dynet )
# Locate the dynet library itself.
find_library(DYNET_LIBRARY NAMES dynet
HINTS ${PC_DYNET_LIBDIR} ${PC_DYNET_LIBRARY_DIRS} )
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set DYNET_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(dynet DEFAULT_MSG
DYNET_LIBRARY DYNET_INCLUDE_DIR)
# Hide the two cache entries from the default (non-advanced) cache view.
mark_as_advanced(DYNET_INCLUDE_DIR DYNET_LIBRARY )
# Publish the results under the conventional plural variable names.
set(DYNET_LIBRARIES ${DYNET_LIBRARY} )
# NOTE(review): "/dynet" is appended to the directory found for dynet/model.h;
# confirm this is the include directory that consumers actually expect.
set(DYNET_INCLUDE_DIRS ${DYNET_INCLUDE_DIR}/dynet )
# Collect every .cpp file of the src directory.
# NOTE(review): a glob is evaluated at configure time, so newly added sources
# presumably require re-running CMake - confirm this is acceptable.
FILE(GLOB SOURCES src/*.cpp)
#compiling library
# maca_common is built as a static library from the collected sources.
add_library(maca_common STATIC ${SOURCES})
# Linking against dynet is currently disabled.
#target_link_libraries(maca_common dynet)
#ifndef DICT__H
#define DICT__H
#include <string>
#include <vector>
#include <map>
// Dictionary mapping strings to float vectors of a fixed dimension.
// The content is loaded from a file by the constructor and can be written
// back to that same file with save().
class Dict
{
public :
// How the vector of a newly seen string is produced by getValue().
enum Mode
{
// New entries get a vector with a single 1.0, at the next unused index.
OneHot,
// New entries get a vector initialized by initEmbedding().
Embeddings
};
// How the dictionary relates to its backing file.
enum Policy
{
// save() does not rewrite the file.
Final,
// Entries are loaded from the file and save() rewrites it.
Modifiable,
// Existing entries of the file are not loaded; save() rewrites it.
FromZero
};
// Name of the dictionary, read from the file.
std::string name;
Mode mode;
Policy policy;
private :
// Number of floats in every vector, read from the file.
int dimension;
// The entries themselves.
std::map< std::string, std::vector<float> > str2vec;
// Path of the backing file (read by the constructor, written by save()).
std::string filename;
// Index that receives the 1.0 of the next entry created in OneHot mode.
int oneHotIndex;
private :
// Initializes the vector of an entry created in Embeddings mode.
void initEmbedding(std::vector<float> & vec);
public :
// Loads the dictionary stored in filename.
Dict(Mode mode, Policy policy, std::string filename);
// Writes the dictionary back to its file, unless the policy is Final.
void save();
// Returns a pointer to the vector associated with s, creating the entry
// if it does not exist yet.
std::vector<float> * getValue(const std::string & s);
};
#endif
#ifndef FILE__H
#define FILE__H
#include <cstdio>
#include <stack>
#include <string>
#include <functional>
// Thin wrapper around a C FILE* that adds an unlimited push-back buffer
// (ungetChar), end-of-input tracking and a few "read until" helpers.
// A File is neither copyable nor movable.
class File
{
	public :

	// Wrapper around the process's standard output.
	static File stdOut;

	private :

	// Wraps an already opened stream (used to build stdOut).
	File(FILE * file);

	private :

	// Underlying stream. Default-initialized so that no constructor can leave it
	// indeterminate.
	FILE * file = nullptr;
	// Characters pushed back with ungetChar(); they are served before the stream.
	std::stack<char> buffer;
	// Set once the end of the stream has been observed. Default-initialized
	// because peek() reads it and File(FILE *) does not assign it.
	bool endHasBeenReached = false;
	// Name the file was opened with (empty for File(FILE *)).
	std::string filename;

	public :

	// Opens filename in mode "r" or "w". The names "stdin" and "stdout" select
	// the corresponding standard stream instead of a file on disk.
	File(const std::string & filename, const std::string & mode);
	// Copying or moving would duplicate ownership of the FILE*.
	File(const File & model) = delete;
	File(File && model) = delete;
	File & operator=(const File & model) = delete;
	// Closes the stream unless it is stdin or stdout.
	~File();
	// Returns the next character without consuming it.
	char peek();
	// True when nothing is left to read (push-back buffer empty and stream at end).
	bool isFinished();
	// Consumes and returns the next character (push-back buffer first).
	char getChar();
	// Pushes c back so that it is the next character returned.
	void ungetChar(char c);
	// Gives access to the underlying FILE*.
	FILE * getDescriptor();
	// Name the file was opened with.
	const std::string & getName();
	// Consumes characters up to and including the first occurrence of c.
	char readUntil(char c);
	// Consumes characters up to and including the first one satisfying condition;
	// returns the last character consumed (0 if none).
	char readUntil(const std::function<bool(char)> & condition);
	// Same as above, appending every consumed character that does not satisfy
	// condition to dest.
	char readUntil(std::string & dest, const std::function<bool(char)> & condition);
	// Restarts reading from the beginning of the file.
	void rewind();
};
#endif
#ifndef UTIL__H
#define UTIL__H
#include <string>
// True for EOF and for any non-negative character that is neither a letter,
// a digit, '_' nor '\''.
bool isSeparator(char c);
// True for line feed (10) and carriage return (13).
bool isNewline(char c);
// Intended to return the part of s that follows its last '/'.
std::string getFilenameFromPath(const std::string & s);
// Estimates the printed length of s: characters with a positive value count
// for 1, characters with a negative value for one half, rounded up.
unsigned int lengthPrinted(const std::string & s);
// True for the ASCII digits '0' to '9'.
bool isNum(char c);
//bool isNum(const std::string & s);
// True for '.', '!', '?', '\n' and EOF.
bool endSentence(char c);
// True for '\n', '\r' and EOF.
bool endLine(char c);
// Converts the letter starting at index i of s to lower case, in place.
void toLowerCase(std::string & s, unsigned int i);
// Converts the letter starting at index i of s to upper case, in place.
void toUpperCase(std::string & s, unsigned int i);
#endif
#include "Dict.hpp"
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <string>
#include <vector>
#include "File.hpp"
#include "util.hpp"
// Loads a dictionary from filename.
//
// Expected layout (the one produced by save()): whitespace-separated tokens,
//   <name> <dimension> { <entry> <value_1> ... <value_dimension> }*
//
// mode     : how the vectors of new entries are created by getValue().
// policy   : with FromZero only the name and the dimension are read.
// filename : path of the file to read; also remembered for save().
//
// Any malformed content (missing token, non numeric dimension or value,
// non positive dimension, truncated entry) prints an error and exits.
Dict::Dict(Mode mode, Policy policy, std::string filename)
{
	// Inside a lambda __func__ would name the lambda's operator(), so the
	// constructor's own name is captured here for the error message.
	const char * functionName = __func__;

	auto badFormatAndAbort = [&]()
	{
		fprintf(stderr, "Error (%s) : file %s bad format. Aborting.\n", functionName, filename.c_str());
		exit(1);
	};

	this->mode = mode;
	this->policy = policy;
	this->filename = filename;
	this->oneHotIndex = 0;

	File file(filename, "r");

	auto isBlank = [](char c) -> bool
	{
		return c == ' ' || c == '\t' || c == '\n' || c == '\r';
	};

	// Reads the next whitespace-delimited token into dest.
	// Characters are only consumed after having been peeked, so no character of
	// a token is ever lost. Returns false when the end of the file is reached
	// before any token character.
	auto readToken = [&](std::string & dest) -> bool
	{
		dest.clear();
		while(!file.isFinished() && isBlank(file.peek()))
			file.getChar();
		while(!file.isFinished() && !isBlank(file.peek()))
			dest.push_back(file.getChar());
		return !dest.empty();
	};

	// Converts a whole token to an int; aborts if it is not entirely numeric.
	auto parseInt = [&](const std::string & token) -> int
	{
		int value = 0;
		try
		{
			std::size_t used = 0;
			value = std::stoi(token, &used);
			if(used != token.size())
				badFormatAndAbort();
		}
		catch(const std::exception &)
		{
			badFormatAndAbort();
		}
		return value;
	};

	// Converts a whole token to a float; aborts if it is not entirely numeric.
	auto parseFloat = [&](const std::string & token) -> float
	{
		float value = 0.0f;
		try
		{
			std::size_t used = 0;
			value = std::stof(token, &used);
			if(used != token.size())
				badFormatAndAbort();
		}
		catch(const std::exception &)
		{
			badFormatAndAbort();
		}
		return value;
	};

	std::string readBuffer;

	if(!readToken(name))
		badFormatAndAbort();

	if(!readToken(readBuffer))
		badFormatAndAbort();
	dimension = parseInt(readBuffer);
	// getValue() builds vectors of this size and writes into them.
	if(dimension <= 0)
		badFormatAndAbort();

	// If policy is FromZero, we don't need to read the current entries
	if(this->policy == Policy::FromZero)
		return;

	std::string entry;
	while(readToken(entry))
	{
		std::vector<float> values;
		values.reserve(dimension);

		for(int i = 0; i < dimension; i++)
		{
			// The file ended in the middle of an entry.
			if(!readToken(readBuffer))
				badFormatAndAbort();
			values.push_back(parseFloat(readBuffer));
		}

		// If an entry appears twice, the last occurrence replaces the previous
		// one instead of being appended to it.
		str2vec[entry].swap(values);
	}
}
void Dict::save()
{
// If policy is Final, we didn't change any entry so no need to rewrite the file
if (policy == Policy::Final)
return;
File file(filename, "w");
FILE * fd = file.getDescriptor();
fprintf(fd, "%s\n%d\n", name.c_str(), dimension);
for(auto & it : str2vec)
{
fprintf(fd, "%s\t", it.first.c_str());
for(float value : it.second)
fprintf(fd, "%f\t", value);
fprintf(fd, "\n");
}
}
std::vector<float> * Dict::getValue(const std::string & s)
{
auto it = str2vec.find(s);
if(it != str2vec.end())
return &(it->second);
str2vec.emplace(s, std::vector<float>(dimension, 0.0));
auto & vec = str2vec[s];
if(mode == Mode::OneHot)
vec[oneHotIndex++] = 1.0;
else
initEmbedding(vec);
return &vec;
}
// Initializes the vector of a new embedding: every component is set to zero.
// Works on a vector of any size, including an empty one.
void Dict::initEmbedding(std::vector<float> & vec)
{
	// Placeholder initialization (all zeroes). Going through assign() uses the
	// parameter, so no unused-parameter warning, without indexing element 0.
	vec.assign(vec.size(), 0.0f);
}
#include "File.hpp"
// Definition of the static File that wraps the standard output stream.
File File::stdOut(stdout);
// Wraps an already opened stream. The File has no name.
File::File(FILE * file)
{
	this->file = file;
	// peek() reads this flag before touching the stream, so it must not be left
	// uninitialized.
	endHasBeenReached = false;
}
// Returns the next character without consuming it.
// Characters pushed back with ungetChar() come first; once the end of the
// stream has been observed, EOF (converted to char) is returned.
char File::peek()
{
	if (!buffer.empty())
		return buffer.top();

	if (endHasBeenReached)
		return EOF;

	// fgetc returns an int: the comparison with EOF has to be done before any
	// conversion to char, otherwise a 0xFF byte is mistaken for the end of the
	// file (signed char) or the end is never detected (unsigned char).
	const int next = fgetc(file);

	if (next == EOF)
	{
		endHasBeenReached = true;
		return EOF;
	}

	ungetc(next, file);

	return static_cast<char>(next);
}
// Opens filename with the given mode.
//
// filename : path of the file; "stdin" and "stdout" select the corresponding
//            standard stream instead of a file on disk.
// mode     : "r" or "w".
//
// An invalid mode or a file that cannot be opened prints a message on the
// standard error stream and exits. Diagnostics do not go to the standard output
// because it can itself be the stream data is written to.
File::File(const std::string & filename, const std::string & mode)
{
	this->filename = filename;
	endHasBeenReached = false;
	file = nullptr;

	if (mode != "r" && mode != "w")
	{
		fprintf(stderr, "\"%s\" is an invalid mode when opening a file\n", mode.c_str());
		exit(1);
	}

	if (filename == "stdin")
	{
		file = stdin;
		return;
	}
	else if (filename == "stdout")
	{
		file = stdout;
		return;
	}

	file = fopen(filename.c_str(), mode.c_str());

	if (!file)
	{
		fprintf(stderr, "Cannot open file %s\n", filename.c_str());
		exit(1);
	}
}
// Closes the underlying stream, except for the standard input and output
// which are left open.
File::~File()
{
	const bool isStandardStream = (file == stdin) || (file == stdout);

	if (!isStandardStream)
		fclose(file);
}
bool File::isFinished()
{
return buffer.empty() && peek() == EOF;
}
// Consumes and returns the next character: the most recently pushed-back
// character if there is one, otherwise the next character of the stream.
char File::getChar()
{
	if (!buffer.empty())
	{
		const char pushedBack = buffer.top();
		buffer.pop();
		return pushedBack;
	}

	return getc(file);
}
void File::ungetChar(char c)
{
buffer.push(c);
}
FILE * File::getDescriptor()
{
return file;
}
const std::string & File::getName()
{
return filename;
}
// Consumes characters up to and including the first occurrence of c, or until
// nothing is left to read. Always returns c.
char File::readUntil(char c)
{
	while (!isFinished())
	{
		if (getChar() == c)
			break;
	}

	return c;
}
// Consumes characters up to and including the first one for which condition
// is true, or until nothing is left to read.
// Returns the last character consumed, or 0 when nothing was consumed.
char File::readUntil(const std::function<bool(char)> & condition)
{
	char last = 0;

	while (!isFinished())
	{
		last = getChar();
		if (condition(last))
			break;
	}

	return last;
}
// Consumes characters up to and including the first one for which condition
// is true, or until nothing is left to read. Every consumed character that does
// not satisfy condition is appended to dest.
// Returns the last character consumed, or 0 when nothing was consumed.
char File::readUntil(std::string & dest, const std::function<bool(char)> & condition)
{
	char last = 0;

	while (!isFinished())
	{
		last = getChar();
		if (condition(last))
			break;
		dest.push_back(last);
	}

	return last;
}
// Restarts reading from the beginning of the file and forgets every
// pushed-back character.
//
// A regular file is closed and reopened for reading (the mode it was first
// opened with is not remembered). The standard streams are never closed: they
// are repositioned with fseek, which only works when they are seekable (for
// instance redirected from a file). Any failure prints a message on the
// standard error stream and exits.
void File::rewind()
{
	endHasBeenReached = false;

	while (!buffer.empty())
		buffer.pop();

	if (file == stdin || file == stdout)
	{
		// Closing a standard stream and opening a file named after it would
		// lose the stream, so it is repositioned in place instead.
		if (fseek(file, 0, SEEK_SET) != 0)
		{
			fprintf(stderr, "Cannot rewind file %s\n", filename.c_str());
			exit(1);
		}
		return;
	}

	fclose(file);
	file = fopen(filename.c_str(), "r");

	if (!file)
	{
		fprintf(stderr, "Cannot open file %s\n", filename.c_str());
		exit(1);
	}
}
#include "util.hpp"
#include <algorithm>
#include <cstring>
// True for the unaccented ASCII letters, lower or upper case.
bool isAlpha(char c)
{
	const bool isLowerLetter = ('a' <= c) && (c <= 'z');
	const bool isUpperLetter = ('A' <= c) && (c <= 'Z');

	return isLowerLetter || isUpperLetter;
}
// True for the ASCII digits '0' to '9'.
bool isNum(char c)
{
	if (c < '0')
		return false;

	return c <= '9';
}
// True when s contains neither an ASCII letter nor a character with a negative
// value. An empty string is therefore accepted.
bool isNum(const std::string & s)
{
	for (std::string::size_type pos = 0; pos < s.size(); ++pos)
	{
		const char current = s[pos];

		if (current < 0 || isAlpha(current))
			return false;
	}

	return true;
}
// True for EOF and for every non-negative character that is neither a letter,
// a digit, '_' nor '\''. Other negative characters are never separators.
bool isSeparator(char c)
{
	if (c == EOF)
		return true;

	if (c == '_' || c == '\'')
		return false;

	if (c < 0)
		return false;

	return !(isAlpha(c) || isNum(c));
}
// Returns the last component of a '/'-separated path, that is everything that
// follows the last '/' of s.
// A path without any '/' is returned unchanged; a path ending with '/' (or an
// empty path) gives an empty string.
std::string getFilenameFromPath(const std::string & s)
{
	const std::string::size_type lastSlash = s.rfind('/');

	if (lastSlash == std::string::npos)
		return s;

	return s.substr(lastSlash + 1);
}
// Estimates the number of columns taken by s once printed.
// Bytes below 0x80 (except the null character, which is ignored) count for one
// column and bytes from 0x80 upwards for half a column; the total is rounded up.
// NOTE(review): the half-column rule presumably targets two-byte UTF-8
// sequences - confirm what is expected for longer sequences.
unsigned int lengthPrinted(const std::string & s)
{
	// Counted in half columns so that the sum stays exact whatever the length.
	unsigned long halfColumns = 0;

	for (char c : s)
	{
		// Going through unsigned char keeps the test independent from the
		// signedness of plain char on the target platform.
		const unsigned char byte = static_cast<unsigned char>(c);

		if (byte == 0)
			continue;

		halfColumns += (byte < 0x80) ? 2 : 1;
	}

	return static_cast<unsigned int>((halfColumns + 1) / 2);
}
// True for the unaccented ASCII upper case letters.
bool isUpper(char c)
{
	if (c < 'A')
		return false;

	return c <= 'Z';
}
// True for the characters that end a sentence: '.', '!', '?', a line feed,
// or EOF.
bool endSentence(char c)
{
	switch (c)
	{
		case '.' :
		case '!' :
		case '?' :
		case '\n' :
			return true;
		default :
			return c == EOF;
	}
}
// True for the characters that end a line: line feed, carriage return, or EOF.
bool endLine(char c)
{
	const bool isLineBreak = (c == '\n') || (c == '\r') || (c == 10);

	if (isLineBreak)
		return true;

	return c == EOF;
}
// True for the characters of value 10 (line feed) and 13 (carriage return).
bool isNewline(char c)
{
	const char lineFeed = 10;
	const char carriageReturn = 13;

	if (c == lineFeed)
		return true;

	return c == carriageReturn;
}
// Converts, in place, the letter starting at index i of s to lower case.
//
// Handles the unaccented ASCII letters and the accented capitals encoded in
// UTF-8 as the byte 0xC3 followed by a byte in 0x80..0x9E (U+00C0..U+00DE).
// The multiplication sign (0xC3 0x97, U+00D7) lies in that range but is not a
// letter, so it is left untouched.
// Nothing happens when i is out of range or when the letter is not upper case.
void toLowerCase(std::string & s, unsigned int i)
{
	if (i >= s.size())
		return;

	// Uncapitalize basic letters
	if (s[i] >= 'A' && s[i] <= 'Z')
	{
		s[i] += -'A' + 'a';
		return;
	}

	// Uncapitalize accentuated letters: they need a second byte
	if (static_cast<std::string::size_type>(i) + 1 >= s.size())
		return;

	const unsigned char lead = static_cast<unsigned char>(s[i]);
	const unsigned char trail = static_cast<unsigned char>(s[i+1]);

	if (lead == 0xC3 && trail >= 0x80 && trail <= 0x9E && trail != 0x97)
		s[i+1] = static_cast<char>(trail + 0x20);
}
// Converts, in place, the letter starting at index i of s to upper case.
//
// Mirrors toLowerCase: handles the unaccented ASCII letters and the accented
// lower case letters encoded in UTF-8 as the byte 0xC3 followed by a byte in
// 0xA0..0xBE (U+00E0..U+00FE). The division sign (0xC3 0xB7, U+00F7) lies in
// that range but is not a letter, so it is left untouched.
// Nothing happens when i is out of range or when the letter is not lower case.
void toUpperCase(std::string & s, unsigned int i)
{
	if (i >= s.size())
		return;

	// Capitalize basic letters
	if (s[i] >= 'a' && s[i] <= 'z')
	{
		s[i] += 'A' - 'a';
		return;
	}

	// Capitalize accentuated letters: they need a second byte
	if (static_cast<std::string::size_type>(i) + 1 >= s.size())
		return;

	const unsigned char lead = static_cast<unsigned char>(s[i]);
	const unsigned char trail = static_cast<unsigned char>(s[i+1]);

	if (lead == 0xC3 && trail >= 0xA0 && trail <= 0xBE && trail != 0xB7)
		s[i+1] = static_cast<char>(trail - 0x20);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment