Skip to content
Snippets Groups Projects
Commit 52f0a517 authored by Franck Dary's avatar Franck Dary
Browse files

Greatly optimized Config size through the use of boost::flyweight for string

parent adce02c5
No related branches found
No related tags found
No related merge requests found
cmake_minimum_required(VERSION 2.8.7) cmake_minimum_required(VERSION 2.8.7)
project(test_torch) project(test_torch)
add_compile_definitions(BOOST_DISABLE_THREADS)
find_package(Torch REQUIRED) find_package(Torch REQUIRED)
find_package(fmt REQUIRED) find_package(fmt REQUIRED)
......
...@@ -39,35 +39,6 @@ void myThrow(std::string_view message, const std::experimental::source_location ...@@ -39,35 +39,6 @@ void myThrow(std::string_view message, const std::experimental::source_location
std::string int2HumanStr(int number); std::string int2HumanStr(int number);
// Catch-all overload, selected for any type that has no dedicated memorySize
// overload. It reports the unsupported type through myThrow; the shallow size
// computed afterwards is only returned if myThrow hands control back.
template<typename T>
std::size_t memorySize(const T & val)
{
  myThrow("Type not yet supported");

  const std::size_t shallowSize = sizeof val;
  return shallowSize;
}
// Memory footprint of a plain int: the size of the object itself, since an int
// owns no additional storage.
inline std::size_t memorySize(int val)
{
  const std::size_t footprint = sizeof val;
  return footprint;
}
// Memory footprint of a string: the string object itself plus its whole
// reserved character buffer (capacity, not length).
// Note: capacity is always added on top of sizeof, with no special case for
// strings whose characters might live inside the object itself.
template<typename T>
std::size_t memorySize(const std::basic_string<T> & val)
{
  const std::size_t objectBytes = sizeof val;
  const std::size_t bufferBytes = val.capacity() * sizeof (T);

  return objectBytes + bufferBytes;
}
// Memory footprint of a vector: the vector object, the reserved-but-unused
// slots of its buffer, and the footprint of every stored element (each
// element's own memorySize already includes its sizeof).
template<typename T>
std::size_t memorySize(const std::vector<T> & vec)
{
  // Slots that are allocated but do not hold an element yet.
  const std::size_t spareSlots = vec.capacity()-vec.size();

  std::size_t total = sizeof vec + sizeof (T) * spareSlots;
  for (auto & item : vec)
  {
    total += memorySize(item);
  }

  return total;
}
}; };
template <> template <>
......
...@@ -10,8 +10,9 @@ int main(int argc, char * argv[]) ...@@ -10,8 +10,9 @@ int main(int argc, char * argv[])
Config config(argv[3], argv[1], argv[2]); Config config(argv[3], argv[1], argv[2]);
config.printSize(stderr); config.print(stdout);
fmt::print(stderr, "ok\n");
std::scanf("%*c"); std::scanf("%*c");
return 0; return 0;
......
...@@ -18,14 +18,10 @@ ...@@ -18,14 +18,10 @@
#include <vector> #include <vector>
#include <unordered_map> #include <unordered_map>
#include "util.hpp" #include "util.hpp"
#include <boost/flyweight.hpp>
class Config; class Config;
namespace util
{
std::size_t memorySize(const Config & c);
};
class Config class Config
{ {
public : public :
...@@ -34,6 +30,8 @@ class Config ...@@ -34,6 +30,8 @@ class Config
static constexpr const char * EOSSymbol1 = "1"; static constexpr const char * EOSSymbol1 = "1";
static constexpr const char * EOSSymbol0 = "0"; static constexpr const char * EOSSymbol0 = "0";
static constexpr int nbHypothesesMax = 1;
private : private :
std::vector<std::string> colIndex2Name; std::vector<std::string> colIndex2Name;
...@@ -42,9 +40,8 @@ class Config ...@@ -42,9 +40,8 @@ class Config
std::string rawInput; std::string rawInput;
util::utf8string rawInputUtf8; util::utf8string rawInputUtf8;
using ReferenceAndHypotheses = std::vector<std::string>; int nbColumns;
using Line = std::vector<ReferenceAndHypotheses>; std::vector<boost::flyweight<std::string>> lines;
std::vector<Line> lines;
private : private :
...@@ -52,13 +49,16 @@ class Config ...@@ -52,13 +49,16 @@ class Config
void readRawInput(std::string_view rawFilename); void readRawInput(std::string_view rawFilename);
void readTSVInput(std::string_view tsvFilename); void readTSVInput(std::string_view tsvFilename);
friend std::size_t util::memorySize(const Config &);
public : public :
Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename); Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename);
void print(FILE * dest) const; void print(FILE * dest);
void printSize(FILE * dest); void addLine();
boost::flyweight<std::string> & get(const std::string & colName, int lineIndex, int hypothesisIndex);
boost::flyweight<std::string> & get(int colIndex, int lineIndex, int hypothesisIndex);
boost::flyweight<std::string> & getLastNotEmpty(const std::string & colName, int lineIndex);
boost::flyweight<std::string> & getLastNotEmpty(int colIndex, int lineIndex);
std::size_t getNbLines() const;
}; };
#endif #endif
...@@ -32,6 +32,8 @@ void Config::readMCD(std::string_view mcdFilename) ...@@ -32,6 +32,8 @@ void Config::readMCD(std::string_view mcdFilename)
util::myThrow(fmt::format("mcd '{}' must not contain column '{}'", mcdFilename, EOSColName)); util::myThrow(fmt::format("mcd '{}' must not contain column '{}'", mcdFilename, EOSColName));
colIndex2Name.emplace_back(EOSColName); colIndex2Name.emplace_back(EOSColName);
colName2Index.emplace(EOSColName, colIndex2Name.size()-1); colName2Index.emplace(EOSColName, colIndex2Name.size()-1);
nbColumns = colIndex2Name.size();
} }
void Config::readRawInput(std::string_view rawFilename) void Config::readRawInput(std::string_view rawFilename)
...@@ -73,7 +75,7 @@ void Config::readTSVInput(std::string_view tsvFilename) ...@@ -73,7 +75,7 @@ void Config::readTSVInput(std::string_view tsvFilename)
if (!inputHasBeenRead) if (!inputHasBeenRead)
continue; continue;
lines.back()[colName2Index[EOSColName]][0] = EOSSymbol1; get(EOSColName, getNbLines()-1, 0) = EOSSymbol1;
continue; continue;
} }
...@@ -91,41 +93,17 @@ void Config::readTSVInput(std::string_view tsvFilename) ...@@ -91,41 +93,17 @@ void Config::readTSVInput(std::string_view tsvFilename)
if ((int)splited.size() != usualNbCol) if ((int)splited.size() != usualNbCol)
util::myThrow(fmt::format("in file {} line {} is invalid, it shoud have {} columns", tsvFilename, line, usualNbCol)); util::myThrow(fmt::format("in file {} line {} is invalid, it shoud have {} columns", tsvFilename, line, usualNbCol));
lines.emplace_back(); addLine();
for (unsigned int i = 0; i < colIndex2Name.size(); i++) get(EOSColName, getNbLines()-1, 0) = EOSSymbol0;
{
lines.back().emplace_back();
lines.back().back().emplace_back("");
}
lines.back()[colName2Index[EOSColName]][0] = EOSSymbol0;
for (unsigned int i = 0; i < splited.size(); i++) for (unsigned int i = 0; i < splited.size(); i++)
if (i < colIndex2Name.size()) if (i < colIndex2Name.size())
lines.back()[i][0] = splited[i]; get(i, getNbLines()-1, 0) = std::string(splited[i]);
} }
std::fclose(file); std::fclose(file);
} }
// Prints, on dest, the memory footprint of rawInput, rawInputUtf8 and lines,
// followed by their total. Every value is expressed in "Mo" (bytes scaled by
// 10^-6) with two decimals.
void Config::printSize(FILE * dest)
{
  // Byte counts stay in std::size_t: storing them in an int truncates or
  // overflows as soon as a buffer exceeds 2 GiB.
  const std::size_t rawInputSize = util::memorySize(rawInput);
  const std::size_t rawInputUtf8Size = util::memorySize(rawInputUtf8);
  const std::size_t linesSize = util::memorySize(lines);
  const std::size_t totalSize = rawInputSize + rawInputUtf8Size + linesSize;

  const std::string unit = "Mo";
  const int unitPower = 6;

  // Scale factor from bytes to the display unit, i.e. 10^-unitPower.
  double unitMultiplier = 1.0;
  for (int i = 0; i < unitPower; i++)
    unitMultiplier /= 10.0;

  // Width of the numeric column, shared by every row.
  const int valueWidth = 2+11-unitPower;

  const auto printRow = [&](const char * label, std::size_t nbBytes)
  {
    fmt::print(dest, "{:<15} : {:<{}.2f} {}\n", label, unitMultiplier*nbBytes, valueWidth, unit);
  };

  printRow("rawInput", rawInputSize);
  printRow("rawInputUtf8", rawInputUtf8Size);
  printRow("lines", linesSize);
  printRow("Total", totalSize);
}
Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename) Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename)
{ {
if (tsvFilename.empty() and rawFilename.empty()) if (tsvFilename.empty() and rawFilename.empty())
...@@ -142,19 +120,49 @@ Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std:: ...@@ -142,19 +120,49 @@ Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::
readTSVInput(tsvFilename); readTSVInput(tsvFilename);
} }
void Config::print(FILE * dest) const void Config::print(FILE * dest)
{ {
for (auto & line : lines) for (unsigned int line = 0; line < getNbLines(); line++)
{ {
for (unsigned int i = 0; i < line.size()-1; i++) for (int i = 0; i < nbColumns-1; i++)
fmt::print(dest, "{}{}", line[i].back(), i < line.size()-2 ? "\t" : "\n"); fmt::print(dest, "{}{}", getLastNotEmpty(i, line).get(), i < nbColumns-2 ? "\t" : "\n");
if (line[colName2Index.at(EOSColName)].back() == EOSSymbol1) if (getLastNotEmpty(EOSColName, line) == EOSSymbol1)
fmt::print(dest, "\n"); fmt::print(dest, "\n");
} }
} }
std::size_t util::memorySize(const Config & c) void Config::addLine()
{
lines.resize(lines.size() + nbColumns*(nbHypothesesMax+1));
}
// Returns a mutable reference to the cell located at column colName, line
// lineIndex and hypothesis hypothesisIndex.
//
// The column name is resolved with find() rather than operator[]: operator[]
// would insert an unknown name into colName2Index with index 0 and silently
// hand back a cell of the first column. An unknown name is instead reported
// through util::myThrow (presumably throws — confirm in util.hpp).
boost::flyweight<std::string> & Config::get(const std::string & colName, int lineIndex, int hypothesisIndex)
{
  const auto found = colName2Index.find(colName);
  if (found == colName2Index.end())
    util::myThrow(fmt::format("unknown column '{}'", colName));

  return get(found->second, lineIndex, hypothesisIndex);
}
// Returns a mutable reference to the cell located at column colIndex, line
// lineIndex and hypothesis hypothesisIndex.
// lines is a flat array: each line spans nbColumns columns, and each column
// spans nbHypothesesMax+1 consecutive cells. No bounds checking is performed.
boost::flyweight<std::string> & Config::get(int colIndex, int lineIndex, int hypothesisIndex)
{
  const int cellsPerColumn = nbHypothesesMax+1;
  const int lineOffset = lineIndex * nbColumns * cellsPerColumn;
  const int columnOffset = colIndex * cellsPerColumn;

  return lines[lineOffset + columnOffset + hypothesisIndex];
}
// Returns a mutable reference to the highest-indexed non-empty cell of column
// colIndex on line lineIndex, scanning from index nbHypothesesMax down to 1.
// When all of those cells are empty, the cell at index 0 is returned, whether
// it is empty or not. No bounds checking is performed.
boost::flyweight<std::string> & Config::getLastNotEmpty(int colIndex, int lineIndex)
{
  const int cellsPerColumn = nbHypothesesMax+1;
  const int firstCell = lineIndex * nbColumns * cellsPerColumn + colIndex * cellsPerColumn;

  // Walk back from the last cell until a non-empty one (or index 0) is reached.
  int candidate = nbHypothesesMax;
  while (candidate > 0 and lines[firstCell+candidate].get().empty())
    --candidate;

  return lines[firstCell+candidate];
}
// Name-based variant of getLastNotEmpty: resolves colName to its column index
// and forwards to the index-based overload.
//
// The column name is resolved with find() rather than operator[]: operator[]
// would insert an unknown name into colName2Index with index 0 and silently
// read the first column. An unknown name is instead reported through
// util::myThrow (presumably throws — confirm in util.hpp).
boost::flyweight<std::string> & Config::getLastNotEmpty(const std::string & colName, int lineIndex)
{
  const auto found = colName2Index.find(colName);
  if (found == colName2Index.end())
    util::myThrow(fmt::format("unknown column '{}'", colName));

  return getLastNotEmpty(found->second, lineIndex);
}
std::size_t Config::getNbLines() const
{ {
return sizeof c + memorySize(c.rawInput) + memorySize(c.rawInputUtf8) + memorySize(c.lines) + memorySize(c.colIndex2Name) + memorySize(c.colName2Index); return lines.size() / (nbColumns * (nbHypothesesMax+1));
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment