Skip to content
Snippets Groups Projects
Commit 5646953f authored by Franck Dary
Browse files

Optimization of Config size and added new types utf8char and utf8string

parent c0f915b2
No related branches found
No related tags found
No related merge requests found
......@@ -16,22 +16,29 @@
#include <string>
#include <vector>
#include <array>
#include <fmt/core.h>
#include <experimental/source_location>
#include "utf8.hpp"
// Helper types and free functions: UTF-8 character containers, string
// splitting, diagnostics and number formatting.
namespace util
{
// One UTF-8 encoded character stored in a fixed array of 4 bytes.
typedef std::array<char, 4> utf8char;
// A string whose elements are utf8char values.
typedef std::basic_string<utf8char> utf8string;
// NOTE(review): presumably returns the part of s located after the last '/' — confirm against the definition.
std::string_view getFilenameFromPath(std::string_view s);
// NOTE(review): same name and parameter list as the utf8string-returning
// splitAsUtf8 declared below; the two differ only by return type, so only
// one of them can be kept.
std::vector<std::string_view> splitAsUtf8(std::string_view s);
// NOTE(review): presumably splits s at every occurrence of delimiter — confirm against the definition.
std::vector<std::string_view> split(std::string_view s, char delimiter);
// Splits the UTF-8 encoded text s into one utf8char per character.
utf8string splitAsUtf8(std::string_view s);
// Prints "WARNING (location) : message" on stderr. The default argument
// captures the source location of the call site.
void warning(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current());
// NOTE(review): presumably reports message as an error for the given location — definition not visible here.
void error(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current());
// NOTE(review): presumably reports the exception e as an error for the given location — definition not visible here.
void error(const std::exception & e, const std::experimental::source_location & location = std::experimental::source_location::current());
// Throws std::invalid_argument whose text contains the location and message.
void myThrow(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current());
// Returns the decimal text of number with a space between groups of three digits.
std::string int2HumanStr(int number);
};
template <>
......@@ -46,6 +53,19 @@ struct fmt::formatter<std::experimental::source_location>
}
};
// {fmt} formatter for util::utf8char.
// A utf8char is a 4-byte array; the bytes actually used by the encoded
// character come first and the remaining ones are zero. Only the used bytes
// are written, so the zero padding never reaches the output as NUL characters.
// No format specification is supported: "{}" is the only accepted form.
template <>
struct fmt::formatter<util::utf8char>
{
  // Nothing to parse: the replacement field carries no options.
  constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); }

  // Writes the bytes of c up to (not including) the first zero byte and
  // returns the advanced output iterator.
  template <typename FormatContext>
  auto format(const util::utf8char & c, FormatContext & ctx) const
  {
    auto out = ctx.out();
    for (const char byte : c)
    {
      // Bytes after the encoded character are padding; stop at the first one.
      if (byte == '\0')
        break;
      *out++ = byte;
    }
    return out;
  }
};
std::string_view operator+(std::string_view a, std::string_view b);
void operator+=(std::string_view & a, std::string_view b);
......
......@@ -20,24 +20,6 @@ std::string_view getFilenameFromPath(std::string_view s)
return {s.data()+indexOfSlash+1, s.size()-1-indexOfSlash};
}
// Splits the UTF-8 encoded text s into one view per character.
// Every returned view points into the memory referenced by s, so the result
// is only usable while that memory is alive.
// Decoding stops silently at the first sequence utf8::next rejects; the
// characters decoded before that point are returned.
std::vector<std::string_view> splitAsUtf8(std::string_view s)
{
  std::vector<std::string_view> result;
  const char * beginPtr = s.data();
  const char * currentPtr = beginPtr;
  // utf8::next takes a past-the-end iterator, so the end is data()+size().
  // Using size()-1 dropped the last byte and was out of range for empty input.
  const char * endPtr = s.data()+s.size();

  // Reaching the end is the normal exit; exceptions only signal bad input.
  while (currentPtr < endPtr)
  {
    try
    {
      utf8::next(currentPtr, endPtr);
    } catch (const std::exception &) {break;}

    result.emplace_back(beginPtr, currentPtr-beginPtr);
    beginPtr = currentPtr;
  }

  return result;
}
std::vector<std::string_view> split(std::string_view remaining, char delimiter)
{
std::vector<std::string_view> result;
......@@ -55,6 +37,31 @@ std::vector<std::string_view> split(std::string_view remaining, char delimiter)
return result;
}
// Splits the UTF-8 encoded text s into a utf8string holding one utf8char per
// character. Each utf8char receives the bytes of its character first and is
// zero-filled after them.
// Throws (through myThrow) when s is not valid UTF-8 or when a decoded
// character does not span between 1 and 4 bytes.
utf8string splitAsUtf8(std::string_view s)
{
  utf8string result;
  const char * beginPtr = s.data();
  const char * currentPtr = beginPtr;
  // utf8::is_valid and utf8::next take a past-the-end iterator, so the end is
  // data()+size(). Using size()-1 cut the last byte off: input ending with a
  // multi-byte character was rejected as invalid, the last character was
  // never returned, and empty input produced an out-of-range pointer.
  const char * endPtr = s.data()+s.size();

  if (!utf8::is_valid(beginPtr, endPtr))
    myThrow("Not a valid utf8 input");

  while (currentPtr < endPtr)
  {
    // Advances currentPtr past exactly one encoded character.
    utf8::next(currentPtr, endPtr);
    const auto nbBytes = currentPtr - beginPtr;
    if (nbBytes > 4 || nbBytes == 0)
      myThrow(fmt::format("Invalid utf8 character at index {}", beginPtr-s.data()));

    utf8char c = {}; // zero-initialised: unused trailing bytes stay 0
    for (int i = 0; i < nbBytes; i++)
      c[i] = beginPtr[i];

    beginPtr = currentPtr;
    result.push_back(c);
  }

  return result;
}
void warning(std::string_view message, const std::experimental::source_location & location)
{
fmt::print(stderr, "WARNING ({}) : {}\n", location, message);
......@@ -76,6 +83,21 @@ void myThrow(std::string_view message, const std::experimental::source_location
throw std::invalid_argument(fmt::format("from ({}) {}", location, message));
}
// Returns the decimal representation of number with a space inserted between
// groups of three digits, counted from the right (1234567 -> "1 234 567").
// A leading minus sign is kept attached to the first digit: it is not counted
// as a digit, so -123 yields "-123" instead of "- 123".
std::string int2HumanStr(int number)
{
  const std::string digits = std::to_string(number);
  // Index of the first real digit: skips the '-' of negative numbers.
  const std::string::size_type firstDigit = (number < 0) ? 1 : 0;

  std::string result;
  result.reserve(digits.size() + digits.size()/3);

  for (std::string::size_type i = 0; i < digits.size(); i++)
  {
    result.push_back(digits[i]);

    // Number of characters still to be written after this one.
    const std::string::size_type remaining = digits.size()-i-1;
    if (i >= firstDigit && remaining > 0 && remaining % 3 == 0)
      result.push_back(' ');
  }

  return result;
}
};
std::string_view operator+(std::string_view a, std::string_view b)
......
......@@ -10,7 +10,9 @@ int main(int argc, char * argv[])
Config config(argv[3], argv[1], argv[2]);
config.print(stdout);
config.printSize(stderr);
std::scanf("%*c");
return 0;
}
......
......@@ -33,7 +33,7 @@ class Config
std::unordered_map<std::string, int> colName2Index;
std::string rawInput;
std::vector<std::string_view> rawInputUtf8;
util::utf8string rawInputUtf8;
using ReferenceAndHypotheses = std::vector<std::string>;
using Line = std::vector<ReferenceAndHypotheses>;
......@@ -49,6 +49,7 @@ class Config
Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename);
void print(FILE * dest) const;
void printSize(FILE * dest);
};
#endif
......@@ -106,6 +106,25 @@ void Config::readTSVInput(std::string_view tsvFilename)
std::fclose(file);
}
// Prints to dest the memory footprint of rawInput and rawInputUtf8, one line
// each, followed by their total.
// A footprint is the size of the container object plus its capacity times the
// size of one element. It is printed divided by 10^unitPower and followed by
// the unit label.
// Footprints are kept in an unsigned size type so they do not wrap when they
// exceed the range of int. Element counts stay int because
// util::int2HumanStr takes an int.
void Config::printSize(FILE * dest)
{
  const int rawInputNbElements = static_cast<int>(rawInput.size());
  const std::string::size_type rawInputSize = sizeof rawInput + rawInput.capacity() * sizeof rawInput[0];
  // Each element of rawInputUtf8 holds 4 chars.
  const int rawInputUtf8NbElements = static_cast<int>(4*rawInputUtf8.size());
  const std::string::size_type rawInputUtf8Size = sizeof rawInputUtf8 + rawInputUtf8.capacity()* sizeof rawInputUtf8[0];
  const std::string::size_type totalSize = rawInputSize + rawInputUtf8Size;

  const std::string unit = "Mo";
  const int unitPower = 6;
  // 10^unitPower, computed directly instead of formatting a string and
  // parsing it back with the locale-dependent std::stof.
  double unitDivisor = 1.0;
  for (int i = 0; i < unitPower; i++)
    unitDivisor *= 10.0;
  const int valueWidth = 2+11-unitPower;

  fmt::print(dest, "{:<43} : {:<{}.2f} {}\n", fmt::format("{:<20} {:>12} elements", "rawInput", util::int2HumanStr(rawInputNbElements)), rawInputSize/unitDivisor, valueWidth, unit);
  fmt::print(dest, "{:<43} : {:<{}.2f} {}\n", fmt::format("{:<20} {:>12} elements", "rawInputUtf8", util::int2HumanStr(rawInputUtf8NbElements)), rawInputUtf8Size/unitDivisor, valueWidth, unit);
  fmt::print(dest, "{:<43} : {:<{}.2f} {}\n", "Total", totalSize/unitDivisor, valueWidth, unit);
}
Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename)
{
if (tsvFilename.empty() and rawFilename.empty())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment