Skip to content
Snippets Groups Projects
Commit 5646953f authored by Franck Dary's avatar Franck Dary
Browse files

Optimization of Config size and added new types utf8char and utf8string

parent c0f915b2
No related branches found
No related tags found
No related merge requests found
...@@ -16,22 +16,29 @@ ...@@ -16,22 +16,29 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <array>
#include <fmt/core.h> #include <fmt/core.h>
#include <experimental/source_location> #include <experimental/source_location>
#include "utf8.hpp" #include "utf8.hpp"
namespace util namespace util
{ {
typedef std::array<char, 4> utf8char;
typedef std::basic_string<utf8char> utf8string;
std::string_view getFilenameFromPath(std::string_view s); std::string_view getFilenameFromPath(std::string_view s);
std::vector<std::string_view> splitAsUtf8(std::string_view s);
std::vector<std::string_view> split(std::string_view s, char delimiter); std::vector<std::string_view> split(std::string_view s, char delimiter);
utf8string splitAsUtf8(std::string_view s);
void warning(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current()); void warning(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current());
void error(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current()); void error(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current());
void error(const std::exception & e, const std::experimental::source_location & location = std::experimental::source_location::current()); void error(const std::exception & e, const std::experimental::source_location & location = std::experimental::source_location::current());
void myThrow(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current()); void myThrow(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current());
std::string int2HumanStr(int number);
}; };
template <> template <>
...@@ -46,6 +53,19 @@ struct fmt::formatter<std::experimental::source_location> ...@@ -46,6 +53,19 @@ struct fmt::formatter<std::experimental::source_location>
} }
}; };
/// fmt formatter for util::utf8char (a std::array<char, 4>).
///
/// Writes the stored bytes of the character to the output, skipping the
/// zero bytes: splitAsUtf8 zero-initializes the array and fills only the
/// bytes of the encoded character, so zero bytes are padding and must not
/// end up as NUL characters in the formatted text.
/// No format specification is supported ("{}" only).
template <>
struct fmt::formatter<util::utf8char>
{
  /// Nothing to parse: the formatter accepts no format specification.
  constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); }

  /// Appends the non-zero bytes of `c` to the context output iterator
  /// and returns the advanced iterator.
  template <typename FormatContext>
  auto format(const util::utf8char & c, FormatContext & ctx) const
  {
    auto out = ctx.out();
    for (const char byte : c)
    {
      // Skip padding bytes instead of emitting '\0' into the output.
      if (byte == '\0')
        continue;
      *out++ = byte;
    }
    return out;
  }
};
std::string_view operator+(std::string_view a, std::string_view b); std::string_view operator+(std::string_view a, std::string_view b);
void operator+=(std::string_view & a, std::string_view b); void operator+=(std::string_view & a, std::string_view b);
......
...@@ -20,24 +20,6 @@ std::string_view getFilenameFromPath(std::string_view s) ...@@ -20,24 +20,6 @@ std::string_view getFilenameFromPath(std::string_view s)
return {s.data()+indexOfSlash+1, s.size()-1-indexOfSlash}; return {s.data()+indexOfSlash+1, s.size()-1-indexOfSlash};
} }
// Split `s` into one string_view per UTF-8 encoded character.
//
// The returned views point into the memory referenced by `s`; they are only
// valid as long as that memory is.
// Decoding stops silently at the first sequence utf8::next cannot decode:
// the characters decoded before it are returned, the rest is ignored.
std::vector<std::string_view> splitAsUtf8(std::string_view s)
{
  std::vector<std::string_view> result;

  const char * beginPtr = s.data();
  const char * currentPtr = beginPtr;
  // One-past-the-end pointer, as expected by utf8::next. Using size()-1 here
  // would drop the last byte and point before the buffer for an empty input.
  const char * const endPtr = s.data()+s.size();

  while (currentPtr != endPtr)
  {
    try
    {
      utf8::next(currentPtr, endPtr);
    }
    catch (const std::exception &)
    {
      // Invalid or truncated sequence: keep what was decoded so far.
      break;
    }

    result.emplace_back(beginPtr, static_cast<std::string_view::size_type>(currentPtr-beginPtr));
    beginPtr = currentPtr;
  }

  return result;
}
std::vector<std::string_view> split(std::string_view remaining, char delimiter) std::vector<std::string_view> split(std::string_view remaining, char delimiter)
{ {
std::vector<std::string_view> result; std::vector<std::string_view> result;
...@@ -55,6 +37,31 @@ std::vector<std::string_view> split(std::string_view remaining, char delimiter) ...@@ -55,6 +37,31 @@ std::vector<std::string_view> split(std::string_view remaining, char delimiter)
return result; return result;
} }
// Convert `s` into a utf8string holding one utf8char per UTF-8 encoded
// character of `s`.
//
// Each utf8char is zero-initialized, then the bytes of the encoded character
// are copied into its first slots; unused trailing slots stay zero.
// Calls myThrow if `s` is not valid UTF-8, or if a decoded character spans
// zero bytes or more than 4 bytes.
utf8string splitAsUtf8(std::string_view s)
{
  utf8string result;

  const char * beginPtr = s.data();
  const char * currentPtr = beginPtr;
  // One-past-the-end pointer, as expected by utf8::is_valid / utf8::next.
  // Using size()-1 here would cut the last character (rejecting valid input
  // ending with a multi-byte character) and point before the buffer when
  // `s` is empty.
  const char * const endPtr = s.data()+s.size();

  if (!utf8::is_valid(beginPtr, endPtr))
    myThrow("Not a valid utf8 input");

  while (currentPtr < endPtr)
  {
    // Advances currentPtr past the next encoded character.
    utf8::next(currentPtr, endPtr);

    const auto nbBytes = currentPtr - beginPtr;
    if (nbBytes > 4 || nbBytes == 0)
      myThrow(fmt::format("Invalid utf8 character at index {}", beginPtr-s.data()));

    utf8char c = {};
    for (int i = 0; i < nbBytes; i++)
      c[i] = beginPtr[i];

    beginPtr = currentPtr;
    result.push_back(c);
  }

  return result;
}
void warning(std::string_view message, const std::experimental::source_location & location) void warning(std::string_view message, const std::experimental::source_location & location)
{ {
fmt::print(stderr, "WARNING ({}) : {}\n", location, message); fmt::print(stderr, "WARNING ({}) : {}\n", location, message);
...@@ -76,6 +83,21 @@ void myThrow(std::string_view message, const std::experimental::source_location ...@@ -76,6 +83,21 @@ void myThrow(std::string_view message, const std::experimental::source_location
throw std::invalid_argument(fmt::format("from ({}) {}", location, message)); throw std::invalid_argument(fmt::format("from ({}) {}", location, message));
} }
// Format `number` in base 10 with a space between each group of three
// digits, counted from the right (e.g. 1234567 -> "1 234 567").
//
// A leading minus sign is kept attached to the first digit group: it is
// never followed by a separator (-123 -> "-123", not "- 123").
std::string int2HumanStr(int number)
{
  const std::string digits = std::to_string(number);
  // Index of the first real digit: skips the '-' of negative numbers so the
  // sign is not mistaken for a digit when placing separators.
  const std::size_t firstDigit = (number < 0) ? 1 : 0;

  std::string result;
  result.reserve(digits.size() + digits.size()/3);

  for (std::size_t i = 0; i < digits.size(); i++)
  {
    result.push_back(digits[i]);

    // Number of characters still to be written after this one.
    const std::size_t remaining = digits.size()-i-1;
    if (i >= firstDigit && remaining > 0 && remaining % 3 == 0)
      result.push_back(' ');
  }

  return result;
}
}; };
std::string_view operator+(std::string_view a, std::string_view b) std::string_view operator+(std::string_view a, std::string_view b)
......
...@@ -10,7 +10,9 @@ int main(int argc, char * argv[]) ...@@ -10,7 +10,9 @@ int main(int argc, char * argv[])
Config config(argv[3], argv[1], argv[2]); Config config(argv[3], argv[1], argv[2]);
config.print(stdout); config.printSize(stderr);
std::scanf("%*c");
return 0; return 0;
} }
......
...@@ -33,7 +33,7 @@ class Config ...@@ -33,7 +33,7 @@ class Config
std::unordered_map<std::string, int> colName2Index; std::unordered_map<std::string, int> colName2Index;
std::string rawInput; std::string rawInput;
std::vector<std::string_view> rawInputUtf8; util::utf8string rawInputUtf8;
using ReferenceAndHypotheses = std::vector<std::string>; using ReferenceAndHypotheses = std::vector<std::string>;
using Line = std::vector<ReferenceAndHypotheses>; using Line = std::vector<ReferenceAndHypotheses>;
...@@ -49,6 +49,7 @@ class Config ...@@ -49,6 +49,7 @@ class Config
Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename); Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename);
void print(FILE * dest) const; void print(FILE * dest) const;
void printSize(FILE * dest);
}; };
#endif #endif
...@@ -106,6 +106,25 @@ void Config::readTSVInput(std::string_view tsvFilename) ...@@ -106,6 +106,25 @@ void Config::readTSVInput(std::string_view tsvFilename)
std::fclose(file); std::fclose(file);
} }
// Print to `dest` an estimate of the memory used by rawInput and
// rawInputUtf8, followed by their total.
//
// For each container the estimate is sizeof(the container object) plus
// capacity() * sizeof(one element). Sizes are printed divided by
// 10^unitPower, with two decimals, followed by the unit label.
void Config::printSize(FILE * dest)
{
  // Byte counts are kept as std::size_t: storing them in int overflows as
  // soon as a container grows past INT_MAX bytes.
  const std::size_t rawInputNbElements = rawInput.size();
  const std::size_t rawInputSize = sizeof rawInput + rawInput.capacity() * sizeof rawInput[0];

  // NOTE(review): the count reported for rawInputUtf8 is 4 * size(), i.e.
  // the number of stored bytes rather than of utf8char elements - confirm
  // this is the intended figure.
  const std::size_t rawInputUtf8NbElements = 4*rawInputUtf8.size();
  const std::size_t rawInputUtf8Size = sizeof rawInputUtf8 + rawInputUtf8.capacity() * sizeof rawInputUtf8[0];

  const std::size_t totalSize = rawInputSize + rawInputUtf8Size;

  // Presumably "Mo" stands for megaoctets (10^6 bytes) - matches unitPower.
  const std::string unit = "Mo";
  constexpr int unitPower = 6;

  // 10^unitPower, exactly representable as a double for small powers.
  double unitDivisor = 1.0;
  for (int i = 0; i < unitPower; i++)
    unitDivisor *= 10.0;

  const auto toUnit = [unitDivisor](std::size_t nbBytes)
  {
    return static_cast<double>(nbBytes) / unitDivisor;
  };

  // util::int2HumanStr takes an int, so element counts are still narrowed
  // for display purposes.
  fmt::print(dest, "{:<43} : {:<{}.2f} {}\n", fmt::format("{:<20} {:>12} elements", "rawInput", util::int2HumanStr(static_cast<int>(rawInputNbElements))), toUnit(rawInputSize), 2+11-unitPower, unit);
  fmt::print(dest, "{:<43} : {:<{}.2f} {}\n", fmt::format("{:<20} {:>12} elements", "rawInputUtf8", util::int2HumanStr(static_cast<int>(rawInputUtf8NbElements))), toUnit(rawInputUtf8Size), 2+11-unitPower, unit);
  fmt::print(dest, "{:<43} : {:<{}.2f} {}\n", "Total", toUnit(totalSize), 2+11-unitPower, unit);
}
Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename) Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename)
{ {
if (tsvFilename.empty() and rawFilename.empty()) if (tsvFilename.empty() and rawFilename.empty())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment