From d41dffa59a55fdf8032507c3d519c4de3fe95d47 Mon Sep 17 00:00:00 2001 From: Franck Dary <franck.dary@lis-lab.fr> Date: Sun, 15 Dec 2019 15:55:09 +0100 Subject: [PATCH] SubConfig working properly --- CMakeLists.txt | 6 +-- common/include/utf8string.hpp | 66 ++++++++++++++++++++++++++ common/include/util.hpp | 50 ++++++++++---------- common/src/utf8string.cpp | 67 +++++++++++++++++++++++++++ common/src/util.cpp | 36 +++----------- dev/src/dev.cpp | 4 +- reading_machine/include/Config.hpp | 14 ++++-- reading_machine/include/SubConfig.hpp | 2 +- reading_machine/src/Config.cpp | 33 +++++++++++-- reading_machine/src/SubConfig.cpp | 26 ++--------- 10 files changed, 215 insertions(+), 89 deletions(-) create mode 100644 common/include/utf8string.hpp create mode 100644 common/src/utf8string.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index e305461..1bef268 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,12 +15,12 @@ set(CMAKE_VERBOSE_MAKEFILE 0) set(CMAKE_CXX_STANDARD 20) if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Debug) -# set(CMAKE_BUILD_TYPE Release) +# set(CMAKE_BUILD_TYPE Debug) + set(CMAKE_BUILD_TYPE Release) endif() set(CMAKE_CXX_FLAGS "-Wall -Wextra") -set(CMAKE_CXX_FLAGS_DEBUG "-g3 -Ofast") +set(CMAKE_CXX_FLAGS_DEBUG "-g3") set(CMAKE_CXX_FLAGS_RELEASE "-Ofast") include_directories(common/include) diff --git a/common/include/utf8string.hpp b/common/include/utf8string.hpp new file mode 100644 index 0000000..0a3ff54 --- /dev/null +++ b/common/include/utf8string.hpp @@ -0,0 +1,66 @@ +#ifndef UTF8STRING__H +#define UTF8STRING__H + +#include <string> +#include <vector> +#include <array> +#include <fmt/core.h> + +namespace util +{ + +class utf8char : public std::array<char, 4> +{ + public : + + utf8char(); + utf8char & operator=(char other); + utf8char & operator=(const std::string & other); + bool operator==(char other); + bool operator==(const std::string & other); + bool operator!=(char other); +}; + +class utf8string : public std::vector<utf8char> +{ + public : + + utf8string & operator=(const std::string & other); + utf8string & operator=(const char * const other); + bool operator==(const std::string & other); +}; + +}; + +template <> +struct fmt::formatter<util::utf8char> +{ + constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } + + template <typename FormatContext> + auto format(const util::utf8char & c, FormatContext & ctx) + { + std::string result = ""; + for (char cc : c) + if (cc) + result += cc; + return format_to(ctx.out(), "{}", result); + } +}; + +template <> +struct fmt::formatter<util::utf8string> +{ + constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } + + template <typename FormatContext> + auto format(const util::utf8string & s, FormatContext & ctx) + { + std::string result; + for (auto & c : s) + result = fmt::format("{}{}", result, c); + return format_to(ctx.out(), "{}", result); + } +}; + +#endif diff --git a/common/include/util.hpp b/common/include/util.hpp index 3351b45..17d4ee8 100644 --- a/common/include/util.hpp +++ b/common/include/util.hpp @@ -22,50 +22,53 @@ #include <experimental/source_location> #include <boost/flyweight.hpp> #include "utf8.hpp" +#include "utf8string.hpp" namespace util { -typedef std::array<char, 4> utf8char; -typedef std::basic_string<utf8char> utf8string; - -std::string_view getFilenameFromPath(std::string_view s); - -std::vector<std::string_view> split(std::string_view s, char delimiter); -utf8string splitAsUtf8(std::string_view s); void warning(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current()); void error(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current()); void error(const std::exception & e, const std::experimental::source_location & location = std::experimental::source_location::current()); void myThrow(std::string_view message, const std::experimental::source_location & location = std::experimental::source_location::current()); +std::string_view getFilenameFromPath(std::string_view s); + +std::vector<std::string_view> split(std::string_view s, char delimiter); + +utf8string splitAsUtf8(std::string_view s); + std::string int2HumanStr(int number); -bool isEmpty(const std::string & s); -bool isEmpty(const boost::flyweight<std::string> & s); -}; +template <typename T> +bool isEmpty(const std::vector<T> & s) +{ + return s.empty(); +} -template <> -struct fmt::formatter<std::experimental::source_location> +template <typename T> +bool isEmpty(const std::basic_string<T> & s) { - constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } + return s.empty(); +} + +template <typename T> +bool isEmpty(const boost::flyweight<T> & s) +{ + return isEmpty(s.get()); +} - template <typename FormatContext> - auto format(const std::experimental::source_location & d, FormatContext & ctx) - { - return format_to(ctx.out(), "{},l.{},'{}'", util::getFilenameFromPath(d.file_name()), d.line(), d.function_name()); - } }; template <> -struct fmt::formatter<util::utf8char> +struct fmt::formatter<std::experimental::source_location> { constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } template <typename FormatContext> - auto format(const util::utf8char & c, FormatContext & ctx) + auto format(const std::experimental::source_location & d, FormatContext & ctx) { - char * asPtr = (char*)&c; - return format_to(ctx.out(), "{}{}{}{}", asPtr[0] ? asPtr[0] : '\0', asPtr[1] ? asPtr[1] : '\0', asPtr[2] ? asPtr[2] : '\0', asPtr[3] ? asPtr[3] : '\0'); + return format_to(ctx.out(), "{},l.{},'{}'", util::getFilenameFromPath(d.file_name()), d.line(), d.function_name()); } }; @@ -81,7 +84,4 @@ struct fmt::formatter<boost::flyweight<T>> } }; -std::string_view operator+(std::string_view a, std::string_view b); -void operator+=(std::string_view & a, std::string_view b); - #endif diff --git a/common/src/utf8string.cpp b/common/src/utf8string.cpp new file mode 100644 index 0000000..688b607 --- /dev/null +++ b/common/src/utf8string.cpp @@ -0,0 +1,67 @@ +#include "utf8string.hpp" +#include "util.hpp" + +util::utf8char::utf8char() +{ + for (auto & val : (*this)) + val = '\0'; +} + +util::utf8char & util::utf8char::operator=(char other) +{ + (*this)[0] = other; + return *this; +} + +util::utf8char & util::utf8char::operator=(const std::string & other) +{ + auto splited = splitAsUtf8(other); + if (splited.size() > 1) + myThrow(fmt::format("Assigning invalid utf8 character '{}'", other)); + return *this = splited[0]; +} + +bool util::utf8char::operator==(const std::string & other) +{ + auto splited = splitAsUtf8(other); + if (splited.size() > 1) + myThrow(fmt::format("Comparing with invalid utf8 character '{}'", other)); + return *this == splited[0]; +} + +bool util::utf8char::operator==(char other) +{ + return (*this)[0] == other && !(*this)[1] && !(*this)[2] && !(*this)[3]; +} + +bool util::utf8char::operator!=(char other) +{ + return ! (*this==other); +} + +util::utf8string & util::utf8string::operator=(const std::string & other) +{ + auto splited = splitAsUtf8(other); + resize(splited.size()); + for (unsigned int i = 0; i < splited.size(); i++) + (*this)[i] = splited[i]; + return *this; +} + +util::utf8string & util::utf8string::operator=(const char * const other) +{ + return operator=(std::string(other)); +} + +bool util::utf8string::operator==(const std::string & other) +{ + if (size() != other.size()) + return false; + + for (unsigned int i = 0; i < other.size(); i++) + if ((*this)[i] != other[i]) + return false; + + return true; +} + diff --git a/common/src/util.cpp b/common/src/util.cpp index 0c53e98..288b9a9 100644 --- a/common/src/util.cpp +++ b/common/src/util.cpp @@ -42,17 +42,18 @@ utf8string splitAsUtf8(std::string_view s) utf8string result; const char * beginPtr = s.data(); const char * currentPtr = beginPtr; - const char * endPtr = s.data()+s.size()-1; - - if (!utf8::is_valid(beginPtr, endPtr)) - myThrow("Not a valid utf8 input"); + const char * endPtr = s.data()+s.size(); while (currentPtr < endPtr) { - utf8::next(currentPtr, endPtr); + try {utf8::next(currentPtr, endPtr);} + catch (std::exception &) + { + break; + } if (currentPtr - beginPtr > 4 || currentPtr - beginPtr == 0) myThrow(fmt::format("Invalid utf8 character at index {}", beginPtr-s.data())); - utf8char c = {}; + utf8char c; for (int i = 0; i < currentPtr - beginPtr; i++) ((char*)&c)[i] = beginPtr[i]; beginPtr = currentPtr; @@ -98,28 +99,5 @@ std::string int2HumanStr(int number) return result; } -bool isEmpty(const std::string & s) -{ - return s.empty(); -} - -bool isEmpty(const boost::flyweight<std::string> & s) -{ - return s.get().empty(); -} - }; -std::string_view operator+(std::string_view a, std::string_view b) -{ - if (b.data() <= a.data()) - util::myThrow(fmt::format("std::string_view a+b with b <= a")); - - return std::string_view(a.data(), b.data()-a.data()+b.size()); -} - -void operator+=(std::string_view & a, std::string_view b) -{ - a = a + b; -} - diff --git a/dev/src/dev.cpp b/dev/src/dev.cpp index 5ebd435..9eba067 100644 --- a/dev/src/dev.cpp +++ b/dev/src/dev.cpp @@ -13,10 +13,10 @@ int main(int argc, char * argv[]) std::vector<SubConfig> configs; - for (int i = 0; i < 1; i++) + for (int i = 0; i < 2; i++) configs.emplace_back(config); - + configs[0].print(stdout); fmt::print(stderr, "ok\n"); std::scanf("%*c"); diff --git a/reading_machine/include/Config.hpp b/reading_machine/include/Config.hpp index a834496..1819615 100644 --- a/reading_machine/include/Config.hpp +++ b/reading_machine/include/Config.hpp @@ -5,6 +5,7 @@ #include <string> #include <vector> #include <boost/flyweight.hpp> +#include "util.hpp" class Config { @@ -17,12 +18,13 @@ class Config private : - //using String = boost::flyweight<std::string>; - using String = std::string; - std::vector<String> lines; + using String = boost::flyweight<std::string>; + using Utf8String = boost::flyweight<util::utf8string>; using ValueIterator = std::vector<String>::iterator; using ConstValueIterator = std::vector<String>::const_iterator; + std::vector<String> lines; + protected : virtual std::size_t getNbColumns() const = 0; @@ -39,15 +41,19 @@ class Config void resizeLines(unsigned int nbLines); String & get(const std::string & colName, int lineIndex, int hypothesisIndex); String & get(int colIndex, int lineIndex, int hypothesisIndex); + const String & getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const; + const String & getConst(int colIndex, int lineIndex, int hypothesisIndex) const; String & getLastNotEmpty(const std::string & colName, int lineIndex); String & getLastNotEmpty(int colIndex, int lineIndex); + const String & getLastNotEmptyConst(const std::string & colName, int lineIndex) const; + const String & getLastNotEmptyConst(int colIndex, int lineIndex) const; ValueIterator getIterator(int colIndex, int lineIndex, int hypothesisIndex); ConstValueIterator getConstIterator(int colIndex, int lineIndex, int hypothesisIndex) const; public : virtual ~Config() {} - void print(FILE * dest); + void print(FILE * dest) const; }; #endif diff --git a/reading_machine/include/SubConfig.hpp b/reading_machine/include/SubConfig.hpp index daaa0cc..5a6db8e 100644 --- a/reading_machine/include/SubConfig.hpp +++ b/reading_machine/include/SubConfig.hpp @@ -25,7 +25,7 @@ class SubConfig : public Config public : SubConfig(BaseConfig & model); - void update(); + bool update(); }; #endif diff --git a/reading_machine/src/Config.cpp b/reading_machine/src/Config.cpp index cddcdfe..839cca9 100644 --- a/reading_machine/src/Config.cpp +++ b/reading_machine/src/Config.cpp @@ -27,23 +27,33 @@ Config::String & Config::get(const std::string & colName, int lineIndex, int hyp return get(getColIndex(colName), lineIndex, hypothesisIndex); } +const Config::String & Config::getConst(const std::string & colName, int lineIndex, int hypothesisIndex) const +{ + return getConst(getColIndex(colName), lineIndex, hypothesisIndex); +} + Config::String & Config::get(int colIndex, int lineIndex, int hypothesisIndex) { return *getIterator(colIndex, lineIndex, hypothesisIndex); } +const Config::String & Config::getConst(int colIndex, int lineIndex, int hypothesisIndex) const +{ + return *getConstIterator(colIndex, lineIndex, hypothesisIndex); +} + std::size_t Config::getNbLines() const { return lines.size() / getIndexOfCol(getNbColumns()); } -void Config::print(FILE * dest) +void Config::print(FILE * dest) const { for (unsigned int line = 0; line < getNbLines(); line++) { for (unsigned int i = 0; i < getNbColumns()-1; i++) - fmt::print(dest, "{}{}", getLastNotEmpty(i, line), i < getNbColumns()-2 ? "\t" : "\n"); - if (getLastNotEmpty(EOSColName, line) == EOSSymbol1) + fmt::print(dest, "{}{}", getLastNotEmptyConst(i, getFirstLineIndex()+line), i < getNbColumns()-2 ? "\t" : "\n"); + if (getLastNotEmptyConst(EOSColName, getFirstLineIndex()+line) == EOSSymbol1) fmt::print(dest, "\n"); } } @@ -51,6 +61,18 @@ void Config::print(FILE * dest) Config::String & Config::getLastNotEmpty(int colIndex, int lineIndex) { int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex); + + for (int i = nbHypothesesMax; i > 0; --i) + if (!util::isEmpty(lines[baseIndex+i])) + return lines[baseIndex+i]; + + return lines[baseIndex]; +} + +const Config::String & Config::getLastNotEmptyConst(int colIndex, int lineIndex) const +{ + int baseIndex = getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex); + for (int i = nbHypothesesMax; i > 0; --i) if (!util::isEmpty(lines[baseIndex+i])) return lines[baseIndex+i]; @@ -63,6 +85,11 @@ Config::String & Config::getLastNotEmpty(const std::string & colName, int lineIn return getLastNotEmpty(getColIndex(colName), lineIndex); } +const Config::String & Config::getLastNotEmptyConst(const std::string & colName, int lineIndex) const +{ + return getLastNotEmptyConst(getColIndex(colName), lineIndex); +} + Config::ValueIterator Config::getIterator(int colIndex, int lineIndex, int hypothesisIndex) { return lines.begin() + getIndexOfLine(lineIndex-getFirstLineIndex()) + getIndexOfCol(colIndex) + hypothesisIndex; diff --git a/reading_machine/src/SubConfig.cpp b/reading_machine/src/SubConfig.cpp index 75c6de6..af9c3af 100644 --- a/reading_machine/src/SubConfig.cpp +++ b/reading_machine/src/SubConfig.cpp @@ -4,47 +4,33 @@ SubConfig::SubConfig(BaseConfig & model) : model(model) { firstLineIndex = 0; update(); - update(); - print(stdout); } -void SubConfig::update() +bool SubConfig::update() { - fmt::print(stderr, "Begin\n"); unsigned int currentLastLineIndex = firstLineIndex + getNbLines(); if (currentLastLineIndex >= model.getNbLines()-1) - return; + return false; - unsigned int newFirstLineIndex = 0.8*currentLastLineIndex; + unsigned int newFirstLineIndex = firstLineIndex + 0.8*(currentLastLineIndex-firstLineIndex); unsigned int newLastLineIndex = std::min(newFirstLineIndex + spanSize, model.getNbLines()); unsigned int newLineNumber = newLastLineIndex - newFirstLineIndex; - fmt::print(stderr, "FirstlineIndex = {}\n", firstLineIndex); - fmt::print(stderr, "newFirstlineIndex = {}\n", newFirstLineIndex); - if (getNbLines() < newLineNumber) - { - fmt::print(stderr, "Resizing because {} < {}\n", getNbLines(), newLineNumber); resizeLines(newLineNumber); - } { auto linesBegin = getIterator(0, firstLineIndex, 0); auto firstToSave = getConstIterator(0, newFirstLineIndex, 0); auto lastToSave = getConstIterator(0, currentLastLineIndex, 0); - fmt::print(stderr, "Copying from {} to {} into {}\n", newFirstLineIndex, currentLastLineIndex, firstLineIndex); - while (firstToSave != lastToSave) (*linesBegin++) = (*firstToSave++); } if (getNbLines() > newLineNumber) - { - fmt::print(stderr, "Resizing because {} > {}\n", getNbLines(), newLineNumber); resizeLines(newLineNumber); - } { unsigned int nbLinesCopied = currentLastLineIndex - newFirstLineIndex; @@ -52,17 +38,13 @@ void SubConfig::update() auto firstToSave = model.getConstIterator(0, currentLastLineIndex, 0); auto lastToSave = model.getConstIterator(0, newLastLineIndex, 0); - fmt::print(stderr, "Copying from {} to {} into {}\n", currentLastLineIndex, newLastLineIndex, firstLineIndex+nbLinesCopied); - if (newlinesBegin < getIterator(0, 0, 0)) - fmt::print(stderr, "Bug\n"); - while (firstToSave != lastToSave) (*newlinesBegin++) = (*firstToSave++); firstLineIndex = newFirstLineIndex; } - fmt::print(stderr, "End\n"); + return true; } std::size_t SubConfig::getNbColumns() const -- GitLab