Skip to content
Snippets Groups Projects
Commit fc0a942e authored by Franck Dary's avatar Franck Dary
Browse files

Working first version

parent e7e29b09
No related branches found
No related tags found
No related merge requests found
...@@ -35,8 +35,10 @@ include_directories(trainer/include) ...@@ -35,8 +35,10 @@ include_directories(trainer/include)
include_directories(decoder/include) include_directories(decoder/include)
include_directories(macaon/include) include_directories(macaon/include)
include_directories(utf8) include_directories(utf8)
include_directories(pugixml/include)
add_subdirectory(fmt) add_subdirectory(fmt)
add_subdirectory(pugixml)
add_subdirectory(common) add_subdirectory(common)
add_subdirectory(reading_machine) add_subdirectory(reading_machine)
add_subdirectory(torch_modules) add_subdirectory(torch_modules)
......
...@@ -3,4 +3,5 @@ FILE(GLOB SOURCES src/*.cpp) ...@@ -3,4 +3,5 @@ FILE(GLOB SOURCES src/*.cpp)
add_library(decoder STATIC ${SOURCES}) add_library(decoder STATIC ${SOURCES})
target_link_libraries(decoder reading_machine) target_link_libraries(decoder reading_machine)
target_link_libraries(decoder Boost) target_link_libraries(decoder Boost)
target_link_libraries(decoder pugixml)
...@@ -2,25 +2,67 @@ ...@@ -2,25 +2,67 @@
#define PRODUCER__H #define PRODUCER__H
#include <filesystem> #include <filesystem>
#include "Config.hpp" #include "BaseConfig.hpp"
#include "pugixml.hpp"
/************************************************************
* SCHNAPPIPARSER
************************************************************/
// Wraps a pugixml document together with a cursor over the children of its
// <session>/<event_log> element. The cursor is read by getCurrentNodeText(),
// moved by next(), tested by hasNext(), and used by insertAction() as the
// insertion point for new <macaon_action> nodes.
class SchnappiParser
{
private:
// Parsed XML document; the two handles below are obtained from it.
pugi::xml_document doc;
// The <event_log> child of the document's <session> element.
pugi::xml_node events_log;
// Cursor over the children of events_log; equals events_log.end() once exhausted.
pugi::xml_node_iterator eventIterator;
public:
// Loads the DOM-tree of the given XML file and places the cursor on the
// first child of <session>/<event_log>. A load failure is reported through
// util::error.
SchnappiParser(std::string input);
// Returns the text found in the first child of the current node's <text> element.
// Does not move the cursor.
std::string getCurrentNodeText();
// Advances the cursor at least once, then keeps advancing past <event> nodes
// that carry a "type" attribute different from "ins". Stops at the end of
// the event log at the latest.
void next();
// Returns true while the cursor has not reached the end of the event log.
bool hasNext();
// Adds a <macaon_action action="..."> node holding macaonAction: before the
// current node when the cursor is valid, otherwise appended at the end of
// the event log.
void insertAction(std::string macaonAction);
// Write the DOM-tree into the file at the specified path.
void saveFile(const char * output) const;
};
/************************************************************
* PRODUCER
************************************************************/
class Producer class Producer
{ {
private : private :
static constexpr int maxNb = 100; static constexpr int lookahead = 2;
int curNb = 0;
std::filesystem::path input, output; std::filesystem::path input, output;
std::vector<std::string> sequence;
SchnappiParser parser;
public : public :
Producer(std::filesystem::path input, std::filesystem::path output); Producer(std::filesystem::path input, std::filesystem::path output);
bool apply(Config & config); bool apply(BaseConfig & config);
void addConfigToSequence(const Config & config); void addConfigToSequence(const BaseConfig & config);
void writeOutputFile() const; void writeOutputFile() const;
private:
int parseInput();
}; };
#endif #endif
#include "Producer.hpp" #include "Producer.hpp"
#include <string>
#include <list>
Producer::Producer(std::filesystem::path input, std::filesystem::path output) : input(input), output(output) SchnappiParser::SchnappiParser(std::string input)
{ {
if (!doc.load_file(input.c_str(), pugi::parse_ws_pcdata_single))
{
util::error(fmt::format("Could not read file '{}'", input));
return;
}
events_log = doc.child("session").child("event_log");
eventIterator = events_log.begin();
} }
// Add one or more characters to config's rawInput. // Jumps to the next event node.
// Returns false if we are finished and true if we have events remaining. void SchnappiParser::next()
bool Producer::apply(Config & config)
{ {
if (util::choiceWithProbability(0.05)) do
{ {
config.rawInputAdd("."); ++eventIterator;
config.rawInputAdd(" "); } while (eventIterator != events_log.end() and (eventIterator->name() == std::string("event") and eventIterator->attribute("type") and eventIterator->attribute("type").as_string() != std::string("ins")));
sequence.push_back("<addletter \".\">");
sequence.push_back("<addletter \" \">");
} }
else if (util::choiceWithProbability(0.8))
// Returns, as a string, the text of the current event node.
std::string SchnappiParser::getCurrentNodeText()
{ {
auto letter = fmt::format("{}", (char) ('a'+rand()%26)); return eventIterator->child("text").first_child().text().as_string();
config.rawInputAdd(letter);
sequence.push_back(fmt::format("<addletter \"{}\">", letter));
} }
// Tells whether the cursor still designates a node of the event log,
// i.e. whether it has not yet reached events_log.end().
bool SchnappiParser::hasNext()
{
  return eventIterator != events_log.end();
}
// Splits the given macaonAction and adds it under the current event in the DOM-tree.
// /!\ TODO For now, a default format has been defined to test the implementation.
// This has to be modified according to the actual format of the specified string.
// The test format was "field1=value1§field2=value2§field3=value3§", and gives the
// XML tag: <macaon_action field1="value1" field2="value2" field3="value3" />
// See example.cpp for an applicable example.
void SchnappiParser::insertAction(std::string macaonAction)
{
pugi::xml_node action;
if (hasNext())
action = events_log.insert_child_before("macaon_action", *eventIterator);
else else
action = events_log.append_child("macaon_action");
pugi::xml_attribute attr = action.append_attribute("action");
attr.set_value(macaonAction.c_str());
}
// Write the DOM-tree into the file at the specified path.
void SchnappiParser::saveFile(const char * output) const
{
doc.save_file(output);
}
Producer::Producer(std::filesystem::path input, std::filesystem::path output) : input(input), output(output), parser(SchnappiParser(input.string()))
{ {
config.rawInputAdd(" ");
sequence.push_back("<addletter \" \">");
} }
curNb++; // Add one or more characters to config's rawInput.
return curNb < maxNb; // Returns false if we are finished and true if we have events remaining.
bool Producer::apply(BaseConfig & config)
{
auto text = util::splitAsUtf8(parser.getCurrentNodeText());
if (text.empty())
util::error("Trying to add empty text to raw input.");
for (auto c : text)
config.rawInputAdd(c);
if (parser.hasNext())
parser.next();
return parser.hasNext();
} }
// Adds an event in the sequence that represent the current config state. // Add the newest action of macaon in the SchnappiFile
void Producer::addConfigToSequence(const Config & config) void Producer::addConfigToSequence(const BaseConfig & config)
{ {
sequence.push_back(fmt::format("<action \"{}\">", config.getHistory(0))); parser.insertAction(config.getHistory(0));
} }
// Writes the entire sequence to the output file. // Writes the SchnappiFile into the output file.
void Producer::writeOutputFile() const void Producer::writeOutputFile() const
{ {
std::FILE * outputFile = output.empty() ? stdout : std::fopen(output.c_str(), "w"); parser.saveFile(output.c_str());
for (auto & event : sequence)
fmt::print(outputFile, "{}\n", event);
} }
# Collect every .cpp file found directly under src/ into SOURCES.
FILE(GLOB SOURCES src/*.cpp)
# Build those sources into a static library target named pugixml.
add_library(pugixml STATIC ${SOURCES})
/**
* pugixml parser - version 1.11
* --------------------------------------------------------
* Copyright (C) 2006-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at https://pugixml.org/
*
* This library is distributed under the MIT License. See notice at the end
* of this file.
*
* This work is based on the pugxml parser, which is:
* Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
*/
#ifndef HEADER_PUGICONFIG_HPP
#define HEADER_PUGICONFIG_HPP
// Uncomment this to enable wchar_t mode
// #define PUGIXML_WCHAR_MODE
// Uncomment this to enable compact mode
// #define PUGIXML_COMPACT
// Uncomment this to disable XPath
// #define PUGIXML_NO_XPATH
// Uncomment this to disable STL
// #define PUGIXML_NO_STL
// Uncomment this to disable exceptions
// #define PUGIXML_NO_EXCEPTIONS
// Set this to control attributes for public classes/functions, i.e.:
// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
// Tune these constants to adjust memory-related behavior
// #define PUGIXML_MEMORY_PAGE_SIZE 32768
// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
// Tune this constant to adjust max nesting for XPath queries
// #define PUGIXML_XPATH_DEPTH_LIMIT 1024
// Uncomment this to switch to header-only version
// #define PUGIXML_HEADER_ONLY
// Uncomment this to enable long long support
// #define PUGIXML_HAS_LONG_LONG
#endif
/**
* Copyright (c) 2006-2020 Arseny Kapoulkine
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment