Skip to content
Snippets Groups Projects
Commit c0f915b2 authored by Franck Dary's avatar Franck Dary
Browse files

Can read Config

parent 12f80204
No related branches found
No related tags found
No related merge requests found
......@@ -10,6 +10,8 @@ int main(int argc, char * argv[])
Config config(argv[3], argv[1], argv[2]);
config.print(stdout);
return 0;
}
......@@ -21,10 +21,16 @@
class Config
{
public :
static constexpr const char * EOSColName = "EOS";
static constexpr const char * EOSSymbol1 = "1";
static constexpr const char * EOSSymbol0 = "0";
private :
std::unordered_map<int, std::string> colIndex2Name;
std::unordered_map<int, std::string> colName2Index;
std::vector<std::string> colIndex2Name;
std::unordered_map<std::string, int> colName2Index;
std::string rawInput;
std::vector<std::string_view> rawInputUtf8;
......@@ -33,9 +39,16 @@ class Config
using Line = std::vector<ReferenceAndHypotheses>;
std::vector<Line> lines;
private :
void readMCD(std::string_view mcdFilename);
void readRawInput(std::string_view rawFilename);
void readTSVInput(std::string_view tsvFilename);
public :
Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename);
void print(FILE * dest) const;
};
#endif
......@@ -9,32 +9,32 @@
#include "Config.hpp"
#include "util.hpp"
Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename)
void Config::readMCD(std::string_view mcdFilename)
{
if (tsvFilename.empty() and rawFilename.empty())
util::myThrow("tsvFilename and rawFilenames can't be both empty");
if (mcdFilename.empty())
util::myThrow("mcdFilename can't be empty");
if (!colIndex2Name.empty())
util::myThrow("a mcd has already been read for this Config");
{
std::FILE * file = std::fopen(mcdFilename.data(), "r");
if (not file)
util::myThrow(fmt::format("Cannot open file '{}'", rawFilename));
util::myThrow(fmt::format("Cannot open file '{}'", mcdFilename));
char lineBuffer[1024];
while (std::fscanf(file, "%1023[^\n]\n", lineBuffer) == 1)
{
auto splited = util::split(lineBuffer, ' ');
if (splited.size() != 2)
util::myThrow(fmt::format("Invalid line '{}' for mcd '{}'", lineBuffer, mcdFilename));
colIndex2Name.emplace_back(lineBuffer);
colName2Index.emplace(lineBuffer, colIndex2Name.size()-1);
}
std::fclose(file);
if (colName2Index.count(EOSColName))
util::myThrow(fmt::format("mcd '{}' must not contain column '{}'", mcdFilename, EOSColName));
colIndex2Name.emplace_back(EOSColName);
colName2Index.emplace(EOSColName, colIndex2Name.size()-1);
}
if (not rawFilename.empty())
void Config::readRawInput(std::string_view rawFilename)
{
std::FILE * file = std::fopen(rawFilename.data(), "r");
......@@ -49,9 +49,87 @@ Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::
rawInputUtf8 = util::splitAsUtf8(rawInput);
}
if (not tsvFilename.empty())
/// Reads a tab-separated input file and appends one entry to `lines` per data row.
///
/// Line handling, in order:
///  - a line shorter than 3 characters (trailing newline included) is treated as
///    a blank separator: the EOS column of the most recently appended row is set
///    to EOSSymbol1. Such lines are ignored until a first data row has been read;
///  - a line starting with '#' is skipped;
///  - any other line is split on '\t' and stored column by column.
///
/// Every stored row gets one slot per entry of colIndex2Name, each initialised
/// with a single empty value. The EOS column is initialised to EOSSymbol0 and is
/// never overwritten by file content, even when the file has more fields than
/// there are declared columns (extra fields are ignored).
///
/// All data rows must have the same number of fields as the first data row.
///
/// @param tsvFilename Path of the file to read.
/// @throws whatever util::myThrow raises (presumably an exception — confirm in
///         util.hpp) when the file cannot be opened or a row has an unexpected
///         number of fields; std::out_of_range when EOSColName is not registered
///         in colName2Index (i.e. no mcd has been read yet).
void Config::readTSVInput(std::string_view tsvFilename)
{
  // at() instead of operator[]: a missing EOS column is an error, not a reason
  // to silently register it at index 0.
  const std::size_t eosIndex = static_cast<std::size_t>(colName2Index.at(EOSColName));
  const std::size_t nbColumns = colIndex2Name.size();

  // A string_view is not guaranteed to be NUL-terminated; fopen needs a C string.
  const std::string path(tsvFilename);
  std::FILE * file = std::fopen(path.c_str(), "r");
  if (not file)
    util::myThrow(fmt::format("Cannot open file '{}'", tsvFilename));

  // Closes the file on every exit path, including error reporting below.
  struct FileCloser
  {
    std::FILE * handle;
    ~FileCloser() { std::fclose(handle); }
  } closer{file};

  char lineBuffer[100000];
  int inputLineIndex = 0;
  bool inputHasBeenRead = false;
  int usualNbCol = -1;

  while (std::fgets(lineBuffer, static_cast<int>(sizeof lineBuffer), file) != nullptr)
  {
    std::string_view line(lineBuffer);
    inputLineIndex++;

    // Blank separator line: marks the end of the current sequence.
    if (line.size() < 3)
    {
      if (inputHasBeenRead)
        lines.back()[eosIndex][0] = EOSSymbol1;
      continue;
    }

    if (line[0] == '#')
      continue;

    if (line.back() == '\n')
      line.remove_suffix(1);

    inputHasBeenRead = true;

    auto splited = util::split(line, '\t');
    if (usualNbCol == -1)
      usualNbCol = static_cast<int>(splited.size());
    if (static_cast<int>(splited.size()) != usualNbCol)
      util::myThrow(fmt::format("in file {} line {} '{}' is invalid, it should have {} columns", tsvFilename, inputLineIndex, line, usualNbCol));

    auto & current = lines.emplace_back();
    for (std::size_t i = 0; i < nbColumns; i++)
    {
      current.emplace_back();
      current.back().emplace_back("");
    }
    current[eosIndex][0] = EOSSymbol0;

    // Copy the fields that map to a declared column; the EOS column is owned by
    // this reader and must not be clobbered by file content.
    for (std::size_t i = 0; i < splited.size() and i < nbColumns; i++)
      if (i != eosIndex)
        current[i][0] = splited[i];
  }
}
/// Builds a Config from a column description file and at least one input file.
///
/// The mcd file is always read first; the raw input and the tsv input are then
/// read, in that order, when their filename is not empty.
///
/// @param mcdFilename Path of the column description file; must not be empty.
/// @param tsvFilename Path of the tab-separated input; may be empty.
/// @param rawFilename Path of the raw text input; may be empty.
/// Errors are reported through util::myThrow when both inputs are empty or when
/// mcdFilename is empty.
Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename)
{
  const bool hasTsv = !tsvFilename.empty();
  const bool hasRaw = !rawFilename.empty();

  if (!hasTsv && !hasRaw)
  {
    util::myThrow("tsvFilename and rawFilenames can't be both empty");
  }
  if (mcdFilename.empty())
  {
    util::myThrow("mcdFilename can't be empty");
  }

  readMCD(mcdFilename);

  if (hasRaw)
  {
    readRawInput(rawFilename);
  }
  if (hasTsv)
  {
    readTSVInput(tsvFilename);
  }
}
void Config::print(FILE * dest) const
{
for (auto & line : lines)
{
for (unsigned int i = 0; i < line.size()-1; i++)
fmt::print(dest, "{}{}", line[i].back(), i < line.size()-2 ? "\t" : "\n");
if (line[colName2Index.at(EOSColName)].back() == EOSSymbol1)
fmt::print(dest, "\n");
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment