Skip to content
Snippets Groups Projects
Commit c0f915b2 authored by Franck Dary's avatar Franck Dary
Browse files

Can read Config

parent 12f80204
No related branches found
No related tags found
No related merge requests found
......@@ -10,6 +10,8 @@ int main(int argc, char * argv[])
Config config(argv[3], argv[1], argv[2]);
config.print(stdout);
return 0;
}
......@@ -21,10 +21,16 @@
class Config
{
public :
static constexpr const char * EOSColName = "EOS";
static constexpr const char * EOSSymbol1 = "1";
static constexpr const char * EOSSymbol0 = "0";
private :
std::unordered_map<int, std::string> colIndex2Name;
std::unordered_map<int, std::string> colName2Index;
std::vector<std::string> colIndex2Name;
std::unordered_map<std::string, int> colName2Index;
std::string rawInput;
std::vector<std::string_view> rawInputUtf8;
......@@ -33,9 +39,16 @@ class Config
using Line = std::vector<ReferenceAndHypotheses>;
std::vector<Line> lines;
private :
void readMCD(std::string_view mcdFilename);
void readRawInput(std::string_view rawFilename);
void readTSVInput(std::string_view tsvFilename);
public :
Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename);
void print(FILE * dest) const;
};
#endif
......@@ -9,49 +9,127 @@
#include "Config.hpp"
#include "util.hpp"
Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename)
void Config::readMCD(std::string_view mcdFilename)
{
if (tsvFilename.empty() and rawFilename.empty())
util::myThrow("tsvFilename and rawFilenames can't be both empty");
if (mcdFilename.empty())
util::myThrow("mcdFilename can't be empty");
if (!colIndex2Name.empty())
util::myThrow("a mcd has already been read for this Config");
std::FILE * file = std::fopen(mcdFilename.data(), "r");
if (not file)
util::myThrow(fmt::format("Cannot open file '{}'", mcdFilename));
char lineBuffer[1024];
while (std::fscanf(file, "%1023[^\n]\n", lineBuffer) == 1)
{
std::FILE * file = std::fopen(mcdFilename.data(), "r");
colIndex2Name.emplace_back(lineBuffer);
colName2Index.emplace(lineBuffer, colIndex2Name.size()-1);
}
if (not file)
util::myThrow(fmt::format("Cannot open file '{}'", rawFilename));
std::fclose(file);
char lineBuffer[1024];
while (std::fscanf(file, "%1023[^\n]\n", lineBuffer) == 1)
{
auto splited = util::split(lineBuffer, ' ');
if (colName2Index.count(EOSColName))
util::myThrow(fmt::format("mcd '{}' must not contain column '{}'", mcdFilename, EOSColName));
colIndex2Name.emplace_back(EOSColName);
colName2Index.emplace(EOSColName, colIndex2Name.size()-1);
}
if (splited.size() != 2)
util::myThrow(fmt::format("Invalid line '{}' for mcd '{}'", lineBuffer, mcdFilename));
}
void Config::readRawInput(std::string_view rawFilename)
{
std::FILE * file = std::fopen(rawFilename.data(), "r");
std::fclose(file);
}
if (not file)
util::myThrow(fmt::format("Cannot open file '{}'", rawFilename));
if (not rawFilename.empty())
while (not std::feof(file))
rawInput.push_back(std::fgetc(file));
std::fclose(file);
rawInputUtf8 = util::splitAsUtf8(rawInput);
}
/// Reads a tab-separated input file into `lines`.
///
/// Each data line becomes one entry of `lines`, holding one cell per column
/// of colIndex2Name; the first value of every cell is filled from the
/// matching TSV column (extra TSV columns are ignored). Lines starting with
/// '#' are skipped. A line shorter than 3 characters is treated as a
/// separator: it sets the EOS column of the previously read line to
/// EOSSymbol1 (every line starts with EOSSymbol0).
///
/// Errors are reported through util::myThrow when the file cannot be opened
/// or when a data line does not have the same number of columns as the
/// first data line.
///
/// @param tsvFilename path of the TSV file to read.
void Config::readTSVInput(std::string_view tsvFilename)
{
  // A string_view is not guaranteed to be null-terminated, while std::fopen
  // requires a C string: go through an owning std::string.
  const std::string tsvPath(tsvFilename);
  std::FILE * file = std::fopen(tsvPath.c_str(), "r");

  if (not file)
    util::myThrow(fmt::format("Cannot open file '{}'", tsvFilename));

  // Closes the file on every exit path, so the handle is not leaked if
  // util::myThrow leaves this function by throwing.
  struct FileCloser
  {
    std::FILE * handle;
    ~FileCloser() { if (handle) std::fclose(handle); }
  } closer{file};

  const std::size_t nbCols = colIndex2Name.size();
  const std::size_t eosIndex = static_cast<std::size_t>(colName2Index.at(EOSColName));

  char lineBuffer[100000];
  int inputLineIndex = 0;
  bool inputHasBeenRead = false;
  int usualNbCol = -1;

  while (std::fgets(lineBuffer, sizeof lineBuffer, file))
  {
    std::string_view line(lineBuffer);
    inputLineIndex++;

    if (line.size() < 3)
    {
      // Separator line: mark the end of the sequence on the last data line.
      if (inputHasBeenRead)
        lines.back()[eosIndex][0] = EOSSymbol1;
      continue;
    }
    if (line[0] == '#')
      continue;
    if (line.back() == '\n')
      line.remove_suffix(1);

    inputHasBeenRead = true;

    auto splited = util::split(line, '\t');
    if (usualNbCol == -1)
      usualNbCol = static_cast<int>(splited.size());
    if (static_cast<int>(splited.size()) != usualNbCol)
      util::myThrow(fmt::format("in file {} line {} ('{}') is invalid, it should have {} columns", tsvFilename, inputLineIndex, line, usualNbCol));

    lines.emplace_back();
    for (std::size_t i = 0; i < nbCols; i++)
    {
      lines.back().emplace_back();
      lines.back().back().emplace_back("");
    }

    lines.back()[eosIndex][0] = EOSSymbol0;

    // Never copy a TSV column into the EOS cell: when the TSV has more
    // columns than the mcd describes, that would overwrite the EOS marker.
    for (std::size_t i = 0; i < splited.size() and i < nbCols; i++)
      if (i != eosIndex)
        lines.back()[i][0] = splited[i];
  }
}
/// Builds a Config from a column description file and its input files.
///
/// The filenames are validated first (at least one of tsvFilename and
/// rawFilename must be non-empty, and mcdFilename must be non-empty), then
/// the mcd is read, followed by the raw input and the TSV input for each
/// filename that is non-empty.
///
/// @param mcdFilename path of the column description file.
/// @param tsvFilename path of the TSV input, may be empty.
/// @param rawFilename path of the raw text input, may be empty.
Config::Config(std::string_view mcdFilename, std::string_view tsvFilename, std::string_view rawFilename)
{
  const bool hasTsv = !tsvFilename.empty();
  const bool hasRaw = !rawFilename.empty();

  if (!(hasTsv || hasRaw))
  {
    util::myThrow("tsvFilename and rawFilenames can't be both empty");
  }
  if (mcdFilename.empty())
  {
    util::myThrow("mcdFilename can't be empty");
  }

  // Columns must be known before any input is loaded.
  readMCD(mcdFilename);

  if (hasRaw)
  {
    readRawInput(rawFilename);
  }
  if (hasTsv)
  {
    readTSVInput(tsvFilename);
  }
}
void Config::print(FILE * dest) const
{
for (auto & line : lines)
{
for (unsigned int i = 0; i < line.size()-1; i++)
fmt::print(dest, "{}{}", line[i].back(), i < line.size()-2 ? "\t" : "\n");
if (line[colName2Index.at(EOSColName)].back() == EOSSymbol1)
fmt::print(dest, "\n");
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment