Commit 4487af1d authored by Franck Dary
Browse files

Made sure Dict never contained 2 elements with the same index

parent 11e87ce1
......@@ -42,6 +42,7 @@ class Dict
void readFromFile(const char * filename);
void insert(const std::string & element);
void reset();
public :
......
......@@ -22,6 +22,8 @@ Dict::Dict(const char * filename, State state)
void Dict::readFromFile(const char * filename)
{
reset();
std::FILE * file = std::fopen(filename, "r");
if (!file)
......@@ -55,6 +57,10 @@ void Dict::readFromFile(const char * filename)
if (!readEntry(file, &entryIndex, &nbOccsEntry, entryString, encoding))
util::myThrow(fmt::format("file '{}' line {} bad format", filename, i));
if (elementsToIndexes.count(entryString))
util::myThrow(fmt::format("entry '{}' is already in dict", entryString));
if (indexesToElements.count(entryIndex))
util::myThrow(fmt::format("index '{}' is already in dict", entryIndex));
elementsToIndexes[entryString] = entryIndex;
indexesToElements[entryIndex] = entryString;
while ((int)nbOccs.size() <= entryIndex)
......@@ -70,7 +76,14 @@ void Dict::insert(const std::string & element)
if (element.size() > maxEntrySize)
util::myThrow(fmt::format("inserting element of size={} > maxElementSize={}", element.size(), maxEntrySize));
if (elementsToIndexes.count(element))
util::myThrow(fmt::format("element '{}' already in dict", element));
elementsToIndexes.emplace(element, elementsToIndexes.size());
if (indexesToElements.count(elementsToIndexes.size()-1))
util::myThrow(fmt::format("index '{}' already in dict", elementsToIndexes.size()-1));
indexesToElements.emplace(elementsToIndexes.size()-1, element);
while (nbOccs.size() < elementsToIndexes.size())
nbOccs.emplace_back(0);
......@@ -101,8 +114,8 @@ int Dict::getIndexOrInsert(const std::string & element, const std::string & pref
{
insert(prefixed);
if (isCountingOccs)
nbOccs[elementsToIndexes[prefixed]]++;
return elementsToIndexes[prefixed];
nbOccs[elementsToIndexes.at(prefixed)]++;
return elementsToIndexes.at(prefixed);
}
prefixed = prefix.empty() ? util::lower(element) : fmt::format("{}({})", prefix, util::lower(element));
......@@ -115,9 +128,16 @@ int Dict::getIndexOrInsert(const std::string & element, const std::string & pref
}
prefixed = prefix.empty() ? unknownValueStr : fmt::format("{}({})", prefix, unknownValueStr);
if (isCountingOccs)
nbOccs[elementsToIndexes[prefixed]]++;
return elementsToIndexes[prefixed];
const auto & found3 = elementsToIndexes.find(prefixed);
if (found3 != elementsToIndexes.end())
{
if (isCountingOccs)
nbOccs[found3->second]++;
return found3->second;
}
return elementsToIndexes[unknownValueStr];
}
if (isCountingOccs)
......@@ -315,3 +335,12 @@ std::string Dict::getElement(std::size_t index)
return indexesToElements[index];
}
/// Bring the dictionary back to an empty, default-configured state.
///
/// Turns occurrence counting off, sets the state to State::Closed, and
/// empties the occurrence vector together with both lookup maps
/// (element -> index and index -> element), so the two maps cannot retain
/// stale entries from a previous load.
void Dict::reset()
{
  // Flags are independent of the container contents, so reset them first.
  isCountingOccs = false;
  state = State::Closed;

  // Drop every stored entry; the two maps are cleared together to stay in sync.
  nbOccs.clear();
  indexesToElements.clear();
  elementsToIndexes.clear();
}
......@@ -299,6 +299,7 @@ int MacaonTrain::main()
std::vector<std::pair<float,std::string>> devScores;
if (computeDevScore)
{
machine.setDictsState(Dict::State::Closed);
std::vector<BaseConfig> devConfigs;
if (lineByLine)
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment