Skip to content
Snippets Groups Projects
Commit 4487af1d authored by Franck Dary
Browse files

Made sure Dict never contained 2 elements with the same index

parent 11e87ce1
Branches
No related tags found
No related merge requests found
...@@ -42,6 +42,7 @@ class Dict ...@@ -42,6 +42,7 @@ class Dict
void readFromFile(const char * filename); void readFromFile(const char * filename);
void insert(const std::string & element); void insert(const std::string & element);
void reset();
public : public :
......
...@@ -22,6 +22,8 @@ Dict::Dict(const char * filename, State state) ...@@ -22,6 +22,8 @@ Dict::Dict(const char * filename, State state)
void Dict::readFromFile(const char * filename) void Dict::readFromFile(const char * filename)
{ {
reset();
std::FILE * file = std::fopen(filename, "r"); std::FILE * file = std::fopen(filename, "r");
if (!file) if (!file)
...@@ -55,6 +57,10 @@ void Dict::readFromFile(const char * filename) ...@@ -55,6 +57,10 @@ void Dict::readFromFile(const char * filename)
if (!readEntry(file, &entryIndex, &nbOccsEntry, entryString, encoding)) if (!readEntry(file, &entryIndex, &nbOccsEntry, entryString, encoding))
util::myThrow(fmt::format("file '{}' line {} bad format", filename, i)); util::myThrow(fmt::format("file '{}' line {} bad format", filename, i));
if (elementsToIndexes.count(entryString))
util::myThrow(fmt::format("entry '{}' is already in dict", entryString));
if (indexesToElements.count(entryIndex))
util::myThrow(fmt::format("index '{}' is already in dict", entryIndex));
elementsToIndexes[entryString] = entryIndex; elementsToIndexes[entryString] = entryIndex;
indexesToElements[entryIndex] = entryString; indexesToElements[entryIndex] = entryString;
while ((int)nbOccs.size() <= entryIndex) while ((int)nbOccs.size() <= entryIndex)
...@@ -70,7 +76,14 @@ void Dict::insert(const std::string & element) ...@@ -70,7 +76,14 @@ void Dict::insert(const std::string & element)
if (element.size() > maxEntrySize) if (element.size() > maxEntrySize)
util::myThrow(fmt::format("inserting element of size={} > maxElementSize={}", element.size(), maxEntrySize)); util::myThrow(fmt::format("inserting element of size={} > maxElementSize={}", element.size(), maxEntrySize));
if (elementsToIndexes.count(element))
util::myThrow(fmt::format("element '{}' already in dict", element));
elementsToIndexes.emplace(element, elementsToIndexes.size()); elementsToIndexes.emplace(element, elementsToIndexes.size());
if (indexesToElements.count(elementsToIndexes.size()-1))
util::myThrow(fmt::format("index '{}' already in dict", elementsToIndexes.size()-1));
indexesToElements.emplace(elementsToIndexes.size()-1, element); indexesToElements.emplace(elementsToIndexes.size()-1, element);
while (nbOccs.size() < elementsToIndexes.size()) while (nbOccs.size() < elementsToIndexes.size())
nbOccs.emplace_back(0); nbOccs.emplace_back(0);
...@@ -101,8 +114,8 @@ int Dict::getIndexOrInsert(const std::string & element, const std::string & pref ...@@ -101,8 +114,8 @@ int Dict::getIndexOrInsert(const std::string & element, const std::string & pref
{ {
insert(prefixed); insert(prefixed);
if (isCountingOccs) if (isCountingOccs)
nbOccs[elementsToIndexes[prefixed]]++; nbOccs[elementsToIndexes.at(prefixed)]++;
return elementsToIndexes[prefixed]; return elementsToIndexes.at(prefixed);
} }
prefixed = prefix.empty() ? util::lower(element) : fmt::format("{}({})", prefix, util::lower(element)); prefixed = prefix.empty() ? util::lower(element) : fmt::format("{}({})", prefix, util::lower(element));
...@@ -115,9 +128,16 @@ int Dict::getIndexOrInsert(const std::string & element, const std::string & pref ...@@ -115,9 +128,16 @@ int Dict::getIndexOrInsert(const std::string & element, const std::string & pref
} }
prefixed = prefix.empty() ? unknownValueStr : fmt::format("{}({})", prefix, unknownValueStr); prefixed = prefix.empty() ? unknownValueStr : fmt::format("{}({})", prefix, unknownValueStr);
const auto & found3 = elementsToIndexes.find(prefixed);
if (found3 != elementsToIndexes.end())
{
if (isCountingOccs) if (isCountingOccs)
nbOccs[elementsToIndexes[prefixed]]++; nbOccs[found3->second]++;
return elementsToIndexes[prefixed]; return found3->second;
}
return elementsToIndexes[unknownValueStr];
} }
if (isCountingOccs) if (isCountingOccs)
...@@ -315,3 +335,12 @@ std::string Dict::getElement(std::size_t index) ...@@ -315,3 +335,12 @@ std::string Dict::getElement(std::size_t index)
return indexesToElements[index]; return indexesToElements[index];
} }
// Brings the dictionary back to an empty state: the element->index map, the
// index->element map and the per-index occurrence counters are all emptied,
// the state becomes Closed and occurrence counting is switched off.
void Dict::reset()
{
  // Flags first; these assignments are independent of the containers below.
  isCountingOccs = false;
  state = State::Closed;

  // Drop every stored entry so no stale element or index survives.
  nbOccs.clear();
  indexesToElements.clear();
  elementsToIndexes.clear();
}
...@@ -299,6 +299,7 @@ int MacaonTrain::main() ...@@ -299,6 +299,7 @@ int MacaonTrain::main()
std::vector<std::pair<float,std::string>> devScores; std::vector<std::pair<float,std::string>> devScores;
if (computeDevScore) if (computeDevScore)
{ {
machine.setDictsState(Dict::State::Closed);
std::vector<BaseConfig> devConfigs; std::vector<BaseConfig> devConfigs;
if (lineByLine) if (lineByLine)
{ {
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment.