Skip to content
Snippets Groups Projects
Commit 0ea827ee authored by Franck Dary's avatar Franck Dary
Browse files

Added oracle for sequential tokeparser

parent a0652237
No related branches found
No related tags found
No related merge requests found
...@@ -798,6 +798,175 @@ void Oracle::createDatabase() ...@@ -798,6 +798,175 @@ void Oracle::createDatabase()
return 0; return 0;
}))); })));
// Strategy oracle registered under the key
// "strategy_tokenizer,tagger,morpho,lemmatizer,parser_sequential".
// From the most recent (state, action) pair stored in the configuration it
// builds the next instruction string "MOVE <state> <offset>".
str2oracle.emplace("strategy_tokenizer,tagger,morpho,lemmatizer,parser_sequential", std::unique_ptr<Oracle>(new Oracle(
  // First callback: intentionally empty.
  [](Oracle *)
  {
  },
  // Second callback: computes the instruction string.
  // Returns "MOVE tokenizer 0" when no action has been recorded yet, an
  // empty string when the configuration is final and the last state was
  // "segmenter", and "MOVE <state> <offset>" otherwise.
  [](Config & c, Oracle *)
  {
    if (c.pastActions.size() == 0)
      return std::string("MOVE tokenizer 0");

    const std::string prevState = util::noAccentLower(c.pastActions.getElem(0).first);
    const std::string prevAction = util::noAccentLower(c.pastActions.getElem(0).second.name);

    // Progress bookkeeping. These are function-local statics, so their
    // contents persist between invocations of this callback; they are only
    // reset when c.isFinal() holds (see the end of this lambda).
    static constexpr int lookahead = 2;
    // Per-state count of completed steps.
    static std::map<std::string,int> doneCount{{"tokenizer",0},{"tagger",0},{"morpho",0},{"lemmatizer_case",0},{"parser",0}};
    // Per-state head value recorded at the last completed step.
    static std::map<std::string,int> lastDoneIndex{{"tokenizer",-1},{"tagger",-1},{"morpho",-1},{"lemmatizer_case",-1},{"parser",-1}};
    // Per-state target that doneCount is compared against.
    static std::map<std::string,int> quota{{"tokenizer",4*lookahead+1},{"tagger",3*lookahead+1},{"morpho",2*lookahead+1},{"lemmatizer_case",lookahead+1}};

    // Offset from the current head to the position just after the last one
    // recorded for `key`.
    const auto offsetPast = [&c](const std::string & key) -> int
    {
      return lastDoneIndex[key]-c.getHead()+1;
    };

    // True while `key` has completed fewer steps than its quota.
    const auto quotaUnmet = [](const std::string & key)
    {
      return doneCount[key] < quota[key];
    };

    // True when `key` cannot proceed: the next position is beyond the FORM
    // tape, the FORM cell at that offset is empty, or the quota is reached.
    // The order of the three tests is significant (short-circuit guards the
    // tape access).
    const auto nothingLeftFor = [&c](const std::string & key)
    {
      return lastDoneIndex[key]+1 >= c.getTape("FORM").size()
          || c.getTape("FORM")[lastDoneIndex[key]-c.getHead()+1].empty()
          || doneCount[key] >= quota[key];
    };

    std::string nextState;
    int offset = 0;

    if (prevState == "tokenizer")
    {
      const auto actionKind = util::split(prevAction, ' ')[0];

      if (actionKind != "splitword" && actionKind != "endword")
      {
        // Any other tokenizer action: stay in place.
        nextState = "tokenizer";
      }
      else
      {
        doneCount[prevState]++;
        lastDoneIndex[prevState] = c.getHead();

        if (actionKind == "splitword")
        {
          // The splitword argument is '@'-separated; every piece beyond the
          // first adds one more completed step and advances the last index.
          int pieceCount = util::split(util::split(prevAction, ' ')[1], '@').size();
          doneCount[prevState] += pieceCount-1;
          lastDoneIndex[prevState] += pieceCount-1;
        }

        nextState = quotaUnmet(prevState) ? "tokenizer" : "tagger";
        offset = offsetPast(nextState);
      }
    }
    else if (prevState == "tagger")
    {
      doneCount[prevState]++;
      lastDoneIndex[prevState] = c.getHead();

      if (quotaUnmet(prevState))
      {
        nextState = "tagger";
        offset = 1;
      }
      else
      {
        nextState = "morpho";
        offset = offsetPast(nextState);
      }
    }
    else if (prevState == "morpho")
    {
      // Stay in morpho with offset 0 until a "nothing" action is seen.
      nextState = "morpho";

      if (prevAction == "nothing")
      {
        doneCount[prevState]++;
        lastDoneIndex[prevState] = c.getHead();

        if (quotaUnmet(prevState))
        {
          offset = 1;
        }
        else
        {
          nextState = "lemmatizer_lookup";
          // Lemmatizer progress is tracked under the "lemmatizer_case" key.
          offset = offsetPast("lemmatizer_case");
        }
      }
    }
    else if (prevState == "lemmatizer_lookup")
    {
      nextState = (prevAction == "notfound") ? "lemmatizer_rules" : "lemmatizer_case";
    }
    else if (prevState == "lemmatizer_rules")
    {
      nextState = "lemmatizer_case";
    }
    else if (prevState == "lemmatizer_case")
    {
      doneCount[prevState]++;
      lastDoneIndex[prevState] = c.getHead();

      if (quotaUnmet(prevState))
      {
        // NOTE(review): this continues with "lemmatizer_rules" whereas the
        // morpho branch enters the lemmatizer through "lemmatizer_lookup" -
        // confirm this is intended.
        nextState = "lemmatizer_rules";
        offset = 1;
      }
      else
      {
        nextState = "parser";
        offset = offsetPast(nextState);
      }
    }
    else if (prevState == "parser")
    {
      const auto actionKind = util::split(prevAction, ' ')[0];

      if (actionKind == "shift" || actionKind == "right")
      {
        nextState = "segmenter";
        offset = 0;
        lastDoneIndex[prevState] = c.getHead();
      }
      else
      {
        nextState = "parser";
      }
    }
    else if (prevState == "segmenter")
    {
      // Raise every quota by one.
      quota["tokenizer"] += 1;
      quota["tagger"] += 1;
      quota["morpho"] += 1;
      quota["lemmatizer_case"] += 1;

      // Try the states in order; fall through to the next one whenever the
      // current candidate has nothing left to do.
      nextState = "tokenizer";
      offset = offsetPast(nextState);

      if (c.rawInputHeadIndex >= static_cast<int>(c.rawInput.size()) || doneCount[nextState] >= quota[nextState])
      {
        nextState = "tagger";
        offset = offsetPast(nextState);

        if (nothingLeftFor(nextState))
        {
          nextState = "morpho";
          offset = offsetPast(nextState);

          if (nothingLeftFor(nextState))
          {
            nextState = "lemmatizer_rules";
            offset = offsetPast("lemmatizer_case");

            if (nothingLeftFor("lemmatizer_case"))
            {
              nextState = "parser";
              offset = offsetPast(nextState);
            }
          }
        }
      }
    }
    else
    {
      // Unrecognised state: encode the diagnostic into the state name.
      nextState = "unknown("+std::string(ERRINFO)+")("+prevState+")("+prevAction+")";
    }

    if (c.isFinal())
    {
      // Restore the bookkeeping to its initial values.
      doneCount = {{"tokenizer",0},{"tagger",0},{"morpho",0},{"lemmatizer_case",0},{"parser",0}};
      lastDoneIndex = {{"tokenizer",-1},{"tagger",-1},{"morpho",-1},{"lemmatizer_case",-1},{"parser",-1}};
      quota = {{"tokenizer",4*lookahead+1},{"tagger",3*lookahead+1},{"morpho",2*lookahead+1},{"lemmatizer_case",lookahead+1}};

      if (prevState == "segmenter")
        return std::string("");
    }

    return "MOVE " + nextState + " " + std::to_string(offset);
  },
  // Third callback: always returns 0.
  [](Config &, Oracle *, const std::string &)
  {
    return 0;
  })));
str2oracle.emplace("strategy_tokenizer,tagger,morpho,lemmatizer,parser", std::unique_ptr<Oracle>(new Oracle( str2oracle.emplace("strategy_tokenizer,tagger,morpho,lemmatizer,parser", std::unique_ptr<Oracle>(new Oracle(
[](Oracle *) [](Oracle *)
{ {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment