Skip to content
Snippets Groups Projects
Commit 67b8b07c authored by maxime.petit's avatar maxime.petit
Browse files

Fixed reward funcs

parent 7cc3e7f1
No related branches found
No related tags found
No related merge requests found
...@@ -101,7 +101,7 @@ def rewardB(appliable, config, action, missingLinks): ...@@ -101,7 +101,7 @@ def rewardB(appliable, config, action, missingLinks):
if "BACK" not in action.name : if "BACK" not in action.name :
reward = 1.0 - action.getOracleScore(config, missingLinks) reward = 1.0 - action.getOracleScore(config, missingLinks)
else : else :
back = int(action.name.split()[-1]) back = action.size
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0] error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0 last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
reward = last_error - back reward = last_error - back
...@@ -116,7 +116,7 @@ def rewardC(appliable, config, action, missingLinks): ...@@ -116,7 +116,7 @@ def rewardC(appliable, config, action, missingLinks):
if "BACK" not in action.name : if "BACK" not in action.name :
reward = -action.getOracleScore(config, missingLinks) reward = -action.getOracleScore(config, missingLinks)
else : else :
back = int(action.name.split()[-1]) back = action.size
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0] error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
canceledRewards = [h[3] for h in config.historyPop[-back:]] canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = -sum(canceledRewards) reward = -sum(canceledRewards)
...@@ -131,7 +131,7 @@ def rewardD(appliable, config, action, missingLinks): ...@@ -131,7 +131,7 @@ def rewardD(appliable, config, action, missingLinks):
if "BACK" not in action.name : if "BACK" not in action.name :
reward = -action.getOracleScore(config, missingLinks) reward = -action.getOracleScore(config, missingLinks)
else : else :
back = int(action.name.split()[-1]) back = action.size
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0] error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
canceledRewards = [h[3] for h in config.historyPop[-back:]] canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = -sum(canceledRewards) - 1 reward = -sum(canceledRewards) - 1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment