Skip to content
Snippets Groups Projects
Commit c444e1ee authored by maxime.petit
Browse files

Test reward func

parent b7045988
No related branches found
No related tags found
No related merge requests found
...@@ -100,7 +100,7 @@ def rewardA(appliable, config, action, missingLinks): ...@@ -100,7 +100,7 @@ def rewardA(appliable, config, action, missingLinks):
def rewardB(appliable, config, action, missingLinks): def rewardB(appliable, config, action, missingLinks):
if appliable: if appliable:
if "BACK" not in action.name : if "BACK" not in action.name :
reward = 1.0 - action.getOracleScore(config, missingLinks) reward = -1.0*action.getOracleScore(config, missingLinks)
else : else :
back = action.size back = action.size
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0] error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
...@@ -108,7 +108,7 @@ def rewardB(appliable, config, action, missingLinks): ...@@ -108,7 +108,7 @@ def rewardB(appliable, config, action, missingLinks):
reward = last_error - back reward = last_error - back
else: else:
reward = -3.0 reward = -3.0
return reward return 2*reward
################################################################################ ################################################################################
################################################################################ ################################################################################
...@@ -123,7 +123,7 @@ def rewardC(appliable, config, action, missingLinks): ...@@ -123,7 +123,7 @@ def rewardC(appliable, config, action, missingLinks):
reward = -sum(canceledRewards) reward = -sum(canceledRewards)
else: else:
reward = -3.0 reward = -3.0
return reward return 3*reward
################################################################################ ################################################################################
################################################################################ ################################################################################
...@@ -172,7 +172,7 @@ def rewardF(appliable, config, action, missingLinks): ...@@ -172,7 +172,7 @@ def rewardF(appliable, config, action, missingLinks):
def rewardG(appliable, config, action, missingLinks): def rewardG(appliable, config, action, missingLinks):
if appliable: if appliable:
if "BACK" not in action.name : if "BACK" not in action.name :
reward = -action.getOracleScore(config, missingLinks) reward = -3 * action.getOracleScore(config, missingLinks)
else : else :
back = action.size back = action.size
canceledRewards = [h[3] for h in config.historyPop[-back:]] canceledRewards = [h[3] for h in config.historyPop[-back:]]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment