Skip to content
Snippets Groups Projects
Commit 558a83dd authored by Franck Dary
Browse files

Added 3C reward and added global variable to choose impossible action reward

parent e3a110f0
No related branches found
No related tags found
No related merge requests found
......@@ -92,6 +92,8 @@ def rewarding(appliable, config, action, missingLinks, funcname):
return globals()["reward"+funcname](appliable, config, action, missingLinks)
################################################################################
# Magnitude of the penalty for an action that is not appliable: the reward
# functions in this file use -forbiddenReward (some of them scale it afterwards)
# in their "not appliable" branch.
forbiddenReward = 1.5
################################################################################
def rewardA(appliable, config, action, missingLinks):
if appliable:
......@@ -103,7 +105,7 @@ def rewardA(appliable, config, action, missingLinks):
last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
reward = last_error - back
else:
reward = -3.0
reward = -forbiddenReward
return reward
################################################################################
......@@ -118,7 +120,7 @@ def rewardB(appliable, config, action, missingLinks):
last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
reward = last_error - back
else:
reward = -3.0
reward = -forbiddenReward
return reward
################################################################################
......@@ -133,10 +135,25 @@ def rewardC(appliable, config, action, missingLinks):
canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = -sum(canceledRewards)
else:
reward = -3.0
reward = -forbiddenReward
return reward
################################################################################
################################################################################
def reward3C(appliable, config, action, missingLinks):
    """Return the reward for `action`, scaled by a factor of 3.

    Before scaling, the reward is:
      * -forbiddenReward when the action is not appliable;
      * minus the oracle score of the action when it is appliable and is not
        a "BACK" action;
      * minus the sum of the values stored at index 3 of the last
        `action.size` entries of `config.historyPop` when it is a "BACK"
        action (presumably the rewards recorded for the actions being
        undone; confirm against the code that fills historyPop).

    Parameters:
      appliable    -- truthy if the action can be applied in `config`.
      config       -- object exposing `historyPop`, a sequence of indexable
                      entries.
      action       -- object exposing `name`, and either `size` (for "BACK")
                      or `getOracleScore(config, missingLinks)`.
      missingLinks -- forwarded unchanged to `action.getOracleScore`.
    """
    if not appliable:
        reward = -forbiddenReward
    elif action.name != "BACK":
        reward = -action.getOracleScore(config, missingLinks)
    else:
        back = action.size
        # Slicing tolerates a history shorter than `back`, so no per-index
        # lookup is done here (the former unused `error_in_pop` list raised
        # IndexError in that situation).
        canceledRewards = [h[3] for h in config.historyPop[-back:]]
        reward = -sum(canceledRewards)
    return reward * 3.0
################################################################################
################################################################################
def rewardD(appliable, config, action, missingLinks):
if appliable:
......@@ -148,7 +165,7 @@ def rewardD(appliable, config, action, missingLinks):
canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = -sum(canceledRewards) - 1
else:
reward = -3.0
reward = -forbiddenReward
return reward
################################################################################
......@@ -160,7 +177,7 @@ def rewardE(appliable, config, action, missingLinks):
else :
reward = -0.5
else:
reward = -3.0
reward = -forbiddenReward
return reward
################################################################################
......@@ -175,7 +192,7 @@ def rewardF(appliable, config, action, missingLinks):
last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
reward = last_error - back
else:
reward = -3.0
reward = -forbiddenReward
return 10*reward
################################################################################
......@@ -189,7 +206,7 @@ def rewardG(appliable, config, action, missingLinks):
canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
else:
reward = -3.0
reward = -forbiddenReward
return reward
################################################################################
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment