Skip to content
Snippets Groups Projects
Commit 1790aa5a authored by Franck Dary
Browse files

New rewards

parent fbd3d3cc
No related branches found
No related tags found
No related merge requests found
......@@ -99,115 +99,46 @@ def rewarding(appliable, config, action, missingLinks, funcname):
forbiddenReward = 1.5
################################################################################
def rewardA(appliable, config, action, missingLinks):
    """Return the reward for taking `action` in `config`.

    Args:
        appliable: Truthy when the action may be applied. When falsy the
            fixed penalty ``-forbiddenReward`` is returned.
        config: Object exposing ``historyPop``, a sequence whose entries are
            read at index 3 (presumably the reward recorded for a past
            action -- TODO confirm against the code that fills it).
        action: Object exposing ``name``, ``size`` and
            ``getOracleScore(config, missingLinks)``.
        missingLinks: Forwarded unchanged to ``action.getOracleScore``.

    Returns:
        * ``-forbiddenReward`` when the action is not appliable;
        * ``-1.0 * oracle score`` for any action not named ``"BACK"``;
        * ``last_error - back`` for ``"BACK"``, where ``back`` is
          ``action.size`` and ``last_error`` is the largest offset ``i`` in
          ``range(1, back)`` whose ``historyPop[-i][3]`` is negative
          (0 when there is none).
    """
    if not appliable:
        return -forbiddenReward

    if action.name != "BACK":
        return -1.0 * action.getOracleScore(config, missingLinks)

    back = action.size
    # NOTE(review): range(1, back) never inspects the entry at offset `back`
    # itself -- confirm this is intended and not an off-by-one.
    error_in_pop = [i for i in range(1, back) if config.historyPop[-i][3] < 0]
    last_error = error_in_pop[-1] if error_in_pop else 0
    return last_error - back
################################################################################
################################################################################
def rewardB(appliable, config, action, missingLinks):
    """Return the reward for taking `action` in `config`.

    Args:
        appliable: Truthy when the action may be applied. When falsy the
            fixed penalty ``-forbiddenReward`` is returned.
        config: Object exposing ``historyPop``, a sequence whose entries are
            read at index 3 (presumably the reward recorded for a past
            action -- TODO confirm against the code that fills it).
        action: Object exposing ``name``, ``size`` and
            ``getOracleScore(config, missingLinks)``.
        missingLinks: Forwarded unchanged to ``action.getOracleScore``.

    Returns:
        * ``-forbiddenReward`` when the action is not appliable;
        * ``1.0 - oracle score`` for any action not named ``"BACK"``;
        * ``last_error - back`` for ``"BACK"``, where ``back`` is
          ``action.size`` and ``last_error`` is the largest offset ``i`` in
          ``range(1, back)`` whose ``historyPop[-i][3]`` is negative
          (0 when there is none).
    """
    if not appliable:
        return -forbiddenReward

    if action.name != "BACK":
        return 1.0 - action.getOracleScore(config, missingLinks)

    back = action.size
    # NOTE(review): range(1, back) never inspects the entry at offset `back`
    # itself -- confirm this is intended and not an off-by-one.
    error_in_pop = [i for i in range(1, back) if config.historyPop[-i][3] < 0]
    last_error = error_in_pop[-1] if error_in_pop else 0
    return last_error - back
################################################################################
################################################################################
# NOTE(review): this span carries two consecutive `def` headers (rewardC, then
# rewardE), two consecutive assignments to `reward` in the "BACK" branch, and no
# indentation. It looks like the old and new lines of a diff rendered together
# -- TODO confirm which header and which assignment belong to the same revision.
# What the visible lines establish: a non-appliable action yields
# -forbiddenReward, and an action not named "BACK" yields the negated oracle score.
def rewardC(appliable, config, action, missingLinks):
def rewardE(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks)
else :
back = action.size
# `error_in_pop` is not read again anywhere in this span.
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
# Collects index 3 of the last `back` history entries (presumably their rewards -- TODO confirm).
canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = -sum(canceledRewards)
# If both assignments were executed in sequence, this constant would replace the sum above.
reward = 0.5
else:
reward = -forbiddenReward
return reward
################################################################################
################################################################################
def reward3C(appliable, config, action, missingLinks):
    """Return three times the reward for taking `action` in `config`.

    Args:
        appliable: Truthy when the action may be applied. When falsy the
            base reward is the fixed penalty ``-forbiddenReward``.
        config: Object exposing ``historyPop``, a sequence whose entries are
            read at index 3 (presumably the reward recorded for a past
            action -- TODO confirm against the code that fills it).
        action: Object exposing ``name``, ``size`` and
            ``getOracleScore(config, missingLinks)``.
        missingLinks: Forwarded unchanged to ``action.getOracleScore``.

    Returns:
        ``3.0 *`` the base reward, where the base reward is
        ``-forbiddenReward`` when not appliable, the negated oracle score for
        any action not named ``"BACK"``, and for ``"BACK"`` the negated sum
        of index 3 over the last ``action.size`` history entries.
    """
    if not appliable:
        reward = -forbiddenReward
    elif action.name != "BACK":
        reward = -action.getOracleScore(config, missingLinks)
    else:
        back = action.size
        # `historyPop[-0:]` is the *whole* history, so a zero-sized BACK must
        # be special-cased to cancel nothing.
        canceledRewards = (
            [h[3] for h in config.historyPop[-back:]] if back != 0 else []
        )
        reward = -sum(canceledRewards)
    return reward * 3.0
################################################################################
################################################################################
# NOTE(review): this span carries two consecutive `def` headers (rewardD, then
# rewardG), two consecutive assignments to `reward` in the "BACK" branch, and no
# indentation. It looks like the old and new lines of a diff rendered together
# -- TODO confirm which header and which assignment belong to the same revision.
# What the visible lines establish: a non-appliable action yields
# -forbiddenReward, and an action not named "BACK" yields the negated oracle score.
def rewardD(appliable, config, action, missingLinks):
def rewardG(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks)
else :
back = action.size
# `error_in_pop` is not read again anywhere in this span.
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
# Collects index 3 of the last `back` history entries (presumably their rewards -- TODO confirm).
canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = -sum(canceledRewards) - 1
# Log-scaled variant: log(1 - sum) when the sum is negative, otherwise -1.
# `np` is not defined in this span (presumably numpy imported at file level -- TODO confirm).
reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
else:
reward = -forbiddenReward
return reward
################################################################################
################################################################################
# NOTE(review): two consecutive `def` headers (rewardE, then rewardA) sit on top
# of a single body with no indentation. It looks like a rename shown as old and
# new diff lines together -- TODO confirm which name is current.
# What the visible lines establish: -forbiddenReward when not appliable, the
# negated oracle score for an action not named "BACK", and a constant -0.5 for "BACK".
def rewardE(appliable, config, action, missingLinks):
def rewardA(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks)
else :
reward = -0.5
else:
reward = -forbiddenReward
return reward
################################################################################
################################################################################
def rewardF(appliable, config, action, missingLinks):
    """Return ten times the reward for taking `action` in `config`.

    Args:
        appliable: Truthy when the action may be applied. When falsy the
            base reward is the fixed penalty ``-forbiddenReward``.
        config: Object exposing ``historyPop``, a sequence whose entries are
            read at index 3 (presumably the reward recorded for a past
            action -- TODO confirm against the code that fills it).
        action: Object exposing ``name``, ``size`` and
            ``getOracleScore(config, missingLinks)``.
        missingLinks: Forwarded unchanged to ``action.getOracleScore``.

    Returns:
        ``10 *`` the base reward, where the base reward is
        ``-forbiddenReward`` when not appliable, ``-1.0 * oracle score`` for
        any action not named ``"BACK"``, and ``last_error - back`` for
        ``"BACK"`` (``back`` is ``action.size``; ``last_error`` is the
        largest offset ``i`` in ``range(1, back)`` whose
        ``historyPop[-i][3]`` is negative, or 0 when there is none).
    """
    if not appliable:
        reward = -forbiddenReward
    elif action.name != "BACK":
        reward = -1.0 * action.getOracleScore(config, missingLinks)
    else:
        back = action.size
        # NOTE(review): range(1, back) never inspects the entry at offset
        # `back` itself -- confirm this is intended and not an off-by-one.
        error_in_pop = [
            i for i in range(1, back) if config.historyPop[-i][3] < 0
        ]
        last_error = error_in_pop[-1] if error_in_pop else 0
        reward = last_error - back
    return 10 * reward
################################################################################
################################################################################
def rewardG(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks)
canceledRewards = []
found = 0
for i in range(len(config.historyPop))[::-1] :
if config.historyPop[i][0].name == "NOBACK" :
found += 1
if found == action.size :
break
else :
back = action.size
canceledRewards = [h[3] for h in config.historyPop[-back:]]
canceledRewards.append(config.historyPop[i][3])
reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
else:
reward = -forbiddenReward
......@@ -215,7 +146,7 @@ def rewardG(appliable, config, action, missingLinks):
################################################################################
################################################################################
def rewardA(appliable, config, action, missingLinks):
def rewardA2(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks)
......@@ -229,7 +160,7 @@ def rewardA(appliable, config, action, missingLinks):
break
else :
canceledRewards.append(config.historyPop[i][3])
reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else 0
else:
reward = -forbiddenReward
return reward
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment