Skip to content
Snippets Groups Projects
Commit 1790aa5a authored by Franck Dary
Browse files

New rewards

parent fbd3d3cc
No related branches found
No related tags found
No related merge requests found
...@@ -99,115 +99,46 @@ def rewarding(appliable, config, action, missingLinks, funcname): ...@@ -99,115 +99,46 @@ def rewarding(appliable, config, action, missingLinks, funcname):
forbiddenReward = 1.5 forbiddenReward = 1.5
################################################################################ ################################################################################
def rewardA(appliable, config, action, missingLinks): def rewardE(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = -1.0*action.getOracleScore(config, missingLinks)
else :
back = action.size
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
reward = last_error - back
else:
reward = -forbiddenReward
return reward
################################################################################
################################################################################
def rewardB(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = 1.0 - action.getOracleScore(config, missingLinks)
else :
back = action.size
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
reward = last_error - back
else:
reward = -forbiddenReward
return reward
################################################################################
################################################################################
def rewardC(appliable, config, action, missingLinks):
if appliable: if appliable:
if action.name != "BACK" : if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks) reward = -action.getOracleScore(config, missingLinks)
else : else :
back = action.size reward = 0.5
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = -sum(canceledRewards)
else: else:
reward = -forbiddenReward reward = -forbiddenReward
return reward return reward
################################################################################ ################################################################################
################################################################################ ################################################################################
def reward3C(appliable, config, action, missingLinks): def rewardG(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks)
else :
back = action.size
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = -sum(canceledRewards)
else:
reward = -forbiddenReward
return reward*3.0
################################################################################
################################################################################
def rewardD(appliable, config, action, missingLinks):
if appliable: if appliable:
if action.name != "BACK" : if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks) reward = -action.getOracleScore(config, missingLinks)
else : else :
back = action.size back = action.size
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
canceledRewards = [h[3] for h in config.historyPop[-back:]] canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = -sum(canceledRewards) - 1 reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
else: else:
reward = -forbiddenReward reward = -forbiddenReward
return reward return reward
################################################################################ ################################################################################
################################################################################ ################################################################################
def rewardE(appliable, config, action, missingLinks): def rewardA(appliable, config, action, missingLinks):
if appliable: if appliable:
if action.name != "BACK" : if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks) reward = -action.getOracleScore(config, missingLinks)
else : else :
reward = -0.5 canceledRewards = []
else: found = 0
reward = -forbiddenReward for i in range(len(config.historyPop))[::-1] :
return reward if config.historyPop[i][0].name == "NOBACK" :
################################################################################ found += 1
if found == action.size :
################################################################################ break
def rewardF(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = -1.0*action.getOracleScore(config, missingLinks)
else :
back = action.size
error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
reward = last_error - back
else:
reward = -forbiddenReward
return 10*reward
################################################################################
################################################################################
def rewardG(appliable, config, action, missingLinks):
if appliable:
if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks)
else : else :
back = action.size canceledRewards.append(config.historyPop[i][3])
canceledRewards = [h[3] for h in config.historyPop[-back:]]
reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1 reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
else: else:
reward = -forbiddenReward reward = -forbiddenReward
...@@ -215,7 +146,7 @@ def rewardG(appliable, config, action, missingLinks): ...@@ -215,7 +146,7 @@ def rewardG(appliable, config, action, missingLinks):
################################################################################ ################################################################################
################################################################################ ################################################################################
def rewardA(appliable, config, action, missingLinks): def rewardA2(appliable, config, action, missingLinks):
if appliable: if appliable:
if action.name != "BACK" : if action.name != "BACK" :
reward = -action.getOracleScore(config, missingLinks) reward = -action.getOracleScore(config, missingLinks)
...@@ -229,7 +160,7 @@ def rewardA(appliable, config, action, missingLinks): ...@@ -229,7 +160,7 @@ def rewardA(appliable, config, action, missingLinks):
break break
else : else :
canceledRewards.append(config.historyPop[i][3]) canceledRewards.append(config.historyPop[i][3])
reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1 reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else 0
else: else:
reward = -forbiddenReward reward = -forbiddenReward
return reward return reward
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment