Skip to content
Snippets Groups Projects
Commit abac4d5b authored by Franck Dary's avatar Franck Dary
Browse files

Added reward functions

parent c05a888f
No related branches found
No related tags found
No related merge requests found
...@@ -214,3 +214,31 @@ def rewardG(appliable, config, action, missingLinks): ...@@ -214,3 +214,31 @@ def rewardG(appliable, config, action, missingLinks):
return reward return reward
################################################################################ ################################################################################
################################################################################
def reward3G(appliable, config, action, missingLinks):
    """Return the reward for `action`, multiplied by 3.

    - Not appliable: the base reward is ``-forbiddenReward``.
    - Appliable and not a "BACK" action: the base reward is the negated
      value of ``action.getOracleScore(config, missingLinks)``.
    - Appliable "BACK" action: the fourth field (index 3) of the last
      ``action.size`` entries of ``config.historyPop`` is summed
      (presumably the rewards of the transitions being undone -- confirm
      against the code that fills ``historyPop``). If that sum is
      negative the base reward is ``log(1 - sum)``, otherwise it is -1.
    """
    # Guard clause: forbidden actions get the (scaled) fixed penalty.
    if not appliable:
        return 3 * (-forbiddenReward)

    if action.name == "BACK":
        nbCanceled = action.size
        total = sum(entry[3] for entry in config.historyPop[-nbCanceled:])
        # A negative total makes 1 - total > 1, so the log is positive.
        base = np.log(1 - total) if total < 0 else -1
    else:
        base = -action.getOracleScore(config, missingLinks)

    return 3 * base
################################################################################
################################################################################
def reward10G(appliable, config, action, missingLinks):
    """Return the reward for `action`, multiplied by 10.

    The unscaled reward is chosen as follows:

    - if the action is not appliable, it is ``-forbiddenReward``;
    - if the action is appliable and its name is not "BACK", it is the
      negated ``action.getOracleScore(config, missingLinks)``;
    - if the action is an appliable "BACK", index 3 of each of the last
      ``action.size`` entries of ``config.historyPop`` is summed
      (NOTE(review): looks like these are the rewards being canceled --
      confirm against the code that fills ``historyPop``). The reward is
      ``log(1 - sum)`` when the sum is negative and -1 otherwise.
    """
    scale = 10
    if appliable:
        isBack = action.name == "BACK"
        if isBack:
            window = config.historyPop[-action.size:]
            canceledSum = sum([step[3] for step in window])
            if canceledSum < 0:
                # 1 - canceledSum > 1 here, so the log is well defined.
                unscaled = np.log(1 - canceledSum)
            else:
                unscaled = -1
        else:
            unscaled = -action.getOracleScore(config, missingLinks)
    else:
        unscaled = -forbiddenReward
    return scale * unscaled
################################################################################
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment