From abac4d5b6399cc7b68a353640c6db0b5e05117b8 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Fri, 9 Jul 2021 15:00:38 +0200
Subject: [PATCH] Added reward functions

---
Review note: line breaks and two-space indentation were reconstructed from a
whitespace-collapsed copy, and the hunk was revised in review, so the blob ids
on the "index" line predate this revision.

 Rl.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/Rl.py b/Rl.py
index 8e320df..f0ed8f1 100644
--- a/Rl.py
+++ b/Rl.py
@@ -214,3 +214,36 @@ def rewardG(appliable, config, action, missingLinks):
   return reward
 ################################################################################
 
+################################################################################
+def _unscaledRewardG(appliable, config, action, missingLinks):
+  """Compute the reward shared by reward3G and reward10G, before scaling.
+
+  Returns -forbiddenReward when the action is not appliable. For an appliable
+  action other than "BACK", returns the negated oracle score of the action.
+  For "BACK", sums field 3 of the last action.size entries of
+  config.historyPop and returns log(1 - sum) if that sum is negative,
+  otherwise -1.
+  """
+  if not appliable:
+    return -forbiddenReward
+  if action.name != "BACK" :
+    return -action.getOracleScore(config, missingLinks)
+  back = action.size
+  # historyPop[-0:] would select the whole history, so guard back == 0.
+  # NOTE(review): h[3] is presumably the reward stored with each entry - confirm.
+  canceled = sum(h[3] for h in config.historyPop[-back:]) if back > 0 else 0
+  return np.log(1-canceled) if canceled < 0 else -1
+################################################################################
+
+################################################################################
+def reward3G(appliable, config, action, missingLinks):
+  """Return 3 times the reward computed by _unscaledRewardG."""
+  return 3*_unscaledRewardG(appliable, config, action, missingLinks)
+################################################################################
+
+################################################################################
+def reward10G(appliable, config, action, missingLinks):
+  """Return 10 times the reward computed by _unscaledRewardG."""
+  return 10*_unscaledRewardG(appliable, config, action, missingLinks)
+################################################################################
+
-- 
GitLab