From c444e1eeef9083a5b822704ff875652086771f00 Mon Sep 17 00:00:00 2001
From: "maxime.petit" <maxime.petit@sms.liscluster>
Date: Tue, 29 Jun 2021 16:43:52 +0200
Subject: [PATCH] Test reward func

---
 Rl.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Rl.py b/Rl.py
index f870bcc..98beada 100644
--- a/Rl.py
+++ b/Rl.py
@@ -100,7 +100,7 @@ def rewardA(appliable, config, action, missingLinks):
 def rewardB(appliable, config, action, missingLinks):
   if appliable:
     if "BACK" not in action.name :
-      reward = 1.0 - action.getOracleScore(config, missingLinks)
+      reward = -1.0*action.getOracleScore(config, missingLinks)
     else :
       back = action.size
       error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
@@ -108,7 +108,7 @@ def rewardB(appliable, config, action, missingLinks):
       reward = last_error - back
   else:
     reward = -3.0
-  return reward
+  return 2*reward
 ################################################################################
 
 ################################################################################
@@ -123,7 +123,7 @@ def rewardC(appliable, config, action, missingLinks):
       reward = -sum(canceledRewards)
   else:
     reward = -3.0
-  return reward
+  return 3*reward
 ################################################################################
 
 ################################################################################
@@ -172,7 +172,7 @@ def rewardF(appliable, config, action, missingLinks):
 def rewardG(appliable, config, action, missingLinks):
   if appliable:
     if "BACK" not in action.name :
-      reward = -action.getOracleScore(config, missingLinks)
+      reward = -3 * action.getOracleScore(config, missingLinks)
     else :
       back = action.size
       canceledRewards = [h[3] for h in config.historyPop[-back:]]
-- 
GitLab