From 558a83dd342843eebcd45453caec6487290c91f6 Mon Sep 17 00:00:00 2001
From: Franck Dary <franck.dary@lis-lab.fr>
Date: Tue, 6 Jul 2021 18:51:14 +0200
Subject: [PATCH] Added 3C reward and added global variable to choose
 impossible action reward

---
 Rl.py | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/Rl.py b/Rl.py
index c88f46b..5f65adc 100644
--- a/Rl.py
+++ b/Rl.py
@@ -92,6 +92,8 @@ def rewarding(appliable, config, action, missingLinks, funcname):
   return globals()["reward"+funcname](appliable, config, action, missingLinks)
 ################################################################################
 
+forbiddenReward = 1.5  # penalty magnitude; reward functions use -forbiddenReward where -3.0 was hard-coded
+
 ################################################################################
 def rewardA(appliable, config, action, missingLinks):
   if appliable:
@@ -103,7 +105,7 @@ def rewardA(appliable, config, action, missingLinks):
       last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
       reward = last_error - back
   else:
-    reward = -3.0
+    reward = -forbiddenReward
   return reward
 ################################################################################
 
@@ -118,7 +120,7 @@ def rewardB(appliable, config, action, missingLinks):
       last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
       reward = last_error - back
   else:
-    reward = -3.0
+    reward = -forbiddenReward
   return reward
 ################################################################################
 
@@ -133,10 +135,25 @@ def rewardC(appliable, config, action, missingLinks):
       canceledRewards = [h[3] for h in config.historyPop[-back:]]
       reward = -sum(canceledRewards)
   else:
-    reward = -3.0
+    reward = -forbiddenReward
   return reward
 ################################################################################
 
+################################################################################
+def reward3C(appliable, config, action, missingLinks):  # returns the branch reward multiplied by 3.0
+  if appliable:
+    if action.name != "BACK" :
+      reward = -action.getOracleScore(config, missingLinks)  # negated oracle score of the action
+    else :
+      back = action.size
+      # Negate the summed field 3 (presumably the stored reward - confirm) of the last `back` history entries.
+      canceledRewards = [h[3] for h in config.historyPop[-back:]]
+      reward = -sum(canceledRewards)
+  else:
+    reward = -forbiddenReward  # non-appliable action: module-level penalty
+  return reward*3.0  # the scaling applies to every branch, including the penalty
+################################################################################
+
 ################################################################################
 def rewardD(appliable, config, action, missingLinks):
   if appliable:
@@ -148,7 +165,7 @@ def rewardD(appliable, config, action, missingLinks):
       canceledRewards = [h[3] for h in config.historyPop[-back:]]
       reward = -sum(canceledRewards) - 1
   else:
-    reward = -3.0
+    reward = -forbiddenReward
   return reward
 ################################################################################
 
@@ -160,7 +177,7 @@ def rewardE(appliable, config, action, missingLinks):
     else :
       reward = -0.5
   else:
-    reward = -3.0
+    reward = -forbiddenReward
   return reward
 ################################################################################
 
@@ -175,7 +192,7 @@ def rewardF(appliable, config, action, missingLinks):
       last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
       reward = last_error - back
   else:
-    reward = -3.0
+    reward = -forbiddenReward
   return 10*reward
 ################################################################################
 
@@ -189,7 +206,7 @@ def rewardG(appliable, config, action, missingLinks):
       canceledRewards = [h[3] for h in config.historyPop[-back:]]
       reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
   else:
-    reward = -3.0
+    reward = -forbiddenReward
   return reward
 ################################################################################
 
-- 
GitLab