Added the reward3C reward function and a global variable (forbiddenReward) that sets the penalty applied to impossible (non-appliable) actions

558a83dd · Franck Dary · e3a110f0 · 558a83dd
Commit 558a83dd authored 3 years ago by Franck Dary
--- a/Rl.py
+++ b/Rl.py
@@ -92,6 +92,8 @@ def rewarding(appliable, config, action, missingLinks, funcname):
  return globals()["reward"+funcname](appliable, config, action, missingLinks)
 ################################################################################

+forbiddenReward = 1.5  # penalty magnitude: the reward functions below assign -forbiddenReward when the action is not appliable (replaces the hard-coded -3.0)
+
 ################################################################################
 def rewardA(appliable, config, action, missingLinks):
  if appliable:
@@ -103,7 +105,7 @@ def rewardA(appliable, config, action, missingLinks):
      last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
      reward = last_error - back
  else:
-    reward = -3.0
+    reward = -forbiddenReward
  return reward
 ################################################################################

@@ -118,7 +120,7 @@ def rewardB(appliable, config, action, missingLinks):
      last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
      reward = last_error - back
  else:
-    reward = -3.0
+    reward = -forbiddenReward
  return reward
 ################################################################################

@@ -133,10 +135,25 @@ def rewardC(appliable, config, action, missingLinks):
      canceledRewards = [h[3] for h in config.historyPop[-back:]]
      reward = -sum(canceledRewards)
  else:
-    reward = -3.0
+    reward = -forbiddenReward
  return reward
 ################################################################################

+################################################################################
+def reward3C(appliable, config, action, missingLinks):  # reward variant whose result is multiplied by 3.0 before being returned
+  if appliable:
+    if action.name != "BACK" :
+      reward = -action.getOracleScore(config, missingLinks)  # any action other than BACK: negated oracle score
+    else :
+      back = action.size
+      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]  # NOTE(review): never read afterwards in this function
+      canceledRewards = [h[3] for h in config.historyPop[-back:]]  # element 3 of the last `back` historyPop entries; presumably the stored rewards - verify
+      reward = -sum(canceledRewards)
+  else:
+    reward = -forbiddenReward  # action not appliable: penalty comes from the module-level forbiddenReward
+  return reward*3.0  # the scaling covers every branch, so a non-appliable action yields -3.0*forbiddenReward
+################################################################################
+
 ################################################################################
 def rewardD(appliable, config, action, missingLinks):
  if appliable:
@@ -148,7 +165,7 @@ def rewardD(appliable, config, action, missingLinks):
      canceledRewards = [h[3] for h in config.historyPop[-back:]]
      reward = -sum(canceledRewards) - 1
  else:
-    reward = -3.0
+    reward = -forbiddenReward
  return reward
 ################################################################################

@@ -160,7 +177,7 @@ def rewardE(appliable, config, action, missingLinks):
    else :
      reward = -0.5
  else:
-    reward = -3.0
+    reward = -forbiddenReward
  return reward
 ################################################################################

@@ -175,7 +192,7 @@ def rewardF(appliable, config, action, missingLinks):
      last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
      reward = last_error - back
  else:
-    reward = -3.0
+    reward = -forbiddenReward
  return 10*reward
 ################################################################################

@@ -189,7 +206,7 @@ def rewardG(appliable, config, action, missingLinks):
      canceledRewards = [h[3] for h in config.historyPop[-back:]]
      reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
  else:
-    reward = -3.0
+    reward = -forbiddenReward
  return reward
 ################################################################################