diff --git a/Rl.py b/Rl.py
index 818af2ebcb3c88a4ece8f16dfb1f4ec40eb1a0a7..5d04b3fff3221ec06dfad4f2d26a473b864dc68b 100644
--- a/Rl.py
+++ b/Rl.py
@@ -99,115 +99,46 @@ def rewarding(appliable, config, action, missingLinks, funcname):
 forbiddenReward = 1.5
 
 ################################################################################
-def rewardA(appliable, config, action, missingLinks):
-  if appliable:
-    if action.name != "BACK" :
-      reward = -1.0*action.getOracleScore(config, missingLinks)
-    else :
-      back = action.size
-      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
-      last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
-      reward = last_error - back
-  else:
-    reward = -forbiddenReward
-  return reward
-################################################################################
-
-################################################################################
-def rewardB(appliable, config, action, missingLinks):
-  if appliable:
-    if action.name != "BACK" :
-      reward = 1.0 - action.getOracleScore(config, missingLinks)
-    else :
-      back = action.size
-      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
-      last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
-      reward = last_error - back
-  else:
-    reward = -forbiddenReward
-  return reward
-################################################################################
-
-################################################################################
-def rewardC(appliable, config, action, missingLinks):
+def rewardE(appliable, config, action, missingLinks):  # scheme E: negated oracle score, flat value for BACK, -forbiddenReward if not appliable
   if appliable:
     if action.name != "BACK" :
       reward = -action.getOracleScore(config, missingLinks)
     else :
-      back = action.size
-      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
-      canceledRewards = [h[3] for h in config.historyPop[-back:]]
-      reward = -sum(canceledRewards)
+      reward = 0.5  # fixed value for every appliable BACK; config.historyPop is not consulted
   else:
     reward = -forbiddenReward
   return reward
 ################################################################################
 
 ################################################################################
-def reward3C(appliable, config, action, missingLinks):
-  if appliable:
-    if action.name != "BACK" :
-      reward = -action.getOracleScore(config, missingLinks)
-    else :
-      back = action.size
-      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
-      canceledRewards = [h[3] for h in config.historyPop[-back:]]
-      reward = -sum(canceledRewards)
-  else:
-    reward = -forbiddenReward
-  return reward*3.0
-################################################################################
-
-################################################################################
-def rewardD(appliable, config, action, missingLinks):
+def rewardG(appliable, config, action, missingLinks):  # scheme G: BACK is scored from h[3] of the last action.size historyPop entries
   if appliable:
     if action.name != "BACK" :
       reward = -action.getOracleScore(config, missingLinks)
     else :
       back = action.size
-      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
       canceledRewards = [h[3] for h in config.historyPop[-back:]]
-      reward = -sum(canceledRewards) - 1
-  else:
-    reward = -forbiddenReward
-  return reward
-################################################################################
-
-################################################################################
-def rewardE(appliable, config, action, missingLinks):
-  if appliable:
-    if action.name != "BACK" :
-      reward = -action.getOracleScore(config, missingLinks)
-    else :
-      reward = -0.5
+      reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1  # sum < 0 gives 1-sum > 1, so the log is positive; otherwise BACK scores -1
   else:
     reward = -forbiddenReward
   return reward
 ################################################################################
 
 ################################################################################
-def rewardF(appliable, config, action, missingLinks):
-  if appliable:
-    if action.name != "BACK" :
-      reward = -1.0*action.getOracleScore(config, missingLinks)
-    else :
-      back = action.size
-      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
-      last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
-      reward = last_error - back
-  else:
-    reward = -forbiddenReward
-  return 10*reward
-################################################################################
-
-################################################################################
-def rewardG(appliable, config, action, missingLinks):
+def rewardA(appliable, config, action, missingLinks):
   if appliable:
     if action.name != "BACK" :
       reward = -action.getOracleScore(config, missingLinks)
     else :
-      back = action.size
-      canceledRewards = [h[3] for h in config.historyPop[-back:]]
+      canceledRewards = []
+      found = 0
+      for i in range(len(config.historyPop))[::-1] :
+        if config.historyPop[i][0].name == "NOBACK" :
+          found += 1
+          if found == action.size :
+            break
+        else :
+          canceledRewards.append(config.historyPop[i][3])
       reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
   else:
     reward = -forbiddenReward
@@ -215,7 +146,7 @@ def rewardG(appliable, config, action, missingLinks):
 ################################################################################
 
 ################################################################################
-def rewardA(appliable, config, action, missingLinks):
+def rewardA2(appliable, config, action, missingLinks):
   if appliable:
     if action.name != "BACK" :
       reward = -action.getOracleScore(config, missingLinks)
@@ -229,7 +160,7 @@ def rewardA(appliable, config, action, missingLinks):
             break
         else :
           canceledRewards.append(config.historyPop[i][3])
-      reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else -1
+      reward = np.log(1-sum(canceledRewards)) if -sum(canceledRewards) > 0 else 0
   else:
     reward = -forbiddenReward
   return reward