Fixed reward functions: the BACK size is now read from action.size instead of being parsed from action.name

67b8b07c · maxime.petit · 7cc3e7f1 · 67b8b07c
Commit 67b8b07c authored May 26, 2021 by maxime.petit
--- a/Rl.py
+++ b/Rl.py
@@ -101,7 +101,7 @@ def rewardB(appliable, config, action, missingLinks):
    if "BACK" not in action.name :
      reward = 1.0 - action.getOracleScore(config, missingLinks)
    else :
-      back = int(action.name.split()[-1])
+      back = action.size
      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
      last_error = error_in_pop[-1] if len(error_in_pop) > 0 else 0
      reward = last_error - back
@@ -116,7 +116,7 @@ def rewardC(appliable, config, action, missingLinks):
    if "BACK" not in action.name :
      reward = -action.getOracleScore(config, missingLinks)
    else :
-      back = int(action.name.split()[-1])
+      back = action.size
      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
      canceledRewards = [h[3] for h in config.historyPop[-back:]]
      reward = -sum(canceledRewards)
@@ -131,7 +131,7 @@ def rewardD(appliable, config, action, missingLinks):
    if "BACK" not in action.name :
      reward = -action.getOracleScore(config, missingLinks)
    else :
-      back = int(action.name.split()[-1])
+      back = action.size
      error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0]
      canceledRewards = [h[3] for h in config.historyPop[-back:]]
      reward = -sum(canceledRewards) - 1