diff --git a/Rl.py b/Rl.py index f870bcc29624ca364c1a338ac2dc78badd907162..98beada183a039dfbec4aa9914cc575c2eb7b17c 100644 --- a/Rl.py +++ b/Rl.py @@ -100,7 +100,7 @@ def rewardA(appliable, config, action, missingLinks): def rewardB(appliable, config, action, missingLinks): if appliable: if "BACK" not in action.name : - reward = 1.0 - action.getOracleScore(config, missingLinks) + reward = -1.0*action.getOracleScore(config, missingLinks) else : back = action.size error_in_pop = [i for i in range(1,back) if config.historyPop[-i][3] < 0] @@ -108,7 +108,7 @@ def rewardB(appliable, config, action, missingLinks): reward = last_error - back else: reward = -3.0 - return reward + return 2*reward ################################################################################ ################################################################################ @@ -123,7 +123,7 @@ def rewardC(appliable, config, action, missingLinks): reward = -sum(canceledRewards) else: reward = -3.0 - return reward + return 3*reward ################################################################################ ################################################################################ @@ -172,7 +172,7 @@ def rewardF(appliable, config, action, missingLinks): def rewardG(appliable, config, action, missingLinks): if appliable: if "BACK" not in action.name : - reward = -action.getOracleScore(config, missingLinks) + reward = -3 * action.getOracleScore(config, missingLinks) else : back = action.size canceledRewards = [h[3] for h in config.historyPop[-back:]]