import numpy as np
import wget
from gym import Env, spaces
from gym.utils import seeding
from gym.envs.toy_text import discrete
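
# adsilEnv: a toy single-agent grid-world built on gym's toy_text DiscreteEnv.
# The workspace is a rectangular x/y grid of cells; states are the flattened cell
# indices and, in this first version, every transition returns the constant reward
# produced by compute_reward() below.
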
# possible actions
WEST = 0
SW = 1
SOUTH = 2
SE = 3
EAST = 4
NE = 5
NORTH = 6
NW = 7
WAIT = 8
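
# The nine actions are the eight compass (king) moves on the grid plus WAIT, which
# keeps the agent in its current cell; moves past a border are clipped by inc() below.
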
#class adsilEnv(gym.Env):
class adsilEnv(discrete.DiscreteEnv):
    metadata = {'render.modes': ['human']}

    def __init__(self, gridparams, agents):
        # gridparams = [xbins, xlims, ylims]
        # agents : list(agentID, initPos, maxsteps)
        # initPos = list[x, y]
        self.stepCounter = 0
        self.nA = 9  # number of possible actions
        self.done = False
        # discrete duration of the dynamic environment
        self.nsteps = 5  # mimics a fixed environment
        # if dynamic: self.nsteps would be equal to the number of env grids,
        # and the dynamic grid would be of shape nsteps x Xbins x Ybins
        self.snr = np.load("../ressources/all_snr_reshaped.npy")
        if gridparams is None:
            # default grid (a user-supplied gridparams is not handled yet)
            gridbinx = 6
            xsidekm = 6000
            ysidekm = 4000
            xlims = np.array([-xsidekm / 2, xsidekm / 2])
            ylims = np.array([-ysidekm / 2, ysidekm / 2])
            gridbiny = int(gridbinx * ysidekm / xsidekm)
            nstate = int(gridbinx * gridbiny)
            print("nstate", nstate)
            x = np.linspace(xlims[0], xlims[1], gridbinx)
            y = np.linspace(ylims[0], ylims[1], gridbiny)
            print(x.shape)
            print(y.shape)
            xx, yy = np.meshgrid(x, y)
            gridcoordx = np.reshape(xx, (-1, 1))
            gridcoordy = np.reshape(yy, (-1, 1))
            gridcoordz = np.zeros((nstate, 1))
            gridcoord = np.concatenate((gridcoordx, gridcoordy, gridcoordz), axis=1)
            print(gridcoord[0:8, :])
            #plt.scatter(gridcoord[:,0], gridcoord[:,1])
            #f2 = plt.figure()
            #ax = f2.gca()
            #ax.scatter(gridcoord[6,0], gridcoord[6,1])
            #ax.set_xlim(1.1*xlims)
            #ax.set_ylim(1.1*ylims)
            statemap = np.arange(0, nstate)
            statemapgrid = statemap.reshape(gridbiny, gridbinx)
            # NOTE: statemap renamed to statemapgrid
            print(statemap)
            print(statemapgrid)
            #s = np.array([[0,0,0]])
            #snr = - np.linalg.norm(s - gridcoord, axis=1)
            ##print(snr.reshape((gridbiny, gridbinx)))
            ##print(snr)
            self.nrow, self.ncol = statemapgrid.shape
            self.nS = self.nrow * self.ncol
            self.gridcoord = gridcoord
            self.statemap = statemap
            self.statemapgrid = statemapgrid
            #self.grid = grid
            #self.agents = agents
            #self.reward_range = (0, 1)  # if we want to restrict the reward space
            #self.nS = nS = self.nrow * self.ncol
        if agents is None:
            # we define a default agent: [agentID, initPos, maxsteps]
            self.agents = [0, [np.max(self.gridcoord[:, 0]), np.min(self.gridcoord[:, 1])], int(self.nS / 2)]
        else:
            self.agents = agents
        #if self.agents[1] is None :
            #self.agents[1] = [np.min(grid), np.min(grid)]
        #if maxsteps is None :
            #self.maxsteps

        # index of the grid cell matching the agent's initial position
        cond1 = self.gridcoord[:, 0] == self.agents[1][0]
        cond2 = self.gridcoord[:, 1] == self.agents[1][1]
        cond3 = self.gridcoord[:, 2] == 0
        mask = cond1 & cond2 & cond3
        idxstart = np.where(mask)[0][0]
        print(idxstart)
        self.idxstart = idxstart

        # initial state distribution: all probability mass on the start cell
        isd = np.zeros(self.nS)
        isd[idxstart] = 1
        #isd /= isd.sum()
        #self.isd = isd

        P = {s: {a: [] for a in range(self.nA)} for s in range(self.nS)}
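        # DiscreteEnv (gym.envs.toy_text.discrete) expects nS, nA, P and isd, where
        # P[s][a] is a list of (probability, next_state, reward, done) tuples and isd
        # is the initial state distribution; P is filled by the loop further down.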
        def to_s(row, col):
            return row * self.ncol + col

        def inc(row, col, a):
            if a == WEST:
                newcol = max(col - 1, 0)
                newrow = row
            elif a == SOUTH:
                newrow = min(row + 1, self.nrow - 1)
                newcol = col
            elif a == EAST:
                newcol = min(col + 1, self.ncol - 1)
                newrow = row
            elif a == NORTH:
                newrow = max(row - 1, 0)
                newcol = col
            elif a == SW:
                newcol = max(col - 1, 0)
                newrow = min(row + 1, self.nrow - 1)
            elif a == SE:
                newrow = min(row + 1, self.nrow - 1)
                newcol = min(col + 1, self.ncol - 1)
            elif a == NE:
                newrow = max(row - 1, 0)
                newcol = min(col + 1, self.ncol - 1)
            elif a == NW:
                newrow = max(row - 1, 0)
                newcol = max(col - 1, 0)
            elif a == WAIT:
                newrow = row
                newcol = col
            return (newrow, newcol)
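        # Example on the default grid built above (ncol = 6, nrow = 4): from the
        # top-left cell (row 0, col 0), inc(0, 0, NW) is clipped back to (0, 0),
        # while inc(0, 0, SE) gives (1, 1), i.e. state to_s(1, 1) == 1 * 6 + 1 == 7.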
        # if the environment is not dynamic (no variation of the env over time):
        def update_probability_matrix(row, col, action):
            newrow, newcol = inc(row, col, action)
            newstate = to_s(newrow, newcol)
            #self.done = False
            #if self.stepCounter == self.nsteps :
                #self.done = True
            #reward = 10
            reward = self.compute_reward()
            return newstate, reward, False  #self.done
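        # e.g. with the default grid, update_probability_matrix(0, 0, EAST) returns
        # (1, compute_reward(), False), so P[0][EAST] becomes [(1.0, 1, 1, False)].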
        # deterministic transitions: fill P for every state / action pair
        for row in range(self.nrow):
            for col in range(self.ncol):
                s = to_s(row, col)
                for a in range(self.nA):
                    li = P[s][a]
                    li.append((1.0, *update_probability_matrix(row, col, a)))

        super().__init__(self.nS, self.nA, P, isd)
    def step(self, action):
        # count transitions and flag the episode as done after nsteps steps
        if self.stepCounter < self.nsteps:
            self.stepCounter += 1
            if self.stepCounter == self.nsteps:
                self.done = True
            return super().step(action)
        else:
            pass

    def reset(self):
        self.stepCounter = 0
        self.done = False
        return super().reset()

    #def render(self, mode='human'):
        #d = 5

    #def close(self):
        #e = 6

    def compute_reward(self):
        # constant reward for now (self.snr is loaded in __init__ but not used yet)
        return 1

'''
class MultiAgentEnv(gym.Env):
    def step(self, action_n):
        obs_n = list()
        reward_n = list()
        done_n = list()
        info_n = {'n': []}
        # ...
        return obs_n, reward_n, done_n, info_n
'''
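
# Minimal usage sketch, assuming it is run from a directory where
# ../ressources/all_snr_reshaped.npy exists (otherwise the np.load call in
# __init__ fails) and relying only on the DiscreteEnv API used above.
if __name__ == "__main__":
    env = adsilEnv(gridparams=None, agents=None)   # default 6 x 4 grid and default agent
    state = env.reset()
    print("start state:", state)
    for _ in range(env.nsteps):
        action = env.action_space.sample()         # one of the 9 moves, chosen at random
        next_state, reward, done, info = env.step(action)
        print(action, next_state, reward, done)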