diff --git a/envs/adsil_env.py b/envs/adsil_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..20ea66b5a0f5299a0ee5b275160c60e741f2f3a9
--- /dev/null
+++ b/envs/adsil_env.py
@@ -0,0 +1,246 @@
+import numpy as np
+
+from gym import Env, spaces
+from gym.utils import seeding
+from gym.envs.toy_text import discrete
+
+
+# possible actions
+WEST = 0
+SW = 1
+SOUTH = 2
+SE = 3
+EAST = 4
+NE = 5
+NORTH = 6
+NW = 7
+WAIT = 8
+
+
+class adsilEnv(discrete.DiscreteEnv):
+    metadata = {'render.modes': ['human']}
+
+    def __init__(self, gridparams, agents):
+        # gridparams = [xbins, xlims, ylims]
+        # agents : list(agentID, initPos, maxsteps)
+        # initPos = list[x, y]
+        self.stepCounter = 0
+        self.nA = 9  # number of possible actions
+        self.done = False
+        # discrete duration of the dynamic environment
+        self.nsteps = 5  # fixed episode length used to mimic a static environment
+        # if dynamic: self.nsteps would be equal to the number of env grids,
+        # and the dynamic grid would be of shape nsteps x Xbins x Ybins
+        self.snr = np.load("../ressources/all_snr_reshaped.npy")
+
+        if gridparams is None:
+            # default grid: 6 x 4 cells over a 6000 x 4000 area centred on the origin
+            gridbinx = 6
+            xsidekm = 6000
+            ysidekm = 4000
+            xlims = np.array([-xsidekm / 2, xsidekm / 2])
+            ylims = np.array([-ysidekm / 2, ysidekm / 2])
+            gridbiny = int(gridbinx * ysidekm / xsidekm)
+            nstate = int(gridbinx * gridbiny)
+
+            x = np.linspace(xlims[0], xlims[1], gridbinx)
+            y = np.linspace(ylims[0], ylims[1], gridbiny)
+            xx, yy = np.meshgrid(x, y)
+
+            # flat list of (x, y, z) coordinates, one row per grid cell (z = 0)
+            gridcoordx = np.reshape(xx, (-1, 1))
+            gridcoordy = np.reshape(yy, (-1, 1))
+            gridcoordz = np.zeros((nstate, 1))
+            gridcoord = np.concatenate((gridcoordx, gridcoordy, gridcoordz), axis=1)
+
+            #plt.scatter(gridcoord[:, 0], gridcoord[:, 1])
+            #f2 = plt.figure()
+            #ax = f2.gca()
+            #ax.scatter(gridcoord[6, 0], gridcoord[6, 1])
+            #ax.set_xlim(1.1 * xlims)
+            #ax.set_ylim(1.1 * ylims)
+
+            # NOTE: statemap was renamed to statemapgrid for the 2D view
+            statemap = np.arange(0, nstate)
+            statemapgrid = statemap.reshape(gridbiny, gridbinx)
+
+            #s = np.array([[0, 0, 0]])
+            #snr = -np.linalg.norm(s - gridcoord, axis=1)
+            #print(snr.reshape((gridbiny, gridbinx)))
+
+            self.nrow, self.ncol = statemapgrid.shape
+            self.nS = self.nrow * self.ncol
+            self.gridcoord = gridcoord
+            self.statemap = statemap
+            self.statemapgrid = statemapgrid
+
+            #self.reward_range = (0, 1)  # if we want to restrict the reward space
+        else:
+            # only the default grid is implemented for now
+            raise NotImplementedError("custom gridparams are not supported yet")
+
+        if agents is None:
+            # define a default agent: id 0, initial position at the (x_max, y_min)
+            # corner of the grid, maximum number of steps = half the number of states
+            self.agents = [0,
+                           [np.max(self.gridcoord[:, 0]), np.min(self.gridcoord[:, 1])],
+                           int(self.nS / 2)]
+        else:
+            self.agents = agents
+
+        # the initial position must coincide exactly with one grid node
+        cond1 = self.gridcoord[:, 0] == self.agents[1][0]
+        cond2 = self.gridcoord[:, 1] == self.agents[1][1]
+        cond3 = self.gridcoord[:, 2] == 0
+        mask = cond1 & cond2 & cond3
+        idxstart = np.where(mask)[0][0]
+        self.idxstart = idxstart
+
+        # initial state distribution: the agent always starts at idxstart
+        isd = np.zeros(self.nS)
+        isd[idxstart] = 1
+
+        P = {s: {a: [] for a in range(self.nA)} for s in range(self.nS)}
+
+        def to_s(row, col):
+            return row * self.ncol + col
+
+        def inc(row, col, a):
+            # move one cell in the requested direction, clipped to the grid borders
+            if a == WEST:
+                newcol = max(col - 1, 0)
+                newrow = row
+            elif a == SOUTH:
+                newrow = min(row + 1, self.nrow - 1)
+                newcol = col
+            elif a == EAST:
+                newcol = min(col + 1, self.ncol - 1)
+                newrow = row
+            elif a == NORTH:
+                newrow = max(row - 1, 0)
+                newcol = col
+            elif a == SW:
+                newcol = max(col - 1, 0)
+                newrow = min(row + 1, self.nrow - 1)
+            elif a == SE:
+                newrow = min(row + 1, self.nrow - 1)
+                newcol = min(col + 1, self.ncol - 1)
+            elif a == NE:
+                newrow = max(row - 1, 0)
+                newcol = min(col + 1, self.ncol - 1)
+            elif a == NW:
+                newrow = max(row - 1, 0)
+                newcol = max(col - 1, 0)
+            elif a == WAIT:
+                newrow = row
+                newcol = col
+            return (newrow, newcol)
+
+        # if the environment is not dynamic (no variation of the env over time)
+        def update_probability_matrix(row, col, action):
+            newrow, newcol = inc(row, col, action)
+            newstate = to_s(newrow, newcol)
+            reward = self.compute_reward()
+            # episode termination is handled by step(), not by the transition table
+            return newstate, reward, False
+
+        # deterministic transitions: each action leads to exactly one next state
+        for row in range(self.nrow):
+            for col in range(self.ncol):
+                s = to_s(row, col)
+                for a in range(self.nA):
+                    li = P[s][a]
+                    li.append((1.0, *update_probability_matrix(row, col, a)))
+
+        super().__init__(self.nS, self.nA, P, isd)
+
+    def step(self, action):
+        if self.stepCounter < self.nsteps:
+            self.stepCounter += 1
+            if self.stepCounter == self.nsteps:
+                self.done = True
+            obs, reward, _, info = super().step(action)
+            return obs, reward, self.done, info
+        else:
+            # episode is already over: stay in place and keep signalling done
+            return int(self.s), 0, True, {}
+
+    def reset(self):
+        self.stepCounter = 0
+        self.done = False
+        return super().reset()
+
+    #def render(self, mode='human'):
+        #pass
+
+    #def close(self):
+        #pass
+
+    def compute_reward(self):
+        # placeholder: constant reward for every transition
+        return 1
+
+    '''
+    class MultiAgentEnv(gym.Env):
+
+        def step(self, action_n):
+            obs_n = list()
+            reward_n = list()
+            done_n = list()
+            info_n = {'n': []}
+            # ...
+            return obs_n, reward_n, done_n, info_n
+    '''
\ No newline at end of file
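
A minimal usage sketch, assuming the pre-0.26 gym step/reset API targeted by gym.envs.toy_text.discrete, that ../ressources/all_snr_reshaped.npy is reachable from the working directory, and that the file is importable as envs.adsil_env:

    import numpy as np
    from envs.adsil_env import adsilEnv

    env = adsilEnv(gridparams=None, agents=None)   # default 6 x 4 grid, default agent
    state = env.reset()                            # discrete index of the start cell
    print("start state:", state, "coords:", env.gridcoord[state])

    done = False
    while not done:
        action = np.random.randint(env.nA)         # one of the 9 move/wait actions
        state, reward, done, info = env.step(action)
        print(state, reward, done, info)

The episode ends after env.nsteps calls to step(), since the transition table itself never sets done.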