Commit d0c047a1 authored by Nicolas Thellier's avatar Nicolas Thellier

Upload New File

# import gym
# from gym import error, spaces, utils
# from gym.utils import seeding
import numpy as np
import wget
from gym import Env, spaces
from gym.utils import seeding
# import sys
# from contextlib import closing
# from io import StringIO
# from gym import utils
from gym.envs.toy_text import discrete

# possible actions
WEST = 0
SW = 1
SOUTH = 2
SE = 3
EAST = 4
NE = 5
NORTH = 6
NW = 7
WAIT = 8
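
# Layout of the 9 actions on the grid (illustrative added comment; row index
# increases downwards, column index increases to the right):
#   NW (7)    NORTH (6)   NE (5)
#   WEST (0)  WAIT (8)    EAST (4)
#   SW (1)    SOUTH (2)   SE (3)
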
# class adsilEnv(gym.Env):
class adsilEnv(discrete.DiscreteEnv):
    metadata = {'render.modes': ['human']}

    def __init__(self, gridparams, agents):
        # gridparams = [xbins, xlims, ylims]
        # agents : list(agentID, initPos, maxsteps)
        # initPos = list[x, y]
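        # Illustrative (hypothetical) arguments, not taken from the original commit:
        #   gridparams = [6, np.array([-3000., 3000.]), np.array([-2000., 2000.])]
        #   agents = [0, [3000.0, -2000.0], 12]   # agentID, initPos [x, y], maxsteps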
        self.stepCounter = 0
        self.nA = 9  # number of possible actions
        self.done = False
        # discrete duration of the dynamic environment
        self.nsteps = 5  # mimics a fixed environment
        # if dynamic, self.nsteps would equal the number of environment grids
        # (a dynamic grid would have shape nsteps x Xbins x Ybins)
        self.snr = np.load("../ressources/all_snr_reshaped.npy")
        if gridparams is None:
            # default grid: gridbinx x gridbiny cells centred on the origin
            gridbinx = 6
            xsidekm = 6000
            ysidekm = 4000
            xlims = np.array([-xsidekm / 2, xsidekm / 2])
            ylims = np.array([-ysidekm / 2, ysidekm / 2])
            gridbiny = int(gridbinx * ysidekm / xsidekm)
            nstate = int(gridbinx * gridbiny)
            print("nstate", nstate)
            x = np.linspace(xlims[0], xlims[1], gridbinx)
            y = np.linspace(ylims[0], ylims[1], gridbiny)
            xx, yy = np.meshgrid(x, y)
            # flatten the grid into an (nstate, 3) array of [x, y, z] coordinates (z = 0)
            gridcoordx = np.reshape(xx, (-1, 1))
            gridcoordy = np.reshape(yy, (-1, 1))
            gridcoordz = np.zeros((nstate, 1))
            gridcoord = np.concatenate((gridcoordx, gridcoordy, gridcoordz), axis=1)
            # plt.scatter(gridcoord[:, 0], gridcoord[:, 1])  # optional sanity-check plot
            # NOTE: statemap was renamed statemapgrid for the 2-D view of the state indices
            statemap = np.arange(0, nstate)
            statemapgrid = statemap.reshape(gridbiny, gridbinx)
            # s = np.array([[0, 0, 0]])
            # snr = -np.linalg.norm(s - gridcoord, axis=1)
            self.nrow, self.ncol = statemapgrid.shape
            self.nS = self.nrow * self.ncol
            self.gridcoord = gridcoord
            self.statemap = statemap
            self.statemapgrid = statemapgrid
            # self.reward_range = (0, 1)  # if we want to restrict the reward space
        if agents is None:
            # define a default agent: [agentID, initPos, maxsteps]
            self.agents = [0, [np.max(self.gridcoord[:, 0]), np.min(self.gridcoord[:, 1])], int(self.nS / 2)]
        else:
            # keep the agent description passed by the caller (added; missing in the original)
            self.agents = agents
        # locate the state index matching the agent's initial position (z = 0 plane)
        cond1 = self.gridcoord[:, 0] == self.agents[1][0]
        cond2 = self.gridcoord[:, 1] == self.agents[1][1]
        cond3 = self.gridcoord[:, 2] == 0
        mask = cond1 & cond2 & cond3
        idxstart = np.where(mask)[0][0]
        self.idxstart = idxstart
        # initial state distribution: the agent always starts at idxstart
        isd = np.zeros(self.nS)
        isd[idxstart] = 1
        # isd /= isd.sum()
        # P[s][a] = list of (probability, nextstate, reward, done) transitions,
        # in the format expected by discrete.DiscreteEnv
        P = {s: {a: [] for a in range(self.nA)} for s in range(self.nS)}

        def to_s(row, col):
            # flatten (row, col) grid coordinates into a single state index
            return row * self.ncol + col

        def inc(row, col, a):
            # apply one move on the grid, clamping at the borders
            if a == WEST:
                newcol = max(col - 1, 0)
                newrow = row
            elif a == SOUTH:
                newrow = min(row + 1, self.nrow - 1)
                newcol = col
            elif a == EAST:
                newcol = min(col + 1, self.ncol - 1)
                newrow = row
            elif a == NORTH:
                newrow = max(row - 1, 0)
                newcol = col
            elif a == SW:
                newcol = max(col - 1, 0)
                newrow = min(row + 1, self.nrow - 1)
            elif a == SE:
                newrow = min(row + 1, self.nrow - 1)
                newcol = min(col + 1, self.ncol - 1)
            elif a == NE:
                newrow = max(row - 1, 0)
                newcol = min(col + 1, self.ncol - 1)
            elif a == NW:
                newrow = max(row - 1, 0)
                newcol = max(col - 1, 0)
            elif a == WAIT:
                newrow = row
                newcol = col
            return (newrow, newcol)
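
        # Illustrative examples (added): inc(0, 0, NW) -> (0, 0) because moves are
        # clamped at the grid border, while inc(0, 0, SE) -> (1, 1).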

        # the environment is not dynamic (the grid does not change over time)
        def update_probability_matrix(row, col, action):
            newrow, newcol = inc(row, col, action)
            newstate = to_s(newrow, newcol)
            # episode termination is handled by the step counter in step(), not here
            reward = self.compute_reward()
            return newstate, reward, False

        # build the deterministic transition table for every state and action
        for row in range(self.nrow):
            for col in range(self.ncol):
                s = to_s(row, col)
                for a in range(self.nA):
                    li = P[s][a]
                    # every transition happens with probability 1.0
                    li.append((1.0, *update_probability_matrix(row, col, a)))

        super().__init__(self.nS, self.nA, P, isd)
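        # For the default 4 x 6 grid this gives, for example (illustrative):
        #   P[0][EAST] == [(1.0, 1, 1, False)]   # state 0 -> state 1, reward 1, not done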

    def step(self, action):
        # advance the step counter; the episode ends after nsteps steps
        if self.stepCounter < self.nsteps:
            self.stepCounter += 1
            if self.stepCounter == self.nsteps:
                self.done = True
            return super().step(action)
        else:
            # past the episode limit: the original code does nothing here
            pass

    def reset(self):
        self.stepCounter = 0
        self.done = False
        return super().reset()

    # def render(self, mode='human'):
    #     ...

    # def close(self):
    #     ...

    def compute_reward(self):
        # placeholder reward: constant 1 for every transition
        return 1

'''
class MultiAgentEnv(gym.Env):
    def step(self, action_n):
        obs_n = list()
        reward_n = list()
        done_n = list()
        info_n = {'n': []}
        # ...
        return obs_n, reward_n, done_n, info_n
'''
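
# Minimal usage sketch (added for illustration, not part of the original commit).
# It assumes "../ressources/all_snr_reshaped.npy" exists and that this module is
# importable; the default 6 x 4 grid and default agent are used.
#
#   env = adsilEnv(gridparams=None, agents=None)
#   obs = env.reset()
#   for _ in range(env.nsteps):
#       obs, reward, done, info = env.step(env.action_space.sample())
#       print(obs, reward, done)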