import numpy as np
import wget
from gym import Env, spaces
from gym.utils import seeding
from gym.envs.toy_text import discrete
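
# adsilEnv: a toy single-agent grid-world built on gym's toy_text DiscreteEnv.
# The workspace is a rectangular x/y grid of cells; states are the flattened cell
# indices and, in this first version, every transition returns the constant reward
# produced by compute_reward() below.
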
# possible actions
WEST = 0
SW = 1
SOUTH = 2
SE = 3
EAST = 4
NE = 5
NORTH = 6
NW = 7
WAIT = 8
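
# The nine actions are the eight compass (king) moves on the grid plus WAIT, which
# keeps the agent in its current cell; moves past a border are clipped by inc() below.
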
#class adsilEnv(gym.Env):
class adsilEnv(discrete.DiscreteEnv):
    metadata = {'render.modes': ['human']}

    def __init__(self, gridparams, agents):
        # gridparams = [xbins, xlims, ylims]
        # agents : list(agentID, initPos, maxsteps)
        # initPos = list[x, y]
        self.stepCounter = 0
        self.nA = 9  # number of possible actions
        self.done = False
        # discrete duration of the dynamic environment
        self.nsteps = 5  # mimics a fixed environment
        # if dynamic: self.nsteps would be equal to the number of env grids,
        # and the dynamic grid would be of shape nsteps x Xbins x Ybins
        self.snr = np.load("../ressources/all_snr_reshaped.npy")
        if gridparams is None:
            # default grid (a user-supplied gridparams is not handled yet)
            gridbinx = 6
            xsidekm = 6000
            ysidekm = 4000
            xlims = np.array([-xsidekm / 2, xsidekm / 2])
            ylims = np.array([-ysidekm / 2, ysidekm / 2])
            gridbiny = int(gridbinx * ysidekm / xsidekm)
            nstate = int(gridbinx * gridbiny)
            print("nstate", nstate)
            x = np.linspace(xlims[0], xlims[1], gridbinx)
            y = np.linspace(ylims[0], ylims[1], gridbiny)
            print(x.shape)
            print(y.shape)
            xx, yy = np.meshgrid(x, y)
            gridcoordx = np.reshape(xx, (-1, 1))
            gridcoordy = np.reshape(yy, (-1, 1))
            gridcoordz = np.zeros((nstate, 1))
            gridcoord = np.concatenate((gridcoordx, gridcoordy, gridcoordz), axis=1)
            print(gridcoord[0:8, :])
            #plt.scatter(gridcoord[:,0], gridcoord[:,1])
            #f2 = plt.figure()
            #ax = f2.gca()
            #ax.scatter(gridcoord[6,0], gridcoord[6,1])
            #ax.set_xlim(1.1*xlims)
            #ax.set_ylim(1.1*ylims)
            statemap = np.arange(0, nstate)
            statemapgrid = statemap.reshape(gridbiny, gridbinx)
            # NOTE: statemap renamed to statemapgrid
            print(statemap)
            print(statemapgrid)
            #s = np.array([[0,0,0]])
            #snr = - np.linalg.norm(s - gridcoord, axis=1)
            ##print(snr.reshape((gridbiny, gridbinx)))
            ##print(snr)
            self.nrow, self.ncol = statemapgrid.shape
            self.nS = self.nrow * self.ncol
            self.gridcoord = gridcoord
            self.statemap = statemap
            self.statemapgrid = statemapgrid
            #self.grid = grid
            #self.agents = agents
            #self.reward_range = (0, 1)  # if we want to restrict the reward space
            #self.nS = nS = self.nrow * self.ncol
        if agents is None:
            # we define a default agent: [agentID, initPos, maxsteps]
            self.agents = [0, [np.max(self.gridcoord[:, 0]), np.min(self.gridcoord[:, 1])], int(self.nS / 2)]
        else:
            self.agents = agents
        #if self.agents[1] is None :
            #self.agents[1] = [np.min(grid), np.min(grid)]
        #if maxsteps is None :
            #self.maxsteps

        # index of the grid cell matching the agent's initial position
        cond1 = self.gridcoord[:, 0] == self.agents[1][0]
        cond2 = self.gridcoord[:, 1] == self.agents[1][1]
        cond3 = self.gridcoord[:, 2] == 0
        mask = cond1 & cond2 & cond3
        idxstart = np.where(mask)[0][0]
        print(idxstart)
        self.idxstart = idxstart

        # initial state distribution: all probability mass on the start cell
        isd = np.zeros(self.nS)
        isd[idxstart] = 1
        #isd /= isd.sum()
        #self.isd = isd

        P = {s: {a: [] for a in range(self.nA)} for s in range(self.nS)}
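        # DiscreteEnv (gym.envs.toy_text.discrete) expects nS, nA, P and isd, where
        # P[s][a] is a list of (probability, next_state, reward, done) tuples and isd
        # is the initial state distribution; P is filled by the loop further down.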
        def to_s(row, col):
            return row * self.ncol + col

        def inc(row, col, a):
            if a == WEST:
                newcol = max(col - 1, 0)
                newrow = row
            elif a == SOUTH:
                newrow = min(row + 1, self.nrow - 1)
                newcol = col
            elif a == EAST:
                newcol = min(col + 1, self.ncol - 1)
                newrow = row
            elif a == NORTH:
                newrow = max(row - 1, 0)
                newcol = col
            elif a == SW:
                newcol = max(col - 1, 0)
                newrow = min(row + 1, self.nrow - 1)
            elif a == SE:
                newrow = min(row + 1, self.nrow - 1)
                newcol = min(col + 1, self.ncol - 1)
            elif a == NE:
                newrow = max(row - 1, 0)
                newcol = min(col + 1, self.ncol - 1)
            elif a == NW:
                newrow = max(row - 1, 0)
                newcol = max(col - 1, 0)
            elif a == WAIT:
                newrow = row
                newcol = col
            return (newrow, newcol)
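        # Example on the default grid built above (ncol = 6, nrow = 4): from the
        # top-left cell (row 0, col 0), inc(0, 0, NW) is clipped back to (0, 0),
        # while inc(0, 0, SE) gives (1, 1), i.e. state to_s(1, 1) == 1 * 6 + 1 == 7.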
        # if the environment is not dynamic (no variation of the env over time):
        def update_probability_matrix(row, col, action):
            newrow, newcol = inc(row, col, action)
            newstate = to_s(newrow, newcol)
            #self.done = False
            #if self.stepCounter == self.nsteps :
                #self.done = True
            #reward = 10
            reward = self.compute_reward()
            return newstate, reward, False  #self.done
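        # e.g. with the default grid, update_probability_matrix(0, 0, EAST) returns
        # (1, compute_reward(), False), so P[0][EAST] becomes [(1.0, 1, 1, False)].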
        # deterministic transitions: fill P for every state / action pair
        for row in range(self.nrow):
            for col in range(self.ncol):
                s = to_s(row, col)
                for a in range(self.nA):
                    li = P[s][a]
                    li.append((1.0, *update_probability_matrix(row, col, a)))

        super().__init__(self.nS, self.nA, P, isd)
    def step(self, action):
        # count transitions and flag the episode as done after nsteps steps
        if self.stepCounter < self.nsteps:
            self.stepCounter += 1
            if self.stepCounter == self.nsteps:
                self.done = True
            return super().step(action)
        else:
            pass

    def reset(self):
        self.stepCounter = 0
        self.done = False
        return super().reset()

    #def render(self, mode='human'):
        #d = 5

    #def close(self):
        #e = 6

    def compute_reward(self):
        # constant reward for now (self.snr is loaded in __init__ but not used yet)
        return 1

'''
class MultiAgentEnv(gym.Env):
    def step(self, action_n):
        obs_n = list()
        reward_n = list()
        done_n = list()
        info_n = {'n': []}
        # ...
        return obs_n, reward_n, done_n, info_n
'''
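
# Minimal usage sketch, assuming it is run from a directory where
# ../ressources/all_snr_reshaped.npy exists (otherwise the np.load call in
# __init__ fails) and relying only on the DiscreteEnv API used above.
if __name__ == "__main__":
    env = adsilEnv(gridparams=None, agents=None)   # default 6 x 4 grid and default agent
    state = env.reset()
    print("start state:", state)
    for _ in range(env.nsteps):
        action = env.action_space.sample()         # one of the 9 moves, chosen at random
        next_state, reward, done, info = env.step(action)
        print(action, next_state, reward, done)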