Nicolas Thellier / gym-adsilbackup · Commits

Commit d0c047a1, authored Dec 05, 2021 by Nicolas Thellier
Upload New File (parent 7c73374c)
Changes: 1 file

envs/adsil_env.py (new file, 0 → 100644)
#import gym
#from gym import error, spaces, utils
#from gym.utils import seeding
import numpy as np
import wget
from gym import Env, spaces
from gym.utils import seeding
#import sys
#from contextlib import closing
#import numpy as np
#from io import StringIO
#from gym import utils
from gym.envs.toy_text import discrete
# possible actions
WEST = 0
SW = 1
SOUTH = 2
SE = 3
EAST = 4
NE = 5
NORTH = 6
NW = 7
WAIT = 8
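# Each action moves the agent one cell on the grid (rows grow to the south,
# columns to the east); the displacements implemented in inc() below are:
#   WEST (0,-1)  SW (+1,-1)  SOUTH (+1,0)  SE (+1,+1)
#   EAST (0,+1)  NE (-1,+1)  NORTH (-1,0)  NW (-1,-1)  WAIT (0,0)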
#class adsilEnv(gym.Env):
class adsilEnv(discrete.DiscreteEnv):
    metadata = {'render.modes': ['human']}

    def __init__(self, gridparams, agents):
        # gridparams = [xbins, xlims, ylims]
        # agents : list(agentID, initPos, maxsteps)
        # initPos = list[x,y]
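        # Illustrative values only, assumed from the formats above and equivalent
        # to the defaults built below:
        #   gridparams = [6, np.array([-3000, 3000]), np.array([-2000, 2000])]
        #   agents     = [0, [3000, -2000], 12]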
        self.stepCounter = 0
        self.nA = 9  # number of possible actions
        self.done = False
        # discrete duration of the dynamic environment
        self.nsteps = 5  # mimics a fixed environment
        # if dynamic : self.nsteps would be equal to the number of env grids
        # dynamic grid would be of shape nsteps x Xbins x Ybins
        self.snr = np.load("../ressources/all_snr_reshaped.npy")
        if gridparams is None:
            # default grid: 6 bins along x over a 6000 x 4000 area
            gridbinx = 6
            xsidekm = 6000
            ysidekm = 4000
            xlims = np.array([-xsidekm / 2, xsidekm / 2])
            ylims = np.array([-ysidekm / 2, ysidekm / 2])
        else:
            # gridparams follows the documented format [xbins, xlims, ylims];
            # the y bin count keeps the same cell aspect ratio as the default grid
            gridbinx, xlims, ylims = gridparams
            xsidekm = xlims[1] - xlims[0]
            ysidekm = ylims[1] - ylims[0]
        gridbiny = int(gridbinx * ysidekm / xsidekm)
        nstate = int(gridbinx * gridbiny)
        print("nstate", nstate)
        x = np.linspace(xlims[0], xlims[1], gridbinx)
        y = np.linspace(ylims[0], ylims[1], gridbiny)
        print(x.shape)
        print(y.shape)
        xx, yy = np.meshgrid(x, y)
        gridcoordx = np.reshape(xx, (-1, 1))
        gridcoordy = np.reshape(yy, (-1, 1))
        gridcoordz = np.zeros((nstate, 1))
        gridcoord = np.concatenate((gridcoordx, gridcoordy, gridcoordz), axis=1)
        print(gridcoord[0:8, :])
        #plt.scatter(gridcoord[:,0],gridcoord[:,1])
        #f2 = plt.figure()
        #ax = f2.gca()
        #ax.scatter(gridcoord[6,0],gridcoord[6,1])
        #ax.set_xlim(1.1*xlims)
        #ax.set_ylim(1.1*ylims)
        statemap = np.arange(0, nstate)
        statemapgrid = statemap.reshape(gridbiny, gridbinx)
        # NOTE: statemap renamed to statemapgrid for the 2-D layout
        print(statemap)
        print(statemapgrid)
        #s = np.array([[0,0,0]])
        #snr = - np.linalg.norm(s - gridcoord, axis = 1)
        ##print(snr.reshape((gridbiny,gridbinx)))
        ##print(snr)
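        # For the default grid (gridbinx=6, gridbiny=4), statemapgrid lays the
        # 24 state indices out row by row:
        #   [[ 0  1  2  3  4  5]
        #    [ 6  7  8  9 10 11]
        #    [12 13 14 15 16 17]
        #    [18 19 20 21 22 23]]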
        self.nrow, self.ncol = statemapgrid.shape
        self.nS = self.nrow * self.ncol
        self.gridcoord = gridcoord
        self.statemap = statemap
        self.statemapgrid = statemapgrid
        #self.grid = grid
        #self.agents = agents
        #self.reward_range = (0, 1)  ## if we want to restrict the reward space
        #self.nS = nS = self.nrow * self.ncol
        if agents is None:
            # define a default agent (placed at max x, min y of the grid)
            self.agents = [0,
                           [np.max(self.gridcoord[:, 0]), np.min(self.gridcoord[:, 1])],
                           int(self.nS / 2)]
        else:
            self.agents = agents
        #if self.agents[1] is None :
            #self.agents[1] = [np.min(grid),np.min(grid)]
        #if maxsteps is None :
            #self.maxsteps
        # index of the starting cell: the grid point matching the agent's initial position
        cond1 = self.gridcoord[:, 0] == self.agents[1][0]
        cond2 = self.gridcoord[:, 1] == self.agents[1][1]
        cond3 = self.gridcoord[:, 2] == 0
        #gridcoordd[1,:]
        mask = cond1 & cond2 & cond3
        #print(mask)
        idxstart = np.where(mask)[0][0]
        print(idxstart)
        #self.gridcoord[idxstart,:]
        self.idxstart = idxstart
        # initial state distribution: all probability mass on the start cell
        isd = np.zeros(self.nS)
        isd[idxstart] = 1
        #isd /= isd.sum()
        #self.isd = isd
        a = 2
        print(a)
        print(a * 4)
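        # With the default grid and default agent, the matching cell is index 5
        # (x = +3000, y = -2000), so isd is a one-hot vector with isd[5] == 1.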
        P = {s: {a: [] for a in range(self.nA)} for s in range(self.nS)}
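        # As in gym's DiscreteEnv, each P[s][a] entry is a list of
        # (probability, nextstate, reward, done) tuples; with the deterministic
        # moves below every list holds a single tuple, e.g. for the default grid
        # P[0][SE] == [(1.0, 7, 1, False)].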
        def to_s(row, col):
            # flatten (row, col) grid coordinates into a single state index
            return row * self.ncol + col

        def inc(row, col, a):
            # apply one of the 9 actions, clamping at the grid borders
            if a == WEST:
                newcol = max(col - 1, 0)
                newrow = row
            elif a == SOUTH:
                newrow = min(row + 1, self.nrow - 1)
                newcol = col
            elif a == EAST:
                newcol = min(col + 1, self.ncol - 1)
                newrow = row
            elif a == NORTH:
                newrow = max(row - 1, 0)
                newcol = col
            elif a == SW:
                newcol = max(col - 1, 0)
                newrow = min(row + 1, self.nrow - 1)
            elif a == SE:
                newrow = min(row + 1, self.nrow - 1)
                newcol = min(col + 1, self.ncol - 1)
            elif a == NE:
                newrow = max(row - 1, 0)
                newcol = min(col + 1, self.ncol - 1)
            elif a == NW:
                newrow = max(row - 1, 0)
                newcol = max(col - 1, 0)
            elif a == WAIT:
                newrow = row
                newcol = col
            return (newrow, newcol)
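        # For example, on the default 4 x 6 grid moves are clamped at the border:
        #   inc(0, 0, WEST)  -> (0, 0)   (already on the west edge, no move)
        #   inc(0, 0, SE)    -> (1, 1)   (one cell south and one cell east)
        #   inc(3, 5, SOUTH) -> (3, 5)   (already on the south edge)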
        # if environment is not dynamic (no variation of the env over time)
        def update_probability_matrix(row, col, action):
            newrow, newcol = inc(row, col, action)
            newstate = to_s(newrow, newcol)
            #self.done = False
            #if self.stepCounter == self.nsteps :
                #self.done = True
            #newletter = desc[newrow, newcol]
            #done = bytes(newletter) in b"GH"
            #done = False
            #reward = float(newletter == b"G")
            #reward = 10
            reward = self.compute_reward()
            return newstate, reward, False  #self.done
        # if environment is not dynamic (no variation of the env over time)
        for row in range(self.nrow):
            for col in range(self.ncol):
                s = to_s(row, col)
                for a in range(self.nA):
                    li = P[s][a]
                    #letter = desc[row, col]
                    #if letter in b"GH":
                        #li.append((1.0, s, 0, True))
                    #else:
                        #if is_slippery:
                            #for b in [(a - 1) % 4, a, (a + 1) % 4]:
                                #li.append(
                                    #(1.0 / 3.0, *update_probability_matrix(row, col, b))
                                #)
                        #else:
                            #li.append((1.0, *update_probability_matrix(row, col, a)))
                    li.append((1.0, *update_probability_matrix(row, col, a)))
        super().__init__(self.nS, self.nA, P, isd)
    def step(self, action):
        if self.stepCounter < self.nsteps:
            self.stepCounter += 1
            if self.stepCounter == self.nsteps:
                self.done = True
            return super().step(action)
        else:
            pass
            #b=3
    def reset(self):
        self.stepCounter = 0
        self.done = False
        return super().reset()

    #def render(self, mode='human'):
        #d=5
    #def close(self):
        #e=6

    def compute_reward(self):
        # constant placeholder reward
        return 1
'''
class MultiAgentEnv(gym.Env):
    def step(self, action_n):
        obs_n = list()
        reward_n = list()
        done_n = list()
        info_n = {'n': []}
        # ...
        return obs_n, reward_n, done_n, info_n
'''
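
if __name__ == "__main__":
    # Minimal usage sketch, assuming the default grid and agent (gridparams=None,
    # agents=None) and that ../ressources/all_snr_reshaped.npy exists on disk.
    env = adsilEnv(None, None)
    state = env.reset()
    for _ in range(env.nsteps):
        action = env.action_space.sample()  # random action among the 9 moves
        state, reward, done, info = env.step(action)
        # note: this done comes from the transition table and is always False here;
        # episode termination is tracked separately via env.stepCounter / env.done
        print(state, reward, done)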