Hi @mannyv
Sorry for not providing this information.
Here is my environment:
from gym import Env
from gym.spaces import Box, Dict
import numpy as np
import matplotlib.pyplot as plt
import plotext
import math
inf = 1e4  # large finite value used as +/- infinity (Box bounds and sentinel in the LV table)
TICK = 0.5
GAMMA_TRACTION_MAX = 1.5
GAMMA_FS_MAX = -1.5
VMAX = 38.8
PAEX = 1000.
PAF = 1100.
LIMITE_DOMAINE = PAF + 130
# (position, speed limit) pairs along the track
SPEED_LIMIT = np.array([[0., 30.], [250., VMAX], [800., 20.], [PAEX, 0], [PAF, 0], [LIMITE_DOMAINE, 0]], dtype=np.float32)
PENALTY_OVERSPEED = 10
PENALTY_DEPASSEMENT = 1000
BONUS_SPEED = 1000
EPSILON = 25
class SimuMiniEnv_v0(Env):
    MAX_STEPS = 10000

    def __init__(self):
        # The action is an acceleration increment per tick (scaled down by a factor of 10)
        self.action_space = Box(low=np.array([GAMMA_FS_MAX / 10]), high=np.array([GAMMA_TRACTION_MAX / 10]))
        spaces = {
            'acceleration': Box(low=GAMMA_FS_MAX, high=GAMMA_TRACTION_MAX, shape=(1,), dtype=np.float32),
            'speed': Box(low=0, high=VMAX, shape=(1,), dtype=np.float32),
            'position': Box(low=0, high=LIMITE_DOMAINE + EPSILON, shape=(1,), dtype=np.float32),
            'LV': Box(low=-inf, high=inf, shape=(6, 2), dtype=np.float32),
        }
        self.observation_space = Dict(spaces)
        self.state = {
            'acceleration': np.array([0.]),
            'speed': np.array([0.]),
            'position': np.array([0.]),
            'LV': SPEED_LIMIT,
        }
        self.acceleration = [0.]
        self.speed = [0.]
        self.position = [0.]
        self.simone_length = 0.  # elapsed simulated time, advanced by TICK in step()
    def step(self, action):
        bonus = 0
        # Clamp the commanded acceleration to [GAMMA_FS_MAX, GAMMA_TRACTION_MAX]
        acc_next_step = self.acceleration[-1] + action[0]
        if acc_next_step < GAMMA_FS_MAX:
            acceleration_temp = GAMMA_FS_MAX
        elif acc_next_step > GAMMA_TRACTION_MAX:
            acceleration_temp = GAMMA_TRACTION_MAX
        else:
            acceleration_temp = acc_next_step
        # Integrate the speed over one tick and keep it within [0, VMAX]
        speed_temp = acceleration_temp * TICK + self.speed[-1]
        if speed_temp < 0:
            acceleration_temp = -self.speed[-1] / TICK
            speed_temp = 0.
        elif speed_temp > VMAX:
            acceleration_temp = (VMAX - self.speed[-1]) / TICK
            speed_temp = VMAX
        self.acceleration.append(acceleration_temp)
        self.speed.append(speed_temp)
        self.position.append(self.speed[-1] * TICK + self.position[-1])
        # Express the speed-limit table relative to the current position and
        # mask the entries that are already behind the train
        SL_updated = np.add(SPEED_LIMIT, np.array([-self.position[-1], 0]))
        index_current_SL = [np.where(SL_updated[:, 0] <= 0)[0][-1], 1]
        current_SL = SL_updated[tuple(index_current_SL)]
        SL_updated[SL_updated < 0] = inf
        SL_updated[index_current_SL[0], 0] = 0
        for i in range(np.size(SL_updated, 0)):
            if SL_updated[i, 0] == inf:
                SL_updated[i, 1] = inf
        done = False
        self.simone_length += TICK
        # Reward shaping and termination conditions
        if self.speed[-1] >= current_SL:
            bonus -= PENALTY_OVERSPEED
        if self.position[-1] >= PAEX:
            if self.speed[-1] <= 0.1:
                done = True
                bonus += self.simone_length * TICK * BONUS_SPEED
            else:
                bonus -= PENALTY_DEPASSEMENT / 10
        if self.position[-1] >= PAF:
            if self.speed[-1] <= 0.01:
                done = True
            else:
                bonus -= PENALTY_DEPASSEMENT
                done = True
        if self.simone_length / TICK >= self.MAX_STEPS:
            bonus -= PENALTY_DEPASSEMENT * 10
            done = True
        if self.position[-1] >= LIMITE_DOMAINE:
            done = True
        if self.speed[-1] <= 0.001:
            bonus -= PENALTY_OVERSPEED / 10
        info = {}
        state = {
            'acceleration': np.array([self.acceleration[-1]]),
            'speed': np.array([self.speed[-1]]),
            'position': np.array([self.position[-1]]),
            'LV': SL_updated,
        }
        self.state = state
        return self.state, bonus, done, info
    def render(self):
        pass

    def reset(self):
        self.state = {
            'acceleration': np.array([0.]),
            'speed': np.array([0.]),
            'position': np.array([0.]),
            'LV': SPEED_LIMIT,
        }
        self.position = [0.]
        self.speed = [0.]
        self.acceleration = [0.]
        self.simone_length = 0.
        return self.state
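For what it's worth, a quick way to sanity-check that an episode actually terminates is to roll the environment out with random actions (just a sketch for illustration, not part of my actual training code):

# Sanity check only: one episode with random actions, to exercise step() end to end
env = SimuMiniEnv_v0()
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    action = env.action_space.sample()  # random action inside the Box action space
    obs, reward, done, info = env.step(action)
    total_reward += reward
print("random-policy episode reward:", total_reward)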
And here is how I set up my training:
import ray
from ray.tune.registry import register_env

register_env("SimuMini-v0", lambda config: SimuMiniEnv_v0())
results = ray.tune.run(
    "PPO",
    stop={"training_iteration": 300},
    config={"env": "SimuMini-v0", "lr": 1e-4, "vf_clip_param": 1e8},
)
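After training, the run can be inspected roughly like this (a sketch assuming the ExperimentAnalysis object returned by tune.run and the episode_reward_mean metric that PPO reports):

# Sketch: pick the best trial by mean episode reward and print its last reported value
best_trial = results.get_best_trial(metric="episode_reward_mean", mode="max")
print(best_trial.last_result["episode_reward_mean"])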
I’m using Ray 1.12.1 with Python 3.9 (on macOS 11, in case that’s useful).