Hi @arturn, sorry about that. Here is the corrected script:
from gym import Env
from gym.spaces import Box, Dict
import numpy as np
from ray.tune.registry import register_env
import ray
inf = 1e4
TICK = 0.5
GAMMA_TRACTION_MAX = 1.5
GAMMA_FS_MAX = -1.5
VMAX = 38.8
PAEX = 1000.
PAF = 1100.
LIMITE_DOMAINE = PAF + 130
SPEED_LIMIT = np.array(
    [[0., 30.], [250., VMAX], [800., 20.], [PAEX, 0], [PAF, 0], [LIMITE_DOMAINE, 0]],
    dtype=np.float32)
PENALTY_OVERSPEED = 10
PENALTY_DEPASSEMENT = 1000
BONUS_SPEED = 1000
EPSILON = 25
class SimuMiniEnv_v0(Env):
    MAX_STEPS = 10000

    def __init__(self):
        self.action_space = Box(low=np.array([GAMMA_FS_MAX / 10]),
                                high=np.array([GAMMA_TRACTION_MAX / 10]))
        spaces = {
            'acceleration': Box(low=GAMMA_FS_MAX, high=GAMMA_TRACTION_MAX, shape=(1,),
                                dtype=np.float32),
            'speed': Box(low=0, high=VMAX, shape=(1,), dtype=np.float32),
            'position': Box(low=0, high=LIMITE_DOMAINE + EPSILON, shape=(1,),
                            dtype=np.float32),
            'LV': Box(low=-inf, high=inf, shape=(6, 2), dtype=np.float32),
        }
        self.observation_space = Dict(spaces)
        self.state = {
            'acceleration': np.array([0.]),
            'speed': np.array([0.]),
            'position': np.array([0.]),
            'LV': SPEED_LIMIT,
        }
        self.acceleration = [0.]
        self.speed = [0.]
        self.position = [0.]
        self.simu_length = 0
    def step(self, action):
        bonus = 0
        acc_next_step = self.acceleration[-1] + action[0]
        # Clamp the requested acceleration to the physical limits
        if acc_next_step < GAMMA_FS_MAX:
            acceleration_temp = GAMMA_FS_MAX
        elif acc_next_step > GAMMA_TRACTION_MAX:
            acceleration_temp = GAMMA_TRACTION_MAX
        else:
            acceleration_temp = acc_next_step
        speed_temp = acceleration_temp * TICK + self.speed[-1]
        # Clamp the speed to [0, VMAX] and recompute the effective acceleration
        if speed_temp < 0:
            acceleration_temp = -self.speed[-1] / TICK
            speed_temp = 0.
        elif speed_temp > VMAX:
            acceleration_temp = (VMAX - self.speed[-1]) / TICK
            speed_temp = VMAX
        self.acceleration.append(acceleration_temp)
        self.speed.append(speed_temp)
        self.position.append(self.speed[-1] * TICK + self.position[-1])
        # Express the speed-limit positions relative to the current position
        SL_updated = np.add(SPEED_LIMIT, np.array([-self.position[-1], 0]))
        index_current_SL = [np.where(SL_updated[:, 0] <= 0)[0][-1], 1]
        current_SL = SL_updated[tuple(index_current_SL)]
        # Mask the limits that are already behind the train
        SL_updated[SL_updated < 0] = inf
        SL_updated[index_current_SL[0], 0] = 0
        for i in range(np.size(SL_updated, 0)):
            if SL_updated[i, 0] == inf:
                SL_updated[i, 1] = inf
        done = False
        self.simu_length += TICK
        if self.speed[-1] >= current_SL:
            bonus -= PENALTY_OVERSPEED
        if self.position[-1] >= PAEX:
            if self.speed[-1] <= 0.1:
                done = True
                bonus += self.simu_length * TICK * BONUS_SPEED
            else:
                bonus -= PENALTY_DEPASSEMENT / 10
        if self.position[-1] >= PAF:
            if self.speed[-1] <= 0.01:
                done = True
            else:
                bonus -= PENALTY_DEPASSEMENT
                done = True
        if self.simu_length / TICK >= self.MAX_STEPS:
            bonus -= PENALTY_DEPASSEMENT * 10
            done = True
        if self.position[-1] >= LIMITE_DOMAINE:
            done = True
        if self.speed[-1] <= 0.001:
            bonus -= PENALTY_OVERSPEED / 10
        info = {}
        state = {
            'acceleration': np.array([self.acceleration[-1]]),
            'speed': np.array([self.speed[-1]]),
            'position': np.array([self.position[-1]]),
            'LV': SL_updated,
        }
        self.state = state
        return self.state, bonus, done, info
    def render(self):
        pass

    def reset(self):
        self.state = {
            'acceleration': np.array([0.]),
            'speed': np.array([0.]),
            'position': np.array([0.]),
            'LV': SPEED_LIMIT,
        }
        self.position = [0.]
        self.speed = [0.]
        self.acceleration = [0.]
        self.simu_length = 0
        return self.state
register_env("SimuMini-v0", lambda config: SimuMiniEnv_v0())
results = ray.tune.run(
"PPO",
stop={"training_iteration": 300},
config={"env": "SimuMini-v0", "lr": 1e-4, "vf_clip_param": 1e5})
For the error concerning 'grad_clip', I think this parameter is not implemented for PPO on TensorFlow, even though it is listed on the PPO config page. To reproduce the error, just add 'grad_clip': 4 to the config dict at the end of the script.
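Concretely, the tune.run call at the end of the script would look like this (same settings as above, only the 'grad_clip' entry from my report is new):

results = ray.tune.run(
    "PPO",
    stop={"training_iteration": 300},
    config={
        "env": "SimuMini-v0",
        "lr": 1e-4,
        "vf_clip_param": 1e5,
        "grad_clip": 4,  # this added entry is what triggers the error I reported
    })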
However, my training on this environment is still behaving badly (same as in my first post), and I have not found a solution yet (I tried several hyperparameter configurations, without success).