I’m trying to run the PPO algorithm on my custom gym environment (I’m new to RL). First I wrote a gym env for my robotic dog, you can see it here:
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import numpy as np
import random
from gym_dog.envs.mujoco import mujoco_env
from ray.rllib.agents.ppo import PPOTrainer
class DogEnv2(mujoco_env.MujocoEnv, utils.EzPickle, gym.Env):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, "./go1/xml/go1.xml", 5)
        utils.EzPickle.__init__(self)

    def step(self, action):
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        xposafter = self.sim.data.qpos[0]
        ob = self._get_obs()
        reward_ctrl = -0.1 * np.square(action).sum()
        reward_run = (xposafter - xposbefore) / self.dt
        reward = reward_ctrl + reward_run
        done = False
        return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)

    def _get_obs(self):
        return np.concatenate(
            [
                self.sim.data.qpos.flat[1:],
                self.sim.data.qvel.flat,
            ]
        )

    def reset_model(self):
        return self._get_obs()

    def viewer_setup(self):
        self.viewer.cam.distance = self.model.stat.extent * 0.5
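As far as I understand, for gym.make to find this environment later on, the gym_dog package also has to register the id with gym somewhere (e.g. in its __init__.py). A rough sketch of what I mean, where the id and entry_point path are my assumptions about the layout:

from gym.envs.registration import register

# make the custom env visible to gym.make('dog-v2')
# (the entry_point path here is my guess at the package layout)
register(
    id='dog-v2',
    entry_point='gym_dog.envs.dog_env_2:DogEnv2',
)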
Then I wrote my main file like this:
import numpy as np
import gym
import mujoco_py
import random
import sys
sys.path.insert(1, '/home/mosaic-challenge/shirelle_ws/gym_shirelle/gym-dog')
import gym_dog
from gym_dog.envs.dog_env_2 import DogEnv2
import ray
from ray.rllib.agents import ppo
import tensorflow as tf
from ray.tune.registry import register_env
from ray import tune
from ray.tune.logger import pretty_print
import time
def create_my_env():
    import gym
    # from gym_dog.envs.dog_env_2 import DogEnv2
    env = gym.make('dog-v2')
    return env

ray.init()
env_creator = lambda config: create_my_env()
register_env('DogEnv2', env_creator)

ppo_config = ppo.DEFAULT_CONFIG.copy()
trainer = ppo.PPOTrainer(config=ppo_config, env="DogEnv2")

for _ in range(10):
    result = trainer.train()
    print(pretty_print(result))

ray.shutdown()
When I run this I get:
raise error.UnregisteredEnv('No registered env with id: {}'.format(id))
What is wrong with my registration?
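One thing I considered: since the exception seems to come from gym.make itself rather than from RLlib, would it be enough to construct the class directly in the creator, roughly like this (just a sketch of the idea, not something I have verified)?

def create_my_env():
    # build the env directly so gym's id registry isn't involved at all
    return DogEnv2()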
I also tried the following:
ray.init()
env_creator = lambda config: create_my_env()
register_env('DogEnv2', env_creator)
tune.run(
    "PPO",
    stop={"episode_reward_mean": 200},
    config={
        "env": env_creator,
        "num_workers": 1,
    },
)
and then I’m getting this error:
2022-05-15 09:54:40,617 INFO trial_runner.py:803 -- starting PPO_<function <lambda> at 0x7f43ecac4550>_e473d_00000
== Status ==
Current time: 2022-05-15 09:54:42 (running for 00:00:01.94)
Memory usage on this node: 8.1/62.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 2.0/16 CPUs, 0/1 GPUs, 0.0/33.81 GiB heap, 0.0/16.9 GiB objects
Result logdir: /home/mosaic-challenge/ray_results/PPO
Number of trials: 1/1 (1 RUNNING)
+-------------------------------------------------------+----------+-------+
| Trial name | status | loc |
|-------------------------------------------------------+----------+-------|
| PPO_<function <lambda> at 0x7f43ecac4550>_e473d_00000 | RUNNING | |
+-------------------------------------------------------+----------+-------+
2022-05-15 09:54:42,410 ERROR trial_runner.py:876 -- Trial PPO_<function <lambda> at 0x7f43ecac4550>_e473d_00000: Error processing event.
NoneType: None
Result for PPO_<function <lambda> at 0x7f43ecac4550>_e473d_00000:
trial_id: e473d_00000
== Status ==
Current time: 2022-05-15 09:54:42 (running for 00:00:01.95)
Memory usage on this node: 8.1/62.6 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/16 CPUs, 0/1 GPUs, 0.0/33.81 GiB heap, 0.0/16.9 GiB objects
Result logdir: /home/mosaic-challenge/ray_results/PPO
Number of trials: 1/1 (1 ERROR)
+-------------------------------------------------------+----------+-------+
| Trial name | status | loc |
|-------------------------------------------------------+----------+-------|
| PPO_<function <lambda> at 0x7f43ecac4550>_e473d_00000 | ERROR | |
+-------------------------------------------------------+----------+-------+
Number of errored trials: 1
+-------------------------------------------------------+--------------+------------------------------------------------------------------------------------------------------------------------------+
| Trial name | # failures | error file |
|-------------------------------------------------------+--------------+------------------------------------------------------------------------------------------------------------------------------|
| PPO_<function <lambda> at 0x7f43ecac4550>_e473d_00000 | 1 | /home/mosaic-challenge/ray_results/PPO/PPO_<function <lambda> at 0x7f43ecac4550>_e473d_00000_0_2022-05-15_09-54-40/error.txt |
+-------------------------------------------------------+--------------+------------------------------------------------------------------------------------------------------------------------------+
2022-05-15 09:54:42,412 ERROR ray_trial_executor.py:102 -- An exception occurred when trying to stop the Ray actor:Traceback (most recent call last):
File "/home/mosaic-challenge/anaconda3/envs/python3.8/lib/python3.8/site-packages/ray/tune/ray_trial_executor.py", line 93, in post_stop_cleanup
ray.get(future, timeout=0)
File "/home/mosaic-challenge/anaconda3/envs/python3.8/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "/home/mosaic-challenge/anaconda3/envs/python3.8/lib/python3.8/site-packages/ray/worker.py", line 1811, in get
raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::PPOTrainer.__init__() (pid=29183, ip=132.72.112.217, repr=PPOTrainer)
File "/home/mosaic-challenge/anaconda3/envs/python3.8/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 767, in __init__
self._env_id: Optional[str] = self._register_if_needed(
File "/home/mosaic-challenge/anaconda3/envs/python3.8/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 2810, in _register_if_needed
raise ValueError(
ValueError: <function <lambda> at 0x7f3fbd2ba310> is an invalid env specification. You can specify a custom env as either a class (e.g., YourEnvCls) or a registered env id (e.g., "your_env").
(PPOTrainer pid=29183) 2022-05-15 09:54:42,407 ERROR worker.py:449 -- Exception raised in creation task: The actor died because of an error raised in its creation task, ray::PPOTrainer.__init__() (pid=29183, ip=132.72.112.217, repr=PPOTrainer)
(PPOTrainer pid=29183) File "/home/mosaic-challenge/anaconda3/envs/python3.8/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 767, in __init__
(PPOTrainer pid=29183) self._env_id: Optional[str] = self._register_if_needed(
(PPOTrainer pid=29183) File "/home/mosaic-challenge/anaconda3/envs/python3.8/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 2810, in _register_if_needed
(PPOTrainer pid=29183) raise ValueError(
(PPOTrainer pid=29183) ValueError: <function <lambda> at 0x7f3fbd2ba310> is an invalid env specification. You can specify a custom env as either a class (e.g., YourEnvCls) or a registered env id (e.g., "your_env").
Traceback (most recent call last):
File "main.py", line 44, in <module>
tune.run(
File "/home/mosaic-challenge/anaconda3/envs/python3.8/lib/python3.8/site-packages/ray/tune/tune.py", line 695, in run
raise TuneError("Trials did not complete", incomplete_trials)
ray.tune.error.TuneError: ('Trials did not complete', [PPO_<function <lambda> at 0x7f43ecac4550>_e473d_00000])
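From the ValueError it sounds like config["env"] has to be either a registered env id string or an env class, not the lambda itself, so I guess the tune call should look more like this (a sketch; I haven't confirmed it gets past the first registration problem):

tune.run(
    "PPO",
    stop={"episode_reward_mean": 200},
    config={
        "env": "DogEnv2",  # the name given to register_env, not the creator function
        "num_workers": 1,
    },
)

Is that the right idea, or is my underlying gym registration still the real problem?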