The preprocessor's transform is not working properly; the agent needs observations of shape 84x84x4, not 84x84x3.

My code:

import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.logger import pretty_print
import gym
import time
from ray.rllib.models.preprocessors import get_preprocessor
# from ray.rllib.models.catalog import get_preprocessor
# from ray.rllib.models.catalog import ModelCatalog
import numpy as np

ray.init()
game = "Breakout-v0"

def Breakout():
    config1 = {
        "batch_mode": "truncate_episodes",
        "clip_param": 0.1,
        "clip_rewards": True,
        "entropy_coeff": 0.01,
        "env": game,
        "kl_coeff": 0.5,
        "lambda": 0.95,
        "num_envs_per_worker": 5,
        "num_gpus": 1,
        "num_sgd_iter": 10,
        "num_workers": 1,
        "observation_filter": "NoFilter",
        "sgd_minibatch_size": 500,
        "train_batch_size": 5000,
        "vf_clip_param": 10.0,
        "vf_share_layers": True
    }
    verifier = ppo.PPOTrainer(config=config1, env=game)
    #verifier.restore(
    #     r"/home/featurize/ray_results/PPO/PPO_Breakout-v0_a8d8c_00000_0_2021-05-28_11-06-13/checkpoint_004000/checkpoint-4000")

    env = gym.make(game)
    #env = verifier.workers.local_worker().env
    obs = env.reset()
    done = False
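    # Note: this preprocessor only resizes the single RGB frame to 84x84x3;
    # it does not produce the 4-frame grayscale stack the trained policy expects.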
    prep = get_preprocessor(env.observation_space)(env.observation_space)

    verifier_reward = 0
    #padding = np.random.random([prep.shape[0], prep.shape[1], 1])
    while not done:
        observation = prep.transform(obs)
        #observation = np.concatenate((observation, padding), axis=2)
        action = verifier.compute_action(observation)
        obs, reward, done, info = env.step(action)
        verifier_reward += reward
    print('verifier_reward:', verifier_reward)


if __name__ == '__main__':
    Breakout()

error:
ValueError: Cannot feed value of shape (1, 84, 84, 3) for Tensor 'default_policy/obs:0', which has shape '(?, 84, 84, 4)'


Hey @weileze, could you try using the DM (DeepMind) wrappers for your env? These take care of wrapping your basic env so that the output observations are already frame-stacked (4 grayscale 84x84x1 frames, rather than a single 84x84 RGB frame).

The following code should work (I tested it).

import ray
import ray.rllib.agents.ppo as ppo
import gym

from ray.rllib.env.wrappers.atari_wrappers import wrap_deepmind

ray.init()
game = "Breakout-v0"

def Breakout():
    config1 = {
        "batch_mode": "truncate_episodes",
        "clip_param": 0.1,
        "clip_rewards": True,
        "entropy_coeff": 0.01,
        "env": game,
        "kl_coeff": 0.5,
        "lambda": 0.95,
        "num_envs_per_worker": 5,
        "num_gpus": 1,
        "num_sgd_iter": 10,
        "num_workers": 0,
        "observation_filter": "NoFilter",
        "sgd_minibatch_size": 500,
        "train_batch_size": 5000,
        "vf_clip_param": 10.0,
        "vf_share_layers": True
    }
    verifier = ppo.PPOTrainer(config=config1, env=game)

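    # wrap_deepmind resizes frames to 84x84, grayscales them, and stacks 4
    # consecutive frames, so obs already has the (?, 84, 84, 4) shape
    # the policy expects.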
    env = wrap_deepmind(
        gym.make(game),
        dim=84,
        framestack=True,
        framestack_via_traj_view_api=None)
    obs = env.reset()
    done = False

    verifier_reward = 0
    while not done:
        action = verifier.compute_action(obs)
        obs, reward, done, info = env.step(action)
        verifier_reward += reward
    print('verifier_reward:', verifier_reward)


if __name__ == '__main__':
    Breakout()

RLlib does the DM-wrapping internally. There is a setting in the config, "preprocessor_pref", which is set to "deepmind" by default. You can set it to "rllib" to disable this.
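For example, a minimal sketch based on the description above (only the "preprocessor_pref" key is new; everything else stays as in the configs earlier in this thread):

config1 = {
    # ... all other keys unchanged ...
    "preprocessor_pref": "rllib",  # default is "deepmind"
}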

We are planning to get rid of RLlib's built-in preprocessors altogether, as they have tremendously confused our users (and us) in the past and do no real good. Preprocessing should be done entirely in the model (where it can be batched as well) or in the env itself (via gym observation wrappers), as done above.
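To illustrate the env-side approach, here is a minimal, hypothetical gym observation wrapper; the GrayscaleObs class and its channel-averaging conversion are my own sketch, not RLlib code:

import gym
import numpy as np

class GrayscaleObs(gym.ObservationWrapper):
    # Hypothetical sketch: do preprocessing inside the env via a gym wrapper.
    def __init__(self, env):
        super().__init__(env)
        h, w, _ = env.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(h, w, 1), dtype=np.uint8)

    def observation(self, obs):
        # Average the RGB channels down to a single grayscale channel.
        return obs.mean(axis=2, keepdims=True).astype(np.uint8)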

Thank you for your answer; it perfectly solves my problem.
I should have worked harder to understand what each parameter in the configuration does.
I had also guessed that 84x84x4 was a stack of 4 grayscale frames, but I assumed that, since RGB contains more information, the image should not simply be converted to grayscale.


No problem. Again, this is all very confusing and we should clean this up in due time.

Looking forward to the next version.
