my code:
import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.logger import pretty_print
import gym
import time
from ray.rllib.models.preprocessors import get_preprocessor
# from ray.rllib.models.catalog import get_preprocessor
# from ray.rllib.models.catalog import ModelCatalog
import numpy as np
ray.init()
game = "Breakout-v0"
def Breakout():
config1 = {
"batch_mode": "truncate_episodes",
"clip_param": 0.1,
"clip_rewards": True,
"entropy_coeff": 0.01,
"env": game,
"kl_coeff": 0.5,
"lambda": 0.95,
"num_envs_per_worker": 5,
"num_gpus": 1,
"num_sgd_iter": 10,
"num_workers": 1,
"observation_filter": "NoFilter",
"sgd_minibatch_size": 500,
"train_batch_size": 5000,
"vf_clip_param": 10.0,
"vf_share_layers": True
}
verifier = ppo.PPOTrainer(config=config1, env=game)
#verifier.restore(
# r"/home/featurize/ray_results/PPO/PPO_Breakout-v0_a8d8c_00000_0_2021-05-28_11-06-13/checkpoint_004000/checkpoint-4000")
env = gym.make(game)
#env = verifier.workers.local_worker().env
obs = env.reset()
done = False
prep = get_preprocessor(env.observation_space)(env.observation_space)
verifier_reward = 0
#padding = np.random.random([prep.shape[0], prep.shape[1], 1])
while not done:
observation = prep.transform(obs)
#observation = np.concatenate((observation, padding), axis=2)
action = verifier.compute_action(observation)
obs, reward, done, info = env.step(action)
verifier_reward += reward
print('verifier_reward:', verifier_reward)
if __name__ == '__main__':
Breakout()
error:
ValueError: Cannot feed value of shape (1, 84, 84, 3) for Tensor ‘default_policy/obs:0’, which has shape ‘(?, 84, 84, 4)’