my code:
import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.logger import pretty_print
import gym
import time
from ray.rllib.models.preprocessors import get_preprocessor
# from ray.rllib.models.catalog import get_preprocessor
# from ray.rllib.models.catalog import ModelCatalog
import numpy as np
ray.init()
game = "Breakout-v0"
def Breakout():
    config1 = {
        "batch_mode": "truncate_episodes",
        "clip_param": 0.1,
        "clip_rewards": True,
        "entropy_coeff": 0.01,
        "env": game,
        "kl_coeff": 0.5,
        "lambda": 0.95,
        "num_envs_per_worker": 5,
        "num_gpus": 1,
        "num_sgd_iter": 10,
        "num_workers": 1,
        "observation_filter": "NoFilter",
        "sgd_minibatch_size": 500,
        "train_batch_size": 5000,
        "vf_clip_param": 10.0,
        "vf_share_layers": True
    }
    verifier = ppo.PPOTrainer(config=config1, env=game)
    # verifier.restore(
    #     r"/home/featurize/ray_results/PPO/PPO_Breakout-v0_a8d8c_00000_0_2021-05-28_11-06-13/checkpoint_004000/checkpoint-4000")
    env = gym.make(game)
    # env = verifier.workers.local_worker().env
    obs = env.reset()
    done = False
    prep = get_preprocessor(env.observation_space)(env.observation_space)
    verifier_reward = 0
    # padding = np.random.random([prep.shape[0], prep.shape[1], 1])
    while not done:
        observation = prep.transform(obs)
        # observation = np.concatenate((observation, padding), axis=2)
        action = verifier.compute_action(observation)
        obs, reward, done, info = env.step(action)
        verifier_reward += reward
    print('verifier_reward:', verifier_reward)

if __name__ == '__main__':
    Breakout()
error:
ValueError: Cannot feed value of shape (1, 84, 84, 3) for Tensor 'default_policy/obs:0', which has shape '(?, 84, 84, 4)'
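The shape mismatch comes from RLlib's built-in Atari handling: during training, PPOTrainer wraps Breakout with the Deepmind wrappers (grayscale, 84x84 resize, 4-frame stack), so the trained policy expects (?, 84, 84, 4). A plain gym.make("Breakout-v0") plus get_preprocessor only produces the generic (84, 84, 3) downscaled RGB observation, which the policy cannot consume. A minimal sketch of the workaround hinted at in the commented-out line above: roll out against the trainer's own wrapped env instead of a fresh gym env (Ray ~1.x API; assumes the local worker holds an env instance, which may not hold in every version/config):

env = verifier.workers.local_worker().env  # already Deepmind-wrapped: obs shape (84, 84, 4)
obs = env.reset()
done = False
verifier_reward = 0
while not done:
    # obs already has the shape the policy expects, so no manual preprocessor is needed
    action = verifier.compute_action(obs)
    obs, reward, done, info = env.step(action)
    verifier_reward += reward
print('verifier_reward:', verifier_reward)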
reply (quoted, beginning truncated):
"… tremendously in the past and do no real good. Preprocessing should be done in the model entirely (where it can be batched as well) or in the env itself (via gym obs filters) as done above."
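Following that advice, a hedged sketch of the env-side approach for this case: apply RLlib's own Deepmind wrappers to the evaluation env so its observations already match the policy's (84, 84, 4) input. The wrap_deepmind import path below is from Ray ~1.x and may differ in other versions:

import gym
from ray.rllib.env.atari_wrappers import wrap_deepmind

# Grayscale + 84x84 resize + 4-frame stack, matching what RLlib used in training
env = wrap_deepmind(gym.make("Breakout-v0"), dim=84, framestack=True)
obs = env.reset()  # obs.shape == (84, 84, 4)
action = verifier.compute_action(obs)  # feed directly; no get_preprocessor needed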