Hi guys!
How would you scale (and clip) rewards in RLlib? I know that Stable Baselines3 has something like that in VecNormalize: stable-baselines3/vec_normalize.py at 237223f834fe9b8143ea24235d087c4e32addd2f · DLR-RM/stable-baselines3 · GitHub
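For reference, this is roughly the behaviour I have in mind (just a sketch; CartPole-v1 and the exact arguments are only placeholders):

```python
import gym
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

# placeholder env; I only care about the reward normalization/clipping part
venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
venv = VecNormalize(venv, norm_obs=False, norm_reward=True, clip_reward=10.0, gamma=0.99)
```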
I found the RunningStat class in ray.rllib.utils.filter and tried to write an env wrapper like so:
```python
import gym
import numpy as np

from ray.rllib.utils.filter import RunningStat


class NormalizeReward(gym.RewardWrapper):
    GAMMA = 0.9
    CLIP = 10
    EPSILON = 1e-8

    def __init__(self, env):
        super().__init__(env)
        # the discounted return is a scalar, so the running stats are scalar too
        self.running_stats = RunningStat(())
        self.ret = 0.0  # running discounted return

    def reward(self, reward):
        return self._normalized_reward(reward)

    def _normalized_reward(self, reward):
        self.ret = NormalizeReward.GAMMA * self.ret + reward
        self.running_stats.push(self.ret)
        # scale by the std of the discounted return; EPSILON avoids division by zero
        reward = reward / (self.running_stats.std + NormalizeReward.EPSILON)
        reward = np.clip(reward, -NormalizeReward.CLIP, NormalizeReward.CLIP)
        return reward
```
How would you solve this? And how would I make the wrapper work correctly with parallel envs?
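In case it matters, right now I would plug the wrapper in through an env creator like this (again just a sketch, the registered name and env are placeholders):

```python
import gym
from ray.tune.registry import register_env

# "normalized_cartpole" / CartPole-v1 stand in for my actual env
register_env("normalized_cartpole",
             lambda env_config: NormalizeReward(gym.make("CartPole-v1")))
```

As far as I understand, with more than one rollout worker each worker builds its own env instances, so every copy of the wrapper keeps its own running statistics and they never get synced between workers. That is the part I am unsure about.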