Hi, I recently created a new environment using RLlib's MultiAgentEnv. However, when running a training algorithm on it, I get the following error: "TypeError: 'EnvContext' object cannot be interpreted as an integer". What exactly is causing this error? I assume it is the enumeration of the different environments as agents, but that part came straight from the basic multi-agent env in the examples folder.
Code:
from gym.utils import seeding, EzPickle
import gym
from gym import spaces
import numpy as np
import copy
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer


class AiyagariEnvironment(gym.Env):
    """An environment for value function sampling from a basic RA GE model with capital."""

    # def u(cons):
    #     util = cons**(1-GAMMA)/(1-GAMMA)
    #     return util
    # idea: pass assets to the multi-agent env, then return the interest rate back to the environment.
    metadata = {'render.modes': ['human']}

    def __init__(self):
        super(AiyagariEnvironment, self).__init__()
        self.reward_range = (0, np.inf)
        self.seed()
        # next-period asset space bounds [borrow_lim, inf)
        self.action_space = spaces.Box(low=np.array([BORROW_LIM]), high=np.array([np.inf]), dtype=np.float32)
        # observation space -- all variables the agent observes before making a new decision.
        # Since we assume r is fixed here, this includes assets, prices, and income. Because assets
        # act as a summary statistic in this model, we only provide current-period assets, prices,
        # and income. We can extend this to multi-period if we want.
        self.observation_space = spaces.Box(low=np.array([BORROW_LIM, 0, 0]), high=np.array([np.inf, np.inf, 1]), dtype=np.float32)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # resets state to initial values
    def reset(self):
        self.assets = INITIAL_ASSET_HOLDINGS
        self.price = R_VALUE - DELTA
        self.W = W_VALUE
        self.current_step = 0
        self.cons = 0
        self.net_worth = 0
        self.reward = 0
        # shifted exponential for the time being; can impose own distribution with custom sampling later on.
        # for the time being will use the default distribution for sampling.
        self.shock = np.exp(self.np_random.normal(0, 1))
        self.income = self.W * self.shock
        self.obs = [[self.assets], [self.price], [self.income]]
        return self.obs

    @property
    def n(self):
        return AGENT_NUM

    # updating function
    def step(self, action, R, W):
        self.current_step += 1
        self.price = R - DELTA
        self.W = W
        self.shock = np.exp(np.random.normal(0, 1))
        self.income = self.W * self.shock
        self.net_worth = self.price * self.assets + self.income
        if action in self.action_space:
            if action <= self.net_worth:
                self.assets = action + self.price * self.assets
                self.cons = self.net_worth - action
            else:
                self.assets = self.net_worth + self.price * self.assets
        else:
            raise ValueError("Received invalid action={:f} which is not part of the action space".format(action))
        self.obs = [[self.assets], [self.price], [self.income]]
        done = self.cons <= 0
        if self.cons > 0:
            self.reward = self.cons ** (1 - GAMMA) / (1 - GAMMA)
        else:
            self.reward = -np.inf
        return self.obs, self.reward, done, {}

    def render(self, mode='human', close=False):
        # TODO: work on render to make a graph.
        results = str(
            f"Step: {self.current_step}\n"
            f"Assets: {self.assets}\n"
            f"Income: {self.income}\n"
            f"Consumption: {self.cons}\n"
            f"Net worth: {self.net_worth}\n"
            f"Interest Rate: {self.price}\n"
            f"Wage Rate: {self.W}\n"
            f"Utility: {self.reward}\n")
        return results
from ray.rllib.utils.typing import MultiAgentDict, AgentID
from typing import Tuple, Dict, List
from gym.envs.registration import EnvSpec
import gym
from ray.rllib.env.multi_agent_env import MultiAgentEnv


class AiyagariMultiAgentEnv(MultiAgentEnv):
    def __init__(self, num):
        self.agents = [AiyagariEnvironment() for _ in range(num)]
        self.dones = set()
        self.observation_space = gym.spaces.Box(low=np.array([BORROW_LIM, 0, 0]), high=np.array([np.inf, np.inf, 1]), dtype=np.float32)
        self.action_space = gym.spaces.Box(low=np.array([BORROW_LIM]), high=np.array([np.inf]), dtype=np.float32)
        self.resetted = False
        self.num = num

    def reset(self):
        self.resetted = True
        self.dones = set()
        dict_agents = {i: a.reset() for i, a in enumerate(self.agents)}
        # initial holdings
        self.K = sum(self.agents[i].assets for i in range(self.num))
        self.N = self.num
        self.R = Z * (1 - ALPHA) * (self.N / self.K) ** ALPHA
        self.W = Z * ALPHA * (self.K / self.N) ** (1 - ALPHA)
        for i in range(self.num):
            dict_agents[i].append([self.K, self.N, self.R, self.W])
        return dict_agents

    def step(self, action_dict):
        obs, rew, done, info = {}, {}, {}, {}
        for i, action in action_dict.items():
            # get observations, which are tomorrow's capital earnings. Use them to construct tomorrow's prices, then feed back in.
            obs[i], rew[i], done[i], info[i] = self.agents[i].step(action, self.R, self.W)
            if done[i]:
                self.dones.add(i)
        # construct and append aggregate states to each agent's observation
        self.K = sum(obs_agent[0] for obs_agent in obs.values())
        self.N = self.num
        self.R = Z * (1 - ALPHA) * (self.N / self.K) ** ALPHA
        self.W = Z * ALPHA * (self.K / self.N) ** (1 - ALPHA)
        for i in range(self.num):
            obs[i] += self.K
            obs[i] += self.N
            obs[i] += self.R
            obs[i] += self.W
        done["__all__"] = len(self.dones) == len(self.agents)
        return obs, rew, done, info

    def render(self, mode='human', close=True):
        # TODO: work on nice render
        results_n = []
        for agent in self.agents:
            results = agent.render(mode, close)
            results_n.append(results)
        return results_n
env = AiyagariMultiAgentEnv(5)
obs = env.reset()
for i, items in enumerate(env.render(), start=1):
    print(f"Agent: {i}\n")
    print(items)
print(env.action_space)
Up to here everything works fine, and it prints out the reset state of each agent correctly.
import ray
ray.init()
from ray.tune.registry import register_env
from ray.rllib.agents import ppo

trainer = ppo.PPOTrainer(env=AiyagariMultiAgentEnv)
while True:
    print(trainer.train())
This produces the error. The full traceback is:
RayTaskError(TypeError): ray::RolloutWorker.foreach_policy() (pid=78609, ip=192.168.1.4)
File "python/ray/_raylet.pyx", line 422, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 456, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 459, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 463, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 415, in ray._raylet.execute_task.function_executor
File "/Users/brandonkaplowitz/opt/anaconda3/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 372, in __init__
self.env = _validate_env(env_creator(env_context))
File "/Users/brandonkaplowitz/opt/anaconda3/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 1193, in <lambda>
register_env(name, lambda config: env_object(config))
File "<ipython-input-55-fe3e5d2bb727>", line 9, in __init__
TypeError: 'EnvContext' object cannot be interpreted as an integer
Line 9 in __init__ corresponds to self.agents = [AiyagariEnvironment() for _ in range(num)].
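From the traceback, it looks like RLlib registers the environment class via register_env(name, lambda config: env_object(config)) and then instantiates it with an EnvContext (the env_config dict) as the only positional argument, so num would receive the EnvContext rather than an integer and range(num) would fail. A minimal sketch of what I think is happening (the config contents below are just placeholders, not anything RLlib actually passes):

# Hand-reproducing what the traceback suggests RLlib does internally:
env_context = {"num_agents": 5}           # placeholder dict; RLlib passes an EnvContext (a dict subclass)
env = AiyagariMultiAgentEnv(env_context)  # num is a dict-like object here, so range(num) raises the TypeError

Is that the right reading of the traceback?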