@mannyv
Update: I am using Python 3.9.12, PyTorch 1.11.0, gym 0.21.0 and Ray 1.13.0 on Windows.
Thanks. The fix with using the model parameters works.
Below I have made a reproduction script. I do think it is a bug in the ray source code somewhere.
The code below fails with `TypeError: linear(): argument 'input' (position 1) must be Tensor, not numpy.ndarray. In tower 0 on device cuda:0`.
However, it does not fail when any one of the following conditions holds (only one has to be true):
- A Box observation space is used instead of the Dict space (see the commented-out code for instructions).
- The preprocessor API is left enabled, i.e. `config_simple["_disable_preprocessor_api"] = False`.
- The CPU is used instead of the GPU, i.e. `config_simple["num_gpus"] = 0`.
from typing import Dict, List
import torch.nn as nn
import gym
import numpy as np
from ray.rllib.agents.dqn.simple_q import DEFAULT_CONFIG, SimpleQTrainer
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.typing import TensorType, ModelConfigDict
from ray.tune.registry import register_env
class SimpleEnv(gym.Env):
    """Minimal environment with a Dict observation space holding one Box.

    Every observation is an all-zero float32 vector of length ``self.shape``
    stored under the key ``"obs_1"``. Each step returns a reward of 1 and
    never signals termination (``done`` is always ``False``).
    """

    def __init__(self):
        self.shape = 4
        # One discrete action per observation component.
        self.action_space = gym.spaces.Discrete(self.shape)
        obs_1_space = gym.spaces.Box(
            low=np.float32(0),
            high=np.float32(1),
            shape=(self.shape,),
            dtype=np.float32,
        )
        self.observation_space = gym.spaces.Dict({"obs_1": obs_1_space})
        # Box-only variant: replace the Dict space above with
        # self.observation_space = gym.spaces.Box(low=np.float32(0),
        #                                         high=np.float32(1),
        #                                         shape=(self.shape,),
        #                                         dtype=np.float32)

    def reset(self):
        """Return the initial observation: zeros under ``"obs_1"``."""
        # Box-only variant:
        # return np.zeros(self.shape, dtype=np.float32)
        initial_obs = {"obs_1": np.zeros(self.shape, dtype=np.float32)}
        return initial_obs

    def step(self, action):
        """Ignore ``action`` and return ``(obs, reward, done, info)``.

        The observation is always zeros, the reward is always 1, ``done`` is
        always ``False`` and ``info`` is an empty dict.
        """
        # Box-only variant:
        # state = np.zeros(self.shape, dtype=np.float32)
        state = {"obs_1": np.zeros(self.shape, dtype=np.float32)}
        reward, done, info = 1, False, {}
        return state, reward, done, info
class SimpleNetwork(TorchModelV2, nn.Module):
    """Small fully connected Q-network for the ``"obs_1"`` observation entry.

    The network is Linear(4 -> 4), ReLU, Linear(4 -> ``num_outputs``). The
    input width of 4 is hard-coded and matches the 4-element observation
    produced by ``SimpleEnv``.
    """

    def __init__(
        self,
        obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: int,
        model_config: ModelConfigDict,
        name: str,
        **customized_model_kwargs
    ):
        """Initialise both base classes and build the layer stack.

        Extra keyword arguments are accepted but not used.
        """
        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name
        )
        nn.Module.__init__(self)
        # Layers are created in the same order as they appear in the stack.
        hidden_layer = nn.Linear(in_features=4, out_features=4, bias=True)
        activation = nn.ReLU()
        output_layer = nn.Linear(in_features=4, out_features=num_outputs, bias=True)
        self.layers = nn.Sequential(hidden_layer, activation, output_layer)

    def forward(
        self,
        input_dict: Dict[str, TensorType],
        state: List[TensorType],
        seq_lens: TensorType,
    ) -> (TensorType, List[TensorType]):
        """Compute Q-values from ``input_dict["obs"]["obs_1"]``.

        Prints the type and shape of the incoming observation for debugging
        and returns ``state`` unchanged alongside the Q-values.
        """
        # With a plain Box observation space there is no "obs_1" key; use
        # obs_1 = input_dict["obs"] instead.
        obs_1 = input_dict["obs"]["obs_1"]
        print(type(obs_1), obs_1.shape)
        return self.layers(obs_1), state
# Build the trainer configuration from the SimpleQ defaults.
config_simple = DEFAULT_CONFIG.copy()
config_simple["model"] = {"custom_model": "simple_network"}
config_simple["framework"] = "torch"

# Register the custom model and the environment under the names used in the config.
ModelCatalog.register_custom_model("simple_network", SimpleNetwork)
select_env = "simple_env"
register_env(select_env, lambda config: SimpleEnv())
config_simple["env"] = select_env

# The two settings below are needed to trigger the reported TypeError:
# setting _disable_preprocessor_api to False, or num_gpus to 0, avoids it.
config_simple["_disable_preprocessor_api"] = True
config_simple["num_gpus"] = 1

agent = SimpleQTrainer(config=config_simple, env=select_env)
try:
    for n in range(2):
        result = agent.train()
finally:
    # train() is expected to raise in this reproduction; always release the
    # trainer's resources, then let the exception propagate.
    agent.stop()