How severely does this issue affect your experience of using Ray?
Hello everyone, I have encountered a problem. I have created a Unity ML-Agents environment as follows:
Link to the main code notebook: MultiAgentContiniousPCGRL/Untitled2.ipynb at main · danianamir/MultiAgentContiniousPCGRL (github.com)
class Unity3DEnv(MultiAgentEnv):
    _BASE_PORT_EDITOR = 5004
    _BASE_PORT_ENVIRONMENT = 5005
    _WORKER_ID = 0

    def __init__(
        self,
        file_name: str = None,
        port: Optional[int] = None,
        seed: int = 0,
        no_graphics: bool = False,
        timeout_wait: int = 300,
        episode_horizon: int = None,
    ):
        self._skip_env_checking = True
        super().__init__()

        if file_name is None:
            print(
                "No game binary provided, will use a running Unity editor "
                "instead.\nMake sure you are pressing the Play (|>) button in "
                "your editor to start."
            )

        # Try connecting to the Unity3D game instance. If a port is blocked,
        # retry with the next worker id.
        port_ = None
        while True:
            # Sleep for a random time to allow for concurrent startup of many
            # environments (num_workers >> 1). Otherwise, this would sometimes
            # lead to port conflicts.
            if port_ is not None:
                time.sleep(random.randint(1, 10))
            port_ = port or (
                self._BASE_PORT_ENVIRONMENT if file_name else self._BASE_PORT_EDITOR
            )
            # Cache the worker_id and increase it for the next environment.
            worker_id_ = Unity3DEnv._WORKER_ID if file_name else 0
            Unity3DEnv._WORKER_ID += 1
            try:
                self.unity_env = UnityEnvironment(
                    file_name=file_name,
                    worker_id=worker_id_,
                    base_port=port_,
                    seed=seed,
                    no_graphics=no_graphics,
                    timeout_wait=timeout_wait,
                )
                print("Created UnityEnvironment for port {}".format(port_ + worker_id_))
            except mlagents_envs.exception.UnityWorkerInUseException:
                pass
            else:
                break

        self.episode_horizon = episode_horizon
        self.episode_timesteps = 0
    def step(self, action_dict):
        for behavior_name in self.unity_env.behavior_specs:
            actions = []
            for agent_id in self.unity_env.get_steps(behavior_name)[0].agent_id:
                key = behavior_name + "_{}".format(agent_id)
                actions.append(action_dict[key])
            if behavior_name == "instantiator Behaviour?team=0":
                continuous_action = np.array(actions[0][0])
                print(continuous_action)
                discrete_action = np.array([[actions[0][1]]])
                print(discrete_action)
                action_tuple = ActionTuple(continuous=continuous_action, discrete=discrete_action)
            if behavior_name == "modifyer Behavior?team=0":
                continuous_action = np.array(actions[0][0])
                print(continuous_action)
                discrete_action = np.array([actions[0][1]])
                print(discrete_action)
                action_tuple = ActionTuple(continuous=continuous_action, discrete=discrete_action)
            self.unity_env.set_actions(behavior_name, action_tuple)
        self.unity_env.step()

        self.episode_timesteps += 1
        obs, rewards, terminateds, truncateds, infos = self._get_step_results()
        return obs, rewards, terminateds, truncateds, infos
    def reset(self, *, seed=None, options=None):
        self.episode_timesteps = 0
        self.unity_env.reset()
        obs, _, _, _, infos = self._get_step_results()
        return obs, infos
    def _get_step_results(self):
        # First we set up the dicts that step() returns.
        obs = {}
        rewards = {}
        terminated = {}
        truncated = {}
        infos = {}
        num_active = 0
        num_done = 0
        num_all = 0

        # Go through all the behaviors:
        # - decision_steps: batch of agents that share this behavior
        # - terminal_steps: batch of agents that share this behavior and ended their episode
        for behavior_name in self.unity_env.behavior_specs:
            decision_steps, terminal_steps = self.unity_env.get_steps(behavior_name)
            num_active = num_active + len(decision_steps)
            num_done = num_done + len(terminal_steps)
            num_all = num_active + num_done

            # Set the obs/reward from decision_steps for each agent,
            # like: {"behavior_name_agent_id": numpy array}
            for agent_id, idx in decision_steps.agent_id_to_index.items():
                key = behavior_name + "_{}".format(agent_id)
                os = tuple(o[idx] for o in decision_steps.obs)
                os = os[0] if len(os) == 1 else os
                obs[key] = os
                rewards[key] = (decision_steps.reward[idx] + decision_steps.group_reward[idx])

            # Set the obs/reward from terminal_steps for each agent,
            # like: {"behavior_name_agent_id": numpy array}
            for agent_id, idx in terminal_steps.agent_id_to_index.items():
                key = behavior_name + "_{}".format(agent_id)
                if key not in obs:
                    os = tuple(o[idx] for o in terminal_steps.obs)
                    obs[key] = os = os[0] if len(os) == 1 else os
                rewards[key] = (terminal_steps.reward[idx] + terminal_steps.group_reward[idx])

        # infos = {"active": num_active, "done": num_done, "all": num_all}

        # Set terminated["__all__"] to True only if all agents have ended
        # their own episode, like: {"__all__": True}
        if num_active > 0:
            terminated["__all__"] = False
        else:
            terminated["__all__"] = True

        # Set truncated["__all__"] to True if the env step count has reached
        # the horizon, like: {"__all__": True}
        if self.episode_timesteps == self.episode_horizon:
            truncated["__all__"] = True
        else:
            truncated["__all__"] = False

        return obs, rewards, terminated, truncated, infos
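For clarity, the per-agent action format my step() assumes is something that can be indexed as value[0] (continuous part) and value[1] (discrete part), keyed by "behavior_name_agent_id". The agent ids and numbers below are purely illustrative:

# Illustrative only (agent ids and values made up): the kind of action_dict
# that step() above indexes into. Each value is expected to be indexable as
# value[0] -> continuous part, value[1] -> discrete part.
example_action_dict = {
    "instantiator Behaviour?team=0_0": (np.array([12.3, -4.5]), 1),
    "modifyer Behavior?team=0_1": (np.array([0.7, 8.9]), np.array([3, 1])),
}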
Then I utilized a RolloutWorker to randomly sample from the environment, like this:
continuous_action_space_initializer = spaces.Box(low=-100, high=100, shape=(2,), dtype=float)
discrete_action_space_initializer = spaces.Discrete(3)
continuous_action_space_modifier = spaces.Box(low=-100, high=100, shape=(2,), dtype=float)
multi_discrete_action_space_modifier = spaces.MultiDiscrete([6, 2])

policies = {
    "policy_initializer": (
        RandomPolicy,
        spaces.Box(float("-inf"), float("inf"), (28,)),
        spaces.Tuple((continuous_action_space_initializer, discrete_action_space_initializer)),
        AlgorithmConfig(),
    ),
    "policy_modifier": (
        RandomPolicy,
        spaces.Box(float("-inf"), float("inf"), (28,)),
        spaces.Tuple((continuous_action_space_modifier, multi_discrete_action_space_modifier)),
        AlgorithmConfig(),
    ),
}

def policy_mapping_fn(agent_id, episode, worker, **kwargs):
    if agent_id == 0:
        return "policy_initializer"
    else:
        return "policy_modifier"

config = (
    AlgorithmConfig()
    .multi_agent(policies=policies, policy_mapping_fn=policy_mapping_fn)
    .rollouts(rollout_fragment_length=10)
)

worker = RolloutWorker(
    env_creator=lambda _: Unity3DEnv(
        file_name="/content/MultiAgentContiniousPCGRL/unity_built/my_game.x86_64",
        episode_horizon=10,
    ),
    config=config,
)

multi_batch = worker.sample()
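For reference, sampling directly from one of these Tuple spaces describes actions as Python tuples of two separate arrays (this snippet only illustrates the structure of the space, not what RLlib actually sends to the env):

# Illustrative: what the Tuple action space itself describes.
tuple_space = spaces.Tuple(
    (continuous_action_space_modifier, multi_discrete_action_space_modifier)
)
sample = tuple_space.sample()
# sample is a tuple: (array of shape (2,) from the Box, array of shape (2,) from the MultiDiscrete)
print(type(sample), sample)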
But when I run the above code, I get this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/tree/__init__.py in assert_same_structure(a, b, check_types)
283 try:
--> 284 _tree.assert_same_structure(a, b, check_types)
285 except (ValueError, TypeError) as e:
ValueError: The two structures don't have the same nested structure.
First structure: type=ndarray str=[[92.19893 -4.3250813]
[ 3. 1. ]]
Second structure: type=tuple str=(Box(-100.0, 100.0, (2,), float64), MultiDiscrete([6 2]))
More specifically: Substructure "type=tuple str=(Box(-100.0, 100.0, (2,), float64), MultiDiscrete([6 2]))" is a sequence, while substructure "type=ndarray str=[[92.19893 -4.3250813]
[ 3. 1. ]]" is not
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
12 frames
/usr/local/lib/python3.10/dist-packages/tree/__init__.py in assert_same_structure(a, b, check_types)
286 str1 = str(map_structure(lambda _: _DOT, a))
287 str2 = str(map_structure(lambda _: _DOT, b))
--> 288 raise type(e)("%s\n"
289 "Entire first structure:\n%s\n"
290 "Entire second structure:\n%s"
ValueError: The two structures don't have the same nested structure.
First structure: type=ndarray str=[[92.19893 -4.3250813]
[ 3. 1. ]]
Second structure: type=tuple str=(Box(-100.0, 100.0, (2,), float64), MultiDiscrete([6 2]))
More specifically: Substructure "type=tuple str=(Box(-100.0, 100.0, (2,), float64), MultiDiscrete([6 2]))" is a sequence, while substructure "type=ndarray str=[[92.19893 -4.3250813]
[ 3. 1. ]]" is not
Entire first structure:
.
Entire second structure:
(., .)
What is the problem? I think the policy outputs actions as a single numpy array, like:
[[92.19893 -4.3250813]
 [ 3.        1.       ]]
while the environment expects the tuple action space structure, like:
(Box(-100.0, 100.0, (2,), float64), MultiDiscrete([6 2]))
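In other words, as I understand the error message, the two structures look roughly like this (values copied from the error above, purely for illustration):

# What the policy hands over per agent -> one stacked ndarray:
policy_output = np.array([[92.19893, -4.3250813],
                          [3.0, 1.0]])
# What the Tuple action space describes -> a tuple of two separate arrays:
expected_structure = (np.array([92.19893, -4.3250813]),  # Box(-100, 100, (2,))
                      np.array([3, 1]))                  # MultiDiscrete([6, 2])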
I would appreciate any guidance on how I can modify my environment to accept the policy output. Thank you.