How severe does this issue affect your experience of using Ray?
- Low: It annoys or frustrates me for a moment.
Hi all,
I am training a ppo policy using the built-in conv options + lstm wrapping, sample code:
ppo_config.model = {
"vf_share_layers": True,
"use_lstm": True,
"max_seq_len": 32,
"lstm_cell_size": 128,
"lstm_use_prev_action": True,
"conv_filters": [
[64, [12, 16], [7, 9]],
[128, [6, 6], 4],
[256, [9, 9], 1]
],
"conv_activation": "relu",
"post_fcnet_hiddens": [256],
"post_fcnet_activation": "relu"
}
But in the command window, it shows there is a [256, 256] fcnet. Is this just a default that is set, printed out but never used, or an actual bug as I was under the assumption that providing a conv_filter disables the fcnet
Current best trial: fcf3a_00000 with episode_reward_mean=-0.6460248804780876 and parameters={'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'num_gpus': 0.65, 'num_cpus_per_worker': 1, 'num_gpus_per_worker': 0, '_fake_gpus': False, 'num_learner_workers': 0, 'num_gpus_per_learner_worker': 0, 'num_cpus_per_learner_worker': 1, 'local_gpu_idx': 0, 'custom_resources_per_worker': {}, 'placement_strategy': 'PACK', 'eager_tracing': False, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'local_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'env': None, 'env_config': {'sleep': True, 'replayOn': False}, 'observation_space': Box(0.0, 1.0, (240, 320, 1), float32), 'action_space': MultiDiscrete([2 2 2 2 2 2 2 2 2]), 'env_task_fn': None, 'render_env': False, 'clip_rewards': None, 'normalize_actions': True, 'clip_actions': False, 'disable_env_checking': False, 'is_atari': False, 'auto_wrap_old_gym_envs': True, 'num_envs_per_worker': 1, 'sample_collector': <class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>, 'sample_async': False, 'enable_connectors': False, 'rollout_fragment_length': 64, 'batch_mode': 'complete_episodes', 'remote_worker_envs': False, 'remote_env_batch_wait_ms': 0, 'validate_workers_after_construction': True, 'preprocessor_pref': 'deepmind', 'observation_filter': 'NoFilter', 'synchronize_filters': True, 'compress_observations': True, 'enable_tf1_exec_eagerly': False, 'sampler_perf_stats_ema_coef': None, 'gamma': 0.998, 'lr': 9e-05, 'train_batch_size': 20480, 'model': {'_disable_preprocessor_api': False, '_disable_action_flattening': False, 'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[64, [12, 16], [7, 9]], [128, [6, 6], 4], [256, [9, 9], 1]], 'conv_activation': 'relu', 'post_fcnet_hiddens': [256], 'post_fcnet_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': True, 'use_lstm': True, 'max_seq_len': 32, 'lstm_cell_size': 128, 'lstm_use_prev_action': True, 'lstm_use_prev_reward': False, '_time_major': False, 'use_attention': False, 'attention_num_transformer_units': 1, 'attention_dim': 64, 'attention_num_heads': 1, 'attention_head_dim': 32, 'attention_memory_inference': 50, 'attention_memory_training': 50, 'attention_position_wise_mlp_dim': 32, 'attention_init_gru_gate_bias': 2.0, 'attention_use_n_prev_actions': 0, 'attention_use_n_prev_rewards': 0, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None, 'encoder_latent_dim': None, 'lstm_use_prev_action_reward': -1, '_use_default_native_models': -1}, 'optimizer': {}, 'max_requests_in_flight_per_sampler_worker': 2, 'learner_class': None, '_enable_learner_api': False, '_learner_hps': PPOLearnerHPs(kl_coeff=0.2, kl_target=0.01, use_critic=True, clip_param=0.3, vf_clip_param=10.0, entropy_coeff=0.0, vf_loss_coeff=1.0, lr_schedule=None, entropy_coeff_schedule=None), 'explore': True, 'exploration_config': {'type': 'StochasticSampling'}, 'policy_states_are_swappable': False, 'input_config': {}, 'actions_in_input_normalized': False, 'postprocess_inputs': False, 'shuffle_buffer_size': 0, 'output': None, 'output_config': {}, 'output_compress_columns': ['obs', 'new_obs'], 'output_max_file_size': 67108864, 'offline_sampling': False, 'evaluation_interval': None, 'evaluation_duration': 10, 'evaluation_duration_unit': 'episodes', 'evaluation_sample_timeout_s': 180.0, 'evaluation_parallel_to_training': False, 'evaluation_config': None, 'off_policy_estimation_methods': {}, 'ope_split_batch_by_episode': True, 'evaluation_num_workers': 0, 'always_attach_evaluation_results': False, 'enable_async_evaluation': False, 'in_evaluation': False, 'sync_filters_on_rollout_workers_timeout_s': 60.0, 'keep_per_episode_custom_metrics': False, 'metrics_episode_collection_timeout_s': 60.0, 'metrics_num_episodes_for_smoothing': 100, 'min_time_s_per_iteration': None, 'min_train_timesteps_per_iteration': 0, 'min_sample_timesteps_per_iteration': 0, 'export_native_model_files': False, 'checkpoint_trainable_policies_only': False, 'logger_creator': None, 'logger_config': None, 'log_level': 'WARN', 'log_sys_usage': False, 'fake_sampler': False, 'seed': None, 'worker_cls': None, 'ignore_worker_failures': False, 'recreate_failed_workers': False, 'max_num_worker_restarts': 1000, 'delay_between_worker_restarts_s': 60.0, 'restart_failed_sub_environments': False, 'num_consecutive_worker_failures_tolerance': 100, 'worker_health_probe_timeout_s': 60, 'worker_restore_timeout_s': 1800, 'rl_module_spec': None, '_enable_rl_module_api': False, '_tf_policy_handles_more_than_one_loss': False, '_disable_preprocessor_api': False, '_disable_action_flattening': False, '_disable_execution_plan_api': True, 'simple_optimizer': True, 'replay_sequence_length': None, 'horizon': -1, 'soft_horizon': -1, 'no_done_at_end': -1, 'lr_schedule': None, 'use_critic': True, 'use_gae': True, 'kl_coeff': 0.2, 'sgd_minibatch_size': 1024, 'num_sgd_iter': 1, 'shuffle_sequences': False, 'vf_loss_coeff': 1.0, 'entropy_coeff': 0.0, 'entropy_coeff_schedule': None, 'clip_param': 0.3, 'vf_clip_param': 10.0, 'grad_clip': None, 'kl_target': 0.01, 'vf_share_layers': -1, '__stdout_file__': None, '__stderr_file__': None, 'lambda': 0.99, 'input': <function _input at 0x0000017E8A2CDC60>, 'multiagent': {'policies': {'default_policy': (None, None, None, None)}, 'policy_mapping_fn': <function AlgorithmConfig.DEFAULT_POLICY_MAPPING_FN at 0x0000017E84089FC0>, 'policies_to_train': None, 'policy_map_capacity': 100, 'policy_map_cache': -1, 'count_steps_by': 'env_steps', 'observation_fn': None}, 'callbacks': <class 'ray.rllib.algorithms.callbacks.DefaultCallbacks'>, 'create_env_on_driver': False, 'custom_eval_function': None, 'framework': 'torch', 'num_cpus_for_driver': 4, 'num_workers': 2}
→ ... 'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[64, [12, 16], [7, 9]], [128, [6, 6], 4], ...
Edit:
Not sure why, but it’s not letting me format the big output as multi-line