- # >> env = StatelessCartPole()
- # >> obs, info = env.reset()
- # >>
- # >> # range(2) because the LSTM carries two state tensors: the hidden (h) and cell (c) state.
- # >> init_state = state = [
- # .. np.zeros([lstm_cell_size], np.float32) for _ in range(2)
- # .. ]
- # >> prev_a = 0
- # >> prev_r = 0.0
- # >>
- # >> while True:
- # >>     a, state_out, _ = algo.compute_single_action(
- # ..         obs, state, prev_a, prev_r)
- # >>     obs, reward, done, truncated, _ = env.step(a)
- # >>     if done:
- # >>         obs, info = env.reset()
- # >>         state = init_state
- # >>         prev_a = 0
- # >>         prev_r = 0.0
- # >>     else:
- # >>         state = state_out