Environment check throws Entire first structure: (., {}) Entire second structure:

But when I do the same thing with Ray tune Ray-tune-project/lstm.py at main · Athe-kunal/Ray-tune-project · GitHub, it is failing with the following error

First structure: type=tuple str=(array([2.2788222e+02, 2.9840982e-01, 0.0000000e+00, 2.7170289e-01,
        1.4102402e+00, 1.2217970e+00, 1.8251243e+00, 1.0809784e+00,
        8.5718751e-01, 2.5914115e-01, 1.3132960e+00, 1.0967580e+00,
        8.3406353e-01, 2.3491893e+00, 1.0383877e+00, 1.1138891e+00,
        1.8886892e+00, 3.0702892e-01, 1.1083301e+00, 7.0789695e-01,
        4.7711730e-01, 1.1759629e+00, 1.6288950e+00, 5.5352813e-01,
        4.9202183e-01, 5.5303156e-01, 9.6180123e-01, 1.1277000e+00,
        1.0114505e+00, 8.0985188e-01, 4.9966720e-01, 6.7659909e-01,
        1.0117418e+00, 2.0312500e-01, 2.8125000e-01, 0.0000000e+00,
        2.8125000e-01, 7.8125000e-01, 7.9687500e-01, 9.3750000e-02,
        3.5937500e-01, 2.9687500e-01, 6.8750000e-01, 4.6875000e-02,
        5.4687500e-01, 2.8125000e-01, 6.8750000e-01, 1.2500000e-01,
        4.8437500e-01, 7.5000000e-01, 6.8750000e-01, 6.8750000e-01,
        5.3125000e-01, 2.1875000e-01, 8.4375000e-01, 6.4062500e-01,
        8.7500000e-01, 5.7812500e-01, 7.0312500e-01, 6.8750000e-01,
        9.2187500e-01, 9.8437500e-01, 2.8125000e-01, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.5433314e-01,
        1.5433314e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 9.0468752e-01,
        9.0468752e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.9882810e-01,
        6.9882810e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0677344e+00,
        1.0677344e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 7.0210940e-01,
        7.0210940e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.2859375e-01,
        4.2859375e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.7187500e-01,
        1.7187500e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 9.6984375e-01,
        9.6984375e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 5.9585935e-01,
        5.9585935e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.4359374e-01,
        4.4359374e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.3819531e+00,
        1.3819531e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.4078122e-01,
        6.4078122e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.7272139e-01,
        6.7272139e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.3857105e+00,
        1.3857105e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 2.0148438e-01,
        2.0148438e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 7.1117187e-01,
        7.1117187e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.5476562e-01,
        4.5476562e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.1765625e-01,
        3.1765625e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 7.5320315e-01,
        7.5320315e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0791407e+00,
        1.0791407e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.6893189e-01,
        3.6893189e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 2.9031250e-01,
        2.9031250e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.0562499e-01,
        3.0562499e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.2921876e-01,
        6.2921876e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.9789064e-01,
        6.9789064e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 5.8257812e-01,
        5.8257812e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.3166015e-01,
        4.3166015e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.8281250e-01,
        3.8281250e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.4242188e-01,
        4.4242188e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.1648440e-01,
        6.1648440e-01], dtype=float32), {})
 
 Second structure: type=ndarray str=[ -792.2486   -2144.3738    -129.80092   2915.458    -1308.43
   1269.5146    2074.0398    1901.5674    2501.5889    1164.0255
   -322.9323    1597.6952   -2573.6863   -2746.5698    2014.0723
  -1753.0637    -850.8143    2718.6846    2060.1797     258.4059
    545.0819    2851.2747   -1561.0944     349.4782   -1130.0878
   -403.55383   1070.6107    1372.8518   -2732.747     2289.9187
   2586.476    -1875.0045   -1937.3246    1576.0361    1310.3674
   -774.18475   -803.65875   2640.113     -840.6407    2802.322
     19.350145  2128.231     -979.46356     31.213558 -1505.1682
    208.86848   -780.9078   -1926.5431    2011.087     -629.80927
   1189.3198    2154.1665    2446.9292   -1517.6228    -167.33015
   -185.99898   1706.5778     824.91113   1430.8196     289.54074
    -83.10947  -2880.832    -1074.4584    2531.774     -290.6278
    329.9323   -1801.7854   -2637.3906      16.099054  -168.64241
   -125.876114 -1034.4384    1642.2655     351.62454    509.90973
    360.6945    -697.22485  -1484.3866   -1492.6287    -459.1929
  -2391.1506   -1728.1654    -764.98883  -1659.634    -2860.677
   -773.3472    2364.6418     916.4717     507.7752    1981.0371
   -257.88364   -637.90393   -901.27203  -1614.2391    -721.26996
   1833.0618   -1154.7478     969.5041     175.16115    223.94052
  -2363.3438     286.42517   1502.3491   -1839.2252     990.35913
   1965.7655    2190.7688     -74.32482  -1554.0822   -1204.554
    359.37643   1198.4722    -960.0168    1388.8052    1451.6226
  -2596.9067    -851.9495   -2175.7922    1587.5688    -322.92984
   1295.961    -2309.2183    2583.9165    2652.8699    2667.5493
   2502.1238    2813.4495    -842.62805   -242.12244   1219.3545
   2137.8694    -829.9557    1579.9534    2924.1921   -1282.7042
   2918.0173   -1075.8888    1481.3325    1201.7566     689.5607
  -1504.4034    1773.555     2896.91      2875.2483    -138.64302
  -2744.6174    -629.5382    1995.0635   -1050.1123     -73.988846
   -825.6564   -2097.2258    -809.13696  -2182.993    -2932.378
  -1921.0228    2604.5742    1624.0813   -2238.1482   -2156.0967
  -1990.5931   -2145.9197    2912.961     -726.6033    1492.1472
   1311.7047   -1292.6624     676.77026   -419.49207  -1351.4688
  -2679.772      920.6515   -1701.5579   -2236.0894    2608.5308
   -522.4806    -729.45245  -1825.2494    1263.7957    -882.4481
    338.66553  -2045.2618   -1575.5939     839.1834    -764.1029
   -687.5354   -2897.113    -2659.8728   -1605.8586    1714.4854
   1740.4427    2742.6748     367.4229    1100.9697   -2560.8313
   1445.6852   -1679.0385   -2801.7346     895.27704   1424.2039
  -2685.7834     634.448     1515.6146   -2839.2126   -2124.1313
   -331.8107   -2572.0156    2901.6733    1867.6338    -889.7915
   2168.2368   -2892.2544    2699.993    -1446.0782   -2005.3076
   -287.88196   -571.70496  -1412.371     1470.7764    1845.962
    321.17136    -32.960793  -486.28726  -2803.8381   -2929.3594
    934.08813    501.7609     391.37888    420.77762  -1653.7288
  -2707.625    -2399.6025    -514.26886   2206.8079   -1886.1824
   2690.6538    -398.5362     715.6277    1739.5406   -2561.3362
     74.25057   -945.2912     634.98395    663.10583    114.414795
    384.81158  -2277.277     1975.9971    1328.6526     819.96796
   1112.7996      55.950527 -1017.8577   -1035.914     1011.9785
    714.5928    1143.4572    -772.09064   1600.602     -283.70132
   -463.30814   -519.4983     -96.57187    662.2404     518.65125
    440.34192   -543.07623   1639.1079    1398.8292    2889.071
  -2044.9774     284.34378   1523.3135   -2078.8965     269.63492
   -990.6091    -844.06244  -1025.7518     769.9258   -2966.9124
  -1770.2169   -2029.7166     -11.077788 -1954.0278     146.9773
   1446.9406    2624.8562    2317.0337      51.89259    603.17914
  -1323.7025    1419.9907   -2526.2334    1230.5354    2883.4102
  -1753.4723    1794.5089     613.6288    2328.141     2853.8284
   2666.7334   -1498.047     1295.5944     967.1999   -2580.7476
  -2294.3696    1557.7621    2936.6091   -1691.9373   -1991.3083
   -895.6827    2094.7136   -1278.9231     -96.7892    -472.42078
   1660.7162      75.07274  -2942.1597   -1975.431    -2479.6326
   2608.9258    2222.379     1456.2847   -1376.8156      83.49044
   -281.9596   -1511.0402    -807.6998   -1640.7294     673.7634
  -1975.97      2338.9548   -2384.6924  ]
 
 More specifically: Substructure "type=tuple str=(array([2.2788222e+02, 2.9840982e-01, 0.0000000e+00, 2.7170289e-01,
        1.4102402e+00, 1.2217970e+00, 1.8251243e+00, 1.0809784e+00,
        8.5718751e-01, 2.5914115e-01, 1.3132960e+00, 1.0967580e+00,
        8.3406353e-01, 2.3491893e+00, 1.0383877e+00, 1.1138891e+00,
        1.8886892e+00, 3.0702892e-01, 1.1083301e+00, 7.0789695e-01,
        4.7711730e-01, 1.1759629e+00, 1.6288950e+00, 5.5352813e-01,
        4.9202183e-01, 5.5303156e-01, 9.6180123e-01, 1.1277000e+00,
        1.0114505e+00, 8.0985188e-01, 4.9966720e-01, 6.7659909e-01,
        1.0117418e+00, 2.0312500e-01, 2.8125000e-01, 0.0000000e+00,
        2.8125000e-01, 7.8125000e-01, 7.9687500e-01, 9.3750000e-02,
        3.5937500e-01, 2.9687500e-01, 6.8750000e-01, 4.6875000e-02,
        5.4687500e-01, 2.8125000e-01, 6.8750000e-01, 1.2500000e-01,
        4.8437500e-01, 7.5000000e-01, 6.8750000e-01, 6.8750000e-01,
        5.3125000e-01, 2.1875000e-01, 8.4375000e-01, 6.4062500e-01,
        8.7500000e-01, 5.7812500e-01, 7.0312500e-01, 6.8750000e-01,
        9.2187500e-01, 9.8437500e-01, 2.8125000e-01, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.5433314e-01,
        1.5433314e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 9.0468752e-01,
        9.0468752e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.9882810e-01,
        6.9882810e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0677344e+00,
        1.0677344e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 7.0210940e-01,
        7.0210940e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.2859375e-01,
        4.2859375e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.7187500e-01,
        1.7187500e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 9.6984375e-01,
        9.6984375e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 5.9585935e-01,
        5.9585935e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.4359374e-01,
        4.4359374e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.3819531e+00,
        1.3819531e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.4078122e-01,
        6.4078122e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.7272139e-01,
        6.7272139e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.3857105e+00,
        1.3857105e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 2.0148438e-01,
        2.0148438e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 7.1117187e-01,
        7.1117187e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.5476562e-01,
        4.5476562e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.1765625e-01,
        3.1765625e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 7.5320315e-01,
        7.5320315e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0791407e+00,
        1.0791407e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.6893189e-01,
        3.6893189e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 2.9031250e-01,
        2.9031250e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.0562499e-01,
        3.0562499e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.2921876e-01,
        6.2921876e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.9789064e-01,
        6.9789064e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 5.8257812e-01,
        5.8257812e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.3166015e-01,
        4.3166015e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.8281250e-01,
        3.8281250e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.4242188e-01,
        4.4242188e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.1648440e-01,
        6.1648440e-01], dtype=float32), {})" is a sequence, while substructure "type=ndarray str=[ -792.2486   -2144.3738    -129.80092   2915.458    -1308.43
   1269.5146    2074.0398    1901.5674    2501.5889    1164.0255
   -322.9323    1597.6952   -2573.6863   -2746.5698    2014.0723
  -1753.0637    -850.8143    2718.6846    2060.1797     258.4059
    545.0819    2851.2747   -1561.0944     349.4782   -1130.0878
   -403.55383   1070.6107    1372.8518   -2732.747     2289.9187
   2586.476    -1875.0045   -1937.3246    1576.0361    1310.3674
   -774.18475   -803.65875   2640.113     -840.6407    2802.322
     19.350145  2128.231     -979.46356     31.213558 -1505.1682
    208.86848   -780.9078   -1926.5431    2011.087     -629.80927
   1189.3198    2154.1665    2446.9292   -1517.6228    -167.33015
   -185.99898   1706.5778     824.91113   1430.8196     289.54074
    -83.10947  -2880.832    -1074.4584    2531.774     -290.6278
    329.9323   -1801.7854   -2637.3906      16.099054  -168.64241
   -125.876114 -1034.4384    1642.2655     351.62454    509.90973
    360.6945    -697.22485  -1484.3866   -1492.6287    -459.1929
  -2391.1506   -1728.1654    -764.98883  -1659.634    -2860.677
   -773.3472    2364.6418     916.4717     507.7752    1981.0371
   -257.88364   -637.90393   -901.27203  -1614.2391    -721.26996
   1833.0618   -1154.7478     969.5041     175.16115    223.94052
  -2363.3438     286.42517   1502.3491   -1839.2252     990.35913
   1965.7655    2190.7688     -74.32482  -1554.0822   -1204.554
    359.37643   1198.4722    -960.0168    1388.8052    1451.6226
  -2596.9067    -851.9495   -2175.7922    1587.5688    -322.92984
   1295.961    -2309.2183    2583.9165    2652.8699    2667.5493
   2502.1238    2813.4495    -842.62805   -242.12244   1219.3545
   2137.8694    -829.9557    1579.9534    2924.1921   -1282.7042
   2918.0173   -1075.8888    1481.3325    1201.7566     689.5607
  -1504.4034    1773.555     2896.91      2875.2483    -138.64302
  -2744.6174    -629.5382    1995.0635   -1050.1123     -73.988846
   -825.6564   -2097.2258    -809.13696  -2182.993    -2932.378
  -1921.0228    2604.5742    1624.0813   -2238.1482   -2156.0967
  -1990.5931   -2145.9197    2912.961     -726.6033    1492.1472
   1311.7047   -1292.6624     676.77026   -419.49207  -1351.4688
  -2679.772      920.6515   -1701.5579   -2236.0894    2608.5308
   -522.4806    -729.45245  -1825.2494    1263.7957    -882.4481
    338.66553  -2045.2618   -1575.5939     839.1834    -764.1029
   -687.5354   -2897.113    -2659.8728   -1605.8586    1714.4854
   1740.4427    2742.6748     367.4229    1100.9697   -2560.8313
   1445.6852   -1679.0385   -2801.7346     895.27704   1424.2039
  -2685.7834     634.448     1515.6146   -2839.2126   -2124.1313
   -331.8107   -2572.0156    2901.6733    1867.6338    -889.7915
   2168.2368   -2892.2544    2699.993    -1446.0782   -2005.3076
   -287.88196   -571.70496  -1412.371     1470.7764    1845.962
    321.17136    -32.960793  -486.28726  -2803.8381   -2929.3594
    934.08813    501.7609     391.37888    420.77762  -1653.7288
  -2707.625    -2399.6025    -514.26886   2206.8079   -1886.1824
   2690.6538    -398.5362     715.6277    1739.5406   -2561.3362
     74.25057   -945.2912     634.98395    663.10583    114.414795
    384.81158  -2277.277     1975.9971    1328.6526     819.96796
   1112.7996      55.950527 -1017.8577   -1035.914     1011.9785
    714.5928    1143.4572    -772.09064   1600.602     -283.70132
   -463.30814   -519.4983     -96.57187    662.2404     518.65125
    440.34192   -543.07623   1639.1079    1398.8292    2889.071
  -2044.9774     284.34378   1523.3135   -2078.8965     269.63492
   -990.6091    -844.06244  -1025.7518     769.9258   -2966.9124
  -1770.2169   -2029.7166     -11.077788 -1954.0278     146.9773
   1446.9406    2624.8562    2317.0337      51.89259    603.17914
  -1323.7025    1419.9907   -2526.2334    1230.5354    2883.4102
  -1753.4723    1794.5089     613.6288    2328.141     2853.8284
   2666.7334   -1498.047     1295.5944     967.1999   -2580.7476
  -2294.3696    1557.7621    2936.6091   -1691.9373   -1991.3083
   -895.6827    2094.7136   -1278.9231     -96.7892    -472.42078
   1660.7162      75.07274  -2942.1597   -1975.431    -2479.6326
   2608.9258    2222.379     1456.2847   -1376.8156      83.49044
   -281.9596   -1511.0402    -807.6998   -1640.7294     673.7634
  -1975.97      2338.9548   -2384.6924  ]" is not
 Entire first structure:
 (., {})
 Entire second structure:
 .

My guess:
The first structure probably comes from the reset function, and the second structure from the step function. As per gymnasium API, we need to have an info_dict in the reset function, so that’s why we have an extra {}, but If I remove it, then gymnasium complains. This environment Ray-tune-project/env_stocktrading_np.py at main · Athe-kunal/Ray-tune-project · GitHub is working fine with RLlib for both LSTM and Attention network, but it is failing with Ray tune. This was working a month back, but now it is showing this issue. Am I missing something here? Please let me know, and if something is not clear, let me know, I will explain it again.

Please post a minimal reproduction script! This is hard to work on otherwise

Hi @arturn, sorry for not giving the reproduction script. Here is the reproduction script.
PLEASE ENSURE THAT YOU DOWNLOAD THESE TWO FILES FIRST BEFORE RUNNING THE SCRIPT, AS THE DATA IS REQUIRED FOR THE ENVIRONMENT.

from __future__ import annotations

import gymnasium as gym
import numpy as np
from numpy import random as rd


class StockTradingEnv(gym.Env):
    def __init__(
        self,
        config,
        initial_account=1e6,
        gamma=0.99,
        turbulence_thresh=99,
        min_stock_rate=0.1,
        max_stock=1e2,
        initial_capital=1e6,
        buy_cost_pct=1e-3,
        sell_cost_pct=1e-3,
        reward_scaling=2**-11,
        initial_stocks=None,
    ):
        price_ary = config["price_array"]
        tech_ary = config["tech_array"]
        turbulence_ary = config["turbulence_array"]
        if_train = config["if_train"]
        self.price_ary = price_ary.astype(np.float32)
        self.tech_ary = tech_ary.astype(np.float32)
        self.turbulence_ary = turbulence_ary

        self.tech_ary = self.tech_ary * 2**-7
        self.turbulence_bool = (turbulence_ary > turbulence_thresh).astype(np.float32)
        self.turbulence_ary = (
            self.sigmoid_sign(turbulence_ary, turbulence_thresh) * 2**-5
        ).astype(np.float32)

        stock_dim = self.price_ary.shape[1]
        self.gamma = gamma
        self.max_stock = max_stock
        self.min_stock_rate = min_stock_rate
        self.buy_cost_pct = buy_cost_pct
        self.sell_cost_pct = sell_cost_pct
        self.reward_scaling = reward_scaling
        self.initial_capital = initial_capital
        self.initial_stocks = (
            np.zeros(stock_dim, dtype=np.float32)
            if initial_stocks is None
            else initial_stocks
        )

        # reset()
        self.day = None
        self.amount = None
        self.stocks = None
        self.total_asset = None
        self.gamma_reward = None
        self.initial_total_asset = None

        # environment information
        self.env_name = "StockEnv"
        # self.state_dim = 1 + 2 + 2 * stock_dim + self.tech_ary.shape[1]
        # # amount + (turbulence, turbulence_bool) + (price, stock) * stock_dim + tech_dim
        self.state_dim = 1 + 2 + 3 * stock_dim + self.tech_ary.shape[1]
        # amount + (turbulence, turbulence_bool) + (price, stock) * stock_dim + tech_dim
        self.stocks_cd = None
        self.action_dim = stock_dim
        self.max_step = self.price_ary.shape[0] - 1
        self.if_train = if_train
        self.if_discrete = False
        self.target_return = 10.0
        self.episode_return = 0.0

        self.observation_space = gym.spaces.Box(
            low=-3000, high=3000, shape=(self.state_dim,), dtype=np.float32
        )
        self.action_space = gym.spaces.Box(
            low=-1, high=1, shape=(self.action_dim,), dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        self.day = 0
        price = self.price_ary[self.day]

        if self.if_train:
            self.stocks = (
                self.initial_stocks + rd.randint(0, 64, size=self.initial_stocks.shape)
            ).astype(np.float32)
            self.stocks_cool_down = np.zeros_like(self.stocks)
            self.amount = (
                self.initial_capital * rd.uniform(0.95, 1.05)
                - (self.stocks * price).sum()
            )
        else:
            self.stocks = self.initial_stocks.astype(np.float32)
            self.stocks_cool_down = np.zeros_like(self.stocks)
            self.amount = self.initial_capital

        self.total_asset = self.amount + (self.stocks * price).sum()
        self.initial_total_asset = self.total_asset
        self.gamma_reward = 0.0
        print(self.get_state(price).shape)
        return self.get_state(price), {}  # state

    def step(self, actions):
        actions = (actions * self.max_stock).astype(int)

        self.day += 1
        price = self.price_ary[self.day]
        self.stocks_cool_down += 1

        if self.turbulence_bool[self.day] == 0:
            min_action = int(self.max_stock * self.min_stock_rate)  # stock_cd
            for index in np.where(actions < -min_action)[0]:  # sell_index:
                if price[index] > 0:  # Sell only if current asset is > 0
                    sell_num_shares = min(self.stocks[index], -actions[index])
                    self.stocks[index] -= sell_num_shares
                    self.amount += (
                        price[index] * sell_num_shares * (1 - self.sell_cost_pct)
                    )
                    self.stocks_cool_down[index] = 0
            for index in np.where(actions > min_action)[0]:  # buy_index:
                if (
                    price[index] > 0
                ):  # Buy only if the price is > 0 (no missing data in this particular date)
                    buy_num_shares = min(self.amount // price[index], actions[index])
                    self.stocks[index] += buy_num_shares
                    self.amount -= (
                        price[index] * buy_num_shares * (1 + self.buy_cost_pct)
                    )
                    self.stocks_cool_down[index] = 0

        else:  # sell all when turbulence
            self.amount += (self.stocks * price).sum() * (1 - self.sell_cost_pct)
            self.stocks[:] = 0
            self.stocks_cool_down[:] = 0

        state = self.get_state(price)
        print(state.shape)
        total_asset = self.amount + (self.stocks * price).sum()
        reward = (total_asset - self.total_asset) * self.reward_scaling
        self.total_asset = total_asset

        self.gamma_reward = self.gamma_reward * self.gamma + reward
        done = self.day == self.max_step
        if done:
            reward = self.gamma_reward
            self.episode_return = total_asset / self.initial_total_asset

        return state, reward, done, done, dict()

    def get_state(self, price):
        amount = np.array(self.amount * (2**-12), dtype=np.float32)
        scale = np.array(2**-6, dtype=np.float32)
        return np.hstack(
            (
                amount,
                self.turbulence_ary[self.day],
                self.turbulence_bool[self.day],
                price * scale,
                self.stocks * scale,
                self.stocks_cool_down,
                self.tech_ary[self.day],
            )
        )  # state.astype(np.float32)

    @staticmethod
    def sigmoid_sign(ary, thresh):
        def sigmoid(x):
            return 1 / (1 + np.exp(-x * np.e)) - 0.5

        return sigmoid(ary / thresh) * thresh


import ray

assert (
    ray.__version__ > "2.0.0"
), "Please install ray 2.2.0 by doing 'pip install ray[rllib] ray[tune] lz4' , lz4 is for population based tuning"
from pprint import pprint

import psutil
from ray import tune
from ray.air import FailureConfig, RunConfig, ScalingConfig
from ray.air.config import CheckpointConfig
from ray.rllib.algorithms import Algorithm
from ray.tune import register_env
from ray.tune.callback import Callback
from ray.tune.search import ConcurrencyLimiter
from ray.tune.tune_config import TuneConfig

psutil_memory_in_bytes = psutil.virtual_memory().total
ray._private.utils.get_system_memory = lambda: psutil_memory_in_bytes
from typing import Any, Dict, List, Optional, Union


class DRLlibv2:
    def __init__(
        self,
        trainable: Union[str, Any],
        params: dict,
        train_env=None,
        train_env_name: str = "",
        run_name: str = "tune_run",
        framework: str = "torch",
        local_dir: str = "tune_results",
        num_workers: int = 1,
        search_alg=None,
        concurrent_trials: int = 0,
        num_samples: int = 0,
        scheduler=None,
        log_level: str = "WARN",
        num_gpus: Union[float, int] = 0,
        num_cpus: Union[float, int] = 2,
        dataframe_save: str = "tune.csv",
        metric: str = "episode_reward_mean",
        mode: Union[str, List[str]] = "max",
        max_failures: int = 0,
        training_iterations: int = 100,
        checkpoint_num_to_keep: Union[None, int] = None,
        checkpoint_freq: int = 0,
        reuse_actors: bool = False,
        callbacks: Optional[List["Callback"]] = None,
    ):

        if train_env is not None:
            register_env(train_env_name, lambda config: train_env)

        self.params = params
        self.params["framework"] = framework
        self.params["log_level"] = log_level
        self.params["num_gpus"] = num_gpus
        self.params["num_workers"] = num_workers
        self.params["env"] = train_env_name

        self.run_name = run_name
        self.local_dir = local_dir
        self.search_alg = search_alg
        if concurrent_trials != 0:
            self.search_alg = ConcurrencyLimiter(
                self.search_alg, max_concurrent=concurrent_trials
            )
        self.scheduler = scheduler
        self.num_samples = num_samples
        self.trainable = trainable
        # self.trainable = tune.with_resources(self.trainable(),{"cpu":num_cpus,"gpu":num_gpus})
        if isinstance(self.trainable, str):
            self.trainable.upper()
        self.num_cpus = num_cpus
        self.num_gpus = num_gpus
        self.dataframe_save = dataframe_save
        self.metric = metric
        self.mode = mode
        self.max_failures = max_failures
        self.training_iterations = training_iterations
        self.checkpoint_freq = checkpoint_freq
        self.checkpoint_num_to_keep = checkpoint_num_to_keep
        self.reuse_actors = reuse_actors
        self.callbacks = callbacks

    def train_tune_model(self):
        """
        Tuning and training the model
        Returns the results object
        """
        ray.init(
            num_cpus=self.num_cpus, num_gpus=self.num_gpus, ignore_reinit_error=True
        )

        tuner = tune.Tuner(
            self.trainable,
            param_space=self.params,
            tune_config=TuneConfig(
                search_alg=self.search_alg,
                num_samples=self.num_samples,
                metric=self.metric,
                mode=self.mode,
                reuse_actors=self.reuse_actors,
            ),
            run_config=RunConfig(
                name=self.run_name,
                local_dir=self.local_dir,
                callbacks=self.callbacks,
                failure_config=FailureConfig(
                    max_failures=self.max_failures, fail_fast=False
                ),
                stop={"training_iteration": self.training_iterations},
                checkpoint_config=CheckpointConfig(
                    num_to_keep=self.checkpoint_num_to_keep,
                    checkpoint_score_attribute=self.metric,
                    checkpoint_score_order=self.mode,
                    checkpoint_frequency=self.checkpoint_freq,
                    checkpoint_at_end=True,
                ),
                verbose=3,
            ),
        )

        self.results = tuner.fit()
        # if self.search_alg is not None: self.search_alg.save_to_dir(self.local_dir)
        # ray.shutdown()
        return self.results


import ray

assert (
    ray.__version__ >= "2.2.0"
), "Please install ray 2.2.0 by doing 'pip install ray[rllib] ray[tune] lz4' , lz4 is for population based tuning"

from pprint import pprint

import numpy as np
# Importing the libraries
import pandas as pd
import psutil
import ray
import ray.rllib.algorithms.ppo as ppo
from ray import tune
from ray.tune.registry import register_env
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search import ConcurrencyLimiter
from ray.tune.search.optuna import OptunaSearch

psutil_memory_in_bytes = psutil.virtual_memory().total
ray._private.utils.get_system_memory = lambda: psutil_memory_in_bytes
from typing import Any, Dict, Optional

model_name = "PPO"
env = StockTradingEnv
data_source = "yahoofinance"
time_interval = "1D"

import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

import numpy as np

with open("train.npy", "rb") as f:
    train_env_config = np.load(f, allow_pickle=True)

with open("test.npy", "rb") as f:
    test_env_config = np.load(f, allow_pickle=True)
train_env_config = train_env_config.item()
test_env_config = test_env_config.item()
from gymnasium.wrappers import EnvCompatibility
from ray.tune import register_env

env_name = "StockTrading_train_env"


def reg_env(config):
    return env(config)


register_env(env_name, lambda config: env(train_env_config))

train_env_instance = EnvCompatibility(env(train_env_config))


def sample_ppo_params():
    return {
        "entropy_coeff": tune.loguniform(0.00000001, 0.1),
        "lr": tune.loguniform(5e-5, 0.001),
        "sgd_minibatch_size": tune.choice([32, 64, 128, 256, 512]),
        "lambda": tune.choice([0.1, 0.3, 0.5, 0.7, 0.9, 1.0]),
        #  "entropy_coeff": 0.0000001,
        #   "lr": 5e-5,
        #   "sgd_minibatch_size": 64,
        #   "lambda":0.9,
        "framework": "torch",
        "model": {
            "use_attention": True,
            "attention_num_transformer_units": 1,
            "attention_dim": 64,
            "attention_num_heads": 1,
            "attention_head_dim": 32,
            "attention_memory_inference": 50,
            "attention_memory_training": 50,
            "attention_position_wise_mlp_dim": 32,
            "attention_init_gru_gate_bias": 2.0,
            "attention_use_n_prev_actions": 0,
            "attention_use_n_prev_rewards": 0,
        },
    }


metric = "episode_reward_mean"
mode = "max"

search_alg = OptunaSearch(metric=metric, mode=mode)

scheduler_ = ASHAScheduler(
    metric=metric,
    mode=mode,
    max_t=5,
    grace_period=1,
    reduction_factor=2,
)

drl_agent = DRLlibv2(
    trainable=model_name,
    train_env=train_env_instance,
    train_env_name="StockTrading_train",
    framework="torch",
    num_workers=1,
    log_level="DEBUG",
    run_name="FINRL_TEST_TRANS",
    local_dir="FINRL_TEST_TRANS",
    params=sample_ppo_params(),
    num_samples=2,
    num_gpus=1,
    training_iterations=10,
    checkpoint_freq=5,
    scheduler=scheduler_,
    search_alg=search_alg,
)

trans_res = drl_agent.train_tune_model()

Ray: 2.4.0
System: Windows WSL2

Please let me know if I missing something, thanks

Thanks for the script.
The issue is that your wrap is compatible with the gymnasium API (for example, it has a reset method that returns observation and info dict), but still use from gymnasium.wrappers import EnvCompatibility.
This wraps your environment and the wrapped environment returns ((obs, {}), {}), while it should return (obs, {})

Thanks a lot, @arturn
This was a savior, it fixed my issue.