Hi all,
Has anyone encountered an error like the one below, which appears for no obvious reason?
I am fairly sure the problem is not in my code: the first trial ran all 200 iterations without any issue, and the second trial ran for 175 iterations before suddenly failing with this error. Exactly the same code runs on a Linux machine without any errors; the error only occurs when I run it on Windows.
Thanks!
2023-07-17 00:52:45,667 WARNING tune_controller.py:271 -- Trial controller checkpointing failed: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\team\\ray_results\\PPO\\.tmp_generator' -> 'C:\\Users\\team\\ray_results\\PPO\\basic-variant-state-2023-07-16_22-24-44.json'
Traceback (most recent call last):
File "D:\Reza\housing_design\rlb_agents\wb_runner.py", line 145, in <module>
self.run()
File "D:\Reza\housing_design\rlb_agents\wb_runner.py", line 129, in run
self.learner.learn(trial_id, run_id, execution)
File "D:\Reza\housing_design\rlb_agents\learner.py", line 106, in learn
self.tunner.tune() # self.checkpoint_dir = self.tunner.tune()
File "D:\Reza\housing_design\rlb_agents\tunner.py", line 100, in tune
results = tuner.fit()
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\tuner.py", line 347, in fit
return self._local_tuner.fit()
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\impl\tuner_internal.py", line 588, in fit
analysis = self._fit_internal(trainable, param_space)
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\impl\tuner_internal.py", line 712, in _fit_internal
analysis = run(
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\tune.py", line 1070, in run
runner.step()
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\execution\tune_controller.py", line 272, in step
raise e
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\execution\tune_controller.py", line 269, in step
self.checkpoint()
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\execution\trial_runner.py", line 491, in checkpoint
self._checkpoint_manager.checkpoint(
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\execution\experiment_state.py", line 232, in checkpoint
save_fn()
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\execution\trial_runner.py", line 383, in save_to_dir
self._search_alg.save_to_dir(
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\search\basic_variant.py", line 409, in save_to_dir
_atomic_save(
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\tune\utils\util.py", line 515, in _atomic_save
os.replace(tmp_search_ckpt_path, os.path.join(checkpoint_dir, file_name))
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\team\\ray_results\\PPO\\.tmp_generator' -> 'C:\\Users\\team\\ray_results\\PPO\\basic-variant-state-2023-07-16_22-24-44.json'
Exception ignored in: <function WandbLoggerCallback.__del__ at 0x0000027D036D9AB0>
Traceback (most recent call last):
File "C:\Users\team\miniconda3\envs\ray\lib\site-packages\ray\air\integrations\wandb.py", line 727, in __del__
AttributeError: 'NoneType' object has no attribute 'is_initialized'