Hi,
I don’t understand where this error is coming from and how to debug it:
ConnectionError: Error 10054 while writing to the socket. An existing connection was forcibly closed by the remote host.
I have a W10 machine. Below is my code. What am I missing?
class RayTune:
def __init__(self, path):
self.data = Wrangler(path)
self.default = {
'train_dataset': self.data.train_dataset,
'test_dataset': self.data.test_dataset,
'test_dataframe': self.data.test_dataframe,
'user_pool': len(self.data.user_pool),
'item_pool': len(self.data.item_pool),
'latent_dim': 8,
'lr': 3e-4,
'wd': 1e-7,
'bs': 256,
'epochs': 35,
'cuda': True,
'comment': '_ray_tune'
}
def train_gmf_tune(self, config, checkpoint_dir=None):
self.update_default(config)
engine = Experiment(self.default)
engine.fit(ray_tune=True)
def update_default(self, config):
for key, val in config.items():
self.default[key] = val
def main(self, num_samples=10, max_epochs=10, gpus=1):
config = {
'lr': tune.loguniform(1e-5, 1e-1),
'wd': tune.loguniform(1.5e-7, 1e-7),
'bs': tune.choice([32, 64, 128, 256, 512]),
'latent_dim': tune.choice([4, 6, 8, 10, 12, 14, 16, 18, 20])
}
reporter = CLIReporter(metric_columns=['loss', 'hr'])
scheduler = ASHAScheduler(max_t=max_epochs, grace_period=1, reduction_factor=2)
result = tune.run(tune.with_parameters(self.train_gmf_tune),
resources_per_trial={"cpu": 2, "gpu": gpus},
config=config,
metric="hr",
mode="max",
num_samples=num_samples,
scheduler=scheduler,
progress_reporter=reporter)
return result.get_best_trial('hr', 'max', 'last').config
The full error message:
ConnectionResetError Traceback (most recent call last)
d:\anaconda3\envs\api4\lib\site-packages\redis\connection.py in send_packed_command(self, command, check_health)
699 for item in command:
→ 700 sendall(self._sock, item)
701 except socket.timeout:d:\anaconda3\envs\api4\lib\site-packages\redis_compat.py in sendall(sock, *args, **kwargs)
7 def sendall(sock, *args, **kwargs):
----> 8 return sock.sendall(*args, **kwargs)
9ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
in
1 import ray
2
----> 3 tuner.main()D:\workspace\imo-api4\latest\engine.py in main(self, num_samples, max_epochs, gpus)
139 ‘bs’: tune.choice([32, 64, 128, 256, 512]),
140 ‘latent_dim’: tune.choice([4, 6, 8, 10, 12, 14, 16, 18, 20])
→ 141 }
142 reporter = CLIReporter(metric_columns=[‘loss’, ‘hr’])
143 scheduler = ASHAScheduler(max_t=max_epochs, grace_period=1, reduction_factor=2)d:\anaconda3\envs\api4\lib\site-packages\ray\tune\tune.py in run(run_or_experiment, name, metric, mode, stop, time_budget_s, config, resources_per_trial, num_samples, local_dir, search_alg, scheduler, keep_checkpoints_num, checkpoint_score_attr, checkpoint_freq, checkpoint_at_end, verbose, progress_reporter, loggers, log_to_file, trial_name_creator, trial_dirname_creator, sync_config, export_formats, max_failures, fail_fast, restore, server_port, resume, queue_trials, reuse_actors, trial_executor, raise_on_failed_trial, callbacks, ray_auto_init, run_errored_only, global_checkpoint_period, with_server, upload_dir, sync_to_cloud, sync_to_driver, sync_on_checkpoint)
297 for i, exp in enumerate(experiments):
298 if not isinstance(exp, Experiment):
→ 299 experiments[i] = Experiment(
300 name=name,
301 run=exp,d:\anaconda3\envs\api4\lib\site-packages\ray\tune\experiment.py in init (self, name, run, stop, time_budget_s, config, resources_per_trial, num_samples, local_dir, upload_dir, trial_name_creator, trial_dirname_creator, loggers, log_to_file, sync_to_driver, checkpoint_freq, checkpoint_at_end, sync_on_checkpoint, keep_checkpoints_num, checkpoint_score_attr, export_formats, max_failures, restore)
136 "checkpointable function. You can specify checkpoints "
137 “within your trainable function.”)
→ 138 self._run_identifier = Experiment.register_if_needed(run)
139 self.name = name or self._run_identifier
140d:\anaconda3\envs\api4\lib\site-packages\ray\tune\experiment.py in register_if_needed(cls, run_object)
274 “No name detected on trainable. Using {}.”.format(name))
275 try:
→ 276 register_trainable(name, run_object)
277 except (TypeError, PicklingError) as e:
278 msg = (d:\anaconda3\envs\api4\lib\site-packages\ray\tune\registry.py in register_trainable(name, trainable, warn)
69 raise TypeError(“Second argument must be convertable to Trainable”,
70 trainable)
—> 71 _global_registry.register(TRAINABLE_CLASS, name, trainable)
72
73d:\anaconda3\envs\api4\lib\site-packages\ray\tune\registry.py in register(self, category, key, value)
122 self._to_flush[(category, key)] = pickle.dumps(value)
123 if _internal_kv_initialized():
→ 124 self.flush_values()
125
126 def contains(self, category, key):d:\anaconda3\envs\api4\lib\site-packages\ray\tune\registry.py in flush_values(self)
144 def flush_values(self):
145 for (category, key), value in self._to_flush.items():
→ 146 _internal_kv_put(_make_key(category, key), value, overwrite=True)
147 self._to_flush.clear()
148d:\anaconda3\envs\api4\lib\site-packages\ray\experimental\internal_kv.py in _internal_kv_put(key, value, overwrite)
25
26 if overwrite:
—> 27 updated = worker.redis_client.hset(key, “value”, value)
28 else:
29 updated = worker.redis_client.hsetnx(key, “value”, value)d:\anaconda3\envs\api4\lib\site-packages\redis\client.py in hset(self, name, key, value)
3002 Returns 1 if HSET created a new field, otherwise 0
3003 “”"
→ 3004 return self.execute_command(‘HSET’, name, key, value)
3005
3006 def hsetnx(self, name, key, value):d:\anaconda3\envs\api4\lib\site-packages\redis\client.py in execute_command(self, *args, **options)
875 conn = self.connection or pool.get_connection(command_name, **options)
876 try:
→ 877 conn.send_command(*args)
878 return self.parse_response(conn, command_name, **options)
879 except (ConnectionError, TimeoutError) as e:d:\anaconda3\envs\api4\lib\site-packages\redis\connection.py in send_command(self, *args, **kwargs)
718 def send_command(self, *args, **kwargs):
719 “Pack and send a command to the Redis server”
→ 720 self.send_packed_command(self.pack_command(*args),
721 check_health=kwargs.get(‘check_health’, True))
722d:\anaconda3\envs\api4\lib\site-packages\redis\connection.py in send_packed_command(self, command, check_health)
710 errno = e.args[0]
711 errmsg = e.args[1]
→ 712 raise ConnectionError(“Error %s while writing to socket. %s.” %
713 (errno, errmsg))
714 except: # noqa: E722ConnectionError: Error 10054 while writing to socket. An existing connection was forcibly closed by the remote host.