I want to use Ray and PyTorch to parallelize model training by putting each layer of the model in its own Ray actor, but it doesn't seem to be working: as far as I can tell, the backward pass never reaches the layers inside the actors, so the parameters never actually train. Here is my code (I've added a couple of inline sanity checks, and at the end a sketch of an alternative I'm considering):
import ray
import torch
import torch.nn as nn
import torch.optim as optim
# Initialize Ray
ray.init()
# Define a simple neural network model
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
# Split the model layer by layer: each layer lives in its own Ray actor
@ray.remote
class LayerActor:
    def __init__(self, layer):
        self.layer = layer

    def forward(self, x):
        return self.layer(x)

    def get_parameters(self):
        return self.layer.state_dict()

    def set_parameters(self, state_dict):
        self.layer.load_state_dict(state_dict)
# Create a model instance
model = SimpleNN()
# Create one actor per layer (each actor receives a serialized copy of its layer)
fc1_actor = LayerActor.remote(model.fc1)
fc2_actor = LayerActor.remote(model.fc2)
# Example input data and target
input_data = torch.randn(1, 10, requires_grad=True)
target = torch.randint(0, 2, (1,))
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
# Forward pass: each layer runs in a remote actor
x = input_data
x = ray.get(fc1_actor.forward.remote(x))
x = torch.relu(x)
x = ray.get(fc2_actor.forward.remote(x))
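# (Sanity check I added: x has made a round trip through Ray's object store
# here, and as far as I understand the autograd graph can't cross process
# boundaries, so I suspect x is no longer connected to fc1/fc2 at this point.)
print(x.requires_grad, x.grad_fn)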
# Compute the loss
loss = criterion(x, target)
# Backward pass
optimizer.zero_grad()
loss.backward()
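# (Another check I added: did any gradient actually reach the local model?
# I expect None here, since the actors ran the forward pass on their own
# copies of the layers, not on model.fc1/model.fc2.)
print(model.fc1.weight.grad)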
# Optimizer step
optimizer.step()
# Push the local model's parameters back to the layer actors
fc1_state_dict = model.fc1.state_dict()
fc2_state_dict = model.fc2.state_dict()
ray.get(fc1_actor.set_parameters.remote(fc1_state_dict))
ray.get(fc2_actor.set_parameters.remote(fc2_state_dict))
# Final output
print(x)
# Shut down Ray
ray.shutdown()
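My suspicion is that the forward pass can't stay differentiable across the actor boundary, since every tensor is serialized on its way through ray.get(). If that's right, would the fix be to keep the whole forward/backward/step loop inside a single process, and only ship batches in and loss values out? A minimal sketch of what I mean (the TrainerActor name and the one-off batch are placeholders I made up, not tested code):

@ray.remote
class TrainerActor:
    # Owns the full model, loss, and optimizer, so autograd never has to
    # cross a process boundary.
    def __init__(self):
        self.model = SimpleNN()
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.SGD(self.model.parameters(), lr=0.01)

    def train_step(self, inputs, targets):
        # Forward, backward, and optimizer step all happen in this process.
        self.optimizer.zero_grad()
        loss = self.criterion(self.model(inputs), targets)
        loss.backward()
        self.optimizer.step()
        return loss.item()

trainer = TrainerActor.remote()
print(ray.get(trainer.train_step.remote(torch.randn(1, 10), torch.randint(0, 2, (1,)))))

Or is there a built-in Ray facility (Ray Train?) that is meant for exactly this use case?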
thanks