So following this example of Fashion MNIST, I tried to modify from using torch DataLoader
into ray.data.Dataset
then calling it trainer function via the TorchTrainer(datasets=here), but it throws the error unable to convert.
import argparse
from typing import Dict
from ray.air import session
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import ray.train as train
from ray.train.torch import TorchTrainer
from ray.air.config import ScalingConfig
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
root="~/data",
train=True,
download=True,
transform=ToTensor(),
)
# Download test data from open datasets.
test_data = datasets.FashionMNIST(
root="~/data",
train=False,
download=True,
transform=ToTensor(),
)
# Define model
class NeuralNetwork(nn.Module):
def __init__(self):
super(NeuralNetwork, self).__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28 * 28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10),
nn.ReLU(),
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
training_dataloader = DataLoader(training_data)
ray_train_data = ray.data.from_torch(training_data)
#############
# Train function #
#############
def train_dist_func1():
# some config
n_epochs = config["n_epochs"]
batch_size = config["batch_size"]
lr = config["lr"]
worker_batch_size = batch_size//session.get_world_size()
# data
train_data = session.get_dataset_shard("train")
print("type of train_data:", type(train_data))
# model
model = NeuralNetwork()
model = train.torch.prepare_model(model)
# loss and optim
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# training
for n in range(n_epochs):
for batch in train_data.iter_torch_batches(
batch_size=batch_size, device=train.torch.get_device()
):
print(batch)
print(type(batch))
# here I am just passing now
pass
# then I call it via TorchTrainer
config={
"n_epochs":2,
"batch_size":32,
"lr":0.01
}
def train_torch_dist():
trainer = TorchTrainer(
train_loop_per_worker=train_dist_func1,
train_loop_config=config,
scaling_config=ScalingConfig(num_workers=2, use_gpu=True),
datasets={'train':ray_train_data}
)
results = trainer.fit()
print(results)
if __name__ == "__main__":
train_torch_dist()
the error is not that descriptive IMO, I know it failed to convert, but which part of my dataset is of type object? I don’t know which part causing this
TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.
Btw, I know I can just use the train.torch.prepare_data_loader
inside my train_dist_func1()
but I wonder if the above approach could works?
Any help is appreciated!