Hi friends, I have recently been trying to use the C++ API in my project, but I can't get the example to run, and it has puzzled me a lot.
The version of Ray I am using is 2.5.1.
The code is as follows:
#include <exception>
#include <iostream>
#include <typeinfo>
#include <vector>

#include <ray/api.h>
/// Plain free function: returns the sum of its two integer arguments.
int Plus(int x, int y) {
  const int sum = x + y;
  return sum;
}
/// Declare Plus as a remote function by passing it to the RAY_REMOTE macro.
/// NOTE(review): presumably this declaration is required before Plus can be
/// submitted as a Ray task — confirm against the Ray C++ API docs.
RAY_REMOTE(Plus);
/// Simple counter that keeps a running integer total.
class Counter {
 public:
  /// Current total; starts at the value given to the constructor.
  int count;

  /// Construct a counter whose total starts at `init`.
  Counter(int init) : count(init) {}

  /// Static factory method: allocates a Counter starting at `init` with `new`
  /// and returns the raw pointer.
  static Counter *FactoryCreate(int init) {
    Counter *created = new Counter(init);
    return created;
  }

  /// Non-static function: adds `x` to the total and returns the updated total.
  int Add(int x) {
    count = count + x;
    return count;
  }
};
/// Declare Counter's static factory (FactoryCreate) and its member function
/// Add as remote functions by passing both to the RAY_REMOTE macro.
/// NOTE(review): presumably both must be declared here before Counter can be
/// created as an actor and Add invoked on it — confirm against the Ray C++ docs.
RAY_REMOTE(Counter::FactoryCreate, &Counter::Add);
int main(int argc, char **argv) {
/// initialization
ray::RayConfig config;
config.address="192.168.10.61:9700";
config.code_search_path.push_back("/home/mic_rd/test/example.so");
ray::Init(config);
/// put and get object
auto object = ray::Put(100);
auto put_get_result = *(ray::Get(object));
std::cout << "put_get_result = " << put_get_result << std::endl;
/// common task
auto task_object = ray::Task(Plus).Remote(1, 2);
int task_result = *(ray::Get(task_object));
std::cout << "task_result = " << task_result << std::endl;
/// actor
std::vector<ray::ObjectRef<int> > actors;
for(int i=0;i<100;i++) {
ray::ActorHandle<Counter> actor = ray::Actor(Counter::FactoryCreate).Remote(0);
/// actor task
actors.push_back(actor.Task(&Counter::Add).Remote(3));
}
for(int i=0;i<100;i++) {
int actor_task_result = *(ray::Get(actors[i]));
std::cout << "actor_task_result = " << actor_task_result << std::endl;
}
/// actor task with reference argument
/* auto actor_object2 = actor.Task(&Counter::Add).Remote(task_object);
int actor_task_result2 = *(ray::Get(actor_object2));
std::cout << "actor_task_result2 = " << actor_task_result2 << std::endl;
*/
/// shutdown
ray::Shutdown();
return 0;
}
The code works well on a cluster with only one node, but it does not work on a cluster with two or more nodes. The exception is as follows:
[2023-07-14 17:06:22,016 E 2026113 2026113] logging.cc:97: Unhandled exception: N3ray8internal18RayWorkerExceptionE. what():
[2023-07-14 17:06:22,027 E 2026113 2026113] logging.cc:104: Stack trace:
/home/....../lib/python3.10/site-packages/ray/cpp/lib/libray_api.so(+0xd6fe6a) [0x7f3007a6fe6a] ray::operator<<()
/home/....../lib/python3.10/site-packages/ray/cpp/lib/libray_api.so(_ZN3ray16TerminateHandlerEv+0xd8) [0x7f3007a72628] ray::TerminateHandler()
/lib64/libstdc++.so.6(+0xad37c) [0x7f3006b8037c]
/lib64/libstdc++.so.6(+0xad3e7) [0x7f3006b803e7]
/lib64/libstdc++.so.6(+0xad649) [0x7f3006b80649]
/home/....../lib/python3.10/site-packages/ray/cpp/lib/libray_api.so(+0x48d32c) [0x7f300718d32c] ray::internal::NativeObjectStore::CheckException()
/home/....../lib/python3.10/site-packages/ray/cpp/lib/libray_api.so(_ZN3ray8internal17NativeObjectStore6GetRawERKSt6vectorINS_8ObjectIDESaIS3_EEi+0x27c) [0x7f300726009c] ray::internal::NativeObjectStore::GetRaw()
/home/....../lib/python3.10/site-packages/ray/cpp/lib/libray_api.so(_ZN3ray8internal17NativeObjectStore6GetRawERKNS_8ObjectIDEi+0x96) [0x7f300725fab6] ray::internal::NativeObjectStore::GetRaw()
/home/....../lib/python3.10/site-packages/ray/cpp/lib/libray_api.so(_ZN3ray8internal11ObjectStore3GetERKNS_8ObjectIDEi+0x13) [0x7f3007260d83] ray::internal::ObjectStore::Get()
/.../lib/python3.10/site-packages/ray/cpp/lib/libray_api.so(_ZN3ray8internal18AbstractRayRuntime3GetERKSs+0x49) [0x7f3007259649] ray::internal::AbstractRayRuntime::Get()
./example() [0x40ee0e] ray::GetFromRuntime<>()
./example() [0x40b83c] ray::Get<>()
./example() [0x404ab8] main
/lib64/libc.so.6(+0x3feb0) [0x7f3006813eb0] __libc_start_call_main
/lib64/libc.so.6(__libc_start_main+0x80) [0x7f3006813f60] __libc_start_main@GLIBC_2.2.5
./example() [0x404665] _start
*** SIGABRT received at time=1689325582 on cpu 7 ***
PC: @ 0x7f300687554c (unknown) __pthread_kill_implementation
@ 0x7f3006828df0 (unknown) (unknown)
[2023-07-14 17:06:22,028 E 2026113 2026113] logging.cc:361: *** SIGABRT received at time=1689325582 on cpu 7 ***
[2023-07-14 17:06:22,028 E 2026113 2026113] logging.cc:361: PC: @ 0x7f300687554c (unknown) __pthread_kill_implementation
[2023-07-14 17:06:22,028 E 2026113 2026113] logging.cc:361: @ 0x7f3006828df0 (unknown) (unknown)
run.sh: line 17: 2026113 Aborted (core dumped) LD_LIBRARY_PATH=/.../lib ./example
So, how can I solve this? Thank you!