[libfabric-users] connection error using prov/verbs

Sergey Tatarintsev s.tatarintsev at postgrespro.ru
Thu Jul 24 20:17:05 PDT 2025


Hello!

I'm trying to write a simple app that allows two RoCE hosts to 
communicate using the verbs provider, but I'm getting an error during 
connection:

libfabric:534529:1753411967::verbs:core:ofi_check_ep_attr():742<info> 
Unsupported protocol version

I tried to debug it, and there are indeed differences between the prov 
and user infos in ofi_check_ep_attr():

(gdb) p *user_info->ep_attr
$1 = {type = FI_EP_MSG, protocol = 0, protocol_version = 1, max_msg_size 
= 1073741824, msg_prefix_size = 0, max_order_raw_size = 1073741824, 
max_order_war_size = 0,
   max_order_waw_size = 1073741824, mem_tag_format = 0, tx_ctx_cnt = 1, 
rx_ctx_cnt = 1, auth_key_size = 0, auth_key = 0x0}
(gdb) p *prov_info->ep_attr
$2 = {type = FI_EP_MSG, protocol = 0, protocol_version = 0, max_msg_size 
= 0, msg_prefix_size = 0, max_order_raw_size = 0, max_order_war_size = 0,
   max_order_waw_size = 0, mem_tag_format = 0, tx_ctx_cnt = 0, 
rx_ctx_cnt = 0, auth_key_size = 0, auth_key = 0x0}

Can anyone help me fix my mistake? The code is below.


#include <arpa/inet.h>
#include <netinet/in.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#include <rdma/fabric.h>
#include <rdma/fi_cm.h>
#include <rdma/fi_domain.h>
#include <rdma/fi_endpoint.h>
#include <rdma/fi_eq.h>
#include <rdma/fi_errno.h>
#include <rdma/fi_rma.h>

/*
 * report(res): if `res` is non-zero, print it together with its
 * fi_strerror() text and return -1 from the enclosing function.
 *
 * The argument is evaluated exactly once.  The macro expands to a single
 * statement without a trailing semicolon, so `report(x);` is safe inside
 * an unbraced if/else.
 */
#define report(res) \
    do { \
        int report_res_ = (res); \
        if (report_res_) { \
            printf("res=%i %s\n", report_res_, fi_strerror(report_res_)); \
            return -1; \
        } \
    } while (0)

/*
 * eval(exp): evaluate `exp`, store the result in the file-scope variable
 * `res`, and if it is non-zero print the expression text, the result and
 * its fi_strerror() text, then return -1 from the enclosing function.
 *
 * Expands to a single statement without a trailing semicolon, so
 * `eval(x);` is safe inside an unbraced if/else.
 */
#define eval(exp) \
    do { \
        res = (exp); \
        if (res) { \
            printf(#exp "; res=%i %s\n", res, fi_strerror(res)); \
            return -1; \
        } \
    } while (0)


/*
 * File-scope state shared by main(), post_writemsg() and the two reader
 * threads.  Every object is declared exactly once.
 */

/* Libfabric objects created in main(). */
struct fi_info *hints, *info;
struct fid_fabric *fabric;
struct fid_eq *eq;                  /* read by eq_thread() */
struct fi_eq_attr eq_attr = {0};
struct fid_domain *domain;
struct fid_ep *ep;                  /* used by post_writemsg() */
struct fid_pep *pep;                /* not referenced by the code shown here */
struct fi_av_attr av_attr = {0};
struct fid_av *av;
struct fi_cq_attr cq_attr = {0};
struct fid_cq *cq;                  /* read by cq_thread() */
fi_addr_t fi_addr[32];              /* fi_addr[0] is the peer used by post_writemsg() */

/* Data buffer registered with fi_mr_reg() in main(). */
#define bufsize 32
char *buf;
struct fid_mr *mr;

/* Result of the most recent eval() invocation. */
int res;

/* Peer address handed to fi_av_insert() in main(). */
struct in_addr iaddr;
struct sockaddr_in addrs[2];

/* Handles of the CQ and EQ reader threads. */
pthread_t cqth, eqth;

void *
cq_thread(void *arg)
{
     struct fi_cq_data_entry comp;
     ssize_t ret;
     struct fi_cq_err_entry err;
     const char *err_str;
     struct fi_eq_entry eq_entry;
     uint32_t event;

     while (1)
     {
         ret = fi_cq_sread(cq, &comp, 1, NULL, -1);
         if (ret != 1) {
             printf("fi_cq_sread() = %i", res);
             continue;
         }
         printf("===> CQ=%lu (%lu)\n", comp.len, comp.flags);
     }
     return NULL;
}

void *
eq_thread(void *arg)
{
     int ret;
     struct fi_eq_cm_entry entry;
     uint32_t event;

     while(1)
     {
         ret = fi_eq_sread(eq, &event, &entry, sizeof (entry), -1, 0);
         if (ret != sizeof (entry))
         {
             printf("\nfi_eq_sread err %i %s", ret, fi_strerror(ret));
             continue;

         }
         printf("===> EQ=%i\n", event);
     }
     return NULL;
}


static void print_infos(struct fi_info *inf)
{
     struct fi_info *cur;
     int ret;

     for (cur = inf; cur; cur = cur->next) {
         printf("provider: %s\n", cur->fabric_attr->prov_name);
         printf("    fabric: %s\n", cur->fabric_attr->name);
         printf("    domain: %s\n", cur->domain_attr->name);
         printf("    EP type: %i\n", cur->ep_attr->type);
         printf("    proto: %i\n", cur->ep_attr->protocol);
         printf("    proto version: %i\n", cur->ep_attr->protocol_version);
         printf("    version: %d.%d\n", 
FI_MAJOR(cur->fabric_attr->prov_version),
             FI_MINOR(cur->fabric_attr->prov_version));
     }
}

ssize_t post_writemsg(char *buf, size_t size, struct fi_rma_iov *remote)
{
     struct fi_msg_rma msg;
     struct iovec msg_iov;
     struct fi_rma_iov rma_iov;

     msg_iov.iov_base = buf;
     msg_iov.iov_len = size;
     msg.msg_iov = &msg_iov;
     msg.desc = fi_mr_desc(mr);
     msg.iov_count = 1;
     rma_iov.addr = remote->addr;
     rma_iov.len = size;
     rma_iov.key = remote->key;
     msg.rma_iov = &rma_iov;
     msg.rma_iov_count = 1;
     msg.addr = fi_addr[0];
     msg.data = 5678;
     msg.context = NULL;

     return fi_writemsg(ep, &msg, FI_REMOTE_CQ_DATA);
}

int main(int argc, char *argv[])
{

     char *host = argc>1?argv[1]:NULL;

     buf = malloc(bufsize);
     memset(buf, 'x', bufsize);

     hints = fi_allocinfo();
     hints->addr_format = FI_SOCKADDR;
     hints->ep_attr->type = FI_EP_RDM;
     hints->ep_attr->protocol = FI_PROTO_UNSPEC;
     hints->domain_attr->mr_mode = 
FI_MR_LOCAL|FI_MR_VIRT_ADDR|FI_MR_ALLOCATED|FI_MR_PROV_KEY;
     hints->fabric_attr->prov_name = "verbs";
     hints->caps = FI_RMA;
     hints->mode = FI_CONTEXT | FI_RX_CQ_DATA;
     eval(fi_getinfo(FI_VERSION(1, 8), host, "1234", 0, hints, &info));
     print_infos(info);

     eval(fi_fabric(info->fabric_attr, &fabric, NULL));

     eq_attr.wait_obj = FI_WAIT_UNSPEC;
     eval(fi_eq_open(fabric, &eq_attr, &eq, NULL));

     eval(fi_domain(fabric, info, &domain, NULL));

     cq_attr.format = FI_CQ_FORMAT_DATA;
     cq_attr.wait_obj = FI_WAIT_UNSPEC;
     cq_attr.wait_cond = FI_CQ_COND_NONE;
     eval(fi_cq_open(domain, &cq_attr, &cq, NULL));

     eval(fi_mr_reg(domain, buf, bufsize, 
FI_REMOTE_READ|FI_REMOTE_WRITE|FI_SEND|FI_RECV, 0, 0, 0, &mr, NULL));
     eval(fi_endpoint(domain, info, &ep, NULL));
     eval(fi_ep_bind(ep, &eq->fid, 0));
     eval(fi_ep_bind(ep, &cq->fid, FI_TRANSMIT|FI_RECV));

     eval(fi_av_open(domain, &av_attr, &av, NULL));
     if (host)
     {
         inet_aton(host, &iaddr);
         addrs[0].sin_family = AF_INET;
         addrs[0].sin_port = htons(1234);
         addrs[0].sin_addr = iaddr;
         av_attr.type = FI_AV_UNSPEC;
         av_attr.count = 32;
         if(fi_av_insert(av, addrs, 1, fi_addr, 0, NULL) !=1)
             report(res);
     }
     eval(fi_ep_bind(ep, &av->fid, 0));
     eval(fi_enable(ep));

     printf("%s\n", host?"writemsg":"wait for client");
     pthread_create(&cqth, NULL, cq_thread, NULL);
     pthread_create(&eqth, NULL, eq_thread, NULL);

     for (;;)
     {
         struct fi_rma_iov remote;
         if (host)
         {
             remote.addr = (uint64_t)buf; // doesn't matter now
             remote.key = fi_mr_key(mr);  // doesn't matter now
             remote.len = bufsize;
             res = post_writemsg(buf, bufsize, &remote);
             printf("(%i) %s\n",res, fi_strerror(res));
             sleep(2);
         }
     }

}

-- 
With best regards,
Sergey Tatarintsev,
PostgresPro



More information about the Libfabric-users mailing list