[ofa-general] How many processes on a node can open IB device ?

Tang, Changqing changquing.tang at hp.com
Sat Aug 16 08:14:08 PDT 2008



HI, driver engineers:

        I have a system with 8 cores and 16 GB of memory; the node is idle.
It has a Mellanox ConnectX card:
hca_id: mlx4_0
         fw_ver:                         2.3.000
         node_guid:                      001e:0bff:ff83:9f1c
         sys_image_guid:                 001e:0bff:ff83:9f1f
         vendor_id:                      0x02c9
         vendor_part_id:                 25418
         hw_ver:                         0xA0
         board_id:                       HP_08B0000001
         phys_port_cnt:                  2

I have a simple IBV program (attached below) that only opens the device and
creates a PD; after that the code just sleeps.

        When I start as many processes as I can, it fails at 895 copies with the error:

        ibv_open_device() failed

So how many IB processes can I run on a node ? Is there any driver limit ?

Thanks for help.

--CQ Tang, HP-MPI



compile:
gcc -o ibv.x ibv.c -libverbs

run:
#!/bin/sh
# Start copies of ./ibv.x in the background until one of them dies early.
#
# "$?" after "./ibv.x &" only reports whether the job could be launched; it
# is never the exit status of ./ibv.x itself, so testing it alone cannot stop
# the loop when a copy fails.  Each copy is therefore given a short settle
# time and then probed with "kill -0": a copy that is already gone at that
# point failed during start-up.
#
# NOTE(review): the settle time is fractional so that roughly a thousand
# copies can be started well within the time each copy stays alive; a
# fractional argument to sleep is an extension (e.g. GNU coreutils) --
# confirm it is available on the target system.

settle=0.2
count=0
while :
do
        count=$((count + 1))
        echo "%%%%%%%%%%%%%loop: $count;"

        ./ibv.x &
        launch_status=$?
        pid=$!

        # Same stop condition as before: give up if the job cannot be launched.
        if [ "$launch_status" -ne 0 ]
        then
                echo "could not launch copy $count" >&2
                break
        fi

        sleep "$settle"
        if ! kill -0 "$pid" 2>/dev/null
        then
                # The copy has already terminated; collect its exit status.
                wait "$pid"
                echo "copy $count exited early with status $?" >&2
                break
        fi
done


ibv.c:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <infiniband/verbs.h>

/*
 * Open the first IB device, check that one of its ports is active,
 * allocate a protection domain, and then keep both open for HOLD_SECONDS
 * so that many copies of this program can be run side by side.
 *
 * Returns 0 after the hold period, or -1 if no device is found, a verbs
 * call fails, or no port is active.  A one-line message is written to
 * stderr in every case.
 */
int
main(int argc, char *argv[])
{
        enum { HOLD_SECONDS = 600 };    /* how long the device and PD stay open */

        struct ibv_device       **interface_list;
        struct ibv_device_attr  device_attr;
        struct ibv_port_attr    port_attr;
        struct ibv_context      *hca_hndl;
        struct ibv_pd           *pd_hndl = NULL;
        int                     nif = 0;
        int                     err;
        int                     i;
        int                     status = -1;

        (void)argc;
        (void)argv;

        /* Check the returned list as well as the count before indexing it. */
        interface_list = ibv_get_device_list(&nif);
        if (interface_list == NULL || nif <= 0) {
                fprintf(stderr, "NO ibv interface found\n");
                if (interface_list != NULL) {
                        ibv_free_device_list(interface_list);
                }
                return (-1);
        }

        /*
         * The list is only needed to pick the device to open; release it
         * right after the open so that no later error path can leak it.
         */
        hca_hndl = ibv_open_device(interface_list[0]);
        ibv_free_device_list(interface_list);
        if (hca_hndl == NULL) {
                fprintf(stderr, "ibv_open_device() failed\n");
                return (-1);
        }

        err = ibv_query_device(hca_hndl, &device_attr);
        if (err != 0) {
                fprintf(stderr, "ibv_query_device() failed\n");
                goto out;
        }

        /* Port numbers are 1-based; stop at the first port that is active. */
        for (i = 0; i < device_attr.phys_port_cnt; i++) {
                err = ibv_query_port(hca_hndl, (uint8_t)(i + 1), &port_attr);
                if (err != 0) {
                        fprintf(stderr, "ibv_query_port() failed\n");
                        goto out;
                }

                if (port_attr.state == IBV_PORT_ACTIVE) {
                        break;
                }
        }
        if (i == device_attr.phys_port_cnt) {
                fprintf(stderr, "No active port\n");
                goto out;
        }

        pd_hndl = ibv_alloc_pd(hca_hndl);
        if (pd_hndl == NULL) {
                fprintf(stderr, "ibv_alloc_pd() failed\n");
                goto out;
        }

        sleep(HOLD_SECONDS);
        fprintf(stderr, "No IBV error\n");
        status = 0;

out:
        /* Best-effort teardown in reverse order of acquisition. */
        if (pd_hndl != NULL) {
                (void)ibv_dealloc_pd(pd_hndl);
        }
        (void)ibv_close_device(hca_hndl);
        return (status);
}



More information about the general mailing list