[ofa-general] ibv_get_cq_event blocking forever after successful ibv_post_send...

Ganesh Sadasivan gsadasiv7 at gmail.com
Mon May 12 16:32:26 PDT 2008


Hi,

  Was there any resolution to this issue? I am seeing exactly the same
  behavior: no completion event is generated after doing a send. A few
  sends succeeded and produced completion events, but then the events
  simply stop, without any error indication.

  I am pasting the part of the code which does this operation:

create_qp ()
{
    struct ibv_qp_init_attr   init_attr;

    init_attr.cap.max_send_wr = 20;
    init_attr.cap.max_recv_wr = 20;
    init_attr.cap.max_recv_sge = 1;
    init_attr.cap.max_send_sge = 1;
    init_attr.qp_type = IBV_QPT_RC;
    init_attr.send_cq = send_cq;
    init_attr.recv_cq = recv_cq;
    init_attr.sq_sig_all = 0;

    qp = ibv_create_qp(pd, &init_attr);

    if (!qp) {
        return 1;
    }

    attr.qp_state        = IBV_QPS_INIT;
    attr.pkey_index      = 0;
    attr.port_num        = src_port;
    attr.qp_access_flags = 0;

    if (ibv_modify_qp(qp, &attr,
                      IBV_QP_STATE |
                      IBV_QP_PKEY_INDEX |
                      IBV_QP_PORT |
                      IBV_QP_ACCESS_FLAGS)) {
        return 1;
    }


    attr.qp_state = IBV_QPS_RTR;
    attr.path_mtu = IBV_MTU_2048;
    attr.rq_psn = 1;
    attr.dest_qp_num = dst_qp_num;
    attr.max_dest_rd_atomic = 1;
    attr.ah_attr.dlid = dst_lid;
    attr.ah_attr.sl = serv_level;
    attr.ah_attr.port_num = src_port;
    attr.min_rnr_timer = 12;
    attr.ah_attr.is_global = 0;
    attr.ah_attr.src_path_bits = 0;

    if (ibv_modify_qp(qp, &attr,
                          IBV_QP_STATE|
                          IBV_QP_PATH_MTU|
                          IBV_QP_RQ_PSN|
                          IBV_QP_DEST_QPN|
                          IBV_QP_MAX_DEST_RD_ATOMIC|
                          IBV_QP_AV|
                          IBV_QP_MIN_RNR_TIMER)) {
        return 1;
    }

    attr.qp_state = IBV_QPS_RTS;
    attr.timeout = 10;
    attr.retry_cnt = 7;
    attr.rnr_retry = 7;
    attr.sq_psn = 1;
    attr.max_rd_atomic = 1;
    if (ibv_modify_qp(qp, &attr,
                      IBV_QP_STATE |
                      IBV_QP_TIMEOUT |
                      IBV_QP_RETRY_CNT |
                      IBV_QP_RNR_RETRY |
                      IBV_QP_SQ_PSN |
                      IBV_QP_MAX_QP_RD_ATOMIC)) {
        return 1;
    }

}

/*
 * Post one signaled SEND work request for `datasz` bytes starting at `buf`.
 *
 * buf    - payload; its address goes into the scatter/gather entry, which
 *          uses send_mr->lkey as the local key.
 * datasz - payload length in bytes.
 * arg    - opaque value stored in send_arg[] next to the buffer address.
 *
 * The ring slot `tail` of sq_wr[], send_sgl[], sq_wr_id[], send_data[] and
 * send_arg[] is filled in; those arrays, tail, global_cnt, send_mr, qp and
 * bad_wr are declared outside this block.
 *
 * Returns the result of ibv_post_send(): 0 on success, non-zero on failure.
 *
 * NOTE(review): nothing here checks that slot `tail` has already been
 * completed, so the ring can overwrite a work request still in flight.
 * NOTE(review): sq_wr[tail].next is not assigned in this block; it must be
 * NULL, otherwise ibv_post_send() follows it as a chain - confirm.
 * NOTE(review): the bookkeeping array `send_data[]` has the same name as
 * this function in the pasted code; one of them must be renamed in the
 * real source.
 */
int send_data(char *buf, int datasz, void *arg)
{

    int ret;

    /*
     * Save the WR-id so that we can compare against this
     * once tx is done.
     */
    sq_wr_id[tail] = global_cnt++;

    send_sgl[tail].addr = (u64) (unsigned long) buf;
    send_sgl[tail].length = datasz;
    send_sgl[tail].lkey = send_mr->lkey;

    /*
     * Carry the saved id in the work request itself: the completion
     * handler compares wc.wr_id with sq_wr_id[], and wr_id was not being
     * set anywhere in this function.
     */
    sq_wr[tail].wr_id = sq_wr_id[tail];
    sq_wr[tail].opcode = IBV_WR_SEND;
    sq_wr[tail].send_flags = IBV_SEND_SIGNALED;
    sq_wr[tail].sg_list = &send_sgl[tail];
    sq_wr[tail].num_sge = 1;
    send_data[tail] = (u64)buf;
    send_arg[tail] = arg;

    ret = ibv_post_send(qp, &sq_wr[tail], &bad_wr);
    if (ret) {
        /*
         * The request was not queued, so no completion will arrive for
         * this slot. Leave tail where it is so the ring stays in step
         * with the completion side.
         */
        return ret;
    }

    if (tail == 19) { //max_send_wr -1
          tail = 0;
    } else {
          tail += 1;
    }
    return ret;
}

/*
 * Wait for one completion event on comp_channel, acknowledge it, re-arm
 * notification on the CQ that raised it, then poll completions until the
 * queue is empty.
 *
 * For each successful IBV_WC_SEND completion the wr_id is checked against
 * sq_wr_id[head], the ring slot is released and head is advanced.
 *
 * Relies on state declared outside this block: comp_channel, cq, wc, rv,
 * head, sq_wr_id[], send_data[], send_arg[], buf and datasz.
 *
 * Returns 0 when the queue was drained without error; returns 1 if any
 * verbs call fails, a completion carries an error status, or a send
 * completion's wr_id does not match the expected one.
 *
 * NOTE(review): this handles a single event and then returns. If it is the
 * body of a thread, it has to be called in a loop, otherwise
 * later completions are never picked up.
 * NOTE(review): notification is re-armed on ev_cq but completions are
 * polled from `cq`; confirm they are the same CQ, otherwise the CQ that
 * raised the event is never drained.
 * NOTE(review): a CQ only raises an event after ibv_req_notify_cq() has
 * been called on it; confirm it is armed once before the first send.
 */
int recv_thread (void *arg)
{
    struct ibv_cq        *ev_cq;
    void                 *ev_ctx;
    int                   ret;


    ret = ibv_get_cq_event(comp_channel, &ev_cq, &ev_ctx);
    if (ret) {
        return 1;
    }

    ibv_ack_cq_events(ev_cq, 1);

    /* Re-arm before polling so a completion arriving meanwhile is not lost. */
    ret = ibv_req_notify_cq(ev_cq, 0);
    if (ret) {
        return 1;
    }

    while ((rv = ibv_poll_cq(cq, 1, &wc)) == 1) {
        /*
         * Check status first: per the ibv_poll_cq documentation, opcode is
         * not valid in a work completion whose status is not success.
         */
        if (wc.status != IBV_WC_SUCCESS) {
            return 1;
        }

        switch (wc.opcode) {
            case IBV_WC_SEND: {
                if (sq_wr_id[head] != wc.wr_id) {
                    datasz = 0;
                    return 1;
                }
                buf = (char *)send_data[head];
                arg = (u64)send_arg[head];
                sq_wr_id[head] = 0;
                if (head == 19) {//max_send_wr -1
                    head = 0;
                } else {
                    head += 1;
                }
                break;
            }
            default:
                /* Other completion types are not handled in this excerpt. */
                break;
        }
    }

    /* A negative count means the poll itself failed. */
    if (rv < 0) {
        return 1;
    }

    return 0;
}

Thanks
Ganesh

On Mon, May 28, 2007 at 9:28 PM, Roland Dreier <rdreier at cisco.com> wrote:

>  > Any ideas on why the ibv_get_cq_event() would never see an event
>  > after a "successful" send requesting a completion event?
>
> It's either a bug in your code or a bug in the stack below your code.
> The best way to debug this would be for you to post your actual code
> (in a form that someone else can run), so that we can either point out
> what's wrong with your code, or have a test case for the real bug.
>
>  - R.
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
>
> To unsubscribe, please visit
> http://openib.org/mailman/listinfo/openib-general
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20080512/0957ed73/attachment.html>


More information about the general mailing list