[openib-general] [VAPI]VAPI_poll_cq: CQ is empty

Dotan Barak dotanb at mellanox.co.il
Sat Feb 18 23:00:36 PST 2006


 
 
-----Original Message-----
From: openib-general-bounces at openib.org
[mailto:openib-general-bounces at openib.org] On Behalf Of Ian Jiang
Sent: Friday, February 17, 2006 9:20 AM
To: openib-general
Subject: [openib-general] [VAPI]VAPI_poll_cq: CQ is empty
 
To get familiar with the IBGD-1.8.0 VAPI, I wrote a very simple program,
based on two examples, *hca_per* and *rctp*, in IBGD.
 A Sender and a Receiver ran on two different nodes just to complete a
Send/Recv process. 

Sender
======
(a) Create IB resources: 
1.	List HCAs (only one HCA in fact)
2.	Get the handle of the HCA
3.	Query the HCA
4.	Allocate a PD
5.	Query Port 1 of the HCA (only one Port in fact)
6.	Create Send CQ and Recv CQ
7.	Create QP
(b) Modify QP to INIT state:
1.	qp_move_to_init(&params);
(c) Create MRs for Recv and Send respectively:
1.	user_mr_create(&params.in_mr, params.mr_sz_req);
2.	user_mr_create(&params.out_mr, params.mr_sz_req);
(d) Send parameters to Receiver

(e) Get ready to transfer:
1.	Modify QP to RTR state
2.	Modify QP to RTS state
(f) Post Send
1.	post_send_req(&params.ib_res, &params.out_mr)
(g) Wait Send to complete
1.	reap_send_req(&params.ib_res, &params.out_mr, 1/* not block*/);

Receiver
=======
(a) Wait parameters from Sender

(b) Create IB resources: 
1.	List HCAs (only one HCA in fact)
2.	Get the handle of the HCA
3.	Query the HCA
4.	Allocate a PD
5.	Query Port 1 of the HCA (only one Port in fact)
6.	Create Send CQ and Recv CQ
7.	Create QP 
(b) Modify QP to INIT state:
1.	qp_move_to_init(&params);
(c) Create MRs for Recv and Send respectively:
1.	user_mr_create(&params.in_mr, params.mr_sz_req);
2.	user_mr_create(&params.out_mr, params.mr_sz_req);
(d) Post Recv
1.	post_recv_req(&params.ib_res, &params.in_mr)
(e) Get ready to transfer:
1.	Modify QP to RTR state
2.	Modify QP to RTS state
(g) Wait Recv to complete
1.	reap_recv_req(&params.ib_res, &params.in_mr, 1/* not block*/);

Problem:
=======
Both VAPI_poll_cq for Send CQ and Recv CQ returned "CQ is empty". And I
failed to find out where the problem was, so turned to OpenIB for help.
I am afraid that I am not clear enough about the CQ processing. Any
suggestion is appreciated!


Here are some pieces of code:
=========================

/*********************************** Create IB Resources
****************************************/
int ib_res_create(struct ib_resource *ib_res_p)
{
    VAPI_ret_t vapi_ret;
    u_int32_t num_of_hcas;
    VAPI_hca_id_t inst_hca_id;
    VAPI_cqe_num_t            num_of_cqe;

    VAPI_srq_attr_t         srq_props;
    VAPI_srq_attr_t         actual_srq_props;

    VAPI_qp_init_attr_t    qp_init_attr;
    VAPI_qp_init_attr_ext_t qp_ext_attr;
    VAPI_qp_prop_t         qp_prop;


    if (ib_res_p == NULL) {
        PRINT_ERR("NULL ib_res_p\n");
        return -1;
    }
    
    ini_ib_res(ib_res_p);

    /* list HCAs */
    vapi_ret = EVAPI_list_hcas(1, &num_of_hcas, &inst_hca_id);
    if ((vapi_ret != VAPI_OK) && (vapi_ret != VAPI_EAGAIN)) {
        printf("list HCAs failed\n");
        VAPIERR(vapi_ret);
        return -1;
    }
    PRINT_TRACE("number of HCAs: %d, HCA ID: %s\n", num_of_hcas, (char
*)inst_hca_id);
    switch(num_of_hcas) {
    case 0:
        printf("No HCAs installed\n");
        return -1;
    case 1:
        strcpy(ib_res_p->hca_id, inst_hca_id);
        break;
    default:
        /* ToDo: deal with multiple HCAs */
        printf("ToDo: deal with multiple HCAs\n");
        printf("Use the first HCA\n");
        strcpy(ib_res_p->hca_id, inst_hca_id);
    }
    PRINT_TRACE("HCA to be used: %s\n", (char *)ib_res_p->hca_id);

    /* get the handle of the HCA */
    vapi_ret = EVAPI_get_hca_hndl(ib_res_p->hca_id,
&ib_res_p->hca_hndl);
    if (vapi_ret != VAPI_OK) {
        printf("HCA not open\n");
        VAPIERR(vapi_ret);
        goto clean_exit;
    }

    /* query the HCA */
    vapi_ret = VAPI_query_hca_cap(ib_res_p->hca_hndl, 
            &ib_res_p->hca_vendor, 
            &ib_res_p->hca_cap);
    if (vapi_ret != VAPI_OK) {
        printf("Query HCA failed\n");
        VAPIERR(vapi_ret);
        goto clean_exit;
    }
    PRINT_HCA_CAP(&ib_res_p->hca_vendor, &ib_res_p->hca_cap);

    /* allocate PD */
    //vapi_ret = EVAPI_alloc_pd(ib_res_p->hca_hndl, MAX_NUM_AVS,
&ib_res_p->pd_hndl);
    vapi_ret = VAPI_alloc_pd(ib_res_p->hca_hndl, &ib_res_p->pd_hndl);
    if (vapi_ret != VAPI_OK) {
        printf("Allocate PA failed\n");
        VAPIERR(vapi_ret);
        goto clean_exit;
    }
    PRINT_TRACE("PD allocated: %ld\n", ib_res_p->pd_hndl);
    
    /* query Port */
    vapi_ret = VAPI_query_hca_port_prop(ib_res_p->hca_hndl, 
            DEFAULT_PORT_NUM, 
            &ib_res_p->hca_port);
    if (vapi_ret != VAPI_OK) {
        printf("Query Port %d failed\n", DEFAULT_PORT_NUM);
        VAPIERR(vapi_ret);
        goto clean_exit;
    }
    PRINT_PORT_PROP(&ib_res_p->hca_port);

    /* send CQ */
    vapi_ret = VAPI_create_cq(ib_res_p->hca_hndl, 
            MIN_SEND_CQE_NUM,
            &ib_res_p->s_cq_hndl,
            &num_of_cqe);
    if (vapi_ret != VAPI_OK) {
        printf("Create CQ for send failed\n");
        VAPIERR(vapi_ret);
        goto clean_exit;
    }
    PRINT_TRACE("CQ for send created. CQE NUM: %d\n", num_of_cqe);

    /* receive CQ */
    vapi_ret = VAPI_create_cq(ib_res_p->hca_hndl, 
            MIN_SEND_CQE_NUM,
            &ib_res_p->r_cq_hndl,
            &num_of_cqe);
    if (vapi_ret != VAPI_OK) {
        printf("Create CQ for send failed\n");
        VAPIERR(vapi_ret);
        goto clean_exit;
    }
    PRINT_TRACE("CQ for receive created. CQE NUM: %d\n", num_of_cqe);

    /* QP */
    qp_init_attr.rq_cq_hndl     = ib_res_p->r_cq_hndl;
    qp_init_attr.sq_cq_hndl     = ib_res_p->s_cq_hndl;
    
    qp_init_attr.cap.max_oust_wr_rq = QP_INI_MAX_OUST_WR_RQ_NUM;
    qp_init_attr.cap.max_oust_wr_sq = QP_INI_MAX_OUST_WR_SQ_NUM;
    qp_init_attr.cap.max_sg_size_rq = QP_INI_MAX_SG_SIZE_RQ_NUM;
    qp_init_attr.cap.max_sg_size_sq = QP_INI_MAX_SG_SIZE_SQ_NUM;

    qp_init_attr.pd_hndl            = ib_res_p->pd_hndl;
    qp_init_attr.rdd_hndl           = 0;
    qp_init_attr.sq_sig_type        = VAPI_SIGNAL_REQ_WR;
    qp_init_attr.rq_sig_type        = VAPI_SIGNAL_ALL_WR;

    qp_init_attr.ts_type            = VAPI_TS_RC;

    vapi_ret = VAPI_create_qp_ext(ib_res_p->hca_hndl, 
            &qp_init_attr, 
            &qp_ext_attr,
            &ib_res_p->qp_entry.qp_hndl, 
            &qp_prop);
    if (vapi_ret != VAPI_OK) {
        printf("Create QP failed\n");
        VAPIERR(vapi_ret);
        goto clean_exit;
    }
    ib_res_p->qp_entry.qp_num = qp_prop.qp_num;
    ib_res_p->qp_entry.srq_hndl = ib_res_p->srq_hndl;
    PRINT_TRACE("QP created\n");
    PRINT_QP_PROP(&qp_prop);

    return 0;
    
clean_exit:
    clean_ib_res(ib_res_p);
    return -1;
}

/*****************************Modify QP state
***************************************/
/*
 * Move the test QP to the INIT state.
 *
 * Sets the P_Key index to 0, binds the QP to DEFAULT_PORT_NUM and enables
 * remote write, read and atomic access, then calls VAPI_modify_qp().
 *
 * param_p: test parameters; ib_res.hca_hndl and ib_res.qp_entry.qp_hndl
 *          identify the QP to modify.
 *
 * Returns 0 on success, -1 if VAPI_modify_qp() fails.
 */
int qp_move_to_init(test_params_t *param_p)
{
    VAPI_qp_attr_t         attr;
    VAPI_qp_attr_mask_t    mask;
    VAPI_qp_cap_t          cap;
    VAPI_ret_t             rc;

    /* Attribute values for the transition. */
    attr.qp_state            = VAPI_INIT;
    attr.pkey_ix             = 0;
    attr.port                = DEFAULT_PORT_NUM;
    attr.remote_atomic_flags = VAPI_EN_REM_WRITE
                             | VAPI_EN_REM_READ
                             | VAPI_EN_REM_ATOMIC_OP;

    /* Flag exactly the attributes filled in above. */
    QP_ATTR_MASK_CLR_ALL(mask);
    QP_ATTR_MASK_SET(mask, QP_ATTR_QP_STATE);
    QP_ATTR_MASK_SET(mask, QP_ATTR_PKEY_IX);
    QP_ATTR_MASK_SET(mask, QP_ATTR_PORT);
    QP_ATTR_MASK_SET(mask, QP_ATTR_REMOTE_ATOMIC_FLAGS);

    rc = VAPI_modify_qp(param_p->ib_res.hca_hndl,
                        param_p->ib_res.qp_entry.qp_hndl,
                        &attr, &mask, &cap);
    if (rc == VAPI_OK) {
        PRINT_TRACE("Modified QP to INIT\n");
        print_qp_cap(&cap);
        return 0;
    }

    printf("Error: Modifying  QP to INIT: %s\n", VAPI_strerror(rc));
    return -1;
}

/*
 * Move the test QP to the RTR (ready to receive) state.
 *
 * Stores the chosen path MTU in param_p->mtu (MTU1024 when the HCA's
 * vendor_part_id is 23108, MTU2048 otherwise), then programs the address
 * vector, path MTU, receive PSN, outstanding read/atomic limit, destination
 * QP number and minimum RNR timer from param_p->dst_msg and the file's
 * constants.
 *
 * param_p: test parameters; dst_msg must already hold the peer's LID and
 *          QP number.
 *
 * Returns 0 on success, -1 if VAPI_modify_qp() fails.
 */
int qp_move_to_rtr(test_params_t *param_p)
{
    VAPI_qp_attr_t         attr;
    VAPI_qp_attr_mask_t    mask;
    VAPI_qp_cap_t          cap;
    VAPI_ret_t             rc;

    if (param_p->ib_res.hca_vendor.vendor_part_id == 23108) {
        param_p->mtu = MTU1024;
    } else {
        param_p->mtu = MTU2048;
    }

    /* Attribute values for the transition. */
    attr.qp_state         = VAPI_RTR;
    attr.av.sl            = 0; /*USED_SL*/
    attr.av.grh_flag      = FALSE;
    attr.av.dlid          = param_p->dst_msg.lid;
    attr.av.static_rate   = 2; /* 1x */
    attr.av.src_path_bits = 0;
    attr.path_mtu         = param_p->mtu;
    attr.rq_psn           = START_PSN;
    attr.qp_ous_rd_atom   = QP_OUS_RD_ATOM;
    attr.dest_qp_num      = param_p->dst_msg.qp_num;
    attr.min_rnr_timer    = 0;

    /* Flag exactly the attributes filled in above. */
    QP_ATTR_MASK_CLR_ALL(mask);
    QP_ATTR_MASK_SET(mask, QP_ATTR_QP_STATE);
    QP_ATTR_MASK_SET(mask, QP_ATTR_AV);
    QP_ATTR_MASK_SET(mask, QP_ATTR_PATH_MTU);
    QP_ATTR_MASK_SET(mask, QP_ATTR_RQ_PSN);
    QP_ATTR_MASK_SET(mask, QP_ATTR_QP_OUS_RD_ATOM);
    QP_ATTR_MASK_SET(mask, QP_ATTR_DEST_QP_NUM);
    QP_ATTR_MASK_SET(mask, QP_ATTR_MIN_RNR_TIMER);

    rc = VAPI_modify_qp(param_p->ib_res.hca_hndl,
                        param_p->ib_res.qp_entry.qp_hndl,
                        &attr, &mask, &cap);
    if (rc == VAPI_OK) {
        PRINT_TRACE("Modified QP to RTR\n");
        print_qp_cap(&cap);
        return 0;
    }

    printf("Error: Modifying  QP to RTR: %s\n", VAPI_strerror(rc));
    return -1;
}

/*
 * Move the test QP to the RTS (ready to send) state.
 *
 * Programs the send PSN, ACK timeout (18), retry count (6), RNR retry
 * count (6) and the destination outstanding read/atomic limit, then calls
 * VAPI_modify_qp().
 *
 * param_p: test parameters; ib_res.hca_hndl and ib_res.qp_entry.qp_hndl
 *          identify the QP to modify.
 *
 * Returns 0 on success, -1 if VAPI_modify_qp() fails.
 */
int qp_move_to_rts(test_params_t *param_p)
{
    VAPI_qp_attr_t         attr;
    VAPI_qp_attr_mask_t    mask;
    VAPI_qp_cap_t          cap;
    VAPI_ret_t             rc;

    /* Attribute values for the transition. */
    attr.qp_state        = VAPI_RTS;
    attr.sq_psn          = START_PSN;
    attr.timeout         = 18;
    attr.retry_count     = 6;
    attr.rnr_retry       = 6;
    attr.ous_dst_rd_atom = QP_OUS_RD_ATOM;

    /* Flag exactly the attributes filled in above. */
    QP_ATTR_MASK_CLR_ALL(mask);
    QP_ATTR_MASK_SET(mask, QP_ATTR_QP_STATE);
    QP_ATTR_MASK_SET(mask, QP_ATTR_SQ_PSN);
    QP_ATTR_MASK_SET(mask, QP_ATTR_TIMEOUT);
    QP_ATTR_MASK_SET(mask, QP_ATTR_RETRY_COUNT);
    QP_ATTR_MASK_SET(mask, QP_ATTR_RNR_RETRY);
    QP_ATTR_MASK_SET(mask, QP_ATTR_OUS_DST_RD_ATOM);

    rc = VAPI_modify_qp(param_p->ib_res.hca_hndl,
                        param_p->ib_res.qp_entry.qp_hndl,
                        &attr, &mask, &cap);
    if (rc == VAPI_OK) {
        PRINT_TRACE("Modified QP to RTS\n");
        print_qp_cap(&cap);
        return 0;
    }

    printf("Error: Modifying  QP to RTS: %s\n", VAPI_strerror(rc));
    return -1;
}
/************************** Recv/Send requests
******************************/
/*
 *    post receive request
 *
 * Posts one signaled receive work request on the QP in ib_res_p.  The
 * request has a single scatter entry covering the whole user buffer of
 * u_mr_p, and the buffer address doubles as the work-request ID.
 *
 * ib_res_p: IB resources; supplies the HCA handle and the QP handle.
 * u_mr_p:   registered memory region that receives the data.
 *
 * Returns 0 on success, -1 on a NULL argument or if VAPI_post_rr() fails.
 */
int post_recv_req(struct ib_resource *ib_res_p, struct user_mr *u_mr_p)
{
    VAPI_ret_t             res;
    VAPI_rr_desc_t         rr;
    VAPI_sg_lst_entry_t    sg_entry_r;
    VAPI_hca_hndl_t        hca_hndl;
    VAPI_qp_hndl_t         qp_hndl;

    if (ib_res_p == NULL) {
        PRINT_ERR("NULL ib_res_p\n");
        return -1;
    }
    if (u_mr_p == NULL) {
        PRINT_ERR("NULL user mr pointer\n");
        return -1;
    }

    /*
     * The HCA handle must stay the HCA handle: the original code overwrote
     * it with the SRQ handle, so the request was posted to the wrong device
     * handle.  The SRQ handle is not needed to post on the QP.
     */
    hca_hndl = ib_res_p->hca_hndl;
    qp_hndl = ib_res_p->qp_entry.qp_hndl;

    /* One scatter entry spanning the whole registered buffer. */
    sg_entry_r.lkey = u_mr_p->mrw_rep.l_key;
    sg_entry_r.len = u_mr_p->mrw_req.size;
    sg_entry_r.addr = (VAPI_virt_addr_t)(MT_virt_addr_t)u_mr_p->user_buf;

    rr.opcode = VAPI_RECEIVE;
    rr.comp_type = VAPI_SIGNALED;
    rr.sg_lst_len = 1;
    rr.sg_lst_p  = &sg_entry_r;
    rr.id = sg_entry_r.addr;

    PRINT_RECV_REQ(&rr);

    res = VAPI_post_rr(hca_hndl, qp_hndl, &rr);
    if (res != VAPI_OK) {
        printf("VAPI post Recv Req failed\n");
        VAPIERR(res);
        return -1;
    }

    return 0;
}

/*
 *    post send request
 *
 * Posts one signaled VAPI_SEND work request on the QP in ib_res_p.  The
 * request has a single gather entry covering the whole user buffer of
 * u_mr_p, and the buffer address doubles as the work-request ID.
 *
 * ib_res_p: IB resources; supplies the HCA handle and the QP handle.
 * u_mr_p:   registered memory region holding the data to send.
 *
 * Returns 0 on success, -1 on a NULL argument or if VAPI_post_sr() fails.
 */
int post_send_req(struct ib_resource *ib_res_p, struct user_mr *u_mr_p)
{
    VAPI_ret_t             res;
    /* Zero the descriptor so members not set below are not stack garbage. */
    VAPI_sr_desc_t         sr = {0};
    VAPI_sg_lst_entry_t    sg_entry_s;
    VAPI_hca_hndl_t        hca_hndl;
    VAPI_qp_hndl_t         qp_hndl;

    if (ib_res_p == NULL) {
        PRINT_ERR("NULL ib_res_p\n");
        return -1;
    }
    if (u_mr_p == NULL) {
        PRINT_ERR("NULL user mr pointer\n");
        return -1;
    }

    hca_hndl = ib_res_p->hca_hndl;
    qp_hndl = ib_res_p->qp_entry.qp_hndl;

    /* One gather entry spanning the whole registered buffer. */
    sg_entry_s.lkey = u_mr_p->mrw_rep.l_key;
    sg_entry_s.len = u_mr_p->mrw_req.size;
    sg_entry_s.addr = (VAPI_virt_addr_t)(MT_virt_addr_t)u_mr_p->user_buf;

    sr.comp_type = VAPI_SIGNALED;
    sr.set_se = FALSE;
    sr.opcode = VAPI_SEND;
    sr.remote_qkey = 0;
    sr.sg_lst_len = 1;
    sr.sg_lst_p  = &sg_entry_s;
    sr.id = sg_entry_s.addr;
    PRINT_SEND_REQ(&sr);

    res = VAPI_post_sr(hca_hndl, qp_hndl, &sr);
    if (res != VAPI_OK) {
        printf("VAPI post Send Req failed\n");
        VAPIERR(res);
        return -1;
    }

    return 0;
}

/*
 * Reap the completion of a previously posted send request from the send CQ.
 *
 * ib_res_p: IB resources; supplies the HCA handle, send CQ and QP.
 * u_mr_p:   memory region of the request (only checked for NULL here).
 * block:    non-zero polls the send CQ up to 10 times; zero is not
 *           implemented yet and only prints a ToDo message.
 *
 * Returns 0 when a successful completion was reaped (or in the
 * unimplemented non-blocking mode), -1 on a NULL argument, a poll error,
 * a completion with an error status, or when the CQ is still empty after
 * the last poll.
 *
 * NOTE(review): the polls run back-to-back with no delay, so a completion
 * that arrives after the tenth poll is reported as a failure.
 */
int reap_send_req(struct ib_resource *ib_res_p, struct user_mr *u_mr_p,
                  int block)
{
    VAPI_ret_t         res;
    VAPI_wc_desc_t     wc_desc;
    VAPI_hca_hndl_t    hca_hndl;
    VAPI_cq_hndl_t     s_cq_hndl;

    int poll_cnt = 0;

    if (ib_res_p == NULL) {
        PRINT_ERR("NULL ib_res_p\n");
        return -1;
    }
    if (u_mr_p == NULL) {
        PRINT_ERR("NULL user mr pointer\n");
        return -1;
    }

    hca_hndl = ib_res_p->hca_hndl;
    s_cq_hndl = ib_res_p->s_cq_hndl;

    if (!block) {
        /*
         * Nothing was polled, so there is no completion to inspect; return
         * before touching the uninitialised wc_desc.
         */
        printf("ToDo: %s for unblock\n", __func__);
        return 0;
    }

    do {
        poll_cnt++;
        MTPERF_TIME_START(VAPI_poll_cq);
        res = VAPI_poll_cq(hca_hndl, s_cq_hndl, &wc_desc);
        MTPERF_TIME_END(VAPI_poll_cq);
        if (res != VAPI_OK && res != VAPI_CQ_EMPTY) {
            PRINT_ERR("Poll CQ block failed\n");
            VAPIERR(res);
            return -1;
        }
        show_qp_state(hca_hndl, ib_res_p->qp_entry.qp_hndl,
                      ib_res_p->qp_entry.qp_num);
        VAPI_RET(res);
    } while (res == VAPI_CQ_EMPTY && poll_cnt < 10);

    /* wc_desc is only valid when the last poll actually returned an entry. */
    if (res == VAPI_CQ_EMPTY) {
        PRINT_ERR("Send CQ still empty after %d polls\n", poll_cnt);
        return -1;
    }

    if (wc_desc.status != VAPI_SUCCESS) {
        PRINT_ERR("Req unsuccess: %s\n",
                  VAPI_wc_status_sym(wc_desc.status));
        PRINT_WC_DESC(&wc_desc);
        return -1;
    }

    PRINT_TRACE("Req success\n");
    PRINT_WC_DESC(&wc_desc);
    return 0;
}


/*
 * Reap the completion of a previously posted receive request from the
 * receive CQ.
 *
 * ib_res_p: IB resources; supplies the HCA handle and receive CQ.
 * u_mr_p:   memory region of the request (only checked for NULL here).
 * block:    non-zero polls the receive CQ up to 20 times, sleeping one
 *           second between polls while it is empty; zero is not
 *           implemented yet and only prints a ToDo message.
 *
 * Returns 0 when a successful completion was reaped (or in the
 * unimplemented non-blocking mode), -1 on a NULL argument, a poll error,
 * a completion with an error status, or when the CQ is still empty after
 * the last poll.
 */
int reap_recv_req(struct ib_resource *ib_res_p, struct user_mr *u_mr_p,
                  int block)
{
    VAPI_ret_t         res;
    VAPI_wc_desc_t     wc_desc;
    VAPI_hca_hndl_t    hca_hndl;
    VAPI_cq_hndl_t     r_cq_hndl;

    int poll_cnt = 0;
    int retry;

    if (ib_res_p == NULL) {
        PRINT_ERR("NULL ib_res_p\n");
        return -1;
    }
    if (u_mr_p == NULL) {
        PRINT_ERR("NULL user mr pointer\n");
        return -1;
    }

    hca_hndl = ib_res_p->hca_hndl;
    r_cq_hndl = ib_res_p->r_cq_hndl;

    if (!block) {
        /*
         * Nothing was polled, so there is no completion to inspect; return
         * before touching the uninitialised wc_desc.
         */
        printf("ToDo: %s for unblock\n", __func__);
        return 0;
    }

    do {
        poll_cnt++;
        res = VAPI_poll_cq(hca_hndl, r_cq_hndl, &wc_desc);
        if (res != VAPI_OK && res != VAPI_CQ_EMPTY) {
            PRINT_ERR("Poll CQ block failed\n");
            VAPIERR(res);
            return -1;
        }
        /* Only wait when another poll will follow; do not delay a hit. */
        retry = (res == VAPI_CQ_EMPTY && poll_cnt < 20);
        if (retry) {
            sleep(1);
        }
    } while (retry);

    /* wc_desc is only valid when the last poll actually returned an entry. */
    if (res == VAPI_CQ_EMPTY) {
        PRINT_ERR("Recv CQ still empty after %d polls\n", poll_cnt);
        return -1;
    }

    if (wc_desc.status != VAPI_SUCCESS) {
        PRINT_ERR("Req failed: %s\n",
                  VAPI_wc_status_sym(wc_desc.status));
        PRINT_WC_DESC(&wc_desc);
        return -1;
    }

    PRINT_TRACE("Req success\n");
    PRINT_WC_DESC(&wc_desc);
    return 0;
}


-- 
Ian Jiang
ianjiang.ict at gmail.com

Laboratory of Spatial Information Technology
Division of System Architecture
Institute of Computing Technology 
Chinese Academy of Sciences 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20060219/7ff17327/attachment.html>


More information about the general mailing list