[Openib-windows] [PATCH] async_event
Yossi Leybovich
sleybo at mellanox.co.il
Sun Sep 18 07:15:01 PDT 2005
Fab
Attached is a bug fix for async events in user space.
Our verification team found that CQ/QP async events do not reach user-level
applications.
The bug is in hca_data.c line 812 (mlnx_async_dpc): the code uses the raw QPN
as obj_idx and compares it against max_qp. That is wrong; the QPN must first be
masked with qp_idx_mask and only then compared (the same applies to the CQ).
- obj_idx = hh_er_p->event_modifier.qpn;
+ obj_idx = hh_er_p->event_modifier.qpn &
hobul_p->qp_idx_mask;
if (obj_idx < hobul_p->max_qp)
event_r.context = (void
*)hobul_p->qp_info_tbl[obj_idx].qp_context;
else
This patch fixes this.
It also includes fixes to the alts test (handling of CQEs with errors and a few
debug prints).
I used the alts test to reproduce the problem by simply sending a READ request
in the loopback scenario.
Along the way, while debugging the async event flow, I added the relevant
ENTER/EXIT macros.
10x
Yossi
Signed-off-by: Yossi Leybovich (sleybo at mellanox.co.il)
Index: core/al/al_qp.c
===================================================================
--- core/al/al_qp.c (revision 388)
+++ core/al/al_qp.c (working copy)
@@ -2010,6 +2010,7 @@
{
ib_qp_handle_t h_qp;
+ AL_ENTER(AL_DBG_QP);
CL_ASSERT( p_event_rec );
h_qp = (ib_qp_handle_t)p_event_rec->context;
@@ -2038,6 +2039,7 @@
if( h_qp->pfn_event_cb )
h_qp->pfn_event_cb( p_event_rec );
+ AL_ENTER(AL_DBG_QP);
}
Index: core/al/kernel/al_ci_ca.c
===================================================================
--- core/al/kernel/al_ci_ca.c (revision 388)
+++ core/al/kernel/al_ci_ca.c (working copy)
@@ -332,7 +332,7 @@
{
ib_async_event_rec_t event_rec;
- CL_ENTER( AL_DBG_CA, g_al_dbg_lvl );
+ CL_ENTER( AL_DBG_ERROR, g_al_dbg_lvl );
CL_ASSERT( p_event_record );
@@ -342,7 +342,7 @@
ci_ca_async_event( &event_rec );
- CL_EXIT( AL_DBG_CA, g_al_dbg_lvl );
+ CL_EXIT( AL_DBG_ERROR, g_al_dbg_lvl );
}
Index: core/al/kernel/al_proxy_verbs.c
===================================================================
--- core/al/kernel/al_proxy_verbs.c (revision 388)
+++ core/al/kernel/al_proxy_verbs.c (working copy)
@@ -246,6 +246,8 @@
cl_ioctl_handle_t *ph_ioctl, h_ioctl;
uintn_t ioctl_size;
+ AL_ENTER( AL_DBG_DEV );
+
/* Set up the appropriate callback list. */
switch( cb_type )
{
@@ -310,7 +312,7 @@
proxy_context_deref( p_context );
}
cl_spinlock_release( &p_context->cb_lock );
-
+ AL_EXIT(AL_DBG_DEV);
return TRUE;
}
@@ -961,7 +963,8 @@
ib_qp_handle_t h_qp = p_err_rec->handle.h_qp;
al_dev_open_context_t *p_context = h_qp->obj.h_al->p_context;
misc_cb_ioctl_info_t cb_info;
-
+
+ AL_ENTER( AL_DBG_DEV | AL_DBG_QP );
/*
* If we're already closing the device - do not queue a callback,
since
* we're cleaning up the callback lists.
@@ -986,6 +989,7 @@
UAL_GET_MISC_CB_INFO, p_context, &cb_info, &h_qp->obj );
proxy_context_deref( p_context );
+ AL_EXIT( AL_DBG_DEV | AL_DBG_QP );
}
Index: hw/mt23108/kernel/hca_data.c
===================================================================
--- hw/mt23108/kernel/hca_data.c (revision 388)
+++ hw/mt23108/kernel/hca_data.c (working copy)
@@ -808,7 +808,7 @@
case E_EV_QP:
{
- obj_idx = hh_er_p->event_modifier.qpn;
+ obj_idx = hh_er_p->event_modifier.qpn &
hobul_p->qp_idx_mask;
if (obj_idx < hobul_p->max_qp)
event_r.context = (void
*)hobul_p->qp_info_tbl[obj_idx].qp_context;
else
@@ -821,7 +821,7 @@
case E_EV_CQ:
{
- obj_idx = hh_er_p->event_modifier.cq;
+ obj_idx = hh_er_p->event_modifier.cq &
hobul_p->cq_idx_mask;
if (obj_idx < hobul_p->max_cq)
event_r.context = (void
*)hobul_p->cq_info_tbl[obj_idx].cq_context;
else
Index: tests/alts/multisendrecv.c
===================================================================
--- tests/alts/multisendrecv.c (revision 388)
+++ tests/alts/multisendrecv.c (working copy)
@@ -1,4 +1,5 @@
/*
+* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 SilverStorm Technologies. All rights reserved.
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
*
@@ -751,7 +752,7 @@
p_s_wr->remote_ops.rkey = 0;
ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("******vaddr(x%"PRIx64") lkey(x%x) len(%d)*****\n",
+ ("***** Send ******vaddr(0x%"PRIx64") lkey(0x%x)
len(%d)*****\n",
(void*)(uintn_t)p_s_wr->ds_array[0].vaddr,
p_s_wr->ds_array[0].lkey,
p_s_wr->ds_array[0].length));
@@ -806,7 +807,7 @@
p_r_wr->wr_id = i+reg_index;
ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("******vaddr(x%"PRIx64") lkey(x%x) len(%d)*****\n",
+ ("***** Recv ******vaddr(0x%"PRIx64") lkey(0x%x)
len(%d)*****\n",
(void*)(uintn_t)p_r_wr->ds_array[0].vaddr,
p_r_wr->ds_array[0].lkey,
p_r_wr->ds_array[0].length));
@@ -922,7 +923,7 @@
qp_mod_attr.state.init.access_ctrl = IB_AC_LOCAL_WRITE |
IB_AC_MW_BIND;
ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("******** port num = %d ***************\n",
+ ("****INIT***** port num = %d \n",
qp_mod_attr.state.init.primary_port));
qp_mod_attr.req_state = IB_QPS_INIT;
@@ -997,7 +998,7 @@
qp_mod_attr.state.rtr.rnr_nak_timeout = 7;
ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("****RTR***** dlid = x%x (x%x) *port_num = %d *dest_qp = %d
***\n",
+ ("****RTR***** dlid = x%x (x%x) port_num = %d dest_qp = %d
\n",
qp_mod_attr.state.rtr.primary_av.dlid,
CL_NTOH16(qp_mod_attr.state.rtr.primary_av.dlid),
qp_mod_attr.state.rtr.primary_av.port_num,
@@ -1031,6 +1032,8 @@
qp_mod_attr.state.rts.init_depth = 3; //3;
qp_mod_attr.req_state = IB_QPS_RTS;
+ ALTS_PRINT(ALTS_DBG_VERBOSE,
+ ("****RTS***** \n"));
ib_status = ib_modify_qp(h_qp, &qp_mod_attr);
CL_ASSERT(ib_status == IB_SUCCESS);
@@ -1112,7 +1115,7 @@
p_ca_obj->src_port_num = p_src_port_attr->port_num;
ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("**** slid = x%x (x%x) ***dlid = x%x (x%x)
***************\n",
+ ("****** slid = x%x (x%x) ***dlid = x%x (x%x)
***************\n",
p_ca_obj->slid,
CL_NTOH16(p_ca_obj->slid),
p_ca_obj->dlid,
@@ -1311,58 +1314,68 @@
while(p_done_cl)
{
- /*
- * print output
- */
- ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("Got a completion:\n"
- "\ttype....:%s\n"
- "\twr_id...:%"PRIx64"\n",
- ib_get_wc_type_str(p_done_cl->wc_type),
- p_done_cl->wr_id ));
-
-
- if (p_done_cl->wc_type == IB_WC_RECV)
+ if(p_done_cl->status != IB_WCS_SUCCESS)
{
ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("message length..:%d bytes\n",
- p_done_cl->length ));
+ ("Got a completion with error !!!!!!!! status = %s
type=%s\n",
+ ib_get_wc_status_str(p_done_cl->status),
+ ib_get_wc_type_str( p_done_cl->wc_type)));
+
+ }else{
+ /*
+ * print output
+ */
+ ALTS_PRINT(ALTS_DBG_VERBOSE,
+ ("Got a completion:\n"
+ "\ttype....:%s\n"
+ "\twr_id...:%"PRIx64"\n",
+ ib_get_wc_type_str(p_done_cl->wc_type),
+ p_done_cl->wr_id ));
- id = (uint32_t)p_done_cl->wr_id;
- buff = (char *)p_ca_obj->mem_region[id].buffer;
- if (qp_type == IB_QPT_UNRELIABLE_DGRM)
+
+
+ if (p_done_cl->wc_type == IB_WC_RECV)
{
ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("---MSG--->%s\n",&buff[40]));
- ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("RecvUD info:\n"
- "\trecv_opt...:x%x\n"
- "\timm_data...:x%x\n"
- "\tremote_qp..:x%x\n"
- "\tpkey_index.:%d\n"
- "\tremote_lid.:x%x\n"
- "\tremote_sl..:x%x\n"
- "\tpath_bits..:x%x\n"
- "\tsrc_lid....:x%x\n",
- p_done_cl->recv.ud.recv_opt,
- p_done_cl->recv.ud.immediate_data,
-
CL_NTOH32(p_done_cl->recv.ud.remote_qp),
- p_done_cl->recv.ud.pkey_index,
-
CL_NTOH16(p_done_cl->recv.ud.remote_lid),
- p_done_cl->recv.ud.remote_sl,
- p_done_cl->recv.ud.path_bits,
-
CL_NTOH16(p_ca_obj->mem_region[id].my_lid)));
+ ("message length..:%d bytes\n",
+ p_done_cl->length ));
+
+ id = (uint32_t)p_done_cl->wr_id;
+ buff = (char
*)p_ca_obj->mem_region[id].buffer;
+ if (qp_type == IB_QPT_UNRELIABLE_DGRM)
+ {
+ ALTS_PRINT(ALTS_DBG_VERBOSE,
+
("---MSG--->%s\n",&buff[40]));
+ ALTS_PRINT(ALTS_DBG_VERBOSE,
+ ("RecvUD info:\n"
+ "\trecv_opt...:x%x\n"
+ "\timm_data...:x%x\n"
+ "\tremote_qp..:x%x\n"
+ "\tpkey_index.:%d\n"
+ "\tremote_lid.:x%x\n"
+ "\tremote_sl..:x%x\n"
+ "\tpath_bits..:x%x\n"
+ "\tsrc_lid....:x%x\n",
+ p_done_cl->recv.ud.recv_opt,
+
p_done_cl->recv.ud.immediate_data,
+
CL_NTOH32(p_done_cl->recv.ud.remote_qp),
+
p_done_cl->recv.ud.pkey_index,
+
CL_NTOH16(p_done_cl->recv.ud.remote_lid),
+
p_done_cl->recv.ud.remote_sl,
+
p_done_cl->recv.ud.path_bits,
+
CL_NTOH16(p_ca_obj->mem_region[id].my_lid)));
+ }
+ else
+ {
+ ALTS_PRINT(ALTS_DBG_VERBOSE,
+ ("RecvRC info:\n"
+ "\trecv_opt...:x%x\n"
+ "\timm_data...:x%x\n",
+
p_done_cl->recv.conn.recv_opt,
+
p_done_cl->recv.ud.immediate_data ));
+ }
+
}
- else
- {
- ALTS_PRINT(ALTS_DBG_VERBOSE,
- ("RecvRC info:\n"
- "\trecv_opt...:x%x\n"
- "\timm_data...:x%x\n",
- p_done_cl->recv.conn.recv_opt,
- p_done_cl->recv.ud.immediate_data
));
- }
-
}
p_free_wcl = p_done_cl;
@@ -1422,6 +1435,8 @@
UNUSED_PARAM( p_err_rec );
+ ALTS_PRINT(ALTS_DBG_VERBOSE,("ERROR: Async CQ error !!!!!!!!!\n"));
+
ALTS_EXIT( ALTS_DBG_VERBOSE);
}
@@ -1434,6 +1449,8 @@
UNUSED_PARAM( p_err_rec );
+ ALTS_PRINT(ALTS_DBG_VERBOSE,("ERROR: Async QP error !!!!!!!!!\n"));
+
ALTS_EXIT( ALTS_DBG_VERBOSE);
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20050918/d961404f/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: async_event.patch
Type: application/octet-stream
Size: 8513 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20050918/d961404f/attachment.obj>
More information about the ofw
mailing list