[Openib-windows] [PATCH] async_event

Yossi Leybovich sleybo at mellanox.co.il
Sun Sep 18 07:15:01 PDT 2005


Fab

Attached is a bug fix for async events in user space.
Our verification team found that CQ/QP async events do not reach user-level
applications.
The bug was that in hca_data.c line 812 (mlnx_async_dpc) the code used the raw
qpn as obj_idx when comparing against max_qp. That is wrong: the qpn must first
be masked with qp_idx_mask and only then compared. (The same applies to the CQ.)

-			obj_idx = hh_er_p->event_modifier.qpn;
+			obj_idx = hh_er_p->event_modifier.qpn &
hobul_p->qp_idx_mask;
 			if (obj_idx < hobul_p->max_qp)
 				event_r.context = (void
*)hobul_p->qp_info_tbl[obj_idx].qp_context;
 			else

This patch fixes this.
It also includes fixes to the alts test (handling of CQEs with errors, and a few
debug prints).
I used the alts test to reproduce the problem by simply sending a READ request
in the loopback scenario.
Along the way, while debugging the async event flow, I added the relevant
ENTER/EXIT macros.

10x
Yossi  

Signed-off-by: Yossi Leybovich (sleybo at mellanox.co.il)
Index: core/al/al_qp.c
===================================================================
--- core/al/al_qp.c	(revision 388)
+++ core/al/al_qp.c	(working copy)
@@ -2010,6 +2010,7 @@
 {
 	ib_qp_handle_t			h_qp;
 
+	AL_ENTER(AL_DBG_QP);
 	CL_ASSERT( p_event_rec );
 	h_qp = (ib_qp_handle_t)p_event_rec->context;
 
@@ -2038,6 +2039,7 @@
 
 	if( h_qp->pfn_event_cb )
 		h_qp->pfn_event_cb( p_event_rec );
+	AL_ENTER(AL_DBG_QP);
 }
 
 
Index: core/al/kernel/al_ci_ca.c
===================================================================
--- core/al/kernel/al_ci_ca.c	(revision 388)
+++ core/al/kernel/al_ci_ca.c	(working copy)
@@ -332,7 +332,7 @@
 {
 	ib_async_event_rec_t	event_rec;
 
-	CL_ENTER( AL_DBG_CA, g_al_dbg_lvl );
+	CL_ENTER( AL_DBG_ERROR, g_al_dbg_lvl );
 
 	CL_ASSERT( p_event_record );
 
@@ -342,7 +342,7 @@
 
 	ci_ca_async_event( &event_rec );
 
-	CL_EXIT( AL_DBG_CA, g_al_dbg_lvl );
+	CL_EXIT( AL_DBG_ERROR, g_al_dbg_lvl );
 }
 
 
Index: core/al/kernel/al_proxy_verbs.c
===================================================================
--- core/al/kernel/al_proxy_verbs.c	(revision 388)
+++ core/al/kernel/al_proxy_verbs.c	(working copy)
@@ -246,6 +246,8 @@
 	cl_ioctl_handle_t			*ph_ioctl, h_ioctl;
 	uintn_t						ioctl_size;
 
+	AL_ENTER( AL_DBG_DEV );
+	
 	/* Set up the appropriate callback list. */
 	switch( cb_type )
 	{
@@ -310,7 +312,7 @@
 		proxy_context_deref( p_context );
 	}
 	cl_spinlock_release( &p_context->cb_lock );
-
+	AL_EXIT(AL_DBG_DEV);
 	return TRUE;
 }
 
@@ -961,7 +963,8 @@
 	ib_qp_handle_t			h_qp = p_err_rec->handle.h_qp;
 	al_dev_open_context_t	*p_context = h_qp->obj.h_al->p_context;
 	misc_cb_ioctl_info_t	cb_info;
-
+	
+	AL_ENTER( AL_DBG_DEV | AL_DBG_QP );
 	/*
 	 * If we're already closing the device - do not queue a callback,
since
 	 * we're cleaning up the callback lists.
@@ -986,6 +989,7 @@
 		UAL_GET_MISC_CB_INFO, p_context, &cb_info, &h_qp->obj );
 
 	proxy_context_deref( p_context );
+	AL_EXIT( AL_DBG_DEV | AL_DBG_QP );
 }
 
 
Index: hw/mt23108/kernel/hca_data.c
===================================================================
--- hw/mt23108/kernel/hca_data.c	(revision 388)
+++ hw/mt23108/kernel/hca_data.c	(working copy)
@@ -808,7 +808,7 @@
 
 	case E_EV_QP:
 		{
-			obj_idx = hh_er_p->event_modifier.qpn;
+			obj_idx = hh_er_p->event_modifier.qpn &
hobul_p->qp_idx_mask;
 			if (obj_idx < hobul_p->max_qp)
 				event_r.context = (void
*)hobul_p->qp_info_tbl[obj_idx].qp_context;
 			else
@@ -821,7 +821,7 @@
 
 	case E_EV_CQ:
 		{
-			obj_idx = hh_er_p->event_modifier.cq;
+			obj_idx = hh_er_p->event_modifier.cq &
hobul_p->cq_idx_mask;
 			if (obj_idx < hobul_p->max_cq)
 				event_r.context = (void
*)hobul_p->cq_info_tbl[obj_idx].cq_context;
 			else
Index: tests/alts/multisendrecv.c
===================================================================
--- tests/alts/multisendrecv.c	(revision 388)
+++ tests/alts/multisendrecv.c	(working copy)
@@ -1,4 +1,5 @@
 /*
+* Copyright (c) 2005 Mellanox Technologies.  All rights reserved.
  * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
  *
@@ -751,7 +752,7 @@
 		p_s_wr->remote_ops.rkey = 0;
 
 		ALTS_PRINT(ALTS_DBG_VERBOSE,
-			("******vaddr(x%"PRIx64") lkey(x%x) len(%d)*****\n",
+			("***** Send ******vaddr(0x%"PRIx64") lkey(0x%x)
len(%d)*****\n",
 			(void*)(uintn_t)p_s_wr->ds_array[0].vaddr,
 			p_s_wr->ds_array[0].lkey,
 			p_s_wr->ds_array[0].length));
@@ -806,7 +807,7 @@
 		p_r_wr->wr_id = i+reg_index;
 
 		ALTS_PRINT(ALTS_DBG_VERBOSE,
-			("******vaddr(x%"PRIx64") lkey(x%x) len(%d)*****\n",
+			("***** Recv ******vaddr(0x%"PRIx64") lkey(0x%x)
len(%d)*****\n",
 			(void*)(uintn_t)p_r_wr->ds_array[0].vaddr,
 			p_r_wr->ds_array[0].lkey,
 			p_r_wr->ds_array[0].length));
@@ -922,7 +923,7 @@
 	qp_mod_attr.state.init.access_ctrl = IB_AC_LOCAL_WRITE |
IB_AC_MW_BIND;
 
 	ALTS_PRINT(ALTS_DBG_VERBOSE,
-		("******** port num = %d ***************\n",
+		("****INIT***** port num = %d \n",		
 		qp_mod_attr.state.init.primary_port));
 
 	qp_mod_attr.req_state = IB_QPS_INIT;
@@ -997,7 +998,7 @@
 	qp_mod_attr.state.rtr.rnr_nak_timeout = 7;
 
 	ALTS_PRINT(ALTS_DBG_VERBOSE,
-		("****RTR***** dlid = x%x (x%x) *port_num = %d *dest_qp = %d
***\n",
+		("****RTR***** dlid = x%x (x%x) port_num = %d dest_qp = %d
\n",
 		qp_mod_attr.state.rtr.primary_av.dlid,
 		CL_NTOH16(qp_mod_attr.state.rtr.primary_av.dlid),
 		qp_mod_attr.state.rtr.primary_av.port_num,
@@ -1031,6 +1032,8 @@
 	qp_mod_attr.state.rts.init_depth = 3;		//3;
 
 	qp_mod_attr.req_state = IB_QPS_RTS;
+	ALTS_PRINT(ALTS_DBG_VERBOSE,
+		("****RTS*****  \n"));
 	ib_status = ib_modify_qp(h_qp, &qp_mod_attr);
 
 	CL_ASSERT(ib_status == IB_SUCCESS);
@@ -1112,7 +1115,7 @@
 		p_ca_obj->src_port_num = p_src_port_attr->port_num;
 
 		ALTS_PRINT(ALTS_DBG_VERBOSE,
-			("**** slid = x%x (x%x) ***dlid = x%x (x%x)
***************\n",
+			("****** slid = x%x (x%x) ***dlid = x%x (x%x)
***************\n",
 			p_ca_obj->slid,
 			CL_NTOH16(p_ca_obj->slid),
 			p_ca_obj->dlid,
@@ -1311,58 +1314,68 @@
 	while(p_done_cl)
 	{
 
-		/*
-		 *  print output
-		 */
-		ALTS_PRINT(ALTS_DBG_VERBOSE,
-			("Got a completion:\n"
-			"\ttype....:%s\n"
-			"\twr_id...:%"PRIx64"\n",
-			ib_get_wc_type_str(p_done_cl->wc_type),
-			p_done_cl->wr_id ));
-
-
-		if (p_done_cl->wc_type == IB_WC_RECV)
+		if(p_done_cl->status != IB_WCS_SUCCESS)
 		{
 			ALTS_PRINT(ALTS_DBG_VERBOSE,
-				("message length..:%d bytes\n",
-				p_done_cl->length ));
+			("Got a completion with error !!!!!!!! status = %s
type=%s\n",
+				ib_get_wc_status_str(p_done_cl->status),
+				ib_get_wc_type_str( p_done_cl->wc_type)));
+			
+		}else{
+			/*
+			 *  print output
+			 */
+			ALTS_PRINT(ALTS_DBG_VERBOSE,
+				("Got a completion:\n"
+				"\ttype....:%s\n"
+				"\twr_id...:%"PRIx64"\n",
+				ib_get_wc_type_str(p_done_cl->wc_type),
+				p_done_cl->wr_id ));
 
-			id = (uint32_t)p_done_cl->wr_id;
-			buff = (char *)p_ca_obj->mem_region[id].buffer;
-			if (qp_type == IB_QPT_UNRELIABLE_DGRM)
+			
+				
+			if (p_done_cl->wc_type == IB_WC_RECV)
 			{
 				ALTS_PRINT(ALTS_DBG_VERBOSE,
-					("---MSG--->%s\n",&buff[40]));
-				ALTS_PRINT(ALTS_DBG_VERBOSE,
-					("RecvUD info:\n"
-					"\trecv_opt...:x%x\n"
-					"\timm_data...:x%x\n"
-					"\tremote_qp..:x%x\n"
-					"\tpkey_index.:%d\n"
-					"\tremote_lid.:x%x\n"
-					"\tremote_sl..:x%x\n"
-					"\tpath_bits..:x%x\n"
-					"\tsrc_lid....:x%x\n",
-					p_done_cl->recv.ud.recv_opt,
-					p_done_cl->recv.ud.immediate_data,
-
CL_NTOH32(p_done_cl->recv.ud.remote_qp),
-					p_done_cl->recv.ud.pkey_index,
-
CL_NTOH16(p_done_cl->recv.ud.remote_lid),
-					p_done_cl->recv.ud.remote_sl,
-					p_done_cl->recv.ud.path_bits,
-
CL_NTOH16(p_ca_obj->mem_region[id].my_lid)));
+					("message length..:%d bytes\n",
+					p_done_cl->length ));
+
+				id = (uint32_t)p_done_cl->wr_id;
+				buff = (char
*)p_ca_obj->mem_region[id].buffer;
+				if (qp_type == IB_QPT_UNRELIABLE_DGRM)
+				{
+					ALTS_PRINT(ALTS_DBG_VERBOSE,
+
("---MSG--->%s\n",&buff[40]));
+					ALTS_PRINT(ALTS_DBG_VERBOSE,
+						("RecvUD info:\n"
+						"\trecv_opt...:x%x\n"
+						"\timm_data...:x%x\n"
+						"\tremote_qp..:x%x\n"
+						"\tpkey_index.:%d\n"
+						"\tremote_lid.:x%x\n"
+						"\tremote_sl..:x%x\n"
+						"\tpath_bits..:x%x\n"
+						"\tsrc_lid....:x%x\n",
+						p_done_cl->recv.ud.recv_opt,
+
p_done_cl->recv.ud.immediate_data,
+
CL_NTOH32(p_done_cl->recv.ud.remote_qp),
+
p_done_cl->recv.ud.pkey_index,
+
CL_NTOH16(p_done_cl->recv.ud.remote_lid),
+
p_done_cl->recv.ud.remote_sl,
+
p_done_cl->recv.ud.path_bits,
+
CL_NTOH16(p_ca_obj->mem_region[id].my_lid)));
+				}
+				else
+				{
+					ALTS_PRINT(ALTS_DBG_VERBOSE,
+						("RecvRC info:\n"
+						"\trecv_opt...:x%x\n"
+						"\timm_data...:x%x\n",
+
p_done_cl->recv.conn.recv_opt,
+
p_done_cl->recv.ud.immediate_data ));
+				}
+
 			}
-			else
-			{
-				ALTS_PRINT(ALTS_DBG_VERBOSE,
-					("RecvRC info:\n"
-					"\trecv_opt...:x%x\n"
-					"\timm_data...:x%x\n",
-					p_done_cl->recv.conn.recv_opt,
-					p_done_cl->recv.ud.immediate_data
));
-			}
-
 		}
 
 		p_free_wcl = p_done_cl;
@@ -1422,6 +1435,8 @@
 
 	UNUSED_PARAM( p_err_rec );
 
+	ALTS_PRINT(ALTS_DBG_VERBOSE,("ERROR: Async CQ error  !!!!!!!!!\n"));
+
 	ALTS_EXIT( ALTS_DBG_VERBOSE);
 }
 
@@ -1434,6 +1449,8 @@
 
 	UNUSED_PARAM( p_err_rec );
 
+	ALTS_PRINT(ALTS_DBG_VERBOSE,("ERROR: Async QP error  !!!!!!!!!\n"));
+	
 	ALTS_EXIT( ALTS_DBG_VERBOSE);
 }
 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20050918/d961404f/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: async_event.patch
Type: application/octet-stream
Size: 8513 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20050918/d961404f/attachment.obj>


More information about the ofw mailing list