[Openib-windows] races on __destrot_obj function

Yossi Leybovich sleybo at mellanox.co.il
Tue Jul 11 02:09:15 PDT 2006


Attached is patch that solve this problem 

It includes:

1. fix to cl_obj to return value if the parent port is not in
CL_INITIALIZED
2. fix to insert/insert_locked function to check return value
3. add ref count mechanism to help debug ref count problems
4. not call port_resume in endpt cleanup but in port destroy

I tested this patch with our test and it pass

Pls review 

10x
Yossi 
 


Singed-off-by: Yossi Leybovich (sleybo at mellanox.co.il)

Index: core/complib/cl_obj.c
===================================================================
--- core/complib/cl_obj.c	(revision 1528)
+++ core/complib/cl_obj.c	(working copy)
@@ -351,17 +351,19 @@
 /*
  * Add a dependent relationship between two objects.
  */
-void
+cl_status_t
 cl_obj_insert_rel(
 	IN				cl_obj_rel_t * const
p_rel,
 	IN				cl_obj_t * const
p_parent_obj,
 	IN				cl_obj_t * const
p_child_obj )
 {
+	cl_status_t	status;
 	CL_ASSERT( p_rel && p_parent_obj && p_child_obj );
 
 	cl_spinlock_acquire( &p_parent_obj->lock );
-	cl_obj_insert_rel_parent_locked( p_rel, p_parent_obj,
p_child_obj );
+	status = cl_obj_insert_rel_parent_locked( p_rel, p_parent_obj,
p_child_obj );
 	cl_spinlock_release( &p_parent_obj->lock );
+	return status;
 }
 
 
@@ -369,7 +371,7 @@
 /*
  * Add a dependent relationship between two objects.
  */
-void
+cl_status_t
 cl_obj_insert_rel_parent_locked(
 	IN				cl_obj_rel_t * const
p_rel,
 	IN				cl_obj_t * const
p_parent_obj,
@@ -377,6 +379,8 @@
 {
 	CL_ASSERT( p_rel && p_parent_obj && p_child_obj );
 
+	if(p_parent_obj->state != CL_INITIALIZED)
+		return CL_INVALID_STATE;
 	/* The child object needs to maintain a reference on the parent.
*/
 	cl_obj_ref( p_parent_obj );
 	cl_obj_ref( p_child_obj );
@@ -396,6 +400,7 @@
 		(cl_list_item_t*)&p_rel->pool_item );
 
 	cl_spinlock_release( &p_child_obj->lock );
+	return CL_SUCCESS;
 }
 
 
Index: inc/complib/cl_obj.h
===================================================================
--- inc/complib/cl_obj.h	(revision 1528)
+++ inc/complib/cl_obj.h	(working copy)
@@ -877,7 +877,7 @@
 *
 * SYNOPSIS
 */
-CL_EXPORT void CL_API
+CL_EXPORT cl_status_t CL_API
 cl_obj_insert_rel(
 	IN				cl_obj_rel_t * const
p_rel,
 	IN				cl_obj_t * const
p_parent_obj,
@@ -922,7 +922,7 @@
 *
 * SYNOPSIS
 */
-CL_EXPORT void CL_API
+CL_EXPORT cl_status_t CL_API
 cl_obj_insert_rel_parent_locked(
 	IN				cl_obj_rel_t * const
p_rel,
 	IN				cl_obj_t * const
p_parent_obj,
Index: ulp/ipoib/kernel/ipoib_adapter.c
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.c	(revision 1528)
+++ ulp/ipoib/kernel/ipoib_adapter.c	(working copy)
@@ -717,7 +717,7 @@
 	if( p_adapter->state == IB_PNP_PORT_ACTIVE )
 	{
 		p_port = p_adapter->p_port;
-		cl_obj_ref( &p_port->obj );
+		ipoib_port_ref(p_port, 1);
 	}
 	cl_obj_unlock( &p_adapter->obj );
 
@@ -764,9 +764,9 @@
 		num_macs * sizeof(mac_addr_t) );
 	p_adapter->mcast_array_size = num_macs;
 
-	if( p_port )
-		cl_obj_deref( &p_port->obj );
-
+	if( p_port ){
+		ipoib_port_deref(p_port, 1);
+	}
 	IPOIB_EXIT( IPOIB_DBG_MCAST );
 }
 
Index: ulp/ipoib/kernel/ipoib_debug.h
===================================================================
--- ulp/ipoib/kernel/ipoib_debug.h	(revision 1528)
+++ ulp/ipoib/kernel/ipoib_debug.h	(working copy)
@@ -68,7 +68,8 @@
 	WPP_DEFINE_BIT(IPOIB_DBG_ALLOC) \
 	WPP_DEFINE_BIT(IPOIB_DBG_OID) \
 	WPP_DEFINE_BIT(IPOIB_DBG_IOCTL) \
-	WPP_DEFINE_BIT(IPOIB_DBG_STAT))
+	WPP_DEFINE_BIT(IPOIB_DBG_STAT) \
+	WPP_DEFINE_BIT(IPOIB_DBG_OBJ))
 
 
 
@@ -111,6 +112,7 @@
 #define IPOIB_DBG_OID	(1 << 10)
 #define IPOIB_DBG_IOCTL	(1 << 11)
 #define IPOIB_DBG_STAT	(1 << 12)
+#define IPOIB_DBG_OBJ	(1<<31)
 
 #define IPOIB_DBG_ERROR	(CL_DBG_ERROR | IPOIB_DBG_ERR)
 #define IPOIB_DBG_ALL	CL_DBG_ALL
Index: ulp/ipoib/kernel/ipoib_driver.c
===================================================================
--- ulp/ipoib/kernel/ipoib_driver.c	(revision 1528)
+++ ulp/ipoib/kernel/ipoib_driver.c	(working copy)
@@ -1855,14 +1855,13 @@
 	}
 
 	p_port = p_adapter->p_port;
-	cl_obj_ref( &p_port->obj );
+	ipoib_port_ref(p_port, 2);
 	cl_obj_unlock( &p_adapter->obj );
 
 	cl_perf_start( PortSend );
 	ipoib_port_send( p_port, packet_array, num_packets );
 	cl_perf_stop( &p_port->p_adapter->perf, PortSend );
-	cl_obj_deref( &p_port->obj );
-
+	ipoib_port_deref(p_port, 2);
 	cl_perf_stop( &p_adapter->perf, SendPackets );
 
 	cl_perf_log( &p_adapter->perf, SendBundle, num_packets );
Index: ulp/ipoib/kernel/ipoib_endpoint.c
===================================================================
--- ulp/ipoib/kernel/ipoib_endpoint.c	(revision 1528)
+++ ulp/ipoib/kernel/ipoib_endpoint.c	(working copy)
@@ -224,8 +224,6 @@
 	p_endpt = PARENT_STRUCT( p_obj, ipoib_endpt_t, obj );
 	p_port = __endpt_parent( p_endpt );
 
-	/* Resume sends so that any pending sends for this endpoint
fail. */
-	ipoib_port_resume( p_port );
 
 	/* Leave the multicast group if it exists. */
 	if( p_endpt->h_mcast )
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c	(revision 1528)
+++ ulp/ipoib/kernel/ipoib_port.c	(working copy)
@@ -389,13 +389,13 @@
 	IN				ipoib_port_t* const
p_port,
 	IN		const	net16_t
lid );
 
-static inline void
+static inline ib_api_status_t
 __endpt_mgr_insert_locked(
 	IN				ipoib_port_t* const
p_port,
 	IN		const	mac_addr_t
mac,
 	IN				ipoib_endpt_t* const
p_endpt );
 
-static inline void
+static inline ib_api_status_t
 __endpt_mgr_insert(
 	IN				ipoib_port_t* const
p_port,
 	IN		const	mac_addr_t
mac,
@@ -457,7 +457,25 @@
 	return cl_memcmp( p_key1, p_key2, sizeof(ib_gid_t) );
 }
 
+inline void ipoib_port_ref(ipoib_port_t * p_port, int type)
+{
+	cl_obj_ref( &p_port->obj );
+#if DBG
+	p_port->ref[type%100] --;
+	IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,(" ref type %d
ref_cnt %d\n",type,p_port->obj.ref_cnt));
+#endif
+}
 
+inline void ipoib_port_deref(ipoib_port_t * p_port, int type)
+{
+	cl_obj_deref( &p_port->obj );
+
+#if DBG
+	p_port->ref[type%100] --;
+	IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("deref  type %d
ref_cnt %d\n",type,p_port->obj.ref_cnt));
+#endif
+}
+
 
/***********************************************************************
*******
 *
 * Implementation
@@ -623,6 +641,12 @@
 	/* We only ever destroy from the PnP callback thread. */
 	cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,
 		__port_destroying, __port_cleanup, __port_free );
+
+#if DBG
+	p_port->ref[0]++;
+	IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("ref  type %d
ref_cnt %d\n",0,p_port->obj.ref_cnt));
+#endif
+
 	if( cl_status != CL_SUCCESS )
 	{
 		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
@@ -630,7 +654,19 @@
 		return IB_ERROR;
 	}
 
-	cl_obj_insert_rel( &p_port->rel, &p_adapter->obj, &p_port->obj
);
+	cl_status = cl_obj_insert_rel( &p_port->rel, &p_adapter->obj,
&p_port->obj );
+	if( cl_status != CL_SUCCESS )
+	{
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("cl_obj_insert_rel returned %s\n",
cl_status_text[cl_status]) );
+		cl_obj_destroy( &p_port->obj );
+		return IB_ERROR;
+	}
+	
+#if DBG
+	p_port->ref[0]++;
+	IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("ref  type %d
ref_cnt %d\n",0,p_port->obj.ref_cnt));
+#endif
 
 	IPOIB_EXIT( IPOIB_DBG_INIT );
 	return IB_SUCCESS;
@@ -653,6 +689,8 @@
 
 	__endpt_mgr_remove_all( p_port );
 
+	ipoib_port_resume( p_port );
+
 	IPOIB_EXIT( IPOIB_DBG_INIT );
 }
 
@@ -1137,7 +1175,7 @@
 	/* Reference the port object for the send. */
 	if( p_desc )
 	{
-		cl_obj_ref( &p_port->obj );
+		ipoib_port_ref(p_port, 3);
 		CL_ASSERT( p_desc->wr.wr_id == (uintn_t)p_desc );
 #if IPOIB_INLINE_RECV
 		CL_ASSERT( p_desc->local_ds[0].vaddr ==
@@ -1181,8 +1219,7 @@
 	/*
 	 * Dereference the port object since the receive is no longer
outstanding.
 	 */
-	cl_obj_deref( &p_port->obj );
-
+	ipoib_port_deref(p_port, 3);
 	IPOIB_EXIT(  IPOIB_DBG_RECV );
 }
 
@@ -1327,7 +1364,7 @@
 			("Port in invalid state.  Not reposting.\n") );
 		return 0;
 	}
-	cl_obj_ref( &p_port->obj );
+	ipoib_port_ref(p_port, 4);
 	cl_obj_unlock( &p_port->obj );
 
 	while( p_port->recv_mgr.depth <
p_port->p_adapter->params.rq_depth )
@@ -1382,8 +1419,7 @@
 		}
 	}
 
-	cl_obj_deref( &p_port->obj );
-
+	ipoib_port_deref(p_port, 4);
 	IPOIB_EXIT( IPOIB_DBG_RECV );
 	return p_port->p_adapter->params.rq_low_watermark -
p_port->recv_mgr.depth;
 }
@@ -1513,8 +1549,7 @@
 	cl_qlist_init( &done_list );
 	cl_qlist_init( &bad_list );
 
-	cl_obj_ref( &p_port->obj );
-
+	ipoib_port_ref(p_port, 5);
 	for( i = 0; i < MAX_RECV_WC; i++ )
 		wc[i].p_next = &wc[i + 1];
 	wc[MAX_RECV_WC - 1].p_next = NULL;
@@ -1633,7 +1668,7 @@
 	cl_perf_stop( &p_port->p_adapter->perf, RearmRecv );
 	CL_ASSERT( status == IB_SUCCESS );
 
-	cl_obj_deref( &p_port->obj );
+	ipoib_port_deref(p_port, 5);
 
 	cl_perf_stop( &p_port->p_adapter->perf, RecvCb );
 
@@ -1715,13 +1750,19 @@
 			if( !*pp_src )
 			{
 				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
-					("ipoib_endpt_create returned
%s\n",
-
p_port->p_adapter->p_ifc->get_err_str( status )) );
+					("ipoib_endpt_create
failed\n"));
 				return;
 			}
 			cl_perf_start( EndptInsert );
 			cl_obj_lock( &p_port->obj );
-			__endpt_mgr_insert( p_port, mac, *pp_src );
+			status = __endpt_mgr_insert( p_port, mac,
*pp_src );
+			if( status != IB_SUCCESS)
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+					("__endpt_mgr_insert failed \n",
+
p_port->p_adapter->p_ifc->get_err_str( status )));
+				return;
+			}
 			cl_obj_unlock( &p_port->obj );
 			cl_perf_stop( &p_port->p_adapter->perf,
EndptInsert );
 		}
@@ -1816,7 +1857,7 @@
 			}
 			cl_qlist_insert_tail( p_bad_list,
&p_desc->item.list_item );
 			/* Dereference the port object on behalf of the
failed receive. */
-			cl_obj_deref( &p_port->obj );
+			ipoib_port_deref(p_port, 103);
 			continue;
 		}
 
@@ -1828,7 +1869,7 @@
 				("Received ETH packet < min size\n") );
 			ipoib_inc_recv_stat( p_port->p_adapter,
IP_STAT_ERROR, 0 );
 			cl_qlist_insert_tail( p_bad_list,
&p_desc->item.list_item );
-			cl_obj_deref( &p_port->obj );
+			ipoib_port_deref(p_port, 203);
 			continue;
 		}
 
@@ -1857,7 +1898,7 @@
 				 */
 				cl_qlist_insert_tail( p_bad_list,
&p_desc->item.list_item );
 				/* Dereference the port object on behalf
of the failed recv. */
-				cl_obj_deref( &p_port->obj );
+				ipoib_port_deref(p_port, 303);
 				continue;
 			}
 		}
@@ -1948,7 +1989,7 @@
 			ipoib_inc_recv_stat( p_port->p_adapter,
IP_STAT_ERROR, 0 );
 			cl_qlist_insert_tail( p_bad_list,
&p_desc->item.list_item );
 			/* Dereference the port object on behalf of the
failed receive. */
-			cl_obj_deref( &p_port->obj );
+			ipoib_port_deref(p_port, 403);
 		}
 		else
 		{
@@ -2255,16 +2296,24 @@
 		 */
 		*pp_src = ipoib_endpt_create( &p_ib_arp->src_hw.gid,
 			0, (p_ib_arp->src_hw.flags_qpn &
CL_HTON32(0x00FFFFFF)) );
+
 		if( !*pp_src )
 		{
 			IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
-				("ipoib_endpt_create returned %s\n",
-				p_port->p_adapter->p_ifc->get_err_str(
status )) );
+				("ipoib_endpt_create failed\n"));
 			return status;
 		}
 
 		cl_obj_lock( &p_port->obj );
-		__endpt_mgr_insert( p_port, mac, *pp_src );
+		status = __endpt_mgr_insert( p_port, mac, *pp_src );
+		if( status != IB_SUCCESS )
+		{
+			IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+				("__endpt_mgr_insert return %s \n",
+				p_port->p_adapter->p_ifc->get_err_str(
status )) );));
+			return status;
+		}
+
 		cl_obj_unlock( &p_port->obj );
 	}
 
@@ -3634,7 +3683,6 @@
 	PERF_DECLARE( PostSend );
 
 	IPOIB_ENTER( IPOIB_DBG_SEND );
-
 	cl_spinlock_acquire( &p_port->send_lock );
 
 	for( p_item = cl_qlist_head( &p_port->send_mgr.pending_list );
@@ -3774,8 +3822,7 @@
 
 	p_port = (ipoib_port_t*)cq_context;
 
-	cl_obj_ref( &p_port->obj );
-
+	ipoib_port_ref(p_port, 6);
 	for( i = 0; i < MAX_SEND_WC; i++ )
 		wc[i].p_next = &wc[i + 1];
 	wc[MAX_SEND_WC - 1].p_next = NULL;
@@ -3873,7 +3920,7 @@
 	ipoib_port_resume( p_port );
 	cl_perf_stop( &p_port->p_adapter->perf, PortResume );
 	
-	cl_obj_deref( &p_port->obj );
+	ipoib_port_deref(p_port, 6);
 
 	cl_perf_stop( &p_port->p_adapter->perf, SendCb );
 	cl_perf_update_ctr( &p_port->p_adapter->perf, SendCompBundle );
@@ -4240,12 +4287,13 @@
 }
 
 
-inline void
+inline ib_api_status_t
 __endpt_mgr_insert_locked(
 	IN				ipoib_port_t* const
p_port,
 	IN		const	mac_addr_t
mac,
 	IN				ipoib_endpt_t* const
p_endpt )
 {
+	ib_api_status_t	status = IB_SUCCESS;
 	IPOIB_ENTER( IPOIB_DBG_ENDPT );
 
 	IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,
@@ -4261,21 +4309,25 @@
 	}
 	/* __endpt_mgr_insert expects *one* reference to be held when
being called. */
 	cl_atomic_inc( &p_port->endpt_rdr );
-	__endpt_mgr_insert( p_port, mac, p_endpt );
+	status= __endpt_mgr_insert( p_port, mac, p_endpt );
 	cl_atomic_dec( &p_port->endpt_rdr );
 	cl_obj_unlock( &p_port->obj );
+	
+	IPOIB_EXIT( IPOIB_DBG_ENDPT );
+	return status;
 
-	IPOIB_EXIT( IPOIB_DBG_ENDPT );
 }
 
 
-void
+inline ib_api_status_t
 __endpt_mgr_insert(
 	IN				ipoib_port_t* const
p_port,
 	IN		const	mac_addr_t
mac,
 	IN				ipoib_endpt_t* const
p_endpt )
 {
-	uint64_t		key;
+
+	cl_status_t		cl_status;
+	uint64_t			key;
 	cl_map_item_t	*p_qitem;
 	cl_fmap_item_t	*p_fitem;
 
@@ -4285,10 +4337,22 @@
 	while( p_port->endpt_rdr > 1 )
 		;
 
+
 	/* Link the endpoint to the port. */
-	cl_obj_insert_rel_parent_locked(
+	cl_status = cl_obj_insert_rel_parent_locked(
 		&p_endpt->rel, &p_port->obj, &p_endpt->obj );
+	
+	if(cl_status != CL_SUCCESS){
+		cl_obj_destroy(&p_endpt->obj);
+		return IB_INVALID_STATE;
+	}
 
+#if DBG
+	p_port->ref[12]++;
+	IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("ref  type %d
ref_cnt %d\n",12,p_port->obj.ref_cnt));
+#endif
+	
+
 	p_endpt->mac = mac;
 	key = 0;
 	cl_memcpy( &key, &mac, sizeof(mac_addr_t) );
@@ -4304,8 +4368,10 @@
 			&p_port->endpt_mgr.lid_endpts, p_endpt->dlid,
&p_endpt->lid_item );
 		CL_ASSERT( p_qitem == &p_endpt->lid_item );
 	}
+	
+	IPOIB_EXIT( IPOIB_DBG_ENDPT );
+	return IB_SUCCESS;
 
-	IPOIB_EXIT( IPOIB_DBG_ENDPT );
 }
 
 
@@ -4348,10 +4414,10 @@
 
 	/* Add the broadcast endpoint to the endpoint map. */
 	cl_memset( &bcast_mac, 0xFF, sizeof(bcast_mac) );
-	__endpt_mgr_insert_locked( p_port, bcast_mac, p_endpt );
+	status = __endpt_mgr_insert_locked( p_port, bcast_mac, p_endpt
);
 
 	IPOIB_EXIT( IPOIB_DBG_INIT );
-	return IB_SUCCESS;
+	return status;
 }
 
 
@@ -4397,6 +4463,11 @@
 
 		cl_obj_unlock( &p_port->obj );
 		cl_obj_destroy( &p_endpt->obj );
+#if DBG
+		p_port->ref[12]--;
+		IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("ref  type
%d ref_cnt %d\n",12,p_port->obj.ref_cnt));
+#endif
+
 	}
 	else
 	{
@@ -4506,15 +4577,14 @@
 	query.pfn_query_cb = __port_info_cb;
 
 	/* reference the object for the multicast query. */
-	cl_obj_ref( &p_port->obj );
-
+	ipoib_port_ref(p_port, 7);
 	status = p_port->p_adapter->p_ifc->query(
 		p_port->p_adapter->h_al, &query, &p_port->ib_mgr.h_query
);
 	if( status != IB_SUCCESS )
 	{
 		KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
 		ipoib_set_inactive( p_port->p_adapter );
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 7);
 		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
 			("ib_query returned %s\n", 
 			p_port->p_adapter->p_ifc->get_err_str( status ))
);
@@ -4567,13 +4637,18 @@
 
 	/* __endpt_mgr_insert expects *one* reference to be held. */
 	cl_atomic_inc( &p_port->endpt_rdr );
-	__endpt_mgr_insert( p_port, p_port->p_adapter->params.conf_mac,
p_endpt );
+	status  = __endpt_mgr_insert( p_port,
p_port->p_adapter->params.conf_mac, p_endpt );
 	cl_atomic_dec( &p_port->endpt_rdr );
+	if(status != IB_SUCCESS){
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("__endpt_mgr_insert for local endpoint returned
%s\n",
+			p_port->p_adapter->p_ifc->get_err_str( status ))
);
+		return status;
+	}
 
 	p_port->p_local_endpt = p_endpt;
-
 	IPOIB_EXIT( IPOIB_DBG_INIT );
-	return IB_SUCCESS;
+	return status;
 }
 
 
@@ -4674,7 +4749,7 @@
 		p_port->p_adapter->p_ifc->put_mad(
p_query_rec->p_result_mad );
 
 	/* Release the reference taken when issuing the port info query.
*/
-	cl_obj_deref( &p_port->obj );
+	ipoib_port_deref(p_port,107);
 
 	IPOIB_EXIT( IPOIB_DBG_INIT );
 }
@@ -4711,13 +4786,12 @@
 	query.pfn_query_cb = __bcast_get_cb;
 
 	/* reference the object for the multicast query. */
-	cl_obj_ref( &p_port->obj );
-
+	ipoib_port_ref(p_port, 8);
 	status = p_port->p_adapter->p_ifc->query(
 		p_port->p_adapter->h_al, &query, &p_port->ib_mgr.h_query
);
 	if( status != IB_SUCCESS )
 	{
-		cl_obj_deref( &p_port->obj );
+	ipoib_port_deref(p_port, 8);
 		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
 			("ib_query returned %s\n", 
 			p_port->p_adapter->p_ifc->get_err_str( status ))
);
@@ -4796,8 +4870,7 @@
 		p_port->p_adapter->p_ifc->put_mad(
p_query_rec->p_result_mad );
 
 	/* Release the reference taken when issuing the member record
query. */
-	cl_obj_deref( &p_port->obj );
-
+	ipoib_port_deref(p_port, 108);
 	IPOIB_EXIT( IPOIB_DBG_MCAST );
 }
 
@@ -4855,16 +4928,15 @@
 	}
 
 	/* reference the object for the multicast join request. */
-	cl_obj_ref( &p_port->obj );
-
+	ipoib_port_ref(p_port, 9);
 	status = p_port->p_adapter->p_ifc->join_mcast(
 		p_port->ib_mgr.h_qp, &mcast_req );
 	if( status != IB_SUCCESS )
 	{
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 9);
 		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
-			("ib_join_mcast returned %s\n", 
-			p_port->p_adapter->p_ifc->get_err_str( status ))
);
+				("ib_join_mcast returned %s\n", 
+				p_port->p_adapter->p_ifc->get_err_str(
status )) );
 	}
 	IPOIB_EXIT( IPOIB_DBG_MCAST );
 	return status;
@@ -4915,12 +4987,11 @@
 	mcast_req.pkey_index = 0;
 
 	/* reference the object for the multicast join request. */
-	cl_obj_ref( &p_port->obj );
-
+	ipoib_port_ref(p_port, 10);
 	status = p_port->p_adapter->p_ifc->join_mcast(
p_port->ib_mgr.h_qp, &mcast_req );
 	if( status != IB_SUCCESS )
 	{
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 10);
 		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
 			("ib_join_mcast returned %s\n", 
 			p_port->p_adapter->p_ifc->get_err_str( status ))
);
@@ -5011,7 +5082,7 @@
 			p_port->p_adapter->p_ifc->leave_mcast(
p_mcast_rec->h_mcast, NULL );
 
 		KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 109);
 		IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION,
IPOIB_DBG_INIT,
 			("Invalid state - Aborting.\n") );
 		return;
@@ -5056,7 +5127,7 @@
 			ipoib_set_inactive( p_port->p_adapter );
 			KeSetEvent( &p_port->sa_event, EVENT_INCREMENT,
FALSE );
 		}
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 209);
 		IPOIB_EXIT( IPOIB_DBG_INIT );
 		return;
 	}
@@ -5085,7 +5156,7 @@
 		/* Flag the adapter as hung. */
 		p_port->p_adapter->hung = TRUE;
 		KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 309);
 		IPOIB_EXIT( IPOIB_DBG_INIT );
 		return;
 	}
@@ -5105,7 +5176,7 @@
 	ipoib_set_active( p_port->p_adapter );
 
 	KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
-	cl_obj_deref( &p_port->obj );
+	ipoib_port_deref(p_port, 409);
 	IPOIB_EXIT( IPOIB_DBG_INIT );
 }
 
@@ -5277,15 +5348,20 @@
 		return IB_INSUFFICIENT_MEMORY;
 	}
 
-	__endpt_mgr_insert_locked( p_port, mac, p_endpt );
-
+	status = __endpt_mgr_insert_locked( p_port, mac, p_endpt );
+	if( status != IB_SUCCESS )
+	{
+		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("__endpt_mgr_insert_locked returned %s\n", 
+			p_port->p_adapter->p_ifc->get_err_str( status ))
);
+		return status;
+	}
 	/* reference the object for the multicast join request. */
-	cl_obj_ref( &p_port->obj );
-
+	ipoib_port_ref(p_port, 11);
 	status = p_port->p_adapter->p_ifc->join_mcast(
p_port->ib_mgr.h_qp, &mcast_req );
 	if( status != IB_SUCCESS )
 	{
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 11);
 		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
 			("ib_join_mcast returned %s\n", 
 			p_port->p_adapter->p_ifc->get_err_str( status ))
);
@@ -5317,7 +5393,7 @@
 		if( p_mcast_rec->status == IB_SUCCESS )
 			p_port->p_adapter->p_ifc->leave_mcast(
p_mcast_rec->h_mcast, NULL );
 
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 111);
 		IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION,
IPOIB_DBG_INIT,
 			("Invalid state - Aborting.\n") );
 		return;
@@ -5331,7 +5407,7 @@
 			p_port->p_adapter->p_ifc->get_err_str(
p_mcast_rec->status )) );
 		/* Flag the adapter as hung. */
 		p_port->p_adapter->hung =TRUE;
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 211);
 		IPOIB_EXIT( IPOIB_DBG_MCAST );
 		return;
 	}
@@ -5350,7 +5426,7 @@
 		IPOIB_PRINT(TRACE_LEVEL_WARNING, IPOIB_DBG_ERROR,
 			("Failed to find endpoint for update.\n") );
 		p_port->p_adapter->p_ifc->leave_mcast(
p_mcast_rec->h_mcast, NULL );
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 311);
 		IPOIB_EXIT( IPOIB_DBG_MCAST );
 		return;
 	}
@@ -5369,7 +5445,7 @@
 			p_port->p_adapter->p_ifc->get_err_str( status ))
);
 		/* Flag the adapter as hung. */
 		p_port->p_adapter->hung = TRUE;
-		cl_obj_deref( &p_port->obj );
+		ipoib_port_deref(p_port, 411);
 		IPOIB_EXIT( IPOIB_DBG_MCAST );
 		return;
 	}
@@ -5389,7 +5465,7 @@
 	/* Try to send all pending sends. */
 	ipoib_port_resume( p_port );
 
-	cl_obj_deref( &p_port->obj );
+	ipoib_port_deref(p_port, 511);
 
 	IPOIB_EXIT( IPOIB_DBG_MCAST );
 }
Index: ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- ulp/ipoib/kernel/ipoib_port.h	(revision 1528)
+++ ulp/ipoib/kernel/ipoib_port.h	(working copy)
@@ -499,6 +499,10 @@
 	atomic32_t				hdr_idx;
 	ipoib_hdr_t				hdr[1];
 
+#if DBG
+	int						ref[20];
+#endif
+
 }	ipoib_port_t;
 /*
 * FIELDS
@@ -592,4 +596,14 @@
 	IN		const	mac_addr_t
mac,
 		OUT			ib_gid_t*
p_gid );
 
+
+inline void ipoib_port_ref(
+	IN				ipoib_port_t *
p_port, 
+	IN				int
type);
+
+inline void ipoib_port_deref(
+	IN				ipoib_port_t *
p_port,
+	IN				int
type);
+
+
 #endif	/* _IPOIB_PORT_H_ */










> -----Original Message-----
> From: openib-windows-bounces at openib.org 
> [mailto:openib-windows-bounces at openib.org] On Behalf Of Yossi 
> Leybovich
> Sent: Monday, July 10, 2006 6:45 PM
> To: Tillier, Fabian
> Cc: openib-windows at openib.org
> Subject: Re: [Openib-windows] races on __destrot_obj function
> 
>  
> Fab 
> 
> Look at the following scenario
> 
> call to ipoib_port_destroy that call to its destroy obj function.
> The destroy function send all objects in the child's list to 
> destroy (endpnt use ASYNC destroy) The port finish his 
> destroy function and wait till ref_cnt = 0
> 
> Now while the endpnt try to destroy itself it calls
> ipoib_port_resum,(endpnt_cleanup) in ipoib_port_resume in 
> case of MCAST packet the code create endpnt and try to join 
> to the mcast group.
> This new end point is added to the port child list and take 
> reference Now the port ref_cnt will never be 0 ---> deadlock
> 
> Here is the prints I generate on one of our machines.
> 
> 
> ~1:[IPoIB]:__endpt_mgr_reset_all() [
> ~1:[IPoIB]:__endpt_mgr_reset_all() ]
> ~1:[IPoIB]:__endpt_mgr_remove_all() [
> ~1:[IPoIB]:__endpt_mgr_remove_all() ]
> ~1:[IPoIB]:__endpt_destroying() [
> ~1:[IPoIB]:__endpt_destroying() ]
> ~1:[IPoIB]:__endpt_cleanup() [
> ~1:[IPoIB]:ipoib_port_resume(): ipoib_resume.....
> ~1:[IPoIB]:__endpt_destroying() [
> ~1:[IPoIB]:__endpt_destroying() ]
> ~1:[IPoIB]:__endpt_mgr_ref() [
> ~1:[IPoIB]:__endpt_mgr_ref(): Look for :	  MAC: 01-00-5E-00-00-16
> ~1:[IPoIB]:__endpt_mgr_ref(): Failed endpoint lookup.
> ~1:[IPoIB]:__endpt_mgr_ref() ]
> ~1:[IPoIB]:ipoib_port_join_mcast() [
> ~1:[IPoIB]:__endpt_mgr_ref() [
> ~1:[IPoIB]:__endpt_mgr_ref(): Look for :	  MAC: 01-00-5E-00-00-16
> ~1:[IPoIB]:__endpt_mgr_ref(): Failed endpoint lookup.
> ~1:[IPoIB]:__endpt_mgr_ref() ]
> ~1:[IPoIB]:ipoib_endpt_create() [
> ~1:[IPoIB]:ipoib_endpt_create() ]
> ~1:[IPoIB]:__endpt_mgr_insert_locked() [
> ~1:[IPoIB]:__endpt_mgr_insert_locked(): insert  :	  MAC:
> 01-00-5E-00-00-16
> ~1:[IPoIB]:__endpt_mgr_insert() [
> ~1:[IPoIB]:__endpt_mgr_insert() !ERROR!: take ref  type 12 ref_cnt 8
> ~1:[IPoIB]:__endpt_mgr_insert() ]
> ~1:[IPoIB]:__endpt_mgr_insert_locked() ]
> ~1:[IPoIB]:ipoib_port_ref() !ERROR!: take ref type 11 ref_cnt 9
> ~1:[IPoIB]:ipoib_port_join_mcast() ]
> ~1:[IPoIB]:__endpt_destroying() [
> ~1:[IPoIB]:__endpt_destroying() ]
> ~1:[IPoIB]:__endpt_destroying() [
> ~1:[IPoIB]:__endpt_cleanup() ]
> 
> 
> You can see that while we are trying to destroy everything 
> the ipoib_port_resume create new endpnt I think that the 
> proper fix for that is in cl_obj_insert_rel_parent_locked
> 
> Need to exit if the parent state != CL_INITIALIZED. (same as 
> we do in al_obj).
> I created patch that check status when adding obj to the 
> child list and if the parent is not in CL_INITIALIZED it 
> return error This effect endpt_insert and endpt_insert_locked 
> which now return status.
> 
> I also remove the port_resume from the destruction of endpnt 
> and move it to the destruction of the port.
> I did it because after the first change the port is being 
> destroyed before the endpnt and port_resume get NULL object 
> for the port.
> I am  testing the patch now to see its affect.
> 
> Do you have any comments?
> 
> Yossi 
> 
> 
> 
> 
> 
> 
> 
> _______________________________________________
> openib-windows mailing list
> openib-windows at openib.org
> http://openib.org/mailman/listinfo/openib-windows
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ipoib_ref_cnt.patch
Type: application/octet-stream
Size: 22439 bytes
Desc: ipoib_ref_cnt.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060711/402ea31b/attachment.obj>


More information about the ofw mailing list