[Openib-windows] races on __destrot_obj function
Yossi Leybovich
sleybo at mellanox.co.il
Tue Jul 11 02:09:15 PDT 2006
Attached is patch that solve this problem
It includes:
1. fix to cl_obj to return value if the parent port is not in
CL_INITIALIZED
2. fix to insert/insert_locked function to check return value
3. add ref count mechanism to help debug ref count problems
4. not call port_resume in endpt cleanup but in port destroy
I tested this patch with our test and it pass
Pls review
10x
Yossi
Singed-off-by: Yossi Leybovich (sleybo at mellanox.co.il)
Index: core/complib/cl_obj.c
===================================================================
--- core/complib/cl_obj.c (revision 1528)
+++ core/complib/cl_obj.c (working copy)
@@ -351,17 +351,19 @@
/*
* Add a dependent relationship between two objects.
*/
-void
+cl_status_t
cl_obj_insert_rel(
IN cl_obj_rel_t * const
p_rel,
IN cl_obj_t * const
p_parent_obj,
IN cl_obj_t * const
p_child_obj )
{
+ cl_status_t status;
CL_ASSERT( p_rel && p_parent_obj && p_child_obj );
cl_spinlock_acquire( &p_parent_obj->lock );
- cl_obj_insert_rel_parent_locked( p_rel, p_parent_obj,
p_child_obj );
+ status = cl_obj_insert_rel_parent_locked( p_rel, p_parent_obj,
p_child_obj );
cl_spinlock_release( &p_parent_obj->lock );
+ return status;
}
@@ -369,7 +371,7 @@
/*
* Add a dependent relationship between two objects.
*/
-void
+cl_status_t
cl_obj_insert_rel_parent_locked(
IN cl_obj_rel_t * const
p_rel,
IN cl_obj_t * const
p_parent_obj,
@@ -377,6 +379,8 @@
{
CL_ASSERT( p_rel && p_parent_obj && p_child_obj );
+ if(p_parent_obj->state != CL_INITIALIZED)
+ return CL_INVALID_STATE;
/* The child object needs to maintain a reference on the parent.
*/
cl_obj_ref( p_parent_obj );
cl_obj_ref( p_child_obj );
@@ -396,6 +400,7 @@
(cl_list_item_t*)&p_rel->pool_item );
cl_spinlock_release( &p_child_obj->lock );
+ return CL_SUCCESS;
}
Index: inc/complib/cl_obj.h
===================================================================
--- inc/complib/cl_obj.h (revision 1528)
+++ inc/complib/cl_obj.h (working copy)
@@ -877,7 +877,7 @@
*
* SYNOPSIS
*/
-CL_EXPORT void CL_API
+CL_EXPORT cl_status_t CL_API
cl_obj_insert_rel(
IN cl_obj_rel_t * const
p_rel,
IN cl_obj_t * const
p_parent_obj,
@@ -922,7 +922,7 @@
*
* SYNOPSIS
*/
-CL_EXPORT void CL_API
+CL_EXPORT cl_status_t CL_API
cl_obj_insert_rel_parent_locked(
IN cl_obj_rel_t * const
p_rel,
IN cl_obj_t * const
p_parent_obj,
Index: ulp/ipoib/kernel/ipoib_adapter.c
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.c (revision 1528)
+++ ulp/ipoib/kernel/ipoib_adapter.c (working copy)
@@ -717,7 +717,7 @@
if( p_adapter->state == IB_PNP_PORT_ACTIVE )
{
p_port = p_adapter->p_port;
- cl_obj_ref( &p_port->obj );
+ ipoib_port_ref(p_port, 1);
}
cl_obj_unlock( &p_adapter->obj );
@@ -764,9 +764,9 @@
num_macs * sizeof(mac_addr_t) );
p_adapter->mcast_array_size = num_macs;
- if( p_port )
- cl_obj_deref( &p_port->obj );
-
+ if( p_port ){
+ ipoib_port_deref(p_port, 1);
+ }
IPOIB_EXIT( IPOIB_DBG_MCAST );
}
Index: ulp/ipoib/kernel/ipoib_debug.h
===================================================================
--- ulp/ipoib/kernel/ipoib_debug.h (revision 1528)
+++ ulp/ipoib/kernel/ipoib_debug.h (working copy)
@@ -68,7 +68,8 @@
WPP_DEFINE_BIT(IPOIB_DBG_ALLOC) \
WPP_DEFINE_BIT(IPOIB_DBG_OID) \
WPP_DEFINE_BIT(IPOIB_DBG_IOCTL) \
- WPP_DEFINE_BIT(IPOIB_DBG_STAT))
+ WPP_DEFINE_BIT(IPOIB_DBG_STAT) \
+ WPP_DEFINE_BIT(IPOIB_DBG_OBJ))
@@ -111,6 +112,7 @@
#define IPOIB_DBG_OID (1 << 10)
#define IPOIB_DBG_IOCTL (1 << 11)
#define IPOIB_DBG_STAT (1 << 12)
+#define IPOIB_DBG_OBJ (1<<31)
#define IPOIB_DBG_ERROR (CL_DBG_ERROR | IPOIB_DBG_ERR)
#define IPOIB_DBG_ALL CL_DBG_ALL
Index: ulp/ipoib/kernel/ipoib_driver.c
===================================================================
--- ulp/ipoib/kernel/ipoib_driver.c (revision 1528)
+++ ulp/ipoib/kernel/ipoib_driver.c (working copy)
@@ -1855,14 +1855,13 @@
}
p_port = p_adapter->p_port;
- cl_obj_ref( &p_port->obj );
+ ipoib_port_ref(p_port, 2);
cl_obj_unlock( &p_adapter->obj );
cl_perf_start( PortSend );
ipoib_port_send( p_port, packet_array, num_packets );
cl_perf_stop( &p_port->p_adapter->perf, PortSend );
- cl_obj_deref( &p_port->obj );
-
+ ipoib_port_deref(p_port, 2);
cl_perf_stop( &p_adapter->perf, SendPackets );
cl_perf_log( &p_adapter->perf, SendBundle, num_packets );
Index: ulp/ipoib/kernel/ipoib_endpoint.c
===================================================================
--- ulp/ipoib/kernel/ipoib_endpoint.c (revision 1528)
+++ ulp/ipoib/kernel/ipoib_endpoint.c (working copy)
@@ -224,8 +224,6 @@
p_endpt = PARENT_STRUCT( p_obj, ipoib_endpt_t, obj );
p_port = __endpt_parent( p_endpt );
- /* Resume sends so that any pending sends for this endpoint
fail. */
- ipoib_port_resume( p_port );
/* Leave the multicast group if it exists. */
if( p_endpt->h_mcast )
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c (revision 1528)
+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -389,13 +389,13 @@
IN ipoib_port_t* const
p_port,
IN const net16_t
lid );
-static inline void
+static inline ib_api_status_t
__endpt_mgr_insert_locked(
IN ipoib_port_t* const
p_port,
IN const mac_addr_t
mac,
IN ipoib_endpt_t* const
p_endpt );
-static inline void
+static inline ib_api_status_t
__endpt_mgr_insert(
IN ipoib_port_t* const
p_port,
IN const mac_addr_t
mac,
@@ -457,7 +457,25 @@
return cl_memcmp( p_key1, p_key2, sizeof(ib_gid_t) );
}
+inline void ipoib_port_ref(ipoib_port_t * p_port, int type)
+{
+ cl_obj_ref( &p_port->obj );
+#if DBG
+ p_port->ref[type%100] --;
+ IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,(" ref type %d
ref_cnt %d\n",type,p_port->obj.ref_cnt));
+#endif
+}
+inline void ipoib_port_deref(ipoib_port_t * p_port, int type)
+{
+ cl_obj_deref( &p_port->obj );
+
+#if DBG
+ p_port->ref[type%100] --;
+ IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("deref type %d
ref_cnt %d\n",type,p_port->obj.ref_cnt));
+#endif
+}
+
/***********************************************************************
*******
*
* Implementation
@@ -623,6 +641,12 @@
/* We only ever destroy from the PnP callback thread. */
cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,
__port_destroying, __port_cleanup, __port_free );
+
+#if DBG
+ p_port->ref[0]++;
+ IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("ref type %d
ref_cnt %d\n",0,p_port->obj.ref_cnt));
+#endif
+
if( cl_status != CL_SUCCESS )
{
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
@@ -630,7 +654,19 @@
return IB_ERROR;
}
- cl_obj_insert_rel( &p_port->rel, &p_adapter->obj, &p_port->obj
);
+ cl_status = cl_obj_insert_rel( &p_port->rel, &p_adapter->obj,
&p_port->obj );
+ if( cl_status != CL_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_obj_insert_rel returned %s\n",
cl_status_text[cl_status]) );
+ cl_obj_destroy( &p_port->obj );
+ return IB_ERROR;
+ }
+
+#if DBG
+ p_port->ref[0]++;
+ IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("ref type %d
ref_cnt %d\n",0,p_port->obj.ref_cnt));
+#endif
IPOIB_EXIT( IPOIB_DBG_INIT );
return IB_SUCCESS;
@@ -653,6 +689,8 @@
__endpt_mgr_remove_all( p_port );
+ ipoib_port_resume( p_port );
+
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -1137,7 +1175,7 @@
/* Reference the port object for the send. */
if( p_desc )
{
- cl_obj_ref( &p_port->obj );
+ ipoib_port_ref(p_port, 3);
CL_ASSERT( p_desc->wr.wr_id == (uintn_t)p_desc );
#if IPOIB_INLINE_RECV
CL_ASSERT( p_desc->local_ds[0].vaddr ==
@@ -1181,8 +1219,7 @@
/*
* Dereference the port object since the receive is no longer
outstanding.
*/
- cl_obj_deref( &p_port->obj );
-
+ ipoib_port_deref(p_port, 3);
IPOIB_EXIT( IPOIB_DBG_RECV );
}
@@ -1327,7 +1364,7 @@
("Port in invalid state. Not reposting.\n") );
return 0;
}
- cl_obj_ref( &p_port->obj );
+ ipoib_port_ref(p_port, 4);
cl_obj_unlock( &p_port->obj );
while( p_port->recv_mgr.depth <
p_port->p_adapter->params.rq_depth )
@@ -1382,8 +1419,7 @@
}
}
- cl_obj_deref( &p_port->obj );
-
+ ipoib_port_deref(p_port, 4);
IPOIB_EXIT( IPOIB_DBG_RECV );
return p_port->p_adapter->params.rq_low_watermark -
p_port->recv_mgr.depth;
}
@@ -1513,8 +1549,7 @@
cl_qlist_init( &done_list );
cl_qlist_init( &bad_list );
- cl_obj_ref( &p_port->obj );
-
+ ipoib_port_ref(p_port, 5);
for( i = 0; i < MAX_RECV_WC; i++ )
wc[i].p_next = &wc[i + 1];
wc[MAX_RECV_WC - 1].p_next = NULL;
@@ -1633,7 +1668,7 @@
cl_perf_stop( &p_port->p_adapter->perf, RearmRecv );
CL_ASSERT( status == IB_SUCCESS );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 5);
cl_perf_stop( &p_port->p_adapter->perf, RecvCb );
@@ -1715,13 +1750,19 @@
if( !*pp_src )
{
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
- ("ipoib_endpt_create returned
%s\n",
-
p_port->p_adapter->p_ifc->get_err_str( status )) );
+ ("ipoib_endpt_create
failed\n"));
return;
}
cl_perf_start( EndptInsert );
cl_obj_lock( &p_port->obj );
- __endpt_mgr_insert( p_port, mac, *pp_src );
+ status = __endpt_mgr_insert( p_port, mac,
*pp_src );
+ if( status != IB_SUCCESS)
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+ ("__endpt_mgr_insert failed \n",
+
p_port->p_adapter->p_ifc->get_err_str( status )));
+ return;
+ }
cl_obj_unlock( &p_port->obj );
cl_perf_stop( &p_port->p_adapter->perf,
EndptInsert );
}
@@ -1816,7 +1857,7 @@
}
cl_qlist_insert_tail( p_bad_list,
&p_desc->item.list_item );
/* Dereference the port object on behalf of the
failed receive. */
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 103);
continue;
}
@@ -1828,7 +1869,7 @@
("Received ETH packet < min size\n") );
ipoib_inc_recv_stat( p_port->p_adapter,
IP_STAT_ERROR, 0 );
cl_qlist_insert_tail( p_bad_list,
&p_desc->item.list_item );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 203);
continue;
}
@@ -1857,7 +1898,7 @@
*/
cl_qlist_insert_tail( p_bad_list,
&p_desc->item.list_item );
/* Dereference the port object on behalf
of the failed recv. */
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 303);
continue;
}
}
@@ -1948,7 +1989,7 @@
ipoib_inc_recv_stat( p_port->p_adapter,
IP_STAT_ERROR, 0 );
cl_qlist_insert_tail( p_bad_list,
&p_desc->item.list_item );
/* Dereference the port object on behalf of the
failed receive. */
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 403);
}
else
{
@@ -2255,16 +2296,24 @@
*/
*pp_src = ipoib_endpt_create( &p_ib_arp->src_hw.gid,
0, (p_ib_arp->src_hw.flags_qpn &
CL_HTON32(0x00FFFFFF)) );
+
if( !*pp_src )
{
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
- ("ipoib_endpt_create returned %s\n",
- p_port->p_adapter->p_ifc->get_err_str(
status )) );
+ ("ipoib_endpt_create failed\n"));
return status;
}
cl_obj_lock( &p_port->obj );
- __endpt_mgr_insert( p_port, mac, *pp_src );
+ status = __endpt_mgr_insert( p_port, mac, *pp_src );
+ if( status != IB_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+ ("__endpt_mgr_insert return %s \n",
+ p_port->p_adapter->p_ifc->get_err_str(
status )) );));
+ return status;
+ }
+
cl_obj_unlock( &p_port->obj );
}
@@ -3634,7 +3683,6 @@
PERF_DECLARE( PostSend );
IPOIB_ENTER( IPOIB_DBG_SEND );
-
cl_spinlock_acquire( &p_port->send_lock );
for( p_item = cl_qlist_head( &p_port->send_mgr.pending_list );
@@ -3774,8 +3822,7 @@
p_port = (ipoib_port_t*)cq_context;
- cl_obj_ref( &p_port->obj );
-
+ ipoib_port_ref(p_port, 6);
for( i = 0; i < MAX_SEND_WC; i++ )
wc[i].p_next = &wc[i + 1];
wc[MAX_SEND_WC - 1].p_next = NULL;
@@ -3873,7 +3920,7 @@
ipoib_port_resume( p_port );
cl_perf_stop( &p_port->p_adapter->perf, PortResume );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 6);
cl_perf_stop( &p_port->p_adapter->perf, SendCb );
cl_perf_update_ctr( &p_port->p_adapter->perf, SendCompBundle );
@@ -4240,12 +4287,13 @@
}
-inline void
+inline ib_api_status_t
__endpt_mgr_insert_locked(
IN ipoib_port_t* const
p_port,
IN const mac_addr_t
mac,
IN ipoib_endpt_t* const
p_endpt )
{
+ ib_api_status_t status = IB_SUCCESS;
IPOIB_ENTER( IPOIB_DBG_ENDPT );
IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,
@@ -4261,21 +4309,25 @@
}
/* __endpt_mgr_insert expects *one* reference to be held when
being called. */
cl_atomic_inc( &p_port->endpt_rdr );
- __endpt_mgr_insert( p_port, mac, p_endpt );
+ status= __endpt_mgr_insert( p_port, mac, p_endpt );
cl_atomic_dec( &p_port->endpt_rdr );
cl_obj_unlock( &p_port->obj );
+
+ IPOIB_EXIT( IPOIB_DBG_ENDPT );
+ return status;
- IPOIB_EXIT( IPOIB_DBG_ENDPT );
}
-void
+inline ib_api_status_t
__endpt_mgr_insert(
IN ipoib_port_t* const
p_port,
IN const mac_addr_t
mac,
IN ipoib_endpt_t* const
p_endpt )
{
- uint64_t key;
+
+ cl_status_t cl_status;
+ uint64_t key;
cl_map_item_t *p_qitem;
cl_fmap_item_t *p_fitem;
@@ -4285,10 +4337,22 @@
while( p_port->endpt_rdr > 1 )
;
+
/* Link the endpoint to the port. */
- cl_obj_insert_rel_parent_locked(
+ cl_status = cl_obj_insert_rel_parent_locked(
&p_endpt->rel, &p_port->obj, &p_endpt->obj );
+
+ if(cl_status != CL_SUCCESS){
+ cl_obj_destroy(&p_endpt->obj);
+ return IB_INVALID_STATE;
+ }
+#if DBG
+ p_port->ref[12]++;
+ IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("ref type %d
ref_cnt %d\n",12,p_port->obj.ref_cnt));
+#endif
+
+
p_endpt->mac = mac;
key = 0;
cl_memcpy( &key, &mac, sizeof(mac_addr_t) );
@@ -4304,8 +4368,10 @@
&p_port->endpt_mgr.lid_endpts, p_endpt->dlid,
&p_endpt->lid_item );
CL_ASSERT( p_qitem == &p_endpt->lid_item );
}
+
+ IPOIB_EXIT( IPOIB_DBG_ENDPT );
+ return IB_SUCCESS;
- IPOIB_EXIT( IPOIB_DBG_ENDPT );
}
@@ -4348,10 +4414,10 @@
/* Add the broadcast endpoint to the endpoint map. */
cl_memset( &bcast_mac, 0xFF, sizeof(bcast_mac) );
- __endpt_mgr_insert_locked( p_port, bcast_mac, p_endpt );
+ status = __endpt_mgr_insert_locked( p_port, bcast_mac, p_endpt
);
IPOIB_EXIT( IPOIB_DBG_INIT );
- return IB_SUCCESS;
+ return status;
}
@@ -4397,6 +4463,11 @@
cl_obj_unlock( &p_port->obj );
cl_obj_destroy( &p_endpt->obj );
+#if DBG
+ p_port->ref[12]--;
+ IPOIB_PRINT(TRACE_LEVEL_ERROR,IPOIB_DBG_OBJ,("ref type
%d ref_cnt %d\n",12,p_port->obj.ref_cnt));
+#endif
+
}
else
{
@@ -4506,15 +4577,14 @@
query.pfn_query_cb = __port_info_cb;
/* reference the object for the multicast query. */
- cl_obj_ref( &p_port->obj );
-
+ ipoib_port_ref(p_port, 7);
status = p_port->p_adapter->p_ifc->query(
p_port->p_adapter->h_al, &query, &p_port->ib_mgr.h_query
);
if( status != IB_SUCCESS )
{
KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
ipoib_set_inactive( p_port->p_adapter );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 7);
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
("ib_query returned %s\n",
p_port->p_adapter->p_ifc->get_err_str( status ))
);
@@ -4567,13 +4637,18 @@
/* __endpt_mgr_insert expects *one* reference to be held. */
cl_atomic_inc( &p_port->endpt_rdr );
- __endpt_mgr_insert( p_port, p_port->p_adapter->params.conf_mac,
p_endpt );
+ status = __endpt_mgr_insert( p_port,
p_port->p_adapter->params.conf_mac, p_endpt );
cl_atomic_dec( &p_port->endpt_rdr );
+ if(status != IB_SUCCESS){
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("__endpt_mgr_insert for local endpoint returned
%s\n",
+ p_port->p_adapter->p_ifc->get_err_str( status ))
);
+ return status;
+ }
p_port->p_local_endpt = p_endpt;
-
IPOIB_EXIT( IPOIB_DBG_INIT );
- return IB_SUCCESS;
+ return status;
}
@@ -4674,7 +4749,7 @@
p_port->p_adapter->p_ifc->put_mad(
p_query_rec->p_result_mad );
/* Release the reference taken when issuing the port info query.
*/
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port,107);
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -4711,13 +4786,12 @@
query.pfn_query_cb = __bcast_get_cb;
/* reference the object for the multicast query. */
- cl_obj_ref( &p_port->obj );
-
+ ipoib_port_ref(p_port, 8);
status = p_port->p_adapter->p_ifc->query(
p_port->p_adapter->h_al, &query, &p_port->ib_mgr.h_query
);
if( status != IB_SUCCESS )
{
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 8);
IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
("ib_query returned %s\n",
p_port->p_adapter->p_ifc->get_err_str( status ))
);
@@ -4796,8 +4870,7 @@
p_port->p_adapter->p_ifc->put_mad(
p_query_rec->p_result_mad );
/* Release the reference taken when issuing the member record
query. */
- cl_obj_deref( &p_port->obj );
-
+ ipoib_port_deref(p_port, 108);
IPOIB_EXIT( IPOIB_DBG_MCAST );
}
@@ -4855,16 +4928,15 @@
}
/* reference the object for the multicast join request. */
- cl_obj_ref( &p_port->obj );
-
+ ipoib_port_ref(p_port, 9);
status = p_port->p_adapter->p_ifc->join_mcast(
p_port->ib_mgr.h_qp, &mcast_req );
if( status != IB_SUCCESS )
{
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 9);
IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
- ("ib_join_mcast returned %s\n",
- p_port->p_adapter->p_ifc->get_err_str( status ))
);
+ ("ib_join_mcast returned %s\n",
+ p_port->p_adapter->p_ifc->get_err_str(
status )) );
}
IPOIB_EXIT( IPOIB_DBG_MCAST );
return status;
@@ -4915,12 +4987,11 @@
mcast_req.pkey_index = 0;
/* reference the object for the multicast join request. */
- cl_obj_ref( &p_port->obj );
-
+ ipoib_port_ref(p_port, 10);
status = p_port->p_adapter->p_ifc->join_mcast(
p_port->ib_mgr.h_qp, &mcast_req );
if( status != IB_SUCCESS )
{
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 10);
IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
("ib_join_mcast returned %s\n",
p_port->p_adapter->p_ifc->get_err_str( status ))
);
@@ -5011,7 +5082,7 @@
p_port->p_adapter->p_ifc->leave_mcast(
p_mcast_rec->h_mcast, NULL );
KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 109);
IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION,
IPOIB_DBG_INIT,
("Invalid state - Aborting.\n") );
return;
@@ -5056,7 +5127,7 @@
ipoib_set_inactive( p_port->p_adapter );
KeSetEvent( &p_port->sa_event, EVENT_INCREMENT,
FALSE );
}
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 209);
IPOIB_EXIT( IPOIB_DBG_INIT );
return;
}
@@ -5085,7 +5156,7 @@
/* Flag the adapter as hung. */
p_port->p_adapter->hung = TRUE;
KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 309);
IPOIB_EXIT( IPOIB_DBG_INIT );
return;
}
@@ -5105,7 +5176,7 @@
ipoib_set_active( p_port->p_adapter );
KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 409);
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -5277,15 +5348,20 @@
return IB_INSUFFICIENT_MEMORY;
}
- __endpt_mgr_insert_locked( p_port, mac, p_endpt );
-
+ status = __endpt_mgr_insert_locked( p_port, mac, p_endpt );
+ if( status != IB_SUCCESS )
+ {
+ IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("__endpt_mgr_insert_locked returned %s\n",
+ p_port->p_adapter->p_ifc->get_err_str( status ))
);
+ return status;
+ }
/* reference the object for the multicast join request. */
- cl_obj_ref( &p_port->obj );
-
+ ipoib_port_ref(p_port, 11);
status = p_port->p_adapter->p_ifc->join_mcast(
p_port->ib_mgr.h_qp, &mcast_req );
if( status != IB_SUCCESS )
{
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 11);
IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
("ib_join_mcast returned %s\n",
p_port->p_adapter->p_ifc->get_err_str( status ))
);
@@ -5317,7 +5393,7 @@
if( p_mcast_rec->status == IB_SUCCESS )
p_port->p_adapter->p_ifc->leave_mcast(
p_mcast_rec->h_mcast, NULL );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 111);
IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION,
IPOIB_DBG_INIT,
("Invalid state - Aborting.\n") );
return;
@@ -5331,7 +5407,7 @@
p_port->p_adapter->p_ifc->get_err_str(
p_mcast_rec->status )) );
/* Flag the adapter as hung. */
p_port->p_adapter->hung =TRUE;
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 211);
IPOIB_EXIT( IPOIB_DBG_MCAST );
return;
}
@@ -5350,7 +5426,7 @@
IPOIB_PRINT(TRACE_LEVEL_WARNING, IPOIB_DBG_ERROR,
("Failed to find endpoint for update.\n") );
p_port->p_adapter->p_ifc->leave_mcast(
p_mcast_rec->h_mcast, NULL );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 311);
IPOIB_EXIT( IPOIB_DBG_MCAST );
return;
}
@@ -5369,7 +5445,7 @@
p_port->p_adapter->p_ifc->get_err_str( status ))
);
/* Flag the adapter as hung. */
p_port->p_adapter->hung = TRUE;
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 411);
IPOIB_EXIT( IPOIB_DBG_MCAST );
return;
}
@@ -5389,7 +5465,7 @@
/* Try to send all pending sends. */
ipoib_port_resume( p_port );
- cl_obj_deref( &p_port->obj );
+ ipoib_port_deref(p_port, 511);
IPOIB_EXIT( IPOIB_DBG_MCAST );
}
Index: ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- ulp/ipoib/kernel/ipoib_port.h (revision 1528)
+++ ulp/ipoib/kernel/ipoib_port.h (working copy)
@@ -499,6 +499,10 @@
atomic32_t hdr_idx;
ipoib_hdr_t hdr[1];
+#if DBG
+ int ref[20];
+#endif
+
} ipoib_port_t;
/*
* FIELDS
@@ -592,4 +596,14 @@
IN const mac_addr_t
mac,
OUT ib_gid_t*
p_gid );
+
+inline void ipoib_port_ref(
+ IN ipoib_port_t *
p_port,
+ IN int
type);
+
+inline void ipoib_port_deref(
+ IN ipoib_port_t *
p_port,
+ IN int
type);
+
+
#endif /* _IPOIB_PORT_H_ */
> -----Original Message-----
> From: openib-windows-bounces at openib.org
> [mailto:openib-windows-bounces at openib.org] On Behalf Of Yossi
> Leybovich
> Sent: Monday, July 10, 2006 6:45 PM
> To: Tillier, Fabian
> Cc: openib-windows at openib.org
> Subject: Re: [Openib-windows] races on __destrot_obj function
>
>
> Fab
>
> Look at the following scenario
>
> call to ipoib_port_destroy that call to its destroy obj function.
> The destroy function send all objects in the child's list to
> destroy (endpnt use ASYNC destroy) The port finish his
> destroy function and wait till ref_cnt = 0
>
> Now while the endpnt try to destroy itself it calls
> ipoib_port_resum,(endpnt_cleanup) in ipoib_port_resume in
> case of MCAST packet the code create endpnt and try to join
> to the mcast group.
> This new end point is added to the port child list and take
> reference Now the port ref_cnt will never be 0 ---> deadlock
>
> Here is the prints I generate on one of our machines.
>
>
> ~1:[IPoIB]:__endpt_mgr_reset_all() [
> ~1:[IPoIB]:__endpt_mgr_reset_all() ]
> ~1:[IPoIB]:__endpt_mgr_remove_all() [
> ~1:[IPoIB]:__endpt_mgr_remove_all() ]
> ~1:[IPoIB]:__endpt_destroying() [
> ~1:[IPoIB]:__endpt_destroying() ]
> ~1:[IPoIB]:__endpt_cleanup() [
> ~1:[IPoIB]:ipoib_port_resume(): ipoib_resume.....
> ~1:[IPoIB]:__endpt_destroying() [
> ~1:[IPoIB]:__endpt_destroying() ]
> ~1:[IPoIB]:__endpt_mgr_ref() [
> ~1:[IPoIB]:__endpt_mgr_ref(): Look for : MAC: 01-00-5E-00-00-16
> ~1:[IPoIB]:__endpt_mgr_ref(): Failed endpoint lookup.
> ~1:[IPoIB]:__endpt_mgr_ref() ]
> ~1:[IPoIB]:ipoib_port_join_mcast() [
> ~1:[IPoIB]:__endpt_mgr_ref() [
> ~1:[IPoIB]:__endpt_mgr_ref(): Look for : MAC: 01-00-5E-00-00-16
> ~1:[IPoIB]:__endpt_mgr_ref(): Failed endpoint lookup.
> ~1:[IPoIB]:__endpt_mgr_ref() ]
> ~1:[IPoIB]:ipoib_endpt_create() [
> ~1:[IPoIB]:ipoib_endpt_create() ]
> ~1:[IPoIB]:__endpt_mgr_insert_locked() [
> ~1:[IPoIB]:__endpt_mgr_insert_locked(): insert : MAC:
> 01-00-5E-00-00-16
> ~1:[IPoIB]:__endpt_mgr_insert() [
> ~1:[IPoIB]:__endpt_mgr_insert() !ERROR!: take ref type 12 ref_cnt 8
> ~1:[IPoIB]:__endpt_mgr_insert() ]
> ~1:[IPoIB]:__endpt_mgr_insert_locked() ]
> ~1:[IPoIB]:ipoib_port_ref() !ERROR!: take ref type 11 ref_cnt 9
> ~1:[IPoIB]:ipoib_port_join_mcast() ]
> ~1:[IPoIB]:__endpt_destroying() [
> ~1:[IPoIB]:__endpt_destroying() ]
> ~1:[IPoIB]:__endpt_destroying() [
> ~1:[IPoIB]:__endpt_cleanup() ]
>
>
> You can see that while we are trying to destroy everything
> the ipoib_port_resume create new endpnt I think that the
> proper fix for that is in cl_obj_insert_rel_parent_locked
>
> Need to exit if the parent state != CL_INITIALIZED. (same as
> we do in al_obj).
> I created patch that check status when adding obj to the
> child list and if the parent is not in CL_INITIALIZED it
> return error This effect endpt_insert and endpt_insert_locked
> which now return status.
>
> I also remove the port_resume from the destruction of endpnt
> and move it to the destruction of the port.
> I did it because after the first change the port is being
> destroyed before the endpnt and port_resume get NULL object
> for the port.
> I am testing the patch now to see its affect.
>
> Do you have any comments?
>
> Yossi
>
>
>
>
>
>
>
> _______________________________________________
> openib-windows mailing list
> openib-windows at openib.org
> http://openib.org/mailman/listinfo/openib-windows
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ipoib_ref_cnt.patch
Type: application/octet-stream
Size: 22439 bytes
Desc: ipoib_ref_cnt.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060711/402ea31b/attachment.obj>
More information about the ofw
mailing list