[ofw] [IBAL] serialize MAD pool access to eliminate POOL corruption crash in multi HCA config.
Sean Hefty
sean.hefty at intel.com
Tue Apr 28 10:23:29 PDT 2009
From the WDK docs:
ExInitializeNPagedLookasideList:
Initializes a system spin lock to control allocations from and frees to the
lookaside list in a multiprocessor-safe manner, if necessary.
The locking around the NPagedLookasideList calls should be unnecessary, since
the lookaside list already serializes allocations and frees internally. The
added lock is likely just throwing off the timing enough to make the bug more
difficult to hit.
>-----Original Message-----
>From: ofw-bounces at lists.openfabrics.org [mailto:ofw-
>bounces at lists.openfabrics.org] On Behalf Of Smith, Stan
>Sent: Tuesday, April 28, 2009 9:18 AM
>To: Leonid Keller
>Cc: ofw at lists.openfabrics.org
>Subject: [ofw] [IBAL] serialize MAD pool access to eliminate POOL corruption
>crash in multi HCA config.
>
>Hello,
> The following patch serializes NPagedLookasideList access, which eliminates
>the MAD pool corruption crash when a 2nd HCA is enabled. The generic pool
>memory tag 'dmla' was replaced with a distinct tag for each specific pool, to
>allow finer-grained debugging.
>
>Please review for commit.
>
>This patch does not completely solve the 2nd HCA enable problem, although it
>does eliminate the POOL corruption crashes.
>
>Thanks,
>
>Stan.
>
>Signed-off-by: stan.smith at intel.com
>
>--- C:/Documents and Settings/scsmith/Local Settings/Temp/al_mad_pool.c-
>revBASE.svn000.tmp.c Tue Apr 28 09:07:40 2009
>+++ C:/Documents and Settings/scsmith/My Documents/openIB-
>windows/SVN/gen1/trunk/core/al/kernel/al_mad_pool.c Tue Apr 28 09:08:42 2009
>@@ -140,12 +140,15 @@
>
> /* Initialize the pool lists. */
> cl_qlist_init( &h_pool->key_list );
>+
> ExInitializeNPagedLookasideList( &h_pool->mad_stack, NULL, NULL,
>- 0, sizeof(mad_item_t), 'dmla', 0 );
>+ 0, sizeof(mad_item_t), 'ktsm', 0 );
>+
> ExInitializeNPagedLookasideList( &h_pool->mad_send_pool, NULL, NULL,
>- 0, sizeof(mad_send_t), 'dmla', 0 );
>+ 0, sizeof(mad_send_t), 'lpsm', 0 );
>+
> ExInitializeNPagedLookasideList( &h_pool->mad_rmpp_pool, NULL, NULL,
>- 0, sizeof(mad_rmpp_t), 'dmla', 0 );
>+ 0, sizeof(mad_rmpp_t), 'pmrm', 0 );
>
> /* Initialize the pool object. */
> construct_al_obj( &h_pool->obj, AL_OBJ_TYPE_H_MAD_POOL );
>@@ -640,8 +643,10 @@
> CL_ASSERT( pp_mad_element );
>
> /* Obtain a MAD item from the stack. */
>+ cl_spinlock_acquire( &pool_key->h_pool->obj.lock );
> p_mad_item = (mad_item_t*)ExAllocateFromNPagedLookasideList(
> &pool_key->h_pool->mad_stack );
>+ cl_spinlock_release( &pool_key->h_pool->obj.lock );
> if( !p_mad_item )
> return IB_INSUFFICIENT_RESOURCES;
>
>@@ -709,7 +714,9 @@
> p_mad_element->element.p_next = NULL;
>
> /* Return the MAD element to the pool. */
>+ cl_spinlock_acquire( &pool_key->h_pool->obj.lock );
> ExFreeToNPagedLookasideList( &pool_key->h_pool->mad_stack, p_mad_item );
>+ cl_spinlock_release( &pool_key->h_pool->obj.lock );
>
> cl_atomic_dec( &pool_key->mad_cnt );
> deref_al_obj( &pool_key->obj );
>@@ -731,8 +738,10 @@
> CL_ASSERT( p_mad_item->pool_key );
> CL_ASSERT( p_mad_item->pool_key->h_pool );
>
>+ cl_spinlock_acquire( &p_mad_item->pool_key->h_pool->obj.lock );
> p_mad_send = ExAllocateFromNPagedLookasideList(
> &p_mad_item->pool_key->h_pool->mad_send_pool );
>+ cl_spinlock_release( &p_mad_item->pool_key->h_pool->obj.lock );
> if( !p_mad_send )
> return NULL;
>
>@@ -761,7 +770,9 @@
> p_mad_send = PARENT_STRUCT( h_mad_send, mad_send_t, mad_send );
> h_pool = p_mad_send->h_pool;
>
>+ cl_spinlock_acquire( &h_pool->obj.lock );
> ExFreeToNPagedLookasideList( &h_pool->mad_send_pool, p_mad_send );
>+ cl_spinlock_release( &h_pool->obj.lock );
> deref_al_obj( &h_pool->obj );
> }
>
>@@ -781,8 +792,10 @@
> CL_ASSERT( p_mad_item->pool_key );
> CL_ASSERT( p_mad_item->pool_key->h_pool );
>
>+ cl_spinlock_acquire( &p_mad_item->pool_key->h_pool->obj.lock );
> p_mad_rmpp = ExAllocateFromNPagedLookasideList(
> &p_mad_item->pool_key->h_pool->mad_rmpp_pool );
>+ cl_spinlock_release( &p_mad_item->pool_key->h_pool->obj.lock );
> if( !p_mad_rmpp )
> return NULL;
>
>@@ -805,7 +818,9 @@
>
> h_pool = p_mad_rmpp->h_pool;
>
>+ cl_spinlock_acquire( &h_pool->obj.lock );
> ExFreeToNPagedLookasideList( &h_pool->mad_rmpp_pool, p_mad_rmpp );
>+ cl_spinlock_release( &h_pool->obj.lock );
> deref_al_obj( &h_pool->obj );
> }
More information about the ofw
mailing list