[ofw] [IBAL] serialize MAD pool access to eliminate POOL corruption crash in multi HCA config.

Sean Hefty sean.hefty at intel.com
Tue Apr 28 10:23:29 PDT 2009


From the WDK docs:

ExInitializeNPagedLookasideList:
Initializes a system spin lock to control allocations from and frees to the
lookaside list in a multiprocessor-safe manner if necessary

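The locking around the NPagedLookasideList calls should be unnecessary: per the
documentation quoted above, the lookaside list already serializes its own
allocations and frees with an internal spin lock. The added locks are likely
just throwing off the timing enough to make it more difficult to hit the bug.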

>-----Original Message-----
>From: ofw-bounces at lists.openfabrics.org [mailto:ofw-
>bounces at lists.openfabrics.org] On Behalf Of Smith, Stan
>Sent: Tuesday, April 28, 2009 9:18 AM
>To: Leonid Keller
>Cc: ofw at lists.openfabrics.org
>Subject: [ofw] [IBAL] serialize MAD pool access to eliminate POOL corruption
>crash in multi HCA config.
>
>Hello,
>  The following patch serializes NPagedLookasideList access which eliminates
>MAD Pool corruption crash on 2nd HCA enable. The generic pool memory tag 'dmla'
>was changed to identify each specific pool - finer granularity debug.
>
>Please review for commit.
>
>This patch does not completely solve the 2nd HCA enable problem, although it
>does eliminate the POOL corruption crashes.
>
>Thanks,
>
>Stan.
>
>Signed off by stan.smith at intel.com
>
>--- C:/Documents and Settings/scsmith/Local Settings/Temp/al_mad_pool.c-
>revBASE.svn000.tmp.c    Tue Apr 28 09:07:40 2009
>+++ C:/Documents and Settings/scsmith/My Documents/openIB-
>windows/SVN/gen1/trunk/core/al/kernel/al_mad_pool.c   Tue Apr 28 09:08:42 2009
>@@ -140,12 +140,15 @@
>
>        /* Initialize the pool lists. */
>        cl_qlist_init( &h_pool->key_list );
>+
>        ExInitializeNPagedLookasideList( &h_pool->mad_stack, NULL, NULL,
>-               0, sizeof(mad_item_t), 'dmla', 0 );
>+               0, sizeof(mad_item_t), 'ktsm', 0 );
>+
>        ExInitializeNPagedLookasideList( &h_pool->mad_send_pool, NULL, NULL,
>-               0, sizeof(mad_send_t), 'dmla', 0 );
>+               0, sizeof(mad_send_t), 'lpsm', 0 );
>+
>        ExInitializeNPagedLookasideList( &h_pool->mad_rmpp_pool, NULL, NULL,
>-               0, sizeof(mad_rmpp_t), 'dmla', 0 );
>+               0, sizeof(mad_rmpp_t), 'pmrm', 0 );
>
>        /* Initialize the pool object. */
>        construct_al_obj( &h_pool->obj, AL_OBJ_TYPE_H_MAD_POOL );
>@@ -640,8 +643,10 @@
>        CL_ASSERT( pp_mad_element );
>
>        /* Obtain a MAD item from the stack. */
>+       cl_spinlock_acquire( &pool_key->h_pool->obj.lock );
>        p_mad_item = (mad_item_t*)ExAllocateFromNPagedLookasideList(
>                &pool_key->h_pool->mad_stack );
>+       cl_spinlock_release( &pool_key->h_pool->obj.lock );
>        if( !p_mad_item )
>                return IB_INSUFFICIENT_RESOURCES;
>
>@@ -709,7 +714,9 @@
>        p_mad_element->element.p_next = NULL;
>
>        /* Return the MAD element to the pool. */
>+       cl_spinlock_acquire( &pool_key->h_pool->obj.lock );
>        ExFreeToNPagedLookasideList( &pool_key->h_pool->mad_stack, p_mad_item
>);
>+       cl_spinlock_release( &pool_key->h_pool->obj.lock );
>
>        cl_atomic_dec( &pool_key->mad_cnt );
>        deref_al_obj( &pool_key->obj );
>@@ -731,8 +738,10 @@
>        CL_ASSERT( p_mad_item->pool_key );
>        CL_ASSERT( p_mad_item->pool_key->h_pool );
>
>+       cl_spinlock_acquire( &p_mad_item->pool_key->h_pool->obj.lock );
>        p_mad_send = ExAllocateFromNPagedLookasideList(
>                &p_mad_item->pool_key->h_pool->mad_send_pool );
>+       cl_spinlock_release( &p_mad_item->pool_key->h_pool->obj.lock );
>        if( !p_mad_send )
>                return NULL;
>
>@@ -761,7 +770,9 @@
>        p_mad_send = PARENT_STRUCT( h_mad_send, mad_send_t, mad_send );
>        h_pool = p_mad_send->h_pool;
>
>+       cl_spinlock_acquire( &h_pool->obj.lock );
>        ExFreeToNPagedLookasideList( &h_pool->mad_send_pool, p_mad_send );
>+       cl_spinlock_release( &h_pool->obj.lock );
>        deref_al_obj( &h_pool->obj );
> }
>
>@@ -781,8 +792,10 @@
>        CL_ASSERT( p_mad_item->pool_key );
>        CL_ASSERT( p_mad_item->pool_key->h_pool );
>
>+       cl_spinlock_acquire( &p_mad_item->pool_key->h_pool->obj.lock );
>        p_mad_rmpp = ExAllocateFromNPagedLookasideList(
>                &p_mad_item->pool_key->h_pool->mad_rmpp_pool );
>+       cl_spinlock_release( &p_mad_item->pool_key->h_pool->obj.lock );
>        if( !p_mad_rmpp )
>                return NULL;
>
>@@ -805,7 +818,9 @@
>
>        h_pool = p_mad_rmpp->h_pool;
>
>+       cl_spinlock_acquire( &h_pool->obj.lock );
>        ExFreeToNPagedLookasideList( &h_pool->mad_rmpp_pool, p_mad_rmpp );
>+       cl_spinlock_release( &h_pool->obj.lock );
>        deref_al_obj( &h_pool->obj );
> }





More information about the ofw mailing list