[ofw] [IBAL] serialize MAD pool access to eliminate POOL corruption crash in multi HCA config.
Smith, Stan
stan.smith at intel.com
Tue Apr 28 12:33:27 PDT 2009
Leonid, skip the review/commit.
I'm inclined to agree with Sean's timing speculation.
Stan.
Hefty, Sean wrote:
> From the WDK docs:
>
> ExInitializeNPagedLookasideList:
> Initializes a system spin lock to control allocations from and frees
> to the lookaside list in a multiprocessor-safe manner if necessary
>
> The locking around the NPagedLookasideList should be unnecessary. It's
> likely just throwing off the timing enough to make the bug more
> difficult to hit.
>
>> -----Original Message-----
>> From: ofw-bounces at lists.openfabrics.org [mailto:ofw-
>> bounces at lists.openfabrics.org] On Behalf Of Smith, Stan
>> Sent: Tuesday, April 28, 2009 9:18 AM
>> To: Leonid Keller
>> Cc: ofw at lists.openfabrics.org
>> Subject: [ofw] [IBAL] serialize MAD pool access to eliminate POOL
>> corruption crash in multi HCA config.
>>
>> Hello,
>> The following patch serializes NPagedLookasideList access, which
>> eliminates the MAD pool corruption crash when a 2nd HCA is enabled.
>> The generic pool memory tag 'dmla' was replaced with a distinct tag
>> for each specific pool, to allow finer-grained debugging.
>>
>> Please review for commit.
>>
>> This patch does not completely solve the 2nd HCA enable problem,
>> although it does eliminate the POOL corruption crashes.
>>
>> Thanks,
>>
>> Stan.
>>
>> Signed-off-by: stan.smith at intel.com
>>
>> --- C:/Documents and Settings/scsmith/Local
>> Settings/Temp/al_mad_pool.c- revBASE.svn000.tmp.c Tue Apr 28
>> 09:07:40 2009 +++ C:/Documents and Settings/scsmith/My
>> Documents/openIB-
>> windows/SVN/gen1/trunk/core/al/kernel/al_mad_pool.c Tue Apr 28
>> 09:08:42 2009 @@ -140,12 +140,15 @@
>>
>> /* Initialize the pool lists. */
>> cl_qlist_init( &h_pool->key_list );
>> +
>> ExInitializeNPagedLookasideList( &h_pool->mad_stack, NULL,
>> NULL, - 0, sizeof(mad_item_t), 'dmla', 0 );
>> + 0, sizeof(mad_item_t), 'ktsm', 0 ); +
>> ExInitializeNPagedLookasideList( &h_pool->mad_send_pool,
>> NULL, NULL, - 0, sizeof(mad_send_t), 'dmla', 0 );
>> + 0, sizeof(mad_send_t), 'lpsm', 0 ); +
>> ExInitializeNPagedLookasideList( &h_pool->mad_rmpp_pool,
>> NULL, NULL, - 0, sizeof(mad_rmpp_t), 'dmla', 0 );
>> + 0, sizeof(mad_rmpp_t), 'pmrm', 0 );
>>
>> /* Initialize the pool object. */
>> construct_al_obj( &h_pool->obj, AL_OBJ_TYPE_H_MAD_POOL ); @@
>> -640,8 +643,10 @@ CL_ASSERT( pp_mad_element );
>>
>> /* Obtain a MAD item from the stack. */
>> + cl_spinlock_acquire( &pool_key->h_pool->obj.lock );
>> p_mad_item = (mad_item_t*)ExAllocateFromNPagedLookasideList(
>> &pool_key->h_pool->mad_stack );
>> + cl_spinlock_release( &pool_key->h_pool->obj.lock );
>> if( !p_mad_item ) return IB_INSUFFICIENT_RESOURCES;
>>
>> @@ -709,7 +714,9 @@
>> p_mad_element->element.p_next = NULL;
>>
>> /* Return the MAD element to the pool. */
>> + cl_spinlock_acquire( &pool_key->h_pool->obj.lock );
>> ExFreeToNPagedLookasideList( &pool_key->h_pool->mad_stack,
>> p_mad_item ); + cl_spinlock_release(
>> &pool_key->h_pool->obj.lock );
>>
>> cl_atomic_dec( &pool_key->mad_cnt );
>> deref_al_obj( &pool_key->obj );
>> @@ -731,8 +738,10 @@
>> CL_ASSERT( p_mad_item->pool_key );
>> CL_ASSERT( p_mad_item->pool_key->h_pool );
>>
>> + cl_spinlock_acquire( &p_mad_item->pool_key->h_pool->obj.lock
>> ); p_mad_send = ExAllocateFromNPagedLookasideList(
>> &p_mad_item->pool_key->h_pool->mad_send_pool );
>> + cl_spinlock_release( &p_mad_item->pool_key->h_pool->obj.lock
>> ); if( !p_mad_send ) return NULL;
>>
>> @@ -761,7 +770,9 @@
>> p_mad_send = PARENT_STRUCT( h_mad_send, mad_send_t, mad_send
>> ); h_pool = p_mad_send->h_pool;
>>
>> + cl_spinlock_acquire( &h_pool->obj.lock );
>> ExFreeToNPagedLookasideList( &h_pool->mad_send_pool,
>> p_mad_send ); + cl_spinlock_release( &h_pool->obj.lock );
>> deref_al_obj( &h_pool->obj );
>> }
>>
>> @@ -781,8 +792,10 @@
>> CL_ASSERT( p_mad_item->pool_key );
>> CL_ASSERT( p_mad_item->pool_key->h_pool );
>>
>> + cl_spinlock_acquire( &p_mad_item->pool_key->h_pool->obj.lock
>> ); p_mad_rmpp = ExAllocateFromNPagedLookasideList(
>> &p_mad_item->pool_key->h_pool->mad_rmpp_pool );
>> + cl_spinlock_release( &p_mad_item->pool_key->h_pool->obj.lock
>> ); if( !p_mad_rmpp ) return NULL;
>>
>> @@ -805,7 +818,9 @@
>>
>> h_pool = p_mad_rmpp->h_pool;
>>
>> + cl_spinlock_acquire( &h_pool->obj.lock );
>> ExFreeToNPagedLookasideList( &h_pool->mad_rmpp_pool,
>> p_mad_rmpp ); + cl_spinlock_release( &h_pool->obj.lock );
>> deref_al_obj( &h_pool->obj );
>> }
More information about the ofw
mailing list