[ofw] [IBAL] serialize MAD pool access to eliminate POOL corruption crash in multi HCA config.
Leonid Keller
leonid at mellanox.co.il
Tue Apr 28 12:37:05 PDT 2009
OK
> -----Original Message-----
> From: Smith, Stan [mailto:stan.smith at intel.com]
> Sent: Tuesday, April 28, 2009 10:33 PM
> To: Hefty, Sean; Leonid Keller
> Cc: ofw at lists.openfabrics.org
> Subject: RE: [ofw] [IBAL] serialize MAD pool access to
> eliminate POOL corruption crash in multi HCA config.
>
>
> Leonid, skip the review/commit.
> I'm inclined to agree with Sean's timing speculation.
>
> Stan.
>
> Hefty, Sean wrote:
> > From the WDK docs:
> >
> > ExInitializeNPagedLookasideList:
> > Initializes a system spin lock to control allocations from and frees
> > to the lookaside list in a multiprocessor-safe manner, if necessary.
> >
> > The locking around the NPagedLookasideList should be unnecessary. It's
> > likely just throwing off the timing enough to make it more difficult
> > to hit the bug.
> >
> >> -----Original Message-----
> >> From: ofw-bounces at lists.openfabrics.org [mailto:ofw-
> >> bounces at lists.openfabrics.org] On Behalf Of Smith, Stan
> >> Sent: Tuesday, April 28, 2009 9:18 AM
> >> To: Leonid Keller
> >> Cc: ofw at lists.openfabrics.org
> >> Subject: [ofw] [IBAL] serialize MAD pool access to eliminate POOL
> >> corruption crash in multi HCA config.
> >>
> >> Hello,
> >> The following patch serializes NPagedLookasideList access, which
> >> eliminates the MAD pool corruption crash on 2nd HCA enable. The generic
> >> pool memory tag 'dmla' was changed to identify each specific pool,
> >> for finer-granularity debugging.
> >>
> >> Please review for commit.
> >>
> >> This patch does not completely solve the 2nd HCA enable problem,
> >> although it does eliminate the POOL corruption crashes.
> >>
> >> Thanks,
> >>
> >> Stan.
> >>
> >> Signed off by stan.smith at intel.com
> >>
> >> --- C:/Documents and Settings/scsmith/Local
> >> Settings/Temp/al_mad_pool.c- revBASE.svn000.tmp.c Tue Apr 28
> >> 09:07:40 2009 +++ C:/Documents and Settings/scsmith/My
> >> Documents/openIB-
> >> windows/SVN/gen1/trunk/core/al/kernel/al_mad_pool.c Tue Apr 28
> >> 09:08:42 2009 @@ -140,12 +140,15 @@
> >>
> >> /* Initialize the pool lists. */
> >> cl_qlist_init( &h_pool->key_list );
> >> +
> >> ExInitializeNPagedLookasideList( &h_pool->mad_stack, NULL,
> >> NULL, - 0, sizeof(mad_item_t), 'dmla', 0 );
> >> + 0, sizeof(mad_item_t), 'ktsm', 0 ); +
> >> ExInitializeNPagedLookasideList( &h_pool->mad_send_pool,
> >> NULL, NULL, - 0, sizeof(mad_send_t), 'dmla', 0 );
> >> + 0, sizeof(mad_send_t), 'lpsm', 0 ); +
> >> ExInitializeNPagedLookasideList( &h_pool->mad_rmpp_pool,
> >> NULL, NULL, - 0, sizeof(mad_rmpp_t), 'dmla', 0 );
> >> + 0, sizeof(mad_rmpp_t), 'pmrm', 0 );
> >>
> >> /* Initialize the pool object. */
> >> construct_al_obj( &h_pool->obj, AL_OBJ_TYPE_H_MAD_POOL ); @@
> >> -640,8 +643,10 @@ CL_ASSERT( pp_mad_element );
> >>
> >> /* Obtain a MAD item from the stack. */
> >> + cl_spinlock_acquire( &pool_key->h_pool->obj.lock );
> >> p_mad_item = (mad_item_t*)ExAllocateFromNPagedLookasideList(
> >> &pool_key->h_pool->mad_stack );
> >> + cl_spinlock_release( &pool_key->h_pool->obj.lock );
> >> if( !p_mad_item ) return IB_INSUFFICIENT_RESOURCES;
> >>
> >> @@ -709,7 +714,9 @@
> >> p_mad_element->element.p_next = NULL;
> >>
> >> /* Return the MAD element to the pool. */
> >> + cl_spinlock_acquire( &pool_key->h_pool->obj.lock );
> >> ExFreeToNPagedLookasideList( &pool_key->h_pool->mad_stack,
> >> p_mad_item ); + cl_spinlock_release(
> >> &pool_key->h_pool->obj.lock );
> >>
> >> cl_atomic_dec( &pool_key->mad_cnt );
> >> deref_al_obj( &pool_key->obj ); @@ -731,8 +738,10 @@
> >> CL_ASSERT( p_mad_item->pool_key );
> >> CL_ASSERT( p_mad_item->pool_key->h_pool );
> >>
> >> + cl_spinlock_acquire(
> &p_mad_item->pool_key->h_pool->obj.lock
> >> ); p_mad_send = ExAllocateFromNPagedLookasideList(
> >> &p_mad_item->pool_key->h_pool->mad_send_pool );
> >> + cl_spinlock_release(
> &p_mad_item->pool_key->h_pool->obj.lock
> >> ); if( !p_mad_send ) return NULL;
> >>
> >> @@ -761,7 +770,9 @@
> >> p_mad_send = PARENT_STRUCT( h_mad_send, mad_send_t, mad_send
> >> ); h_pool = p_mad_send->h_pool;
> >>
> >> + cl_spinlock_acquire( &h_pool->obj.lock );
> >> ExFreeToNPagedLookasideList( &h_pool->mad_send_pool,
> >> p_mad_send ); + cl_spinlock_release( &h_pool->obj.lock );
> >> deref_al_obj( &h_pool->obj );
> >> }
> >>
> >> @@ -781,8 +792,10 @@
> >> CL_ASSERT( p_mad_item->pool_key );
> >> CL_ASSERT( p_mad_item->pool_key->h_pool );
> >>
> >> + cl_spinlock_acquire(
> &p_mad_item->pool_key->h_pool->obj.lock
> >> ); p_mad_rmpp = ExAllocateFromNPagedLookasideList(
> >> &p_mad_item->pool_key->h_pool->mad_rmpp_pool );
> >> + cl_spinlock_release(
> &p_mad_item->pool_key->h_pool->obj.lock
> >> ); if( !p_mad_rmpp ) return NULL;
> >>
> >> @@ -805,7 +818,9 @@
> >>
> >> h_pool = p_mad_rmpp->h_pool;
> >>
> >> + cl_spinlock_acquire( &h_pool->obj.lock );
> >> ExFreeToNPagedLookasideList( &h_pool->mad_rmpp_pool,
> >> p_mad_rmpp ); + cl_spinlock_release( &h_pool->obj.lock );
> >> deref_al_obj( &h_pool->obj );
> >> }
>
>
More information about the ofw
mailing list