[ofw][patch] kernel igmp support

Sean Hefty sean.hefty at intel.com
Thu Jun 12 10:18:36 PDT 2008


As general feedback, #ifdef's in the middle of functions make the code more
difficult to read.  You can fix this by doing something like:

 

#ifdef CL_KERNEL

void foo()

{

            do lots of clever things here;

            and here;

            yada yada yada;

}

#else

void foo()

{

            return;

}

#endif

 

or using #define's.  Then always just call foo().

 

Other comments inline. 

 

 

+/******************************************************************************
*************

+*         name:   igmp_list_init

+*         input:    no

+*   return:        void

+*   initializes list and spin lock for igmp list maintenance

+*******************************************************************************
*************/ 

+void igmp_list_init()

+{

+          AL_ENTER( AL_DBG_MCAST );

 

+          if (0 == InterlockedCompareExchange(&g_mc_g.initialized,1,0))

 

I'm not a fan of this backwards logic format myself.  But besides that, what
would cause trying to initialized this more than once?  I would think you could
put this in an area of code that's only called once.

 

+          {

+                      cl_spinlock_construct( &g_mc_g.mc_list_lock );

+                      cl_spinlock_init( &g_mc_g.mc_list_lock );

+                      cl_qlist_init( &g_mc_g.mc_group_list );

 

This also uses a mix of Windows calls and complib abstractions.  Why not just
always use the Windows calls?

 

+          } 

+

+          AL_EXIT( AL_DBG_MCAST );

+}

+

+/******************************************************************************
*************

+*         name:   igmp_list_destroy

+*         input:    no

+*   return:        void

+*   Release spin lock of igmp list 

+*******************************************************************************
*************/ 

+void igmp_list_destroy()

+{

+          AL_ENTER( AL_DBG_MCAST );

+

+          CL_ASSERT(0 == cl_qlist_count( &g_mc_g.mc_group_list ));

+

+          if (1 == InterlockedCompareExchange(&g_mc_g.initialized,0,1))

+          {

+                      cl_spinlock_destroy( &g_mc_g.mc_list_lock );

+          }   

+

+          AL_EXIT( AL_DBG_MCAST );

+}

+

+/******************************************************************************
*************

+*         name:   al_igmp_list_add_group

+*         input:    ib_gid_t, ib_qp_handle_t,boolean_t

+*   return:        ib_api_status_t

+*   Adds MC group description in the list, increments ref count if exists

+*******************************************************************************
*************/ 

+ib_api_status_t al_igmp_list_add_group(const ib_gid_t *p_mgid, const
ib_qp_handle_t      h_qp, boolean_t is_user)

+          {

+                      cl_list_item_t
*pItem;

+                      join_record_t
*mc_gr = NULL;

+                      boolean_t
found = FALSE;

+                      (h_qp);

+

+                      AL_ENTER( AL_DBG_MCAST );

+                      CL_ASSERT(g_mc_g.initialized == 1);

+

+                      cl_spinlock_acquire(&g_mc_g.mc_list_lock);

+                      for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );

+                                  pItem != cl_qlist_end( &g_mc_g.mc_group_list
);

+                                  pItem = cl_qlist_next( pItem ) )

+                      {

+

+                                  mc_gr = CONTAINING_RECORD( pItem,
join_record_t, entry );

+                                  if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))

+                                  {

+                                              found = TRUE;

+                                              break;

+                                  }

+                      }

+                      if (found)

+              {

+                                  cl_spinlock_release(&g_mc_g.mc_list_lock);

+                                  return IB_SUCCESS;

+                      }

 

Rather than setting found to true, breaking, then checking found again, just
release the lock and return and eliminate found.  Or even better, use a goto to
jump to the end of the function where the lock is released, so you also get the
debug out statements.

 

+                      

+

+                      mc_gr = cl_zalloc( sizeof(join_record_t) );

 

 

 

+                      if (! mc_gr) 

+                      {

+                                  AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR,

+                                  ("al_add_mcast : cl_zalloc failed\n"));

+                                  cl_spinlock_release(&g_mc_g.mc_list_lock);

+                                  return IB_INSUFFICIENT_MEMORY;

 

Same here - set an ib_status value and goto out:

 

+                      }

+                      RtlCopyMemory(&mc_gr->mgid,p_mgid,sizeof(ib_gid_t));

+                      mc_gr->h_qp = h_qp;

+                      mc_gr->j_state = STATE_NOT_JOINED;

+                      mc_gr->src_req = is_user ? JOIN_SRC_MCE : JOIN_SRC_IPOIB;

+                      mc_gr->time_stamp = cl_get_time_stamp();

+                      cl_qlist_insert_head(&g_mc_g.mc_group_list,
&mc_gr->entry);

 

out:  (you will need to move the AL_PRINT up before this.

 

+                      cl_spinlock_release(&g_mc_g.mc_list_lock);

+

+                      AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,

+                                                ("al_igmp_list_add_group :
ADDED IP = %d.%d.%d.%d\n",0xE0 | mc_gr->mgid.multicast.raw_group_id[10],


+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],

+
mc_gr->mgid.multicast.raw_group_id[13]));

+

+                      AL_EXIT( AL_DBG_MCAST );

+

+                      return IB_SUCCESS;

+}

+

+/******************************************************************************
*************

+*         name:   al_igmp_list_skip_attach

+*         input:    ib_gid_t *p_mgid

+*   return:        boolean_t

+*   Checks if p_mgid group already in the list and joined to Mcast through MCE

+*         used in decision to do attach or not

+*******************************************************************************
*************/ 

+static boolean_t al_igmp_list_skip_attach(const ib_gid_t *p_mgid)

+{

+          join_record_t                                         *mc_gr;

+          cl_list_item_t                                          *pItem;

+          boolean_t  ret = FALSE;

+

+          AL_ENTER( AL_DBG_MCAST );

+          CL_ASSERT(g_mc_g.initialized == 1);

+

+          cl_spinlock_acquire(&g_mc_g.mc_list_lock);

+

+          for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );

+                      pItem != cl_qlist_end( &g_mc_g.mc_group_list );

+                      pItem = cl_qlist_next( pItem ) )

+          {

+                      mc_gr = CONTAINING_RECORD( pItem, join_record_t, entry );

+                      if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))

+                      {

+                         if ((mc_gr->j_state == STATE_JOINED) &&
(mc_gr->src_req == JOIN_SRC_MCE))

+                         {

 

Btw - the names 'mc_gw', 'j_state', 'src_req', 'g_mc_g' are a bit terse.

 

+                                     ret = TRUE;

+                                     AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_MCAST,

+                                                                      ("ATTACH
Skipped for %d.%d.%d.%d\n",0xE0 | mc_gr->mgid.multicast.raw_group_id[10],


+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],

+
mc_gr->mgid.multicast.raw_group_id[13]));

+

+                                     break;

+                         }

 

You continue searching here, but I don't think the GID can be in the list more
than once.  You can eliminate the inner if by replacing it with:

 

if (gids match) {

         ret = ((mc_gr->j_state == STATE_JOINED) && (mc_gr->src_req ==
JOIN_SRC_MCE));

         break;

}

 

+             }

+          }

+

+          cl_spinlock_release(&g_mc_g.mc_list_lock);

+

+          AL_EXIT( AL_DBG_MCAST );

+

+          return ret;

+}

+

+

+/******************************************************************************
*************

+*         name:   al_igmp_list_change_status_to_joined

+*         input:    ib_gid_t *p_mgid

+*   return:        void

+*   Changes MC status of p_mgid group to JOINED

+*******************************************************************************
*************/ 

+static void al_igmp_list_change_status_to_joined(const ib_gid_t *p_mgid)

+{

+          join_record_t                                         *mc_gr;

+          cl_list_item_t                                          *pItem;

+

+          AL_ENTER( AL_DBG_MCAST );

+          CL_ASSERT(g_mc_g.initialized == 1);

+

+          cl_spinlock_acquire(&g_mc_g.mc_list_lock);

+          for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );

+                      pItem != cl_qlist_end( &g_mc_g.mc_group_list );

+                      pItem = cl_qlist_next( pItem ) )

+          {

+                      mc_gr = CONTAINING_RECORD( pItem, join_record_t, entry );

+                      if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))

 

Can I add in a formal complaint to Microsoft about their ridiculous return codes
for RtlCompareMemory?  Where's Fab?

 

More seriously, this same code snippet of looping through the groups searching
for an MGID appears about 3 times in this patch.  Please pull the for loop and
GID compare out into a separate function.  E.g.

 

// Locking provided by caller.

static join_record_t *al_igmp_find(ib_gid_t *p_mgid)

{

          for each group

                      if group->gid == p_mgid

                                  return group

          return null

}

 

 

 

+                      {

+                 mc_gr->ref_cnt++;

+                         mc_gr->j_state = STATE_JOINED;

+                         AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,

+                                                          ("igmp group
%d.%d.%d.%d set to STATE_JOINED\n",0xE0 |
mc_gr->mgid.multicast.raw_group_id[10],          

+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],

+
mc_gr->mgid.multicast.raw_group_id[13]));

+

+                         break;    

+             }

+          }

+          cl_spinlock_release(&g_mc_g.mc_list_lock);

+

+          AL_EXIT( AL_DBG_MCAST );

+}

+

+

+/******************************************************************************
*************

+*         name:   al_igmp_list_remove_record

+*         input:    ib_gid_t *p_mgid

+*   return:        void

+*   Remove MC record with p_mgid from the list if ref count reaches 0

+*******************************************************************************
*************/ 

+void al_igmp_list_remove_record(const ib_gid_t *p_mgid)

+{

+          join_record_t                                         *mc_gr;

+          cl_list_item_t                                          *pItem;

+

+          AL_ENTER( AL_DBG_MCAST );

+          CL_ASSERT(g_mc_g.initialized == 1);

+

+          cl_spinlock_acquire(&g_mc_g.mc_list_lock);

+          for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );

+                      pItem != cl_qlist_end( &g_mc_g.mc_group_list );

+                      pItem = cl_qlist_next( pItem ) )

+          {

+                      mc_gr = CONTAINING_RECORD( pItem, join_record_t, entry );

+                      if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))

+             {

+                         CL_ASSERT((mc_gr->j_state == STATE_NOT_JOINED) ||
(mc_gr->ref_cnt > 0));

 

How can ref_cnt ever be 0 here?  If it can, then the ref_cnt tracking is off.
(It looks like it might be being used more as a duplicate type of state checking
than a real reference count.)  Maybe this assert should use '&&"?

 

+                         if ((--mc_gr->ref_cnt) > 0)

+                         {

+                                     if ((1 == mc_gr->ref_cnt)&&(mc_gr->src_req
== JOIN_SRC_MCE))

+                                     {

+                                                 /* last reference usually igmp
through ipoib */

+                                                 mc_gr->src_req =
JOIN_SRC_IPOIB;

 

I'm not following the use of src_req.  IBAL doesn't know what user has called
into its interface, and it shouldn't assume.

 

+                                     }

+                                     break;

 

break isn't needed - will hit break below else.

 

+                         }

+                         else

+                         {

+                                     AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_MCAST,

+                                                                      ("igmp
group %d.%d.%d.%d removed\n",0xE0 | mc_gr->mgid.multicast.raw_group_id[10],


+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],

+
mc_gr->mgid.multicast.raw_group_id[13]));

+                                     cl_qlist_remove_item(
&g_mc_g.mc_group_list, &mc_gr->entry );

+                                     cl_free(mc_gr);

+                         }

+                         break;

+             }

+          }

+          cl_spinlock_release(&g_mc_g.mc_list_lock);

+

+          AL_EXIT( AL_DBG_MCAST );

+

+}

+

+#endif //CL_KERNEL

+

+

+

 ib_api_status_t

 al_join_mcast(

            IN                     const    ib_qp_handle_t FUNC_PTR64
h_qp,

@@ -112,6 +356,17 @@

 

            AL_ENTER( AL_DBG_MCAST );

 

+#ifdef CL_KERNEL

+

+                      status =
al_igmp_list_add_group(&p_mcast_req->member_rec.mgid, NULL,FALSE); 

+                      if (status != IB_SUCCESS)

+                      {

+                                  AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR,

+                                  ("al_join_mcast : al_igmp_list_add_group
FAILED status: %s\n", ib_get_err_str(status)) );

+                                  return status;

+                      }

+      

+#endif

            /*

             * Validate the port GUID.  There is no need to validate the pkey
index as

             * the user could change it later to make it invalid.  There is also
no

@@ -468,6 +723,9 @@

            ib_mcast_rec_t                          mcast_rec;

            boolean_t                                              sync;

 

+#if defined( CL_KERNEL )

+          boolean_t skip_attach;

+#endif

            AL_ENTER( AL_DBG_MCAST );

 

            h_mcast = PARENT_STRUCT( p_item, ib_mcast_t, async );

@@ -497,13 +755,29 @@

                        /* Ensure that the user wants the join operation to
proceed. */

                        if( h_mcast->state == SA_REG_STARTING )

                        {

+#if defined( CL_KERNEL )

+                                  /* It's a IPoIB join callback - check for
attach */

 

I think this would be cleaner if the caller just specified whether or not to
attach/detach as part of the join/leave request.  You could probably simplify
(eliminate) a lot of this list/state tracking by doing that.  Give the control
to the ULP rather than pushing this sort of policy into IBAL.

 

+                                  skip_attach =
al_igmp_list_skip_attach(&h_mcast->member_rec.mgid);

+                                  if (skip_attach)

+                                  {

+                                              AL_PRINT(
TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,

+                                    ("ATTACH Skipped\n") );

+                                  }

+

+                                  /* It's a IPoIB - change status to JOINED and
inc ref count*/

+
al_igmp_list_change_status_to_joined(&h_mcast->member_rec.mgid);

+#endif

                                    /*

                                     * Change the state here so that we avoid
trying to cancel

                                     * the request if the verb operation fails.

                                     */

                                    h_mcast->state = SA_REG_ACTIVE;

                                    /* Attach the QP to the multicast group. */

-
if(ib_member_get_state(mcast_rec.p_member_rec->scope_state) ==
IB_MC_REC_STATE_FULL_MEMBER)

+                                  if( 

+#if defined( CL_KERNEL )

+                                              (!skip_attach) && 

+#endif

 

This is why #ifdefs in functions make the code hard to read.  You're separating
code out from the middle of an if() statement.

 

+
(ib_member_get_state(mcast_rec.p_member_rec->scope_state) ==
IB_MC_REC_STATE_FULL_MEMBER))

                                    {

                                                status =
verbs_attach_mcast(h_mcast);

                                                if( status != IB_SUCCESS )

@@ -559,7 +833,15 @@

                                    ("IB_INVALID_MCAST_HANDLE\n") );

                        return IB_INVALID_MCAST_HANDLE;

            }

+#if defined( CL_KERNEL ) 

+          AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,

+                                    ("ib_leave_mcast KERNEL:IP :
%d.%d.%d.%d\n",0xE0 | h_mcast->member_rec.mgid.multicast.raw_group_id[10],   

+
h_mcast->member_rec.mgid.multicast.raw_group_id[11],h_mcast->member_rec.mgid.mul
ticast.raw_group_id[12],

+
h_mcast->member_rec.mgid.multicast.raw_group_id[13]));

 

+          al_igmp_list_remove_record(&h_mcast->member_rec.mgid);

+

+#endif

            /* Record that we're already leaving the multicast group. */

            ref_al_obj( &h_mcast->obj );

            h_mcast->obj.pfn_destroy( &h_mcast->obj, pfn_destroy_cb );

@@ -647,6 +929,8 @@

                        h_attach->obj.pfn_destroy( &h_attach->obj, NULL );

                        return status;

            }

+    /* called from user mode MC callback. By calling the function below we
indicate that user mode(MCE) is joined and attached*/

+          al_igmp_list_change_status_to_joined(p_mcast_gid);

 

            /* The proxy will release the reference taken in init_al_obj. */

            *ph_attach = h_attach;

Index: core/al/ib_common.h

===================================================================

--- core/al/ib_common.h (revision 1261)

+++ core/al/ib_common.h          (working copy)

@@ -35,6 +35,7 @@

 

 

 #include <complib/cl_types.h>

+#include <complib/cl_qlist.h>

 #include <iba/ib_types.h>

 

 

@@ -47,4 +48,29 @@

            IN                                             ib_ca_attr_t* const
p_dest,

            IN                     const    ib_ca_attr_t* const
p_src );

 

+typedef enum   _JOIN_STATE

+{

+          STATE_NOT_JOINED = 0,

+          STATE_JOINED

+}JOIN_STATE;

+

+typedef enum

+{

+          JOIN_SRC_MCE = 1,

+          JOIN_SRC_IPOIB

+}JOIN_SRC;

+

+typedef struct _JOIN_RECORD

+{

+          cl_list_item_t      entry;

+          ib_gid_t        mgid;

+          ib_qp_handle_t  h_qp;

+          JOIN_STATE                 j_state;

+          int32_t        ref_cnt;

+          JOIN_SRC                    src_req;

+          uint64_t     time_stamp;

+}join_record_t;

+

+void al_igmp_list_remove_record(const ib_gid_t *p_mgid);

+ib_api_status_t al_igmp_list_add_group(const ib_gid_t *p_mgid, const
ib_qp_handle_t      h_qp, boolean_t is_user);

 #endif /* __IB_COMMON_H__ */

Index: core/al/kernel/al_proxy_subnet.c

===================================================================

--- core/al/kernel/al_proxy_subnet.c        (revision 1261)

+++ core/al/kernel/al_proxy_subnet.c      (working copy)

@@ -289,7 +289,22 @@

            p_sa_req->pfn_sa_req_cb = __proxy_sa_req_cb;

 

            p_ioctl->in.sa_req.p_attr = p_ioctl->in.attr;

+          if ((p_ioctl->in.sa_req.attr_id == IB_MAD_ATTR_MCMEMBER_RECORD) && \

+                                  (p_ioctl->in.sa_req.method ==
IB_MAD_METHOD_SET))

+          {

+                      status =
al_igmp_list_add_group(&((ib_member_rec_t*)p_ioctl->in.attr)->mgid, NULL,TRUE); 

 

+                      if (status != IB_SUCCESS)

+                      {

+                                  AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR, ("Failed to add mc group\n") );

+                                  goto proxy_send_sa_req_err2;

+                      }

+          }

+          else if ((p_ioctl->in.sa_req.attr_id == IB_MAD_ATTR_MCMEMBER_RECORD)
&& \

+                                  (p_ioctl->in.sa_req.method ==
IB_MAD_METHOD_DELETE))

+    {

+
al_igmp_list_remove_record(&((ib_member_rec_t*)p_ioctl->in.attr)->mgid);

+          }

            /*

             * We never pass the user-mode flag when sending SA requests - the

             * I/O manager will perform all synchronization to make this IRP
sync

 

Slava Strebkov

SW Engineer

Voltaire

099718750

 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/5ad24193/attachment.html>


More information about the ofw mailing list