[ofw][patch] kernel igmp support
    Sean Hefty 
    sean.hefty at intel.com
       
    Thu Jun 12 10:18:36 PDT 2008
    
    
  
As general feedback, #ifdef's in the middle of functions make the code more
difficult to read.  You can fix this by doing something like:
 
#ifdef CL_KERNEL
void foo()
{
            do lots of clever things here;
            and here;
            yada yada yada;
}
#else
void foo()
{
            return;
}
#endif
 
or using #define's.  Then always just call foo().
 
Other comments inline. 
 
 
+/******************************************************************************
*************
+*         name:   igmp_list_init
+*         input:    no
+*   return:        void
+*   initializes list and spin lock for igmp list maintenance
+*******************************************************************************
*************/ 
+void igmp_list_init()
+{
+          AL_ENTER( AL_DBG_MCAST );
 
+          if (0 == InterlockedCompareExchange(&g_mc_g.initialized,1,0))
 
I'm not a fan of this backwards logic format myself.  But besides that, what
would cause trying to initialized this more than once?  I would think you could
put this in an area of code that's only called once.
 
+          {
+                      cl_spinlock_construct( &g_mc_g.mc_list_lock );
+                      cl_spinlock_init( &g_mc_g.mc_list_lock );
+                      cl_qlist_init( &g_mc_g.mc_group_list );
 
This also uses a mix of Windows calls and complib abstractions.  Why not just
always use the Windows calls?
 
+          } 
+
+          AL_EXIT( AL_DBG_MCAST );
+}
+
+/******************************************************************************
*************
+*         name:   igmp_list_destroy
+*         input:    no
+*   return:        void
+*   Release spin lock of igmp list 
+*******************************************************************************
*************/ 
+void igmp_list_destroy()
+{
+          AL_ENTER( AL_DBG_MCAST );
+
+          CL_ASSERT(0 == cl_qlist_count( &g_mc_g.mc_group_list ));
+
+          if (1 == InterlockedCompareExchange(&g_mc_g.initialized,0,1))
+          {
+                      cl_spinlock_destroy( &g_mc_g.mc_list_lock );
+          }   
+
+          AL_EXIT( AL_DBG_MCAST );
+}
+
+/******************************************************************************
*************
+*         name:   al_igmp_list_add_group
+*         input:    ib_gid_t, ib_qp_handle_t,boolean_t
+*   return:        ib_api_status_t
+*   Adds MC group description in the list, increments ref count if exists
+*******************************************************************************
*************/ 
+ib_api_status_t al_igmp_list_add_group(const ib_gid_t *p_mgid, const
ib_qp_handle_t      h_qp, boolean_t is_user)
+          {
+                      cl_list_item_t
*pItem;
+                      join_record_t
*mc_gr = NULL;
+                      boolean_t
found = FALSE;
+                      (h_qp);
+
+                      AL_ENTER( AL_DBG_MCAST );
+                      CL_ASSERT(g_mc_g.initialized == 1);
+
+                      cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+                      for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+                                  pItem != cl_qlist_end( &g_mc_g.mc_group_list
);
+                                  pItem = cl_qlist_next( pItem ) )
+                      {
+
+                                  mc_gr = CONTAINING_RECORD( pItem,
join_record_t, entry );
+                                  if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+                                  {
+                                              found = TRUE;
+                                              break;
+                                  }
+                      }
+                      if (found)
+              {
+                                  cl_spinlock_release(&g_mc_g.mc_list_lock);
+                                  return IB_SUCCESS;
+                      }
 
Rather than setting found to true, breaking, then checking found again, just
release the lock and return and eliminate found.  Or even better, use a goto to
jump to the end of the function where the lock is released, so you also get the
debug out statements.
 
+                      
+
+                      mc_gr = cl_zalloc( sizeof(join_record_t) );
 
 
 
+                      if (! mc_gr) 
+                      {
+                                  AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR,
+                                  ("al_add_mcast : cl_zalloc failed\n"));
+                                  cl_spinlock_release(&g_mc_g.mc_list_lock);
+                                  return IB_INSUFFICIENT_MEMORY;
 
Same here - set an ib_status value and goto out:
 
+                      }
+                      RtlCopyMemory(&mc_gr->mgid,p_mgid,sizeof(ib_gid_t));
+                      mc_gr->h_qp = h_qp;
+                      mc_gr->j_state = STATE_NOT_JOINED;
+                      mc_gr->src_req = is_user ? JOIN_SRC_MCE : JOIN_SRC_IPOIB;
+                      mc_gr->time_stamp = cl_get_time_stamp();
+                      cl_qlist_insert_head(&g_mc_g.mc_group_list,
&mc_gr->entry);
 
out:  (you will need to move the AL_PRINT up before this.
 
+                      cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+                      AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+                                                ("al_igmp_list_add_group :
ADDED IP = %d.%d.%d.%d\n",0xE0 | mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+
+                      AL_EXIT( AL_DBG_MCAST );
+
+                      return IB_SUCCESS;
+}
+
+/******************************************************************************
*************
+*         name:   al_igmp_list_skip_attach
+*         input:    ib_gid_t *p_mgid
+*   return:        boolean_t
+*   Checks if p_mgid group already in the list and joined to Mcast through MCE
+*         used in decision to do attach or not
+*******************************************************************************
*************/ 
+static boolean_t al_igmp_list_skip_attach(const ib_gid_t *p_mgid)
+{
+          join_record_t                                         *mc_gr;
+          cl_list_item_t                                          *pItem;
+          boolean_t  ret = FALSE;
+
+          AL_ENTER( AL_DBG_MCAST );
+          CL_ASSERT(g_mc_g.initialized == 1);
+
+          cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+
+          for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+                      pItem != cl_qlist_end( &g_mc_g.mc_group_list );
+                      pItem = cl_qlist_next( pItem ) )
+          {
+                      mc_gr = CONTAINING_RECORD( pItem, join_record_t, entry );
+                      if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+                      {
+                         if ((mc_gr->j_state == STATE_JOINED) &&
(mc_gr->src_req == JOIN_SRC_MCE))
+                         {
 
Btw - the names 'mc_gw', 'j_state', 'src_req', 'g_mc_g' are a bit terse.
 
+                                     ret = TRUE;
+                                     AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_MCAST,
+                                                                      ("ATTACH
Skipped for %d.%d.%d.%d\n",0xE0 | mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+
+                                     break;
+                         }
 
You continue searching here, but I don't think the GID can be in the list more
than once.  You can eliminate the inner if by replacing it with:
 
if (gids match) {
         ret = ((mc_gr->j_state == STATE_JOINED) && (mc_gr->src_req ==
JOIN_SRC_MCE));
         break;
}
 
+             }
+          }
+
+          cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+          AL_EXIT( AL_DBG_MCAST );
+
+          return ret;
+}
+
+
+/******************************************************************************
*************
+*         name:   al_igmp_list_change_status_to_joined
+*         input:    ib_gid_t *p_mgid
+*   return:        void
+*   Changes MC status of p_mgid group to JOINED
+*******************************************************************************
*************/ 
+static void al_igmp_list_change_status_to_joined(const ib_gid_t *p_mgid)
+{
+          join_record_t                                         *mc_gr;
+          cl_list_item_t                                          *pItem;
+
+          AL_ENTER( AL_DBG_MCAST );
+          CL_ASSERT(g_mc_g.initialized == 1);
+
+          cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+          for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+                      pItem != cl_qlist_end( &g_mc_g.mc_group_list );
+                      pItem = cl_qlist_next( pItem ) )
+          {
+                      mc_gr = CONTAINING_RECORD( pItem, join_record_t, entry );
+                      if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
 
Can I add in a formal complaint to Microsoft about their ridiculous return codes
for RtlCompareMemory?  Where's Fab?
 
More seriously, this same code snippet of looping through the groups searching
for an MGID appears about 3 times in this patch.  Please pull the for loop and
GID compare out into a separate function.  E.g.
 
// Locking provided by caller.
static join_record_t *al_igmp_find(ib_gid_t *p_mgid)
{
          for each group
                      if group->gid == p_mgid
                                  return group
          return null
}
 
 
 
+                      {
+                 mc_gr->ref_cnt++;
+                         mc_gr->j_state = STATE_JOINED;
+                         AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+                                                          ("igmp group
%d.%d.%d.%d set to STATE_JOINED\n",0xE0 |
mc_gr->mgid.multicast.raw_group_id[10],          
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+
+                         break;    
+             }
+          }
+          cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+          AL_EXIT( AL_DBG_MCAST );
+}
+
+
+/******************************************************************************
*************
+*         name:   al_igmp_list_remove_record
+*         input:    ib_gid_t *p_mgid
+*   return:        void
+*   Remove MC record with p_mgid from the list if ref count reaches 0
+*******************************************************************************
*************/ 
+void al_igmp_list_remove_record(const ib_gid_t *p_mgid)
+{
+          join_record_t                                         *mc_gr;
+          cl_list_item_t                                          *pItem;
+
+          AL_ENTER( AL_DBG_MCAST );
+          CL_ASSERT(g_mc_g.initialized == 1);
+
+          cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+          for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+                      pItem != cl_qlist_end( &g_mc_g.mc_group_list );
+                      pItem = cl_qlist_next( pItem ) )
+          {
+                      mc_gr = CONTAINING_RECORD( pItem, join_record_t, entry );
+                      if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+             {
+                         CL_ASSERT((mc_gr->j_state == STATE_NOT_JOINED) ||
(mc_gr->ref_cnt > 0));
 
How can ref_cnt ever be 0 here?  If it can, then the ref_cnt tracking is off.
(It looks like it might be being used more as a duplicate type of state checking
than a real reference count.)  Maybe this assert should use '&&"?
 
+                         if ((--mc_gr->ref_cnt) > 0)
+                         {
+                                     if ((1 == mc_gr->ref_cnt)&&(mc_gr->src_req
== JOIN_SRC_MCE))
+                                     {
+                                                 /* last reference usually igmp
through ipoib */
+                                                 mc_gr->src_req =
JOIN_SRC_IPOIB;
 
I'm not following the use of src_req.  IBAL doesn't know what user has called
into its interface, and it shouldn't assume.
 
+                                     }
+                                     break;
 
break isn't needed - will hit break below else.
 
+                         }
+                         else
+                         {
+                                     AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_MCAST,
+                                                                      ("igmp
group %d.%d.%d.%d removed\n",0xE0 | mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+                                     cl_qlist_remove_item(
&g_mc_g.mc_group_list, &mc_gr->entry );
+                                     cl_free(mc_gr);
+                         }
+                         break;
+             }
+          }
+          cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+          AL_EXIT( AL_DBG_MCAST );
+
+}
+
+#endif //CL_KERNEL
+
+
+
 ib_api_status_t
 al_join_mcast(
            IN                     const    ib_qp_handle_t FUNC_PTR64
h_qp,
@@ -112,6 +356,17 @@
 
            AL_ENTER( AL_DBG_MCAST );
 
+#ifdef CL_KERNEL
+
+                      status =
al_igmp_list_add_group(&p_mcast_req->member_rec.mgid, NULL,FALSE); 
+                      if (status != IB_SUCCESS)
+                      {
+                                  AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR,
+                                  ("al_join_mcast : al_igmp_list_add_group
FAILED status: %s\n", ib_get_err_str(status)) );
+                                  return status;
+                      }
+      
+#endif
            /*
             * Validate the port GUID.  There is no need to validate the pkey
index as
             * the user could change it later to make it invalid.  There is also
no
@@ -468,6 +723,9 @@
            ib_mcast_rec_t                          mcast_rec;
            boolean_t                                              sync;
 
+#if defined( CL_KERNEL )
+          boolean_t skip_attach;
+#endif
            AL_ENTER( AL_DBG_MCAST );
 
            h_mcast = PARENT_STRUCT( p_item, ib_mcast_t, async );
@@ -497,13 +755,29 @@
                        /* Ensure that the user wants the join operation to
proceed. */
                        if( h_mcast->state == SA_REG_STARTING )
                        {
+#if defined( CL_KERNEL )
+                                  /* It's a IPoIB join callback - check for
attach */
 
I think this would be cleaner if the caller just specified whether or not to
attach/detach as part of the join/leave request.  You could probably simplify
(eliminate) a lot of this list/state tracking by doing that.  Give the control
to the ULP rather than pushing this sort of policy into IBAL.
 
+                                  skip_attach =
al_igmp_list_skip_attach(&h_mcast->member_rec.mgid);
+                                  if (skip_attach)
+                                  {
+                                              AL_PRINT(
TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+                                    ("ATTACH Skipped\n") );
+                                  }
+
+                                  /* It's a IPoIB - change status to JOINED and
inc ref count*/
+
al_igmp_list_change_status_to_joined(&h_mcast->member_rec.mgid);
+#endif
                                    /*
                                     * Change the state here so that we avoid
trying to cancel
                                     * the request if the verb operation fails.
                                     */
                                    h_mcast->state = SA_REG_ACTIVE;
                                    /* Attach the QP to the multicast group. */
-
if(ib_member_get_state(mcast_rec.p_member_rec->scope_state) ==
IB_MC_REC_STATE_FULL_MEMBER)
+                                  if( 
+#if defined( CL_KERNEL )
+                                              (!skip_attach) && 
+#endif
 
This is why #ifdefs in functions make the code hard to read.  You're separating
code out from the middle of an if() statement.
 
+
(ib_member_get_state(mcast_rec.p_member_rec->scope_state) ==
IB_MC_REC_STATE_FULL_MEMBER))
                                    {
                                                status =
verbs_attach_mcast(h_mcast);
                                                if( status != IB_SUCCESS )
@@ -559,7 +833,15 @@
                                    ("IB_INVALID_MCAST_HANDLE\n") );
                        return IB_INVALID_MCAST_HANDLE;
            }
+#if defined( CL_KERNEL ) 
+          AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+                                    ("ib_leave_mcast KERNEL:IP :
%d.%d.%d.%d\n",0xE0 | h_mcast->member_rec.mgid.multicast.raw_group_id[10],   
+
h_mcast->member_rec.mgid.multicast.raw_group_id[11],h_mcast->member_rec.mgid.mul
ticast.raw_group_id[12],
+
h_mcast->member_rec.mgid.multicast.raw_group_id[13]));
 
+          al_igmp_list_remove_record(&h_mcast->member_rec.mgid);
+
+#endif
            /* Record that we're already leaving the multicast group. */
            ref_al_obj( &h_mcast->obj );
            h_mcast->obj.pfn_destroy( &h_mcast->obj, pfn_destroy_cb );
@@ -647,6 +929,8 @@
                        h_attach->obj.pfn_destroy( &h_attach->obj, NULL );
                        return status;
            }
+    /* called from user mode MC callback. By calling the function below we
indicate that user mode(MCE) is joined and attached*/
+          al_igmp_list_change_status_to_joined(p_mcast_gid);
 
            /* The proxy will release the reference taken in init_al_obj. */
            *ph_attach = h_attach;
Index: core/al/ib_common.h
===================================================================
--- core/al/ib_common.h (revision 1261)
+++ core/al/ib_common.h          (working copy)
@@ -35,6 +35,7 @@
 
 
 #include <complib/cl_types.h>
+#include <complib/cl_qlist.h>
 #include <iba/ib_types.h>
 
 
@@ -47,4 +48,29 @@
            IN                                             ib_ca_attr_t* const
p_dest,
            IN                     const    ib_ca_attr_t* const
p_src );
 
+typedef enum   _JOIN_STATE
+{
+          STATE_NOT_JOINED = 0,
+          STATE_JOINED
+}JOIN_STATE;
+
+typedef enum
+{
+          JOIN_SRC_MCE = 1,
+          JOIN_SRC_IPOIB
+}JOIN_SRC;
+
+typedef struct _JOIN_RECORD
+{
+          cl_list_item_t      entry;
+          ib_gid_t        mgid;
+          ib_qp_handle_t  h_qp;
+          JOIN_STATE                 j_state;
+          int32_t        ref_cnt;
+          JOIN_SRC                    src_req;
+          uint64_t     time_stamp;
+}join_record_t;
+
+void al_igmp_list_remove_record(const ib_gid_t *p_mgid);
+ib_api_status_t al_igmp_list_add_group(const ib_gid_t *p_mgid, const
ib_qp_handle_t      h_qp, boolean_t is_user);
 #endif /* __IB_COMMON_H__ */
Index: core/al/kernel/al_proxy_subnet.c
===================================================================
--- core/al/kernel/al_proxy_subnet.c        (revision 1261)
+++ core/al/kernel/al_proxy_subnet.c      (working copy)
@@ -289,7 +289,22 @@
            p_sa_req->pfn_sa_req_cb = __proxy_sa_req_cb;
 
            p_ioctl->in.sa_req.p_attr = p_ioctl->in.attr;
+          if ((p_ioctl->in.sa_req.attr_id == IB_MAD_ATTR_MCMEMBER_RECORD) && \
+                                  (p_ioctl->in.sa_req.method ==
IB_MAD_METHOD_SET))
+          {
+                      status =
al_igmp_list_add_group(&((ib_member_rec_t*)p_ioctl->in.attr)->mgid, NULL,TRUE); 
 
+                      if (status != IB_SUCCESS)
+                      {
+                                  AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR, ("Failed to add mc group\n") );
+                                  goto proxy_send_sa_req_err2;
+                      }
+          }
+          else if ((p_ioctl->in.sa_req.attr_id == IB_MAD_ATTR_MCMEMBER_RECORD)
&& \
+                                  (p_ioctl->in.sa_req.method ==
IB_MAD_METHOD_DELETE))
+    {
+
al_igmp_list_remove_record(&((ib_member_rec_t*)p_ioctl->in.attr)->mgid);
+          }
            /*
             * We never pass the user-mode flag when sending SA requests - the
             * I/O manager will perform all synchronization to make this IRP
sync
 
Slava Strebkov
SW Engineer
Voltaire
099718750
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/5ad24193/attachment.html>
    
    
More information about the ofw
mailing list