[ofw][patch] kernel igmp support
Sean Hefty
sean.hefty at intel.com
Thu Jun 12 10:18:36 PDT 2008
As general feedback, #ifdef's in the middle of functions make the code more
difficult to read. You can fix this by doing something like:
#ifdef CL_KERNEL
void foo()
{
do lots of clever things here;
and here;
yada yada yada;
}
#else
void foo()
{
return;
}
#endif
or using #define's. Then always just call foo().
Other comments inline.
+/******************************************************************************
*************
+* name: igmp_list_init
+* input: no
+* return: void
+* initializes list and spin lock for igmp list maintenance
+*******************************************************************************
*************/
+void igmp_list_init()
+{
+ AL_ENTER( AL_DBG_MCAST );
+ if (0 == InterlockedCompareExchange(&g_mc_g.initialized,1,0))
I'm not a fan of this backwards logic format myself. But besides that, what
would cause trying to initialized this more than once? I would think you could
put this in an area of code that's only called once.
+ {
+ cl_spinlock_construct( &g_mc_g.mc_list_lock );
+ cl_spinlock_init( &g_mc_g.mc_list_lock );
+ cl_qlist_init( &g_mc_g.mc_group_list );
This also uses a mix of Windows calls and complib abstractions. Why not just
always use the Windows calls?
+ }
+
+ AL_EXIT( AL_DBG_MCAST );
+}
+
+/******************************************************************************
*************
+* name: igmp_list_destroy
+* input: no
+* return: void
+* Release spin lock of igmp list
+*******************************************************************************
*************/
+void igmp_list_destroy()
+{
+ AL_ENTER( AL_DBG_MCAST );
+
+ CL_ASSERT(0 == cl_qlist_count( &g_mc_g.mc_group_list ));
+
+ if (1 == InterlockedCompareExchange(&g_mc_g.initialized,0,1))
+ {
+ cl_spinlock_destroy( &g_mc_g.mc_list_lock );
+ }
+
+ AL_EXIT( AL_DBG_MCAST );
+}
+
+/******************************************************************************
*************
+* name: al_igmp_list_add_group
+* input: ib_gid_t, ib_qp_handle_t,boolean_t
+* return: ib_api_status_t
+* Adds MC group description in the list, increments ref count if exists
+*******************************************************************************
*************/
+ib_api_status_t al_igmp_list_add_group(const ib_gid_t *p_mgid, const
ib_qp_handle_t h_qp, boolean_t is_user)
+ {
+ cl_list_item_t
*pItem;
+ join_record_t
*mc_gr = NULL;
+ boolean_t
found = FALSE;
+ (h_qp);
+
+ AL_ENTER( AL_DBG_MCAST );
+ CL_ASSERT(g_mc_g.initialized == 1);
+
+ cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+ for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+ pItem != cl_qlist_end( &g_mc_g.mc_group_list
);
+ pItem = cl_qlist_next( pItem ) )
+ {
+
+ mc_gr = CONTAINING_RECORD( pItem,
join_record_t, entry );
+ if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+ {
+ found = TRUE;
+ break;
+ }
+ }
+ if (found)
+ {
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+ return IB_SUCCESS;
+ }
Rather than setting found to true, breaking, then checking found again, just
release the lock and return and eliminate found. Or even better, use a goto to
jump to the end of the function where the lock is released, so you also get the
debug out statements.
+
+
+ mc_gr = cl_zalloc( sizeof(join_record_t) );
+ if (! mc_gr)
+ {
+ AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR,
+ ("al_add_mcast : cl_zalloc failed\n"));
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+ return IB_INSUFFICIENT_MEMORY;
Same here - set an ib_status value and goto out:
+ }
+ RtlCopyMemory(&mc_gr->mgid,p_mgid,sizeof(ib_gid_t));
+ mc_gr->h_qp = h_qp;
+ mc_gr->j_state = STATE_NOT_JOINED;
+ mc_gr->src_req = is_user ? JOIN_SRC_MCE : JOIN_SRC_IPOIB;
+ mc_gr->time_stamp = cl_get_time_stamp();
+ cl_qlist_insert_head(&g_mc_g.mc_group_list,
&mc_gr->entry);
out: (you will need to move the AL_PRINT up before this.
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+ AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+ ("al_igmp_list_add_group :
ADDED IP = %d.%d.%d.%d\n",0xE0 | mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+
+ AL_EXIT( AL_DBG_MCAST );
+
+ return IB_SUCCESS;
+}
+
+/******************************************************************************
*************
+* name: al_igmp_list_skip_attach
+* input: ib_gid_t *p_mgid
+* return: boolean_t
+* Checks if p_mgid group already in the list and joined to Mcast through MCE
+* used in decision to do attach or not
+*******************************************************************************
*************/
+static boolean_t al_igmp_list_skip_attach(const ib_gid_t *p_mgid)
+{
+ join_record_t *mc_gr;
+ cl_list_item_t *pItem;
+ boolean_t ret = FALSE;
+
+ AL_ENTER( AL_DBG_MCAST );
+ CL_ASSERT(g_mc_g.initialized == 1);
+
+ cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+
+ for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+ pItem != cl_qlist_end( &g_mc_g.mc_group_list );
+ pItem = cl_qlist_next( pItem ) )
+ {
+ mc_gr = CONTAINING_RECORD( pItem, join_record_t, entry );
+ if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+ {
+ if ((mc_gr->j_state == STATE_JOINED) &&
(mc_gr->src_req == JOIN_SRC_MCE))
+ {
Btw - the names 'mc_gw', 'j_state', 'src_req', 'g_mc_g' are a bit terse.
+ ret = TRUE;
+ AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_MCAST,
+ ("ATTACH
Skipped for %d.%d.%d.%d\n",0xE0 | mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+
+ break;
+ }
You continue searching here, but I don't think the GID can be in the list more
than once. You can eliminate the inner if by replacing it with:
if (gids match) {
ret = ((mc_gr->j_state == STATE_JOINED) && (mc_gr->src_req ==
JOIN_SRC_MCE));
break;
}
+ }
+ }
+
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+ AL_EXIT( AL_DBG_MCAST );
+
+ return ret;
+}
+
+
+/******************************************************************************
*************
+* name: al_igmp_list_change_status_to_joined
+* input: ib_gid_t *p_mgid
+* return: void
+* Changes MC status of p_mgid group to JOINED
+*******************************************************************************
*************/
+static void al_igmp_list_change_status_to_joined(const ib_gid_t *p_mgid)
+{
+ join_record_t *mc_gr;
+ cl_list_item_t *pItem;
+
+ AL_ENTER( AL_DBG_MCAST );
+ CL_ASSERT(g_mc_g.initialized == 1);
+
+ cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+ for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+ pItem != cl_qlist_end( &g_mc_g.mc_group_list );
+ pItem = cl_qlist_next( pItem ) )
+ {
+ mc_gr = CONTAINING_RECORD( pItem, join_record_t, entry );
+ if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
Can I add in a formal complaint to Microsoft about their ridiculous return codes
for RtlCompareMemory? Where's Fab?
More seriously, this same code snippet of looping through the groups searching
for an MGID appears about 3 times in this patch. Please pull the for loop and
GID compare out into a separate function. E.g.
// Locking provided by caller.
static join_record_t *al_igmp_find(ib_gid_t *p_mgid)
{
for each group
if group->gid == p_mgid
return group
return null
}
+ {
+ mc_gr->ref_cnt++;
+ mc_gr->j_state = STATE_JOINED;
+ AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+ ("igmp group
%d.%d.%d.%d set to STATE_JOINED\n",0xE0 |
mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+
+ break;
+ }
+ }
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+ AL_EXIT( AL_DBG_MCAST );
+}
+
+
+/******************************************************************************
*************
+* name: al_igmp_list_remove_record
+* input: ib_gid_t *p_mgid
+* return: void
+* Remove MC record with p_mgid from the list if ref count reaches 0
+*******************************************************************************
*************/
+void al_igmp_list_remove_record(const ib_gid_t *p_mgid)
+{
+ join_record_t *mc_gr;
+ cl_list_item_t *pItem;
+
+ AL_ENTER( AL_DBG_MCAST );
+ CL_ASSERT(g_mc_g.initialized == 1);
+
+ cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+ for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+ pItem != cl_qlist_end( &g_mc_g.mc_group_list );
+ pItem = cl_qlist_next( pItem ) )
+ {
+ mc_gr = CONTAINING_RECORD( pItem, join_record_t, entry );
+ if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+ {
+ CL_ASSERT((mc_gr->j_state == STATE_NOT_JOINED) ||
(mc_gr->ref_cnt > 0));
How can ref_cnt ever be 0 here? If it can, then the ref_cnt tracking is off.
(It looks like it might be being used more as a duplicate type of state checking
than a real reference count.) Maybe this assert should use '&&"?
+ if ((--mc_gr->ref_cnt) > 0)
+ {
+ if ((1 == mc_gr->ref_cnt)&&(mc_gr->src_req
== JOIN_SRC_MCE))
+ {
+ /* last reference usually igmp
through ipoib */
+ mc_gr->src_req =
JOIN_SRC_IPOIB;
I'm not following the use of src_req. IBAL doesn't know what user has called
into its interface, and it shouldn't assume.
+ }
+ break;
break isn't needed - will hit break below else.
+ }
+ else
+ {
+ AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_MCAST,
+ ("igmp
group %d.%d.%d.%d removed\n",0xE0 | mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_id[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+ cl_qlist_remove_item(
&g_mc_g.mc_group_list, &mc_gr->entry );
+ cl_free(mc_gr);
+ }
+ break;
+ }
+ }
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+ AL_EXIT( AL_DBG_MCAST );
+
+}
+
+#endif //CL_KERNEL
+
+
+
ib_api_status_t
al_join_mcast(
IN const ib_qp_handle_t FUNC_PTR64
h_qp,
@@ -112,6 +356,17 @@
AL_ENTER( AL_DBG_MCAST );
+#ifdef CL_KERNEL
+
+ status =
al_igmp_list_add_group(&p_mcast_req->member_rec.mgid, NULL,FALSE);
+ if (status != IB_SUCCESS)
+ {
+ AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR,
+ ("al_join_mcast : al_igmp_list_add_group
FAILED status: %s\n", ib_get_err_str(status)) );
+ return status;
+ }
+
+#endif
/*
* Validate the port GUID. There is no need to validate the pkey
index as
* the user could change it later to make it invalid. There is also
no
@@ -468,6 +723,9 @@
ib_mcast_rec_t mcast_rec;
boolean_t sync;
+#if defined( CL_KERNEL )
+ boolean_t skip_attach;
+#endif
AL_ENTER( AL_DBG_MCAST );
h_mcast = PARENT_STRUCT( p_item, ib_mcast_t, async );
@@ -497,13 +755,29 @@
/* Ensure that the user wants the join operation to
proceed. */
if( h_mcast->state == SA_REG_STARTING )
{
+#if defined( CL_KERNEL )
+ /* It's a IPoIB join callback - check for
attach */
I think this would be cleaner if the caller just specified whether or not to
attach/detach as part of the join/leave request. You could probably simplify
(eliminate) a lot of this list/state tracking by doing that. Give the control
to the ULP rather than pushing this sort of policy into IBAL.
+ skip_attach =
al_igmp_list_skip_attach(&h_mcast->member_rec.mgid);
+ if (skip_attach)
+ {
+ AL_PRINT(
TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+ ("ATTACH Skipped\n") );
+ }
+
+ /* It's a IPoIB - change status to JOINED and
inc ref count*/
+
al_igmp_list_change_status_to_joined(&h_mcast->member_rec.mgid);
+#endif
/*
* Change the state here so that we avoid
trying to cancel
* the request if the verb operation fails.
*/
h_mcast->state = SA_REG_ACTIVE;
/* Attach the QP to the multicast group. */
-
if(ib_member_get_state(mcast_rec.p_member_rec->scope_state) ==
IB_MC_REC_STATE_FULL_MEMBER)
+ if(
+#if defined( CL_KERNEL )
+ (!skip_attach) &&
+#endif
This is why #ifdefs in functions make the code hard to read. You're separating
code out from the middle of an if() statement.
+
(ib_member_get_state(mcast_rec.p_member_rec->scope_state) ==
IB_MC_REC_STATE_FULL_MEMBER))
{
status =
verbs_attach_mcast(h_mcast);
if( status != IB_SUCCESS )
@@ -559,7 +833,15 @@
("IB_INVALID_MCAST_HANDLE\n") );
return IB_INVALID_MCAST_HANDLE;
}
+#if defined( CL_KERNEL )
+ AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+ ("ib_leave_mcast KERNEL:IP :
%d.%d.%d.%d\n",0xE0 | h_mcast->member_rec.mgid.multicast.raw_group_id[10],
+
h_mcast->member_rec.mgid.multicast.raw_group_id[11],h_mcast->member_rec.mgid.mul
ticast.raw_group_id[12],
+
h_mcast->member_rec.mgid.multicast.raw_group_id[13]));
+ al_igmp_list_remove_record(&h_mcast->member_rec.mgid);
+
+#endif
/* Record that we're already leaving the multicast group. */
ref_al_obj( &h_mcast->obj );
h_mcast->obj.pfn_destroy( &h_mcast->obj, pfn_destroy_cb );
@@ -647,6 +929,8 @@
h_attach->obj.pfn_destroy( &h_attach->obj, NULL );
return status;
}
+ /* called from user mode MC callback. By calling the function below we
indicate that user mode(MCE) is joined and attached*/
+ al_igmp_list_change_status_to_joined(p_mcast_gid);
/* The proxy will release the reference taken in init_al_obj. */
*ph_attach = h_attach;
Index: core/al/ib_common.h
===================================================================
--- core/al/ib_common.h (revision 1261)
+++ core/al/ib_common.h (working copy)
@@ -35,6 +35,7 @@
#include <complib/cl_types.h>
+#include <complib/cl_qlist.h>
#include <iba/ib_types.h>
@@ -47,4 +48,29 @@
IN ib_ca_attr_t* const
p_dest,
IN const ib_ca_attr_t* const
p_src );
+typedef enum _JOIN_STATE
+{
+ STATE_NOT_JOINED = 0,
+ STATE_JOINED
+}JOIN_STATE;
+
+typedef enum
+{
+ JOIN_SRC_MCE = 1,
+ JOIN_SRC_IPOIB
+}JOIN_SRC;
+
+typedef struct _JOIN_RECORD
+{
+ cl_list_item_t entry;
+ ib_gid_t mgid;
+ ib_qp_handle_t h_qp;
+ JOIN_STATE j_state;
+ int32_t ref_cnt;
+ JOIN_SRC src_req;
+ uint64_t time_stamp;
+}join_record_t;
+
+void al_igmp_list_remove_record(const ib_gid_t *p_mgid);
+ib_api_status_t al_igmp_list_add_group(const ib_gid_t *p_mgid, const
ib_qp_handle_t h_qp, boolean_t is_user);
#endif /* __IB_COMMON_H__ */
Index: core/al/kernel/al_proxy_subnet.c
===================================================================
--- core/al/kernel/al_proxy_subnet.c (revision 1261)
+++ core/al/kernel/al_proxy_subnet.c (working copy)
@@ -289,7 +289,22 @@
p_sa_req->pfn_sa_req_cb = __proxy_sa_req_cb;
p_ioctl->in.sa_req.p_attr = p_ioctl->in.attr;
+ if ((p_ioctl->in.sa_req.attr_id == IB_MAD_ATTR_MCMEMBER_RECORD) && \
+ (p_ioctl->in.sa_req.method ==
IB_MAD_METHOD_SET))
+ {
+ status =
al_igmp_list_add_group(&((ib_member_rec_t*)p_ioctl->in.attr)->mgid, NULL,TRUE);
+ if (status != IB_SUCCESS)
+ {
+ AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR, ("Failed to add mc group\n") );
+ goto proxy_send_sa_req_err2;
+ }
+ }
+ else if ((p_ioctl->in.sa_req.attr_id == IB_MAD_ATTR_MCMEMBER_RECORD)
&& \
+ (p_ioctl->in.sa_req.method ==
IB_MAD_METHOD_DELETE))
+ {
+
al_igmp_list_remove_record(&((ib_member_rec_t*)p_ioctl->in.attr)->mgid);
+ }
/*
* We never pass the user-mode flag when sending SA requests - the
* I/O manager will perform all synchronization to make this IRP
sync
Slava Strebkov
SW Engineer
Voltaire
099718750
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/5ad24193/attachment.html>
More information about the ofw
mailing list