[ofw][patch] kernel igmp support
Slava Strebkov
slavas at voltaire.com
Thu Jun 12 06:53:27 PDT 2008
Hi,
Attached patch designed to work with user mode MCE (multicast
offloading).
Data path is going through user mode dll, IPoIB holds IGMP messages
support and does not perform ATTACH
in order to prevent unnecessary data copying.
Index: core/al/al_init.c
===================================================================
--- core/al/al_init.c (revision 1261)
+++ core/al/al_init.c (working copy)
@@ -49,6 +49,10 @@
uint32_t g_al_dbg_level
= TRACE_LEVEL_ERROR;
uint32_t g_al_dbg_flags
= 0xf0;
+#ifdef CL_KERNEL
+ extern void igmp_list_init();
+ extern void igmp_list_destroy();
+#endif
/*
* Device driver initialization routine.
*/
@@ -111,7 +115,9 @@
("init_al_mgr: status = 0x%x.\n",
status) );
return status;
}
-
+#ifdef CL_KERNEL
+ igmp_list_init();
+#endif
AL_EXIT( AL_DBG_DEV );
return status;
}
@@ -168,6 +174,8 @@
cl_free( gp_async_proc_mgr );
gp_async_proc_mgr = NULL;
}
-
+#ifdef CL_KERNEL
+ igmp_list_destroy();
+#endif
AL_PRINT_EXIT( TRACE_LEVEL_WARNING, AL_DBG_DEV, ("Goodbye
Cruel World =(\n") );
}
Index: core/al/al_mcast.c
===================================================================
--- core/al/al_mcast.c (revision 1261)
+++ core/al/al_mcast.c (working copy)
@@ -96,10 +96,254 @@
static void
__free_attach(
IN al_obj_t
*p_obj );
-#endif
+struct
+{
+ cl_spinlock_t mc_list_lock;
+ cl_qlist_t mc_group_list;
+ LONG initialized;
+}g_mc_g;
+/**********************************************************************
*********************
+* name: igmp_list_init
+* input: no
+* return: void
+* initializes list and spin lock for igmp list maintenance
+***********************************************************************
*********************/
+void igmp_list_init()
+{
+ AL_ENTER( AL_DBG_MCAST );
+ if (0 == InterlockedCompareExchange(&g_mc_g.initialized,1,0))
+ {
+ cl_spinlock_construct( &g_mc_g.mc_list_lock );
+ cl_spinlock_init( &g_mc_g.mc_list_lock );
+ cl_qlist_init( &g_mc_g.mc_group_list );
+ }
+
+ AL_EXIT( AL_DBG_MCAST );
+}
+
+/**********************************************************************
*********************
+* name: igmp_list_destroy
+* input: no
+* return: void
+* Release spin lock of igmp list
+***********************************************************************
*********************/
+void igmp_list_destroy()
+{
+ AL_ENTER( AL_DBG_MCAST );
+
+ CL_ASSERT(0 == cl_qlist_count( &g_mc_g.mc_group_list ));
+
+ if (1 == InterlockedCompareExchange(&g_mc_g.initialized,0,1))
+ {
+ cl_spinlock_destroy( &g_mc_g.mc_list_lock );
+ }
+
+ AL_EXIT( AL_DBG_MCAST );
+}
+
+/**********************************************************************
*********************
+* name: al_igmp_list_add_group
+* input: ib_gid_t, ib_qp_handle_t,boolean_t
+* return: ib_api_status_t
+* Adds MC group description in the list, increments ref count if
exists
+***********************************************************************
*********************/
+ib_api_status_t al_igmp_list_add_group(const ib_gid_t *p_mgid, const
ib_qp_handle_t h_qp, boolean_t is_user)
+ {
+ cl_list_item_t
*pItem;
+ join_record_t
*mc_gr = NULL;
+ boolean_t
found = FALSE;
+ (h_qp);
+
+ AL_ENTER( AL_DBG_MCAST );
+ CL_ASSERT(g_mc_g.initialized == 1);
+
+ cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+ for( pItem = cl_qlist_head( &g_mc_g.mc_group_list
);
+ pItem != cl_qlist_end(
&g_mc_g.mc_group_list );
+ pItem = cl_qlist_next( pItem ) )
+ {
+
+ mc_gr = CONTAINING_RECORD( pItem,
join_record_t, entry );
+ if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+ {
+ found = TRUE;
+ break;
+ }
+ }
+ if (found)
+ {
+
cl_spinlock_release(&g_mc_g.mc_list_lock);
+ return IB_SUCCESS;
+ }
+
+
+ mc_gr = cl_zalloc( sizeof(join_record_t) );
+ if (! mc_gr)
+ {
+ AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR,
+ ("al_add_mcast : cl_zalloc
failed\n"));
+
cl_spinlock_release(&g_mc_g.mc_list_lock);
+ return IB_INSUFFICIENT_MEMORY;
+ }
+
RtlCopyMemory(&mc_gr->mgid,p_mgid,sizeof(ib_gid_t));
+ mc_gr->h_qp = h_qp;
+ mc_gr->j_state = STATE_NOT_JOINED;
+ mc_gr->src_req = is_user ? JOIN_SRC_MCE :
JOIN_SRC_IPOIB;
+ mc_gr->time_stamp = cl_get_time_stamp();
+ cl_qlist_insert_head(&g_mc_g.mc_group_list,
&mc_gr->entry);
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+ AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+
("al_igmp_list_add_group : ADDED IP = %d.%d.%d.%d\n",0xE0 |
mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_i
d[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+
+ AL_EXIT( AL_DBG_MCAST );
+
+ return IB_SUCCESS;
+}
+
+/**********************************************************************
*********************
+* name: al_igmp_list_skip_attach
+* input: ib_gid_t *p_mgid
+* return: boolean_t
+* Checks if p_mgid group already in the list and joined to Mcast
through MCE
+* used in decision to do attach or not
+***********************************************************************
*********************/
+static boolean_t al_igmp_list_skip_attach(const ib_gid_t *p_mgid)
+{
+ join_record_t *mc_gr;
+ cl_list_item_t
*pItem;
+ boolean_t ret = FALSE;
+
+ AL_ENTER( AL_DBG_MCAST );
+ CL_ASSERT(g_mc_g.initialized == 1);
+
+ cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+
+ for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+ pItem != cl_qlist_end( &g_mc_g.mc_group_list );
+ pItem = cl_qlist_next( pItem ) )
+ {
+ mc_gr = CONTAINING_RECORD( pItem, join_record_t,
entry );
+ if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+ {
+ if ((mc_gr->j_state == STATE_JOINED) &&
(mc_gr->src_req == JOIN_SRC_MCE))
+ {
+ ret = TRUE;
+ AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_MCAST,
+
("ATTACH Skipped for %d.%d.%d.%d\n",0xE0 |
mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_i
d[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+
+ break;
+ }
+ }
+ }
+
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+ AL_EXIT( AL_DBG_MCAST );
+
+ return ret;
+}
+
+
+/**********************************************************************
*********************
+* name: al_igmp_list_change_status_to_joined
+* input: ib_gid_t *p_mgid
+* return: void
+* Changes MC status of p_mgid group to JOINED
+***********************************************************************
*********************/
+static void al_igmp_list_change_status_to_joined(const ib_gid_t
*p_mgid)
+{
+ join_record_t *mc_gr;
+ cl_list_item_t
*pItem;
+
+ AL_ENTER( AL_DBG_MCAST );
+ CL_ASSERT(g_mc_g.initialized == 1);
+
+ cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+ for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+ pItem != cl_qlist_end( &g_mc_g.mc_group_list );
+ pItem = cl_qlist_next( pItem ) )
+ {
+ mc_gr = CONTAINING_RECORD( pItem, join_record_t,
entry );
+ if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+ {
+ mc_gr->ref_cnt++;
+ mc_gr->j_state = STATE_JOINED;
+ AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_MCAST,
+ ("igmp group
%d.%d.%d.%d set to STATE_JOINED\n",0xE0 |
mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_i
d[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+
+ break;
+ }
+ }
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+ AL_EXIT( AL_DBG_MCAST );
+}
+
+
+/**********************************************************************
*********************
+* name: al_igmp_list_remove_record
+* input: ib_gid_t *p_mgid
+* return: void
+* Remove MC record with p_mgid from the list if ref count reaches 0
+***********************************************************************
*********************/
+void al_igmp_list_remove_record(const ib_gid_t *p_mgid)
+{
+ join_record_t *mc_gr;
+ cl_list_item_t
*pItem;
+
+ AL_ENTER( AL_DBG_MCAST );
+ CL_ASSERT(g_mc_g.initialized == 1);
+
+ cl_spinlock_acquire(&g_mc_g.mc_list_lock);
+ for( pItem = cl_qlist_head( &g_mc_g.mc_group_list );
+ pItem != cl_qlist_end( &g_mc_g.mc_group_list );
+ pItem = cl_qlist_next( pItem ) )
+ {
+ mc_gr = CONTAINING_RECORD( pItem, join_record_t,
entry );
+ if ( sizeof(mc_gr->mgid.raw) ==
RtlCompareMemory(mc_gr->mgid.raw,p_mgid->raw,sizeof(mc_gr->mgid.raw)))
+ {
+ CL_ASSERT((mc_gr->j_state == STATE_NOT_JOINED)
|| (mc_gr->ref_cnt > 0));
+ if ((--mc_gr->ref_cnt) > 0)
+ {
+ if ((1 ==
mc_gr->ref_cnt)&&(mc_gr->src_req == JOIN_SRC_MCE))
+ {
+ /* last reference
usually igmp through ipoib */
+ mc_gr->src_req =
JOIN_SRC_IPOIB;
+ }
+ break;
+ }
+ else
+ {
+ AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_MCAST,
+
("igmp group %d.%d.%d.%d removed\n",0xE0 |
mc_gr->mgid.multicast.raw_group_id[10],
+
mc_gr->mgid.multicast.raw_group_id[11],mc_gr->mgid.multicast.raw_group_i
d[12],
+
mc_gr->mgid.multicast.raw_group_id[13]));
+ cl_qlist_remove_item(
&g_mc_g.mc_group_list, &mc_gr->entry );
+ cl_free(mc_gr);
+ }
+ break;
+ }
+ }
+ cl_spinlock_release(&g_mc_g.mc_list_lock);
+
+ AL_EXIT( AL_DBG_MCAST );
+
+}
+
+#endif //CL_KERNEL
+
+
+
ib_api_status_t
al_join_mcast(
IN const ib_qp_handle_t FUNC_PTR64
h_qp,
@@ -112,6 +356,17 @@
AL_ENTER( AL_DBG_MCAST );
+#ifdef CL_KERNEL
+
+ status =
al_igmp_list_add_group(&p_mcast_req->member_rec.mgid, NULL,FALSE);
+ if (status != IB_SUCCESS)
+ {
+ AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR,
+ ("al_join_mcast :
al_igmp_list_add_group FAILED status: %s\n", ib_get_err_str(status)) );
+ return status;
+ }
+
+#endif
/*
* Validate the port GUID. There is no need to validate the
pkey index as
* the user could change it later to make it invalid. There
is also no
@@ -468,6 +723,9 @@
ib_mcast_rec_t mcast_rec;
boolean_t sync;
+#if defined( CL_KERNEL )
+ boolean_t skip_attach;
+#endif
AL_ENTER( AL_DBG_MCAST );
h_mcast = PARENT_STRUCT( p_item, ib_mcast_t, async );
@@ -497,13 +755,29 @@
/* Ensure that the user wants the join operation
to proceed. */
if( h_mcast->state == SA_REG_STARTING )
{
+#if defined( CL_KERNEL )
+ /* It's a IPoIB join callback - check
for attach */
+ skip_attach =
al_igmp_list_skip_attach(&h_mcast->member_rec.mgid);
+ if (skip_attach)
+ {
+ AL_PRINT(
TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+ ("ATTACH Skipped\n") );
+ }
+
+ /* It's a IPoIB - change status to
JOINED and inc ref count*/
+
al_igmp_list_change_status_to_joined(&h_mcast->member_rec.mgid);
+#endif
/*
* Change the state here so that we
avoid trying to cancel
* the request if the verb operation
fails.
*/
h_mcast->state = SA_REG_ACTIVE;
/* Attach the QP to the multicast
group. */
-
if(ib_member_get_state(mcast_rec.p_member_rec->scope_state) ==
IB_MC_REC_STATE_FULL_MEMBER)
+ if(
+#if defined( CL_KERNEL )
+ (!skip_attach) &&
+#endif
+
(ib_member_get_state(mcast_rec.p_member_rec->scope_state) ==
IB_MC_REC_STATE_FULL_MEMBER))
{
status =
verbs_attach_mcast(h_mcast);
if( status != IB_SUCCESS
)
@@ -559,7 +833,15 @@
("IB_INVALID_MCAST_HANDLE\n") );
return IB_INVALID_MCAST_HANDLE;
}
+#if defined( CL_KERNEL )
+ AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_MCAST,
+ ("ib_leave_mcast KERNEL:IP :
%d.%d.%d.%d\n",0xE0 |
h_mcast->member_rec.mgid.multicast.raw_group_id[10],
+
h_mcast->member_rec.mgid.multicast.raw_group_id[11],h_mcast->member_rec.
mgid.multicast.raw_group_id[12],
+
h_mcast->member_rec.mgid.multicast.raw_group_id[13]));
+ al_igmp_list_remove_record(&h_mcast->member_rec.mgid);
+
+#endif
/* Record that we're already leaving the multicast group. */
ref_al_obj( &h_mcast->obj );
h_mcast->obj.pfn_destroy( &h_mcast->obj, pfn_destroy_cb );
@@ -647,6 +929,8 @@
h_attach->obj.pfn_destroy( &h_attach->obj, NULL
);
return status;
}
+ /* called from user mode MC callback. By calling the function below
we indicate that user mode(MCE) is joined and attached*/
+ al_igmp_list_change_status_to_joined(p_mcast_gid);
/* The proxy will release the reference taken in
init_al_obj. */
*ph_attach = h_attach;
Index: core/al/ib_common.h
===================================================================
--- core/al/ib_common.h (revision 1261)
+++ core/al/ib_common.h (working copy)
@@ -35,6 +35,7 @@
#include <complib/cl_types.h>
+#include <complib/cl_qlist.h>
#include <iba/ib_types.h>
@@ -47,4 +48,29 @@
IN ib_ca_attr_t*
const p_dest,
IN const ib_ca_attr_t* const
p_src );
+typedef enum _JOIN_STATE
+{
+ STATE_NOT_JOINED = 0,
+ STATE_JOINED
+}JOIN_STATE;
+
+typedef enum
+{
+ JOIN_SRC_MCE = 1,
+ JOIN_SRC_IPOIB
+}JOIN_SRC;
+
+typedef struct _JOIN_RECORD
+{
+ cl_list_item_t entry;
+ ib_gid_t mgid;
+ ib_qp_handle_t h_qp;
+ JOIN_STATE j_state;
+ int32_t ref_cnt;
+ JOIN_SRC src_req;
+ uint64_t time_stamp;
+}join_record_t;
+
+void al_igmp_list_remove_record(const ib_gid_t *p_mgid);
+ib_api_status_t al_igmp_list_add_group(const ib_gid_t *p_mgid, const
ib_qp_handle_t h_qp, boolean_t is_user);
#endif /* __IB_COMMON_H__ */
Index: core/al/kernel/al_proxy_subnet.c
===================================================================
--- core/al/kernel/al_proxy_subnet.c (revision 1261)
+++ core/al/kernel/al_proxy_subnet.c (working copy)
@@ -289,7 +289,22 @@
p_sa_req->pfn_sa_req_cb = __proxy_sa_req_cb;
p_ioctl->in.sa_req.p_attr = p_ioctl->in.attr;
+ if ((p_ioctl->in.sa_req.attr_id ==
IB_MAD_ATTR_MCMEMBER_RECORD) && \
+ (p_ioctl->in.sa_req.method ==
IB_MAD_METHOD_SET))
+ {
+ status =
al_igmp_list_add_group(&((ib_member_rec_t*)p_ioctl->in.attr)->mgid,
NULL,TRUE);
+ if (status != IB_SUCCESS)
+ {
+ AL_PRINT_EXIT( TRACE_LEVEL_ERROR,
AL_DBG_ERROR, ("Failed to add mc group\n") );
+ goto proxy_send_sa_req_err2;
+ }
+ }
+ else if ((p_ioctl->in.sa_req.attr_id ==
IB_MAD_ATTR_MCMEMBER_RECORD) && \
+ (p_ioctl->in.sa_req.method ==
IB_MAD_METHOD_DELETE))
+ {
+
al_igmp_list_remove_record(&((ib_member_rec_t*)p_ioctl->in.attr)->mgid);
+ }
/*
* We never pass the user-mode flag when sending SA requests
- the
* I/O manager will perform all synchronization to make this
IRP sync
Slava Strebkov
SW Engineer
Voltaire
099718750
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/d590147c/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: igmp_support.diff
Type: application/octet-stream
Size: 14586 bytes
Desc: igmp_support.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/d590147c/attachment.obj>
More information about the ofw
mailing list