[ofw][patch] mcast garbage collector

Sean Hefty sean.hefty at intel.com
Thu Jun 12 10:45:38 PDT 2008


Index: core/al/al_common.h

===================================================================

--- core/al/al_common.h (revision 1261)

+++ core/al/al_common.h           (working copy)

@@ -49,6 +49,8 @@

 extern uint32_t              g_ioc_query_retries;

 extern uint32_t              g_ioc_poll_interval;

 

+extern uint32_t             g_mc_destr_retr_timeout;

+extern uint32_t             g_mc_dest_retr_count;

 

Why the extra 'r' in the first name?  Any reason not to add the 'oy' and 'y' to
these names to clarify their use? 

 

 

 /* Wait operations performed in user-mode must be alertable. */

 #ifdef CL_KERNEL

Index: core/al/al_mcast.c

===================================================================

--- core/al/al_mcast.c     (revision 1261)

+++ core/al/al_mcast.c  (working copy)

@@ -96,6 +96,9 @@

 static void

 __free_attach(

            IN                                             al_obj_t
*p_obj );

+/* Mcast destroy timeout and retry count read from registry */

+uint32_t           g_mc_destr_retr_timeout = 1000;

+uint32_t           g_mc_dest_retr_count    = 10;

 #endif

 

I don't understand why ibal is changing for ipoib multicast handling. 

 

@@ -271,8 +274,14 @@

            sa_mad_data.p_attr = &h_mcast->member_rec;

 

            ref_al_obj( &h_mcast->obj );

-           status = al_send_sa_req(

-                       &h_mcast->sa_dereg_req, h_mcast->port_guid, 500, 0,
&sa_mad_data, 0 );

+          status = 

+#if defined( CL_KERNEL )

+                      al_send_sa_req(

+                      &h_mcast->sa_dereg_req, h_mcast->port_guid,
g_mc_destr_retr_timeout, g_mc_dest_retr_count, &sa_mad_data, 0 );

+#else

+          al_send_sa_req(

+          &h_mcast->sa_dereg_req, h_mcast->port_guid, 500, 0, &sa_mad_data, 0
);

+#endif

 

This is separate from this patch, but can someone explain this part to me?  The
only way joining multicast groups can work is if it's done in a single location,
like the kernel.  Does al_send_sa_req() drop to the kernel and get filtered?

 

 

            if( status != IB_SUCCESS )

                        deref_al_obj( &h_mcast->obj );

 

Index: core/bus/kernel/bus_driver.c

===================================================================

--- core/bus/kernel/bus_driver.c  (revision 1261)

+++ core/bus/kernel/bus_driver.c           (working copy)

@@ -326,7 +326,7 @@

 {

            NTSTATUS
status;

            /* Remember the terminating entry in the table below. */

-           RTL_QUERY_REGISTRY_TABLE           table[10];

+          RTL_QUERY_REGISTRY_TABLE           table[12];

            UNICODE_STRING
param_path;

            UNICODE_STRING
pkeyString;

            UNICODE_STRING
empy_string;

@@ -431,7 +431,19 @@

            table[8].EntryContext = &pkeyString;

            table[8].DefaultType  = REG_SZ;

            table[8].DefaultData  = &empy_string;

-           table[8].DefaultLength = 1024*sizeof(WCHAR);

+          table[9].Flags = RTL_QUERY_REGISTRY_DIRECT;

+          table[9].Name = L"McDestrRetrCnt";

+          table[9].EntryContext = &g_mc_dest_retr_count;

+          table[9].DefaultType = REG_DWORD;

+          table[9].DefaultData = &g_mc_dest_retr_count;

+          table[9].DefaultLength = sizeof(ULONG);

+

+          table[10].Flags = RTL_QUERY_REGISTRY_DIRECT;

+          table[10].Name = L"McDestrRetrTimeout";

 

Registry names should be explicit and spelled out.  This makes me think of
McDonald's.  :-)

 

+          table[10].EntryContext = &g_mc_destr_retr_timeout;

+          table[10].DefaultType = REG_DWORD;

+          table[10].DefaultData = &g_mc_destr_retr_timeout;

+          table[10].DefaultLength = sizeof(ULONG);

            /* Have at it! */

            status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE, 

                        param_path.Buffer, table, NULL, NULL );

Index: inc/complib/cl_types.h

===================================================================

--- inc/complib/cl_types.h           (revision 1261)

+++ inc/complib/cl_types.h         (working copy)

@@ -46,7 +46,7 @@

 

 #include <complib/cl_types_osd.h>

 

-

+typedef uint8_t                         net8_t;

 

There's no need for this.  A byte is a byte.  Adding this would require changes
throughout the code base to mark which uint8_t are expected to hit the wire,
versus those that do not.

 

 typedef uint16_t                        net16_t;

 typedef uint32_t                        net32_t;

 typedef uint64_t                        net64_t;

Index: inc/kernel/ip_packet.h

===================================================================

--- inc/kernel/ip_packet.h           (revision 1261)

+++ inc/kernel/ip_packet.h         (working copy)

@@ -196,6 +196,7 @@

 #define IP_PROT_IP                             4

 #define IP_PROT_TCP                         6

 #define IP_PROT_UDP                         17

+#define IP_PROT_IGMP                       2

 

 

 #include <complib/cl_packon.h>

@@ -355,7 +356,58 @@

 *********/

 #include <complib/cl_packoff.h>

 

+#define IGMP_V2_MEMBERSHIP_QUERY        0x11

+#define IGMP_V2_MEMBERSHIP_REPORT      0x16

+#define IGMP_V1_MEMBERSHIP_REPORT      0x12     // for backward compatibility
with IGMPv1

+#define IGMP_V2_LEAVE_GROUP                               0x17

 

+

+#include <complib/cl_packon.h>

+/****s* IB Network Drivers/igmp__v2_hdr_t

+* NAME

+*         igmp_v2_hdr_t

+*

+* DESCRIPTION

+*         Defines the IGMPv2 header for IP packets.

+*

+* SYNOPSIS

+*/

+typedef struct _igmp_v2_hdr

+{

+          net8_t               type;

+          net8_t               max_resp_time;

+          net16_t             chksum;

+          net32_t             group_address;

+}         PACK_SUFFIX igmp_v2_hdr_t;

+/*

+* FIELDS

+*         type

+*                     type of IGMPv2 message: query/report/leave

+*

+*         max_resp_time

+*                     The Max Response Time field is meaningful only in
Membership Query

+*                     messages, and specifies the maximum allowed time before
sending a

+*                     responding report in units of 1/10 second.  In all other
messages, it

+*                     is set to zero by the sender and ignored by receivers.

+*

+*         checksum

+*                     The checksum is the 16-bit one's complement of the one's
complement

+*         sum of the whole IGMP message (the entire IP payload).  

+*

+*         group_address

+*                     In a Membership Query message, the group address field is
set to zero

+*       when sending a General Query, and set to the group address being

+*       queried when sending a Group-Specific Query.

+*

+*       In a Membership Report or Leave Group message, the group address

+*       field holds the IP multicast group address of the group being

+*       reported or left.

+*

+* SEE ALSO

+*         IB Network Drivers, eth_hdr_t, arp_pkt_t, ip_hdr_t, tcp_hdr_t

+*********/

+#include <complib/cl_packoff.h>

+

 #define DHCP_PORT_SERVER             CL_HTON16(67)

 #define DHCP_PORT_CLIENT              CL_HTON16(68)

 

Index: ulp/ipoib/kernel/ipoib_adapter.h

===================================================================

--- ulp/ipoib/kernel/ipoib_adapter.h          (revision 1261)

+++ ulp/ipoib/kernel/ipoib_adapter.h       (working copy)

@@ -74,6 +74,9 @@

            uint32_t payload_mtu;

            uint32_t xfer_block_size;

            mac_addr_t       conf_mac;

+    boolean_t    mc_garbage_collector;

+          uint32_t mc_leave_rescan;

+          uint32_t mc_aging_time;

 

 }          ipoib_params_t;

 /*

Index: ulp/ipoib/kernel/ipoib_driver.c

===================================================================

--- ulp/ipoib/kernel/ipoib_driver.c (revision 1261)

+++ ulp/ipoib/kernel/ipoib_driver.c          (working copy)

@@ -526,6 +526,38 @@

            }

            p_adapter->params.recv_pool_ratio =
p_param->ParameterData.IntegerData;

 

+    /* Required: MC garbage collector. */

+    RtlInitUnicodeString( &keyword, L"MCGarbageCollector" );

+    NdisReadConfiguration(

+        &status, &p_param, h_config, &keyword, NdisParameterInteger );

+    if( status != NDIS_STATUS_SUCCESS )

+    {

+        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+            ("MC garbage collector parameter is missing.\n") );

+        return status;

+    }

 

Why make this required?  Just default to true or false.

 

+    p_adapter->params.mc_garbage_collector =
(p_param->ParameterData.IntegerData != 0);

+          /* Optional: MC leave rescan (sec) for the MC garable collector
thread. */

+          RtlInitUnicodeString( &keyword, L"MCLeaveRescan" );

 

It's not obvious to me from the name what this parameter does exactly.  We
should try to limit parameters that the user has to configure.

 

+          NdisReadConfiguration(

+                      &status, &p_param, h_config, &keyword,
NdisParameterInteger );

+          if( status != NDIS_STATUS_SUCCESS )

+          {

+                      p_adapter->params.mc_leave_rescan = 130;

+          }

+          else

+                      p_adapter->params.mc_leave_rescan =
p_param->ParameterData.IntegerData;

+

+          /* Optional: MC aging time (sec) */

+          RtlInitUnicodeString( &keyword, L"MCAgingTime" );

 

Same here.  I would rather see a single parameter that indicated how long a
multicast group can be idle before it's removed.  The underlying implementation
can use this to determine when to scan for idle groups.

 

+          NdisReadConfiguration(

+                      &status, &p_param, h_config, &keyword,
NdisParameterInteger );

+          if( status != NDIS_STATUS_SUCCESS )

+          {

+                      p_adapter->params.mc_aging_time = 260;

+          }

+          else

+                      p_adapter->params.mc_aging_time =
p_param->ParameterData.IntegerData;

            /* required: MTU size. */

            RtlInitUnicodeString( &keyword, L"PayloadMtu" );

            NdisReadConfiguration(

Index: ulp/ipoib/kernel/ipoib_endpoint.h

===================================================================

--- ulp/ipoib/kernel/ipoib_endpoint.h        (revision 1261)

+++ ulp/ipoib/kernel/ipoib_endpoint.h      (working copy)

@@ -61,7 +61,10 @@

 TO_LONG_PTR(           ib_av_handle_t ,                        h_av) ; 

            boolean_t                                              expired;

            ib_al_ifc_t                                             *p_ifc;

-

+          uint32_t                                     mcast_send_timestamp;

+    int32_t                 mcast_count;

+    boolean_t                                        is_mcast_endpoint;

+          boolean_t
is_mcast_listener;

 }          ipoib_endpt_t;

 /*

 * FIELDS

Index: ulp/ipoib/kernel/ipoib_port.c

===================================================================

--- ulp/ipoib/kernel/ipoib_port.c   (revision 1261)

+++ ulp/ipoib/kernel/ipoib_port.c (working copy)

@@ -66,7 +66,7 @@

 ipoib_port_t      *gp_ipoib_port;

 #endif

 

-

+static void __port_do_mcast_garbage(ipoib_port_t *p_port);

 /******************************************************************************

 *

 * Declarations

@@ -94,6 +94,8 @@

 __port_free(

            IN                                             cl_obj_t* const
p_obj );

 

+static void CL_API __port_mcast_garbage_collector

+          (IN                                            void*
context );

 

 /******************************************************************************

 *

@@ -290,6 +292,14 @@

            IN         OUT                              ipoib_send_desc_t* const
p_desc );

 

 static NDIS_STATUS

+__send_mgr_filter_igmp_v2(

+          IN                                             ipoib_port_t* const
p_port,

+    IN               const    ip_hdr_t* const
p_ip_hdr,

+          IN                                             size_t
iph_options_size,

+          IN                                             NDIS_BUFFER*
p_buf,

+          IN                                             size_t
buf_len );

+

 

Unclear how this relates to garbage collection.

 

+static NDIS_STATUS

 __send_mgr_filter_udp(

            IN                                             ipoib_port_t* const
p_port,

            IN                     const    ip_hdr_t* const
p_ip_hdr,

@@ -579,6 +589,11 @@

            KeInitializeEvent( &p_port->sa_event, NotificationEvent, TRUE );

            KeInitializeEvent( &p_port->leave_mcast_event, NotificationEvent,
TRUE );

            

+          p_port->mcast_event_init = FALSE;

+          cl_event_construct(&p_port->mcast_event);

+

+          p_port->mcast_thread_init = FALSE;

+          cl_thread_construct(&p_port->mcast_thread);

            IPOIB_EXIT( IPOIB_DBG_INIT );

 }

 

@@ -653,6 +668,18 @@

                        return status;

            }

 

+    if (p_port->p_adapter->params.mc_garbage_collector) 

+    {

+        /* Initialize multicast garbage collector event */

+        cl_status = cl_event_init(&p_port->mcast_event, TRUE);

+        if( cl_status != CL_SUCCESS )

+        {

+            IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                ("cl_event_init returned %#x\n", cl_status) );

+            return IB_ERROR;

+        }

+        p_port->mcast_event_init = TRUE;

+          }

            /* We only ever destroy from the PnP callback thread. */

            cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,

                        __port_destroying, __port_cleanup, __port_free );

@@ -746,7 +773,26 @@

            CL_ASSERT( p_obj );

 

            p_port = PARENT_STRUCT( p_obj, ipoib_port_t, obj );

+    if (p_port->p_adapter->params.mc_garbage_collector) 

+    {

+        /* Destroy multicast garbage collector thread */

 

+        if(p_port->mcast_thread_init)

+        {

+            CL_ASSERT(p_port->mcast_event_init);

+            cl_event_signal(&p_port->mcast_event);

+

+            cl_thread_destroy(&p_port->mcast_thread);

+            p_port->mcast_thread_init = FALSE;

+        }

+

+        if (p_port->mcast_event_init) 

+        {

+            cl_event_destroy(&p_port->mcast_event);

+            p_port->mcast_event_init = FALSE;

+        }

+    }

 

Do we have access to some generic thread work queue that we can post work items
to, rather than allocating a new thread for this?

 

- Sean

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/9d94c25a/attachment.html>


More information about the ofw mailing list