[ofw][patch] mcast garbage collector

Slava Strebkov slavas at voltaire.com
Thu Jun 12 02:49:53 PDT 2008


 

Hi,

Please review the mcast garbage collector, which removes old multicast
endpoints of an IPoIB port.

The rescan time and aging time are parameters that can be configured from
the device property page.

The SA retry count and timeout are also configurable from the registry.

 

Index: core/al/al_common.h

===================================================================

--- core/al/al_common.h (revision 1261)

+++ core/al/al_common.h           (working copy)

@@ -49,6 +49,8 @@

 extern uint32_t              g_ioc_query_retries;

 extern uint32_t              g_ioc_poll_interval;

 

+extern uint32_t             g_mc_destr_retr_timeout;

+extern uint32_t             g_mc_dest_retr_count;

 

 /* Wait operations performed in user-mode must be alertable. */

 #ifdef CL_KERNEL

Index: core/al/al_mcast.c

===================================================================

--- core/al/al_mcast.c     (revision 1261)

+++ core/al/al_mcast.c  (working copy)

@@ -96,6 +96,9 @@

 static void

 __free_attach(

            IN                                             al_obj_t
*p_obj );

+/* Mcast destroy timeout and retry count read from registry */

+uint32_t           g_mc_destr_retr_timeout = 1000;

+uint32_t           g_mc_dest_retr_count    = 10;

 #endif

 

 

@@ -271,8 +274,14 @@

            sa_mad_data.p_attr = &h_mcast->member_rec;

 

            ref_al_obj( &h_mcast->obj );

-           status = al_send_sa_req(

-                       &h_mcast->sa_dereg_req, h_mcast->port_guid, 500,
0, &sa_mad_data, 0 );

+          status = 

+#if defined( CL_KERNEL )

+                      al_send_sa_req(

+                      &h_mcast->sa_dereg_req, h_mcast->port_guid,
g_mc_destr_retr_timeout, g_mc_dest_retr_count, &sa_mad_data, 0 );

+#else

+          al_send_sa_req(

+          &h_mcast->sa_dereg_req, h_mcast->port_guid, 500, 0,
&sa_mad_data, 0 );

+#endif

            if( status != IB_SUCCESS )

                        deref_al_obj( &h_mcast->obj );

 

Index: core/bus/kernel/bus_driver.c

===================================================================

--- core/bus/kernel/bus_driver.c  (revision 1261)

+++ core/bus/kernel/bus_driver.c           (working copy)

@@ -326,7 +326,7 @@

 {

            NTSTATUS
status;

            /* Remember the terminating entry in the table below. */

-           RTL_QUERY_REGISTRY_TABLE           table[10];

+          RTL_QUERY_REGISTRY_TABLE           table[12];

            UNICODE_STRING
param_path;

            UNICODE_STRING
pkeyString;

            UNICODE_STRING
empy_string;

@@ -431,7 +431,19 @@

            table[8].EntryContext = &pkeyString;

            table[8].DefaultType  = REG_SZ;

            table[8].DefaultData  = &empy_string;

-           table[8].DefaultLength = 1024*sizeof(WCHAR);

+          table[9].Flags = RTL_QUERY_REGISTRY_DIRECT;

+          table[9].Name = L"McDestrRetrCnt";

+          table[9].EntryContext = &g_mc_dest_retr_count;

+          table[9].DefaultType = REG_DWORD;

+          table[9].DefaultData = &g_mc_dest_retr_count;

+          table[9].DefaultLength = sizeof(ULONG);

+

+          table[10].Flags = RTL_QUERY_REGISTRY_DIRECT;

+          table[10].Name = L"McDestrRetrTimeout";

+          table[10].EntryContext = &g_mc_destr_retr_timeout;

+          table[10].DefaultType = REG_DWORD;

+          table[10].DefaultData = &g_mc_destr_retr_timeout;

+          table[10].DefaultLength = sizeof(ULONG);

            /* Have at it! */

            status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE, 

                        param_path.Buffer, table, NULL, NULL );

Index: inc/complib/cl_types.h

===================================================================

--- inc/complib/cl_types.h           (revision 1261)

+++ inc/complib/cl_types.h         (working copy)

@@ -46,7 +46,7 @@

 

 #include <complib/cl_types_osd.h>

 

-

+typedef uint8_t                         net8_t;

 typedef uint16_t                        net16_t;

 typedef uint32_t                        net32_t;

 typedef uint64_t                        net64_t;

Index: inc/kernel/ip_packet.h

===================================================================

--- inc/kernel/ip_packet.h           (revision 1261)

+++ inc/kernel/ip_packet.h         (working copy)

@@ -196,6 +196,7 @@

 #define IP_PROT_IP                             4

 #define IP_PROT_TCP                         6

 #define IP_PROT_UDP                         17

+#define IP_PROT_IGMP                       2

 

 

 #include <complib/cl_packon.h>

@@ -355,7 +356,58 @@

 *********/

 #include <complib/cl_packoff.h>

 

+#define IGMP_V2_MEMBERSHIP_QUERY        0x11

+#define IGMP_V2_MEMBERSHIP_REPORT      0x16

+#define IGMP_V1_MEMBERSHIP_REPORT      0x12     // for backward
compatibility with IGMPv1

+#define IGMP_V2_LEAVE_GROUP                               0x17

 

+

+#include <complib/cl_packon.h>

+/****s* IB Network Drivers/igmp_v2_hdr_t

+* NAME

+*         igmp_v2_hdr_t

+*

+* DESCRIPTION

+*         Defines the IGMPv2 header for IP packets.

+*

+* SYNOPSIS

+*/

+typedef struct _igmp_v2_hdr

+{

+          net8_t               type;

+          net8_t               max_resp_time;

+          net16_t             chksum;

+          net32_t             group_address;

+}         PACK_SUFFIX igmp_v2_hdr_t;

+/*

+* FIELDS

+*         type

+*                     type of IGMPv2 message: query/report/leave

+*

+*         max_resp_time

+*                     The Max Response Time field is meaningful only in
Membership Query

+*                     messages, and specifies the maximum allowed time
before sending a

+*                     responding report in units of 1/10 second.  In
all other messages, it

+*                     is set to zero by the sender and ignored by
receivers.

+*

+*         chksum

+*                     The checksum is the 16-bit one's complement of
the one's complement

+*         sum of the whole IGMP message (the entire IP payload).  

+*

+*         group_address

+*                     In a Membership Query message, the group address
field is set to zero

+*       when sending a General Query, and set to the group address
being

+*       queried when sending a Group-Specific Query.

+*

+*       In a Membership Report or Leave Group message, the group
address

+*       field holds the IP multicast group address of the group being

+*       reported or left.

+*

+* SEE ALSO

+*         IB Network Drivers, eth_hdr_t, arp_pkt_t, ip_hdr_t, tcp_hdr_t

+*********/

+#include <complib/cl_packoff.h>

+

 #define DHCP_PORT_SERVER             CL_HTON16(67)

 #define DHCP_PORT_CLIENT              CL_HTON16(68)

 

Index: ulp/ipoib/kernel/ipoib_adapter.h

===================================================================

--- ulp/ipoib/kernel/ipoib_adapter.h          (revision 1261)

+++ ulp/ipoib/kernel/ipoib_adapter.h       (working copy)

@@ -74,6 +74,9 @@

            uint32_t payload_mtu;

            uint32_t xfer_block_size;

            mac_addr_t       conf_mac;

+    boolean_t    mc_garbage_collector;

+          uint32_t mc_leave_rescan;

+          uint32_t mc_aging_time;

 

 }          ipoib_params_t;

 /*

Index: ulp/ipoib/kernel/ipoib_driver.c

===================================================================

--- ulp/ipoib/kernel/ipoib_driver.c (revision 1261)

+++ ulp/ipoib/kernel/ipoib_driver.c          (working copy)

@@ -526,6 +526,38 @@

            }

            p_adapter->params.recv_pool_ratio =
p_param->ParameterData.IntegerData;

 

+    /* Required: MC garbage collector. */

+    RtlInitUnicodeString( &keyword, L"MCGarbageCollector" );

+    NdisReadConfiguration(

+        &status, &p_param, h_config, &keyword, NdisParameterInteger );

+    if( status != NDIS_STATUS_SUCCESS )

+    {

+        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+            ("MC garbage collector parameter is missing.\n") );

+        return status;

+    }

+    p_adapter->params.mc_garbage_collector =
(p_param->ParameterData.IntegerData != 0);

+          /* Optional: MC leave rescan (sec) for the MC garbage
collector thread. */

+          RtlInitUnicodeString( &keyword, L"MCLeaveRescan" );

+          NdisReadConfiguration(

+                      &status, &p_param, h_config, &keyword,
NdisParameterInteger );

+          if( status != NDIS_STATUS_SUCCESS )

+          {

+                      p_adapter->params.mc_leave_rescan = 130;

+          }

+          else

+                      p_adapter->params.mc_leave_rescan =
p_param->ParameterData.IntegerData;

+

+          /* Optional: MC aging time (sec) */

+          RtlInitUnicodeString( &keyword, L"MCAgingTime" );

+          NdisReadConfiguration(

+                      &status, &p_param, h_config, &keyword,
NdisParameterInteger );

+          if( status != NDIS_STATUS_SUCCESS )

+          {

+                      p_adapter->params.mc_aging_time = 260;

+          }

+          else

+                      p_adapter->params.mc_aging_time =
p_param->ParameterData.IntegerData;

            /* required: MTU size. */

            RtlInitUnicodeString( &keyword, L"PayloadMtu" );

            NdisReadConfiguration(

Index: ulp/ipoib/kernel/ipoib_endpoint.h

===================================================================

--- ulp/ipoib/kernel/ipoib_endpoint.h        (revision 1261)

+++ ulp/ipoib/kernel/ipoib_endpoint.h      (working copy)

@@ -61,7 +61,10 @@

 TO_LONG_PTR(           ib_av_handle_t ,                        h_av) ; 

            boolean_t
expired;

            ib_al_ifc_t
*p_ifc;

-

+          uint32_t
mcast_send_timestamp;

+    int32_t                 mcast_count;

+    boolean_t                                        is_mcast_endpoint;

+          boolean_t
is_mcast_listener;

 }          ipoib_endpt_t;

 /*

 * FIELDS

Index: ulp/ipoib/kernel/ipoib_port.c

===================================================================

--- ulp/ipoib/kernel/ipoib_port.c   (revision 1261)

+++ ulp/ipoib/kernel/ipoib_port.c (working copy)

@@ -66,7 +66,7 @@

 ipoib_port_t      *gp_ipoib_port;

 #endif

 

-

+static void __port_do_mcast_garbage(ipoib_port_t *p_port);

 
/***********************************************************************
*******

 *

 * Declarations

@@ -94,6 +94,8 @@

 __port_free(

            IN                                             cl_obj_t*
const                                       p_obj );

 

+static void CL_API __port_mcast_garbage_collector

+          (IN                                            void*
context );

 

 
/***********************************************************************
*******

 *

@@ -290,6 +292,14 @@

            IN         OUT
ipoib_send_desc_t* const           p_desc );

 

 static NDIS_STATUS

+__send_mgr_filter_igmp_v2(

+          IN                                             ipoib_port_t*
const                                 p_port,

+    IN               const    ip_hdr_t* const
p_ip_hdr,

+          IN                                             size_t
iph_options_size,

+          IN                                             NDIS_BUFFER*
p_buf,

+          IN                                             size_t
buf_len );

+

+static NDIS_STATUS

 __send_mgr_filter_udp(

            IN                                             ipoib_port_t*
const                                 p_port,

            IN                     const    ip_hdr_t* const
p_ip_hdr,

@@ -579,6 +589,11 @@

            KeInitializeEvent( &p_port->sa_event, NotificationEvent,
TRUE );

            KeInitializeEvent( &p_port->leave_mcast_event,
NotificationEvent, TRUE );

            

+          p_port->mcast_event_init = FALSE;

+          cl_event_construct(&p_port->mcast_event);

+

+          p_port->mcast_thread_init = FALSE;

+          cl_thread_construct(&p_port->mcast_thread);

            IPOIB_EXIT( IPOIB_DBG_INIT );

 }

 

@@ -653,6 +668,18 @@

                        return status;

            }

 

+    if (p_port->p_adapter->params.mc_garbage_collector) 

+    {

+        /* Initialize multicast garbage collector event */

+        cl_status = cl_event_init(&p_port->mcast_event, TRUE);

+        if( cl_status != CL_SUCCESS )

+        {

+            IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                ("cl_event_init returned %#x\n", cl_status) );

+            return IB_ERROR;

+        }

+        p_port->mcast_event_init = TRUE;

+          }

            /* We only ever destroy from the PnP callback thread. */

            cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,

                        __port_destroying, __port_cleanup, __port_free
);

@@ -746,7 +773,26 @@

            CL_ASSERT( p_obj );

 

            p_port = PARENT_STRUCT( p_obj, ipoib_port_t, obj );

+    if (p_port->p_adapter->params.mc_garbage_collector) 

+    {

+        /* Destroy multicast garbage collector thread */

 

+        if(p_port->mcast_thread_init)

+        {

+            CL_ASSERT(p_port->mcast_event_init);

+            cl_event_signal(&p_port->mcast_event);

+

+            cl_thread_destroy(&p_port->mcast_thread);

+            p_port->mcast_thread_init = FALSE;

+        }

+

+        if (p_port->mcast_event_init) 

+        {

+            cl_event_destroy(&p_port->mcast_event);

+            p_port->mcast_event_init = FALSE;

+        }

+    }

+

            __endpt_mgr_destroy( p_port );

            __recv_mgr_destroy( p_port );

            __send_mgr_destroy( p_port );

@@ -2131,6 +2177,30 @@

            p_eth->hdr.src = p_src->mac;

            p_eth->hdr.dst = p_dst->mac;

 

+          /* Check if multicast packet and update endpoint timestamp if
needed */

+

+          if ( ETH_IS_MULTICAST(p_eth->hdr.dst.addr) && 

+                      p_eth->hdr.type == ETH_PROT_TYPE_IP &&

+                      !ETH_IS_BROADCAST(p_eth->hdr.dst.addr) ) 

+          {

+                      /*

+
p_port->p_adapter->params.mc_garbage_collector doesn't

+                                  exist in this context , so we use
p_dst->is_mcast_endpoint

+                                  as indicator for mc_garbage collector
activity ( enable/disable )

+                      */

+        if ( p_dst->is_mcast_endpoint &&

+                                  ++(p_dst->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD) 

+        {

+                                  CL_ASSERT(p_dst->h_mcast != NULL);

+                                  CL_ASSERT(p_dst->is_mcast_endpoint);

+

+            p_dst->mcast_count = 0;

+            p_dst->mcast_send_timestamp = cl_get_time_stamp_sec();

+        }

+

+                      p_eth->hdr.dst.addr[1] = 0;

+                      p_eth->hdr.dst.addr[3] = p_eth->hdr.dst.addr[3] &
0x7f;

+          }

            IPOIB_EXIT( IPOIB_DBG_RECV );

            return IB_SUCCESS;

 }

@@ -3110,6 +3180,26 @@

            if( p_ip_hdr->offset ||

                        p_ip_hdr->prot != IP_PROT_UDP )

            {

+                      /* Check if this packet is IGMP */

+                      if ( p_ip_hdr->prot == IP_PROT_IGMP ) 

+                      {

+                                  /*

+                                      In igmp packet I saw that iph
arrive in 2 NDIS_BUFFERs:

+                                              1. iph

+                                              2. ip options

+                                              So to get the IGMP packet
we need to skip the ip options NDIS_BUFFER

+                                  */

+                                  size_t iph_size_in_bytes =
(p_ip_hdr->ver_hl & 0xf) * 4;

+                                  size_t iph_options_size =
iph_size_in_bytes - buf_len;

+                                  buf_len -= sizeof(ip_hdr_t);

+

+                                  /*

+                                      Could be a case that arrived igmp
packet not from type IGMPv2 ,

+                                              but IGMPv1 or IGMPv3.

+                                              We anyway pass it to
__send_mgr_filter_igmp_v2().

+                                  */

+                                  __send_mgr_filter_igmp_v2(p_port,
p_ip_hdr, iph_options_size, p_buf, buf_len);

+                      }

                        /* Not a UDP packet. */

                        cl_perf_start( SendTcp );

                        status = __send_gen( p_port, p_desc );

@@ -3131,6 +3221,129 @@

 

 

 static NDIS_STATUS

+__send_mgr_filter_igmp_v2(

+          IN                                             ipoib_port_t*
const                                 p_port,

+    IN               const    ip_hdr_t* const
p_ip_hdr,

+          IN                                             size_t
iph_options_size,

+          IN                                             NDIS_BUFFER*
p_buf,

+          IN                                             size_t
buf_len )

+{

+    igmp_v2_hdr_t                     *p_igmp_v2_hdr = NULL;

+          NDIS_STATUS                          endpt_status;

+          ipoib_endpt_t*               p_endpt = NULL;

+          mac_addr_t                               fake_mcast_mac;

+

+          IPOIB_ENTER( IPOIB_DBG_SEND );

+

+    if( !buf_len )

+          {

+                      // To get the IGMP packet we need to skip the ip
options NDIS_BUFFER (if exists)

+                      while ( iph_options_size )

+                      {

+                                  NdisGetNextBuffer( p_buf, &p_buf );

+                                  if( !p_buf )

+                                  {

+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                                                          ("Failed to
get IGMPv2 header buffer.\n") );

+                                              return
NDIS_STATUS_FAILURE;

+                                  }

+                                  NdisQueryBufferSafe( p_buf,
&p_igmp_v2_hdr, &buf_len, NormalPagePriority );

+                                  if( !p_igmp_v2_hdr )

+                                  {

+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                                                          ("Failed to
query IGMPv2 header buffer.\n") );

+                                              return
NDIS_STATUS_FAILURE;

+                                  }

+

+                                  iph_options_size-=buf_len;

+                      }

+        

+                      NdisGetNextBuffer( p_buf, &p_buf );

+                      if( !p_buf )

+                      {

+                                  IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,

+                                              ("Failed to get IGMPv2
header buffer.\n") );

+                                  return NDIS_STATUS_FAILURE;

+                      }

+                      NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr,
&buf_len, NormalPagePriority );

+                      if( !p_igmp_v2_hdr )

+                      {

+                                  IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,

+                                              ("Failed to query IGMPv2
header buffer.\n") );

+                                  return NDIS_STATUS_FAILURE;

+                      }

+          }

+          else

+          {

+                      p_igmp_v2_hdr = (igmp_v2_hdr_t*)(p_ip_hdr + 1);

+          }

+          /* Get the IGMP header length. */

+          if( buf_len < sizeof(igmp_v2_hdr_t) )

+          {

+                      IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,

+                                  ("Buffer not large enough for IGMPv2
packet.\n") );

+                      return NDIS_STATUS_BUFFER_TOO_SHORT;

+          }

+

+          // build fake mac from igmp packet group address

+          fake_mcast_mac.addr[0] = 1;

+    fake_mcast_mac.addr[1] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[0] & 0x0f;

+    fake_mcast_mac.addr[2] = 0x5E;

+          fake_mcast_mac.addr[3] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[1];

+          fake_mcast_mac.addr[4] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[2];

+          fake_mcast_mac.addr[5] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[3];

+

+          switch ( p_igmp_v2_hdr->type )

+          {

+          case IGMP_V2_MEMBERSHIP_REPORT:

+                      /* 

+                                  This means that somebody opened a
listener on this group.

+                          Change type of mcast endpt to SEND_RECV
endpt. So mcast garbage collector 

+                                  will not delete this mcast endpt.

+                      */

+        IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,

+                                  ("Catched IGMP_V2_MEMBERSHIP_REPORT
message\n") );

+        endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
&p_endpt );

+                      if ( p_endpt )

+                      {

+                                  cl_obj_lock( &p_port->obj );

+                                  p_endpt->is_mcast_listener = TRUE;

+                                  cl_obj_unlock( &p_port->obj );

+            ipoib_endpt_deref( p_endpt );

+                      }

+                      break;

+

+          case IGMP_V2_LEAVE_GROUP:

+        /* 

+                                  This means that somebody closed a
listener on this group.

+                          Change type of mcast endpt to SEND_ONLY
endpt. So mcast 

+                                  garbage collector will delete this
mcast endpt next time.

+                      */

+        IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,

+                                       ("Catched IGMP_V2_LEAVE_GROUP
message\n") );

+        endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
&p_endpt );

+                      if ( p_endpt )

+                      {

+                                  cl_obj_lock( &p_port->obj );

+                                  p_endpt->is_mcast_listener = FALSE;

+

+                                  cl_obj_unlock( &p_port->obj );

+                                  ipoib_endpt_deref( p_endpt );

+                      }

+

+                      break;

+

+          default:

+        IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,

+                                       ("Send Unknown IGMP message:
0x%x \n", p_igmp_v2_hdr->type ) );

+                      break;

+          }

+

+          IPOIB_EXIT( IPOIB_DBG_SEND );

+          return NDIS_STATUS_SUCCESS;

+}

+

+static NDIS_STATUS

 __send_mgr_filter_udp(

            IN                                             ipoib_port_t*
const                                 p_port,

            IN                     const    ip_hdr_t* const
p_ip_hdr,

@@ -3577,7 +3790,22 @@

                                    return NDIS_STATUS_PENDING;

                        }

            }

+          else if ( p_port->p_adapter->params.mc_garbage_collector &&

+                            status == NDIS_STATUS_SUCCESS && 

+                                    ETH_IS_MULTICAST(
p_eth_hdr->dst.addr ) &&  

+                                    !ETH_IS_BROADCAST(
p_eth_hdr->dst.addr ) )

+          {

+                      CL_ASSERT( (*pp_endpt) );

+                      CL_ASSERT((*pp_endpt)->h_mcast != NULL);

+                      CL_ASSERT((*pp_endpt)->is_mcast_endpoint);

 

+                      if (++((*pp_endpt)->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD) 

+                      {

+                                  (*pp_endpt)->mcast_count = 0;

+                                  (*pp_endpt)->mcast_send_timestamp =
cl_get_time_stamp_sec();

+                      }

+          }

+

            IPOIB_EXIT( IPOIB_DBG_SEND );

            return status;

 }

@@ -4706,6 +4934,8 @@

            ib_query_req_t                          query;

            ib_user_query_t                        info;

            ib_portinfo_record_t       port_rec;

+    cl_status_t             cl_status;

+    BOOLEAN                 success = TRUE;

 

            IPOIB_ENTER( IPOIB_DBG_INIT );

 

@@ -4740,17 +4970,54 @@

            /* reference the object for the multicast query. */

            ipoib_port_ref( p_port, ref_port_up );

 

+    __try

+    {

            status = p_port->p_adapter->p_ifc->query(

                        p_port->p_adapter->h_al, &query,
&p_port->ib_mgr.h_query );

            if( status != IB_SUCCESS )

            {

-                       KeSetEvent( &p_port->sa_event, EVENT_INCREMENT,
FALSE );

-                       ipoib_set_inactive( p_port->p_adapter );

-                       ipoib_port_deref( p_port, ref_port_up );

+            success = FALSE;

                        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,

                                    ("ib_query returned %s\n", 

 
p_port->p_adapter->p_ifc->get_err_str( status )) );

-                       return;

+            __leave;

+        }

+

+        if (p_port->p_adapter->params.mc_garbage_collector) 

+        {

+            CL_ASSERT(p_port->mcast_event_init);

+            cl_status = cl_event_reset(&p_port->mcast_event);

+            if( cl_status != CL_SUCCESS )

+            {

+                success = FALSE;

+                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                    ("cl_event_reset returned %#x\n", cl_status) );

+                __leave;

+            }

+

+            cl_status = cl_thread_init(

+                &p_port->mcast_thread, 

+                __port_mcast_garbage_collector, 

+                p_port, 

+                "mcast_garbage");

+            if( cl_status != CL_SUCCESS )

+            {

+                success = FALSE;

+                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                    ("cl_thread_init returned %#x\n", cl_status) );

+                __leave;

+            }

+            p_port->mcast_thread_init = TRUE;

+        }

+    }

+    __finally

+    {

+        if (!success) 

+        {

+            KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );

+            ipoib_set_inactive( p_port->p_adapter );

+            ipoib_port_deref( p_port, ref_port_up );

+        }

            }

 

            IPOIB_EXIT( IPOIB_DBG_INIT );

@@ -5229,6 +5496,16 @@

                        return;

            }

 

+    /* Destroy multicast garbage collector thread */

+

+    if(p_port->p_adapter->params.mc_garbage_collector &&
p_port->mcast_thread_init)

+    {

+        CL_ASSERT(p_port->mcast_event_init);

+        cl_event_signal(&p_port->mcast_event);

+

+        cl_thread_destroy(&p_port->mcast_thread);

+        p_port->mcast_thread_init = FALSE;

+    }

            KeResetEvent(&p_port->leave_mcast_event);

 

            /* Reset all endpoints so we don't flush our ARP cache. */

@@ -5656,6 +5933,15 @@

                                    &p_port->endpt_mgr.lid_endpts,
p_endpt->dlid, &p_endpt->lid_item );

                        CL_ASSERT( p_qitem == &p_endpt->lid_item );

            }

+    /* Add the endpoint to the multicast endpoints list */

+          if ( p_port->p_adapter->params.mc_garbage_collector )

+          {

+                      p_endpt->is_mcast_endpoint = TRUE;

+                      p_endpt->mcast_count = 0;

+                      p_endpt->mcast_send_timestamp =
cl_get_time_stamp_sec();

+          }

+          else

+                      p_endpt->is_mcast_endpoint = FALSE;

            cl_obj_unlock( &p_port->obj );

            

            /* Try to send all pending sends. */

@@ -5712,6 +5998,87 @@

            IPOIB_EXIT( IPOIB_DBG_MCAST );

 }

 

+static void __port_do_mcast_garbage(ipoib_port_t *p_port)

+{

+    const mac_addr_t DEFAULT_MCAST_GROUP = {0x01, 0x00, 0x5e, 0x00,
0x00, 0x01};

+          /* Do garbage collecting... */

 

+          cl_map_item_t   *p_item;

+          ipoib_endpt_t     *p_endpt;

+          cl_qlist_t                        destroy_mc_list;

+          uint8_t                          cnt;

+          const static GC_MAX_LEAVE_NUM = 80;

+          const uint32_t CURRENT_TIME_SEC = cl_get_time_stamp_sec();

 

+          cl_qlist_init( &destroy_mc_list );

 

+          cl_obj_lock( &p_port->obj );

+          cnt = 0;

+          p_item = cl_qmap_head( &p_port->endpt_mgr.mac_endpts );

+          while( (p_item != cl_qmap_end( &p_port->endpt_mgr.mac_endpts
)) && (cnt < GC_MAX_LEAVE_NUM))

+          {

+                      p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t,
mac_item );

+                      p_item = cl_qmap_next( p_item );

+

+                      /* Check if the current endpoint is an old
multicast item */

+

+                      if( p_endpt->h_mcast && 

+                                  p_endpt->is_mcast_endpoint &&

+                                  (!p_endpt->is_mcast_listener) &&

+                                  ( cl_memcmp( &p_endpt->mac,
&DEFAULT_MCAST_GROUP, sizeof(mac_addr_t) ) &&

+                                  CURRENT_TIME_SEC -
p_endpt->mcast_send_timestamp >
p_port->p_adapter->params.mc_aging_time))

+                      {

+                                  cl_qmap_remove_item(
&p_port->endpt_mgr.mac_endpts,

+                                              &p_endpt->mac_item );

+                                  cl_fmap_remove_item(
&p_port->endpt_mgr.gid_endpts,

+                                              &p_endpt->gid_item );

+

+                                  if( p_endpt->dlid )

+                                  {

+                                              cl_qmap_remove_item(
&p_port->endpt_mgr.lid_endpts,

+
&p_endpt->lid_item );

+                                              p_endpt->dlid = 0;

+                                  }

+

+                                  cl_qlist_insert_tail(

+                                              &destroy_mc_list,
&p_endpt->mac_item.pool_item.list_item );

+                                  cnt++;

+                      }

+          }

+          cl_obj_unlock( &p_port->obj );

+

+          /* Destroy all multicast endpoints now that we have released
the lock. */

+          while( cl_qlist_count( &destroy_mc_list ) )

+          {

+                      p_endpt = PARENT_STRUCT( cl_qlist_head(
&destroy_mc_list ),

+
ipoib_endpt_t, mac_item.pool_item.list_item );

+                      IPOIB_PRINT( TRACE_LEVEL_INFORMATION,
IPOIB_DBG_ENDPT,

+                                  ("mcast garbage collector: destroying
endpoint %02x:%02x:%02x:%02x:%02x:%02x \n", 

+                                              p_endpt->mac.addr[0],

+                                              p_endpt->mac.addr[1],

+                                              p_endpt->mac.addr[2],

+                                              p_endpt->mac.addr[3],

+                                              p_endpt->mac.addr[4],

+                                              p_endpt->mac.addr[5]) );

+

+                      cl_obj_destroy( &PARENT_STRUCT(
cl_qlist_remove_head( &destroy_mc_list ),

+                                  ipoib_endpt_t,
mac_item.pool_item.list_item )->obj );

+          }

+}

+

+static void CL_API __port_mcast_garbage_collector

+          (IN                                            void*
context )

+{

+          cl_status_t status;

+          ipoib_port_t *p_port = context;

+          const uint32_t WAIT_US =
p_port->p_adapter->params.mc_leave_rescan * 1000000;       /*
cl_event_wait_on use usec */

+    IPOIB_ENTER( IPOIB_DBG_ENDPT );

+

+    CL_ASSERT( p_port->p_adapter->params.mc_garbage_collector );

+          

+          while((status =
cl_event_wait_on(&p_port->mcast_event,WAIT_US,FALSE)) != STATUS_SUCCESS)

+          {

+                      __port_do_mcast_garbage(p_port);

+          }

+    IPOIB_EXIT( IPOIB_DBG_ENDPT );

+}

Index: ulp/ipoib/kernel/ipoib_port.h

===================================================================

--- ulp/ipoib/kernel/ipoib_port.h   (revision 1261)

+++ ulp/ipoib/kernel/ipoib_port.h (working copy)

@@ -63,6 +63,7 @@

  */

 #define IPOIB_USE_DMA         1

 

+#define IPOIB_MCAST_TIMESTAMP_THRESHOLD     10000

 

 #define IPOIB_PORT_FROM_PACKET( P )        \

            (((ipoib_port_t**)P->MiniportReservedEx)[0])

@@ -506,6 +507,11 @@

            atomic32_t
endpt_rdr;

 

            atomic32_t
hdr_idx;

+          boolean_t
mcast_event_init;           

+          cl_event_t
mcast_event;                 /* Multicast garbage collector thread
terminate event */

+

+          boolean_t
mcast_thread_init;         

+          cl_thread_t
mcast_thread;               /* Multicast garbage collector thread */

            uint16_t                                     pkey_index;

            ipoib_hdr_t
hdr[1];  /* Must be last! */

 

Index: ulp/ipoib/kernel/netipoib.inf

===================================================================

--- ulp/ipoib/kernel/netipoib.inf    (revision 1261)

+++ ulp/ipoib/kernel/netipoib.inf  (working copy)

@@ -126,6 +126,26 @@

 HKR, Ndi\Params\PayloadMtu,              Min,                  0, "60"

 HKR, Ndi\Params\PayloadMtu,              Max,                 0, "2044"

 

+HKR, Ndi\Params\MCGarbageCollector,            ParamDesc,      0, "MC
garbage collector"

+HKR, Ndi\Params\MCGarbageCollector,            Type,                0,
"enum"

+HKR, Ndi\Params\MCGarbageCollector,            Default, 0, "1"

+HKR, Ndi\Params\MCGarbageCollector,            Optional,           0,
"0"

+HKR, Ndi\Params\MCGarbageCollector\enum,"0",                      0,
"Disabled"

+HKR, Ndi\Params\MCGarbageCollector\enum,"1",                      0,
"Enabled"

+

+HKR, Ndi\Params\MCLeaveRescan,                  ParamDesc,      0, "MC
leave rescan (sec)"

+HKR, Ndi\Params\MCLeaveRescan,                  Type,                0,
"dword"

+HKR, Ndi\Params\MCLeaveRescan,                  Default, 0, "130"

+HKR, Ndi\Params\MCLeaveRescan,                  Optional,           0,
"0"

+HKR, Ndi\Params\MCLeaveRescan,                  Min,
0, "1"

+HKR, Ndi\Params\MCLeaveRescan,                  Max,                 0,
"3600"

+

+HKR, Ndi\Params\MCAgingTime,                      ParamDesc,      0,
"MC aging time (sec)"

+HKR, Ndi\Params\MCAgingTime,                      Type,
0, "dword"

+HKR, Ndi\Params\MCAgingTime,                      Default, 0, "260"

+HKR, Ndi\Params\MCAgingTime,                      Optional,
0, "0"

+HKR, Ndi\Params\MCAgingTime,                      Min,
0, "1"

+HKR, Ndi\Params\MCAgingTime,                      Max,
0, "3600"

 [IpoibService]

 DisplayName     = %IpoibServiceDispName%

 ServiceType     = 1 ;%SERVICE_KERNEL_DRIVER%

 

Slava Strebkov

SW Engineer

Voltaire

099718750

 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/640bbde7/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mcast_garbage_collector.diff
Type: application/octet-stream
Size: 25961 bytes
Desc: mcast_garbage_collector.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/640bbde7/attachment.obj>


More information about the ofw mailing list