[ofw] [PATCH] mcast garbage collector
    Slava Strebkov 
    slavas at voltaire.com
       
    Mon Apr  7 05:48:02 PDT 2008
    
    
  
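Hi, the following patch adds a garbage collector for multicast (mcast)
endpoints in IPoIB: mcast endpoints that have no local listener and have
not been sent to within the aging time are removed.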
 
Index: inc/complib/cl_types.h
===================================================================
--- inc/complib/cl_types.h           (revision 1047)
+++ inc/complib/cl_types.h         (working copy)
@@ -46,7 +46,7 @@
 
 #include <complib/cl_types_osd.h>
 
-
+typedef uint8_t                         net8_t;
 typedef uint16_t                        net16_t;
 typedef uint32_t                        net32_t;
 typedef uint64_t                        net64_t;
Index: inc/kernel/ip_packet.h
===================================================================
--- inc/kernel/ip_packet.h           (revision 1047)
+++ inc/kernel/ip_packet.h         (working copy)
@@ -196,6 +196,7 @@
 #define IP_PROT_IP                             4
 #define IP_PROT_TCP                         6
 #define IP_PROT_UDP                         17
+#define IP_PROT_IGMP                       2
 
 
 #include <complib/cl_packon.h>
@@ -355,6 +356,55 @@
 *********/
 #include <complib/cl_packoff.h>
 
+#define IGMP_V2_MEMBERSHIP_QUERY        0x11
+#define IGMP_V2_MEMBERSHIP_REPORT      0x16
+#define IGMP_V1_MEMBERSHIP_REPORT      0x12     // for backward compatibility with IGMPv1
+#define IGMP_V2_LEAVE_GROUP                               0x17
+#include <complib/cl_packon.h>
+/****s* IB Network Drivers/igmp_v2_hdr_t
+* NAME
+*         igmp_v2_hdr_t
+*
+* DESCRIPTION
+*         Defines the IGMPv2 header for IP packets.
+*
+* SYNOPSIS
+*/
+typedef struct _igmp_v2_hdr
+{
+          net8_t               type;
+          net8_t               max_resp_time;
+          net16_t             chksum;
+          net32_t             group_address;
+}         PACK_SUFFIX igmp_v2_hdr_t;
+/*
+* FIELDS
+*         type
+*                     type of IGMPv2 message: query/report/leave
+*
+*         max_resp_time
+*                     The Max Response Time field is meaningful only in Membership Query
+*                     messages, and specifies the maximum allowed time before sending a
+*                     responding report in units of 1/10 second.  In all other messages, it
+*                     is set to zero by the sender and ignored by receivers.
+*
+*         chksum
+*                     The checksum is the 16-bit one's complement of the one's complement
+*                     sum of the whole IGMP message (the entire IP payload).
+*
+*         group_address
+*                     In a Membership Query message, the group address field is set to zero
+*                     when sending a General Query, and set to the group address being
+*                     queried when sending a Group-Specific Query.
+*
+*                     In a Membership Report or Leave Group message, the group address
+*                     field holds the IP multicast group address of the group being
+*                     reported or left.
+*
+* SEE ALSO
+*         IB Network Drivers, eth_hdr_t, arp_pkt_t, ip_hdr_t, tcp_hdr_t
+*********/
+#include <complib/cl_packoff.h>
 
 #define DHCP_PORT_SERVER             CL_HTON16(67)
 #define DHCP_PORT_CLIENT              CL_HTON16(68)
Index: ulp/ipoib/kernel/ipoib_adapter.c
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.c          (revision 1047)
+++ ulp/ipoib/kernel/ipoib_adapter.c       (working copy)
@@ -762,7 +762,14 @@
                                    if( j != p_adapter->mcast_array_size
)
                                                continue;
 
-                                   ipoib_port_join_mcast( p_port,
p_mac_array[i] ,IB_MC_REC_STATE_FULL_MEMBER);
+                                  // Join to "All hosts mc group" for
IP and to NON-IP MC groups
+                                  if ( ( p_mac_array[i].addr[0] == 1 &&
p_mac_array[i].addr[1] == 0 && p_mac_array[i].addr[2] == 0x5e &&
+                                                 p_mac_array[i].addr[3]
== 0 && p_mac_array[i].addr[4] == 0 && p_mac_array[i].addr[5] == 1 ) ||
+                                                !(
p_mac_array[i].addr[0] == 1 && p_mac_array[i].addr[1] == 0 &&
p_mac_array[i].addr[2] == 0x5e )
+                                              )
+                                  {
+                                              ipoib_port_join_mcast(
p_port, p_mac_array[i], IB_MC_REC_STATE_FULL_MEMBER );
+                                  }
                        }
            }
 
Index: ulp/ipoib/kernel/ipoib_adapter.h
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.h          (revision 1047)
+++ ulp/ipoib/kernel/ipoib_adapter.h       (working copy)
@@ -74,6 +74,9 @@
            uint32_t payload_mtu;
            uint32_t xfer_block_size;
            mac_addr_t       conf_mac;
+    boolean_t    mc_garbage_collector;
+          uint32_t mc_leave_rescan;
+          uint32_t mc_aging_time;
 
 }          ipoib_params_t;
 /*
Index: ulp/ipoib/kernel/ipoib_driver.c
===================================================================
--- ulp/ipoib/kernel/ipoib_driver.c (revision 1047)
+++ ulp/ipoib/kernel/ipoib_driver.c          (working copy)
@@ -526,6 +526,40 @@
            }
            p_adapter->params.recv_pool_ratio =
p_param->ParameterData.IntegerData;
 
+    /* Required: MC garbage collector. */
+    RtlInitUnicodeString( &keyword, L"MCGarbageCollector" );
+    NdisReadConfiguration(
+        &status, &p_param, h_config, &keyword, NdisParameterInteger );
+    if( status != NDIS_STATUS_SUCCESS )
+    {
+        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+            ("MC garbage collector parameter is missing.\n") );
+        return status;
+    }
+    p_adapter->params.mc_garbage_collector =
(p_param->ParameterData.IntegerData != 0);
+                                                             
+          /* Optional: MC leave rescan (sec) for the MC garbage collector thread. */
+          RtlInitUnicodeString( &keyword, L"MCLeaveRescan" );
+          NdisReadConfiguration(
+                      &status, &p_param, h_config, &keyword,
NdisParameterInteger );
+          if( status != NDIS_STATUS_SUCCESS )
+          {
+                      p_adapter->params.mc_leave_rescan = 130;
+          }
+          else
+                      p_adapter->params.mc_leave_rescan =
p_param->ParameterData.IntegerData;
+
+          /* Optional: MC aging time (sec) */
+          RtlInitUnicodeString( &keyword, L"MCAgingTime" );
+          NdisReadConfiguration(
+                      &status, &p_param, h_config, &keyword,
NdisParameterInteger );
+          if( status != NDIS_STATUS_SUCCESS )
+          {
+                      p_adapter->params.mc_aging_time = 260;
+          }
+          else
+                      p_adapter->params.mc_aging_time =
p_param->ParameterData.IntegerData;
+
            /* required: MTU size. */
            RtlInitUnicodeString( &keyword, L"PayloadMtu" );
            NdisReadConfiguration(
Index: ulp/ipoib/kernel/ipoib_endpoint.h
===================================================================
--- ulp/ipoib/kernel/ipoib_endpoint.h        (revision 1047)
+++ ulp/ipoib/kernel/ipoib_endpoint.h      (working copy)
@@ -61,7 +61,10 @@
            ib_av_handle_t                          h_av;
            boolean_t
expired;
            ib_al_ifc_t
*p_ifc;
-
+          uint32_t
mcast_send_timestamp;
+    int32_t                 mcast_count;
+    boolean_t                                        is_mcast_endpoint;
+          boolean_t
is_mcast_listener;
 }          ipoib_endpt_t;
 /*
 * FIELDS
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c   (revision 1047)
+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -94,6 +94,8 @@
 __port_free(
            IN                                             cl_obj_t*
const                                       p_obj );
 
+static void CL_API __port_mcast_garbage_collector
+          (IN                                            void*
context );
 
 
/***********************************************************************
*******
 *
@@ -290,6 +292,14 @@
            IN         OUT
ipoib_send_desc_t* const           p_desc );
 
 static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+          IN                                             ipoib_port_t*
const                                 p_port,
+    IN               const    ip_hdr_t* const
p_ip_hdr,
+          IN                                             size_t
iph_options_size,
+          IN                                             NDIS_BUFFER*
p_buf,
+          IN                                             size_t
buf_len );
+
+static NDIS_STATUS
 __send_mgr_filter_udp(
            IN                                             ipoib_port_t*
const                                 p_port,
            IN                     const    ip_hdr_t* const
p_ip_hdr,
@@ -579,6 +589,11 @@
            KeInitializeEvent( &p_port->sa_event, NotificationEvent,
TRUE );
            KeInitializeEvent( &p_port->leave_mcast_event,
NotificationEvent, TRUE );
            
+          p_port->mcast_event_init = FALSE;
+          cl_event_construct(&p_port->mcast_event);
+
+          p_port->mcast_thread_init = FALSE;
+          cl_thread_construct(&p_port->mcast_thread);
            IPOIB_EXIT( IPOIB_DBG_INIT );
 }
 
@@ -653,6 +668,18 @@
                        return status;
            }
 
+    if (p_port->p_adapter->params.mc_garbage_collector) 
+    {
+        /* Initialize multicast garbage collector event */
+        cl_status = cl_event_init(&p_port->mcast_event, TRUE);
+        if( cl_status != CL_SUCCESS )
+        {
+            IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                ("cl_event_init returned %s\n",
cl_status_text[cl_status]) );
+            return IB_ERROR;
+        }
+        p_port->mcast_event_init = TRUE;
+          }
            /* We only ever destroy from the PnP callback thread. */
            cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,
                        __port_destroying, __port_cleanup, __port_free
);
@@ -746,7 +773,26 @@
            CL_ASSERT( p_obj );
 
            p_port = PARENT_STRUCT( p_obj, ipoib_port_t, obj );
+    if (p_port->p_adapter->params.mc_garbage_collector) 
+    {
+        /* Destroy multicast garbage collector thread */
 
+        if(p_port->mcast_thread_init)
+        {
+            CL_ASSERT(p_port->mcast_event_init);
+            cl_event_signal(&p_port->mcast_event);
+
+            cl_thread_destroy(&p_port->mcast_thread);
+            p_port->mcast_thread_init = FALSE;
+        }
+
+        if (p_port->mcast_event_init) 
+        {
+            cl_event_destroy(&p_port->mcast_event);
+            p_port->mcast_event_init = FALSE;
+        }
+    }
+
            __endpt_mgr_destroy( p_port );
            __recv_mgr_destroy( p_port );
            __send_mgr_destroy( p_port );
@@ -2083,6 +2129,30 @@
            p_eth->hdr.src = p_src->mac;
            p_eth->hdr.dst = p_dst->mac;
 
+          /* Check if multicast packet and update endpoint timestamp if
needed */
+
+          if ( ETH_IS_MULTICAST(p_eth->hdr.dst.addr) && 
+                      p_eth->hdr.type == ETH_PROT_TYPE_IP &&
+                      !ETH_IS_BROADCAST(p_eth->hdr.dst.addr) ) 
+          {
+                      /*
+                                  p_port->p_adapter->params.mc_garbage_collector is not
+                                  available in this context, so p_dst->is_mcast_endpoint
+                                  is used to indicate whether the mc garbage collector is enabled.
+                      */
+        if ( p_dst->is_mcast_endpoint &&
+                                  ++(p_dst->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD) 
+        {
+                                  CL_ASSERT(p_dst->h_mcast != NULL);
+                                  CL_ASSERT(p_dst->is_mcast_endpoint);
+
+            p_dst->mcast_count = 0;
+            p_dst->mcast_send_timestamp = cl_get_time_stamp_sec();
+        }
+
+                      p_eth->hdr.dst.addr[1] = 0;
+                      p_eth->hdr.dst.addr[3] = p_eth->hdr.dst.addr[3] &
0x7f;
+          }
            IPOIB_EXIT( IPOIB_DBG_RECV );
            return IB_SUCCESS;
 }
@@ -3062,6 +3132,26 @@
            if( p_ip_hdr->offset ||
                        p_ip_hdr->prot != IP_PROT_UDP )
            {
+                      /* Check if this packet is IGMP */
+                      if ( p_ip_hdr->prot == IP_PROT_IGMP ) 
+                      {
+                                  /*
+                                      IGMP packets have been observed with the IP header split across 2 NDIS_BUFFERs:
+                                              1. iph
+                                              2. ip options
+                                              So to get the IGMP packet we need to skip the ip options NDIS_BUFFER
+                                  */
+                                  size_t iph_size_in_bytes =
(p_ip_hdr->ver_hl & 0xf) * 4;
+                                  size_t iph_options_size =
iph_size_in_bytes - buf_len;
+                                  buf_len -= sizeof(ip_hdr_t);
+
+                                  /*
+                                      The IGMP packet may be IGMPv1 or IGMPv3 rather than IGMPv2.
+                                              It is passed to __send_mgr_filter_igmp_v2() anyway;
+                                              message types it does not handle are only traced.
+                                  */
+                                  __send_mgr_filter_igmp_v2(p_port,
p_ip_hdr, iph_options_size, p_buf, buf_len);
+                      }
                        /* Not a UDP packet. */
                        cl_perf_start( SendTcp );
                        status = __send_gen( p_port, p_desc );
@@ -3081,7 +3171,128 @@
            return status;
 }
 
+static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+          IN                                             ipoib_port_t*
const                                 p_port,
+    IN               const    ip_hdr_t* const
p_ip_hdr,
+          IN                                             size_t
iph_options_size,
+          IN                                             NDIS_BUFFER*
p_buf,
+          IN                                             size_t
buf_len )
+{
+    igmp_v2_hdr_t                     *p_igmp_v2_hdr = NULL;
+          NDIS_STATUS                          endpt_status;
+          ipoib_endpt_t*               p_endpt = NULL;
+          mac_addr_t                               fake_mcast_mac;
 
+          IPOIB_ENTER( IPOIB_DBG_SEND );
+
+    if( !buf_len )
+          {
+                      // To get the IGMP packet we need to skip the ip
options NDIS_BUFFER (if exists)
+                      while ( iph_options_size )
+                      {
+                                  NdisGetNextBuffer( p_buf, &p_buf );
+                                  if( !p_buf )
+                                  {
+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                                                          ("Failed to
get IGMPv2 header buffer.\n") );
+                                              return
NDIS_STATUS_FAILURE;
+                                  }
+                                  NdisQueryBufferSafe( p_buf,
&p_igmp_v2_hdr, &buf_len, NormalPagePriority );
+                                  if( !p_igmp_v2_hdr )
+                                  {
+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                                                          ("Failed to
query IGMPv2 header buffer.\n") );
+                                              return
NDIS_STATUS_FAILURE;
+                                  }
+
+                                  iph_options_size-=buf_len;
+                      }
+        
+                      NdisGetNextBuffer( p_buf, &p_buf );
+                      if( !p_buf )
+                      {
+                                  IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+                                              ("Failed to get IGMPv2
header buffer.\n") );
+                                  return NDIS_STATUS_FAILURE;
+                      }
+                      NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr,
&buf_len, NormalPagePriority );
+                      if( !p_igmp_v2_hdr )
+                      {
+                                  IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+                                              ("Failed to query IGMPv2
header buffer.\n") );
+                                  return NDIS_STATUS_FAILURE;
+                      }
+          }
+          else
+          {
+                      p_igmp_v2_hdr = (igmp_v2_hdr_t*)(p_ip_hdr + 1);
+          }
+          /* Verify the buffer is large enough to hold an IGMPv2 header. */
+          if( buf_len < sizeof(igmp_v2_hdr_t) )
+          {
+                      IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+                                  ("Buffer not large enough for IGMPv2
packet.\n") );
+                      return NDIS_STATUS_BUFFER_TOO_SHORT;
+          }
+
+          // build fake mac from igmp packet group address
+          fake_mcast_mac.addr[0] = 1;
+    fake_mcast_mac.addr[1] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[0] & 0x0f;
+    fake_mcast_mac.addr[2] = 0x5E;
+          fake_mcast_mac.addr[3] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[1];
+          fake_mcast_mac.addr[4] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[2];
+          fake_mcast_mac.addr[5] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[3];
+
+          switch ( p_igmp_v2_hdr->type )
+          {
+          case IGMP_V2_MEMBERSHIP_REPORT:
+                      /*
+                                  This means that somebody opened a listener on this group.
+                                  Change the type of the mcast endpt to SEND_RECV so that the mcast garbage collector
+                                  will not delete this mcast endpt.
+                      */
+        IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+                                  ("Catched IGMP_V2_MEMBERSHIP_REPORT
message\n") );
+        endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
&p_endpt );
+                      if ( p_endpt )
+                      {
+                                  cl_obj_lock( &p_port->obj );
+                                  p_endpt->is_mcast_listener = TRUE;
+                                  cl_obj_unlock( &p_port->obj );
+            ipoib_endpt_deref( p_endpt );
+                      }
+                      break;
+
+          case IGMP_V2_LEAVE_GROUP:
+        /*
+                                  This means that somebody closed a listener on this group.
+                                  Change the type of the mcast endpt to SEND_ONLY so that the mcast
+                                  garbage collector can delete this mcast endpt on a later pass, once it has aged out.
+                      */
+        IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+                                       ("Catched IGMP_V2_LEAVE_GROUP
message\n") );
+        endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
&p_endpt );
+                      if ( p_endpt )
+                      {
+                                  cl_obj_lock( &p_port->obj );
+                                  p_endpt->is_mcast_listener = FALSE;
+
+                                  cl_obj_unlock( &p_port->obj );
+                                  ipoib_endpt_deref( p_endpt );
+                      }
+                      break;
+
+          default:
+        IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+                                       ("Send Unknown IGMP message:
0x%x \n", p_igmp_v2_hdr->type ) );
+                      break;
+          }
+
+          IPOIB_EXIT( IPOIB_DBG_SEND );
+          return NDIS_STATUS_SUCCESS;
+}
+
 static NDIS_STATUS
 __send_mgr_filter_udp(
            IN                                             ipoib_port_t*
const                                 p_port,
@@ -3522,14 +3733,29 @@
                        ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) )
            {
                        if( ipoib_port_join_mcast( p_port,
p_eth_hdr->dst, 
-                                   IB_MC_REC_STATE_SEND_ONLY_MEMBER) ==
IB_SUCCESS )
+                                  IB_MC_REC_STATE_FULL_MEMBER) ==
IB_SUCCESS )
                        {
                                    IPOIB_PRINT_EXIT(
TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,
                                                ("Multicast Mac - trying
to join.\n") );
                                    return NDIS_STATUS_PENDING;
                        }
            }
+          else if ( p_port->p_adapter->params.mc_garbage_collector &&
+                            status == NDIS_STATUS_SUCCESS && 
+                                    ETH_IS_MULTICAST(
p_eth_hdr->dst.addr ) &&  
+                                    !ETH_IS_BROADCAST(
p_eth_hdr->dst.addr ) )
+          {
+                      CL_ASSERT( (*pp_endpt) );
+                      CL_ASSERT((*pp_endpt)->h_mcast != NULL);
+                      CL_ASSERT((*pp_endpt)->is_mcast_endpoint);
 
+                      if (++((*pp_endpt)->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD) 
+                      {
+                                  (*pp_endpt)->mcast_count = 0;
+                                  (*pp_endpt)->mcast_send_timestamp =
cl_get_time_stamp_sec();
+                      }
+          }
+
            IPOIB_EXIT( IPOIB_DBG_SEND );
            return status;
 }
@@ -3688,6 +3914,47 @@
                        }
 
                        cl_perf_start( SendMgrQueue );
+#pragma warning(disable:4127)
+                      do{
+        if ( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) && 
+                                  p_eth_hdr->type == ETH_PROT_TYPE_IP
&&
+                                  !ETH_IS_BROADCAST(
p_eth_hdr->dst.addr ) ) 
+                      {
+            ip_hdr_t                                  *p_ip_hdr;
+                                  NDIS_BUFFER
*p_ip_hdr_buf;
+                                  UINT
ip_hdr_buf_len;
+
+                                  // Extract the ip hdr 
+            NdisGetNextBuffer( p_buf, &p_ip_hdr_buf );
+                                  if( !p_ip_hdr_buf )
+                                  {
+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                                                          ("Failed to
get IP header buffer.\n") );
+                                              break;
+                                  }
+          
+                                  NdisQueryBufferSafe( p_ip_hdr_buf,
&p_ip_hdr, &ip_hdr_buf_len, NormalPagePriority );
+                                  if( !p_ip_hdr )
+                                  {
+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                                                          ("Failed to
query IP header buffer.\n") );
+                                              break;
+                                  }
+
+                                  if( ip_hdr_buf_len < sizeof(ip_hdr_t)
)
+                                  {
+                                              /* Buffer too small to hold an IP header: skip the destination MAC rewrite. */
+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                                                          ("Buffer too
small for IP packet.\n") );
+                                              //return
NDIS_STATUS_BUFFER_TOO_SHORT;
+                                              break;
+                                  }
+                      
+            p_eth_hdr->dst.addr[1] = ((unsigned
char*)&p_ip_hdr->dst_ip)[0] & 0x0f;
+                                  p_eth_hdr->dst.addr[3] = ((unsigned
char*)&p_ip_hdr->dst_ip)[1];
+                      }
+                      }while(FALSE);
+#pragma warning(default:4127)
                        status = __send_mgr_queue( p_port, p_eth_hdr,
&desc.p_endpt );
                        cl_perf_stop( &p_port->p_adapter->perf,
SendMgrQueue );
                        if( status == NDIS_STATUS_PENDING )
@@ -3827,7 +4094,7 @@
                                    if( ETH_IS_MULTICAST(
p_eth_hdr->dst.addr ) )
                                    {
                                                if(
ipoib_port_join_mcast( p_port, p_eth_hdr->dst,
-
IB_MC_REC_STATE_SEND_ONLY_MEMBER) == IB_SUCCESS )
+
IB_MC_REC_STATE_FULL_MEMBER) == IB_SUCCESS )
                                                {
 
IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,
 
("Multicast Mac - trying to join.\n") );
@@ -4658,6 +4925,8 @@
            ib_query_req_t                          query;
            ib_user_query_t                        info;
            ib_portinfo_record_t       port_rec;
+    cl_status_t             cl_status;
+    BOOLEAN                 success = TRUE;
 
            IPOIB_ENTER( IPOIB_DBG_INIT );
 
@@ -4692,17 +4961,54 @@
            /* reference the object for the multicast query. */
            ipoib_port_ref( p_port, ref_port_up );
 
+    __try
+    {
            status = p_port->p_adapter->p_ifc->query(
                        p_port->p_adapter->h_al, &query,
&p_port->ib_mgr.h_query );
            if( status != IB_SUCCESS )
            {
-                       KeSetEvent( &p_port->sa_event, EVENT_INCREMENT,
FALSE );
-                       ipoib_set_inactive( p_port->p_adapter );
-                       ipoib_port_deref( p_port, ref_port_up );
+            success = FALSE;
                        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
                                    ("ib_query returned %s\n", 
 
p_port->p_adapter->p_ifc->get_err_str( status )) );
-                       return;
+            __leave;
+        }
+
+        if (p_port->p_adapter->params.mc_garbage_collector) 
+        {
+            CL_ASSERT(p_port->mcast_event_init);
+            cl_status = cl_event_reset(&p_port->mcast_event);
+            if( cl_status != CL_SUCCESS )
+            {
+                success = FALSE;
+                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                    ("cl_event_reset returned %s\n",
cl_status_text[cl_status]) );
+                __leave;
+            }
+
+            cl_status = cl_thread_init(
+                &p_port->mcast_thread, 
+                __port_mcast_garbage_collector, 
+                p_port, 
+                "mcast_garbage");
+            if( cl_status != CL_SUCCESS )
+            {
+                success = FALSE;
+                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+                    ("cl_thread_init returned %s\n",
cl_status_text[cl_status]) );
+                __leave;
+            }
+            p_port->mcast_thread_init = TRUE;
+        }
+    }
+    __finally
+    {
+        if (!success) 
+        {
+            KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
+            ipoib_set_inactive( p_port->p_adapter );
+            ipoib_port_deref( p_port, ref_port_up );
+        }
            }
 
            IPOIB_EXIT( IPOIB_DBG_INIT );
@@ -5176,6 +5482,16 @@
                        return;
            }
 
+    /* Destroy multicast garbage collector thread */
+
+    if(p_port->p_adapter->params.mc_garbage_collector &&
p_port->mcast_thread_init)
+    {
+        CL_ASSERT(p_port->mcast_event_init);
+        cl_event_signal(&p_port->mcast_event);
+
+        cl_thread_destroy(&p_port->mcast_thread);
+        p_port->mcast_thread_init = FALSE;
+    }
            KeResetEvent(&p_port->leave_mcast_event);
 
            /* Reset all endpoints so we don't flush our ARP cache. */
@@ -5437,7 +5753,8 @@
            mcast_req.member_rec.mlid = 0;
            ib_member_set_state(
&mcast_req.member_rec.scope_state,state);
 
-           if( mac.addr[0] == 1 && mac.addr[1] == 0 && mac.addr[2] ==
0x5E )
+
+          if( mac.addr[0] == 1 && mac.addr[2] == 0x5E )
            {
                        /*
                         * Update the address portion of the MGID with
the 28 lower bits of the
@@ -5445,7 +5762,7 @@
                         * the 24 lower bits of that
network-byte-ordered value (assuming MSb
                         * is zero).
                         */
-                       mcast_req.member_rec.mgid.raw[12] = 0;
+                      mcast_req.member_rec.mgid.raw[12] = mac.addr[1];
                        mcast_req.member_rec.mgid.raw[13] = mac.addr[3];
                        mcast_req.member_rec.mgid.raw[14] = mac.addr[4];
                        mcast_req.member_rec.mgid.raw[15] = mac.addr[5];
@@ -5603,6 +5920,15 @@
                                    &p_port->endpt_mgr.lid_endpts,
p_endpt->dlid, &p_endpt->lid_item );
                        CL_ASSERT( p_qitem == &p_endpt->lid_item );
            }
+    /* Initialize the endpoint's multicast garbage-collection state */
+          if ( p_port->p_adapter->params.mc_garbage_collector )
+          {
+                      p_endpt->is_mcast_endpoint = TRUE;
+                      p_endpt->mcast_count = 0;
+                      p_endpt->mcast_send_timestamp =
cl_get_time_stamp_sec();
+          }
+          else
+                      p_endpt->is_mcast_endpoint = FALSE;
            cl_obj_unlock( &p_port->obj );
            
            /* Try to send all pending sends. */
@@ -5659,4 +5985,81 @@
            IPOIB_EXIT( IPOIB_DBG_MCAST );
 }
 
+static void CL_API __port_mcast_garbage_collector
+          (IN                                            void*
context )
+{
+          ipoib_port_t *p_port = context;
+          const uint32_t WAIT_US =
p_port->p_adapter->params.mc_leave_rescan * 1000000;       /*
cl_event_wait_on use usec */
+    const mac_addr_t DEFAULT_MCAST_GROUP = {0x01, 0x00, 0x5e, 0x00,
0x00, 0x01};
 
+    IPOIB_ENTER( IPOIB_DBG_ENDPT );
+
+    CL_ASSERT( p_port->p_adapter->params.mc_garbage_collector );
+
+          while(cl_event_wait_on(&p_port->mcast_event,WAIT_US,FALSE) !=
STATUS_SUCCESS)
+          {
+                      /* Do garbage collecting... */
+
+        cl_map_item_t     *p_item;
+        ipoib_endpt_t       *p_endpt;
+        cl_qlist_t              destroy_mc_list;
+        const uint32_t CURRENT_TIME_SEC = cl_get_time_stamp_sec();
+
+        cl_qlist_init( &destroy_mc_list );
+
+        cl_obj_lock( &p_port->obj );
+
+        p_item = cl_qmap_head( &p_port->endpt_mgr.mac_endpts );
+        while( p_item != cl_qmap_end( &p_port->endpt_mgr.mac_endpts ) )
+        {
+            p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, mac_item );
+            p_item = cl_qmap_next( p_item );
+
+            /* Check if the current endpoint is an old multicast item */
+
+            if( p_endpt->h_mcast && 
+                p_endpt->is_mcast_endpoint &&
+
(!p_endpt->is_mcast_listener) &&
+                cl_memcmp( &p_endpt->mac, &DEFAULT_MCAST_GROUP,
sizeof(mac_addr_t) ) &&
+                CURRENT_TIME_SEC - p_endpt->mcast_send_timestamp >
p_port->p_adapter->params.mc_aging_time)
+            {
+                cl_qmap_remove_item( &p_port->endpt_mgr.mac_endpts,
+                    &p_endpt->mac_item );
+                cl_fmap_remove_item( &p_port->endpt_mgr.gid_endpts,
+                    &p_endpt->gid_item );
+
+                if( p_endpt->dlid )
+                {
+                    cl_qmap_remove_item( &p_port->endpt_mgr.lid_endpts,
+                        &p_endpt->lid_item );
+                    p_endpt->dlid = 0;
+                }
+
+                cl_qlist_insert_tail(
+                    &destroy_mc_list,
&p_endpt->mac_item.pool_item.list_item );
+            }
+        }
+        cl_obj_unlock( &p_port->obj );
+
+        /* Destroy the collected multicast endpoints now that we have released the lock. */
+        while( cl_qlist_count( &destroy_mc_list ) )
+        {
+            p_endpt = PARENT_STRUCT( cl_qlist_head( &destroy_mc_list ),
+                                     ipoib_endpt_t,
mac_item.pool_item.list_item );
+
+            IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,
+                ("mcast garbage collector: destroying endpoint
%02x:%02x:%02x:%02x:%02x:%02x \n", 
+                     p_endpt->mac.addr[0],
+                     p_endpt->mac.addr[1],
+                     p_endpt->mac.addr[2],
+                     p_endpt->mac.addr[3],
+                     p_endpt->mac.addr[4],
+                     p_endpt->mac.addr[5]) );
+
+            cl_obj_destroy( &PARENT_STRUCT( cl_qlist_remove_head(
&destroy_mc_list ),
+                ipoib_endpt_t, mac_item.pool_item.list_item )->obj );
+        }
+          }
+
+    IPOIB_EXIT( IPOIB_DBG_ENDPT );
+}
Index: ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- ulp/ipoib/kernel/ipoib_port.h   (revision 1047)
+++ ulp/ipoib/kernel/ipoib_port.h (working copy)
@@ -63,6 +63,7 @@
  */
 #define IPOIB_USE_DMA         1
 
+#define IPOIB_MCAST_TIMESTAMP_THRESHOLD     10000
 
 #define IPOIB_PORT_FROM_PACKET( P )        \
            (((ipoib_port_t**)P->MiniportReservedEx)[0])
@@ -506,6 +507,11 @@
            atomic32_t
endpt_rdr;
 
            atomic32_t
hdr_idx;
+          boolean_t
mcast_event_init;           /* Not really necessary, since
cl_event_destroy is NULL */
+          cl_event_t
mcast_event;                 /* Multicast garbage collector thread
terminate event */
+
+          boolean_t
mcast_thread_init;         
+          cl_thread_t
mcast_thread;               /* Multicast garbage collector thread */
            ipoib_hdr_t
hdr[1];  /* Must be last! */
 
 }          ipoib_port_t;
Index: ulp/ipoib/kernel/netipoib.inf
===================================================================
--- ulp/ipoib/kernel/netipoib.inf    (revision 1047)
+++ ulp/ipoib/kernel/netipoib.inf  (working copy)
@@ -125,7 +125,28 @@
 HKR, Ndi\Params\PayloadMtu,              Default, 0, "2044"
 HKR, Ndi\Params\PayloadMtu,              Min,                  0, "60"
 HKR, Ndi\Params\PayloadMtu,              Max,                 0, "2044"
+HKR, Ndi\Params\MCGarbageCollector,            ParamDesc,      0, "MC
garbage collector"
+HKR, Ndi\Params\MCGarbageCollector,            Type,                0,
"enum"
+HKR, Ndi\Params\MCGarbageCollector,            Default, 0, "1"
+HKR, Ndi\Params\MCGarbageCollector,            Optional,           0,
"0"
+HKR, Ndi\Params\MCGarbageCollector\enum,"0",                      0,
"Disabled"
+HKR, Ndi\Params\MCGarbageCollector\enum,"1",                      0,
"Enabled"
 
+HKR, Ndi\Params\MCLeaveRescan,                  ParamDesc,      0, "MC
leave rescan (sec)"
+HKR, Ndi\Params\MCLeaveRescan,                  Type,                0,
"dword"
+HKR, Ndi\Params\MCLeaveRescan,                  Default, 0, "130"
+HKR, Ndi\Params\MCLeaveRescan,                  Optional,           0,
"0"
+HKR, Ndi\Params\MCLeaveRescan,                  Min,
0, "1"
+HKR, Ndi\Params\MCLeaveRescan,                  Max,                 0,
"3600"
+
+HKR, Ndi\Params\MCAgingTime,                      ParamDesc,      0,
"MC aging time (sec)"
+HKR, Ndi\Params\MCAgingTime,                      Type,
0, "dword"
+HKR, Ndi\Params\MCAgingTime,                      Default, 0, "260"
+HKR, Ndi\Params\MCAgingTime,                      Optional,
0, "0"
+HKR, Ndi\Params\MCAgingTime,                      Min,
0, "1"
+HKR, Ndi\Params\MCAgingTime,                      Max,
0, "3600"
+HKLM, System\CurrentControlSet\Services\Tcpip\Parameters, IGMPVersion,
0x00010001, 3
+
 [IpoibService]
 DisplayName     = %IpoibServiceDispName%
 ServiceType     = 1 ;%SERVICE_KERNEL_DRIVER%
 
 
 
 
Slava Strebkov
SW Engineer
Voltaire
099718750
 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080407/6efd9517/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mcast_garbage_collector.diff
Type: application/octet-stream
Size: 27356 bytes
Desc: mcast_garbage_collector.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080407/6efd9517/attachment.obj>
    
    
More information about the ofw
mailing list