[ofw] [PATCH] mcast garbage collector

Slava Strebkov slavas at voltaire.com
Mon Apr 7 05:48:02 PDT 2008


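Hi, the following patch adds garbage collection of stale multicast (mcast)
endpoints in IPoIB: a multicast endpoint that has no IGMP listener and has not
been used for sending within the configured aging time is removed.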

 

Index: inc/complib/cl_types.h

===================================================================

--- inc/complib/cl_types.h           (revision 1047)

+++ inc/complib/cl_types.h         (working copy)

@@ -46,7 +46,7 @@

 

 #include <complib/cl_types_osd.h>

 

-

+typedef uint8_t                         net8_t;

 typedef uint16_t                        net16_t;

 typedef uint32_t                        net32_t;

 typedef uint64_t                        net64_t;

Index: inc/kernel/ip_packet.h

===================================================================

--- inc/kernel/ip_packet.h           (revision 1047)

+++ inc/kernel/ip_packet.h         (working copy)

@@ -196,6 +196,7 @@

 #define IP_PROT_IP                             4

 #define IP_PROT_TCP                         6

 #define IP_PROT_UDP                         17

+#define IP_PROT_IGMP                       2

 

 

 #include <complib/cl_packon.h>

@@ -355,6 +356,55 @@

 *********/

 #include <complib/cl_packoff.h>

 

+#define IGMP_V2_MEMBERSHIP_QUERY        0x11

+#define IGMP_V2_MEMBERSHIP_REPORT      0x16

+#define IGMP_V1_MEMBERSHIP_REPORT      0x12     // for backward
compatibility with IGMPv1

+#define IGMP_V2_LEAVE_GROUP                               0x17

+#include <complib/cl_packon.h>

+/****s* IB Network Drivers/igmp_v2_hdr_t

+* NAME

+*         igmp_v2_hdr_t

+*

+* DESCRIPTION

+*         Defines the IGMPv2 message header carried as the payload of an IP

+*         packet whose protocol field is IP_PROT_IGMP.  The structure is

+*         declared between cl_packon.h/cl_packoff.h with PACK_SUFFIX

+*         (presumably so that it matches the 8-byte wire layout - confirm).

+*

+* SYNOPSIS

+*/

+typedef struct _igmp_v2_hdr

+{

+          net8_t               type;

+          net8_t               max_resp_time;

+          net16_t             chksum;

+          net32_t             group_address;

+}         PACK_SUFFIX igmp_v2_hdr_t;

+/*

+* FIELDS

+*         type

+*                     Type of IGMP message: IGMP_V2_MEMBERSHIP_QUERY,

+*                     IGMP_V2_MEMBERSHIP_REPORT, IGMP_V1_MEMBERSHIP_REPORT or

+*                     IGMP_V2_LEAVE_GROUP.

+*

+*         max_resp_time

+*                     The Max Response Time field is meaningful only in Membership Query

+*                     messages, and specifies the maximum allowed time before sending a

+*                     responding report in units of 1/10 second.  In all other messages, it

+*                     is set to zero by the sender and ignored by receivers.

+*

+*         chksum

+*                     The checksum is the 16-bit one's complement of the one's complement

+*                     sum of the whole IGMP message (the entire IP payload).

+*

+*         group_address

+*                     In a Membership Query message, the group address field is set to zero

+*                     when sending a General Query, and set to the group address being

+*                     queried when sending a Group-Specific Query.

+*

+*                     In a Membership Report or Leave Group message, the group address

+*                     field holds the IP multicast group address of the group being

+*                     reported or left.

+*

+* SEE ALSO

+*         IB Network Drivers, eth_hdr_t, arp_pkt_t, ip_hdr_t, tcp_hdr_t

+*********/

+#include <complib/cl_packoff.h>

 

 #define DHCP_PORT_SERVER             CL_HTON16(67)

 #define DHCP_PORT_CLIENT              CL_HTON16(68)

Index: ulp/ipoib/kernel/ipoib_adapter.c

===================================================================

--- ulp/ipoib/kernel/ipoib_adapter.c          (revision 1047)

+++ ulp/ipoib/kernel/ipoib_adapter.c       (working copy)

@@ -762,7 +762,14 @@

                                    if( j != p_adapter->mcast_array_size
)

                                                continue;

 

-                                   ipoib_port_join_mcast( p_port,
p_mac_array[i] ,IB_MC_REC_STATE_FULL_MEMBER);

+                                  // Join to "All hosts mc group" for
IP and to NON-IP MC groups

+                                  if ( ( p_mac_array[i].addr[0] == 1 &&
p_mac_array[i].addr[1] == 0 && p_mac_array[i].addr[2] == 0x5e &&

+                                                 p_mac_array[i].addr[3]
== 0 && p_mac_array[i].addr[4] == 0 && p_mac_array[i].addr[5] == 1 ) ||

+                                                !(
p_mac_array[i].addr[0] == 1 && p_mac_array[i].addr[1] == 0 &&
p_mac_array[i].addr[2] == 0x5e )

+                                              )

+                                  {

+                                              ipoib_port_join_mcast(
p_port, p_mac_array[i], IB_MC_REC_STATE_FULL_MEMBER );

+                                  }

                        }

            }

 

Index: ulp/ipoib/kernel/ipoib_adapter.h

===================================================================

--- ulp/ipoib/kernel/ipoib_adapter.h          (revision 1047)

+++ ulp/ipoib/kernel/ipoib_adapter.h       (working copy)

@@ -74,6 +74,9 @@

            uint32_t payload_mtu;

            uint32_t xfer_block_size;

            mac_addr_t       conf_mac;

+    boolean_t    mc_garbage_collector;

+          uint32_t mc_leave_rescan;

+          uint32_t mc_aging_time;

 

 }          ipoib_params_t;

 /*

Index: ulp/ipoib/kernel/ipoib_driver.c

===================================================================

--- ulp/ipoib/kernel/ipoib_driver.c (revision 1047)

+++ ulp/ipoib/kernel/ipoib_driver.c          (working copy)

@@ -526,6 +526,40 @@

            }

            p_adapter->params.recv_pool_ratio =
p_param->ParameterData.IntegerData;

 

+    /* Required: MC garbage collector. */

+    RtlInitUnicodeString( &keyword, L"MCGarbageCollector" );

+    NdisReadConfiguration(

+        &status, &p_param, h_config, &keyword, NdisParameterInteger );

+    if( status != NDIS_STATUS_SUCCESS )

+    {

+        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+            ("MC garbage collector parameter is missing.\n") );

+        return status;

+    }

+    p_adapter->params.mc_garbage_collector =
(p_param->ParameterData.IntegerData != 0);

+                                                             

+          /* Optional: MC leave rescan (sec) for the MC garbage collector thread. */

+          RtlInitUnicodeString( &keyword, L"MCLeaveRescan" );

+          NdisReadConfiguration(

+                      &status, &p_param, h_config, &keyword,
NdisParameterInteger );

+          if( status != NDIS_STATUS_SUCCESS )

+          {

+                      p_adapter->params.mc_leave_rescan = 130;

+          }

+          else

+                      p_adapter->params.mc_leave_rescan =
p_param->ParameterData.IntegerData;

+

+          /* Optional: MC aging time (sec) */

+          RtlInitUnicodeString( &keyword, L"MCAgingTime" );

+          NdisReadConfiguration(

+                      &status, &p_param, h_config, &keyword,
NdisParameterInteger );

+          if( status != NDIS_STATUS_SUCCESS )

+          {

+                      p_adapter->params.mc_aging_time = 260;

+          }

+          else

+                      p_adapter->params.mc_aging_time =
p_param->ParameterData.IntegerData;

+

            /* required: MTU size. */

            RtlInitUnicodeString( &keyword, L"PayloadMtu" );

            NdisReadConfiguration(

Index: ulp/ipoib/kernel/ipoib_endpoint.h

===================================================================

--- ulp/ipoib/kernel/ipoib_endpoint.h        (revision 1047)

+++ ulp/ipoib/kernel/ipoib_endpoint.h      (working copy)

@@ -61,7 +61,10 @@

            ib_av_handle_t                          h_av;

            boolean_t
expired;

            ib_al_ifc_t
*p_ifc;

-

+          uint32_t
mcast_send_timestamp;

+    int32_t                 mcast_count;

+    boolean_t                                        is_mcast_endpoint;

+          boolean_t
is_mcast_listener;

 }          ipoib_endpt_t;

 /*

 * FIELDS

Index: ulp/ipoib/kernel/ipoib_port.c

===================================================================

--- ulp/ipoib/kernel/ipoib_port.c   (revision 1047)

+++ ulp/ipoib/kernel/ipoib_port.c (working copy)

@@ -94,6 +94,8 @@

 __port_free(

            IN                                             cl_obj_t*
const                                       p_obj );

 

+static void CL_API __port_mcast_garbage_collector

+          (IN                                            void*
context );

 

 
/***********************************************************************
*******

 *

@@ -290,6 +292,14 @@

            IN         OUT
ipoib_send_desc_t* const           p_desc );

 

 static NDIS_STATUS

+__send_mgr_filter_igmp_v2(

+          IN                                             ipoib_port_t*
const                                 p_port,

+    IN               const    ip_hdr_t* const
p_ip_hdr,

+          IN                                             size_t
iph_options_size,

+          IN                                             NDIS_BUFFER*
p_buf,

+          IN                                             size_t
buf_len );

+

+static NDIS_STATUS

 __send_mgr_filter_udp(

            IN                                             ipoib_port_t*
const                                 p_port,

            IN                     const    ip_hdr_t* const
p_ip_hdr,

@@ -579,6 +589,11 @@

            KeInitializeEvent( &p_port->sa_event, NotificationEvent,
TRUE );

            KeInitializeEvent( &p_port->leave_mcast_event,
NotificationEvent, TRUE );

            

+          p_port->mcast_event_init = FALSE;

+          cl_event_construct(&p_port->mcast_event);

+

+          p_port->mcast_thread_init = FALSE;

+          cl_thread_construct(&p_port->mcast_thread);

            IPOIB_EXIT( IPOIB_DBG_INIT );

 }

 

@@ -653,6 +668,18 @@

                        return status;

            }

 

+    if (p_port->p_adapter->params.mc_garbage_collector) 

+    {

+        /* Initialize multicast garbage collector event */

+        cl_status = cl_event_init(&p_port->mcast_event, TRUE);

+        if( cl_status != CL_SUCCESS )

+        {

+            IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                ("cl_event_init returned %s\n",
cl_status_text[cl_status]) );

+            return IB_ERROR;

+        }

+        p_port->mcast_event_init = TRUE;

+          }

            /* We only ever destroy from the PnP callback thread. */

            cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,

                        __port_destroying, __port_cleanup, __port_free
);

@@ -746,7 +773,26 @@

            CL_ASSERT( p_obj );

 

            p_port = PARENT_STRUCT( p_obj, ipoib_port_t, obj );

+    if (p_port->p_adapter->params.mc_garbage_collector) 

+    {

+        /* Destroy multicast garbage collector thread */

 

+        if(p_port->mcast_thread_init)

+        {

+            CL_ASSERT(p_port->mcast_event_init);

+            cl_event_signal(&p_port->mcast_event);

+

+            cl_thread_destroy(&p_port->mcast_thread);

+            p_port->mcast_thread_init = FALSE;

+        }

+

+        if (p_port->mcast_event_init) 

+        {

+            cl_event_destroy(&p_port->mcast_event);

+            p_port->mcast_event_init = FALSE;

+        }

+    }

+

            __endpt_mgr_destroy( p_port );

            __recv_mgr_destroy( p_port );

            __send_mgr_destroy( p_port );

@@ -2083,6 +2129,30 @@

            p_eth->hdr.src = p_src->mac;

            p_eth->hdr.dst = p_dst->mac;

 

+          /* Check if multicast packet and update endpoint timestamp if
needed */

+

+          if ( ETH_IS_MULTICAST(p_eth->hdr.dst.addr) && 

+                      p_eth->hdr.type == ETH_PROT_TYPE_IP &&

+                      !ETH_IS_BROADCAST(p_eth->hdr.dst.addr) ) 

+          {

+                      /*

+
p_port->p_adapter->params.mc_garbage_collector doesn't

+                                  exist in this context , so we use
p_dst->is_mcast_endpoint

+                                  as indicator for mc_garbage collector
activity ( enable/disable )

+                      */

+        if ( p_dst->is_mcast_endpoint &&

+                                  ++(p_dst->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD) 

+        {

+                                  CL_ASSERT(p_dst->h_mcast != NULL);

+                                  CL_ASSERT(p_dst->is_mcast_endpoint);

+

+            p_dst->mcast_count = 0;

+            p_dst->mcast_send_timestamp = cl_get_time_stamp_sec();

+        }

+

+                      p_eth->hdr.dst.addr[1] = 0;

+                      p_eth->hdr.dst.addr[3] = p_eth->hdr.dst.addr[3] &
0x7f;

+          }

            IPOIB_EXIT( IPOIB_DBG_RECV );

            return IB_SUCCESS;

 }

@@ -3062,6 +3132,26 @@

            if( p_ip_hdr->offset ||

                        p_ip_hdr->prot != IP_PROT_UDP )

            {

+                      /* Check if this packet is IGMP */

+                      if ( p_ip_hdr->prot == IP_PROT_IGMP ) 

+                      {

+                                  /*

+                                      In igmp packet I saw that iph
arrive in 2 NDIS_BUFFERs:

+                                              1. iph

+                                              2. ip options

+                                              So to get the IGMP packet
we need to skip the ip options NDIS_BUFFER

+                                  */

+                                  size_t iph_size_in_bytes =
(p_ip_hdr->ver_hl & 0xf) * 4;

+                                  size_t iph_options_size =
iph_size_in_bytes - buf_len;

+                                  buf_len -= sizeof(ip_hdr_t);

+

+                                  /*

+                                      Could be a case that arrived igmp
packet not from type IGMPv2 ,

+                                              but IGMPv1 or IGMPv3.

+                                              We anyway pass it to
__send_mgr_filter_igmp_v2().

+                                  */

+                                  __send_mgr_filter_igmp_v2(p_port,
p_ip_hdr, iph_options_size, p_buf, buf_len);

+                      }

                        /* Not a UDP packet. */

                        cl_perf_start( SendTcp );

                        status = __send_gen( p_port, p_desc );

@@ -3081,7 +3171,128 @@

            return status;

 }

 

+/*
+ * __send_mgr_filter_igmp_v2
+ *
+ * Snoops an outgoing IGMP message and records on the matching multicast
+ * endpoint whether a local listener exists for the group, so that the
+ * multicast garbage collector can tell listened-to groups from send-only ones.
+ *
+ * PARAMETERS
+ *	p_port
+ *		Port whose endpoint map is searched for the group's endpoint.
+ *	p_ip_hdr
+ *		Fixed IP header of the packet being sent.
+ *	iph_options_size
+ *		Number of IP option bytes that live in the buffers following the one
+ *		holding p_ip_hdr.  Only consulted when buf_len is zero.
+ *	p_buf
+ *		NDIS buffer that holds p_ip_hdr.
+ *	buf_len
+ *		Bytes available in p_buf after the fixed IP header.  Zero means the
+ *		options (if any) and the IGMP message are in subsequent buffers.
+ *
+ * RETURN VALUES
+ *	NDIS_STATUS_SUCCESS
+ *		The message was examined (whether or not an endpoint was updated).
+ *	NDIS_STATUS_FAILURE
+ *		A buffer could not be obtained/mapped, or the IP header length is
+ *		smaller than the fixed header.
+ *	NDIS_STATUS_BUFFER_TOO_SHORT
+ *		The data located does not hold a complete IGMPv2 header.
+ *
+ * NOTES
+ *	Messages other than v1/v2 Membership Report and Leave Group (for example
+ *	queries and IGMPv3 reports) are only traced.
+ */
+static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+	IN				ipoib_port_t* const			p_port,
+	IN		const	ip_hdr_t* const				p_ip_hdr,
+	IN				size_t						iph_options_size,
+	IN				NDIS_BUFFER*				p_buf,
+	IN				size_t						buf_len )
+{
+	igmp_v2_hdr_t			*p_igmp_v2_hdr = NULL;
+	ipoib_endpt_t			*p_endpt = NULL;
+	mac_addr_t				fake_mcast_mac;
+	const unsigned char		*p_group;
+	boolean_t				hdr_found = FALSE;
+	boolean_t				update_listener = TRUE;
+	boolean_t				is_listener = FALSE;
+
+	IPOIB_ENTER( IPOIB_DBG_SEND );
+
+	if( !buf_len )
+	{
+		/*
+		 * Nothing follows the fixed IP header in its own buffer: walk the
+		 * chain past the IP options to reach the IGMP message.
+		 */
+		while( iph_options_size )
+		{
+			NdisGetNextBuffer( p_buf, &p_buf );
+			if( !p_buf )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to get IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+			NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr, &buf_len, NormalPagePriority );
+			if( !p_igmp_v2_hdr )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to query IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+
+			if( buf_len > iph_options_size )
+			{
+				/*
+				 * The options end inside this buffer and the IGMP message
+				 * follows them here.  Subtracting buf_len would wrap the
+				 * unsigned counter, so step inside the buffer instead.
+				 */
+				p_igmp_v2_hdr = (igmp_v2_hdr_t*)
+					((unsigned char*)p_igmp_v2_hdr + iph_options_size);
+				buf_len -= iph_options_size;
+				iph_options_size = 0;
+				hdr_found = TRUE;
+			}
+			else
+			{
+				iph_options_size -= buf_len;
+			}
+		}
+
+		if( !hdr_found )
+		{
+			/* The IGMP message starts in the buffer after the options. */
+			NdisGetNextBuffer( p_buf, &p_buf );
+			if( !p_buf )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to get IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+			NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr, &buf_len, NormalPagePriority );
+			if( !p_igmp_v2_hdr )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to query IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+		}
+	}
+	else
+	{
+		/*
+		 * More data follows the fixed IP header in the same buffer.  Any IP
+		 * options (IGMPv2 normally carries Router Alert) sit between the
+		 * fixed header and the IGMP message and must be stepped over.
+		 */
+		const size_t	ip_hdr_len = (size_t)(p_ip_hdr->ver_hl & 0xf) * 4;
+		size_t			inline_options;
+
+		if( ip_hdr_len < sizeof(ip_hdr_t) )
+		{
+			IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+				("Invalid IP header length.\n") );
+			return NDIS_STATUS_FAILURE;
+		}
+
+		inline_options = ip_hdr_len - sizeof(ip_hdr_t);
+		if( buf_len < inline_options )
+		{
+			IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+				("Buffer not large enough for IGMPv2 packet.\n") );
+			return NDIS_STATUS_BUFFER_TOO_SHORT;
+		}
+
+		p_igmp_v2_hdr = (igmp_v2_hdr_t*)
+			((unsigned char*)(p_ip_hdr + 1) + inline_options);
+		buf_len -= inline_options;
+	}
+
+	/* Make sure a complete IGMPv2 header is available. */
+	if( buf_len < sizeof(igmp_v2_hdr_t) )
+	{
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("Buffer not large enough for IGMPv2 packet.\n") );
+		return NDIS_STATUS_BUFFER_TOO_SHORT;
+	}
+
+	/*
+	 * Build the fake multicast MAC used as the endpoint key from the group
+	 * address: 01:<low nibble of byte 0>:5E:<byte 1>:<byte 2>:<byte 3>.
+	 */
+	p_group = (const unsigned char*)&p_igmp_v2_hdr->group_address;
+	fake_mcast_mac.addr[0] = 1;
+	fake_mcast_mac.addr[1] = p_group[0] & 0x0f;
+	fake_mcast_mac.addr[2] = 0x5E;
+	fake_mcast_mac.addr[3] = p_group[1];
+	fake_mcast_mac.addr[4] = p_group[2];
+	fake_mcast_mac.addr[5] = p_group[3];
+
+	switch( p_igmp_v2_hdr->type )
+	{
+	case IGMP_V1_MEMBERSHIP_REPORT:
+		/* A host answering an IGMPv1 querier is a listener as well. */
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Catched IGMP_V1_MEMBERSHIP_REPORT message\n") );
+		is_listener = TRUE;
+		break;
+
+	case IGMP_V2_MEMBERSHIP_REPORT:
+		/*
+		 * Somebody opened a listener on this group.  Flag the endpoint so
+		 * that the mcast garbage collector will not delete it.
+		 */
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Catched IGMP_V2_MEMBERSHIP_REPORT message\n") );
+		is_listener = TRUE;
+		break;
+
+	case IGMP_V2_LEAVE_GROUP:
+		/*
+		 * Somebody closed the listener on this group.  Clear the flag so
+		 * that the mcast garbage collector may delete the endpoint once it
+		 * has aged out.
+		 */
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Catched IGMP_V2_LEAVE_GROUP message\n") );
+		is_listener = FALSE;
+		break;
+
+	default:
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Send Unknown IGMP message: 0x%x \n", p_igmp_v2_hdr->type ) );
+		update_listener = FALSE;
+		break;
+	}
+
+	if( update_listener )
+	{
+		/*
+		 * The status is deliberately not inspected: the endpoint is updated
+		 * whenever a referenced endpoint is handed back, as before.
+		 */
+		(void)__endpt_mgr_ref( p_port, fake_mcast_mac, &p_endpt );
+		if( p_endpt )
+		{
+			cl_obj_lock( &p_port->obj );
+			p_endpt->is_mcast_listener = is_listener;
+			cl_obj_unlock( &p_port->obj );
+			ipoib_endpt_deref( p_endpt );
+		}
+	}
+
+	IPOIB_EXIT( IPOIB_DBG_SEND );
+	return NDIS_STATUS_SUCCESS;
+}

+

 static NDIS_STATUS

 __send_mgr_filter_udp(

            IN                                             ipoib_port_t*
const                                 p_port,

@@ -3522,14 +3733,29 @@

                        ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) )

            {

                        if( ipoib_port_join_mcast( p_port,
p_eth_hdr->dst, 

-                                   IB_MC_REC_STATE_SEND_ONLY_MEMBER) ==
IB_SUCCESS )

+                                  IB_MC_REC_STATE_FULL_MEMBER) ==
IB_SUCCESS )

                        {

                                    IPOIB_PRINT_EXIT(
TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,

                                                ("Multicast Mac - trying
to join.\n") );

                                    return NDIS_STATUS_PENDING;

                        }

            }

+          else if ( p_port->p_adapter->params.mc_garbage_collector &&

+                            status == NDIS_STATUS_SUCCESS && 

+                                    ETH_IS_MULTICAST(
p_eth_hdr->dst.addr ) &&  

+                                    !ETH_IS_BROADCAST(
p_eth_hdr->dst.addr ) )

+          {

+                      CL_ASSERT( (*pp_endpt) );

+                      CL_ASSERT((*pp_endpt)->h_mcast != NULL);

+                      CL_ASSERT((*pp_endpt)->is_mcast_endpoint);

 

+                      if (++((*pp_endpt)->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD) 

+                      {

+                                  (*pp_endpt)->mcast_count = 0;

+                                  (*pp_endpt)->mcast_send_timestamp =
cl_get_time_stamp_sec();

+                      }

+          }

+

            IPOIB_EXIT( IPOIB_DBG_SEND );

            return status;

 }

@@ -3688,6 +3914,47 @@

                        }

 

                        cl_perf_start( SendMgrQueue );

+#pragma warning(disable:4127)

+                      do{

+        if ( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) && 

+                                  p_eth_hdr->type == ETH_PROT_TYPE_IP
&&

+                                  !ETH_IS_BROADCAST(
p_eth_hdr->dst.addr ) ) 

+                      {

+            ip_hdr_t                                  *p_ip_hdr;

+                                  NDIS_BUFFER
*p_ip_hdr_buf;

+                                  UINT
ip_hdr_buf_len;

+

+                                  // Extract the ip hdr 

+            NdisGetNextBuffer( p_buf, &p_ip_hdr_buf );

+                                  if( !p_ip_hdr_buf )

+                                  {

+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                                                          ("Failed to
get IP header buffer.\n") );

+                                              break;

+                                  }

+          

+                                  NdisQueryBufferSafe( p_ip_hdr_buf,
&p_ip_hdr, &ip_hdr_buf_len, NormalPagePriority );

+                                  if( !p_ip_hdr )

+                                  {

+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                                                          ("Failed to
query IP header buffer.\n") );

+                                              break;

+                                  }

+

+                                  if( ip_hdr_buf_len < sizeof(ip_hdr_t)
)

+                                  {

+                                              /* This buffer is done
for.  Get the next buffer. */

+                                              IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                                                          ("Buffer too
small for IP packet.\n") );

+                                              //return
NDIS_STATUS_BUFFER_TOO_SHORT;

+                                              break;

+                                  }

+                      

+            p_eth_hdr->dst.addr[1] = ((unsigned
char*)&p_ip_hdr->dst_ip)[0] & 0x0f;

+                                  p_eth_hdr->dst.addr[3] = ((unsigned
char*)&p_ip_hdr->dst_ip)[1];

+                      }

+                      }while(FALSE);

+#pragma warning(default:4127)

                        status = __send_mgr_queue( p_port, p_eth_hdr,
&desc.p_endpt );

                        cl_perf_stop( &p_port->p_adapter->perf,
SendMgrQueue );

                        if( status == NDIS_STATUS_PENDING )

@@ -3827,7 +4094,7 @@

                                    if( ETH_IS_MULTICAST(
p_eth_hdr->dst.addr ) )

                                    {

                                                if(
ipoib_port_join_mcast( p_port, p_eth_hdr->dst,

-
IB_MC_REC_STATE_SEND_ONLY_MEMBER) == IB_SUCCESS )

+
IB_MC_REC_STATE_FULL_MEMBER) == IB_SUCCESS )

                                                {

 
IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,

 
("Multicast Mac - trying to join.\n") );

@@ -4658,6 +4925,8 @@

            ib_query_req_t                          query;

            ib_user_query_t                        info;

            ib_portinfo_record_t       port_rec;

+    cl_status_t             cl_status;

+    BOOLEAN                 success = TRUE;

 

            IPOIB_ENTER( IPOIB_DBG_INIT );

 

@@ -4692,17 +4961,54 @@

            /* reference the object for the multicast query. */

            ipoib_port_ref( p_port, ref_port_up );

 

+    __try

+    {

            status = p_port->p_adapter->p_ifc->query(

                        p_port->p_adapter->h_al, &query,
&p_port->ib_mgr.h_query );

            if( status != IB_SUCCESS )

            {

-                       KeSetEvent( &p_port->sa_event, EVENT_INCREMENT,
FALSE );

-                       ipoib_set_inactive( p_port->p_adapter );

-                       ipoib_port_deref( p_port, ref_port_up );

+            success = FALSE;

                        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,

                                    ("ib_query returned %s\n", 

 
p_port->p_adapter->p_ifc->get_err_str( status )) );

-                       return;

+            __leave;

+        }

+

+        if (p_port->p_adapter->params.mc_garbage_collector) 

+        {

+            CL_ASSERT(p_port->mcast_event_init);

+            cl_status = cl_event_reset(&p_port->mcast_event);

+            if( cl_status != CL_SUCCESS )

+            {

+                success = FALSE;

+                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                    ("cl_event_reset returned %s\n",
cl_status_text[cl_status]) );

+                __leave;

+            }

+

+            cl_status = cl_thread_init(

+                &p_port->mcast_thread, 

+                __port_mcast_garbage_collector, 

+                p_port, 

+                "mcast_garbage");

+            if( cl_status != CL_SUCCESS )

+            {

+                success = FALSE;

+                IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,

+                    ("cl_thread_init returned %s\n",
cl_status_text[cl_status]) );

+                __leave;

+            }

+            p_port->mcast_thread_init = TRUE;

+        }

+    }

+    __finally

+    {

+        if (!success) 

+        {

+            KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );

+            ipoib_set_inactive( p_port->p_adapter );

+            ipoib_port_deref( p_port, ref_port_up );

+        }

            }

 

            IPOIB_EXIT( IPOIB_DBG_INIT );

@@ -5176,6 +5482,16 @@

                        return;

            }

 

+    /* Destroy multicast garbage collector thread */

+

+    if(p_port->p_adapter->params.mc_garbage_collector &&
p_port->mcast_thread_init)

+    {

+        CL_ASSERT(p_port->mcast_event_init);

+        cl_event_signal(&p_port->mcast_event);

+

+        cl_thread_destroy(&p_port->mcast_thread);

+        p_port->mcast_thread_init = FALSE;

+    }

            KeResetEvent(&p_port->leave_mcast_event);

 

            /* Reset all endpoints so we don't flush our ARP cache. */

@@ -5437,7 +5753,8 @@

            mcast_req.member_rec.mlid = 0;

            ib_member_set_state(
&mcast_req.member_rec.scope_state,state);

 

-           if( mac.addr[0] == 1 && mac.addr[1] == 0 && mac.addr[2] ==
0x5E )

+

+          if( mac.addr[0] == 1 && mac.addr[2] == 0x5E )

            {

                        /*

                         * Update the address portion of the MGID with
the 28 lower bits of the

@@ -5445,7 +5762,7 @@

                         * the 24 lower bits of that
network-byte-ordered value (assuming MSb

                         * is zero).

                         */

-                       mcast_req.member_rec.mgid.raw[12] = 0;

+                      mcast_req.member_rec.mgid.raw[12] = mac.addr[1];

                        mcast_req.member_rec.mgid.raw[13] = mac.addr[3];

                        mcast_req.member_rec.mgid.raw[14] = mac.addr[4];

                        mcast_req.member_rec.mgid.raw[15] = mac.addr[5];

@@ -5603,6 +5920,15 @@

                                    &p_port->endpt_mgr.lid_endpts,
p_endpt->dlid, &p_endpt->lid_item );

                        CL_ASSERT( p_qitem == &p_endpt->lid_item );

            }

+    /* Add the endpoint to the multicast endpoints list */

+          if ( p_port->p_adapter->params.mc_garbage_collector )

+          {

+                      p_endpt->is_mcast_endpoint = TRUE;

+                      p_endpt->mcast_count = 0;

+                      p_endpt->mcast_send_timestamp =
cl_get_time_stamp_sec();

+          }

+          else

+                      p_endpt->is_mcast_endpoint = FALSE;

            cl_obj_unlock( &p_port->obj );

            

            /* Try to send all pending sends. */

@@ -5659,4 +5985,81 @@

            IPOIB_EXIT( IPOIB_DBG_MCAST );

 }

 

+static void CL_API __port_mcast_garbage_collector

+          (IN                                            void*
context )

+{

+          ipoib_port_t *p_port = context;

+          const uint32_t WAIT_US =
p_port->p_adapter->params.mc_leave_rescan * 1000000;       /*
cl_event_wait_on takes its timeout in usec */

+    const mac_addr_t DEFAULT_MCAST_GROUP = {0x01, 0x00, 0x5e, 0x00,
0x00, 0x01};

 

+    IPOIB_ENTER( IPOIB_DBG_ENDPT );

+

+    CL_ASSERT( p_port->p_adapter->params.mc_garbage_collector );

+

+          while(cl_event_wait_on(&p_port->mcast_event,WAIT_US,FALSE) !=
STATUS_SUCCESS)

+          {

+                      /* Do garbage collecting... */

+

+        cl_map_item_t     *p_item;

+        ipoib_endpt_t       *p_endpt;

+        cl_qlist_t              destroy_mc_list;

+        const uint32_t CURRENT_TIME_SEC = cl_get_time_stamp_sec();

+

+        cl_qlist_init( &destroy_mc_list );

+

+        cl_obj_lock( &p_port->obj );

+

+        p_item = cl_qmap_head( &p_port->endpt_mgr.mac_endpts );

+        while( p_item != cl_qmap_end( &p_port->endpt_mgr.mac_endpts ) )

+        {

+            p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, mac_item );

+            p_item = cl_qmap_next( p_item );

+

+            /* Check if the current endpoint is an old multicast item
*/

+

+            if( p_endpt->h_mcast && 

+                p_endpt->is_mcast_endpoint &&

+
(!p_endpt->is_mcast_listener) &&

+                cl_memcmp( &p_endpt->mac, &DEFAULT_MCAST_GROUP,
sizeof(mac_addr_t) ) &&

+                CURRENT_TIME_SEC - p_endpt->mcast_send_timestamp >
p_port->p_adapter->params.mc_aging_time)

+            {

+                cl_qmap_remove_item( &p_port->endpt_mgr.mac_endpts,

+                    &p_endpt->mac_item );

+                cl_fmap_remove_item( &p_port->endpt_mgr.gid_endpts,

+                    &p_endpt->gid_item );

+

+                if( p_endpt->dlid )

+                {

+                    cl_qmap_remove_item( &p_port->endpt_mgr.lid_endpts,

+                        &p_endpt->lid_item );

+                    p_endpt->dlid = 0;

+                }

+

+                cl_qlist_insert_tail(

+                    &destroy_mc_list,
&p_endpt->mac_item.pool_item.list_item );

+            }

+        }

+        cl_obj_unlock( &p_port->obj );

+

+        /* Destroy all multicast endpoints now that we have released
the lock. */

+        while( cl_qlist_count( &destroy_mc_list ) )

+        {

+            p_endpt = PARENT_STRUCT( cl_qlist_head( &destroy_mc_list ),

+                                     ipoib_endpt_t,
mac_item.pool_item.list_item );

+

+            IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,

+                ("mcast garbage collector: destroying endpoint
%02x:%02x:%02x:%02x:%02x:%02x \n", 

+                     p_endpt->mac.addr[0],

+                     p_endpt->mac.addr[1],

+                     p_endpt->mac.addr[2],

+                     p_endpt->mac.addr[3],

+                     p_endpt->mac.addr[4],

+                     p_endpt->mac.addr[5]) );

+

+            cl_obj_destroy( &PARENT_STRUCT( cl_qlist_remove_head(
&destroy_mc_list ),

+                ipoib_endpt_t, mac_item.pool_item.list_item )->obj );

+        }

+          }

+

+    IPOIB_EXIT( IPOIB_DBG_ENDPT );

+}

Index: ulp/ipoib/kernel/ipoib_port.h

===================================================================

--- ulp/ipoib/kernel/ipoib_port.h   (revision 1047)

+++ ulp/ipoib/kernel/ipoib_port.h (working copy)

@@ -63,6 +63,7 @@

  */

 #define IPOIB_USE_DMA         1

 

+#define IPOIB_MCAST_TIMESTAMP_THRESHOLD     10000

 

 #define IPOIB_PORT_FROM_PACKET( P )        \

            (((ipoib_port_t**)P->MiniportReservedEx)[0])

@@ -506,6 +507,11 @@

            atomic32_t
endpt_rdr;

 

            atomic32_t
hdr_idx;

+          boolean_t
mcast_event_init;           /* Not really necessary, since
cl_event_destroy is NULL */

+          cl_event_t
mcast_event;                 /* Multicast garbage collector thread
terminate event */

+

+          boolean_t
mcast_thread_init;         

+          cl_thread_t
mcast_thread;               /* Multicast garbage collector thread */

            ipoib_hdr_t
hdr[1];  /* Must be last! */

 

 }          ipoib_port_t;

Index: ulp/ipoib/kernel/netipoib.inf

===================================================================

--- ulp/ipoib/kernel/netipoib.inf    (revision 1047)

+++ ulp/ipoib/kernel/netipoib.inf  (working copy)

@@ -125,7 +125,28 @@

 HKR, Ndi\Params\PayloadMtu,              Default, 0, "2044"

 HKR, Ndi\Params\PayloadMtu,              Min,                  0, "60"

 HKR, Ndi\Params\PayloadMtu,              Max,                 0, "2044"

+HKR, Ndi\Params\MCGarbageCollector,            ParamDesc,      0, "MC
garbage collector"

+HKR, Ndi\Params\MCGarbageCollector,            Type,                0,
"enum"

+HKR, Ndi\Params\MCGarbageCollector,            Default, 0, "1"

+HKR, Ndi\Params\MCGarbageCollector,            Optional,           0,
"0"

+HKR, Ndi\Params\MCGarbageCollector\enum,"0",                      0,
"Disabled"

+HKR, Ndi\Params\MCGarbageCollector\enum,"1",                      0,
"Enabled"

 

+HKR, Ndi\Params\MCLeaveRescan,                  ParamDesc,      0, "MC
leave rescan (sec)"

+HKR, Ndi\Params\MCLeaveRescan,                  Type,                0,
"dword"

+HKR, Ndi\Params\MCLeaveRescan,                  Default, 0, "130"

+HKR, Ndi\Params\MCLeaveRescan,                  Optional,           0,
"0"

+HKR, Ndi\Params\MCLeaveRescan,                  Min,
0, "1"

+HKR, Ndi\Params\MCLeaveRescan,                  Max,                 0,
"3600"

+

+HKR, Ndi\Params\MCAgingTime,                      ParamDesc,      0,
"MC aging time (sec)"

+HKR, Ndi\Params\MCAgingTime,                      Type,
0, "dword"

+HKR, Ndi\Params\MCAgingTime,                      Default, 0, "260"

+HKR, Ndi\Params\MCAgingTime,                      Optional,
0, "0"

+HKR, Ndi\Params\MCAgingTime,                      Min,
0, "1"

+HKR, Ndi\Params\MCAgingTime,                      Max,
0, "3600"

+HKLM, System\CurrentControlSet\Services\Tcpip\Parameters, IGMPVersion,
0x00010001, 3

+

 [IpoibService]

 DisplayName     = %IpoibServiceDispName%

 ServiceType     = 1 ;%SERVICE_KERNEL_DRIVER%

 

 

 

 

Slava Strebkov

SW Engineer

Voltaire

099718750

 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080407/6efd9517/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mcast_garbage_collector.diff
Type: application/octet-stream
Size: 27356 bytes
Desc: mcast_garbage_collector.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080407/6efd9517/attachment.obj>


More information about the ofw mailing list