[ofw] [PATCH] mcast garbage collector
Slava Strebkov
slavas at voltaire.com
Mon Apr 7 05:48:02 PDT 2008
Hi, the following code provides garbage collection for mcast listeners
in IPoIB.
Index: inc/complib/cl_types.h
===================================================================
--- inc/complib/cl_types.h (revision 1047)
+++ inc/complib/cl_types.h (working copy)
@@ -46,7 +46,7 @@
#include <complib/cl_types_osd.h>
-
+typedef uint8_t net8_t;
typedef uint16_t net16_t;
typedef uint32_t net32_t;
typedef uint64_t net64_t;
Index: inc/kernel/ip_packet.h
===================================================================
--- inc/kernel/ip_packet.h (revision 1047)
+++ inc/kernel/ip_packet.h (working copy)
@@ -196,6 +196,7 @@
#define IP_PROT_IP 4
#define IP_PROT_TCP 6
#define IP_PROT_UDP 17
+#define IP_PROT_IGMP 2
#include <complib/cl_packon.h>
@@ -355,6 +356,55 @@
*********/
#include <complib/cl_packoff.h>
+#define IGMP_V2_MEMBERSHIP_QUERY 0x11
+#define IGMP_V2_MEMBERSHIP_REPORT 0x16
+#define IGMP_V1_MEMBERSHIP_REPORT 0x12 // for backward compatibility with IGMPv1
+#define IGMP_V2_LEAVE_GROUP 0x17
+#include <complib/cl_packon.h>
+/****s* IB Network Drivers/igmp_v2_hdr_t
+* NAME
+* igmp_v2_hdr_t
+*
+* DESCRIPTION
+* Defines the IGMPv2 header carried as the payload of IP packets.
+*
+* SYNOPSIS
+*/
+typedef struct _igmp_v2_hdr
+{
+ net8_t type;
+ net8_t max_resp_time;
+ net16_t chksum;
+ net32_t group_address;
+} PACK_SUFFIX igmp_v2_hdr_t;
+/*
+* FIELDS
+* type
+* Type of IGMPv2 message: query (IGMP_V2_MEMBERSHIP_QUERY),
+* report (IGMP_V2_MEMBERSHIP_REPORT, or
+* IGMP_V1_MEMBERSHIP_REPORT for IGMPv1 compatibility) or
+* leave (IGMP_V2_LEAVE_GROUP).
+*
+* max_resp_time
+* The Max Response Time field is meaningful only in Membership
+* Query messages, and specifies the maximum allowed time before
+* sending a responding report in units of 1/10 second. In all
+* other messages, it is set to zero by the sender and ignored
+* by receivers.
+*
+* chksum
+* The checksum is the 16-bit one's complement of the one's
+* complement sum of the whole IGMP message (the entire IP
+* payload).
+*
+* group_address
+* In a Membership Query message, the group address field is set
+* to zero when sending a General Query, and set to the group
+* address being queried when sending a Group-Specific Query.
+*
+* In a Membership Report or Leave Group message, the group
+* address field holds the IP multicast group address of the
+* group being reported or left.
+*
+* SEE ALSO
+* IB Network Drivers, eth_hdr_t, arp_pkt_t, ip_hdr_t, tcp_hdr_t
+*********/
+#include <complib/cl_packoff.h>
#define DHCP_PORT_SERVER CL_HTON16(67)
#define DHCP_PORT_CLIENT CL_HTON16(68)
Index: ulp/ipoib/kernel/ipoib_adapter.c
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.c (revision 1047)
+++ ulp/ipoib/kernel/ipoib_adapter.c (working copy)
@@ -762,7 +762,14 @@
if( j != p_adapter->mcast_array_size
)
continue;
- ipoib_port_join_mcast( p_port,
p_mac_array[i] ,IB_MC_REC_STATE_FULL_MEMBER);
+ // Join only the "all hosts" multicast group for IP, plus every non-IP multicast group
+ if ( ( p_mac_array[i].addr[0] == 1 &&
p_mac_array[i].addr[1] == 0 && p_mac_array[i].addr[2] == 0x5e &&
+ p_mac_array[i].addr[3]
== 0 && p_mac_array[i].addr[4] == 0 && p_mac_array[i].addr[5] == 1 ) ||
+ !(
p_mac_array[i].addr[0] == 1 && p_mac_array[i].addr[1] == 0 &&
p_mac_array[i].addr[2] == 0x5e )
+ )
+ {
+ ipoib_port_join_mcast(
p_port, p_mac_array[i], IB_MC_REC_STATE_FULL_MEMBER );
+ }
}
}
Index: ulp/ipoib/kernel/ipoib_adapter.h
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.h (revision 1047)
+++ ulp/ipoib/kernel/ipoib_adapter.h (working copy)
@@ -74,6 +74,9 @@
uint32_t payload_mtu;
uint32_t xfer_block_size;
mac_addr_t conf_mac;
+ boolean_t mc_garbage_collector;
+ uint32_t mc_leave_rescan;
+ uint32_t mc_aging_time;
} ipoib_params_t;
/*
Index: ulp/ipoib/kernel/ipoib_driver.c
===================================================================
--- ulp/ipoib/kernel/ipoib_driver.c (revision 1047)
+++ ulp/ipoib/kernel/ipoib_driver.c (working copy)
@@ -526,6 +526,40 @@
}
p_adapter->params.recv_pool_ratio =
p_param->ParameterData.IntegerData;
+ /* Required: MC garbage collector. */
+ RtlInitUnicodeString( &keyword, L"MCGarbageCollector" );
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword, NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("MC garbage collector parameter is missing.\n") );
+ return status;
+ }
+ p_adapter->params.mc_garbage_collector =
(p_param->ParameterData.IntegerData != 0);
+
+ /* Optional: MC leave rescan (sec) for the MC garbage collector thread. */
+ RtlInitUnicodeString( &keyword, L"MCLeaveRescan" );
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword,
NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ p_adapter->params.mc_leave_rescan = 130;
+ }
+ else
+ p_adapter->params.mc_leave_rescan =
p_param->ParameterData.IntegerData;
+
+ /* Optional: MC aging time (sec) */
+ RtlInitUnicodeString( &keyword, L"MCAgingTime" );
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword,
NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ p_adapter->params.mc_aging_time = 260;
+ }
+ else
+ p_adapter->params.mc_aging_time =
p_param->ParameterData.IntegerData;
+
/* required: MTU size. */
RtlInitUnicodeString( &keyword, L"PayloadMtu" );
NdisReadConfiguration(
Index: ulp/ipoib/kernel/ipoib_endpoint.h
===================================================================
--- ulp/ipoib/kernel/ipoib_endpoint.h (revision 1047)
+++ ulp/ipoib/kernel/ipoib_endpoint.h (working copy)
@@ -61,7 +61,10 @@
ib_av_handle_t h_av;
boolean_t
expired;
ib_al_ifc_t
*p_ifc;
-
+ uint32_t
mcast_send_timestamp;
+ int32_t mcast_count;
+ boolean_t is_mcast_endpoint;
+ boolean_t
is_mcast_listener;
} ipoib_endpt_t;
/*
* FIELDS
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c (revision 1047)
+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -94,6 +94,8 @@
__port_free(
IN cl_obj_t*
const p_obj );
+static void CL_API __port_mcast_garbage_collector
+ (IN void*
context );
/***********************************************************************
*******
*
@@ -290,6 +292,14 @@
IN OUT
ipoib_send_desc_t* const p_desc );
static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+ IN ipoib_port_t*
const p_port,
+ IN const ip_hdr_t* const
p_ip_hdr,
+ IN size_t
iph_options_size,
+ IN NDIS_BUFFER*
p_buf,
+ IN size_t
buf_len );
+
+static NDIS_STATUS
__send_mgr_filter_udp(
IN ipoib_port_t*
const p_port,
IN const ip_hdr_t* const
p_ip_hdr,
@@ -579,6 +589,11 @@
KeInitializeEvent( &p_port->sa_event, NotificationEvent,
TRUE );
KeInitializeEvent( &p_port->leave_mcast_event,
NotificationEvent, TRUE );
+ p_port->mcast_event_init = FALSE;
+ cl_event_construct(&p_port->mcast_event);
+
+ p_port->mcast_thread_init = FALSE;
+ cl_thread_construct(&p_port->mcast_thread);
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -653,6 +668,18 @@
return status;
}
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ /* Initialize multicast garbage collector event */
+ cl_status = cl_event_init(&p_port->mcast_event, TRUE);
+ if( cl_status != CL_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_event_init returned %s\n",
cl_status_text[cl_status]) );
+ return IB_ERROR;
+ }
+ p_port->mcast_event_init = TRUE;
+ }
/* We only ever destroy from the PnP callback thread. */
cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,
__port_destroying, __port_cleanup, __port_free
);
@@ -746,7 +773,26 @@
CL_ASSERT( p_obj );
p_port = PARENT_STRUCT( p_obj, ipoib_port_t, obj );
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ /* Destroy multicast garbage collector thread */
+ if(p_port->mcast_thread_init)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ cl_event_signal(&p_port->mcast_event);
+
+ cl_thread_destroy(&p_port->mcast_thread);
+ p_port->mcast_thread_init = FALSE;
+ }
+
+ if (p_port->mcast_event_init)
+ {
+ cl_event_destroy(&p_port->mcast_event);
+ p_port->mcast_event_init = FALSE;
+ }
+ }
+
__endpt_mgr_destroy( p_port );
__recv_mgr_destroy( p_port );
__send_mgr_destroy( p_port );
@@ -2083,6 +2129,30 @@
p_eth->hdr.src = p_src->mac;
p_eth->hdr.dst = p_dst->mac;
+ /* Check if multicast packet and update endpoint timestamp if
needed */
+
+ if ( ETH_IS_MULTICAST(p_eth->hdr.dst.addr) &&
+ p_eth->hdr.type == ETH_PROT_TYPE_IP &&
+ !ETH_IS_BROADCAST(p_eth->hdr.dst.addr) )
+ {
+ /*
+ p_port->p_adapter->params.mc_garbage_collector is not
+ available in this context, so p_dst->is_mcast_endpoint is
+ used instead to tell whether the multicast garbage
+ collector is enabled.
+ */
+ if ( p_dst->is_mcast_endpoint &&
+ ++(p_dst->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD)
+ {
+ CL_ASSERT(p_dst->h_mcast != NULL);
+ CL_ASSERT(p_dst->is_mcast_endpoint);
+
+ p_dst->mcast_count = 0;
+ p_dst->mcast_send_timestamp = cl_get_time_stamp_sec();
+ }
+
+ p_eth->hdr.dst.addr[1] = 0;
+ p_eth->hdr.dst.addr[3] = p_eth->hdr.dst.addr[3] &
0x7f;
+ }
IPOIB_EXIT( IPOIB_DBG_RECV );
return IB_SUCCESS;
}
@@ -3062,6 +3132,26 @@
if( p_ip_hdr->offset ||
p_ip_hdr->prot != IP_PROT_UDP )
{
+ /* Check if this packet is IGMP */
+ if ( p_ip_hdr->prot == IP_PROT_IGMP )
+ {
+ /*
+ For IGMP packets the IP header was observed to arrive
+ in two NDIS_BUFFERs:
+ 1. the fixed IP header
+ 2. the IP options
+ so the IP options NDIS_BUFFER has to be skipped to reach
+ the IGMP message.
+ */
+ size_t iph_size_in_bytes =
(p_ip_hdr->ver_hl & 0xf) * 4;
+ size_t iph_options_size =
iph_size_in_bytes - buf_len;
+ buf_len -= sizeof(ip_hdr_t);
+
+ /*
+ The packet may be IGMPv1 or IGMPv3 rather than IGMPv2;
+ it is passed to __send_mgr_filter_igmp_v2() regardless.
+ */
+ __send_mgr_filter_igmp_v2(p_port,
p_ip_hdr, iph_options_size, p_buf, buf_len);
+ }
/* Not a UDP packet. */
cl_perf_start( SendTcp );
status = __send_gen( p_port, p_desc );
@@ -3081,7 +3171,128 @@
return status;
}
+static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+ IN ipoib_port_t*
const p_port,
+ IN const ip_hdr_t* const
p_ip_hdr,
+ IN size_t
iph_options_size,
+ IN NDIS_BUFFER*
p_buf,
+ IN size_t
buf_len )
+{
+ igmp_v2_hdr_t *p_igmp_v2_hdr = NULL;
+ NDIS_STATUS endpt_status;
+ ipoib_endpt_t* p_endpt = NULL;
+ mac_addr_t fake_mcast_mac;
+ IPOIB_ENTER( IPOIB_DBG_SEND );
+
+ if( !buf_len )
+ {
+ // To get the IGMP packet we need to skip the ip
options NDIS_BUFFER (if exists)
+ while ( iph_options_size )
+ {
+ NdisGetNextBuffer( p_buf, &p_buf );
+ if( !p_buf )
+ {
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to
get IGMPv2 header buffer.\n") );
+ return
NDIS_STATUS_FAILURE;
+ }
+ NdisQueryBufferSafe( p_buf,
&p_igmp_v2_hdr, &buf_len, NormalPagePriority );
+ if( !p_igmp_v2_hdr )
+ {
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to
query IGMPv2 header buffer.\n") );
+ return
NDIS_STATUS_FAILURE;
+ }
+
+ iph_options_size-=buf_len;
+ }
+
+ NdisGetNextBuffer( p_buf, &p_buf );
+ if( !p_buf )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+ ("Failed to get IGMPv2
header buffer.\n") );
+ return NDIS_STATUS_FAILURE;
+ }
+ NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr,
&buf_len, NormalPagePriority );
+ if( !p_igmp_v2_hdr )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+ ("Failed to query IGMPv2
header buffer.\n") );
+ return NDIS_STATUS_FAILURE;
+ }
+ }
+ else
+ {
+ p_igmp_v2_hdr = (igmp_v2_hdr_t*)(p_ip_hdr + 1);
+ }
+ /* Make sure the buffer can hold a complete IGMPv2 header. */
+ if( buf_len < sizeof(igmp_v2_hdr_t) )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+ ("Buffer not large enough for IGMPv2
packet.\n") );
+ return NDIS_STATUS_BUFFER_TOO_SHORT;
+ }
+
+ // Build a fake multicast MAC (01:xx:5E:xx:xx:xx) from the bytes of the IGMP group address
+ fake_mcast_mac.addr[0] = 1;
+ fake_mcast_mac.addr[1] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[0] & 0x0f;
+ fake_mcast_mac.addr[2] = 0x5E;
+ fake_mcast_mac.addr[3] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[1];
+ fake_mcast_mac.addr[4] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[2];
+ fake_mcast_mac.addr[5] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[3];
+
+ switch ( p_igmp_v2_hdr->type )
+ {
+ case IGMP_V2_MEMBERSHIP_REPORT:
+ /*
+ A membership report means that a listener was opened
+ on this group. Flag the endpoint (if one is found) as
+ a listener so that the mcast garbage collector will
+ not delete it.
+ */
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+ ("Catched IGMP_V2_MEMBERSHIP_REPORT
message\n") );
+ endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
&p_endpt );
+ if ( p_endpt )
+ {
+ cl_obj_lock( &p_port->obj );
+ p_endpt->is_mcast_listener = TRUE;
+ cl_obj_unlock( &p_port->obj );
+ ipoib_endpt_deref( p_endpt );
+ }
+ break;
+
+ case IGMP_V2_LEAVE_GROUP:
+ /*
+ A leave message means that the listener on this group
+ was closed. Clear the listener flag on the endpoint
+ (if one is found) so that the mcast garbage collector
+ may delete it once it has aged out.
+ */
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+ ("Catched IGMP_V2_LEAVE_GROUP
message\n") );
+ endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
&p_endpt );
+ if ( p_endpt )
+ {
+ cl_obj_lock( &p_port->obj );
+ p_endpt->is_mcast_listener = FALSE;
+
+ cl_obj_unlock( &p_port->obj );
+ ipoib_endpt_deref( p_endpt );
+ }
+ break;
+
+ default:
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+ ("Send Unknown IGMP message:
0x%x \n", p_igmp_v2_hdr->type ) );
+ break;
+ }
+
+ IPOIB_EXIT( IPOIB_DBG_SEND );
+ return NDIS_STATUS_SUCCESS;
+}
+
static NDIS_STATUS
__send_mgr_filter_udp(
IN ipoib_port_t*
const p_port,
@@ -3522,14 +3733,29 @@
ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) )
{
if( ipoib_port_join_mcast( p_port,
p_eth_hdr->dst,
- IB_MC_REC_STATE_SEND_ONLY_MEMBER) ==
IB_SUCCESS )
+ IB_MC_REC_STATE_FULL_MEMBER) ==
IB_SUCCESS )
{
IPOIB_PRINT_EXIT(
TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,
("Multicast Mac - trying
to join.\n") );
return NDIS_STATUS_PENDING;
}
}
+ else if ( p_port->p_adapter->params.mc_garbage_collector &&
+ status == NDIS_STATUS_SUCCESS &&
+ ETH_IS_MULTICAST(
p_eth_hdr->dst.addr ) &&
+ !ETH_IS_BROADCAST(
p_eth_hdr->dst.addr ) )
+ {
+ CL_ASSERT( (*pp_endpt) );
+ CL_ASSERT((*pp_endpt)->h_mcast != NULL);
+ CL_ASSERT((*pp_endpt)->is_mcast_endpoint);
+ if (++((*pp_endpt)->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD)
+ {
+ (*pp_endpt)->mcast_count = 0;
+ (*pp_endpt)->mcast_send_timestamp =
cl_get_time_stamp_sec();
+ }
+ }
+
IPOIB_EXIT( IPOIB_DBG_SEND );
return status;
}
@@ -3688,6 +3914,47 @@
}
cl_perf_start( SendMgrQueue );
+#pragma warning(disable:4127)
+ do{
+ if ( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) &&
+ p_eth_hdr->type == ETH_PROT_TYPE_IP
&&
+ !ETH_IS_BROADCAST(
p_eth_hdr->dst.addr ) )
+ {
+ ip_hdr_t *p_ip_hdr;
+ NDIS_BUFFER
*p_ip_hdr_buf;
+ UINT
ip_hdr_buf_len;
+
+ // Extract the ip hdr
+ NdisGetNextBuffer( p_buf, &p_ip_hdr_buf );
+ if( !p_ip_hdr_buf )
+ {
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to
get IP header buffer.\n") );
+ break;
+ }
+
+ NdisQueryBufferSafe( p_ip_hdr_buf,
&p_ip_hdr, &ip_hdr_buf_len, NormalPagePriority );
+ if( !p_ip_hdr )
+ {
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to
query IP header buffer.\n") );
+ break;
+ }
+
+ if( ip_hdr_buf_len < sizeof(ip_hdr_t)
)
+ {
+ /* This buffer is done
for. Get the next buffer. */
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Buffer too
small for IP packet.\n") );
+ //return
NDIS_STATUS_BUFFER_TOO_SHORT;
+ break;
+ }
+
+ p_eth_hdr->dst.addr[1] = ((unsigned
char*)&p_ip_hdr->dst_ip)[0] & 0x0f;
+ p_eth_hdr->dst.addr[3] = ((unsigned
char*)&p_ip_hdr->dst_ip)[1];
+ }
+ }while(FALSE);
+#pragma warning(default:4127)
status = __send_mgr_queue( p_port, p_eth_hdr,
&desc.p_endpt );
cl_perf_stop( &p_port->p_adapter->perf,
SendMgrQueue );
if( status == NDIS_STATUS_PENDING )
@@ -3827,7 +4094,7 @@
if( ETH_IS_MULTICAST(
p_eth_hdr->dst.addr ) )
{
if(
ipoib_port_join_mcast( p_port, p_eth_hdr->dst,
-
IB_MC_REC_STATE_SEND_ONLY_MEMBER) == IB_SUCCESS )
+
IB_MC_REC_STATE_FULL_MEMBER) == IB_SUCCESS )
{
IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,
("Multicast Mac - trying to join.\n") );
@@ -4658,6 +4925,8 @@
ib_query_req_t query;
ib_user_query_t info;
ib_portinfo_record_t port_rec;
+ cl_status_t cl_status;
+ BOOLEAN success = TRUE;
IPOIB_ENTER( IPOIB_DBG_INIT );
@@ -4692,17 +4961,54 @@
/* reference the object for the multicast query. */
ipoib_port_ref( p_port, ref_port_up );
+ __try
+ {
status = p_port->p_adapter->p_ifc->query(
p_port->p_adapter->h_al, &query,
&p_port->ib_mgr.h_query );
if( status != IB_SUCCESS )
{
- KeSetEvent( &p_port->sa_event, EVENT_INCREMENT,
FALSE );
- ipoib_set_inactive( p_port->p_adapter );
- ipoib_port_deref( p_port, ref_port_up );
+ success = FALSE;
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
("ib_query returned %s\n",
p_port->p_adapter->p_ifc->get_err_str( status )) );
- return;
+ __leave;
+ }
+
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ cl_status = cl_event_reset(&p_port->mcast_event);
+ if( cl_status != CL_SUCCESS )
+ {
+ success = FALSE;
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_event_reset returned %s\n",
cl_status_text[cl_status]) );
+ __leave;
+ }
+
+ cl_status = cl_thread_init(
+ &p_port->mcast_thread,
+ __port_mcast_garbage_collector,
+ p_port,
+ "mcast_garbage");
+ if( cl_status != CL_SUCCESS )
+ {
+ success = FALSE;
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_thread_init returned %s\n",
cl_status_text[cl_status]) );
+ __leave;
+ }
+ p_port->mcast_thread_init = TRUE;
+ }
+ }
+ __finally
+ {
+ if (!success)
+ {
+ KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
+ ipoib_set_inactive( p_port->p_adapter );
+ ipoib_port_deref( p_port, ref_port_up );
+ }
}
IPOIB_EXIT( IPOIB_DBG_INIT );
@@ -5176,6 +5482,16 @@
return;
}
+ /* Destroy multicast garbage collector thread */
+
+ if(p_port->p_adapter->params.mc_garbage_collector &&
p_port->mcast_thread_init)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ cl_event_signal(&p_port->mcast_event);
+
+ cl_thread_destroy(&p_port->mcast_thread);
+ p_port->mcast_thread_init = FALSE;
+ }
KeResetEvent(&p_port->leave_mcast_event);
/* Reset all endpoints so we don't flush our ARP cache. */
@@ -5437,7 +5753,8 @@
mcast_req.member_rec.mlid = 0;
ib_member_set_state(
&mcast_req.member_rec.scope_state,state);
- if( mac.addr[0] == 1 && mac.addr[1] == 0 && mac.addr[2] ==
0x5E )
+
+ if( mac.addr[0] == 1 && mac.addr[2] == 0x5E )
{
/*
* Update the address portion of the MGID with
the 28 lower bits of the
@@ -5445,7 +5762,7 @@
* the 24 lower bits of that
network-byte-ordered value (assuming MSb
* is zero).
*/
- mcast_req.member_rec.mgid.raw[12] = 0;
+ mcast_req.member_rec.mgid.raw[12] = mac.addr[1];
mcast_req.member_rec.mgid.raw[13] = mac.addr[3];
mcast_req.member_rec.mgid.raw[14] = mac.addr[4];
mcast_req.member_rec.mgid.raw[15] = mac.addr[5];
@@ -5603,6 +5920,15 @@
&p_port->endpt_mgr.lid_endpts,
p_endpt->dlid, &p_endpt->lid_item );
CL_ASSERT( p_qitem == &p_endpt->lid_item );
}
+ /* Add the endpoint to the multicast endpoints list */
+ if ( p_port->p_adapter->params.mc_garbage_collector )
+ {
+ p_endpt->is_mcast_endpoint = TRUE;
+ p_endpt->mcast_count = 0;
+ p_endpt->mcast_send_timestamp =
cl_get_time_stamp_sec();
+ }
+ else
+ p_endpt->is_mcast_endpoint = FALSE;
cl_obj_unlock( &p_port->obj );
/* Try to send all pending sends. */
@@ -5659,4 +5985,81 @@
IPOIB_EXIT( IPOIB_DBG_MCAST );
}
+static void CL_API __port_mcast_garbage_collector
+ (IN void*
context )
+{
+ ipoib_port_t *p_port = context;
+ const uint32_t WAIT_US =
p_port->p_adapter->params.mc_leave_rescan * 1000000; /*
mc_leave_rescan is configured in seconds; cl_event_wait_on takes microseconds */
+ const mac_addr_t DEFAULT_MCAST_GROUP = {0x01, 0x00, 0x5e, 0x00,
0x00, 0x01};
+ IPOIB_ENTER( IPOIB_DBG_ENDPT );
+
+ CL_ASSERT( p_port->p_adapter->params.mc_garbage_collector );
+
+ while(cl_event_wait_on(&p_port->mcast_event,WAIT_US,FALSE) !=
STATUS_SUCCESS)
+ {
+ /*
+ * One collection pass, run each time the wait on mcast_event
+ * ends without success (presumably a timeout after WAIT_US).
+ * The loop, and with it the thread routine, ends once the
+ * wait succeeds, i.e. after mcast_event has been signalled.
+ *
+ * Stale multicast endpoints are first unlinked from the
+ * endpoint maps under the port lock and collected in
+ * destroy_mc_list; they are destroyed only after the lock
+ * has been released.
+ */
+
+ cl_map_item_t *p_item;
+ ipoib_endpt_t *p_endpt;
+ cl_qlist_t destroy_mc_list;
+ const uint32_t CURRENT_TIME_SEC = cl_get_time_stamp_sec();
+
+ cl_qlist_init( &destroy_mc_list );
+
+ cl_obj_lock( &p_port->obj );
+
+ p_item = cl_qmap_head( &p_port->endpt_mgr.mac_endpts );
+ while( p_item != cl_qmap_end( &p_port->endpt_mgr.mac_endpts ) )
+ {
+ p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, mac_item );
+ p_item = cl_qmap_next( p_item );
+
+ /*
+ * The endpoint is collected when all of the following hold:
+ * it has a multicast handle, it is flagged as a multicast
+ * endpoint, it is not flagged as a listener, its MAC
+ * differs from DEFAULT_MCAST_GROUP (assuming cl_memcmp
+ * follows memcmp semantics), and its recorded send
+ * timestamp is more than mc_aging_time seconds old.
+ */
+
+ if( p_endpt->h_mcast &&
+ p_endpt->is_mcast_endpoint &&
+
(!p_endpt->is_mcast_listener) &&
+ cl_memcmp( &p_endpt->mac, &DEFAULT_MCAST_GROUP,
sizeof(mac_addr_t) ) &&
+ CURRENT_TIME_SEC - p_endpt->mcast_send_timestamp >
p_port->p_adapter->params.mc_aging_time)
+ {
+ cl_qmap_remove_item( &p_port->endpt_mgr.mac_endpts,
+ &p_endpt->mac_item );
+ cl_fmap_remove_item( &p_port->endpt_mgr.gid_endpts,
+ &p_endpt->gid_item );
+
+ if( p_endpt->dlid )
+ {
+ cl_qmap_remove_item( &p_port->endpt_mgr.lid_endpts,
+ &p_endpt->lid_item );
+ p_endpt->dlid = 0;
+ }
+
+ cl_qlist_insert_tail(
+ &destroy_mc_list,
&p_endpt->mac_item.pool_item.list_item );
+ }
+ }
+ cl_obj_unlock( &p_port->obj );
+
+ /*
+ * The port lock has been released: log and destroy every
+ * endpoint that was moved to destroy_mc_list above.
+ */
+ while( cl_qlist_count( &destroy_mc_list ) )
+ {
+ p_endpt = PARENT_STRUCT( cl_qlist_head( &destroy_mc_list ),
+ ipoib_endpt_t,
mac_item.pool_item.list_item );
+
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,
+ ("mcast garbage collector: destroying endpoint
%02x:%02x:%02x:%02x:%02x:%02x \n",
+ p_endpt->mac.addr[0],
+ p_endpt->mac.addr[1],
+ p_endpt->mac.addr[2],
+ p_endpt->mac.addr[3],
+ p_endpt->mac.addr[4],
+ p_endpt->mac.addr[5]) );
+
+ cl_obj_destroy( &PARENT_STRUCT( cl_qlist_remove_head(
&destroy_mc_list ),
+ ipoib_endpt_t, mac_item.pool_item.list_item )->obj );
+ }
+ }
+
+ IPOIB_EXIT( IPOIB_DBG_ENDPT );
+}
Index: ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- ulp/ipoib/kernel/ipoib_port.h (revision 1047)
+++ ulp/ipoib/kernel/ipoib_port.h (working copy)
@@ -63,6 +63,7 @@
*/
#define IPOIB_USE_DMA 1
+#define IPOIB_MCAST_TIMESTAMP_THRESHOLD 10000
#define IPOIB_PORT_FROM_PACKET( P ) \
(((ipoib_port_t**)P->MiniportReservedEx)[0])
@@ -506,6 +507,11 @@
atomic32_t
endpt_rdr;
atomic32_t
hdr_idx;
+ boolean_t
mcast_event_init; /* Not really necessary, since
cl_event_destroy is NULL */
+ cl_event_t
mcast_event; /* Multicast garabage collector thread
terminate event */
+
+ boolean_t
mcast_thread_init;
+ cl_thread_t
mcast_thread; /* Multicast garbage collector thread */
ipoib_hdr_t
hdr[1]; /* Must be last! */
} ipoib_port_t;
Index: ulp/ipoib/kernel/netipoib.inf
===================================================================
--- ulp/ipoib/kernel/netipoib.inf (revision 1047)
+++ ulp/ipoib/kernel/netipoib.inf (working copy)
@@ -125,7 +125,28 @@
HKR, Ndi\Params\PayloadMtu, Default, 0, "2044"
HKR, Ndi\Params\PayloadMtu, Min, 0, "60"
HKR, Ndi\Params\PayloadMtu, Max, 0, "2044"
+HKR, Ndi\Params\MCGarbageCollector, ParamDesc, 0, "MC
garbage collector"
+HKR, Ndi\Params\MCGarbageCollector, Type, 0,
"enum"
+HKR, Ndi\Params\MCGarbageCollector, Default, 0, "1"
+HKR, Ndi\Params\MCGarbageCollector, Optional, 0,
"0"
+HKR, Ndi\Params\MCGarbageCollector\enum,"0", 0,
"Disabled"
+HKR, Ndi\Params\MCGarbageCollector\enum,"1", 0,
"Enabled"
+HKR, Ndi\Params\MCLeaveRescan, ParamDesc, 0, "MC
leave rescan (sec)"
+HKR, Ndi\Params\MCLeaveRescan, Type, 0,
"dword"
+HKR, Ndi\Params\MCLeaveRescan, Default, 0, "130"
+HKR, Ndi\Params\MCLeaveRescan, Optional, 0,
"0"
+HKR, Ndi\Params\MCLeaveRescan, Min,
0, "1"
+HKR, Ndi\Params\MCLeaveRescan, Max, 0,
"3600"
+
+HKR, Ndi\Params\MCAgingTime, ParamDesc, 0,
"MC aging time (sec)"
+HKR, Ndi\Params\MCAgingTime, Type,
0, "dword"
+HKR, Ndi\Params\MCAgingTime, Default, 0, "260"
+HKR, Ndi\Params\MCAgingTime, Optional,
0, "0"
+HKR, Ndi\Params\MCAgingTime, Min,
0, "1"
+HKR, Ndi\Params\MCAgingTime, Max,
0, "3600"
+HKLM, System\CurrentControlSet\Services\Tcpip\Parameters, IGMPVersion,
0x00010001, 3
+
[IpoibService]
DisplayName = %IpoibServiceDispName%
ServiceType = 1 ;%SERVICE_KERNEL_DRIVER%
Slava Strebkov
SW Engineer
Voltaire
099718750
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080407/6efd9517/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mcast_garbage_collector.diff
Type: application/octet-stream
Size: 27356 bytes
Desc: mcast_garbage_collector.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080407/6efd9517/attachment.obj>
More information about the ofw
mailing list