[ofw][patch] mcast garbage collector
Slava Strebkov
slavas at voltaire.com
Thu Jun 12 02:49:53 PDT 2008
Hi,
Please review the mcast garbage collector patch, which removes old multicast
endpoints of an IPoIB port.
The rescan time and aging time are parameters and can be configured from
the device property page.
The SA retry count and timeout are also configurable from the registry.
Index: core/al/al_common.h
===================================================================
--- core/al/al_common.h (revision 1261)
+++ core/al/al_common.h (working copy)
@@ -49,6 +49,8 @@
extern uint32_t g_ioc_query_retries;
extern uint32_t g_ioc_poll_interval;
+extern uint32_t g_mc_destr_retr_timeout;
+extern uint32_t g_mc_dest_retr_count;
/* Wait operations performed in user-mode must be alertable. */
#ifdef CL_KERNEL
Index: core/al/al_mcast.c
===================================================================
--- core/al/al_mcast.c (revision 1261)
+++ core/al/al_mcast.c (working copy)
@@ -96,6 +96,9 @@
static void
__free_attach(
IN al_obj_t
*p_obj );
+/* Mcast destroy timeout and retry count read from registry */
+uint32_t g_mc_destr_retr_timeout = 1000;
+uint32_t g_mc_dest_retr_count = 10;
#endif
@@ -271,8 +274,14 @@
sa_mad_data.p_attr = &h_mcast->member_rec;
ref_al_obj( &h_mcast->obj );
- status = al_send_sa_req(
- &h_mcast->sa_dereg_req, h_mcast->port_guid, 500,
0, &sa_mad_data, 0 );
+ status =
+#if defined( CL_KERNEL )
+ al_send_sa_req(
+ &h_mcast->sa_dereg_req, h_mcast->port_guid,
g_mc_destr_retr_timeout, g_mc_dest_retr_count, &sa_mad_data, 0 );
+#else
+ al_send_sa_req(
+ &h_mcast->sa_dereg_req, h_mcast->port_guid, 500, 0,
&sa_mad_data, 0 );
+#endif
if( status != IB_SUCCESS )
deref_al_obj( &h_mcast->obj );
Index: core/bus/kernel/bus_driver.c
===================================================================
--- core/bus/kernel/bus_driver.c (revision 1261)
+++ core/bus/kernel/bus_driver.c (working copy)
@@ -326,7 +326,7 @@
{
NTSTATUS
status;
/* Remember the terminating entry in the table below. */
- RTL_QUERY_REGISTRY_TABLE table[10];
+ RTL_QUERY_REGISTRY_TABLE table[12];
UNICODE_STRING
param_path;
UNICODE_STRING
pkeyString;
UNICODE_STRING
empy_string;
@@ -431,7 +431,19 @@
table[8].EntryContext = &pkeyString;
table[8].DefaultType = REG_SZ;
table[8].DefaultData = &empy_string;
- table[8].DefaultLength = 1024*sizeof(WCHAR);
+ table[9].Flags = RTL_QUERY_REGISTRY_DIRECT;
+ table[9].Name = L"McDestrRetrCnt";
+ table[9].EntryContext = &g_mc_dest_retr_count;
+ table[9].DefaultType = REG_DWORD;
+ table[9].DefaultData = &g_mc_dest_retr_count;
+ table[9].DefaultLength = sizeof(ULONG);
+
+ table[10].Flags = RTL_QUERY_REGISTRY_DIRECT;
+ table[10].Name = L"McDestrRetrTimeout";
+ table[10].EntryContext = &g_mc_destr_retr_timeout;
+ table[10].DefaultType = REG_DWORD;
+ table[10].DefaultData = &g_mc_destr_retr_timeout;
+ table[10].DefaultLength = sizeof(ULONG);
/* Have at it! */
status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE,
param_path.Buffer, table, NULL, NULL );
Index: inc/complib/cl_types.h
===================================================================
--- inc/complib/cl_types.h (revision 1261)
+++ inc/complib/cl_types.h (working copy)
@@ -46,7 +46,7 @@
#include <complib/cl_types_osd.h>
-
+typedef uint8_t net8_t;
typedef uint16_t net16_t;
typedef uint32_t net32_t;
typedef uint64_t net64_t;
Index: inc/kernel/ip_packet.h
===================================================================
--- inc/kernel/ip_packet.h (revision 1261)
+++ inc/kernel/ip_packet.h (working copy)
@@ -196,6 +196,7 @@
#define IP_PROT_IP 4
#define IP_PROT_TCP 6
#define IP_PROT_UDP 17
+#define IP_PROT_IGMP 2
#include <complib/cl_packon.h>
@@ -355,7 +356,58 @@
*********/
#include <complib/cl_packoff.h>
+#define IGMP_V2_MEMBERSHIP_QUERY 0x11
+#define IGMP_V2_MEMBERSHIP_REPORT 0x16
+#define IGMP_V1_MEMBERSHIP_REPORT 0x12 // for backward
compatibility with IGMPv1
+#define IGMP_V2_LEAVE_GROUP 0x17
+
+#include <complib/cl_packon.h>
+/****s* IB Network Drivers/igmp_v2_hdr_t
+* NAME
+* igmp_v2_hdr_t
+*
+* DESCRIPTION
+* Defines the IGMPv2 header for IP packets.
+*
+* SYNOPSIS
+*/
+typedef struct _igmp_v2_hdr
+{
+ net8_t type;
+ net8_t max_resp_time;
+ net16_t chksum;
+ net32_t group_address;
+} PACK_SUFFIX igmp_v2_hdr_t;
+/*
+* FIELDS
+* type
+* type of IGMPv2 message: query/report/leave
+*
+* max_resp_time
+* The Max Response Time field is meaningful only in
Membership Query
+* messages, and specifies the maximum allowed time
before sending a
+* responding report in units of 1/10 second. In
all other messages, it
+* is set to zero by the sender and ignored by
receivers.
+*
+* chksum
+* The checksum is the 16-bit one's complement of
the one's complement
+* sum of the whole IGMP message (the entire IP payload).
+*
+* group_address
+* In a Membership Query message, the group address
field is set to zero
+* when sending a General Query, and set to the group address
being
+* queried when sending a Group-Specific Query.
+*
+* In a Membership Report or Leave Group message, the group
address
+* field holds the IP multicast group address of the group being
+* reported or left.
+*
+* SEE ALSO
+* IB Network Drivers, eth_hdr_t, arp_pkt_t, ip_hdr_t, tcp_hdr_t
+*********/
+#include <complib/cl_packoff.h>
+
#define DHCP_PORT_SERVER CL_HTON16(67)
#define DHCP_PORT_CLIENT CL_HTON16(68)
Index: ulp/ipoib/kernel/ipoib_adapter.h
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.h (revision 1261)
+++ ulp/ipoib/kernel/ipoib_adapter.h (working copy)
@@ -74,6 +74,9 @@
uint32_t payload_mtu;
uint32_t xfer_block_size;
mac_addr_t conf_mac;
+ boolean_t mc_garbage_collector;
+ uint32_t mc_leave_rescan;
+ uint32_t mc_aging_time;
} ipoib_params_t;
/*
Index: ulp/ipoib/kernel/ipoib_driver.c
===================================================================
--- ulp/ipoib/kernel/ipoib_driver.c (revision 1261)
+++ ulp/ipoib/kernel/ipoib_driver.c (working copy)
@@ -526,6 +526,38 @@
}
p_adapter->params.recv_pool_ratio =
p_param->ParameterData.IntegerData;
+ /* Required: MC garbage collector. */
+ RtlInitUnicodeString( &keyword, L"MCGarbageCollector" );
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword, NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("MC garbage collector parameter is missing.\n") );
+ return status;
+ }
+ p_adapter->params.mc_garbage_collector =
(p_param->ParameterData.IntegerData != 0);
+ /* Optional: MC leave rescan (sec) for the MC garbage
collector thread. */
+ RtlInitUnicodeString( &keyword, L"MCLeaveRescan" );
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword,
NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ p_adapter->params.mc_leave_rescan = 130;
+ }
+ else
+ p_adapter->params.mc_leave_rescan =
p_param->ParameterData.IntegerData;
+
+ /* Optional: MC aging time (sec) */
+ RtlInitUnicodeString( &keyword, L"MCAgingTime" );
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword,
NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ p_adapter->params.mc_aging_time = 260;
+ }
+ else
+ p_adapter->params.mc_aging_time =
p_param->ParameterData.IntegerData;
/* required: MTU size. */
RtlInitUnicodeString( &keyword, L"PayloadMtu" );
NdisReadConfiguration(
Index: ulp/ipoib/kernel/ipoib_endpoint.h
===================================================================
--- ulp/ipoib/kernel/ipoib_endpoint.h (revision 1261)
+++ ulp/ipoib/kernel/ipoib_endpoint.h (working copy)
@@ -61,7 +61,10 @@
TO_LONG_PTR( ib_av_handle_t , h_av) ;
boolean_t
expired;
ib_al_ifc_t
*p_ifc;
-
+ uint32_t
mcast_send_timestamp;
+ int32_t mcast_count;
+ boolean_t is_mcast_endpoint;
+ boolean_t
is_mcast_listener;
} ipoib_endpt_t;
/*
* FIELDS
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c (revision 1261)
+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -66,7 +66,7 @@
ipoib_port_t *gp_ipoib_port;
#endif
-
+static void __port_do_mcast_garbage(ipoib_port_t *p_port);
/***********************************************************************
*******
*
* Declarations
@@ -94,6 +94,8 @@
__port_free(
IN cl_obj_t*
const p_obj );
+static void CL_API __port_mcast_garbage_collector
+ (IN void*
context );
/***********************************************************************
*******
*
@@ -290,6 +292,14 @@
IN OUT
ipoib_send_desc_t* const p_desc );
static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+ IN ipoib_port_t*
const p_port,
+ IN const ip_hdr_t* const
p_ip_hdr,
+ IN size_t
iph_options_size,
+ IN NDIS_BUFFER*
p_buf,
+ IN size_t
buf_len );
+
+static NDIS_STATUS
__send_mgr_filter_udp(
IN ipoib_port_t*
const p_port,
IN const ip_hdr_t* const
p_ip_hdr,
@@ -579,6 +589,11 @@
KeInitializeEvent( &p_port->sa_event, NotificationEvent,
TRUE );
KeInitializeEvent( &p_port->leave_mcast_event,
NotificationEvent, TRUE );
+ p_port->mcast_event_init = FALSE;
+ cl_event_construct(&p_port->mcast_event);
+
+ p_port->mcast_thread_init = FALSE;
+ cl_thread_construct(&p_port->mcast_thread);
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -653,6 +668,18 @@
return status;
}
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ /* Initialize multicast garbage collector event */
+ cl_status = cl_event_init(&p_port->mcast_event, TRUE);
+ if( cl_status != CL_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_event_init returned %#x\n", cl_status) );
+ return IB_ERROR;
+ }
+ p_port->mcast_event_init = TRUE;
+ }
/* We only ever destroy from the PnP callback thread. */
cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,
__port_destroying, __port_cleanup, __port_free
);
@@ -746,7 +773,26 @@
CL_ASSERT( p_obj );
p_port = PARENT_STRUCT( p_obj, ipoib_port_t, obj );
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ /* Destroy multicast garbage collector thread */
+ if(p_port->mcast_thread_init)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ cl_event_signal(&p_port->mcast_event);
+
+ cl_thread_destroy(&p_port->mcast_thread);
+ p_port->mcast_thread_init = FALSE;
+ }
+
+ if (p_port->mcast_event_init)
+ {
+ cl_event_destroy(&p_port->mcast_event);
+ p_port->mcast_event_init = FALSE;
+ }
+ }
+
__endpt_mgr_destroy( p_port );
__recv_mgr_destroy( p_port );
__send_mgr_destroy( p_port );
@@ -2131,6 +2177,30 @@
p_eth->hdr.src = p_src->mac;
p_eth->hdr.dst = p_dst->mac;
+ /* Check if multicast packet and update endpoint timestamp if
needed */
+
+ if ( ETH_IS_MULTICAST(p_eth->hdr.dst.addr) &&
+ p_eth->hdr.type == ETH_PROT_TYPE_IP &&
+ !ETH_IS_BROADCAST(p_eth->hdr.dst.addr) )
+ {
+ /*
+
p_port->p_adapter->params.mc_garbage_collector doesn't
+ exist in this context , so we use
p_dst->is_mcast_endpoint
+ as indicator for mc_garbage collector
activity ( enable/disable )
+ */
+ if ( p_dst->is_mcast_endpoint &&
+ ++(p_dst->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD)
+ {
+ CL_ASSERT(p_dst->h_mcast != NULL);
+ CL_ASSERT(p_dst->is_mcast_endpoint);
+
+ p_dst->mcast_count = 0;
+ p_dst->mcast_send_timestamp = cl_get_time_stamp_sec();
+ }
+
+ p_eth->hdr.dst.addr[1] = 0;
+ p_eth->hdr.dst.addr[3] = p_eth->hdr.dst.addr[3] &
0x7f;
+ }
IPOIB_EXIT( IPOIB_DBG_RECV );
return IB_SUCCESS;
}
@@ -3110,6 +3180,26 @@
if( p_ip_hdr->offset ||
p_ip_hdr->prot != IP_PROT_UDP )
{
+ /* Check if this packet is IGMP */
+ if ( p_ip_hdr->prot == IP_PROT_IGMP )
+ {
+ /*
+ For IGMP packets the IP header was observed to
arrive in 2 NDIS_BUFFERs:
+ 1. the IP header
+ 2. the IP options
+ So to reach the IGMP header
we need to skip the IP options NDIS_BUFFER.
+ */
+ size_t iph_size_in_bytes =
(p_ip_hdr->ver_hl & 0xf) * 4;
+ size_t iph_options_size =
iph_size_in_bytes - buf_len;
+ buf_len -= sizeof(ip_hdr_t);
+
+ /*
+ The IGMP packet that arrived
may not be IGMPv2,
+ but IGMPv1 or IGMPv3.
+ We pass it to
__send_mgr_filter_igmp_v2() anyway.
+ */
+ __send_mgr_filter_igmp_v2(p_port,
p_ip_hdr, iph_options_size, p_buf, buf_len);
+ }
/* Not a UDP packet. */
cl_perf_start( SendTcp );
status = __send_gen( p_port, p_desc );
@@ -3131,6 +3221,129 @@
static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+ IN ipoib_port_t*
const p_port,
+ IN const ip_hdr_t* const
p_ip_hdr,
+ IN size_t
iph_options_size,
+ IN NDIS_BUFFER*
p_buf,
+ IN size_t
buf_len )
+{
+ igmp_v2_hdr_t *p_igmp_v2_hdr = NULL;
+ NDIS_STATUS endpt_status;
+ ipoib_endpt_t* p_endpt = NULL;
+ mac_addr_t fake_mcast_mac;
+
+ IPOIB_ENTER( IPOIB_DBG_SEND );
+
+ if( !buf_len )
+ {
+ // To get the IGMP packet we need to skip the ip
options NDIS_BUFFER (if exists)
+ while ( iph_options_size )
+ {
+ NdisGetNextBuffer( p_buf, &p_buf );
+ if( !p_buf )
+ {
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to
get IGMPv2 header buffer.\n") );
+ return
NDIS_STATUS_FAILURE;
+ }
+ NdisQueryBufferSafe( p_buf,
&p_igmp_v2_hdr, &buf_len, NormalPagePriority );
+ if( !p_igmp_v2_hdr )
+ {
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to
query IGMPv2 header buffer.\n") );
+ return
NDIS_STATUS_FAILURE;
+ }
+
+ iph_options_size-=buf_len;
+ }
+
+ NdisGetNextBuffer( p_buf, &p_buf );
+ if( !p_buf )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+ ("Failed to get IGMPv2
header buffer.\n") );
+ return NDIS_STATUS_FAILURE;
+ }
+ NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr,
&buf_len, NormalPagePriority );
+ if( !p_igmp_v2_hdr )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+ ("Failed to query IGMPv2
header buffer.\n") );
+ return NDIS_STATUS_FAILURE;
+ }
+ }
+ else
+ {
+ p_igmp_v2_hdr = (igmp_v2_hdr_t*)(p_ip_hdr + 1);
+ }
+ /* Verify the buffer is large enough to hold an IGMPv2 header. */
+ if( buf_len < sizeof(igmp_v2_hdr_t) )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
+ ("Buffer not large enough for IGMPv2
packet.\n") );
+ return NDIS_STATUS_BUFFER_TOO_SHORT;
+ }
+
+ // build fake mac from igmp packet group address
+ fake_mcast_mac.addr[0] = 1;
+ fake_mcast_mac.addr[1] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[0] & 0x0f;
+ fake_mcast_mac.addr[2] = 0x5E;
+ fake_mcast_mac.addr[3] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[1];
+ fake_mcast_mac.addr[4] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[2];
+ fake_mcast_mac.addr[5] = ((unsigned
char*)&p_igmp_v2_hdr->group_address)[3];
+
+ switch ( p_igmp_v2_hdr->type )
+ {
+ case IGMP_V2_MEMBERSHIP_REPORT:
+ /*
+ This means that somebody opened a
listener on this group.
+ Change type of mcast endpt to SEND_RECV
endpt, so the mcast garbage collector
+ will not delete this mcast endpt.
+ */
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+ ("Catched IGMP_V2_MEMBERSHIP_REPORT
message\n") );
+ endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
&p_endpt );
+ if ( p_endpt )
+ {
+ cl_obj_lock( &p_port->obj );
+ p_endpt->is_mcast_listener = TRUE;
+ cl_obj_unlock( &p_port->obj );
+ ipoib_endpt_deref( p_endpt );
+ }
+ break;
+
+ case IGMP_V2_LEAVE_GROUP:
+ /*
+ This means that somebody closed a
listener on this group.
+ Change type of mcast endpt to SEND_ONLY
endpt, so the mcast
+ garbage collector will delete this
mcast endpt next time.
+ */
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+ ("Catched IGMP_V2_LEAVE_GROUP
message\n") );
+ endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
&p_endpt );
+ if ( p_endpt )
+ {
+ cl_obj_lock( &p_port->obj );
+ p_endpt->is_mcast_listener = FALSE;
+
+ cl_obj_unlock( &p_port->obj );
+ ipoib_endpt_deref( p_endpt );
+ }
+
+ break;
+
+ default:
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+ ("Send Unknown IGMP message:
0x%x \n", p_igmp_v2_hdr->type ) );
+ break;
+ }
+
+ IPOIB_EXIT( IPOIB_DBG_SEND );
+ return NDIS_STATUS_SUCCESS;
+}
+
+static NDIS_STATUS
__send_mgr_filter_udp(
IN ipoib_port_t*
const p_port,
IN const ip_hdr_t* const
p_ip_hdr,
@@ -3577,7 +3790,22 @@
return NDIS_STATUS_PENDING;
}
}
+ else if ( p_port->p_adapter->params.mc_garbage_collector &&
+ status == NDIS_STATUS_SUCCESS &&
+ ETH_IS_MULTICAST(
p_eth_hdr->dst.addr ) &&
+ !ETH_IS_BROADCAST(
p_eth_hdr->dst.addr ) )
+ {
+ CL_ASSERT( (*pp_endpt) );
+ CL_ASSERT((*pp_endpt)->h_mcast != NULL);
+ CL_ASSERT((*pp_endpt)->is_mcast_endpoint);
+ if (++((*pp_endpt)->mcast_count) >
IPOIB_MCAST_TIMESTAMP_THRESHOLD)
+ {
+ (*pp_endpt)->mcast_count = 0;
+ (*pp_endpt)->mcast_send_timestamp =
cl_get_time_stamp_sec();
+ }
+ }
+
IPOIB_EXIT( IPOIB_DBG_SEND );
return status;
}
@@ -4706,6 +4934,8 @@
ib_query_req_t query;
ib_user_query_t info;
ib_portinfo_record_t port_rec;
+ cl_status_t cl_status;
+ BOOLEAN success = TRUE;
IPOIB_ENTER( IPOIB_DBG_INIT );
@@ -4740,17 +4970,54 @@
/* reference the object for the multicast query. */
ipoib_port_ref( p_port, ref_port_up );
+ __try
+ {
status = p_port->p_adapter->p_ifc->query(
p_port->p_adapter->h_al, &query,
&p_port->ib_mgr.h_query );
if( status != IB_SUCCESS )
{
- KeSetEvent( &p_port->sa_event, EVENT_INCREMENT,
FALSE );
- ipoib_set_inactive( p_port->p_adapter );
- ipoib_port_deref( p_port, ref_port_up );
+ success = FALSE;
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
("ib_query returned %s\n",
p_port->p_adapter->p_ifc->get_err_str( status )) );
- return;
+ __leave;
+ }
+
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ cl_status = cl_event_reset(&p_port->mcast_event);
+ if( cl_status != CL_SUCCESS )
+ {
+ success = FALSE;
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_event_reset returned %#x\n", cl_status) );
+ __leave;
+ }
+
+ cl_status = cl_thread_init(
+ &p_port->mcast_thread,
+ __port_mcast_garbage_collector,
+ p_port,
+ "mcast_garbage");
+ if( cl_status != CL_SUCCESS )
+ {
+ success = FALSE;
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_thread_init returned %#x\n", cl_status) );
+ __leave;
+ }
+ p_port->mcast_thread_init = TRUE;
+ }
+ }
+ __finally
+ {
+ if (!success)
+ {
+ KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
+ ipoib_set_inactive( p_port->p_adapter );
+ ipoib_port_deref( p_port, ref_port_up );
+ }
}
IPOIB_EXIT( IPOIB_DBG_INIT );
@@ -5229,6 +5496,16 @@
return;
}
+ /* Destroy multicast garbage collector thread */
+
+ if(p_port->p_adapter->params.mc_garbage_collector &&
p_port->mcast_thread_init)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ cl_event_signal(&p_port->mcast_event);
+
+ cl_thread_destroy(&p_port->mcast_thread);
+ p_port->mcast_thread_init = FALSE;
+ }
KeResetEvent(&p_port->leave_mcast_event);
/* Reset all endpoints so we don't flush our ARP cache. */
@@ -5656,6 +5933,15 @@
&p_port->endpt_mgr.lid_endpts,
p_endpt->dlid, &p_endpt->lid_item );
CL_ASSERT( p_qitem == &p_endpt->lid_item );
}
+ /* Mark the endpoint as a multicast endpoint when the garbage collector is enabled */
+ if ( p_port->p_adapter->params.mc_garbage_collector )
+ {
+ p_endpt->is_mcast_endpoint = TRUE;
+ p_endpt->mcast_count = 0;
+ p_endpt->mcast_send_timestamp =
cl_get_time_stamp_sec();
+ }
+ else
+ p_endpt->is_mcast_endpoint = FALSE;
cl_obj_unlock( &p_port->obj );
/* Try to send all pending sends. */
@@ -5712,6 +5998,87 @@
IPOIB_EXIT( IPOIB_DBG_MCAST );
}
+static void __port_do_mcast_garbage(ipoib_port_t *p_port)
+{
+ const mac_addr_t DEFAULT_MCAST_GROUP = {0x01, 0x00, 0x5e, 0x00,
0x00, 0x01};
+ /* Do garbage collecting... */
+ cl_map_item_t *p_item;
+ ipoib_endpt_t *p_endpt;
+ cl_qlist_t destroy_mc_list;
+ uint8_t cnt;
+ const static GC_MAX_LEAVE_NUM = 80;
+ const uint32_t CURRENT_TIME_SEC = cl_get_time_stamp_sec();
+ cl_qlist_init( &destroy_mc_list );
+ cl_obj_lock( &p_port->obj );
+ cnt = 0;
+ p_item = cl_qmap_head( &p_port->endpt_mgr.mac_endpts );
+ while( (p_item != cl_qmap_end( &p_port->endpt_mgr.mac_endpts
)) && (cnt < GC_MAX_LEAVE_NUM))
+ {
+ p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t,
mac_item );
+ p_item = cl_qmap_next( p_item );
+
+ /* Check if the current endpoint is an old
multicast item */
+
+ if( p_endpt->h_mcast &&
+ p_endpt->is_mcast_endpoint &&
+ (!p_endpt->is_mcast_listener) &&
+ ( cl_memcmp( &p_endpt->mac,
&DEFAULT_MCAST_GROUP, sizeof(mac_addr_t) ) &&
+ CURRENT_TIME_SEC -
p_endpt->mcast_send_timestamp >
p_port->p_adapter->params.mc_aging_time))
+ {
+ cl_qmap_remove_item(
&p_port->endpt_mgr.mac_endpts,
+ &p_endpt->mac_item );
+ cl_fmap_remove_item(
&p_port->endpt_mgr.gid_endpts,
+ &p_endpt->gid_item );
+
+ if( p_endpt->dlid )
+ {
+ cl_qmap_remove_item(
&p_port->endpt_mgr.lid_endpts,
+
&p_endpt->lid_item );
+ p_endpt->dlid = 0;
+ }
+
+ cl_qlist_insert_tail(
+ &destroy_mc_list,
&p_endpt->mac_item.pool_item.list_item );
+ cnt++;
+ }
+ }
+ cl_obj_unlock( &p_port->obj );
+
+ /* Destroy all multicast endpoints now that we have released
the lock. */
+ while( cl_qlist_count( &destroy_mc_list ) )
+ {
+ p_endpt = PARENT_STRUCT( cl_qlist_head(
&destroy_mc_list ),
+
ipoib_endpt_t, mac_item.pool_item.list_item );
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION,
IPOIB_DBG_ENDPT,
+ ("mcast garbage collector: destroying
endpoint %02x:%02x:%02x:%02x:%02x:%02x \n",
+ p_endpt->mac.addr[0],
+ p_endpt->mac.addr[1],
+ p_endpt->mac.addr[2],
+ p_endpt->mac.addr[3],
+ p_endpt->mac.addr[4],
+ p_endpt->mac.addr[5]) );
+
+ cl_obj_destroy( &PARENT_STRUCT(
cl_qlist_remove_head( &destroy_mc_list ),
+ ipoib_endpt_t,
mac_item.pool_item.list_item )->obj );
+ }
+}
+
+static void CL_API __port_mcast_garbage_collector
+ (IN void*
context )
+{
+ cl_status_t status;
+ ipoib_port_t *p_port = context;
+ const uint32_t WAIT_US =
p_port->p_adapter->params.mc_leave_rescan * 1000000; /*
cl_event_wait_on use usec */
+ IPOIB_ENTER( IPOIB_DBG_ENDPT );
+
+ CL_ASSERT( p_port->p_adapter->params.mc_garbage_collector );
+
+ while((status =
cl_event_wait_on(&p_port->mcast_event,WAIT_US,FALSE)) != STATUS_SUCCESS)
+ {
+ __port_do_mcast_garbage(p_port);
+ }
+ IPOIB_EXIT( IPOIB_DBG_ENDPT );
+}
Index: ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- ulp/ipoib/kernel/ipoib_port.h (revision 1261)
+++ ulp/ipoib/kernel/ipoib_port.h (working copy)
@@ -63,6 +63,7 @@
*/
#define IPOIB_USE_DMA 1
+#define IPOIB_MCAST_TIMESTAMP_THRESHOLD 10000
#define IPOIB_PORT_FROM_PACKET( P ) \
(((ipoib_port_t**)P->MiniportReservedEx)[0])
@@ -506,6 +507,11 @@
atomic32_t
endpt_rdr;
atomic32_t
hdr_idx;
+ boolean_t
mcast_event_init;
+ cl_event_t
mcast_event; /* Multicast garbage collector thread
terminate event */
+
+ boolean_t
mcast_thread_init;
+ cl_thread_t
mcast_thread; /* Multicast garbage collector thread */
uint16_t pkey_index;
ipoib_hdr_t
hdr[1]; /* Must be last! */
Index: ulp/ipoib/kernel/netipoib.inf
===================================================================
--- ulp/ipoib/kernel/netipoib.inf (revision 1261)
+++ ulp/ipoib/kernel/netipoib.inf (working copy)
@@ -126,6 +126,26 @@
HKR, Ndi\Params\PayloadMtu, Min, 0, "60"
HKR, Ndi\Params\PayloadMtu, Max, 0, "2044"
+HKR, Ndi\Params\MCGarbageCollector, ParamDesc, 0, "MC
garbage collector"
+HKR, Ndi\Params\MCGarbageCollector, Type, 0,
"enum"
+HKR, Ndi\Params\MCGarbageCollector, Default, 0, "1"
+HKR, Ndi\Params\MCGarbageCollector, Optional, 0,
"0"
+HKR, Ndi\Params\MCGarbageCollector\enum,"0", 0,
"Disabled"
+HKR, Ndi\Params\MCGarbageCollector\enum,"1", 0,
"Enabled"
+
+HKR, Ndi\Params\MCLeaveRescan, ParamDesc, 0, "MC
leave rescan (sec)"
+HKR, Ndi\Params\MCLeaveRescan, Type, 0,
"dword"
+HKR, Ndi\Params\MCLeaveRescan, Default, 0, "130"
+HKR, Ndi\Params\MCLeaveRescan, Optional, 0,
"0"
+HKR, Ndi\Params\MCLeaveRescan, Min,
0, "1"
+HKR, Ndi\Params\MCLeaveRescan, Max, 0,
"3600"
+
+HKR, Ndi\Params\MCAgingTime, ParamDesc, 0,
"MC aging time (sec)"
+HKR, Ndi\Params\MCAgingTime, Type,
0, "dword"
+HKR, Ndi\Params\MCAgingTime, Default, 0, "260"
+HKR, Ndi\Params\MCAgingTime, Optional,
0, "0"
+HKR, Ndi\Params\MCAgingTime, Min,
0, "1"
+HKR, Ndi\Params\MCAgingTime, Max,
0, "3600"
[IpoibService]
DisplayName = %IpoibServiceDispName%
ServiceType = 1 ;%SERVICE_KERNEL_DRIVER%
Slava Strebkov
SW Engineer
Voltaire
099718750
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/640bbde7/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mcast_garbage_collector.diff
Type: application/octet-stream
Size: 25961 bytes
Desc: mcast_garbage_collector.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/640bbde7/attachment.obj>
More information about the ofw
mailing list