[ofw][patch] mcast garbage collector
Slava Strebkov
slavas at voltaire.com
Sun Jun 29 06:52:17 PDT 2008
Hi,
Please review the mcast garbage collector, which removes old endpoints of
an IPoIB port. The rescan time can be configured from the device property
page. The garbage collector is on by default and may be disabled from the
device property page. It uses an implementation of the IGMP v2 protocol.
Index: ulp/ipoib/kernel/ipoib_adapter.h
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.h (revision 1302)
+++ ulp/ipoib/kernel/ipoib_adapter.h (working copy)
@@ -74,7 +74,8 @@
uint32_t payload_mtu;
uint32_t xfer_block_size;
mac_addr_t conf_mac;
-
+ boolean_t mc_garbage_collector;
+ uint32_t mc_leave_rescan;
} ipoib_params_t;
/*
* FIELDS
Index: ulp/ipoib/kernel/ipoib_driver.c
===================================================================
--- ulp/ipoib/kernel/ipoib_driver.c (revision 1302)
+++ ulp/ipoib/kernel/ipoib_driver.c (working copy)
@@ -526,6 +526,29 @@
}
p_adapter->params.recv_pool_ratio =
p_param->ParameterData.IntegerData;
+ /* Required: MC garbage collector. */
+ RtlInitUnicodeString( &keyword, L"MCGarbageCollector" );
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword, NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("MC garbage collector parameter is missing.\n") );
+ return status;
+ }
+ p_adapter->params.mc_garbage_collector =
(p_param->ParameterData.IntegerData != 0);
+ /* Optional: MC leave rescan (sec) for the MC garbage collector thread. */
+ RtlInitUnicodeString( &keyword, L"MCLeaveRescan" );
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword,
NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ p_adapter->params.mc_leave_rescan = 260;
+ }
+ else
+ {
+ p_adapter->params.mc_leave_rescan =
p_param->ParameterData.IntegerData;
+ }
/* required: MTU size. */
RtlInitUnicodeString( &keyword, L"PayloadMtu" );
NdisReadConfiguration(
Index: ulp/ipoib/kernel/ipoib_endpoint.h
===================================================================
--- ulp/ipoib/kernel/ipoib_endpoint.h (revision 1302)
+++ ulp/ipoib/kernel/ipoib_endpoint.h (working copy)
@@ -61,7 +61,9 @@
TO_LONG_PTR( ib_av_handle_t , h_av) ;
boolean_t expired;
ib_al_ifc_t *p_ifc;
-
+ boolean_t is_in_use;
+ boolean_t is_gc_in_use;
+ boolean_t is_mcast_listener;
} ipoib_endpt_t;
/*
* FIELDS
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c (revision 1302)
+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -66,7 +66,7 @@
ipoib_port_t *gp_ipoib_port;
#endif
-
+static void __port_do_mcast_garbage(ipoib_port_t *p_port);
/***********************************************************************
*******
*
* Declarations
@@ -94,6 +94,8 @@
__port_free(
IN cl_obj_t* const
p_obj );
+static void CL_API __port_mcast_garbage_collector
+ (IN void*
context );
/***********************************************************************
*******
*
@@ -290,6 +292,14 @@
IN OUT ipoib_send_desc_t* const
p_desc );
static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+ IN ipoib_port_t* const
p_port,
+ IN const ip_hdr_t* const
p_ip_hdr,
+ IN size_t
iph_options_size,
+ IN NDIS_BUFFER*
p_buf,
+ IN size_t
buf_len );
+
+static NDIS_STATUS
__send_mgr_filter_udp(
IN ipoib_port_t* const
p_port,
IN const ip_hdr_t* const
p_ip_hdr,
@@ -489,6 +499,13 @@
#endif
}
+/* Returns the address of the first byte after the IP header, using the low
+ * nibble of ver_hl as the header length in 32-bit words. Assumes the payload
+ * lives in the same buffer as the IP header. */
+static void* GetIpPayloadPtr(const ip_hdr_t* const p_ip_hdr)
+{
+	const size_t ip_hdr_bytes = (size_t)(p_ip_hdr->ver_hl & 0xf) * 4;
+	return (void*)((uint8_t*)p_ip_hdr + ip_hdr_bytes);
+}
/***********************************************************************
*******
*
@@ -579,6 +596,11 @@
KeInitializeEvent( &p_port->sa_event, NotificationEvent, TRUE );
KeInitializeEvent( &p_port->leave_mcast_event,
NotificationEvent, TRUE );
+ p_port->mcast_event_init = FALSE;
+ cl_event_construct(&p_port->mcast_event);
+
+ p_port->mcast_thread_init = FALSE;
+ cl_thread_construct(&p_port->mcast_thread);
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -653,6 +675,12 @@
return status;
}
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ /* Initialize multicast garbage collector event */
+ KeInitializeEvent( &p_port->mcast_event,
NotificationEvent, FALSE );
+ p_port->mcast_event_init = TRUE;
+ }
/* We only ever destroy from the PnP callback thread. */
cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,
__port_destroying, __port_cleanup, __port_free );
@@ -746,7 +774,19 @@
CL_ASSERT( p_obj );
p_port = PARENT_STRUCT( p_obj, ipoib_port_t, obj );
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ /* Destroy multicast garbage collector thread */
+ if(p_port->mcast_thread_init)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ KeSetEvent( &p_port->mcast_event, 0, FALSE );
+ cl_thread_destroy(&p_port->mcast_thread);
+ p_port->mcast_thread_init = FALSE;
+ }
+ }
+
__endpt_mgr_destroy( p_port );
__recv_mgr_destroy( p_port );
__send_mgr_destroy( p_port );
@@ -2131,6 +2171,11 @@
p_eth->hdr.src = p_src->mac;
p_eth->hdr.dst = p_dst->mac;
+ if ( p_dst->is_gc_in_use)
+ {
+ CL_ASSERT(p_dst->h_mcast != NULL);
+ p_dst->is_in_use = TRUE;
+ }
IPOIB_EXIT( IPOIB_DBG_RECV );
return IB_SUCCESS;
}
@@ -3129,7 +3174,132 @@
return status;
}
+/*
+ * Inspects an outgoing IGMPv2 message and records on the matching
+ * multicast endpoint whether a local listener exists for the group, so
+ * that __port_do_mcast_garbage() keeps or collects the endpoint.
+ *
+ * p_port            Port the packet is being sent on.
+ * p_ip_hdr          IP header of the packet.
+ * iph_options_size  Bytes of IP options still to be skipped in the NDIS
+ *                   buffers that follow p_buf; only used when buf_len
+ *                   is zero.
+ * p_buf             NDIS buffer the walk starts from.
+ * buf_len           Zero when the IGMP header must be fetched from a
+ *                   following buffer; otherwise the IGMP header is
+ *                   taken to follow the IP header in place.
+ *                   NOTE(review): presumably the bytes remaining after
+ *                   the IP header in the current buffer - confirm
+ *                   against the caller.
+ *
+ * Returns NDIS_STATUS_SUCCESS once the message has been inspected
+ * (unrecognized IGMP types are only traced), NDIS_STATUS_FAILURE when
+ * the buffer chain cannot be walked, and NDIS_STATUS_BUFFER_TOO_SHORT
+ * when fewer than sizeof(igmp_v2_hdr_t) bytes are available.
+ */
+static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+	IN		ipoib_port_t* const		p_port,
+	IN	const	ip_hdr_t* const			p_ip_hdr,
+	IN		size_t				iph_options_size,
+	IN		NDIS_BUFFER*			p_buf,
+	IN		size_t				buf_len )
+{
+	igmp_v2_hdr_t	*p_igmp_v2_hdr = NULL;
+	ipoib_endpt_t	*p_endpt = NULL;
+	mac_addr_t	fake_mcast_mac;
+	const uint8_t	*p_group;
+
+	IPOIB_ENTER( IPOIB_DBG_SEND );
+
+	if( !buf_len )
+	{
+		/*
+		 * To get to the IGMP packet we need to skip the NDIS_BUFFERs
+		 * holding the IP options (if any exist).
+		 */
+		while( iph_options_size )
+		{
+			NdisGetNextBuffer( p_buf, &p_buf );
+			if( !p_buf )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to get IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+			NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr, &buf_len,
+				NormalPagePriority );
+			if( !p_igmp_v2_hdr )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to query IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+
+			/*
+			 * Both operands are unsigned: subtracting a buffer that is
+			 * longer than the options still outstanding would wrap
+			 * around and keep the loop walking past the end of the
+			 * chain. Fail explicitly instead.
+			 */
+			if( buf_len > iph_options_size )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("IP options buffer exceeds remaining options size.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+			iph_options_size -= buf_len;
+		}
+
+		/* The buffer after the options carries the IGMP header. */
+		NdisGetNextBuffer( p_buf, &p_buf );
+		if( !p_buf )
+		{
+			IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+				("Failed to get IGMPv2 header buffer.\n") );
+			return NDIS_STATUS_FAILURE;
+		}
+		NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr, &buf_len,
+			NormalPagePriority );
+		if( !p_igmp_v2_hdr )
+		{
+			IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+				("Failed to query IGMPv2 header buffer.\n") );
+			return NDIS_STATUS_FAILURE;
+		}
+	}
+	else
+	{
+		/* Assuming the IP header and options are in the same buffer. */
+		p_igmp_v2_hdr = GetIpPayloadPtr( p_ip_hdr );
+	}
+
+	/* Make sure a complete IGMPv2 header is available. */
+	if( buf_len < sizeof(igmp_v2_hdr_t) )
+	{
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("Buffer not large enough for IGMPv2 packet.\n") );
+		return NDIS_STATUS_BUFFER_TOO_SHORT;
+	}
+
+	/* Build the fake MAC from the group address of the IGMP packet. */
+	p_group = (const uint8_t*)&p_igmp_v2_hdr->group_address;
+	fake_mcast_mac.addr[0] = 1;
+	fake_mcast_mac.addr[1] = p_group[0] & 0x0f;
+	fake_mcast_mac.addr[2] = 0x5E;
+	fake_mcast_mac.addr[3] = p_group[1];
+	fake_mcast_mac.addr[4] = p_group[2];
+	fake_mcast_mac.addr[5] = p_group[3];
+
+	switch( p_igmp_v2_hdr->type )
+	{
+	case IGMP_V2_MEMBERSHIP_REPORT:
+		/*
+		 * Somebody opened a listener on this group. Mark the mcast
+		 * endpoint as having a listener so the mcast garbage collector
+		 * will not delete it.
+		 */
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Catched IGMP_V2_MEMBERSHIP_REPORT message\n") );
+		/* Only the returned endpoint matters; the status is not used. */
+		__endpt_mgr_ref( p_port, fake_mcast_mac, &p_endpt );
+		if( p_endpt )
+		{
+			cl_obj_lock( &p_port->obj );
+			p_endpt->is_mcast_listener = TRUE;
+			cl_obj_unlock( &p_port->obj );
+			ipoib_endpt_deref( p_endpt );
+		}
+		break;
+
+	case IGMP_V2_LEAVE_GROUP:
+		/*
+		 * Somebody closed the listener on this group. Clear the
+		 * listener and in-use marks so the mcast garbage collector
+		 * will delete this mcast endpoint on its next pass.
+		 */
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Catched IGMP_V2_LEAVE_GROUP message\n") );
+		/* Only the returned endpoint matters; the status is not used. */
+		__endpt_mgr_ref( p_port, fake_mcast_mac, &p_endpt );
+		if( p_endpt )
+		{
+			cl_obj_lock( &p_port->obj );
+			p_endpt->is_mcast_listener = FALSE;
+			p_endpt->is_in_use = FALSE;
+			cl_obj_unlock( &p_port->obj );
+			ipoib_endpt_deref( p_endpt );
+		}
+
+		/* Run a collection pass right away. */
+		__port_do_mcast_garbage( p_port );
+		break;
+
+	default:
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Send Unknown IGMP message: 0x%x \n", p_igmp_v2_hdr->type ) );
+		break;
+	}
+
+	IPOIB_EXIT( IPOIB_DBG_SEND );
+	return NDIS_STATUS_SUCCESS;
+}
+
static NDIS_STATUS
__send_mgr_filter_udp(
IN ipoib_port_t* const
p_port,
@@ -3577,7 +3747,18 @@
return NDIS_STATUS_PENDING;
}
}
+ else if ( p_port->p_adapter->params.mc_garbage_collector &&
+ status == NDIS_STATUS_SUCCESS &&
+ ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) &&
+ !ETH_IS_BROADCAST( p_eth_hdr->dst.addr ) )
+ {
+ CL_ASSERT( (*pp_endpt) );
+ CL_ASSERT((*pp_endpt)->h_mcast != NULL);
+ CL_ASSERT((*pp_endpt)->is_gc_in_use);
+ (*pp_endpt)->is_in_use = TRUE;
+ }
+
IPOIB_EXIT( IPOIB_DBG_SEND );
return status;
}
@@ -4706,6 +4887,8 @@
ib_query_req_t query;
ib_user_query_t info;
ib_portinfo_record_t port_rec;
+ cl_status_t cl_status;
+ BOOLEAN success = TRUE;
IPOIB_ENTER( IPOIB_DBG_INIT );
@@ -4744,15 +4927,41 @@
p_port->p_adapter->h_al, &query, &p_port->ib_mgr.h_query
);
if( status != IB_SUCCESS )
{
- KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
- ipoib_set_inactive( p_port->p_adapter );
- ipoib_port_deref( p_port, ref_port_up );
+ success = FALSE;
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
("ib_query returned %s\n",
p_port->p_adapter->p_ifc->get_err_str( status ))
);
- return;
- }
+ goto port_up_end;
+ }
+ /* garbage collector is needed when link is active */
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ KeClearEvent( &p_port->mcast_event );
+
+ cl_status = cl_thread_init(
+ &p_port->mcast_thread,
+ __port_mcast_garbage_collector,
+ p_port,
+ "mcast_garbage");
+ if( cl_status != CL_SUCCESS )
+ {
+ success = FALSE;
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_thread_init returned %#x\n", cl_status) );
+ goto port_up_end;
+ }
+ p_port->mcast_thread_init = TRUE;
+ }
+port_up_end:
+ if (!success)
+ {
+ KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
+ ipoib_set_inactive( p_port->p_adapter );
+ ipoib_port_deref( p_port, ref_port_up );
+ }
+
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -5229,6 +5438,16 @@
return;
}
+ /* Destroy multicast garbage collector thread */
+ /* garbage collector is not needed when link is down */
+ if(p_port->p_adapter->params.mc_garbage_collector &&
p_port->mcast_thread_init)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ KeSetEvent( &p_port->mcast_event, 0, FALSE );
+
+ cl_thread_destroy(&p_port->mcast_thread);
+ p_port->mcast_thread_init = FALSE;
+ }
KeResetEvent(&p_port->leave_mcast_event);
/* Reset all endpoints so we don't flush our ARP cache. */
@@ -5656,6 +5875,17 @@
&p_port->endpt_mgr.lid_endpts, p_endpt->dlid,
&p_endpt->lid_item );
CL_ASSERT( p_qitem == &p_endpt->lid_item );
}
+ /* Add the endpoint to the multicast endpoints list */
+ if ( p_port->p_adapter->params.mc_garbage_collector )
+ {
+ /* set flag that garbage collector is enabled */
+ p_endpt->is_gc_in_use = TRUE;
+ p_endpt->is_in_use = TRUE;
+ }
+ else
+ {
+ p_endpt->is_gc_in_use = FALSE;
+ }
cl_obj_unlock( &p_port->obj );
/* Try to send all pending sends. */
@@ -5712,6 +5942,93 @@
IPOIB_EXIT( IPOIB_DBG_MCAST );
}
+/*
+ * Runs one multicast garbage-collection pass over the port's endpoints.
+ *
+ * Under the port object lock, walks the MAC endpoint map and unlinks
+ * every endpoint that has a multicast handle, is tracked by the
+ * collector (is_gc_in_use), has no listener (is_mcast_listener clear),
+ * is not the 01:00:5e:00:00:01 group and was not marked in use since
+ * the previous pass. Every endpoint that is not collected has its
+ * is_in_use mark cleared so that it must be marked again before the
+ * next pass. At most GC_MAX_LEAVE_NUM endpoints are collected per pass.
+ * The unlinked endpoints are destroyed after the lock is released.
+ *
+ * p_port  Port whose endpoints are scanned.
+ */
+static void __port_do_mcast_garbage(ipoib_port_t *p_port)
+{
+	const mac_addr_t DEFAULT_MCAST_GROUP = {0x01, 0x00, 0x5e, 0x00, 0x00, 0x01};
+	/* Upper bound on the endpoints collected in a single pass. */
+	static const uint8_t GC_MAX_LEAVE_NUM = 80;
+	cl_map_item_t	*p_item;
+	ipoib_endpt_t	*p_endpt;
+	cl_qlist_t	destroy_mc_list;
+	uint8_t		cnt = 0;
+
+	cl_qlist_init( &destroy_mc_list );
+
+	cl_obj_lock( &p_port->obj );
+	p_item = cl_qmap_head( &p_port->endpt_mgr.mac_endpts );
+	while( (p_item != cl_qmap_end( &p_port->endpt_mgr.mac_endpts )) &&
+		(cnt < GC_MAX_LEAVE_NUM) )
+	{
+		p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, mac_item );
+		/* Advance first: the current item may be removed below. */
+		p_item = cl_qmap_next( p_item );
+
+		/* Check if the current endpoint is not a multicast listener. */
+		if( p_endpt->h_mcast &&
+			p_endpt->is_gc_in_use &&
+			(!p_endpt->is_mcast_listener) &&
+			( cl_memcmp( &p_endpt->mac, &DEFAULT_MCAST_GROUP, sizeof(mac_addr_t) ) &&
+			(!p_endpt->is_in_use) ))
+		{
+			cl_qmap_remove_item( &p_port->endpt_mgr.mac_endpts,
+				&p_endpt->mac_item );
+			cl_fmap_remove_item( &p_port->endpt_mgr.gid_endpts,
+				&p_endpt->gid_item );
+
+			if( p_endpt->dlid )
+			{
+				cl_qmap_remove_item( &p_port->endpt_mgr.lid_endpts,
+					&p_endpt->lid_item );
+				p_endpt->dlid = 0;
+			}
+
+			/* The map item is free now; reuse it to queue the endpoint. */
+			cl_qlist_insert_tail(
+				&destroy_mc_list, &p_endpt->mac_item.pool_item.list_item );
+			cnt++;
+		}
+		else
+		{
+			/* Age the endpoint: it must be marked in use again. */
+			p_endpt->is_in_use = FALSE;
+		}
+	}
+	cl_obj_unlock( &p_port->obj );
+
+	/* Destroy all multicast endpoints now that we have released the lock. */
+	while( cl_qlist_count( &destroy_mc_list ) )
+	{
+		p_endpt = PARENT_STRUCT( cl_qlist_remove_head( &destroy_mc_list ),
+			ipoib_endpt_t, mac_item.pool_item.list_item );
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,
+			("mcast garbage collector: destroying endpoint %02x:%02x:%02x:%02x:%02x:%02x \n",
+			p_endpt->mac.addr[0],
+			p_endpt->mac.addr[1],
+			p_endpt->mac.addr[2],
+			p_endpt->mac.addr[3],
+			p_endpt->mac.addr[4],
+			p_endpt->mac.addr[5]) );
+
+		cl_obj_destroy( &p_endpt->obj );
+	}
+}
+
+/*
+ * Thread routine of the multicast garbage collector.
+ *
+ * Waits on the port's mcast_event with a timeout of mc_leave_rescan
+ * seconds. Each time the wait ends without the event being signaled a
+ * collection pass is run; the routine returns once the wait reports
+ * STATUS_SUCCESS, i.e. the event was set to stop the thread.
+ *
+ * context  The ipoib_port_t the collector works on.
+ */
+static void CL_API __port_mcast_garbage_collector
+	(IN		void*		context )
+{
+	LARGE_INTEGER	wait;
+	ipoib_port_t	*p_port = context;
+
+	IPOIB_ENTER( IPOIB_DBG_ENDPT );
+
+	CL_ASSERT( p_port->p_adapter->params.mc_garbage_collector );
+
+	/* Seconds to 100ns units; a negative value is a relative timeout. */
+	wait.QuadPart =
+		-(int64_t)(((uint64_t)p_port->p_adapter->params.mc_leave_rescan * 1000000) * 10);
+
+	/*
+	 * A single wait per cycle: an extra wait ahead of the loop would only
+	 * double the delay before the first pass, and its result was unused.
+	 */
+	while( KeWaitForSingleObject( &p_port->mcast_event, Executive,
+		KernelMode, FALSE, &wait ) != STATUS_SUCCESS )
+	{
+		__port_do_mcast_garbage( p_port );
+	}
+
+	IPOIB_EXIT( IPOIB_DBG_ENDPT );
+}
Index: ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- ulp/ipoib/kernel/ipoib_port.h (revision 1302)
+++ ulp/ipoib/kernel/ipoib_port.h (working copy)
@@ -506,6 +506,11 @@
atomic32_t endpt_rdr;
atomic32_t hdr_idx;
+ boolean_t mcast_event_init;
+ KEVENT mcast_event;
/* Multicast garbage collector thread terminate event */
+
+ boolean_t mcast_thread_init;
+ cl_thread_t mcast_thread;
/* Multicast garbage collector thread */
uint16_t pkey_index;
ipoib_hdr_t hdr[1]; /* Must be last!
*/
Index: ulp/ipoib/kernel/netipoib.inf
===================================================================
--- ulp/ipoib/kernel/netipoib.inf (revision 1302)
+++ ulp/ipoib/kernel/netipoib.inf (working copy)
@@ -126,6 +126,19 @@
HKR, Ndi\Params\PayloadMtu, Min, 0, "60"
HKR, Ndi\Params\PayloadMtu, Max, 0, "2044"
+HKR, Ndi\Params\MCGarbageCollector, ParamDesc, 0, "MC garbage
collector"
+HKR, Ndi\Params\MCGarbageCollector, Type, 0, "enum"
+HKR, Ndi\Params\MCGarbageCollector, Default, 0, "1"
+HKR, Ndi\Params\MCGarbageCollector, Optional, 0, "0"
+HKR, Ndi\Params\MCGarbageCollector\enum,"0", 0, "Disabled"
+HKR, Ndi\Params\MCGarbageCollector\enum,"1", 0, "Enabled"
+
+HKR, Ndi\Params\MCLeaveRescan, ParamDesc, 0, "MC leave
rescan (sec)"
+HKR, Ndi\Params\MCLeaveRescan, Type, 0, "dword"
+HKR, Ndi\Params\MCLeaveRescan, Default, 0, "260"
+HKR, Ndi\Params\MCLeaveRescan, Optional, 0, "0"
+HKR, Ndi\Params\MCLeaveRescan, Min, 0, "1"
+HKR, Ndi\Params\MCLeaveRescan, Max, 0, "3600"
[IpoibService]
DisplayName = %IpoibServiceDispName%
ServiceType = 1 ;%SERVICE_KERNEL_DRIVER%
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mcast_garbage_collector.diff
Type: application/octet-stream
Size: 17780 bytes
Desc: mcast_garbage_collector.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080629/69fec8b6/attachment.obj>
More information about the ofw
mailing list