[ofw][patch] mcast garbage collector
Sean Hefty
sean.hefty at intel.com
Thu Jun 12 10:45:38 PDT 2008
Index: core/al/al_common.h
===================================================================
--- core/al/al_common.h (revision 1261)
+++ core/al/al_common.h (working copy)
@@ -49,6 +49,8 @@
extern uint32_t g_ioc_query_retries;
extern uint32_t g_ioc_poll_interval;
+extern uint32_t g_mc_destr_retr_timeout;
+extern uint32_t g_mc_dest_retr_count;
Why the extra 'r' in the first name? Any reason not to add the 'oy' and 'y' to
these names to clarify their use?
/* Wait operations performed in user-mode must be alertable. */
#ifdef CL_KERNEL
Index: core/al/al_mcast.c
===================================================================
--- core/al/al_mcast.c (revision 1261)
+++ core/al/al_mcast.c (working copy)
@@ -96,6 +96,9 @@
static void
__free_attach(
IN al_obj_t
*p_obj );
+/* Mcast destroy timeout and retry count read from registry */
+uint32_t g_mc_destr_retr_timeout = 1000;
+uint32_t g_mc_dest_retr_count = 10;
#endif
I don't understand why ibal is changing for ipoib multicast handling.
@@ -271,8 +274,14 @@
sa_mad_data.p_attr = &h_mcast->member_rec;
ref_al_obj( &h_mcast->obj );
- status = al_send_sa_req(
- &h_mcast->sa_dereg_req, h_mcast->port_guid, 500, 0,
&sa_mad_data, 0 );
+ status =
+#if defined( CL_KERNEL )
+ al_send_sa_req(
+ &h_mcast->sa_dereg_req, h_mcast->port_guid,
g_mc_destr_retr_timeout, g_mc_dest_retr_count, &sa_mad_data, 0 );
+#else
+ al_send_sa_req(
+ &h_mcast->sa_dereg_req, h_mcast->port_guid, 500, 0, &sa_mad_data, 0
);
+#endif
This is separate from this patch, but can someone explain this part to me? The
only way joining multicast groups can work is if it's done in a single location,
like the kernel. Does al_send_sa_req() drop to the kernel and get filtered?
if( status != IB_SUCCESS )
deref_al_obj( &h_mcast->obj );
Index: core/bus/kernel/bus_driver.c
===================================================================
--- core/bus/kernel/bus_driver.c (revision 1261)
+++ core/bus/kernel/bus_driver.c (working copy)
@@ -326,7 +326,7 @@
{
NTSTATUS
status;
/* Remember the terminating entry in the table below. */
- RTL_QUERY_REGISTRY_TABLE table[10];
+ RTL_QUERY_REGISTRY_TABLE table[12];
UNICODE_STRING
param_path;
UNICODE_STRING
pkeyString;
UNICODE_STRING
empy_string;
@@ -431,7 +431,19 @@
table[8].EntryContext = &pkeyString;
table[8].DefaultType = REG_SZ;
table[8].DefaultData = &empy_string;
- table[8].DefaultLength = 1024*sizeof(WCHAR);
+ table[9].Flags = RTL_QUERY_REGISTRY_DIRECT;
+ table[9].Name = L"McDestrRetrCnt";
+ table[9].EntryContext = &g_mc_dest_retr_count;
+ table[9].DefaultType = REG_DWORD;
+ table[9].DefaultData = &g_mc_dest_retr_count;
+ table[9].DefaultLength = sizeof(ULONG);
+
+ table[10].Flags = RTL_QUERY_REGISTRY_DIRECT;
+ table[10].Name = L"McDestrRetrTimeout";
Registry names should be explicit and spelled out. This makes me think of
McDonald's. :-)
+ table[10].EntryContext = &g_mc_destr_retr_timeout;
+ table[10].DefaultType = REG_DWORD;
+ table[10].DefaultData = &g_mc_destr_retr_timeout;
+ table[10].DefaultLength = sizeof(ULONG);
/* Have at it! */
status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE,
param_path.Buffer, table, NULL, NULL );
Index: inc/complib/cl_types.h
===================================================================
--- inc/complib/cl_types.h (revision 1261)
+++ inc/complib/cl_types.h (working copy)
@@ -46,7 +46,7 @@
#include <complib/cl_types_osd.h>
-
+typedef uint8_t net8_t;
There's no need for this. A byte is a byte. Adding this would require changes
throughout the code base to mark which uint8_t are expected to hit the wire,
versus those that do not.
typedef uint16_t net16_t;
typedef uint32_t net32_t;
typedef uint64_t net64_t;
Index: inc/kernel/ip_packet.h
===================================================================
--- inc/kernel/ip_packet.h (revision 1261)
+++ inc/kernel/ip_packet.h (working copy)
@@ -196,6 +196,7 @@
#define IP_PROT_IP 4
#define IP_PROT_TCP 6
#define IP_PROT_UDP 17
+#define IP_PROT_IGMP 2
#include <complib/cl_packon.h>
@@ -355,7 +356,58 @@
*********/
#include <complib/cl_packoff.h>
+#define IGMP_V2_MEMBERSHIP_QUERY 0x11
+#define IGMP_V2_MEMBERSHIP_REPORT 0x16
+#define IGMP_V1_MEMBERSHIP_REPORT 0x12 // for backward compatibility
with IGMPv1
+#define IGMP_V2_LEAVE_GROUP 0x17
+
+#include <complib/cl_packon.h>
+/****s* IB Network Drivers/igmp__v2_hdr_t
+* NAME
+* igmp_v2_hdr_t
+*
+* DESCRIPTION
+* Defines the IGMPv2 header for IP packets.
+*
+* SYNOPSIS
+*/
+typedef struct _igmp_v2_hdr
+{
+ net8_t type;
+ net8_t max_resp_time;
+ net16_t chksum;
+ net32_t group_address;
+} PACK_SUFFIX igmp_v2_hdr_t;
+/*
+* FIELDS
+* type
+* type of IGMPv2 message: query/report/leave
+*
+* max_resp_time
+* The Max Response Time field is meaningful only in
Membership Query
+* messages, and specifies the maximum allowed time before
sending a
+* responding report in units of 1/10 second. In all other
messages, it
+* is set to zero by the sender and ignored by receivers.
+*
+* checksum
+* The checksum is the 16-bit one's complement of the one's
complement
+* sum of the whole IGMP message (the entire IP payload).
+*
+* group_address
+* In a Membership Query message, the group address field is
set to zero
+* when sending a General Query, and set to the group address being
+* queried when sending a Group-Specific Query.
+*
+* In a Membership Report or Leave Group message, the group address
+* field holds the IP multicast group address of the group being
+* reported or left.
+*
+* SEE ALSO
+* IB Network Drivers, eth_hdr_t, arp_pkt_t, ip_hdr_t, tcp_hdr_t
+*********/
+#include <complib/cl_packoff.h>
+
#define DHCP_PORT_SERVER CL_HTON16(67)
#define DHCP_PORT_CLIENT CL_HTON16(68)
Index: ulp/ipoib/kernel/ipoib_adapter.h
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.h (revision 1261)
+++ ulp/ipoib/kernel/ipoib_adapter.h (working copy)
@@ -74,6 +74,9 @@
uint32_t payload_mtu;
uint32_t xfer_block_size;
mac_addr_t conf_mac;
+ boolean_t mc_garbage_collector;
+ uint32_t mc_leave_rescan;
+ uint32_t mc_aging_time;
} ipoib_params_t;
/*
Index: ulp/ipoib/kernel/ipoib_driver.c
===================================================================
--- ulp/ipoib/kernel/ipoib_driver.c (revision 1261)
+++ ulp/ipoib/kernel/ipoib_driver.c (working copy)
@@ -526,6 +526,38 @@
}
p_adapter->params.recv_pool_ratio =
p_param->ParameterData.IntegerData;
+ /* Required: MC garbage collector. */
+ RtlInitUnicodeString( &keyword, L"MCGarbageCollector" );
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword, NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("MC garbage collector parameter is missing.\n") );
+ return status;
+ }
Why make this required? Just default to true or false.
+ p_adapter->params.mc_garbage_collector =
(p_param->ParameterData.IntegerData != 0);
+ /* Optional: MC leave rescan (sec) for the MC garable collector
thread. */
+ RtlInitUnicodeString( &keyword, L"MCLeaveRescan" );
It's not obvious to me from the name what this parameter does exactly. We
should try to limit parameters that the user has to configure.
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword,
NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ p_adapter->params.mc_leave_rescan = 130;
+ }
+ else
+ p_adapter->params.mc_leave_rescan =
p_param->ParameterData.IntegerData;
+
+ /* Optional: MC aging time (sec) */
+ RtlInitUnicodeString( &keyword, L"MCAgingTime" );
Same here. I would rather see a single parameter that indicated how long a
multicast group can be idle before it's removed. The underlying implementation
can use this to determine when to scan for idle groups.
+ NdisReadConfiguration(
+ &status, &p_param, h_config, &keyword,
NdisParameterInteger );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ p_adapter->params.mc_aging_time = 260;
+ }
+ else
+ p_adapter->params.mc_aging_time =
p_param->ParameterData.IntegerData;
/* required: MTU size. */
RtlInitUnicodeString( &keyword, L"PayloadMtu" );
NdisReadConfiguration(
Index: ulp/ipoib/kernel/ipoib_endpoint.h
===================================================================
--- ulp/ipoib/kernel/ipoib_endpoint.h (revision 1261)
+++ ulp/ipoib/kernel/ipoib_endpoint.h (working copy)
@@ -61,7 +61,10 @@
TO_LONG_PTR( ib_av_handle_t , h_av) ;
boolean_t expired;
ib_al_ifc_t *p_ifc;
-
+ uint32_t mcast_send_timestamp;
+ int32_t mcast_count;
+ boolean_t is_mcast_endpoint;
+ boolean_t
is_mcast_listener;
} ipoib_endpt_t;
/*
* FIELDS
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c (revision 1261)
+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -66,7 +66,7 @@
ipoib_port_t *gp_ipoib_port;
#endif
-
+static void __port_do_mcast_garbage(ipoib_port_t *p_port);
/******************************************************************************
*
* Declarations
@@ -94,6 +94,8 @@
__port_free(
IN cl_obj_t* const
p_obj );
+static void CL_API __port_mcast_garbage_collector
+ (IN void*
context );
/******************************************************************************
*
@@ -290,6 +292,14 @@
IN OUT ipoib_send_desc_t* const
p_desc );
static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+ IN ipoib_port_t* const
p_port,
+ IN const ip_hdr_t* const
p_ip_hdr,
+ IN size_t
iph_options_size,
+ IN NDIS_BUFFER*
p_buf,
+ IN size_t
buf_len );
+
Unclear how this relates to garbage collection.
+static NDIS_STATUS
__send_mgr_filter_udp(
IN ipoib_port_t* const
p_port,
IN const ip_hdr_t* const
p_ip_hdr,
@@ -579,6 +589,11 @@
KeInitializeEvent( &p_port->sa_event, NotificationEvent, TRUE );
KeInitializeEvent( &p_port->leave_mcast_event, NotificationEvent,
TRUE );
+ p_port->mcast_event_init = FALSE;
+ cl_event_construct(&p_port->mcast_event);
+
+ p_port->mcast_thread_init = FALSE;
+ cl_thread_construct(&p_port->mcast_thread);
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -653,6 +668,18 @@
return status;
}
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ /* Initialize multicast garbage collector event */
+ cl_status = cl_event_init(&p_port->mcast_event, TRUE);
+ if( cl_status != CL_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("cl_event_init returned %#x\n", cl_status) );
+ return IB_ERROR;
+ }
+ p_port->mcast_event_init = TRUE;
+ }
/* We only ever destroy from the PnP callback thread. */
cl_status = cl_obj_init( &p_port->obj, CL_DESTROY_SYNC,
__port_destroying, __port_cleanup, __port_free );
@@ -746,7 +773,26 @@
CL_ASSERT( p_obj );
p_port = PARENT_STRUCT( p_obj, ipoib_port_t, obj );
+ if (p_port->p_adapter->params.mc_garbage_collector)
+ {
+ /* Destroy multicast garbage collector thread */
+ if(p_port->mcast_thread_init)
+ {
+ CL_ASSERT(p_port->mcast_event_init);
+ cl_event_signal(&p_port->mcast_event);
+
+ cl_thread_destroy(&p_port->mcast_thread);
+ p_port->mcast_thread_init = FALSE;
+ }
+
+ if (p_port->mcast_event_init)
+ {
+ cl_event_destroy(&p_port->mcast_event);
+ p_port->mcast_event_init = FALSE;
+ }
+ }
Do we have access to some generic thread work queue that we can post work items
to, rather than allocating a new thread for this?
- Sean
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080612/9d94c25a/attachment.html>
More information about the ofw
mailing list