[ofw][patch] mcast destroy retry count - parameters

Sean Hefty sean.hefty at intel.com
Tue Jun 17 10:04:54 PDT 2008


We would be better off assuming that SA requests are delayed, rather than
completely lost.  SA timeouts should use an exponential back-off, otherwise a
busy SA continues to get hit too quickly to respond to requests.  (This is a
problem that we've seen in practice using linear timeout values on larger
clusters.)

 

- Sean

 

 

The attached patch allows specifying the timeout and retry count values for the
multicast leave operation.

 

Index: core/al/al_common.h

===================================================================

--- core/al/al_common.h (revision 1261)

+++ core/al/al_common.h           (working copy)

@@ -49,6 +49,8 @@

 extern uint32_t              g_ioc_query_retries;

 extern uint32_t              g_ioc_poll_interval;

 

+extern uint32_t             g_mc_destroy_retry_timeout;

+extern uint32_t             g_mc_destroy_retry_count;

 

 /* Wait operations performed in user-mode must be alertable. */

 #ifdef CL_KERNEL

Index: core/al/al_mcast.c

===================================================================

--- core/al/al_mcast.c     (revision 1261)

+++ core/al/al_mcast.c  (working copy)

@@ -96,10 +96,12 @@

 static void

 __free_attach(

            IN                                             al_obj_t
*p_obj );

+/* Mcast destroy timeout and retry count read from registry */

+uint32_t           g_mc_destroy_retry_timeout = 1000;

+uint32_t           g_mc_destroy_retry_count   = 10;

+

 #endif

 

-

-

 ib_api_status_t

 al_join_mcast(

            IN                     const    ib_qp_handle_t FUNC_PTR64
h_qp,

@@ -271,8 +273,14 @@

            sa_mad_data.p_attr = &h_mcast->member_rec;

 

            ref_al_obj( &h_mcast->obj );

-           status = al_send_sa_req(

-                       &h_mcast->sa_dereg_req, h_mcast->port_guid, 500, 0,
&sa_mad_data, 0 );

+          status = 

+#if defined( CL_KERNEL )

+                      al_send_sa_req(

+                      &h_mcast->sa_dereg_req, h_mcast->port_guid,
g_mc_destroy_retry_timeout, g_mc_destroy_retry_count, &sa_mad_data, 0 );

+#else

+          al_send_sa_req(

+          &h_mcast->sa_dereg_req, h_mcast->port_guid, 500, 0, &sa_mad_data, 0
);

+#endif

            if( status != IB_SUCCESS )

                        deref_al_obj( &h_mcast->obj );

 

Index: core/bus/kernel/bus_driver.c

===================================================================

--- core/bus/kernel/bus_driver.c  (revision 1261)

+++ core/bus/kernel/bus_driver.c           (working copy)

@@ -326,7 +326,7 @@

 {

            NTSTATUS
status;

            /* Remember the terminating entry in the table below. */

-           RTL_QUERY_REGISTRY_TABLE           table[10];

+          RTL_QUERY_REGISTRY_TABLE           table[12];

            UNICODE_STRING
param_path;

            UNICODE_STRING
pkeyString;

            UNICODE_STRING
empy_string;

@@ -431,7 +431,19 @@

            table[8].EntryContext = &pkeyString;

            table[8].DefaultType  = REG_SZ;

            table[8].DefaultData  = &empy_string;

-           table[8].DefaultLength = 1024*sizeof(WCHAR);

+          table[9].Flags = RTL_QUERY_REGISTRY_DIRECT;

+          table[9].Name = L"McDestroyRetryCount";

+          table[9].EntryContext = &g_mc_destroy_retry_count;

+          table[9].DefaultType = REG_DWORD;

+          table[9].DefaultData = &g_mc_destroy_retry_count;

+          table[9].DefaultLength = sizeof(ULONG);

+

+          table[10].Flags = RTL_QUERY_REGISTRY_DIRECT;

+          table[10].Name = L"McDestroyRetryTimeout";

+          table[10].EntryContext = &g_mc_destroy_retry_timeout;

+          table[10].DefaultType = REG_DWORD;

+          table[10].DefaultData = &g_mc_destroy_retry_timeout;

+          table[10].DefaultLength = sizeof(ULONG);

            /* Have at it! */

            status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE, 

                        param_path.Buffer, table, NULL, NULL );

 

Slava Strebkov

SW Engineer

Voltaire

099718750

 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080617/ebb03fbe/attachment.html>


More information about the ofw mailing list