<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML><HEAD>
<META http-equiv=Content-Type content="text/html; charset=us-ascii">
<META content="MSHTML 6.00.2900.3243" name=GENERATOR></HEAD>
<BODY>
<DIV><FONT face=Arial size=2>Summary: Ill-defined mechanism of event
propagation</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=Arial size=2>Bug description and reproduction:</FONT></DIV>
<DIV> </DIV>
<DIV><FONT face=Arial size=2>1. Connect two machines (A and B) via an IB
switch<BR>2. Run a subnet manager (say, opensm) on B<BR>3. Kill opensm and clear
the ARP tables<BR>4. Rerun opensm - ping will no longer work<BR>5. That's because
the new opensm instance clears the old multicast groups, and side A is not
aware of the opensm restart, so it does not request to join the new MCAST
group</FONT></DIV>
<DIV> </DIV><FONT face=Arial size=2>
<DIV><BR>Explanations:</DIV>
<DIV> </DIV>
<DIV>There are 2 types of events relevant in our case: PnP and AE.</DIV>
<DIV> </DIV>
<DIV>The problem is caused by the following:</DIV>
<DIV> </DIV>
<DIV>1. During an opensm restart, the port generates an AE event: IB_EVENT_LID_CHANGE
or (in other cases) IB_EVENT_CLIENT_REREGISTER<BR>These events are generated
even when the SM was restarted but the LID has not actually
changed.</DIV>
<DIV> </DIV>
<DIV><BR>2. All PnP events were handled properly, but the IB_EVENT_* events above were mapped
to IB_AE_FATAL<BR>This patch fixes that and maps IB_EVENT_* events to the appropriate
IB_AE_* events and then to IB_PNP_* events</DIV>
<DIV> </DIV>
<DIV><BR>3. The function force_smi_poll() will now notify its subscribers about a LID
change event if and only if the LID was changed.<BR>So, we still have the problem when opensm
was restarted and none of the port attributes was changed.<BR>This patch
generates the appropriate IB_PNP event to resolve this issue.</DIV>
<DIV> </DIV>
<DIV>Signed-off-by: xalex (<A
href="mailto:xalex@mellanox.co.il">xalex@mellanox.co.il</A>)</DIV>
<DIV></FONT> </DIV>
<DIV><FONT face=Arial size=2></FONT> </DIV>
<DIV><FONT face=Arial size=2>Index:
core/al/al_ci_ca.h<BR>===================================================================<BR>---
core/al/al_ci_ca.h (revision 2195)<BR>+++ core/al/al_ci_ca.h (working
copy)<BR>@@ -65,6 +65,11
@@<BR> IN const ib_ca_handle_t h_ca
);<BR> #endif<BR> <BR>+#define
MAX_AE 32<BR>+typedef struct _al_ae_info
{<BR>+ ib_pnp_event_t pnp_event;<BR>+ uint8_t port_index;<BR>+}
al_ae_info_t;<BR> <BR> <BR> typedef struct _al_ci_ca<BR>@@ -106,6
+111,12 @@<BR> /* "end of PnP handling" event
*/<BR> cl_event_t event;<BR> <BR>+ /*
Array of pending AEs (Asynchronic events)
*/<BR>+ al_ae_info_t ae[MAX_AE];<BR>+ int
ci;<BR>+ int
pi;<BR>+ atomic32_t cnt;<BR>+<BR> } al_ci_ca_t;<BR> <BR> <BR>Index:
core/al/al_ci_ca_shared.c<BR>===================================================================<BR>---
core/al/al_ci_ca_shared.c (revision 2195)<BR>+++
core/al/al_ci_ca_shared.c (working copy)<BR>@@ -250,6 +250,7
@@<BR> p_event_item = PARENT_STRUCT( p_item, event_item_t,
async_item.pool_item );<BR> p_event_item->event_rec.code =
p_event_rec->code;<BR> p_event_item->event_rec.context =
p_event_rec->context;<BR>+ p_event_item->event_rec.port_number =
p_event_rec->port_number;<BR> <BR> /* Queue the item on the
asynchronous callback thread for processing.
*/<BR> p_event_item->async_item.pfn_callback =
ci_ca_process_event_cb;<BR>@@ -300,10 +301,27
@@<BR> cq_async_event_cb( &p_event_item->event_rec
);<BR> break;<BR> <BR>+#ifdef
CL_KERNEL<BR>+<BR>+ case IB_AE_LID_CHANGE:<BR>+ case
IB_AE_CLIENT_REREGISTER:<BR>+ // These AE events will be generated
even in the case when<BR>+ // SM was restaretd but LID will not
actually change.<BR>+ // It's important to propagate these event (via
PnP mechanism)<BR>+ // up to subscribers. Otherwise, there will be no
ping after<BR>+ // subnet manager restart<BR>+ //if
(AL_OBJ_IS_TYPE(p_obj, AL_OBJ_TYPE_CI_CA)<BR>+ if (AL_BASE_TYPE(
p_obj->type) == AL_OBJ_TYPE_CI_CA)
{<BR>+ pnp_force_event( (struct _al_ci_ca *) p_obj,
IB_PNP_LID_CHANGE,<BR>+ p_event_item->event_rec.port_number
);<BR>+ }<BR>+ break;<BR>+#endif
//CL_KERNEL<BR>+<BR> case IB_AE_PORT_TRAP:<BR> case
IB_AE_PORT_DOWN:<BR> case IB_AE_PORT_ACTIVE:<BR>- case
IB_AE_CLIENT_REREGISTER:<BR>+ <BR> #ifdef
CL_KERNEL<BR> /* The SMI polling routine may report a PnP
event. */<BR> force_smi_poll();<BR>Index:
core/al/al_pnp.h<BR>===================================================================<BR>---
core/al/al_pnp.h (revision 2195)<BR>+++ core/al/al_pnp.h (working
copy)<BR>@@ -216,6 +216,13
@@<BR> IN KEVENT *p_sync_event,<BR> OUT ib_pnp_handle_t*
const ph_pnp );<BR> <BR>+void<BR>+pnp_force_event(<BR>+ IN
struct _al_ci_ca * p_ci_ca,<BR>+ IN
ib_pnp_event_t pnp_event,<BR>+ IN uint8_t
port_num);<BR>+<BR>+<BR> #endif /* CL_KERNEL
*/<BR> <BR> static inline ib_pnp_class_t<BR>Index:
core/al/kernel/al_ci_ca.c<BR>===================================================================<BR>---
core/al/kernel/al_ci_ca.c (revision 2195)<BR>+++
core/al/kernel/al_ci_ca.c (working copy)<BR>@@ -354,6 +354,7
@@<BR> event_rec.code =
p_event_record->type;<BR> event_rec.context =
p_event_record->context;<BR> event_rec.vendor_specific =
p_event_record->vendor_specific;<BR>+ event_rec.port_number =
p_event_record->port_number;<BR> <BR> ci_ca_async_event(
&event_rec );<BR> <BR>Index:
core/al/kernel/al_pnp.c<BR>===================================================================<BR>---
core/al/kernel/al_pnp.c (revision 2195)<BR>+++
core/al/kernel/al_pnp.c (working copy)<BR>@@ -605,6 +605,7
@@<BR> {<BR> AL_PRINT( TRACE_LEVEL_INFORMATION,
AL_DBG_PNP,<BR> ("p_context is already in context map
%I64x \n",p_context->guid));<BR>+ cl_free( p_context
);<BR> p_context = NULL;<BR> }<BR> <BR>@@
-1490,6 +1491,24 @@<BR> __pnp_process_port_forward(
&event_rec );<BR> }<BR> }<BR>+<BR>+ /* send
asynchronous events */<BR>+ {<BR>+ int ci =
p_ci_ca->ci;<BR>+ int cnt = p_ci_ca->cnt %
MAX_AE;<BR>+<BR>+ while ( cnt-- > 0
)<BR>+ {<BR>+ event_rec.pnp_event =
p_ci_ca->ae[ci].pnp_event;<BR>+ event_rec.port_index =
p_ci_ca->ae[ci].port_index;<BR>+ cl_atomic_dec(
&p_ci_ca->cnt );<BR>+ __pnp_process_port_forward(
&event_rec );<BR>+ if ( ++ci >= MAX_AE
)<BR>+ ci =
0;<BR>+ }<BR>+ p_ci_ca->ci =
ci;<BR>+ }<BR>+<BR> }<BR> <BR> <BR>@@ -1749,3 +1768,26
@@<BR> AL_EXIT( AL_DBG_PNP );<BR> return
IB_UNSUPPORTED;<BR> }<BR>+<BR>+void<BR>+pnp_force_event(<BR>+ IN
struct _al_ci_ca * p_ci_ca,<BR>+ IN
ib_pnp_event_t pnp_event,<BR>+ IN uint8_t
port_num)<BR>+{<BR>+ <BR>+#define PORT_INDEX_OFFSET
1<BR>+<BR>+ ASSERT(p_ci_ca);<BR>+ <BR>+ if
(!p_ci_ca)<BR>+ return;<BR>+ <BR>+ p_ci_ca->ae[p_ci_ca->pi].pnp_event
= pnp_event;<BR>+ p_ci_ca->ae[p_ci_ca->pi].port_index = port_num -
PORT_INDEX_OFFSET;<BR>+ cl_atomic_inc( &p_ci_ca->cnt );<BR>+ if
( ++p_ci_ca->pi >= MAX_AE )<BR>+ p_ci_ca->pi =
0;<BR>+}<BR>+<BR>+<BR>Index:
inc/iba/ib_al.h<BR>===================================================================<BR>---
inc/iba/ib_al.h (revision 2195)<BR>+++ inc/iba/ib_al.h (working
copy)<BR>@@ -473,6 +473,8 @@<BR> TO_LONG_PTR(struct
_ib_srq*, h_srq);<BR> <BR> }
handle;<BR>+ <BR>+ uint8_t port_number;<BR> <BR> } ib_async_event_rec_t;<BR> /*<BR></FONT></DIV></BODY></HTML>