[openib-general] [PATCH] osm: handle local events
Yevgeny Kliteynik
kliteyn at mellanox.co.il
Tue Aug 22 08:41:58 PDT 2006
Hi Hal
This patch implements the first item of the OSM todo list.
OpenSM opens a thread that listens for events on the SM's port.
The events that are handled are IBV_EVENT_DEVICE_FATAL and
IBV_EVENT_PORT_ERR.
In case of IBV_EVENT_DEVICE_FATAL, osm is forced to exit.
In case of IBV_EVENT_PORT_ERR, osm initiates a heavy sweep.
Yevgeny
Signed-off-by: Yevgeny Kliteynik <kliteyn at mellanox.co.il>
Index: include/opensm/osm_sm_mad_ctrl.h
===================================================================
--- include/opensm/osm_sm_mad_ctrl.h (revision 8998)
+++ include/opensm/osm_sm_mad_ctrl.h (working copy)
@@ -109,6 +109,7 @@ typedef struct _osm_sm_mad_ctrl
osm_mad_pool_t *p_mad_pool;
osm_vl15_t *p_vl15;
osm_vendor_t *p_vendor;
+ struct _osm_state_mgr *p_state_mgr;
osm_bind_handle_t h_bind;
cl_plock_t *p_lock;
cl_dispatcher_t *p_disp;
@@ -130,6 +131,9 @@ typedef struct _osm_sm_mad_ctrl
* p_vendor
* Pointer to the vendor specific interfaces object.
*
+* p_state_mgr
+* Pointer to the state manager object.
+*
* h_bind
* Bind handle returned by the transport layer.
*
@@ -233,6 +237,7 @@ osm_sm_mad_ctrl_init(
IN osm_mad_pool_t* const p_mad_pool,
IN osm_vl15_t* const p_vl15,
IN osm_vendor_t* const p_vendor,
+ IN struct _osm_state_mgr* const p_state_mgr,
IN osm_log_t* const p_log,
IN osm_stats_t* const p_stats,
IN cl_plock_t* const p_lock,
@@ -251,6 +256,9 @@ osm_sm_mad_ctrl_init(
* p_vendor
* [in] Pointer to the vendor specific interfaces object.
*
+* p_state_mgr
+* [in] Pointer to the state manager object.
+*
* p_log
* [in] Pointer to the log object.
*
Index: include/vendor/osm_vendor_ibumad.h
===================================================================
--- include/vendor/osm_vendor_ibumad.h (revision 8998)
+++ include/vendor/osm_vendor_ibumad.h (working copy)
@@ -74,6 +74,8 @@ BEGIN_C_DECLS
#define OSM_UMAD_MAX_CAS 32
#define OSM_UMAD_MAX_PORTS_PER_CA 2
+#define OSM_VENDOR_SUPPORT_EVENTS
+
/* OpenIB gen2 doesn't support RMPP yet */
/****s* OpenSM: Vendor UMAD/osm_ca_info_t
@@ -179,6 +181,10 @@ typedef struct _osm_vendor
int umad_port_id;
void *receiver;
int issmfd;
+ cl_thread_t events_thread;
+ void * events_callback;
+ void * sm_context;
+ struct ibv_context * ibv_context;
} osm_vendor_t;
#define OSM_BIND_INVALID_HANDLE 0
Index: include/vendor/osm_vendor_api.h
===================================================================
--- include/vendor/osm_vendor_api.h (revision 8998)
+++ include/vendor/osm_vendor_api.h (working copy)
@@ -526,6 +526,110 @@ osm_vendor_set_debug(
* SEE ALSO
*********/
+#ifdef OSM_VENDOR_SUPPORT_EVENTS
+
+#define OSM_EVENT_FATAL 1
+#define OSM_EVENT_PORT_ERR 2
+
+/****s* OpenSM Vendor API/osm_vend_events_callback_t
+* NAME
+* osm_vend_events_callback_t
+*
+* DESCRIPTION
+* Function prototype for the vendor events callback.
+* The vendor layer calls this function on driver events.
+*
+* SYNOPSIS
+*/
+typedef void
+(*osm_vend_events_callback_t)(
+ IN int events_mask,
+ IN void * const context );
+/*
+* PARAMETERS
+* events_mask
+* [in] The received event(s).
+*
+* context
+* [in] Context supplied as the "sm_context" argument in
+* the osm_vendor_reg_events_cb call
+*
+* RETURN VALUES
+* None.
+*
+* NOTES
+*
+* SEE ALSO
+* osm_vendor_reg_events_cb osm_vendor_unreg_events_cb
+*********/
+
+/****f* OpenSM Vendor API/osm_vendor_reg_events_cb
+* NAME
+* osm_vendor_reg_events_cb
+*
+* DESCRIPTION
+* Registers the events callback function and starts the events
+* thread
+*
+* SYNOPSIS
+*/
+int
+osm_vendor_reg_events_cb(
+ IN osm_vendor_t * const p_vend,
+ IN void * const sm_callback,
+ IN void * const sm_context);
+/*
+* PARAMETERS
+* p_vend
+* [in] vendor handle.
+*
+* sm_callback
+* [in] Callback function that should be called when
+* the event is received.
+*
+* sm_context
+* [in] Context supplied as the "context" argument in
+* the subsequent calls to the sm_callback function
+*
+* RETURN VALUE
+* IB_SUCCESS if OK.
+*
+* NOTES
+*
+* SEE ALSO
+* osm_vend_events_callback_t osm_vendor_unreg_events_cb
+*********/
+
+/****f* OpenSM Vendor API/osm_vendor_unreg_events_cb
+* NAME
+* osm_vendor_unreg_events_cb
+*
+* DESCRIPTION
+* Un-Registers the events callback function and stops the events
+* thread
+*
+* SYNOPSIS
+*/
+void
+osm_vendor_unreg_events_cb(
+ IN osm_vendor_t * const p_vend);
+/*
+* PARAMETERS
+* p_vend
+* [in] vendor handle.
+*
+*
+* RETURN VALUE
+* None.
+*
+* NOTES
+*
+* SEE ALSO
+* osm_vend_events_callback_t osm_vendor_reg_events_cb
+*********/
+
+#endif /* OSM_VENDOR_SUPPORT_EVENTS */
+
END_C_DECLS
#endif /* _OSM_VENDOR_API_H_ */
Index: libvendor/osm_vendor_ibumad.c
===================================================================
--- libvendor/osm_vendor_ibumad.c (revision 8998)
+++ libvendor/osm_vendor_ibumad.c (working copy)
@@ -72,6 +72,7 @@
#include <opensm/osm_log.h>
#include <opensm/osm_mad_pool.h>
#include <vendor/osm_vendor_api.h>
+#include <infiniband/verbs.h>
/****s* OpenSM: Vendor AL/osm_umad_bind_info_t
* NAME
@@ -441,6 +442,91 @@ Exit:
/**********************************************************************
**********************************************************************/
+/*
+ * Events thread entry point: blocks on the verbs async event queue of the
+ * SM's device and forwards the relevant events to the registered callback.
+ *
+ * vend_context
+ *   [in] The osm_vendor_t object handed to cl_thread_init.
+ *
+ * The thread terminates when fetching an event fails, when the events
+ * callback is found to be unregistered, or after a fatal device event
+ * has been delivered to the callback.
+ */
+static void
+umad_events_thread(
+  IN void * vend_context)
+{
+  int res = 0;
+  osm_vendor_t * p_vend = (osm_vendor_t *) vend_context;
+  /*
+   * Keep a private copy of the verbs context: osm_vendor_unreg_events_cb
+   * clears p_vend->ibv_context, and re-reading it on every iteration
+   * could hand a NULL context to ibv_get_async_event.
+   */
+  struct ibv_context * p_ibv_ctx = p_vend->ibv_context;
+  osm_vend_events_callback_t callback;
+  struct ibv_async_event event;
+
+  OSM_LOG_ENTER( p_vend->p_log, umad_events_thread );
+
+  osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+          "umad_events_thread: Device %s, async event FD: %d\n",
+          p_vend->umad_port.ca_name, p_ibv_ctx->async_fd);
+  osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+          "umad_events_thread: Listening for events on device %s, port %d\n",
+          p_vend->umad_port.ca_name, p_vend->umad_port.portnum);
+
+  while (1) {
+
+    res = ibv_get_async_event(p_ibv_ctx, &event);
+    if (res)
+    {
+      osm_log(p_vend->p_log, OSM_LOG_ERROR,
+              "umad_events_thread: ERR 5450: "
+              "Failed getting async event (device %s, port %d)\n",
+              p_vend->umad_port.ca_name, p_vend->umad_port.portnum);
+      goto Exit;
+    }
+
+    /*
+     * Read the callback once per event so that the pointer that was
+     * checked is the same pointer that gets called.
+     */
+    callback = (osm_vend_events_callback_t) p_vend->events_callback;
+    if (!callback)
+    {
+      osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+              "umad_events_thread: Events callback has been unregistered\n");
+      ibv_ack_async_event(&event);
+      goto Exit;
+    }
+
+    /*
+     * A fatal device event concerns the whole CA and carries no port
+     * number, so it must be handled before any port filtering.
+     */
+    if ( event.event_type == IBV_EVENT_DEVICE_FATAL )
+    {
+      osm_log(p_vend->p_log, OSM_LOG_INFO,
+              "umad_events_thread: Received IBV_EVENT_DEVICE_FATAL\n");
+      callback(OSM_EVENT_FATAL, p_vend->sm_context);
+
+      ibv_ack_async_event(&event);
+      goto Exit;
+    }
+
+    /*
+     * For port events we're listening to the SM's port only
+     */
+    if ( event.event_type == IBV_EVENT_PORT_ERR &&
+         event.element.port_num == p_vend->umad_port.portnum )
+    {
+      osm_log(p_vend->p_log, OSM_LOG_VERBOSE,
+              "umad_events_thread: Received IBV_EVENT_PORT_ERR\n");
+      callback(OSM_EVENT_PORT_ERR, p_vend->sm_context);
+    }
+    else
+    {
+      osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+              "umad_events_thread: Received event #%d on port %d - Ignoring\n",
+              event.event_type, event.element.port_num);
+    }
+
+    /* every event returned by ibv_get_async_event must be acknowledged */
+    ibv_ack_async_event(&event);
+  }
+
+ Exit:
+  osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+          "umad_events_thread: Terminating thread\n");
+  OSM_LOG_EXIT(p_vend->p_log);
+  return;
+}
+
+/**********************************************************************
+ **********************************************************************/
+
+/**********************************************************************
+ **********************************************************************/
ib_api_status_t
osm_vendor_init(
IN osm_vendor_t* const p_vend,
@@ -456,6 +542,7 @@ osm_vendor_init(
p_vend->max_retries = OSM_DEFAULT_RETRY_COUNT;
cl_spinlock_construct( &p_vend->cb_lock );
cl_spinlock_construct( &p_vend->match_tbl_lock );
+ cl_thread_construct( &p_vend->events_thread );
p_vend->umad_port_id = -1;
p_vend->issmfd = -1;
@@ -1217,4 +1304,114 @@ osm_vendor_set_debug(
umad_debug(level);
}
+/**********************************************************************
+ **********************************************************************/
+/*
+ * Registers the SM events callback, opens the verbs device whose name
+ * matches the SM's umad port CA name, and starts the events thread.
+ *
+ * p_vend      [in] vendor handle.
+ * sm_callback [in] function called by the events thread on each event.
+ * sm_context  [in] opaque pointer handed back to sm_callback.
+ *
+ * Returns IB_SUCCESS on success, IB_ERROR otherwise. On failure the
+ * callback, SM context and verbs context stored in p_vend are cleared.
+ */
+int
+osm_vendor_reg_events_cb(
+  IN osm_vendor_t * const p_vend,
+  IN void * const sm_callback,
+  IN void * const sm_context)
+{
+  ib_api_status_t status = IB_SUCCESS;
+  struct ibv_device ** dev_list = NULL;
+  struct ibv_device * device = NULL;
+  int i;
+
+  OSM_LOG_ENTER( p_vend->p_log, osm_vendor_reg_events_cb );
+
+  p_vend->events_callback = sm_callback;
+  p_vend->sm_context = sm_context;
+
+  dev_list = ibv_get_device_list(NULL);
+  if (!dev_list || !(*dev_list)) {
+    osm_log(p_vend->p_log, OSM_LOG_ERROR,
+            "osm_vendor_reg_events_cb: ERR 5440: "
+            "No IB devices found\n");
+    status = IB_ERROR;
+    goto Exit;
+  }
+
+  if (!p_vend->umad_port.ca_name || !p_vend->umad_port.ca_name[0])
+  {
+    osm_log(p_vend->p_log, OSM_LOG_ERROR,
+            "osm_vendor_reg_events_cb: ERR 5441: "
+            "Vendor initialization is not completed yet\n");
+    status = IB_ERROR;
+    goto Exit;
+  }
+
+  osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+          "osm_vendor_reg_events_cb: Registering on device %s\n",
+          p_vend->umad_port.ca_name);
+
+  /*
+   * Find the device whose name matches the SM's device.
+   * dev_list is a NULL-terminated array of device pointers, so it has
+   * to be walked entry by entry; the device structures themselves are
+   * not laid out as an array.
+   */
+  for ( i = 0; dev_list[i] != NULL; i++ )
+  {
+    if (strcmp(p_vend->umad_port.ca_name,
+               ibv_get_device_name(dev_list[i])) == 0)
+    {
+      device = dev_list[i];
+      break;
+    }
+  }
+  if (!device)
+  {
+    osm_log(p_vend->p_log, OSM_LOG_ERROR,
+            "osm_vendor_reg_events_cb: ERR 5442: "
+            "Device %s hasn't been found in the device list\n"
+            ,p_vend->umad_port.ca_name);
+    status = IB_ERROR;
+    goto Exit;
+  }
+
+  p_vend->ibv_context = ibv_open_device(device);
+  if (!p_vend->ibv_context) {
+    osm_log(p_vend->p_log, OSM_LOG_ERROR,
+            "osm_vendor_reg_events_cb: ERR 5443: "
+            "Couldn't get context for %s\n",
+            p_vend->umad_port.ca_name);
+    status = IB_ERROR;
+    goto Exit;
+  }
+
+  /*
+   * Initiate the events thread
+   */
+  if (cl_thread_init(&p_vend->events_thread,
+                     umad_events_thread,
+                     p_vend,
+                     "ibumad events thread") != CL_SUCCESS) {
+    osm_log(p_vend->p_log, OSM_LOG_ERROR,
+            "osm_vendor_reg_events_cb: ERR 5444: "
+            "Failed initiating event listening thread\n");
+    /* nobody will use the context - don't leak the opened device */
+    ibv_close_device(p_vend->ibv_context);
+    status = IB_ERROR;
+    goto Exit;
+  }
+
+ Exit:
+  /*
+   * The list is needed for the lookup only; a device that has already
+   * been opened stays usable after the list is released.
+   */
+  if (dev_list)
+    ibv_free_device_list(dev_list);
+
+  if (status != IB_SUCCESS)
+  {
+    p_vend->events_callback = NULL;
+    p_vend->sm_context = NULL;
+    p_vend->ibv_context = NULL;
+  }
+  OSM_LOG_EXIT( p_vend->p_log );
+  return status;
+}
+
+
+/**********************************************************************
+ **********************************************************************/
+/*
+ * Un-registers the events callback: clears the callback, the SM context
+ * and the verbs context pointer stored in the vendor object.
+ *
+ * p_vend [in] vendor handle.
+ *
+ * NOTE(review): this function neither waits for the events thread nor
+ * calls ibv_close_device() on the context it drops. The thread tests the
+ * callback pointer only after an async event has been fetched, so it may
+ * stay alive until the next event - confirm that this is intended.
+ */
+void
+osm_vendor_unreg_events_cb(
+  IN osm_vendor_t * const p_vend)
+{
+  OSM_LOG_ENTER( p_vend->p_log, osm_vendor_unreg_events_cb );
+  p_vend->events_callback = NULL;
+  p_vend->sm_context = NULL;
+  p_vend->ibv_context = NULL;
+  OSM_LOG_EXIT( p_vend->p_log );
+}
+
+/**********************************************************************
+ **********************************************************************/
+
#endif /* OSM_VENDOR_INTF_OPENIB */
Index: libvendor/libosmvendor.map
===================================================================
--- libvendor/libosmvendor.map (revision 8998)
+++ libvendor/libosmvendor.map (working copy)
@@ -1,4 +1,4 @@
-OSMVENDOR_2.0 {
+OSMVENDOR_2.1 {
global:
umad_receiver;
osm_vendor_init;
@@ -23,5 +23,7 @@ OSMVENDOR_2.0 {
osmv_bind_sa;
osmv_query_sa;
osm_vendor_get_guid_ca_and_port;
+ osm_vendor_reg_events_cb;
+ osm_vendor_unreg_events_cb;
local: *;
};
Index: opensm/osm_sm.c
===================================================================
--- opensm/osm_sm.c (revision 8998)
+++ opensm/osm_sm.c (working copy)
@@ -313,6 +313,7 @@ osm_sm_init(
p_sm->p_mad_pool,
p_sm->p_vl15,
p_sm->p_vendor,
+ &p_sm->state_mgr,
p_log, p_stats, p_lock, p_disp );
if( status != IB_SUCCESS )
goto Exit;
Index: opensm/osm_sm_mad_ctrl.c
===================================================================
--- opensm/osm_sm_mad_ctrl.c (revision 8998)
+++ opensm/osm_sm_mad_ctrl.c (working copy)
@@ -59,6 +59,7 @@
#include <opensm/osm_msgdef.h>
#include <opensm/osm_helper.h>
#include <opensm/osm_opensm.h>
+#include <opensm/osm_state_mgr.h>
/****f* opensm: SM/__osm_sm_mad_ctrl_retire_trans_mad
* NAME
@@ -953,6 +954,7 @@ osm_sm_mad_ctrl_init(
IN osm_mad_pool_t* const p_mad_pool,
IN osm_vl15_t* const p_vl15,
IN osm_vendor_t* const p_vendor,
+ IN struct _osm_state_mgr* const p_state_mgr,
IN osm_log_t* const p_log,
IN osm_stats_t* const p_stats,
IN cl_plock_t* const p_lock,
@@ -969,6 +971,7 @@ osm_sm_mad_ctrl_init(
p_ctrl->p_disp = p_disp;
p_ctrl->p_mad_pool = p_mad_pool;
p_ctrl->p_vendor = p_vendor;
+ p_ctrl->p_state_mgr = p_state_mgr;
p_ctrl->p_stats = p_stats;
p_ctrl->p_lock = p_lock;
p_ctrl->p_vl15 = p_vl15;
@@ -995,6 +998,47 @@ osm_sm_mad_ctrl_init(
/**********************************************************************
**********************************************************************/
+/*
+ * Vendor events callback of the SM MAD controller.
+ *
+ * events_mask [in] mask of OSM_EVENT_* bits reported by the vendor layer.
+ * context     [in] the osm_sm_mad_ctrl_t object given at registration.
+ *
+ * OSM_EVENT_FATAL raises osm_exit_flag; OSM_EVENT_PORT_ERR requests an
+ * immediate heavy sweep and signals the state manager. The fatal bit is
+ * tested first and takes precedence. Any other mask is only logged.
+ */
+void
+__osm_vend_events_callback(
+  IN int events_mask,
+  IN void * const context )
+{
+  osm_sm_mad_ctrl_t * const p_ctrl = (osm_sm_mad_ctrl_t * const) context;
+
+  OSM_LOG_ENTER(p_ctrl->p_log, __osm_vend_events_callback);
+
+  if (events_mask & OSM_EVENT_FATAL)
+  {
+    osm_log(p_ctrl->p_log, OSM_LOG_INFO,
+            "__osm_vend_events_callback: "
+            "Events callback got OSM_EVENT_FATAL\n");
+    osm_log(p_ctrl->p_log, OSM_LOG_SYS,
+            "Fatal HCA error - forcing OpenSM exit\n");
+    osm_exit_flag = 1;
+    OSM_LOG_EXIT(p_ctrl->p_log);
+    return;
+  }
+
+  if (events_mask & OSM_EVENT_PORT_ERR)
+  {
+    osm_log(p_ctrl->p_log, OSM_LOG_INFO,
+            "__osm_vend_events_callback: "
+            "Events callback got OSM_EVENT_PORT_ERR - forcing heavy sweep\n");
+    p_ctrl->p_subn->force_immediate_heavy_sweep = TRUE;
+    osm_state_mgr_process((osm_state_mgr_t * const)p_ctrl->p_state_mgr,
+                          OSM_SIGNAL_SWEEP);
+    OSM_LOG_EXIT(p_ctrl->p_log);
+    return;
+  }
+
+  /* the format string consumes one int - it has to be supplied */
+  osm_log(p_ctrl->p_log, OSM_LOG_INFO,
+          "__osm_vend_events_callback: "
+          "Events callback got event mask of %d - No action taken\n",
+          events_mask);
+  OSM_LOG_EXIT(p_ctrl->p_log);
+}
+
+/**********************************************************************
+ **********************************************************************/
ib_api_status_t
osm_sm_mad_ctrl_bind(
IN osm_sm_mad_ctrl_t* const p_ctrl,
@@ -1044,6 +1088,17 @@ osm_sm_mad_ctrl_bind(
goto Exit;
}
+ if ( osm_vendor_reg_events_cb(p_ctrl->p_vendor,
+ __osm_vend_events_callback,
+ p_ctrl) )
+ {
+ status = IB_ERROR;
+ osm_log( p_ctrl->p_log, OSM_LOG_ERROR,
+ "osm_sm_mad_ctrl_bind: ERR 3120: "
+ "Vendor failed to register for events\n" );
+ goto Exit;
+ }
+
Exit:
OSM_LOG_EXIT( p_ctrl->p_log );
return( status );
Index: config/osmvsel.m4
===================================================================
--- config/osmvsel.m4 (revision 8998)
+++ config/osmvsel.m4 (working copy)
@@ -63,9 +63,9 @@ if test $with_osmv = "openib"; then
OSMV_CFLAGS="-DOSM_VENDOR_INTF_OPENIB"
OSMV_INCLUDES="-I\$(srcdir)/../include -I\$(srcdir)/../../libibcommon/include/infiniband -I\$(srcdir)/../../libibumad/include/infiniband"
if test "x$with_umad_libs" = "x"; then
- OSMV_LDADD="-libumad"
+ OSMV_LDADD="-libumad -libverbs"
else
- OSMV_LDADD="-L$with_umad_libs -libumad"
+ OSMV_LDADD="-L$with_umad_libs -libumad -libverbs"
fi
if test "x$with_umad_includes" != "x"; then
@@ -137,6 +137,8 @@ if test "$disable_libcheck" != "yes"; th
LDFLAGS="$LDFLAGS $OSMV_LDADD"
AC_CHECK_LIB(ibumad, umad_init, [],
AC_MSG_ERROR([umad_init() not found. libosmvendor of type openib requires libibumad.]))
+ dnl libibverbs supplies the device list and async event calls used by the vendor events thread.
+ AC_CHECK_LIB(ibverbs, ibv_get_device_list, [],
+ AC_MSG_ERROR([ibv_get_device_list() not found. libosmvendor of type openib requires libibverbs.]))
LD_FLAGS=$osmv_save_ldflags
elif test $with_osmv = "sim" ; then
LDFLAGS="$LDFLAGS -L$with_sim/lib"
More information about the general
mailing list