[openib-general] [PATCH] osm: handle local events
Hal Rosenstock
halr at voltaire.com
Tue Aug 22 10:35:58 PDT 2006
Hi Yevgeny,
On Tue, 2006-08-22 at 11:41, Yevgeny Kliteynik wrote:
> Hi Hal
>
> This patch implements first item of the OSM todo list.
Thanks!
Am I correct in assuming this is for both trunk and 1.1?
> OpenSM opens a thread that is listening for events on the SM's port.
> The events that are being taken care of are IBV_EVENT_DEVICE_FATAL and
> IBV_EVENT_PORT_ERR.
>
> In case of IBV_EVENT_DEVICE_FATAL, osm is forced to exit.
> In case of IBV_EVENT_PORT_ERR, osm initiates a heavy sweep.
Some minor comments below. Let me know what you think. You don't have to
resubmit for these.
> Yevgeny
>
> Signed-off-by: Yevgeny Kliteynik <kliteyn at mellanox.co.il>
>
> Index: include/opensm/osm_sm_mad_ctrl.h
> ===================================================================
> -- include/opensm/osm_sm_mad_ctrl.h (revision 8998)
> +++ include/opensm/osm_sm_mad_ctrl.h (working copy)
> @@ -109,6 +109,7 @@ typedef struct _osm_sm_mad_ctrl
> osm_mad_pool_t *p_mad_pool;
> osm_vl15_t *p_vl15;
> osm_vendor_t *p_vendor;
> + struct _osm_state_mgr *p_state_mgr;
> osm_bind_handle_t h_bind;
> cl_plock_t *p_lock;
> cl_dispatcher_t *p_disp;
> @@ -130,6 +131,9 @@ typedef struct _osm_sm_mad_ctrl
> * p_vendor
> * Pointer to the vendor specific interfaces object.
> *
> +* p_state_mgr
> +* Pointer to the state manager object.
> +*
> * h_bind
> * Bind handle returned by the transport layer.
> *
> @@ -233,6 +237,7 @@ osm_sm_mad_ctrl_init(
> IN osm_mad_pool_t* const p_mad_pool,
> IN osm_vl15_t* const p_vl15,
> IN osm_vendor_t* const p_vendor,
> + IN struct _osm_state_mgr* const p_state_mgr,
> IN osm_log_t* const p_log,
> IN osm_stats_t* const p_stats,
> IN cl_plock_t* const p_lock,
> @@ -251,6 +256,9 @@ osm_sm_mad_ctrl_init(
> * p_vendor
> * [in] Pointer to the vendor specific interfaces object.
> *
> +* p_state_mgr
> +* [in] Pointer to the state manager object.
> +*
> * p_log
> * [in] Pointer to the log object.
> *
> Index: include/vendor/osm_vendor_ibumad.h
> ===================================================================
> -- include/vendor/osm_vendor_ibumad.h (revision 8998)
> +++ include/vendor/osm_vendor_ibumad.h (working copy)
> @@ -74,6 +74,8 @@ BEGIN_C_DECLS
> #define OSM_UMAD_MAX_CAS 32
> #define OSM_UMAD_MAX_PORTS_PER_CA 2
>
> +#define OSM_VENDOR_SUPPORT_EVENTS
> +
I prefer this as an additional flag turned on in the build for OpenIB.
> /* OpenIB gen2 doesn't support RMPP yet */
>
> /****s* OpenSM: Vendor UMAD/osm_ca_info_t
> @@ -179,6 +181,10 @@ typedef struct _osm_vendor
> int umad_port_id;
> void *receiver;
> int issmfd;
> + cl_thread_t events_thread;
> + void * events_callback;
> + void * sm_context;
> + struct ibv_context * ibv_context;
> } osm_vendor_t;
>
> #define OSM_BIND_INVALID_HANDLE 0
> Index: include/vendor/osm_vendor_api.h
> ===================================================================
> -- include/vendor/osm_vendor_api.h (revision 8998)
> +++ include/vendor/osm_vendor_api.h (working copy)
> @@ -526,6 +526,110 @@ osm_vendor_set_debug(
> * SEE ALSO
> *********/
>
> +#ifdef OSM_VENDOR_SUPPORT_EVENTS
> +
> +#define OSM_EVENT_FATAL 1
> +#define OSM_EVENT_PORT_ERR 2
> +
> +/****s* OpenSM Vendor API/osm_vend_events_callback_t
> +* NAME
> +* osm_vend_events_callback_t
> +*
> +* DESCRIPTION
> +* Function prototype for the vendor events callback.
> +* The vendor layer calls this function on driver events.
> +*
> +* SYNOPSIS
> +*/
> +typedef void
> +(*osm_vend_events_callback_t)(
> + IN int events_mask,
> + IN void * const context );
> +/*
> +* PARAMETERS
> +* events_mask
> +* [in] The received event(s).
> +*
> +* context
> +* [in] Context supplied as the "sm_context" argument in
> +* the osm_vendor_reg_events_cb call
> +*
> +* RETURN VALUES
> +* None.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +* osm_vendor_reg_events_cb osm_vendor_unreg_events_cb
> +*********/
> +
> +/****f* OpenSM Vendor API/osm_vendor_reg_events_cb
> +* NAME
> +* osm_vendor_reg_events_cb
> +*
> +* DESCRIPTION
> +* Registers the events callback function and starts the events
> +* thread
> +*
> +* SYNOPSIS
> +*/
> +int
> +osm_vendor_reg_events_cb(
> + IN osm_vendor_t * const p_vend,
> + IN void * const sm_callback,
> + IN void * const sm_context);
> +/*
> +* PARAMETERS
> +* p_vend
> +* [in] vendor handle.
> +*
> +* sm_callback
> +* [in] Callback function that should be called when
> +* the event is received.
> +*
> +* sm_context
> +* [in] Context supplied as the "context" argument in
> +* the subsequent calls to the sm_callback function
> +*
> +* RETURN VALUE
> +* IB_SUCCESS if OK.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +* osm_vend_events_callback_t osm_vendor_unreg_events_cb
> +*********/
> +
> +/****f* OpenSM Vendor API/osm_vendor_unreg_events_cb
> +* NAME
> +* osm_vendor_unreg_events_cb
> +*
> +* DESCRIPTION
> +* Un-Registers the events callback function and stops the events
> +* thread
> +*
> +* SYNOPSIS
> +*/
> +void
> +osm_vendor_unreg_events_cb(
> + IN osm_vendor_t * const p_vend);
> +/*
> +* PARAMETERS
> +* p_vend
> +* [in] vendor handle.
> +*
> +*
> +* RETURN VALUE
> +* None.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +* osm_vend_events_callback_t osm_vendor_reg_events_cb
> +*********/
> +
> +#endif /* OSM_VENDOR_SUPPORT_EVENTS */
> +
> END_C_DECLS
>
> #endif /* _OSM_VENDOR_API_H_ */
> Index: libvendor/osm_vendor_ibumad.c
> ===================================================================
> -- libvendor/osm_vendor_ibumad.c (revision 8998)
> +++ libvendor/osm_vendor_ibumad.c (working copy)
> @@ -72,6 +72,7 @@
> #include <opensm/osm_log.h>
> #include <opensm/osm_mad_pool.h>
> #include <vendor/osm_vendor_api.h>
> +#include <infiniband/verbs.h>
>
> /****s* OpenSM: Vendor AL/osm_umad_bind_info_t
> * NAME
> @@ -441,6 +442,91 @@ Exit:
>
> /**********************************************************************
> **********************************************************************/
> +static void
> +umad_events_thread(
> + IN void * vend_context)
> +{
> + int res = 0;
> + osm_vendor_t * p_vend = (osm_vendor_t *) vend_context;
> + struct ibv_async_event event;
> +
> + OSM_LOG_ENTER( p_vend->p_log, umad_events_thread );
> +
> + osm_log(p_vend->p_log, OSM_LOG_DEBUG,
> + "umad_events_thread: Device %s, async event FD: %d\n",
> + p_vend->umad_port.ca_name, p_vend->ibv_context->async_fd);
> + osm_log(p_vend->p_log, OSM_LOG_DEBUG,
> + "umad_events_thread: Listening for events on device %s, port %d\n",
> + p_vend->umad_port.ca_name, p_vend->umad_port.portnum);
> +
> + while (1) {
> +
> + res = ibv_get_async_event(p_vend->ibv_context, &event);
> + if (res)
> + {
> + osm_log(p_vend->p_log, OSM_LOG_ERROR,
> + "umad_events_thread: ERR 5450: "
> + "Failed getting async event (device %s, port %d)\n",
> + p_vend->umad_port.ca_name, p_vend->umad_port.portnum);
> + goto Exit;
> + }
> +
> + if (!p_vend->events_callback)
> + {
> + osm_log(p_vend->p_log, OSM_LOG_DEBUG,
> + "umad_events_thread: Events callback has been unregistered\n");
> + ibv_ack_async_event(&event);
> + goto Exit;
> + }
> + /*
> + * We're listening to events on the SM's port only
> + */
> + if ( event.element.port_num == p_vend->umad_port.portnum )
> + {
> + switch (event.event_type)
> + {
> + case IBV_EVENT_DEVICE_FATAL:
> + osm_log(p_vend->p_log, OSM_LOG_INFO,
> + "umad_events_thread: Received IBV_EVENT_DEVICE_FATAL\n");
> + ((osm_vend_events_callback_t)
> + (p_vend->events_callback))(OSM_EVENT_FATAL, p_vend->sm_context);
> +
> + ibv_ack_async_event(&event);
> + goto Exit;
> + break;
> +
> + case IBV_EVENT_PORT_ERR:
> + osm_log(p_vend->p_log, OSM_LOG_VERBOSE,
> + "umad_events_thread: Received IBV_EVENT_PORT_ERR\n");
> + ((osm_vend_events_callback_t)
> + (p_vend->events_callback))(OSM_EVENT_PORT_ERR, p_vend->sm_context);
> + break;
> +
> + default:
> + osm_log(p_vend->p_log, OSM_LOG_DEBUG,
> + "umad_events_thread: Received event #%d on port %d - Ignoring\n",
> + event.event_type, event.element.port_num);
> + }
> + }
> + else
> + {
> + osm_log(p_vend->p_log, OSM_LOG_DEBUG,
> + "umad_events_thread: Received event #%d on port %d - Ignoring\n",
> + event.event_type, event.element.port_num);
> + }
> +
> + ibv_ack_async_event(&event);
> + }
> +
> + Exit:
> + osm_log(p_vend->p_log, OSM_LOG_DEBUG,
> + "umad_events_thread: Terminating thread\n");
> + OSM_LOG_EXIT(p_vend->p_log);
> + return;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> ib_api_status_t
> osm_vendor_init(
> IN osm_vendor_t* const p_vend,
> @@ -456,6 +542,7 @@ osm_vendor_init(
> p_vend->max_retries = OSM_DEFAULT_RETRY_COUNT;
> cl_spinlock_construct( &p_vend->cb_lock );
> cl_spinlock_construct( &p_vend->match_tbl_lock );
> + cl_thread_construct( &p_vend->events_thread );
> p_vend->umad_port_id = -1;
> p_vend->issmfd = -1;
>
> @@ -1217,4 +1304,114 @@ osm_vendor_set_debug(
> umad_debug(level);
> }
>
> +/**********************************************************************
> + **********************************************************************/
> +int
> +osm_vendor_reg_events_cb(
> + IN osm_vendor_t * const p_vend,
> + IN void * const sm_callback,
> + IN void * const sm_context)
> +{
> + ib_api_status_t status = IB_SUCCESS;
> + struct ibv_device ** dev_list;
> + struct ibv_device * device;
> +
> + OSM_LOG_ENTER( p_vend->p_log, osm_vendor_reg_events_cb );
> +
> + p_vend->events_callback = sm_callback;
> + p_vend->sm_context = sm_context;
> +
> + dev_list = ibv_get_device_list(NULL);
> + if (!dev_list || !(*dev_list)) {
> + osm_log(p_vend->p_log, OSM_LOG_ERROR,
> + "osm_vendor_reg_events_cb: ERR 5440: "
> + "No IB devices found\n");
> + status = IB_ERROR;
> + goto Exit;
> + }
> +
> + if (!p_vend->umad_port.ca_name || !p_vend->umad_port.ca_name[0])
> + {
> + osm_log(p_vend->p_log, OSM_LOG_ERROR,
> + "osm_vendor_reg_events_cb: ERR 5441: "
> + "Vendor initialization is not completed yet\n");
> + status = IB_ERROR;
> + goto Exit;
> + }
> +
> + osm_log(p_vend->p_log, OSM_LOG_DEBUG,
> + "osm_vendor_reg_events_cb: Registering on device %s\n",
> + p_vend->umad_port.ca_name);
> +
> + /*
> + * find device whose name matches the SM's device
> + */
> + for ( device = *dev_list;
> + (device != NULL) &&
> + (strcmp(p_vend->umad_port.ca_name, ibv_get_device_name(device)) != 0);
> + device += sizeof(struct ibv_device *) )
> + ;
> + if (!device)
> + {
> + osm_log(p_vend->p_log, OSM_LOG_ERROR,
> + "osm_vendor_reg_events_cb: ERR 5442: "
> + "Device %s hasn't been found in the device list\n"
> + ,p_vend->umad_port.ca_name);
> + status = IB_ERROR;
> + goto Exit;
> + }
> +
> + p_vend->ibv_context = ibv_open_device(device);
> + if (!p_vend->ibv_context) {
> + osm_log(p_vend->p_log, OSM_LOG_ERROR,
> + "osm_vendor_reg_events_cb: ERR 5443: "
> + "Couldn't get context for %s\n",
> + p_vend->umad_port.ca_name);
> + status = IB_ERROR;
> + goto Exit;
> + }
> +
> + /*
> + * Initiate the events thread
> + */
> + if (cl_thread_init(&p_vend->events_thread,
> + umad_events_thread,
> + p_vend,
> + "ibumad events thread") != CL_SUCCESS) {
> + osm_log(p_vend->p_log, OSM_LOG_ERROR,
> + "osm_vendor_reg_events_cb: ERR 5444: "
> + "Failed initiating event listening thread\n");
> + status = IB_ERROR;
> + goto Exit;
> + }
> +
> + Exit:
> + if (status != IB_SUCCESS)
> + {
> + p_vend->events_callback = NULL;
> + p_vend->sm_context = NULL;
> + p_vend->ibv_context = NULL;
> + p_vend->events_callback = NULL;
> + }
> + OSM_LOG_EXIT( p_vend->p_log );
> + return status;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +void
> +osm_vendor_unreg_events_cb(
> + IN osm_vendor_t * const p_vend)
> +{
> + OSM_LOG_ENTER( p_vend->p_log, osm_vendor_unreg_events_cb );
> + p_vend->events_callback = NULL;
> + p_vend->sm_context = NULL;
> + p_vend->ibv_context = NULL;
> + p_vend->events_callback = NULL;
> + OSM_LOG_EXIT( p_vend->p_log );
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> #endif /* OSM_VENDOR_INTF_OPENIB */
> Index: libvendor/libosmvendor.map
> ===================================================================
> -- libvendor/libosmvendor.map (revision 8998)
> +++ libvendor/libosmvendor.map (working copy)
> @@ -1,4 +1,4 @@
> -OSMVENDOR_2.0 {
> +OSMVENDOR_2.1 {
> global:
> umad_receiver;
> osm_vendor_init;
> @@ -23,5 +23,7 @@ OSMVENDOR_2.0 {
> osmv_bind_sa;
> osmv_query_sa;
> osm_vendor_get_guid_ca_and_port;
> + osm_vendor_reg_events_cb;
> + osm_vendor_unreg_events_cb;
> local: *;
> };
> Index: opensm/osm_sm.c
> ===================================================================
> -- opensm/osm_sm.c (revision 8998)
> +++ opensm/osm_sm.c (working copy)
> @@ -313,6 +313,7 @@ osm_sm_init(
> p_sm->p_mad_pool,
> p_sm->p_vl15,
> p_sm->p_vendor,
> + &p_sm->state_mgr,
> p_log, p_stats, p_lock, p_disp );
> if( status != IB_SUCCESS )
> goto Exit;
> Index: opensm/osm_sm_mad_ctrl.c
> ===================================================================
> -- opensm/osm_sm_mad_ctrl.c (revision 8998)
> +++ opensm/osm_sm_mad_ctrl.c (working copy)
> @@ -59,6 +59,7 @@
> #include <opensm/osm_msgdef.h>
> #include <opensm/osm_helper.h>
> #include <opensm/osm_opensm.h>
> +#include <opensm/osm_state_mgr.h>
>
> /****f* opensm: SM/__osm_sm_mad_ctrl_retire_trans_mad
> * NAME
> @@ -953,6 +954,7 @@ osm_sm_mad_ctrl_init(
> IN osm_mad_pool_t* const p_mad_pool,
> IN osm_vl15_t* const p_vl15,
> IN osm_vendor_t* const p_vendor,
> + IN struct _osm_state_mgr* const p_state_mgr,
> IN osm_log_t* const p_log,
> IN osm_stats_t* const p_stats,
> IN cl_plock_t* const p_lock,
> @@ -969,6 +971,7 @@ osm_sm_mad_ctrl_init(
> p_ctrl->p_disp = p_disp;
> p_ctrl->p_mad_pool = p_mad_pool;
> p_ctrl->p_vendor = p_vendor;
> + p_ctrl->p_state_mgr = p_state_mgr;
> p_ctrl->p_stats = p_stats;
> p_ctrl->p_lock = p_lock;
> p_ctrl->p_vl15 = p_vl15;
> @@ -995,6 +998,47 @@ osm_sm_mad_ctrl_init(
>
> /**********************************************************************
> **********************************************************************/
> +void
> +__osm_vend_events_callback(
> + IN int events_mask,
> + IN void * const context )
Shouldn't this be conditionalized on OSM_VENDOR_SUPPORT_EVENTS ?
> +{
> + osm_sm_mad_ctrl_t * const p_ctrl = (osm_sm_mad_ctrl_t * const) context;
> +
> + OSM_LOG_ENTER(p_ctrl->p_log, __osm_vend_events_callback);
> +
> + if (events_mask & OSM_EVENT_FATAL)
> + {
> + osm_log(p_ctrl->p_log, OSM_LOG_INFO,
> + "__osm_vend_events_callback: "
> + "Events callback got OSM_EVENT_FATAL\n");
> + osm_log(p_ctrl->p_log, OSM_LOG_SYS,
> + "Fatal HCA error - forcing OpenSM exit\n");
> + osm_exit_flag = 1;
> + OSM_LOG_EXIT(p_ctrl->p_log);
> + return;
> + }
> +
> + if (events_mask & OSM_EVENT_PORT_ERR)
> + {
> + osm_log(p_ctrl->p_log, OSM_LOG_INFO,
> + "__osm_vend_events_callback: "
> + "Events callback got OSM_EVENT_PORT_ERR - forcing heavy sweep\n");
> + p_ctrl->p_subn->force_immediate_heavy_sweep = TRUE;
> + osm_state_mgr_process((osm_state_mgr_t * const)p_ctrl->p_state_mgr,
> + OSM_SIGNAL_SWEEP);
> + OSM_LOG_EXIT(p_ctrl->p_log);
> + return;
> + }
> +
> + osm_log(p_ctrl->p_log, OSM_LOG_INFO,
> + "__osm_vend_events_callback: "
> + "Events callback got event mask of %d - No action taken\n");
> + OSM_LOG_EXIT(p_ctrl->p_log);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> ib_api_status_t
> osm_sm_mad_ctrl_bind(
> IN osm_sm_mad_ctrl_t* const p_ctrl,
> @@ -1044,6 +1088,17 @@ osm_sm_mad_ctrl_bind(
> goto Exit;
> }
>
> + if ( osm_vendor_reg_events_cb(p_ctrl->p_vendor,
> + __osm_vend_events_callback,
> + p_ctrl) )
> + {
> + status = IB_ERROR;
> + osm_log( p_ctrl->p_log, OSM_LOG_ERROR,
> + "osm_sm_mad_ctrl_bind: ERR 3120: "
> + "Vendor failed to register for events\n" );
> + goto Exit;
> + }
> +
This should be conditionalized on OSM_VENDOR_SUPPORT_EVENTS.
> Exit:
> OSM_LOG_EXIT( p_ctrl->p_log );
> return( status );
> Index: config/osmvsel.m4
> ===================================================================
> -- config/osmvsel.m4 (revision 8998)
> +++ config/osmvsel.m4 (working copy)
> @@ -63,9 +63,9 @@ if test $with_osmv = "openib"; then
> OSMV_CFLAGS="-DOSM_VENDOR_INTF_OPENIB"
> OSMV_INCLUDES="-I\$(srcdir)/../include -I\$(srcdir)/../../libibcommon/include/infiniband -I\$(srcdir)/../../libibumad/include/infiniband"
> if test "x$with_umad_libs" = "x"; then
> - OSMV_LDADD="-libumad"
> + OSMV_LDADD="-libumad -libverbs"
> else
> - OSMV_LDADD="-L$with_umad_libs -libumad"
> + OSMV_LDADD="-L$with_umad_libs -libumad -libverbs"
> fi
>
> if test "x$with_umad_includes" != "x"; then
> @@ -137,6 +137,8 @@ if test "$disable_libcheck" != "yes"; th
> LDFLAGS="$LDFLAGS $OSMV_LDADD"
> AC_CHECK_LIB(ibumad, umad_init, [],
> AC_MSG_ERROR([umad_init() not found. libosmvendor of type openib requires libibumad.]))
> + AC_CHECK_LIB(ibverbs, ibv_get_device_list, [],
> + AC_MSG_ERROR([umad_init() not found. libosmvendor of type openib requires libibverbs.]))
Cut and paste error: Error message should indicate ibv_get_device_list
rather than umad_init.
> LD_FLAGS=$osmv_save_ldflags
> elif test $with_osmv = "sim" ; then
> LDFLAGS="$LDFLAGS -L$with_sim/lib"
-- Hal
More information about the general
mailing list