[ofa-general] ***SPAM*** [PATCHv2] opensm/PerfMgr: Better redirection support

Hal Rosenstock hnrose at comcast.net
Thu Mar 12 13:21:34 PDT 2009


Handle PKey and QPN redirection information
GID redirection handling remains

Signed-off-by: Hal Rosenstock <hal.rosenstock at gmail.com>

---
Changes since v1:
Added include of osm_helper.h to osm_perfmgr.c

Notes: osm_console redirection patch is unchanged
Also, relies on ib_gid_is_notzero patch

diff --git a/opensm/include/opensm/osm_perfmgr.h b/opensm/include/opensm/osm_perfmgr.h
index 45dec54..651ea80 100644
--- a/opensm/include/opensm/osm_perfmgr.h
+++ b/opensm/include/opensm/osm_perfmgr.h
@@ -92,8 +92,14 @@ typedef enum {
 
 /* Redirection information */
 typedef struct redir {
+	boolean_t redirection;
+	boolean_t invalid;
+	ib_gid_t redir_gid;
 	ib_net16_t redir_lid;
+	ib_net16_t redir_pkey;
 	ib_net32_t redir_qp;
+	uint16_t redir_pkey_ix;
+	ib_net16_t orig_lid;
 } redir_t;
 
 /* Node to store information about which nodes we are monitoring */
@@ -134,6 +140,7 @@ typedef struct osm_perfmgr {
 	uint32_t max_outstanding_queries;
 	cl_qmap_t monitored_map;	/* map the nodes we are tracking */
 	__monitored_node_t *remove_list;
+	int16_t local_port;
 } osm_perfmgr_t;
 /*
 * FIELDS
diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c
index 4a6f65c..fc1b7cb 100644
--- a/opensm/opensm/osm_perfmgr.c
+++ b/opensm/opensm/osm_perfmgr.c
@@ -47,7 +47,6 @@
 #endif				/* HAVE_CONFIG_H */
 
 #ifdef ENABLE_OSM_PERF_MGR
-
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
@@ -65,8 +64,11 @@
 #include <opensm/osm_log.h>
 #include <opensm/osm_node.h>
 #include <opensm/osm_opensm.h>
+#include <opensm/osm_helper.h>
 
 #define OSM_PERFMGR_INITIAL_TID_VALUE 0xcafe
+#define MAX_LOCAL_IBPORTS 64
+#define MAX_LOCAL_PKEYS   256
 
 #if ENABLE_OSM_PERF_MGR_PROFILE
 struct {
@@ -118,8 +120,6 @@ static inline void diff_time(struct timeval *before,
 }
 #endif
 
-extern int wait_for_pending_transactions(osm_stats_t * stats);
-
 /**********************************************************************
  * Internal helper functions.
  **********************************************************************/
@@ -203,6 +203,7 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw)
 	uint8_t port = context->perfmgr_context.port;
 	cl_map_item_t *p_node;
 	__monitored_node_t *p_mon_node;
+	ib_net16_t orig_lid;
 
 	OSM_LOG_ENTER(pm->log);
 
@@ -233,9 +234,10 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw)
 				p_mon_node->guid, p_mon_node->redir_tbl_size);
 			goto Exit;
 		}
-		/* Clear redirection info */
-		p_mon_node->redir_port[port].redir_lid = 0;
-		p_mon_node->redir_port[port].redir_qp = 0;
+		/* Clear redirection info for this port except orig_lid */
+		orig_lid = p_mon_node->redir_port[port].orig_lid;
+		memset(&p_mon_node->redir_port[port], 0, sizeof(redir_t));
+		p_mon_node->redir_port[port].orig_lid = orig_lid;
 		cl_plock_release(pm->lock);
 	}
 
@@ -292,6 +294,40 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid)
 		goto Exit;
 	}
 
+	/* if redirection enabled, determine local port from port GUID */
+	if (pm->subn->opt.perfmgr_redir) {
+		ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS];
+		uint32_t num_ports = MAX_LOCAL_IBPORTS;
+		int i;
+
+		for (i = 0; i < num_ports; i++) {
+			attr_array[i].num_pkeys = 0;
+			attr_array[i].p_pkey_table = NULL;
+		}
+
+		/* call transport layer for a list of local port GUIDs */
+		status = osm_vendor_get_all_port_attr(pm->subn->p_osm->p_vendor,
+						      attr_array, &num_ports);
+		if (status != IB_SUCCESS) {
+			OSM_LOG(pm->log, OSM_LOG_ERROR,
+				"ERR 4C1C: osm_vendor_get_all_port_attr status 0x%x\n",
+				status);
+			goto Exit;
+		}
+		if (num_ports == 0) {
+			OSM_LOG(pm->log, OSM_LOG_ERROR,
+				"ERR 4C1D: No local ports detected!\n");
+			goto Exit;
+		}
+
+		for (i = 0; i < num_ports; i++) {
+			if (port_guid == attr_array[i].port_guid) {
+				pm->local_port = attr_array[i].port_num;
+				break;
+			}
+		}
+	}
+
 Exit:
 	OSM_LOG_EXIT(pm->log);
 	return (status);
@@ -321,25 +357,16 @@ static ib_net32_t get_qp(__monitored_node_t * mon_node, uint8_t port)
 
 	if (mon_node && mon_node->redir_tbl_size &&
 	    port < mon_node->redir_tbl_size &&
-	    mon_node->redir_port[port].redir_lid &&
+	    mon_node->redir_port[port].redirection &&
 	    mon_node->redir_port[port].redir_qp)
 		qp = mon_node->redir_port[port].redir_qp;
 
 	return qp;
 }
 
-/**********************************************************************
- * Given a node, a port, and an optional monitored node,
- * return the appropriate lid to query that port
- **********************************************************************/
 static ib_net16_t
-get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
+get_base_lid(osm_node_t * p_node, uint8_t port)
 {
-	if (mon_node && mon_node->redir_tbl_size &&
-	    port < mon_node->redir_tbl_size &&
-	    mon_node->redir_port[port].redir_lid)
-		return mon_node->redir_port[port].redir_lid;
-
 	switch (p_node->node_info.node_type) {
 	case IB_NODE_TYPE_CA:
 	case IB_NODE_TYPE_ROUTER:
@@ -352,12 +379,27 @@ get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
 }
 
 /**********************************************************************
+ * Given a node, a port, and an optional monitored node,
+ * return the lid appropriate to query that port
+ **********************************************************************/
+static ib_net16_t
+get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
+{
+	if (mon_node && mon_node->redir_tbl_size &&
+	    port < mon_node->redir_tbl_size &&
+	    mon_node->redir_port[port].redir_lid)
+		return mon_node->redir_port[port].redir_lid;
+
+	return get_base_lid(p_node, port);
+}
+
+/**********************************************************************
  * Form and send the Port Counters MAD for a single port.
  **********************************************************************/
 static ib_api_status_t
 osm_perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid,
-			ib_net32_t dest_qp, uint8_t port, uint8_t mad_method,
-			osm_madw_context_t * const p_context)
+			ib_net32_t dest_qp, uint16_t pkey_ix, uint8_t port,
+			uint8_t mad_method, osm_madw_context_t * const p_context)
 {
 	ib_api_status_t status = IB_SUCCESS;
 	ib_port_counters_t *port_counter = NULL;
@@ -396,8 +438,7 @@ osm_perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid,
 	p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
 	p_madw->mad_addr.addr_type.gsi.remote_qkey =
 	    cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
-	/* FIXME what about other partitions */
-	p_madw->mad_addr.addr_type.gsi.pkey_ix = 0;
+	p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
 	p_madw->mad_addr.addr_type.gsi.service_level = 0;
 	p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
 	p_madw->resp_expected = TRUE;
@@ -432,28 +473,32 @@ static void __collect_guids(cl_map_item_t * const p_map_item, void *context)
 	uint64_t node_guid = cl_ntoh64(node->node_info.node_guid);
 	osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
 	__monitored_node_t *mon_node = NULL;
-	uint32_t size;
+	uint32_t num_ports;
+	int port;
 
 	OSM_LOG_ENTER(pm->log);
 
 	if (cl_qmap_get(&pm->monitored_map, node_guid)
 	    == cl_qmap_end(&pm->monitored_map)) {
 		/* if not already in our map add it */
-		size = osm_node_get_num_physp(node);
-		mon_node = malloc(sizeof(*mon_node) + sizeof(redir_t) * size);
+		num_ports = osm_node_get_num_physp(node);
+		mon_node = malloc(sizeof(*mon_node) + sizeof(redir_t) * num_ports);
 		if (!mon_node) {
 			OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C06: "
 				"malloc failed: not handling node %s"
 				"(GUID 0x%" PRIx64 ")\n", node->print_desc, node_guid);
 			goto Exit;
 		}
-		memset(mon_node, 0, sizeof(*mon_node) + sizeof(redir_t) * size);
+		memset(mon_node, 0, sizeof(*mon_node) + sizeof(redir_t) * num_ports);
 		mon_node->guid = node_guid;
 		mon_node->name = strdup(node->print_desc);
-		mon_node->redir_tbl_size = size;
+		mon_node->redir_tbl_size = num_ports;
 		/* check for enhanced switch port 0 */
 		mon_node->esp0 = (node->sw &&
 		    ib_switch_info_is_enhanced_port0(&node->sw->switch_info));
+		for (port = (mon_node->esp0) ? 0 : 1; port < num_ports; port++)
+			mon_node->redir_port[port].orig_lid = get_base_lid(node, port);
+
 		cl_qmap_insert(&(pm->monitored_map), node_guid,
 			       (cl_map_item_t *) mon_node);
 	}
@@ -511,6 +556,10 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context)
 		if (!osm_node_get_physp_ptr(node, port))
 			continue;
 
+		if (mon_node->redir_port[port].redirection &&
+		    mon_node->redir_port[port].invalid)
+			continue;
+
 		lid = get_lid(node, port, mon_node);
 		if (lid == 0) {
 			OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64
@@ -532,8 +581,10 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context)
 			PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
 			cl_ntoh16(lid), node->print_desc);
 		status =
-		    osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port,
-					    IB_MAD_METHOD_GET, &mad_context);
+		    osm_perfmgr_send_pc_mad(pm, lid, remote_qp,
+					    mon_node->redir_port[port].redir_pkey_ix,
+					    port, IB_MAD_METHOD_GET,
+					    &mad_context);
 		if (status != IB_SUCCESS)
 			OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C09: "
 				"Failed to issue port counter query for node 0x%"
@@ -550,6 +601,7 @@ Exit:
  * Discovery stuff.
  * Basically this code should not be here, but merged with main OpenSM
  **********************************************************************/
+extern int wait_for_pending_transactions(osm_stats_t * stats);
 extern void osm_drop_mgr_process(IN osm_sm_t *sm);
 
 static int sweep_hop_1(osm_sm_t * sm)
@@ -980,6 +1032,10 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node,
 		osm_node_t *p_node = NULL;
 		ib_net16_t lid = 0;
 
+		if (mon_node->redir_port[port].redirection &&
+		    mon_node->redir_port[port].invalid)
+			goto Exit;
+
 		osm_log(pm->log, OSM_LOG_VERBOSE,
 			"PerfMgr: Counter overflow: %s (0x%" PRIx64
 			") port %d; clearing counters\n",
@@ -1004,8 +1060,10 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node,
 		mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
 		/* clear port counters */
 		status =
-		    osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port,
-					    IB_MAD_METHOD_SET, &mad_context);
+		    osm_perfmgr_send_pc_mad(pm, lid, remote_qp,
+					    mon_node->redir_port[port].redir_pkey_ix,
+					    port, IB_MAD_METHOD_SET,
+					    &mad_context);
 		if (status != IB_SUCCESS)
 			OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C11: "
 				"Failed to send clear counters MAD for %s (0x%"
@@ -1063,6 +1121,73 @@ osm_perfmgr_log_events(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t
 			time_diff, mon_node->name, mon_node->guid, port);
 }
 
+static boolean_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey,
+				     uint16_t *pkey_ix)
+{
+	ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS];
+	uint32_t num_ports = MAX_LOCAL_IBPORTS;
+	ib_api_status_t status;
+	boolean_t sts = FALSE;
+	uint16_t i = 0;
+
+	OSM_LOG_ENTER(pm->log);
+
+	for (i = 0; i < num_ports; i++) {
+		attr_array[i].num_pkeys = 0;
+		attr_array[i].p_pkey_table = NULL;
+	}
+
+	/* If local port couldn't be determined previously */
+	if (pm->local_port == -1)
+		goto not_found;
+
+	attr_array[pm->local_port].num_pkeys = MAX_LOCAL_PKEYS;
+	attr_array[pm->local_port].p_pkey_table =
+				malloc(MAX_LOCAL_PKEYS * sizeof(ib_net16_t));
+	if (!attr_array[pm->local_port].p_pkey_table) { 
+		OSM_LOG(pm->log, OSM_LOG_ERROR,
+			"ERR 4C20: No memory for port %d pkey table\n",
+			pm->local_port);
+		goto not_found;
+	}
+
+	/* call the transport layer for a list of local port pkeys */
+	status = osm_vendor_get_all_port_attr(pm->subn->p_osm->p_vendor,
+					      attr_array, &num_ports);
+	if (status != IB_SUCCESS) {
+		OSM_LOG(pm->log, OSM_LOG_ERROR,
+			"ERR 4C1E: osm_vendor_get_all_port_attr status 0x%x\n",
+			status);
+		goto not_found;
+	}
+	if (num_ports == 0 || pm->local_port > num_ports) {
+		OSM_LOG(pm->log, OSM_LOG_ERROR,
+			"ERR 4C1F: No local ports detected or local port out of range!\n");
+		goto not_found;
+	}
+	ib_net16_t *pkey_table = attr_array[pm->local_port].p_pkey_table;
+	for (i = 0; i < attr_array[pm->local_port].num_pkeys; i++) 
+		if (pkey_table[i] == pkey)
+			break;
+	if (i == attr_array[pm->local_port].num_pkeys) {
+		i = 0;
+		goto not_found;
+	}
+	free(attr_array[pm->local_port].p_pkey_table);
+	sts = TRUE;
+	goto Exit;
+
+not_found:
+	if (attr_array[pm->local_port].p_pkey_table)
+		free(attr_array[pm->local_port].p_pkey_table);
+	sts = FALSE;
+Exit:
+	if (pkey_ix)
+		*pkey_ix = i;
+	OSM_LOG_EXIT(pm->log);
+	return sts;
+}
+
 /**********************************************************************
  * The dispatcher uses a thread pool which will call this function when
  * we have a thread available to process our mad received from the wire.
@@ -1082,6 +1207,8 @@ static void osm_pc_rcv_process(void *context, void *data)
 	perfmgr_db_data_cnt_reading_t data_reading;
 	cl_map_item_t *p_node;
 	__monitored_node_t *p_mon_node;
+	uint16_t pkey_ix;
+	boolean_t invalid = FALSE;
 
 	OSM_LOG_ENTER(pm->log);
 
@@ -1105,7 +1232,8 @@ static void osm_pc_rcv_process(void *context, void *data)
 		  p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
 
 	/* Response could also be redirection (IBM eHCA PMA does this) */
-	if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
+	if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
+	    p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
 		char gid_str[INET6_ADDRSTRLEN];
 		ib_class_port_info_t *cpi =
 		    (ib_class_port_info_t *) &
@@ -1119,17 +1247,48 @@ static void osm_pc_rcv_process(void *context, void *data)
 				  sizeof gid_str),
 			cl_ntoh32(cpi->redir_qp));
 
-		/* LID or GID redirection ? */
-		/* For GID redirection, need to get PathRecord from SA */
+		/* valid redirection ? */
 		if (cpi->redir_lid == 0) {
-			OSM_LOG(pm->log, OSM_LOG_VERBOSE,
-				"GID redirection not currently implemented!\n");
-			goto Exit;
+			if (!ib_gid_is_notzero(&cpi->redir_gid)) {
+				OSM_LOG(pm->log, OSM_LOG_ERROR,
+					"ERR 4C17: Invalid redirection "
+					"(both redirect LID and GID are zero)\n");
+				invalid = TRUE;
+			}
+		}
+		if (cpi->redir_qp == 0) {
+			OSM_LOG(pm->log, OSM_LOG_ERROR,
+				"ERR 4C18: Invalid RedirectQP\n");
+			invalid = TRUE;
+		}
+		if (cpi->redir_pkey == 0) {
+			OSM_LOG(pm->log, OSM_LOG_ERROR,
+				"ERR 4C19: Invalid RedirectP_Key\n");
+			invalid = TRUE;
+		}
+		if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) {
+			OSM_LOG(pm->log, OSM_LOG_ERROR,
+				"ERR 4C1A: Invalid RedirectQ_Key\n");
+			invalid = TRUE;
+		}
+
+		if (!validate_redir_pkey(pm, cpi->redir_pkey, &pkey_ix)) {
+			OSM_LOG(pm->log, OSM_LOG_ERROR,
+				"ERR 4C1B: Index for Pkey 0x%x not found\n",
+				cl_ntoh16(cpi->redir_pkey));
+			invalid = TRUE;
 		}
 
 		if (!pm->subn->opt.perfmgr_redir) {
-				OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
-				       "redirection requested but disabled\n");
+			OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
+				"redirection requested but disabled\n");
+			invalid = TRUE;
+		}
+
+		if (cpi->redir_lid == 0) {
+			/* GID redirection: get PathRecord information */
+			OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C21: "
+				"GID redirection not currently supported\n");
 			goto Exit;
 		}
 
@@ -1144,14 +1303,23 @@ static void osm_pc_rcv_process(void *context, void *data)
 				p_mon_node->redir_tbl_size);
 			goto Exit;
 		}
+		p_mon_node->redir_port[port].redirection = TRUE;
+		p_mon_node->redir_port[port].invalid = invalid;
+		memcpy(&p_mon_node->redir_port[port].redir_gid,
+		       &cpi->redir_gid, sizeof(ib_gid_t));
 		p_mon_node->redir_port[port].redir_lid = cpi->redir_lid;
 		p_mon_node->redir_port[port].redir_qp = cpi->redir_qp;
+		p_mon_node->redir_port[port].redir_pkey = cpi->redir_pkey;
+		p_mon_node->redir_port[port].redir_pkey_ix = pkey_ix;
 		cl_plock_release(pm->lock);
 
+		if (invalid)
+			goto Exit;
+
 		/* Finally, reissue the query to the redirected location */
 		status =
 		    osm_perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
-					    port,
+					    pkey_ix, port,
 					    mad_context->perfmgr_context.
 					    mad_method, mad_context);
 		if (status != IB_SUCCESS)
@@ -1234,6 +1402,7 @@ osm_perfmgr_init(osm_perfmgr_t * const pm, osm_opensm_t *osm,
 	pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
 	pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries;
 	pm->osm = osm;
+	pm->local_port = -1;
 
 	status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm);
 	if (status != IB_SUCCESS)



More information about the general mailing list