[ofa-general] [PATCH] opensm/PerfMgr: Better redirection support

Hal Rosenstock hnrose at comcast.net
Thu May 7 05:59:18 PDT 2009


Handle PKey and QPN redirection information.
GID redirection handling remains to be implemented.

Signed-off-by: Hal Rosenstock <hal.rosenstock at gmail.com>

---
Changes since v2:
Use OpenSM DB rather than vendor layer for local port number and PKeys
Change most log levels from ERROR to VERBOSE
Redirection info validity now determined by single flag
validate_redir_pkey returns pkey index or -1 rather than boolean
Removed redir_ prefixes

Changes since v1:
Added include of osm_helper.h to osm_perfmgr.c

diff --git a/opensm/include/opensm/osm_perfmgr.h b/opensm/include/opensm/osm_perfmgr.h
index 855a2ff..70d68f0 100644
--- a/opensm/include/opensm/osm_perfmgr.h
+++ b/opensm/include/opensm/osm_perfmgr.h
@@ -90,11 +90,17 @@ typedef enum {
 	PERFMGR_SWEEP_SUSPENDED
 } osm_perfmgr_sweep_state_t;
 
-/* Redirection information */
-typedef struct redir {
-	ib_net16_t redir_lid;
-	ib_net32_t redir_qp;
-} redir_t;
+typedef struct monitored_port {
+	uint16_t pkey_ix;	/* P_Key index used for queries */
+	ib_net16_t orig_lid;	/* base LID when added to the map */
+	boolean_t redirection;	/* redirect response recorded */
+	boolean_t valid;	/* FALSE: port is not queried */
+	/* Redirection fields from ClassPortInfo */
+	ib_gid_t gid;
+	ib_net16_t lid;
+	ib_net16_t pkey;
+	ib_net32_t qp;
+} monitored_port_t;
 
 /* Node to store information about nodes being monitored */
 typedef struct monitored_node {
@@ -104,7 +110,7 @@ typedef struct monitored_node {
 	boolean_t esp0;
 	char *name;
 	uint32_t num_ports;
-	redir_t redir_port[1];	/* redirection on a per port basis */
+	monitored_port_t port[1];
 } monitored_node_t;
 
 struct osm_opensm;
@@ -135,6 +141,8 @@ typedef struct osm_perfmgr {
 	uint32_t max_outstanding_queries;
 	cl_qmap_t monitored_map;	/* map the nodes being tracked */
 	monitored_node_t *remove_list;
+	ib_net64_t port_guid;
+	int16_t local_port;
 } osm_perfmgr_t;
 /*
 * FIELDS
diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c
index ecfdbda..9c47a8f 100644
--- a/opensm/opensm/osm_perfmgr.c
+++ b/opensm/opensm/osm_perfmgr.c
@@ -64,6 +64,7 @@
 #include <opensm/osm_log.h>
 #include <opensm/osm_node.h>
 #include <opensm/osm_opensm.h>
+#include <opensm/osm_helper.h>
 
 #define PERFMGR_INITIAL_TID_VALUE 0xcafe
 
@@ -194,6 +195,7 @@ static void perfmgr_mad_send_err_callback(void *bind_context,
 	uint8_t port = context->perfmgr_context.port;
 	cl_map_item_t *p_node;
 	monitored_node_t *p_mon_node;
+	ib_net16_t orig_lid;
 
 	OSM_LOG_ENTER(pm->log);
 
@@ -225,9 +227,11 @@ static void perfmgr_mad_send_err_callback(void *bind_context,
 				p_mon_node->num_ports);
 			goto Exit;
 		}
-		/* Clear redirection info */
-		p_mon_node->redir_port[port].redir_lid = 0;
-		p_mon_node->redir_port[port].redir_qp = 0;
+		/* Clear redirection info for this port except orig_lid */
+		orig_lid = p_mon_node->port[port].orig_lid;
+		memset(&p_mon_node->port[port], 0, sizeof(monitored_port_t));
+		p_mon_node->port[port].orig_lid = orig_lid;
+		p_mon_node->port[port].valid = TRUE;
 		cl_plock_release(pm->lock);
 	}
 
@@ -256,7 +260,7 @@ ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, const ib_net64_t port_guid)
 		goto Exit;
 	}
 
-	bind_info.port_guid = port_guid;
+	bind_info.port_guid = pm->port_guid = port_guid;
 	bind_info.mad_class = IB_MCLASS_PERF;
 	bind_info.class_version = 1;
 	bind_info.is_responder = FALSE;
@@ -277,7 +281,6 @@ ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, const ib_net64_t port_guid)
 		OSM_LOG(pm->log, OSM_LOG_ERROR,
 			"ERR 4C04: Vendor specific bind failed (%s)\n",
 			ib_get_err_str(status));
-		goto Exit;
 	}
 
 Exit:
@@ -308,24 +311,14 @@ static ib_net32_t get_qp(monitored_node_t * mon_node, uint8_t port)
 	ib_net32_t qp = IB_QP1;
 
 	if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
-	    mon_node->redir_port[port].redir_lid &&
-	    mon_node->redir_port[port].redir_qp)
-		qp = mon_node->redir_port[port].redir_qp;
+	    mon_node->port[port].redirection && mon_node->port[port].qp)
+		qp = mon_node->port[port].qp;
 
 	return qp;
 }
 
-/**********************************************************************
- * Given a node, a port, and an optional monitored node,
- * return the appropriate lid to query that port
- **********************************************************************/
-static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
-			  monitored_node_t * mon_node)
+static ib_net16_t get_base_lid(osm_node_t * p_node, uint8_t port)
 {
-	if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
-	    mon_node->redir_port[port].redir_lid)
-		return mon_node->redir_port[port].redir_lid;
-
 	switch (p_node->node_info.node_type) {
 	case IB_NODE_TYPE_CA:
 	case IB_NODE_TYPE_ROUTER:
@@ -338,12 +331,26 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
 }
 
 /**********************************************************************
+ * Given a node, a port, and an optional monitored node,
+ * return the lid appropriate to query that port
+ **********************************************************************/
+static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
+			  monitored_node_t * mon_node)
+{
+	/* fall back to the node's own LID unless a redirect LID is stored */
+	if (!mon_node || !mon_node->num_ports ||
+	    port >= mon_node->num_ports || !mon_node->port[port].lid)
+		return get_base_lid(p_node, port);
+	return mon_node->port[port].lid;
+}
+
+/**********************************************************************
  * Form and send the Port Counters MAD for a single port.
  **********************************************************************/
 static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
 					   ib_net16_t dest_lid,
-					   ib_net32_t dest_qp, uint8_t port,
-					   uint8_t mad_method,
+					   ib_net32_t dest_qp, uint16_t pkey_ix,
+					   uint8_t port, uint8_t mad_method,
 					   osm_madw_context_t * p_context)
 {
 	ib_api_status_t status = IB_SUCCESS;
@@ -382,8 +389,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
 	p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
 	p_madw->mad_addr.addr_type.gsi.remote_qkey =
 	    cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
-	/* FIXME what about other partitions */
-	p_madw->mad_addr.addr_type.gsi.pkey_ix = 0;
+	p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
 	p_madw->mad_addr.addr_type.gsi.service_level = 0;
 	p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
 	p_madw->resp_expected = TRUE;
@@ -419,6 +425,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
 	osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
 	monitored_node_t *mon_node = NULL;
 	uint32_t num_ports;
+	int port;
 
 	OSM_LOG_ENTER(pm->log);
 
@@ -427,7 +434,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
 		/* if not already in map add it */
 		num_ports = osm_node_get_num_physp(node);
 		mon_node = malloc(sizeof(*mon_node) +
-				  sizeof(redir_t) * num_ports);
+				  sizeof(monitored_port_t) * num_ports);
 		if (!mon_node) {
 			OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C06: "
 				"malloc failed: not handling node %s"
@@ -436,7 +443,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
 			goto Exit;
 		}
 		memset(mon_node, 0,
-		       sizeof(*mon_node) + sizeof(redir_t) * num_ports);
+		       sizeof(*mon_node) + sizeof(monitored_port_t) * num_ports);
 		mon_node->guid = node_guid;
 		mon_node->name = strdup(node->print_desc);
 		mon_node->num_ports = num_ports;
@@ -444,6 +451,11 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
 		mon_node->esp0 = (node->sw &&
 				  ib_switch_info_is_enhanced_port0(&node->sw->
 								   switch_info));
+		for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
+			mon_node->port[port].orig_lid = get_base_lid(node, port);
+			mon_node->port[port].valid = TRUE;
+		}
+
 		cl_qmap_insert(&pm->monitored_map, node_guid,
 			       (cl_map_item_t *) mon_node);
 	}
@@ -500,6 +512,9 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
 		if (!osm_node_get_physp_ptr(node, port))
 			continue;
 
+		if (!mon_node->port[port].valid)
+			continue;
+
 		lid = get_lid(node, port, mon_node);
 		if (lid == 0) {
 			OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64
@@ -520,8 +535,10 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
 		OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
 			PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
 			cl_ntoh16(lid), node->print_desc);
-		status = perfmgr_send_pc_mad(pm, lid, remote_qp, port,
-					     IB_MAD_METHOD_GET, &mad_context);
+		status = perfmgr_send_pc_mad(pm, lid, remote_qp,
+					     mon_node->port[port].pkey_ix,
+					     port, IB_MAD_METHOD_GET,
+					     &mad_context);
 		if (status != IB_SUCCESS)
 			OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C09: "
 				"Failed to issue port counter query for node 0x%"
@@ -768,6 +785,24 @@ void osm_perfmgr_process(osm_perfmgr_t * pm)
 	    pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)
 		perfmgr_discovery(pm->subn->p_osm);
 
+	/* if redirection enabled, determine local port */
+	if (pm->subn->opt.perfmgr_redir && pm->local_port == -1) {
+		osm_node_t *p_node;
+		osm_port_t *p_port;
+
+		CL_PLOCK_ACQUIRE(pm->sm->p_lock);
+		p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
+		if (p_port) {
+			p_node = p_port->p_node;
+			CL_ASSERT(p_node);
+			pm->local_port =
+			    ib_node_info_get_local_port_num(&p_node->node_info);
+		} else
+			OSM_LOG(pm->log, OSM_LOG_ERROR,
+				"ERR 4C87: No PerfMgr port object\n");
+		CL_PLOCK_RELEASE(pm->sm->p_lock);
+	}
+
 #if ENABLE_OSM_PERF_MGR_PROFILE
 	gettimeofday(&before, NULL);
 #endif
@@ -935,8 +970,8 @@ static int counter_overflow_32(ib_net32_t val)
  * MAD to the port.
  **********************************************************************/
 static void perfmgr_check_overflow(osm_perfmgr_t * pm,
-				   monitored_node_t * mon_node, uint8_t port,
-				   ib_port_counters_t * pc)
+				   monitored_node_t * mon_node, int16_t pkey_ix,
+				   uint8_t port, ib_port_counters_t * pc)
 {
 	osm_madw_context_t mad_context;
 	ib_api_status_t status;
@@ -963,6 +998,9 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
 		osm_node_t *p_node = NULL;
 		ib_net16_t lid = 0;
 
+		if (!mon_node->port[port].valid)
+			goto Exit;
+
 		osm_log(pm->log, OSM_LOG_VERBOSE,
 			"PerfMgr: Counter overflow: %s (0x%" PRIx64
 			") port %d; clearing counters\n",
@@ -987,8 +1025,9 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
 		mad_context.perfmgr_context.port = port;
 		mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
 		/* clear port counters */
-		status = perfmgr_send_pc_mad(pm, lid, remote_qp, port,
-					     IB_MAD_METHOD_SET, &mad_context);
+		status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
+					     port, IB_MAD_METHOD_SET,
+					     &mad_context);
 		if (status != IB_SUCCESS)
 			OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C11: "
 				"Failed to send clear counters MAD for %s (0x%"
@@ -1046,6 +1085,64 @@ static void perfmgr_log_events(osm_perfmgr_t * pm,
 			time_diff, mon_node->name, mon_node->guid, port);
 }
 
+/**********************************************************************
+ * Find the redirect P_Key in the PerfMgr port's P_Key table.  Returns
+ * its table index, or -1 when the port, physp or P_Key is not found.
+ * The SM lock is taken here and is never held while logging.
+ **********************************************************************/
+static int16_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey)
+{
+	int16_t pkey_ix = -1;
+	osm_port_t *p_port;
+	osm_pkey_tbl_t *p_pkey_tbl;
+	ib_net16_t *p_orig_pkey;
+	uint16_t block;
+	uint8_t index;
+
+	OSM_LOG_ENTER(pm->log);
+
+	CL_PLOCK_ACQUIRE(pm->sm->p_lock);
+	p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
+	if (!p_port) {
+		CL_PLOCK_RELEASE(pm->sm->p_lock);
+		OSM_LOG(pm->log, OSM_LOG_ERROR,
+			"ERR 4C1E: No PerfMgr port object\n");
+		goto Exit;
+	}
+	if (p_port->p_physp && osm_physp_is_valid(p_port->p_physp)) {
+		/* pkeys is embedded in the physp: its address is never NULL */
+		p_pkey_tbl = &p_port->p_physp->pkeys;
+		p_orig_pkey = cl_map_get(&p_pkey_tbl->keys,
+					 ib_pkey_get_base(pkey));
+		if (!p_orig_pkey) {
+			CL_PLOCK_RELEASE(pm->sm->p_lock);
+			OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+				"PKey 0x%x not found for PerfMgr port\n",
+				cl_ntoh16(pkey));
+			goto Exit;
+		}
+		if (osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey,
+						   &block, &index) == IB_SUCCESS) {
+			CL_PLOCK_RELEASE(pm->sm->p_lock);
+			pkey_ix = block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index;
+		} else {
+			CL_PLOCK_RELEASE(pm->sm->p_lock);
+			OSM_LOG(pm->log, OSM_LOG_ERROR,
+				"ERR 4C1F: Failed to obtain P_Key 0x%04x "
+				"block and index for PerfMgr port\n",
+				cl_ntoh16(pkey));
+		}
+	} else {
+		CL_PLOCK_RELEASE(pm->sm->p_lock);
+		OSM_LOG(pm->log, OSM_LOG_ERROR,
+			"ERR 4C20: Local PerfMgr port physp invalid\n");
+	}
+
+Exit:
+	OSM_LOG_EXIT(pm->log);
+	return pkey_ix;
+}
+
 /**********************************************************************
  * The dispatcher uses a thread pool which will call this function when
  * there is a thread available to process the mad received on the wire.
@@ -1064,6 +1161,8 @@ static void pc_recv_process(void *context, void *data)
 	perfmgr_db_data_cnt_reading_t data_reading;
 	cl_map_item_t *p_node;
 	monitored_node_t *p_mon_node;
+	int16_t pkey_ix = 0;
+	boolean_t valid = TRUE;
 
 	OSM_LOG_ENTER(pm->log);
 
@@ -1087,7 +1186,8 @@ static void pc_recv_process(void *context, void *data)
 		  p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
 
 	/* Response could also be redirection (IBM eHCA PMA does this) */
-	if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
+	if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
+	    p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
 		char gid_str[INET6_ADDRSTRLEN];
 		ib_class_port_info_t *cpi =
 		    (ib_class_port_info_t *) &
@@ -1100,17 +1200,46 @@ static void pc_recv_process(void *context, void *data)
 			inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str,
 				  sizeof gid_str), cl_ntoh32(cpi->redir_qp));
 
-		/* LID or GID redirection ? */
-		/* For GID redirection, need to get PathRecord from SA */
+		if (!pm->subn->opt.perfmgr_redir) {
+			OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+				"Redirection requested but disabled\n");
+			valid = FALSE;
+		}
+
+		/* valid redirection ? */
 		if (cpi->redir_lid == 0) {
+			if (!ib_gid_is_notzero(&cpi->redir_gid)) {
+				OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+					"Invalid redirection "
+					"(both redirect LID and GID are zero)\n");
+				valid = FALSE;
+			}
+		}
+		if (cpi->redir_qp == 0) {
+			OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQP\n");
+			valid = FALSE;
+		}
+		if (cpi->redir_pkey == 0) {
+			OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectP_Key\n");
+			valid = FALSE;
+		}
+		if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) {
+			OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQ_Key\n");
+			valid = FALSE;
+		}
+
+		pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey);
+		if (pkey_ix == -1) {
 			OSM_LOG(pm->log, OSM_LOG_VERBOSE,
-				"GID redirection not currently implemented!\n");
-			goto Exit;
+				"Index for Pkey 0x%x not found\n",
+				cl_ntoh16(cpi->redir_pkey));
+			valid = FALSE;
 		}
 
-		if (!pm->subn->opt.perfmgr_redir) {
-			OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
-				"redirection requested but disabled\n");
+		if (cpi->redir_lid == 0) {
+			/* GID redirection: get PathRecord information */
+			OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+				"GID redirection not currently supported\n");
 			goto Exit;
 		}
 
@@ -1125,13 +1254,24 @@ static void pc_recv_process(void *context, void *data)
 				p_mon_node->num_ports);
 			goto Exit;
 		}
-		p_mon_node->redir_port[port].redir_lid = cpi->redir_lid;
-		p_mon_node->redir_port[port].redir_qp = cpi->redir_qp;
+		p_mon_node->port[port].redirection = TRUE;
+		p_mon_node->port[port].valid = valid;
+		memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid,
+		       sizeof(ib_gid_t));
+		p_mon_node->port[port].lid = cpi->redir_lid;
+		p_mon_node->port[port].qp = cpi->redir_qp;
+		p_mon_node->port[port].pkey = cpi->redir_pkey;
+		if (pkey_ix != -1)
+			p_mon_node->port[port].pkey_ix = pkey_ix;
 		cl_plock_release(pm->lock);
 
+		if (!valid)
+			goto Exit;
+
 		/* Finally, reissue the query to the redirected location */
 		status = perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
-					     port, mad_context->perfmgr_context.
+					     pkey_ix, port,
+					     mad_context->perfmgr_context.
 					     mad_method, mad_context);
 		if (status != IB_SUCCESS)
 			OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C14: "
@@ -1166,7 +1306,7 @@ static void pc_recv_process(void *context, void *data)
 		perfmgr_db_clear_prev_dc(pm->db, node_guid, port);
 	}
 
-	perfmgr_check_overflow(pm, p_mon_node, port, wire_read);
+	perfmgr_check_overflow(pm, p_mon_node, pkey_ix, port, wire_read);
 
 #if ENABLE_OSM_PERF_MGR_PROFILE
 	do {
@@ -1212,6 +1352,7 @@ ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * pm, osm_opensm_t * osm,
 	pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
 	pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries;
 	pm->osm = osm;
+	pm->local_port = -1;
 
 	status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm);
 	if (status != IB_SUCCESS)



More information about the general mailing list