[ofa-general] ***SPAM*** [PATCH 1/2] opensm/PerfMgr: Better redirection support
Hal Rosenstock
hnrose at comcast.net
Wed Mar 11 04:58:01 PDT 2009
Handle PKey and QPN redirection information
GID redirection handling remains
Signed-off-by: Hal Rosenstock <hal.rosenstock at gmail.com>
---
This patch relies on patch sent yesterday adding ib_gid_is_notzero
diff --git a/opensm/include/opensm/osm_perfmgr.h b/opensm/include/opensm/osm_perfmgr.h
index 45dec54..651ea80 100644
--- a/opensm/include/opensm/osm_perfmgr.h
+++ b/opensm/include/opensm/osm_perfmgr.h
@@ -92,8 +92,14 @@ typedef enum {
/* Redirection information */
typedef struct redir {
+ boolean_t redirection;
+ boolean_t invalid;
+ ib_gid_t redir_gid;
ib_net16_t redir_lid;
+ ib_net16_t redir_pkey;
ib_net32_t redir_qp;
+ uint16_t redir_pkey_ix;
+ ib_net16_t orig_lid;
} redir_t;
/* Node to store information about which nodes we are monitoring */
@@ -134,6 +140,7 @@ typedef struct osm_perfmgr {
uint32_t max_outstanding_queries;
cl_qmap_t monitored_map; /* map the nodes we are tracking */
__monitored_node_t *remove_list;
+ int16_t local_port;
} osm_perfmgr_t;
/*
* FIELDS
diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c
index 4a6f65c..7f8c8a4 100644
--- a/opensm/opensm/osm_perfmgr.c
+++ b/opensm/opensm/osm_perfmgr.c
@@ -47,7 +47,6 @@
#endif /* HAVE_CONFIG_H */
#ifdef ENABLE_OSM_PERF_MGR
-
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -67,6 +66,8 @@
#include <opensm/osm_opensm.h>
#define OSM_PERFMGR_INITIAL_TID_VALUE 0xcafe
+#define MAX_LOCAL_IBPORTS 64
+#define MAX_LOCAL_PKEYS 256
#if ENABLE_OSM_PERF_MGR_PROFILE
struct {
@@ -118,8 +119,6 @@ static inline void diff_time(struct timeval *before,
}
#endif
-extern int wait_for_pending_transactions(osm_stats_t * stats);
-
/**********************************************************************
* Internal helper functions.
**********************************************************************/
@@ -203,6 +202,7 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw)
uint8_t port = context->perfmgr_context.port;
cl_map_item_t *p_node;
__monitored_node_t *p_mon_node;
+ ib_net16_t orig_lid;
OSM_LOG_ENTER(pm->log);
@@ -233,9 +233,10 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw)
p_mon_node->guid, p_mon_node->redir_tbl_size);
goto Exit;
}
- /* Clear redirection info */
- p_mon_node->redir_port[port].redir_lid = 0;
- p_mon_node->redir_port[port].redir_qp = 0;
+ /* Clear redirection info for this port except orig_lid */
+ orig_lid = p_mon_node->redir_port[port].orig_lid;
+ memset(&p_mon_node->redir_port[port], 0, sizeof(redir_t));
+ p_mon_node->redir_port[port].orig_lid = orig_lid;
cl_plock_release(pm->lock);
}
@@ -292,6 +293,40 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid)
goto Exit;
}
+ /* if redirection enabled, determine local port from port GUID */
+ if (pm->subn->opt.perfmgr_redir) {
+ ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS];
+ uint32_t num_ports = MAX_LOCAL_IBPORTS;
+ int i;
+
+ for (i = 0; i < num_ports; i++) {
+ attr_array[i].num_pkeys = 0;
+ attr_array[i].p_pkey_table = NULL;
+ }
+
+ /* call transport layer for a list of local port GUIDs */
+ status = osm_vendor_get_all_port_attr(pm->subn->p_osm->p_vendor,
+ attr_array, &num_ports);
+ if (status != IB_SUCCESS) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C1C: osm_vendor_get_all_port_attr status 0x%x\n",
+ status);
+ goto Exit;
+ }
+ if (num_ports == 0) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C1D: No local ports detected!\n");
+ goto Exit;
+ }
+
+ for (i = 0; i < num_ports; i++) {
+ if (port_guid == attr_array[i].port_guid) {
+ pm->local_port = attr_array[i].port_num;
+ break;
+ }
+ }
+ }
+
Exit:
OSM_LOG_EXIT(pm->log);
return (status);
@@ -321,25 +356,16 @@ static ib_net32_t get_qp(__monitored_node_t * mon_node, uint8_t port)
if (mon_node && mon_node->redir_tbl_size &&
port < mon_node->redir_tbl_size &&
- mon_node->redir_port[port].redir_lid &&
+ mon_node->redir_port[port].redirection &&
mon_node->redir_port[port].redir_qp)
qp = mon_node->redir_port[port].redir_qp;
return qp;
}
-/**********************************************************************
- * Given a node, a port, and an optional monitored node,
- * return the appropriate lid to query that port
- **********************************************************************/
static ib_net16_t
-get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
+get_base_lid(osm_node_t * p_node, uint8_t port)
{
- if (mon_node && mon_node->redir_tbl_size &&
- port < mon_node->redir_tbl_size &&
- mon_node->redir_port[port].redir_lid)
- return mon_node->redir_port[port].redir_lid;
-
switch (p_node->node_info.node_type) {
case IB_NODE_TYPE_CA:
case IB_NODE_TYPE_ROUTER:
@@ -352,12 +378,27 @@ get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
}
/**********************************************************************
+ * Given a node, a port, and an optional monitored node,
+ * return the lid appropriate to query that port
+ **********************************************************************/
+static ib_net16_t
+get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
+{
+ if (mon_node && mon_node->redir_tbl_size &&
+ port < mon_node->redir_tbl_size &&
+ mon_node->redir_port[port].redir_lid)
+ return mon_node->redir_port[port].redir_lid;
+
+ return get_base_lid(p_node, port);
+}
+
+/**********************************************************************
* Form and send the Port Counters MAD for a single port.
**********************************************************************/
static ib_api_status_t
osm_perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid,
- ib_net32_t dest_qp, uint8_t port, uint8_t mad_method,
- osm_madw_context_t * const p_context)
+ ib_net32_t dest_qp, uint16_t pkey_ix, uint8_t port,
+ uint8_t mad_method, osm_madw_context_t * const p_context)
{
ib_api_status_t status = IB_SUCCESS;
ib_port_counters_t *port_counter = NULL;
@@ -396,8 +437,7 @@ osm_perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid,
p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
p_madw->mad_addr.addr_type.gsi.remote_qkey =
cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
- /* FIXME what about other partitions */
- p_madw->mad_addr.addr_type.gsi.pkey_ix = 0;
+ p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
p_madw->mad_addr.addr_type.gsi.service_level = 0;
p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
p_madw->resp_expected = TRUE;
@@ -432,28 +472,32 @@ static void __collect_guids(cl_map_item_t * const p_map_item, void *context)
uint64_t node_guid = cl_ntoh64(node->node_info.node_guid);
osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
__monitored_node_t *mon_node = NULL;
- uint32_t size;
+ uint32_t num_ports;
+ int port;
OSM_LOG_ENTER(pm->log);
if (cl_qmap_get(&pm->monitored_map, node_guid)
== cl_qmap_end(&pm->monitored_map)) {
/* if not already in our map add it */
- size = osm_node_get_num_physp(node);
- mon_node = malloc(sizeof(*mon_node) + sizeof(redir_t) * size);
+ num_ports = osm_node_get_num_physp(node);
+ mon_node = malloc(sizeof(*mon_node) + sizeof(redir_t) * num_ports);
if (!mon_node) {
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C06: "
"malloc failed: not handling node %s"
"(GUID 0x%" PRIx64 ")\n", node->print_desc, node_guid);
goto Exit;
}
- memset(mon_node, 0, sizeof(*mon_node) + sizeof(redir_t) * size);
+ memset(mon_node, 0, sizeof(*mon_node) + sizeof(redir_t) * num_ports);
mon_node->guid = node_guid;
mon_node->name = strdup(node->print_desc);
- mon_node->redir_tbl_size = size;
+ mon_node->redir_tbl_size = num_ports;
/* check for enhanced switch port 0 */
mon_node->esp0 = (node->sw &&
ib_switch_info_is_enhanced_port0(&node->sw->switch_info));
+ for (port = (mon_node->esp0) ? 0 : 1; port < num_ports; port++)
+ mon_node->redir_port[port].orig_lid = get_base_lid(node, port);
+
cl_qmap_insert(&(pm->monitored_map), node_guid,
(cl_map_item_t *) mon_node);
}
@@ -511,6 +555,10 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context)
if (!osm_node_get_physp_ptr(node, port))
continue;
+ if (mon_node->redir_port[port].redirection &&
+ mon_node->redir_port[port].invalid)
+ continue;
+
lid = get_lid(node, port, mon_node);
if (lid == 0) {
OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64
@@ -532,8 +580,10 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context)
PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
cl_ntoh16(lid), node->print_desc);
status =
- osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port,
- IB_MAD_METHOD_GET, &mad_context);
+ osm_perfmgr_send_pc_mad(pm, lid, remote_qp,
+ mon_node->redir_port[port].redir_pkey_ix,
+ port, IB_MAD_METHOD_GET,
+ &mad_context);
if (status != IB_SUCCESS)
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C09: "
"Failed to issue port counter query for node 0x%"
@@ -550,6 +600,7 @@ Exit:
* Discovery stuff.
* Basically this code should not be here, but merged with main OpenSM
**********************************************************************/
+extern int wait_for_pending_transactions(osm_stats_t * stats);
extern void osm_drop_mgr_process(IN osm_sm_t *sm);
static int sweep_hop_1(osm_sm_t * sm)
@@ -980,6 +1031,10 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node,
osm_node_t *p_node = NULL;
ib_net16_t lid = 0;
+ if (mon_node->redir_port[port].redirection &&
+ mon_node->redir_port[port].invalid)
+ goto Exit;
+
osm_log(pm->log, OSM_LOG_VERBOSE,
"PerfMgr: Counter overflow: %s (0x%" PRIx64
") port %d; clearing counters\n",
@@ -1004,8 +1059,10 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node,
mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
/* clear port counters */
status =
- osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port,
- IB_MAD_METHOD_SET, &mad_context);
+ osm_perfmgr_send_pc_mad(pm, lid, remote_qp,
+ mon_node->redir_port[port].redir_pkey_ix,
+ port, IB_MAD_METHOD_SET,
+ &mad_context);
if (status != IB_SUCCESS)
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C11: "
"Failed to send clear counters MAD for %s (0x%"
@@ -1063,6 +1120,73 @@ osm_perfmgr_log_events(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t
time_diff, mon_node->name, mon_node->guid, port);
}
+static boolean_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey,
+ uint16_t *pkey_ix)
+{
+ ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS];
+ uint32_t num_ports = MAX_LOCAL_IBPORTS;
+ ib_api_status_t status;
+ boolean_t sts = FALSE;
+ uint16_t i = 0;
+
+ OSM_LOG_ENTER(pm->log);
+
+ for (i = 0; i < num_ports; i++) {
+ attr_array[i].num_pkeys = 0;
+ attr_array[i].p_pkey_table = NULL;
+ }
+
+ /* If local port couldn't be determined previously */
+ if (pm->local_port == -1)
+ goto not_found;
+
+ attr_array[pm->local_port].num_pkeys = MAX_LOCAL_PKEYS;
+ attr_array[pm->local_port].p_pkey_table =
+ malloc(MAX_LOCAL_PKEYS * sizeof(ib_net16_t));
+ if (!attr_array[pm->local_port].p_pkey_table) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C20: No memory for port %d pkey table\n",
+ pm->local_port);
+ goto not_found;
+ }
+
+ /* call the transport layer for a list of local port pkeys */
+ status = osm_vendor_get_all_port_attr(pm->subn->p_osm->p_vendor,
+ attr_array, &num_ports);
+ if (status != IB_SUCCESS) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C1E: osm_vendor_get_all_port_attr status 0x%x\n",
+ status);
+ goto not_found;
+ }
+ if (num_ports == 0 || pm->local_port > num_ports) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C1F: No local ports detected or local port out of range!\n");
+ goto not_found;
+ }
+ ib_net16_t *pkey_table = attr_array[pm->local_port].p_pkey_table;
+ for (i = 0; i < attr_array[pm->local_port].num_pkeys; i++)
+ if (pkey_table[i] == pkey)
+ break;
+ if (i == attr_array[pm->local_port].num_pkeys) {
+ i = 0;
+ goto not_found;
+ }
+ free(attr_array[pm->local_port].p_pkey_table);
+ sts = TRUE;
+ goto Exit;
+
+not_found:
+ if (attr_array[pm->local_port].p_pkey_table)
+ free(attr_array[pm->local_port].p_pkey_table);
+ sts = FALSE;
+Exit:
+ if (pkey_ix)
+ *pkey_ix = i;
+ OSM_LOG_EXIT(pm->log);
+ return sts;
+}
+
/**********************************************************************
* The dispatcher uses a thread pool which will call this function when
* we have a thread available to process our mad received from the wire.
@@ -1082,6 +1206,8 @@ static void osm_pc_rcv_process(void *context, void *data)
perfmgr_db_data_cnt_reading_t data_reading;
cl_map_item_t *p_node;
__monitored_node_t *p_mon_node;
+ uint16_t pkey_ix;
+ boolean_t invalid = FALSE;
OSM_LOG_ENTER(pm->log);
@@ -1105,7 +1231,8 @@ static void osm_pc_rcv_process(void *context, void *data)
p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
/* Response could also be redirection (IBM eHCA PMA does this) */
- if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
+ if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
+ p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
char gid_str[INET6_ADDRSTRLEN];
ib_class_port_info_t *cpi =
(ib_class_port_info_t *) &
@@ -1119,17 +1246,48 @@ static void osm_pc_rcv_process(void *context, void *data)
sizeof gid_str),
cl_ntoh32(cpi->redir_qp));
- /* LID or GID redirection ? */
- /* For GID redirection, need to get PathRecord from SA */
+ /* valid redirection ? */
if (cpi->redir_lid == 0) {
- OSM_LOG(pm->log, OSM_LOG_VERBOSE,
- "GID redirection not currently implemented!\n");
- goto Exit;
+ if (!ib_gid_is_notzero(&cpi->redir_gid)) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C17: Invalid redirection "
+ "(both redirect LID and GID are zero)\n");
+ invalid = TRUE;
+ }
+ }
+ if (cpi->redir_qp == 0) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C18: Invalid RedirectQP\n");
+ invalid = TRUE;
+ }
+ if (cpi->redir_pkey == 0) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C19: Invalid RedirectP_Key\n");
+ invalid = TRUE;
+ }
+ if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C1A: Invalid RedirectQ_Key\n");
+ invalid = TRUE;
+ }
+
+ if (!validate_redir_pkey(pm, cpi->redir_pkey, &pkey_ix)) {
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C1B: Index for Pkey 0x%x not found\n",
+ cl_ntoh16(cpi->redir_pkey));
+ invalid = TRUE;
}
if (!pm->subn->opt.perfmgr_redir) {
- OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
- "redirection requested but disabled\n");
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
+ "redirection requested but disabled\n");
+ invalid = TRUE;
+ }
+
+ if (cpi->redir_lid == 0) {
+ /* GID redirection: get PathRecord information */
+ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C21: "
+ "GID redirection not currently supported\n");
goto Exit;
}
@@ -1144,14 +1302,23 @@ static void osm_pc_rcv_process(void *context, void *data)
p_mon_node->redir_tbl_size);
goto Exit;
}
+ p_mon_node->redir_port[port].redirection = TRUE;
+ p_mon_node->redir_port[port].invalid = invalid;
+ memcpy(&p_mon_node->redir_port[port].redir_gid,
+ &cpi->redir_gid, sizeof(ib_gid_t));
p_mon_node->redir_port[port].redir_lid = cpi->redir_lid;
p_mon_node->redir_port[port].redir_qp = cpi->redir_qp;
+ p_mon_node->redir_port[port].redir_pkey = cpi->redir_pkey;
+ p_mon_node->redir_port[port].redir_pkey_ix = pkey_ix;
cl_plock_release(pm->lock);
+ if (invalid)
+ goto Exit;
+
/* Finally, reissue the query to the redirected location */
status =
osm_perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
- port,
+ pkey_ix, port,
mad_context->perfmgr_context.
mad_method, mad_context);
if (status != IB_SUCCESS)
@@ -1234,6 +1401,7 @@ osm_perfmgr_init(osm_perfmgr_t * const pm, osm_opensm_t *osm,
pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries;
pm->osm = osm;
+ pm->local_port = -1;
status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm);
if (status != IB_SUCCESS)
More information about the general
mailing list