[ofa-general] [PATCH] osm: QoS: selecting PathRecord according to QoS policy

Yevgeny Kliteynik kliteyn at dev.mellanox.co.il
Mon Sep 3 05:15:55 PDT 2007


Selecting path according to QoS policy level that
the PathRecord query matches.

Signed-off-by: Yevgeny Kliteynik <kliteyn at dev.mellanox.co.il>
---
 opensm/opensm/osm_sa_path_record.c |  383 ++++++++++++++++++++++++++++++------
 1 files changed, 320 insertions(+), 63 deletions(-)

diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c
index 1b781f0..8fc5eac 100644
--- a/opensm/opensm/osm_sa_path_record.c
+++ b/opensm/opensm/osm_sa_path_record.c
@@ -67,6 +67,7 @@
 #include <opensm/osm_multicast.h>
 #include <opensm/osm_partition.h>
 #include <opensm/osm_opensm.h>
+#include <opensm/osm_qos_policy.h>
 #ifdef ROUTER_EXP
 #include <opensm/osm_router.h>
 #include <opensm/osm_sa_mcmember_record.h>
@@ -236,8 +237,9 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 {
 	const osm_node_t *p_node;
 	const osm_physp_t *p_physp;
+	const osm_physp_t *p_src_physp;
 	const osm_physp_t *p_dest_physp;
-	const osm_prtn_t *p_prtn;
+	const osm_prtn_t *p_prtn = NULL;
 	const ib_port_info_t *p_pi;
 	ib_api_status_t status = IB_SUCCESS;
 	ib_net16_t pkey;
@@ -248,14 +250,24 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 	uint8_t required_rate;
 	uint8_t required_pkt_life;
 	uint8_t sl;
+	uint8_t in_port_num;
 	ib_net16_t dest_lid;
+	uint8_t i;
+	uint8_t vl;
+	ib_slvl_table_t *p_slvl_tbl = NULL;
+	boolean_t valid_sls[IB_MAX_NUM_VLS];
+	boolean_t sl2vl_valid_path;
+	uint8_t first_valid_sl;
+	osm_qos_level_t *p_qos_level = NULL;

 	OSM_LOG_ENTER(p_rcv->p_log, __osm_pr_rcv_get_path_parms);

+	memset(valid_sls, TRUE, sizeof(valid_sls));
 	dest_lid = cl_hton16(dest_lid_ho);

 	p_dest_physp = p_dest_port->p_physp;
 	p_physp = p_src_port->p_physp;
+	p_src_physp = p_physp;
 	p_pi = &p_physp->port_info;

 	mtu = ib_port_info_get_mtu_cap(p_pi);
@@ -288,13 +300,16 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 	p_node = osm_physp_get_node_ptr(p_physp);

 	if (p_node->sw) {
+		/* source node is a switch */
+		in_port_num = osm_physp_get_port_num(p_physp);
+
 		/*
 		 * If the dest_lid_ho is equal to the lid of the switch pointed by
 		 * p_sw then p_physp will be the physical port of the switch port zero.
+		 * Make sure that p_physp points to the out port of the
+		 * switch that routes to the destination lid (dest_lid_ho)
 		 */
-		p_physp =
-		    osm_switch_get_route_by_lid(p_node->sw,
-						cl_ntoh16(dest_lid_ho));
+		p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);
 		if (p_physp == 0) {
 			osm_log(p_rcv->p_log, OSM_LOG_ERROR,
 				"__osm_pr_rcv_get_path_parms: ERR 1F02: "
@@ -304,17 +319,36 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 			status = IB_NOT_FOUND;
 			goto Exit;
 		}
+		if (!p_rcv->p_subn->opt.no_qos)
+			p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num);
+	}
+
+	if (!p_rcv->p_subn->opt.no_qos) {
+		if (p_node->sw)
+			p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num);
+		else
+			p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0);
+
+		/* update valid SLs that still exist on this route */
+		for (i = 0; i < IB_MAX_NUM_VLS; i++) {
+			if (valid_sls[i]) {
+				vl = ib_slvl_table_get(p_slvl_tbl, i);
+				if (vl == IB_DROP_VL)
+					valid_sls[i] = FALSE;
+			}
+		}
 	}

 	/*
-	 * Same as above
+	 * now get pointer to the destination port (same as above)
 	 */
 	p_node = osm_physp_get_node_ptr(p_dest_physp);

 	if (p_node->sw) {
-		p_dest_physp =
-		    osm_switch_get_route_by_lid(p_node->sw,
-						cl_ntoh16(dest_lid_ho));
+		/*
+		 * if destination is switch, we want p_dest_physp to point to port 0
+		 */
+		p_dest_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid);

 		if (p_dest_physp == 0) {
 			osm_log(p_rcv->p_log, OSM_LOG_ERROR,
@@ -328,6 +362,10 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,

 	}

+	/*
+	 * Now go through the path step by step
+	 */
+
 	while (p_physp != p_dest_physp) {
 		p_physp = osm_physp_get_remote(p_physp);

@@ -341,6 +379,8 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 			goto Exit;
 		}

+		in_port_num = osm_physp_get_port_num(p_physp);
+
 		/*
 		   This is point to point case (no switch in between)
 		 */
@@ -409,6 +449,20 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 		CL_ASSERT(p_physp);
 		CL_ASSERT(osm_physp_is_valid(p_physp));

+		p_node = osm_physp_get_node_ptr(p_physp);
+		if (!p_node->sw) {
+			/*
+			 * There is some sort of problem in the subnet object!
+			 * If this isn't a switch, we should have reached
+			 * the destination by now!
+			 */
+			osm_log(p_rcv->p_log, OSM_LOG_ERROR,
+				"__osm_pr_rcv_get_path_parms: ERR 1F05: "
+				"Internal error, bad path\n");
+			status = IB_ERROR;
+			goto Exit;
+		}
+
 		p_pi = &p_physp->port_info;

 		if (mtu > ib_port_info_get_mtu_cap(p_pi)) {
@@ -435,6 +489,21 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 					osm_physp_get_port_num(p_physp));
 		}

+		if (!p_rcv->p_subn->opt.no_qos) {
+			/*
+			 * Check SL2VL table of the switch and update valid SLs
+			 */
+			p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, in_port_num);
+			for (i = 0; i < IB_MAX_NUM_VLS; i++) {
+				if (valid_sls[i]) {
+					vl = ib_slvl_table_get(p_slvl_tbl, i);
+					if (vl == IB_DROP_VL)
+						valid_sls[i] = FALSE;
+				}
+			}
+		}
+
+		/* go to the next step in the path */
 	}

 	/*
@@ -467,9 +536,118 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 			"__osm_pr_rcv_get_path_parms: "
 			"Path min MTU = %u, min rate = %u\n", mtu, rate);

+	if (!p_rcv->p_subn->opt.no_qos) {
+		/* check whether there is some SL that won't lead to VL15 eventually */
+		sl2vl_valid_path = FALSE;
+		for (i = 0; i < IB_MAX_NUM_VLS; i++) {
+			if (valid_sls[i]) {
+				sl2vl_valid_path = TRUE;
+				first_valid_sl = i;
+				break;
+			}
+		}
+
+		if (!sl2vl_valid_path) {
+			if (osm_log_is_active(p_rcv->p_log, OSM_LOG_DEBUG)) {
+				osm_log(p_rcv->p_log, OSM_LOG_DEBUG,
+					"__osm_pr_rcv_get_path_parms: "
+					"All the SLs lead to VL15 on this path\n");
+			}
+			status = IB_NOT_FOUND;
+			goto Exit;
+		}
+	}
+
+	if (!p_rcv->p_subn->opt.no_qos && p_rcv->p_subn->p_qos_policy) {
+		/* Get QoS Level object according to the path request */
+		osm_qos_policy_get_qos_level_by_pr(p_rcv->p_subn->p_qos_policy,
+						   p_rcv, p_pr,
+						   p_src_physp, p_dest_physp,
+						   comp_mask, &p_qos_level);
+
+		if (p_qos_level
+		    && osm_log_is_active(p_rcv->p_log, OSM_LOG_DEBUG)) {
+			osm_log(p_rcv->p_log, OSM_LOG_DEBUG,
+				"__osm_pr_rcv_get_path_parms: "
+				"PathRecord request matches QoS Level '%s' (%s)\n",
+				p_qos_level->name,
+				(p_qos_level->use) ? p_qos_level->
+				use : "no description");
+		}
+	}
+
+	/* Adjust path parameters according to QoS settings */
+
+	if (p_qos_level) {
+		/* adjust MTU limit according to QoS constraints */
+		if (p_qos_level->mtu_limit_set
+		    && (mtu > p_qos_level->mtu_limit)) {
+			mtu = p_qos_level->mtu_limit;
+			if (osm_log_is_active(p_rcv->p_log, OSM_LOG_DEBUG)) {
+				osm_log(p_rcv->p_log, OSM_LOG_DEBUG,
+					"__osm_pr_rcv_get_path_parms: "
+					"QoS constaraints: new smallest MTU = %u\n",
+					mtu);
+			}
+		}
+
+		/* adjust Rate limit according to QoS constraints */
+		if (p_qos_level->rate_limit_set
+		    && (rate > p_qos_level->rate_limit)) {
+			rate = p_qos_level->rate_limit;
+			if (osm_log_is_active(p_rcv->p_log, OSM_LOG_DEBUG)) {
+				osm_log(p_rcv->p_log, OSM_LOG_DEBUG,
+					"__osm_pr_rcv_get_path_parms: "
+					"QoS constaraints: new smallest Rate = %u\n",
+					rate);
+			}
+		}
+
+		/* adjust Packet Lifetime according to QoS constraints */
+		if (p_qos_level->pkt_life_set
+		    && (pkt_life > p_qos_level->pkt_life)) {
+			pkt_life = p_qos_level->pkt_life;
+			if (osm_log_is_active(p_rcv->p_log, OSM_LOG_DEBUG)) {
+				osm_log(p_rcv->p_log, OSM_LOG_DEBUG,
+					"__osm_pr_rcv_get_path_parms: "
+					"QoS constaraints: new smallest Packet Lifetime = %u\n",
+					pkt_life);
+			}
+		}
+
+		/* adjust SL according to QoS constraints */
+		if (p_qos_level->sl_set) {
+			if (!valid_sls[p_qos_level->sl]) {
+				status = IB_NOT_FOUND;
+				goto Exit;
+			}
+			sl = p_qos_level->sl;
+			if (osm_log_is_active(p_rcv->p_log, OSM_LOG_DEBUG)) {
+				osm_log(p_rcv->p_log, OSM_LOG_DEBUG,
+					"__osm_pr_rcv_get_path_parms: "
+					"QoS constaraints: new SL = %u\n",
+					sl);
+			}
+		}
+	}
+
+	/*
+	 * Set packet lifetime.
+	 * According to spec definition IBA 1.2 Table 205
+	 * PacketLifeTime description, for loopback paths,
+	 * packetLifeTime shall be zero.
+	 */
+	if (p_src_port == p_dest_port)
+		pkt_life = 0;
+	else
+		if ( !(p_qos_level && p_qos_level->pkt_life_set) )
+			pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT;
+
+
 	/*
-	   Determine if these values meet the user criteria
-	   and adjust appropriately
+	 * Done adjusting parameters according to QoS constraints.
+	 * Determine if these values meet the user criteria and
+	 * adjust appropriately.
 	 */

 	/* we silently ignore cases where only the MTU selector is defined */
@@ -511,6 +689,8 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 			break;
 		}
 	}
+	if (status != IB_SUCCESS)
+		goto Exit;

 	/* we silently ignore cases where only the Rate selector is defined */
 	if ((comp_mask & IB_PR_COMPMASK_RATESELEC) &&
@@ -551,14 +731,8 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 			break;
 		}
 	}
-
-	/* Verify the pkt_life_time */
-	/* According to spec definition IBA 1.2 Table 205 PacketLifeTime description,
-	   for loopback paths, packetLifeTime shall be zero. */
-	if (p_src_port == p_dest_port)
-		pkt_life = 0;	/* loopback */
-	else
-		pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT;
+	if (status != IB_SUCCESS)
+		goto Exit;

 	/* we silently ignore cases where only the PktLife selector is defined */
 	if ((comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC) &&
@@ -603,38 +777,68 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 	if (status != IB_SUCCESS)
 		goto Exit;

-	if (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC &&
-	    cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31))
-		pkey = osm_physp_find_common_pkey(p_physp, p_dest_physp);
-	else if (comp_mask & IB_PR_COMPMASK_PKEY) {
-		pkey = p_pr->pkey;
-		if (!osm_physp_share_this_pkey(p_physp, p_dest_physp, pkey)) {
-			osm_log(p_rcv->p_log, OSM_LOG_ERROR,
-				"__osm_pr_rcv_get_path_parms: ERR 1F1A: "
-				"Ports do not share specified PKey 0x%04x\n",
-				cl_ntoh16(pkey));
-			status = IB_NOT_FOUND;
-			goto Exit;
-		}
-	} else {
-		pkey = osm_physp_find_common_pkey(p_physp, p_dest_physp);
-		if (!pkey) {
-			osm_log(p_rcv->p_log, OSM_LOG_ERROR,
-				"__osm_pr_rcv_get_path_parms: ERR 1F1B: "
-				"Ports do not have any shared PKeys\n");
-			status = IB_NOT_FOUND;
-			goto Exit;
+	/*
+	 * set Pkey for this path record request
+	 */
+
+	if ((comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) &&
+	    (cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31)))
+		pkey = osm_physp_find_common_pkey(p_src_physp, p_dest_physp);
+	else {
+		if (comp_mask & IB_PR_COMPMASK_PKEY) {
+			/*
+			 * PR request has a specific pkey:
+			 * Check that source and destination share this pkey.
+			 * If QoS level has pkeys, check that this pkey exists
+			 * in the QoS level pkeys.
+			 * PR returned pkey is the requested pkey.
+			 */
+			pkey = p_pr->pkey;
+			if (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) {
+				osm_log(p_rcv->p_log, OSM_LOG_ERROR,
+					"__osm_pr_rcv_get_path_parms: ERR 1F1A: "
+					"Ports do not share specified PKey 0x%04x\n",
+					cl_ntoh16(pkey));
+				status = IB_NOT_FOUND;
+				goto Exit;
+			}
+			if (p_qos_level && p_qos_level->pkey_range_len &&
+			    !osm_qos_level_has_pkey(p_qos_level, pkey)) {
+				osm_log(p_rcv->p_log, OSM_LOG_ERROR,
+					"__osm_pr_rcv_get_path_parms: ERR 1F1D: "
+					"Ports do not share PKeys defined by QoS level\n");
+				status = IB_NOT_FOUND;
+				goto Exit;
+			}
+		} else {
+			/* PR request doesn't have a specific pkey */
+
+			if (p_qos_level && p_qos_level->pkey_range_len) {
+				/* If QoS level has pkeys, get shared pkey from QoS level pkeys */
+				pkey = osm_qos_level_get_shared_pkey(p_qos_level,
+								     p_src_physp,
+								     p_dest_physp);
+				if (!pkey) {
+					osm_log(p_rcv->p_log, OSM_LOG_ERROR,
+						"__osm_pr_rcv_get_path_parms: ERR 1F1E: "
+						"Ports do not share PKeys defined by QoS level\n");
+					status = IB_NOT_FOUND;
+					goto Exit;
+				}
+			} else {
+				pkey = osm_physp_find_common_pkey(p_src_physp,
+								  p_dest_physp);
+				if (!pkey) {
+					osm_log(p_rcv->p_log, OSM_LOG_ERROR,
+						"__osm_pr_rcv_get_path_parms: ERR 1F1B: "
+						"Ports do not have any shared PKeys\n");
+					status = IB_NOT_FOUND;
+					goto Exit;
+				}
+			}
 		}
 	}

-	if (p_rcv->p_subn->opt.routing_engine_name &&
-	    strcmp(p_rcv->p_subn->opt.routing_engine_name, "lash") == 0)
-		/* slid and dest_lid are stored in network in lash */
-		sl = osm_get_lash_sl(p_rcv->p_subn->p_osm, p_src_port,
-				     p_dest_port);
-	else
-		sl = OSM_DEFAULT_SL;
-
 	if (pkey) {
 		p_prtn =
 		    (osm_prtn_t *) cl_qmap_get(&p_rcv->p_subn->prtn_pkey_tbl,
@@ -642,34 +846,87 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
 								0x8000));
 		if (p_prtn ==
 		    (osm_prtn_t *) cl_qmap_end(&p_rcv->p_subn->prtn_pkey_tbl))
+			p_prtn = NULL;
+	}
+
+	/*
+	 * Set PathRecord SL.
+	 *
+	 * ToDo: What about QoS and LASH routing? How can they coexist?
+	 *       And what happens when there's a pkey, hence there is a
+	 *       partition with a certain SL, and this SL doesn't match
+	 *       the one that's defined by LASH?
+	 */
+
+	if (comp_mask & IB_PR_COMPMASK_SL) {
+		/*
+		 * Specific SL was requested
+		 */
+		sl = ib_path_rec_sl(p_pr);
+		if (p_qos_level && p_qos_level->sl_set && (p_qos_level->sl != sl)) {
+			if (osm_log_is_active(p_rcv->p_log, OSM_LOG_DEBUG)) {
+				osm_log(p_rcv->p_log, OSM_LOG_DEBUG,
+					"__osm_pr_rcv_get_path_parms: "
+					"QoS constaraints: required PR SL (%u) doesn't match QoS SL (%u)\n",
+					sl, p_qos_level->sl);
+			}
+			status = IB_NOT_FOUND;
+			goto Exit;
+		}
+	} else if (p_qos_level && p_qos_level->sl_set) {
+		/*
+		 * No specific SL was requested,
+		 * but there is an SL in QoS level
+		 */
+		sl = p_qos_level->sl;
+		if (osm_log_is_active(p_rcv->p_log, OSM_LOG_DEBUG)) {
+			if (pkey && p_prtn && p_prtn->sl != p_qos_level->sl) {
+				osm_log(p_rcv->p_log, OSM_LOG_DEBUG,
+					"__osm_pr_rcv_get_path_parms: "
+					"QoS level SL (%u) overrides partition SL (%u)\n",
+					p_qos_level->sl, p_prtn->sl);
+			}
+		}
+	} else if (pkey) {
+		/*
+		 * No specific SL in request or in QoS level - use partition SL
+		 */
+		if (!p_prtn) {
 			/* this may be possible when pkey tables are created somehow in
 			   previous runs or things are going wrong here */
 			osm_log(p_rcv->p_log, OSM_LOG_ERROR,
 				"__osm_pr_rcv_get_path_parms: ERR 1F1C: "
 				"No partition found for PKey 0x%04x - using default SL %d\n",
 				cl_ntoh16(pkey), sl);
-		else {
-			if (p_rcv->p_subn->opt.routing_engine_name &&
-			    strcmp(p_rcv->p_subn->opt.routing_engine_name,
-				   "lash") == 0)
-				/* slid and dest_lid are stored in network in lash */
-				sl = osm_get_lash_sl(p_rcv->p_subn->p_osm,
-						     p_src_port, p_dest_port);
-			else
-				sl = p_prtn->sl;
-		}
-
-		/* reset pkey when raw traffic */
-		if (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC &&
-		    cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31))
-			pkey = 0;
+		} else
+			sl = p_prtn->sl;
+	} else if (p_rcv->p_subn->opt.routing_engine_name &&
+		   strcmp(p_rcv->p_subn->opt.routing_engine_name, "lash") == 0) {
+		/* slid and dest_lid are stored in network in lash */
+		sl = osm_get_lash_sl(p_rcv->p_subn->p_osm,
+				     p_src_port, p_dest_port);
+	} else if (!p_rcv->p_subn->opt.no_qos) {
+		sl = first_valid_sl;
 	}
+	else
+		sl = OSM_DEFAULT_SL;

-	if ((comp_mask & IB_PR_COMPMASK_SL) && ib_path_rec_sl(p_pr) != sl) {
+	if (!p_rcv->p_subn->opt.no_qos && !valid_sls[sl]) {
+		/* selected SL will eventually lead to VL15 */
+		if (osm_log_is_active(p_rcv->p_log, OSM_LOG_DEBUG)) {
+			osm_log(p_rcv->p_log, OSM_LOG_DEBUG,
+				"__osm_pr_rcv_get_path_parms: "
+				"Selected SL (%u) leads to VL15\n", p_prtn->sl);
+		}
 		status = IB_NOT_FOUND;
 		goto Exit;
 	}

+	/* reset pkey when raw traffic */
+	if (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC &&
+	    cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31))
+		pkey = 0;
+
 	p_parms->mtu = mtu;
 	p_parms->rate = rate;
 	p_parms->pkt_life = pkt_life;
-- 
1.5.1.4




More information about the general mailing list