[ofa-general] [PATCH] ibutils: Optimal permutation routing in fat-tree
Yevgeny Kliteynik
kliteyn at dev.mellanox.co.il
Tue Oct 7 02:31:34 PDT 2008
[On behalf of Vladimir Zdornov]
Optimal permutation routing in fat-tree.
Fat-tree is regarded to as high-order Clos network.
Each Clos is routed using graph coloring of a
bipartite graph. The result of the routing procedure
affects ascending only (descending fdb entries stay
unchanged).
Signed-off-by: Vladimir Zdornov <zdv at mellanox.co.il>
---
ibdm/datamodel/Bipartite.cc | 672 +++++++++++++++++
ibdm/datamodel/Bipartite.h | 194 +++++
ibdm/datamodel/FatTree.cpp | 1743 ++++++++++++++++++++++++-------------------
ibdm/datamodel/Makefile.am | 4 +-
ibdm/datamodel/RouteSys.cc | 258 +++++++
ibdm/datamodel/RouteSys.h | 98 +++
ibdm/datamodel/SubnMgt.h | 4 +
ibdm/datamodel/ibdm.i | 3 +
8 files changed, 2226 insertions(+), 750 deletions(-)
create mode 100644 ibdm/datamodel/Bipartite.cc
create mode 100644 ibdm/datamodel/Bipartite.h
create mode 100644 ibdm/datamodel/RouteSys.cc
create mode 100644 ibdm/datamodel/RouteSys.h
diff --git a/ibdm/datamodel/Bipartite.cc b/ibdm/datamodel/Bipartite.cc
new file mode 100644
index 0000000..760b040
--- /dev/null
+++ b/ibdm/datamodel/Bipartite.cc
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "Bipartite.h"
+
+//CLASS edge///////////////////////////////////
+
+bool edge::isMatched() {
+ vertex* ver1 = (vertex*)v1;
+ vertex* ver2 = (vertex*)v2;
+
+ if (((this == ver1->getPartner()) && (this != ver2->getPartner())) || ((this == ver2->getPartner()) && (this != ver1->getPartner())))
+ cout << "-E- Error in edge matching" << endl;
+
+ return (this == ver1->getPartner()) && (this == ver2->getPartner());
+}
+
+//CLASS vertex/////////////////////////////////
+
+vertex::vertex(int n, side sd, int rad):id(n),s(sd),radix(rad)
+{
+ connections = new edge*[radix];
+ pred = new edge*[radix];
+ succ = new edge*[radix];
+
+ partner = NULL;
+ for (int i=0; i<radix; i++)
+ connections[i] = pred[i] = succ[i] = NULL;
+
+ predCount = succCount = 0;
+ maxUsed = -1;
+}
+
+vertex::~vertex()
+{
+ delete[] connections;
+ delete[] pred;
+ delete[] succ;
+}
+
+int vertex::getID() const
+{
+ return id;
+}
+
+side vertex::getSide() const
+{
+ return s;
+}
+
+void vertex::delConnection(edge* e)
+{
+ int my_idx, other_idx;
+ vertex* v;
+
+ // Find the side we are connected at and edge index
+ if (e->v1 == this) {
+ my_idx = e->idx1;
+ other_idx = e->idx2;
+ v = (vertex*)(e->v2);
+ }
+ else if (e->v2 == this) {
+ my_idx = e->idx2;
+ other_idx = e->idx1;
+ v = (vertex*)(e->v1);
+ }
+ else {
+ cout << "-E- Edge not connected to current vertex" << endl;
+ return;
+ }
+
+ if (my_idx >= radix || other_idx >= radix) {
+ cout << "-E- Edge index illegal" << endl;
+ return;
+ }
+
+ // Now disconnect
+ connections[my_idx] = NULL;
+ maxUsed--;
+ v->connections[other_idx] = NULL;
+ v->maxUsed--;
+}
+
+void vertex::pushConnection(edge* e)
+{
+ maxUsed++;
+ if (maxUsed == radix) {
+ cout << "-E- Can't push connection to vertex: " << id << ", exceeding radix!" << endl;
+ return;
+ }
+ // Mark our side
+ if (e->v1 == NULL) {
+ e->v1 = this;
+ e->idx1 = maxUsed;
+ }
+ else if (e->v2 == NULL) {
+ e->v2 = this;
+ e->idx2 = maxUsed;
+ }
+ else {
+ cout << "-E- Can't push connection both edges are already filled" << endl;
+ return;
+ }
+
+ if (maxUsed >= radix) {
+ cout << "-E- maxUsed illegal" << endl;
+ return;
+ }
+
+ // Now connect
+ connections[maxUsed] = e;
+}
+
+edge* vertex::popConnection()
+{
+ // Look for a connection
+ int i=0;
+ while ((i<radix) && !connections[i])
+ i++;
+ // No real connection found
+ if (i == radix)
+ return NULL;
+
+ edge* tmp = connections[i];
+
+ // Disconnect chosen edge
+ connections[i] = NULL;
+ if (tmp->v1 == this) {
+ vertex* v = (vertex*)(tmp->v2);
+ v->connections[tmp->idx2] = NULL;
+ }
+ else if (tmp->v2 == this) {
+ vertex* v = (vertex*)(tmp->v1);
+ v->connections[tmp->idx1] = NULL;
+ }
+ else {
+ cout << "-E- Edge not connected to current vertex" << endl;
+ return NULL;
+ }
+
+ if (tmp->idx1 >= radix || tmp->idx2 >= radix) {
+ cout << "-E- Edge index illegal" << endl;
+ return NULL;
+ }
+
+ return tmp;
+}
+
+// For unmacthed vertex, find an unmatched neighbor and match the pair
+bool vertex::match()
+{
+ // Already matched
+ if (partner)
+ return false;
+ // Look for neighbor
+ for (int i=0; i<radix; i++) {
+ if (connections[i]) {
+ vertex* v = (vertex*)(connections[i]->otherSide(this));
+ if (!v->partner) {
+ // Match
+ partner = connections[i];
+ v->partner = connections[i];
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+edge* vertex::getPartner() const
+{
+ return partner;
+}
+
+bool vertex::getInLayers() const
+{
+ return inLayers;
+}
+
+void vertex::setInLayers(bool b)
+{
+ inLayers = b;
+}
+
+void vertex::resetLayersInfo()
+{
+ inLayers = false;
+ succCount = predCount = 0;
+ for (int i=0; i<radix; i++)
+ succ[i] = pred[i] = NULL;
+}
+
+void vertex::addPartnerLayers(list<vertex*>& l)
+{
+ // No partner
+ if (!partner)
+ return;
+ vertex* p = (vertex*)(partner->otherSide(this));
+ // Partner already in one of the layers
+ if (p->inLayers)
+ return;
+ // Add partner to the layer
+ l.push_front(p);
+ p->inLayers = true;
+ // Update pred/succ relations
+ if (succCount >= radix) {
+ cout << "-E- More successors than radix" << endl;
+ return;
+ }
+ succ[succCount] = partner;
+ succCount++;
+
+ if (p->predCount >= radix) {
+ cout << "-E- More predecessors than radix" << endl;
+ return;
+ }
+ p->pred[p->predCount] = partner;
+ p->predCount++;
+}
+
+bool vertex::addNonPartnersLayers(list<vertex*>& l)
+{
+ vertex* prtn = NULL;
+ bool res = false;
+
+ if (partner)
+ prtn = (vertex*)(partner->otherSide(this));
+
+ for (int i=0; i<radix; i++) {
+ vertex* v = (vertex*)(connections[i]->otherSide(this));
+ if ((v != prtn) && (!v->inLayers)) {
+ // free vertex found
+ if (!v->partner)
+ res = true;
+ // Add vertex to the layer
+ l.push_front(v);
+ v->inLayers = true;
+ // Update pred/succ relations
+ if (succCount >= radix) {
+ cout << "-E- More successors than radix" << endl;
+ return false;
+ }
+ succ[succCount] = connections[i];
+ succCount++;
+
+ if (v->predCount >= radix) {
+ cout << "-E- More predecessors than radix" << endl;
+ return false;
+ }
+ v->pred[v->predCount] = connections[i];
+ v->predCount++;
+ }
+ }
+ return res;
+}
+
+vertex* vertex::getPredecessor() const
+{
+ vertex* v = NULL;
+ // Find a valid predecessor still in layers
+ for (int i=0; i<radix; i++) {
+ if (pred[i]) {
+ vertex* v2 = (vertex*)(pred[i]->otherSide(this));
+ if (v2->inLayers) {
+ v = v2;
+ break;
+ }
+ }
+ }
+ return v;
+}
+
+// Flip edge status on augmenting path
+void vertex::flipPredEdge(int idx)
+{
+ int i=0;
+ // Find an edge to a predecessor
+ for (i=0; i<radix; i++)
+ if (pred[i]) {
+ vertex* v1 = (vertex*)pred[i]->v1;
+ vertex* v2 = (vertex*)pred[i]->v2;
+ if (v1->getInLayers() && v2->getInLayers())
+ break;
+ }
+
+ if (i == radix) {
+ cout << "-E- Could find predecessor for flip" << endl;
+ return;
+ }
+ // The predecessor vertex
+ vertex* v = (vertex*) pred[i]->otherSide(this);
+
+ // Unmatch edge
+ if (idx)
+ v->partner = NULL;
+ // Match edge
+ else {
+ partner = pred[i];
+ v->partner = pred[i];
+ }
+}
+
+// Removing vertex from layers and updating successors/predecessors
+void vertex::unLink(list<vertex*>& l)
+{
+ inLayers = false;
+ for (int i=0; i<radix; i++) {
+ if (succ[i]) {
+ vertex* v = (vertex*)succ[i]->otherSide(this);
+ if (v->inLayers) {
+ v->predCount--;
+ // v has no more predecessors, schedule for removal from layers...
+ if (!v->predCount)
+ l.push_back(v);
+ succ[i] = NULL;
+ }
+ }
+ }
+ succCount = 0;
+}
+
+//CLASS Bipartite
+
+// C'tor
+
+Bipartite::Bipartite(int s, int r):size(s),radix(r)
+{
+ leftSide = new vertex*[size];
+ rightSide = new vertex*[size];
+
+ for (int i=0; i<size; i++) {
+ leftSide[i] = new vertex(i,LEFT,radix);
+ rightSide[i] = new vertex(i,RIGHT,radix);
+ }
+}
+
+///////////////////////////////////////////////////////////
+
+// D'tor
+
+Bipartite::~Bipartite()
+{
+ // Free vertices
+ for (int i=0; i<size; i++) {
+ delete leftSide[i];
+ delete rightSide[i];
+ }
+ delete[] leftSide;
+ delete[] rightSide;
+
+ // Free edges
+ while (List.size()) {
+ edge* e = (edge*)(List.front());
+ List.pop_front();
+ delete e;
+ }
+}
+
+////////////////////////////////////////////////////////////
+
+// Get radix
+
+int Bipartite::getRadix() const
+{
+ return radix;
+}
+
+////////////////////////////////////////////////////////////
+
+// Set edges listt iterator to first
+
+bool Bipartite::setIterFirst()
+{
+ it = List.begin();
+ if (it == List.end())
+ return false;
+ return true;
+}
+
+///////////////////////////////////////////////////////////
+
+// Set edges list iterator to next
+
+bool Bipartite::setIterNext()
+{
+ if (it == List.end())
+ return false;
+ it++;
+ if (it == List.end())
+ return false;
+ return true;
+}
+
+////////////////////////////////////////////////////////////
+
+// Get current edge's request data
+
+inputData Bipartite::getReqDat()
+{
+ if (it == List.end()) {
+ cout << "-E- Iterator points to list end" << endl;
+ }
+ return ((edge*)(*it))->reqDat;
+}
+
+/////////////////////////////////////////////////////////////
+
+// Add connection between the nodes to the graph
+
+void Bipartite::connectNodes(int n1, int n2, inputData reqDat)
+{
+ if ((n1 >= size) || (n2 >= size)) {
+ cout << "-E- Node index exceeds size" << endl;
+ return;
+ }
+ // Create new edge
+ edge* newEdge = new edge;
+
+ // Init edge fields and add to it the edges list
+ newEdge->it = List.insert(List.end(), newEdge);
+ newEdge->reqDat = reqDat;
+ newEdge->v1 = newEdge->v2 = NULL;
+
+ // Connect the vertices
+ leftSide[n1]->pushConnection(newEdge);
+ rightSide[n2]->pushConnection(newEdge);
+}
+
+////////////////////////////////////////////////////////////////
+
+// Find maximal matching
+
+void Bipartite::maximalMatch()
+{
+ // Invoke match on left-side vertices
+ for (int i=0; i<size; i++)
+ leftSide[i]->match();
+}
+
+////////////////////////////////////////////////////////////////
+
+// Find maximum matching
+
+// Hopcroft-Karp algorithm
+Bipartite* Bipartite::maximumMatch()
+{
+ // First find a maximal match
+ maximalMatch();
+
+ list<void*>::iterator it2 = List.begin();
+ list<vertex*> l1, l2;
+ list<vertex*>::iterator it;
+
+ // Loop on algorithm phases
+ while (1) {
+ bool hardStop = false;
+ // First reset layers info
+ for (int i=0; i<size; i++) {
+ leftSide[i]->resetLayersInfo();
+ rightSide[i]->resetLayersInfo();
+ }
+ // Add free left-side vertices to l1 (building layer 0)
+ l1.clear();
+ for (int i=0; i<size; i++) {
+ if (!leftSide[i]->getPartner()) {
+ l1.push_front(leftSide[i]);
+ leftSide[i]->setInLayers(true);
+ }
+ }
+ // This is our termination condition
+ // Maximum matching achieved
+ if (l1.empty())
+ break;
+ // Loop on building layers
+ while (1) {
+ bool stop = false;
+ l2.clear();
+ // Add all non-partners of vertices in l1 to layers (l2)
+ // We signal to stop if a free (right-side) vertex is entering l2
+ for (it = l1.begin(); it != l1.end(); it++)
+ if ((*it)->addNonPartnersLayers(l2))
+ stop = true;
+ // Found free vertex among right-side vertices
+ if (stop) {
+ // There are augmenting paths, apply them
+ augment(l2);
+ break;
+ }
+ // This is a terminal condition
+ if (l2.empty()) {
+ hardStop = true;
+ break;
+ }
+ // Add partners of vertices in l2 to layers (l1)
+ l1.clear();
+ for (it = l2.begin(); it!= l2.end(); it++)
+ (*it)->addPartnerLayers(l1);
+ // This is a terminal condition
+ if (l1.empty()) {
+ hardStop = true;
+ break;
+ }
+ }
+ // Maximum matching achieved
+ if (hardStop)
+ break;
+ }
+ // Build the matching graph
+ Bipartite* M = new Bipartite(size, 1);
+ // Go over all edges and move matched one to the new graph
+ it2 = List.begin();
+ while (it2 != List.end()) {
+ edge* e = (edge*)(*it2);
+ if (e->isMatched()) {
+ vertex* v1 = (vertex*)(e->v1);
+ vertex* v2 = (vertex*)(e->v2);
+ // Unlink vertices
+ ((vertex*)(e->v1))->delConnection(e);
+ // Update edges list
+ it2 = List.erase(it2);
+ // Add link to the new graph
+ if (v1->getSide() == LEFT)
+ M->connectNodes(v1->getID(), v2->getID(), e->reqDat);
+ else
+ M->connectNodes(v2->getID(), v1->getID(), e->reqDat);
+ // Free memory
+ delete e;
+ }
+ else
+ it2++;
+ }
+ return M;
+}
+
+//////////////////////////////////////////////////////////////////////
+
+// Apply augmenting paths on the matching
+
+void Bipartite::augment(list<vertex*>& l)
+{
+ // Use delQ to store vertex scheduled for the removal from layers
+ list<vertex*> delQ;
+ // Remove non-free vertices
+ list<vertex*>::iterator it = l.begin();
+ while (it != l.end()) {
+ if ((*it)->getPartner()) {
+ delQ.push_front((*it));
+ it = l.erase(it);
+ }
+ else
+ it++;
+ }
+ // Get rid of non-free vertices
+ while (!delQ.empty()) {
+ vertex* fr = delQ.front();
+ delQ.pop_front();
+ fr->unLink(delQ);
+ }
+
+ if (l.empty()) {
+ cout << "-E- No free vertices left!" << endl;
+ return;
+ }
+ // Augment and reset pred/succ relations
+ while (!l.empty()) {
+ vertex* curr = l.front();
+ l.pop_front();
+ int idx = 0;
+ // Backtrace the path and augment
+ int length=0;
+ while (1) {
+ delQ.push_front(curr);
+ // Its either the end of a path or an error
+ if (!curr->getPredecessor()) {
+ if (!idx && length) {
+ cout << "-E- This vertex must have predecessor" << endl;
+ return;
+ }
+ break;
+ }
+ // Flip edge status
+ curr->flipPredEdge(idx);
+ // Move back
+ curr = curr->getPredecessor();
+ idx = (idx+1)%2;
+ length++;
+ }
+ // Now clean the delQ
+ while (!delQ.empty()) {
+ vertex* fr = delQ.front();
+ delQ.pop_front();
+ fr->unLink(delQ);
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////
+
+// Perform Euler decomposition
+
+void Bipartite::decompose(Bipartite** bp1, Bipartite** bp2)
+{
+ if (radix < 2) {
+ cout << "-E- Radix value illegal: " << radix << endl;
+ return;
+ }
+
+ // Create new graphs
+ Bipartite* arr[2];
+ arr[0] = new Bipartite(size, radix/2);
+ arr[1] = new Bipartite(size, radix/2);
+
+ // Continue as long as unassigned edges left
+ while (!List.empty()) {
+ int idx = 0;
+ edge* e = (edge*)List.front();
+ vertex* current = (vertex*)e->v1;
+ e = current->popConnection();
+
+ while (e) {
+ // Connect nodes in the new graphs
+ vertex* v1 = (vertex*)e->v1;
+ vertex* v2 = (vertex*)e->v2;
+ if (v1->getSide() == LEFT)
+ arr[idx]->connectNodes(v1->getID(), v2->getID(), e->reqDat);
+ else
+ arr[idx]->connectNodes(v2->getID(), v1->getID(), e->reqDat);
+ idx = (idx+1)%2;
+ // Remove edge from list
+ List.erase(e->it);
+ // Pick next vertex
+ current = (vertex*) e->otherSide(current);
+ // Free memory
+ delete e;
+ // Pick next edge
+ e = current->popConnection();
+ }
+ }
+ *bp1 = arr[0];
+ *bp2 = arr[1];
+}
diff --git a/ibdm/datamodel/Bipartite.h b/ibdm/datamodel/Bipartite.h
new file mode 100644
index 0000000..cdac81a
--- /dev/null
+++ b/ibdm/datamodel/Bipartite.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+/*
+ * Fabric Utilities Project
+ *
+ * Bipartite Graph Header file
+ *
+ * Author: Vladimir Zdornov, Mellanox Technologies
+ *
+ */
+
+#ifndef IBDM_BIPARTITE_H_
+#define IBDM_BIPARTITE_H_
+
+#include <list>
+#include "RouteSys.h"
+
+using namespace std;
+
+typedef list<void*>::iterator peIter;
+typedef list<void*> peList;
+
+typedef enum side_ {LEFT=0, RIGHT} side;
+
+class edge
+{
+ public:
+ // Vertices
+ void* v1;
+ void* v2;
+ // Connection indices
+ int idx1;
+ int idx2;
+
+ // Edge list iterator
+ peIter it;
+
+ // Request data
+ inputData reqDat;
+
+ // C'tor
+ edge():v1(NULL),v2(NULL),idx1(-1),idx2(-1){};
+
+ // Get the vertex on the other side of the edge
+ void* otherSide(const void* v) {
+ if (v == v1)
+ return v2;
+ if (v == v2)
+ return v1;
+ return NULL;
+ }
+
+ // Check whether the edge is matched
+ bool isMatched();
+};
+
+class vertex
+{
+ // ID
+ int id;
+ // Side (0 - left, 1 - right)
+ side s;
+ // Array of connected edges
+ edge** connections;
+ // Initial number of neighbors
+ int radix;
+ // Current number of neighbors
+ int maxUsed;
+
+ // Matching fields
+
+ // Edge leading to the partner (NULL if none)
+ edge* partner;
+ // Array of layers predecessors
+ edge** pred;
+ // Number of predecessors
+ int predCount;
+ // Array of layers successors
+ edge** succ;
+ // Number of successors
+ int succCount;
+ // Denotes whether vertex is currently in layers
+ bool inLayers;
+
+ public:
+ // C'tor
+ vertex(int n, side sd, int rad);
+ // D'tor
+ ~vertex();
+ // Getters + Setters
+ int getID() const;
+ side getSide() const;
+ edge* getPartner() const;
+ vertex* getPredecessor() const;
+ bool getInLayers() const;
+ void setInLayers(bool b);
+ // Reset matching info
+ void resetLayersInfo();
+ // Adding partner to match layers
+ void addPartnerLayers(list<vertex*>& l);
+ // Adding non-partners to match layers
+ // Return true if one of the neighbors was free
+ bool addNonPartnersLayers(list<vertex*>& l);
+ // Add new connection (this vertex only)
+ void pushConnection(edge* e);
+ // Remove given connection (vertices on both sides)
+ void delConnection(edge* e);
+ // Flip predecessor edge status
+ // idx represents the layer index % 2 in the augmenting backward traversal (starting from 0)
+ void flipPredEdge(int idx);
+ // Remove vertex from layers, update predecessors and successors
+ void unLink(list<vertex*>& l);
+ // Get SOME connection and remove it (from both sides)
+ // If no connections present NULL is returned
+ edge* popConnection();
+ // Match vertex to SOME unmatched neighbor
+ // In case of success true is returned, false in case of failure
+ bool match();
+};
+
+class Bipartite
+{
+ // Number of vertices on each side
+ int size;
+ // Number of edges per vertex
+ int radix;
+ // Vertices arrays
+ vertex** leftSide;
+ vertex** rightSide;
+
+ peIter it;
+ // Edges list
+ peList List;
+
+ // Apply augmenting paths
+ void augment(list<vertex*>& l);
+
+ public:
+ // C'tor
+ Bipartite(int s, int r);
+ // D'tor
+ ~Bipartite();
+ int getRadix() const;
+ // Set iterator to first edge (returns false if list is empty)
+ bool setIterFirst();
+ // Set iterator to next edge (return false if list end is reached)
+ bool setIterNext();
+ // Get inputData pointed by iterator
+ inputData getReqDat();
+ // Add connection between two nodes
+ void connectNodes(int n1, int n2, inputData reqDat);
+ // Find maximal matching on current graph
+ void maximalMatch();
+ // Find maximum matching and remove it from the graph
+ // We use a variant of Hopcroft-Karp algorithm
+ Bipartite* maximumMatch();
+ // Decompose bipartite into to edge disjoint radix/2-regular bps
+ // We use a variant of Euler Decomposition
+ void decompose(Bipartite** bp1, Bipartite** bp2);
+};
+
+#endif
diff --git a/ibdm/datamodel/FatTree.cpp b/ibdm/datamodel/FatTree.cpp
index 55ff116..9fb2858 100644
--- a/ibdm/datamodel/FatTree.cpp
+++ b/ibdm/datamodel/FatTree.cpp
@@ -43,10 +43,12 @@ FatTree Utilities:
#include <iomanip>
#include "Fabric.h"
#include "SubnMgt.h"
+#include "RouteSys.h"
+
//////////////////////////////////////////////////////////////////////////////
// Build a Fat Tree data structure for the given topology.
// Prerequisites: Ranking performed and stored at p_node->rank.
-// Ranking is such that roots are marked with rank=0 and leaf switches with
+/// Ranking is such that roots are marked with rank=0 and leaf switches with
// highest value.
//
// The algorithm BFS from an arbitrary leaf switch.
@@ -70,169 +72,188 @@ FatTree Utilities:
// for comparing tupples
struct FatTreeTuppleLess : public binary_function <vec_byte, vec_byte, bool> {
- bool operator()(const vec_byte& x, const vec_byte& y) const {
- if (x.size() > y.size()) return false;
- if (y.size() > x.size()) return true;
+ bool operator()(const vec_byte& x, const vec_byte& y) const {
+ if (x.size() > y.size()) return false;
+ if (y.size() > x.size()) return true;
- for (unsigned int i = 0 ; i < x.size() ; i++)
+ for (unsigned int i = 0 ; i < x.size() ; i++)
{
- if (x[i] > y[i]) return false;
- if (x[i] < y[i]) return true;
+ if (x[i] > y[i]) return false;
+ if (x[i] < y[i]) return true;
}
- return false;
- }
+ return false;
+ }
};
typedef map< IBNode *, vec_byte, less< IBNode *> > map_pnode_vec_byte;
typedef vector< list< int > > vec_list_int;
class FatTreeNode {
- IBNode *p_node; // points to the fabric node for this node
- vec_list_int childPorts; // port nums connected to child by changing digit
- vec_list_int parentPorts;// port nums connected to parent by changing digit
+ IBNode *p_node; // points to the fabric node for this node
+ vec_list_int childPorts; // port nums connected to child by changing digit
+ vec_list_int parentPorts;// port nums connected to parent by changing digit
public:
- FatTreeNode(IBNode *p_node);
- FatTreeNode(){p_node = NULL;};
- int numParents();
- int numChildren();
- int numParentGroups();
- int numChildGroups();
- friend class FatTree;
+ FatTreeNode(IBNode *p_node);
+ FatTreeNode(){p_node = NULL;};
+ int numParents();
+ int numChildren();
+ int numParentGroups();
+ int numChildGroups();
+ bool goingDown(int lid);
+ friend class FatTree;
};
FatTreeNode::FatTreeNode(IBNode *p_n)
{
- p_node = p_n;
- list< int > emptyList;
- for (unsigned int pn = 0; pn <= p_node->numPorts; pn++)
- {
+ p_node = p_n;
+ list< int > emptyList;
+ for (unsigned int pn = 0; pn <= p_node->numPorts; pn++)
+ {
childPorts.push_back(emptyList);
parentPorts.push_back(emptyList);
- }
+ }
}
// get the total number of children a switch have
int
FatTreeNode::numChildren()
{
- int s = 0;
- for (int i = 0; i < childPorts.size(); i++)
- s += childPorts[i].size();
- return s;
+ int s = 0;
+ for (int i = 0; i < childPorts.size(); i++)
+ s += childPorts[i].size();
+ return s;
}
// get the total number of children a switch have
int
FatTreeNode::numParents()
{
- int s = 0;
- for (int i = 0; i < parentPorts.size(); i++)
- s += parentPorts[i].size();
- return s;
+ int s = 0;
+ for (int i = 0; i < parentPorts.size(); i++)
+ s += parentPorts[i].size();
+ return s;
}
// get the total number of children groups
int
FatTreeNode::numChildGroups()
{
- int s = 0;
- for (int i = 0; i < childPorts.size(); i++)
- if (childPorts[i].size()) s++;
- return s;
+ int s = 0;
+ for (int i = 0; i < childPorts.size(); i++)
+ if (childPorts[i].size()) s++;
+ return s;
}
int
FatTreeNode::numParentGroups()
{
- int s = 0;
- for (int i = 0; i < parentPorts.size(); i++)
- if (parentPorts[i].size()) s++;
- return s;
+ int s = 0;
+ for (int i = 0; i < parentPorts.size(); i++)
+ if (parentPorts[i].size()) s++;
+ return s;
+}
+
+// Check whether there is downwards path towards the given lid
+bool FatTreeNode::goingDown(int lid)
+{
+ int portNum = p_node->getLFTPortForLid(lid);
+ if (portNum == IB_LFT_UNASSIGNED)
+ return false;
+ for (int i=0; i<childPorts.size(); i++)
+ for (list<int>::iterator lI = childPorts[i].begin();lI != childPorts[i].end(); lI++) {
+ if (portNum == *lI)
+ return true;
+ }
+ return false;
}
typedef map< vec_byte, class FatTreeNode, FatTreeTuppleLess > map_tupple_ftnode;
class FatTree {
- // the node tupple is built out of the following:
- // d[0] = rank
- // d[1..N-1] = ID digits
- IBFabric *p_fabric; // The fabric we attach to
- map_pnode_vec_byte TuppleByNode;
- map_tupple_ftnode NodeByTupple;
- vec_int LidByIdx; // store target HCA lid by its index
- unsigned int N; // number of levels in the fabric
-
- // obtain the Fat Tree node for a given IBNode
- FatTreeNode* getFatTreeNodeByNode(IBNode *p_node);
-
- // get the first lowest level switch while making sure all HCAs
- // are connected to same rank
- // return NULL if this check is not met or no ranking available
- IBNode *getLowestLevelSwitchNode();
-
- // get a free tupple given the reference one and the index to change:
- vec_byte getFreeTupple(vec_byte refTupple, unsigned int changeIdx);
-
- // convert tupple to string
- string getTuppleStr(vec_byte tupple);
-
- // simply dump out the FatTree data:
- void dump();
-
- // track a connection to remote switch
- int trackConnection(
- FatTreeNode *p_ftNode,
- vec_byte tupple, // the connected node tupple
- unsigned int rank, // rank of the local node
- unsigned int remRank, // rank of the remote node
- unsigned int portNum, // the port number connecting to the remote node
- unsigned int remDigit // the digit which changed on the remote node
- );
-
- // set of coefficients that represent the structure
- int maxHcasPerLeafSwitch;
- vec_int childrenPerRank; // not valid for leafs
- vec_int parentsPerRank;
- vec_int numSwInRank; // number of switches for that level
- vec_int downByRank; // number of remote child switches s at rank
- vec_int upByRank; // number of remote parent switches at rank
-
- // extract fat tree coefficients and update validity flag
- // return 0 if OK
- int extractCoefficients();
+ // the node tupple is built out of the following:
+ // d[0] = rank
+ // d[1..N-1] = ID digits
+ IBFabric *p_fabric; // The fabric we attach to
+ map_pnode_vec_byte TuppleByNode;
+ map_tupple_ftnode NodeByTupple;
+ vec_int LidByIdx; // store target HCA lid by its index
+ unsigned int N; // number of levels in the fabric
+ map_str_int IdxByName;
+
+ // obtain the Fat Tree node for a given IBNode
+ FatTreeNode* getFatTreeNodeByNode(IBNode *p_node);
+
+ // get the first lowest level switch while making sure all HCAs
+ // are connected to same rank
+ // return NULL if this check is not met or no ranking available
+ IBNode *getLowestLevelSwitchNode();
+
+ // get a free tupple given the reference one and the index to change:
+ vec_byte getFreeTupple(vec_byte refTupple, unsigned int changeIdx);
+
+ // convert tupple to string
+ string getTuppleStr(vec_byte tupple);
+
+ // simply dump out the FatTree data:
+ void dump();
+
+ // track a connection to remote switch
+ int trackConnection(
+ FatTreeNode *p_ftNode,
+ vec_byte tupple, // the connected node tupple
+ unsigned int rank, // rank of the local node
+ unsigned int remRank, // rank of the remote node
+ unsigned int portNum, // the port number connecting to the remote node
+ unsigned int remDigit // the digit which changed on the remote node
+ );
+
+ // set of coefficients that represent the structure
+ int maxHcasPerLeafSwitch;
+ vec_int childrenPerRank; // not valid for leafs
+ vec_int parentsPerRank;
+ vec_int numSwInRank; // number of switches for that level
+ vec_int downByRank; // number of remote child switches s at rank
+ vec_int upByRank; // number of remote parent switches at rank
+
+ // extract fat tree coefficients and update validity flag
+ // return 0 if OK
+ int extractCoefficients();
public:
- // construct the fat tree by matching the topology to it.
- // note that this might return an invalid tree for routing
- // as indicated by isValid flag
- FatTree(IBFabric *p_fabric);
+ // construct the fat tree by matching the topology to it.
+ // note that this might return an invalid tree for routing
+ // as indicated by isValid flag
+ FatTree(IBFabric *p_fabric);
+
+ // true if the fabric can be mapped to a fat tree
+ bool isValid;
- // true if the fabric can be mapped to a fat tree
- bool isValid;
+ // propagate FDB assignments going up the tree ignoring the out port
+ int assignLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid, int outPortNum, int switchPathOnly);
- // propagate FDB assignments going up the tree ignoring the out port
- int assignLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid,
- int outPortNum, int switchPathOnly);
+ // set FDB values as given in the input
+ int forceLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid, vec_int ports);
- // propagate FDB assignments going down the tree
- int
- assignLftDownWards(FatTreeNode *p_ftNode, uint16_t dLid,
- int outPortNum, int switchPathOnly);
+ // propagate FDB assignments going down the tree
+ int assignLftDownWards(FatTreeNode *p_ftNode, uint16_t dLid, int outPortNum, int switchPathOnly, int downOnly);
- // route the fat tree
- int route();
+ // route the fat tree
+ int route();
- // create the file ftree.hcas with the list of HCA port names
- // and LIDs in the correct order
- void dumpHcaOrder();
+ // route requested permutation in the fat tree
+ int permRoute(vector<string> src, vector<string> dst);
+
+ // create the file ftree.hcas with the list of HCA port names
+ // and LIDs in the correct order
+ void dumpHcaOrder();
};
FatTreeNode* FatTree::getFatTreeNodeByNode(IBNode *p_node) {
- FatTreeNode* p_ftNode;
- vec_byte tupple(N, 0);
- tupple = TuppleByNode[p_node];
- p_ftNode = &NodeByTupple[tupple];
- return p_ftNode;
+ FatTreeNode* p_ftNode;
+ vec_byte tupple(N, 0);
+ tupple = TuppleByNode[p_node];
+ p_ftNode = &NodeByTupple[tupple];
+ return p_ftNode;
}
// get the first lowest level switch while making sure all HCAs
@@ -240,124 +261,124 @@ FatTreeNode* FatTree::getFatTreeNodeByNode(IBNode *p_node) {
// return NULL if this check is not met or no ranking available
IBNode *FatTree::getLowestLevelSwitchNode()
{
- unsigned int leafRank = 0;
- IBNode *p_leafSwitch = NULL;
- IBPort *p_port;
-
- // go over all HCAs and track the rank of the node connected to them
- for( map_str_pnode::iterator nI = p_fabric->NodeByName.begin();
- nI != p_fabric->NodeByName.end();
- nI++)
- {
+ unsigned int leafRank = 0;
+ IBNode *p_leafSwitch = NULL;
+ IBPort *p_port;
+
+ // go over all HCAs and track the rank of the node connected to them
+ for( map_str_pnode::iterator nI = p_fabric->NodeByName.begin();
+ nI != p_fabric->NodeByName.end();
+ nI++)
+ {
IBNode *p_node = (*nI).second;
if (p_node->type != IB_CA_NODE) continue;
for (unsigned int pn = 1; pn <= p_node->numPorts; pn++)
- {
- p_port = p_node->getPort(pn);
- if (p_port && p_port->p_remotePort)
- {
- IBNode *p_remNode = p_port->p_remotePort->p_node;
-
- if (p_remNode->type != IB_SW_NODE) continue;
-
- // is the remote node ranked?
- if (!p_remNode->rank) continue;
-
- // must be identical for all leaf switches:
- if (!leafRank)
- {
- leafRank = p_remNode->rank;
- p_leafSwitch = p_remNode;
- }
- else
- {
- // get the lowest name
- if (p_remNode->name < p_leafSwitch->name )
- p_leafSwitch = p_remNode;
-
- if (p_remNode->rank != leafRank)
- {
- cout << "-E- Given topology is not a fat tree. HCA:"
- << p_remNode->name
- << " found not on lowest level!" << endl;
- return(NULL);
- }
- }
- }
- }
- }
- return(p_leafSwitch);
+ {
+ p_port = p_node->getPort(pn);
+ if (p_port && p_port->p_remotePort)
+ {
+ IBNode *p_remNode = p_port->p_remotePort->p_node;
+
+ if (p_remNode->type != IB_SW_NODE) continue;
+
+ // is the remote node ranked?
+ if (!p_remNode->rank) continue;
+
+ // must be identical for all leaf switches:
+ if (!leafRank)
+ {
+ leafRank = p_remNode->rank;
+ p_leafSwitch = p_remNode;
+ }
+ else
+ {
+ // get the lowest name
+ if (p_remNode->name < p_leafSwitch->name )
+ p_leafSwitch = p_remNode;
+
+ if (p_remNode->rank != leafRank)
+ {
+ cout << "-E- Given topology is not a fat tree. HCA:"
+ << p_remNode->name
+ << " found not on lowest level!" << endl;
+ return(NULL);
+ }
+ }
+ }
+ }
+ }
+ return(p_leafSwitch);
}
// get a free tupple given the reference one and the index to change:
// also track the max digit allocated per index
vec_byte FatTree::getFreeTupple(vec_byte refTupple, unsigned int changeIdx)
{
- vec_byte res = refTupple;
- int rank = changeIdx - 1;
- for (uint8_t i = 0; i < 255; i++)
- {
+ vec_byte res = refTupple;
+ int rank = changeIdx - 1;
+ for (uint8_t i = 0; i < 255; i++)
+ {
res[changeIdx] = i;
map_tupple_ftnode::const_iterator tI = NodeByTupple.find(res);
if (tI == NodeByTupple.end())
- return res;
- }
- cout << "ABORT: fail to get free tupple! (in 255 indexies)" << endl;
- abort();
+ return res;
+ }
+ cout << "ABORT: fail to get free tupple! (in 255 indexies)" << endl;
+ abort();
}
string FatTree::getTuppleStr(vec_byte tupple)
{
- char buf[128];
- buf[0] = '\0';
- for (unsigned int i = 0; i < tupple.size(); i++)
- {
+ char buf[128];
+ buf[0] = '\0';
+ for (unsigned int i = 0; i < tupple.size(); i++)
+ {
if (i) strcat(buf,".");
sprintf(buf, "%s%d", buf, tupple[i]);
- }
- return(string(buf));
+ }
+ return(string(buf));
}
// track connection going up or down by registering the port in the
// correct fat tree node childPorts and parentPorts
int FatTree::trackConnection(
- FatTreeNode *p_ftNode, // the connected node
- vec_byte tupple, // the connected node tupple
- unsigned int rank, // rank of the local node
- unsigned int remRank, // rank of the remote node
- unsigned int portNum, // the port number connecting to the remote node
- unsigned int remDigit // the digit of the tupple changing to the remote node
- )
+ FatTreeNode *p_ftNode, // the connected node
+ vec_byte tupple, // the connected node tupple
+ unsigned int rank, // rank of the local node
+ unsigned int remRank, // rank of the remote node
+ unsigned int portNum, // the port number connecting to the remote node
+ unsigned int remDigit // the digit of the tupple changing to the remote node
+ )
{
- if ( rank < remRank )
- {
+ if ( rank < remRank )
+ {
// going down
// make sure we have enough entries in the vector
if (remDigit >= p_ftNode->childPorts.size())
- {
- list< int > emptyPortList;
- for (unsigned int i = p_ftNode->childPorts.size();
- i <= remDigit; i++)
+ {
+ list< int > emptyPortList;
+ for (unsigned int i = p_ftNode->childPorts.size();
+ i <= remDigit; i++)
p_ftNode->childPorts.push_back(emptyPortList);
- }
+ }
p_ftNode->childPorts[remDigit].push_back(portNum);
- }
- else
- {
+ }
+ else
+ {
// going up
// make sure we have enough entries in the vector
if (remDigit >= p_ftNode->parentPorts.size())
- {
- list< int > emptyPortList;
- for (unsigned int i = p_ftNode->parentPorts.size();
- i <= remDigit; i++)
+ {
+ list< int > emptyPortList;
+ for (unsigned int i = p_ftNode->parentPorts.size();
+ i <= remDigit; i++)
p_ftNode->parentPorts.push_back(emptyPortList);
- }
+ }
p_ftNode->parentPorts[remDigit].push_back(portNum);
- }
+ }
- return(0);
+ return(0);
}
// Extract fat tree coefficiants and double check its
@@ -365,19 +386,19 @@ int FatTree::trackConnection(
int
FatTree::extractCoefficients()
{
- // Go over all levels of the tree.
- // Collect number of nodes per each level
- // Require the number of children is equal
- // Require the number of parents is equal
-
- int prevLevel = -1;
- int anyErr = 0;
-
- // go over all nodes
- for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
- tI != NodeByTupple.end();
- tI++)
- {
+ // Go over all levels of the tree.
+ // Collect number of nodes per each level
+ // Require the number of children is equal
+ // Require the number of parents is equal
+
+ int prevLevel = -1;
+ int anyErr = 0;
+
+ // go over all nodes
+ for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
+ tI != NodeByTupple.end();
+ tI++)
+ {
FatTreeNode *p_ftNode = &((*tI).second);
int level = (*tI).first[0];
bool isFirstInLevel;
@@ -386,118 +407,118 @@ FatTree::extractCoefficients()
prevLevel = level;
if (isFirstInLevel)
- {
- numSwInRank.push_back(1);
- parentsPerRank.push_back(p_ftNode->numParents());
- childrenPerRank.push_back(p_ftNode->numChildren());
- downByRank.push_back(p_ftNode->numChildGroups());
- upByRank.push_back(p_ftNode->numParentGroups());
- }
+ {
+ numSwInRank.push_back(1);
+ parentsPerRank.push_back(p_ftNode->numParents());
+ childrenPerRank.push_back(p_ftNode->numChildren());
+ downByRank.push_back(p_ftNode->numChildGroups());
+ upByRank.push_back(p_ftNode->numParentGroups());
+ }
else
- {
- numSwInRank[level]++;
- if (parentsPerRank[level] != p_ftNode->numParents())
- {
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-E- node:" << p_ftNode->p_node->name
- << " has unequal number of parent ports to its level"
- << endl;
- anyErr++;
- }
-
- // we do not require symmetrical routing for leafs
- if (level < N-1)
- {
- if (childrenPerRank[level] != p_ftNode->numChildren())
- {
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-E- node:" << p_ftNode->p_node->name <<
- " has unequal number of child ports to its level" << endl;
- anyErr++;
- }
- }
- }
- }
+ {
+ numSwInRank[level]++;
+ if (parentsPerRank[level] != p_ftNode->numParents())
+ {
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-E- node:" << p_ftNode->p_node->name
+ << " has unequal number of parent ports to its level"
+ << endl;
+ anyErr++;
+ }
+
+ // we do not require symmetrical routing for leafs
+ if (level < N-1)
+ {
+ if (childrenPerRank[level] != p_ftNode->numChildren())
+ {
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-E- node:" << p_ftNode->p_node->name <<
+ " has unequal number of child ports to its level" << endl;
+ anyErr++;
+ }
+ }
+ }
+ }
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- {
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ {
for (int rank = 0; rank < numSwInRank.size(); rank++) {
- cout << "-I- rank:" << rank
- << " switches:" << numSwInRank[rank]
- << " parents: " << parentsPerRank[rank]
- << " (" << upByRank[rank] << " groups)"
- << " children:" << childrenPerRank[rank]
- << " (" << downByRank[rank] << " groups)"
- << endl;
+ cout << "-I- rank:" << rank
+ << " switches:" << numSwInRank[rank]
+ << " parents: " << parentsPerRank[rank]
+ << " (" << upByRank[rank] << " groups)"
+ << " children:" << childrenPerRank[rank]
+ << " (" << downByRank[rank] << " groups)"
+ << endl;
}
- }
+ }
- if (anyErr) return 1;
+ if (anyErr) return 1;
- vec_byte firstLeafTupple(N, 0);
- firstLeafTupple[0] = N-1;
- maxHcasPerLeafSwitch = 0;
- for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
- tI != NodeByTupple.end();
- tI++)
- {
+ vec_byte firstLeafTupple(N, 0);
+ firstLeafTupple[0] = N-1;
+ maxHcasPerLeafSwitch = 0;
+ for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
+ tI != NodeByTupple.end();
+ tI++)
+ {
FatTreeNode *p_ftNode = &((*tI).second);
IBNode *p_node = p_ftNode->p_node;
int numHcaPorts = 0;
for (unsigned int pn = 1; pn <= p_node->numPorts; pn++)
- {
- IBPort *p_port = p_node->getPort(pn);
- if (p_port && p_port->p_remotePort &&
- (p_port->p_remotePort->p_node->type == IB_CA_NODE))
- {
- numHcaPorts++;
- }
+ {
+ IBPort *p_port = p_node->getPort(pn);
+ if (p_port && p_port->p_remotePort &&
+ (p_port->p_remotePort->p_node->type == IB_CA_NODE))
+ {
+ numHcaPorts++;
+ }
- }
+ }
if (numHcaPorts > maxHcasPerLeafSwitch)
- maxHcasPerLeafSwitch = numHcaPorts;
- }
+ maxHcasPerLeafSwitch = numHcaPorts;
+ }
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-I- HCAs per leaf switch set to:"
- << maxHcasPerLeafSwitch << endl;
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-I- HCAs per leaf switch set to:"
+ << maxHcasPerLeafSwitch << endl;
- cout << "-I- Topology is a valid Fat Tree" << endl;
- isValid = 1;
+ cout << "-I- Topology is a valid Fat Tree" << endl;
+ isValid = 1;
- return 0;
+ return 0;
}
// construct the fat tree by matching the topology to it.
FatTree::FatTree(IBFabric *p_f)
{
- isValid = 0;
- p_fabric = p_f;
+ isValid = 0;
+ p_fabric = p_f;
- IBNode *p_node = getLowestLevelSwitchNode();
- IBPort *p_port;
- FatTreeNode *p_ftNode;
+ IBNode *p_node = getLowestLevelSwitchNode();
+ IBPort *p_port;
+ FatTreeNode *p_ftNode;
- if (! p_node) return;
- N = p_node->rank + 1; // N = number of levels (our first rank is 0 ...)
+ if (! p_node) return;
+ N = p_node->rank + 1; // N = number of levels (our first rank is 0 ...)
- // BFS from the first switch connected to HCA found on the fabric
- list< IBNode * > bfsQueue;
- bfsQueue.push_back(p_node);
+ // BFS from the first switch connected to HCA found on the fabric
+ list< IBNode * > bfsQueue;
+ bfsQueue.push_back(p_node);
- // also we always allocate the address 0..0 with "rank" digits to the node:
- vec_byte tupple(N, 0);
+ // also we always allocate the address 0..0 with "rank" digits to the node:
+ vec_byte tupple(N, 0);
- // adjust the level:
- tupple[0] = p_node->rank;
- TuppleByNode[p_node] = tupple;
- NodeByTupple[tupple] = FatTreeNode(p_node);
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-I- Assigning tupple:" << getTuppleStr(tupple) << " to:"
- << p_node->name << endl;
+ // adjust the level:
+ tupple[0] = p_node->rank;
+ TuppleByNode[p_node] = tupple;
+ NodeByTupple[tupple] = FatTreeNode(p_node);
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-I- Assigning tupple:" << getTuppleStr(tupple) << " to:"
+ << p_node->name << endl;
- while (! bfsQueue.empty())
- {
+ while (! bfsQueue.empty())
+ {
p_node = bfsQueue.front();
bfsQueue.pop_front();
// we must have a tupple stored - get it
@@ -507,137 +528,141 @@ FatTree::FatTree(IBFabric *p_f)
// go over all the node ports
for (unsigned int pn = 1; pn <= p_node->numPorts; pn++)
- {
- p_port = p_node->getPort(pn);
- if (!p_port || !p_port->p_remotePort) continue;
-
- IBNode *p_remNode = p_port->p_remotePort->p_node;
-
- if (p_remNode->type != IB_SW_NODE)
- {
- // for HCAs we only track the conenctions
- list< int > tmpList;
- tmpList.push_back(pn);
- p_ftNode->childPorts.push_back(tmpList);
- continue;
- }
-
- // now try to see if this node has already a map:
- map_pnode_vec_byte::iterator tI = TuppleByNode.find(p_remNode);
-
- // we are allowed to change the digit based on the direction we go:
- unsigned int changingDigitIdx;
- if (p_node->rank < p_remNode->rank)
+ {
+ p_port = p_node->getPort(pn);
+ if (!p_port || !p_port->p_remotePort) continue;
+
+ IBNode *p_remNode = p_port->p_remotePort->p_node;
+
+ if (p_remNode->type != IB_SW_NODE)
+ {
+ // for HCAs we only track the conenctions
+ list< int > tmpList;
+ tmpList.push_back(pn);
+ p_ftNode->childPorts.push_back(tmpList);
+ continue;
+ }
+
+ // now try to see if this node has already a map:
+ map_pnode_vec_byte::iterator tI = TuppleByNode.find(p_remNode);
+
+ // we are allowed to change the digit based on the direction we go:
+ unsigned int changingDigitIdx;
+ if (p_node->rank < p_remNode->rank)
// going down the tree = use the current rank + 1
// (save one for level)
changingDigitIdx = p_node->rank + 1;
- else if (p_node->rank > p_remNode->rank)
+ else if (p_node->rank > p_remNode->rank)
// goin up the tree = use current rank (first one is level)
changingDigitIdx = p_node->rank;
- else
- {
- cout << "-E- Connections on the same rank level "
- << " are not allowed in Fat Tree routing." << endl;
- cout << " from:" << p_node->name << "/P" << pn
- << " to:" << p_remNode->name << endl;
- return;
- }
-
- // do we need to allocate a new tupple?
- if (tI == TuppleByNode.end())
- {
-
- // the node is new - so get a new tupple for it:
- vec_byte newTupple = tupple;
- // change the level accordingly
- newTupple[0] = p_remNode->rank;
- // obtain a free one
- newTupple = getFreeTupple(newTupple, changingDigitIdx);
-
- // assign the new tupple and add to next steps:
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-I- Assigning tupple:" << getTuppleStr(newTupple)
- << " to:" << p_remNode->name << " changed idx:"
- << changingDigitIdx << " from:" << getTuppleStr(tupple)
- << endl;
-
- TuppleByNode[p_remNode] = newTupple;
- NodeByTupple[newTupple] = FatTreeNode(p_remNode);
-
- unsigned int digit = newTupple[changingDigitIdx];
-
- // track the connection
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-I- Connecting:" << p_node->name << " to:"
- << p_remNode->name << " through port:" << pn
- << " remDigit:" << digit << endl;
- if (trackConnection(
- p_ftNode, tupple, p_node->rank, p_remNode->rank, pn, digit))
- return;
-
- bfsQueue.push_back(p_remNode);
- }
- else
- {
- // other side already has a tupple - so just track the connection
- vec_byte remTupple = (*tI).second;
- vec_byte mergedTupple = remTupple;
-
- unsigned int digit = remTupple[changingDigitIdx];
-
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-I- Connecting:" << p_node->name << " to:"
- << p_remNode->name << " through port:" << pn
- << " remDigit:" << digit << endl;
- if (trackConnection(
- p_ftNode, tupple, p_node->rank, p_remNode->rank, pn, digit))
- return;
- }
-
- } // all ports
- } // anything to do
-
- // make sure the extracted tropology can be declared "fat tree"
- if (extractCoefficients()) return;
-
- // build mapping between HCA index and LIDs.
- // We need to decide what will be the K of the lowest switches level.
- // It is possible that for all of them the number of HCAs is < num
- // left ports thus we should probably use the lowest number of all
- vec_byte firstLeafTupple(N, 0);
- firstLeafTupple[0] = N-1;
-
- // now restart going over all leaf switches by their tupple order and
- // allocate mapping
- for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
- tI != NodeByTupple.end();
- tI++)
- {
+ else
+ {
+ cout << "-E- Connections on the same rank level "
+ << " are not allowed in Fat Tree routing." << endl;
+ cout << " from:" << p_node->name << "/P" << pn
+ << " to:" << p_remNode->name << endl;
+ return;
+ }
+
+ // do we need to allocate a new tupple?
+ if (tI == TuppleByNode.end())
+ {
+
+ // the node is new - so get a new tupple for it:
+ vec_byte newTupple = tupple;
+ // change the level accordingly
+ newTupple[0] = p_remNode->rank;
+ // obtain a free one
+ newTupple = getFreeTupple(newTupple, changingDigitIdx);
+
+ // assign the new tupple and add to next steps:
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-I- Assigning tupple:" << getTuppleStr(newTupple)
+ << " to:" << p_remNode->name << " changed idx:"
+ << changingDigitIdx << " from:" << getTuppleStr(tupple)
+ << endl;
+
+ TuppleByNode[p_remNode] = newTupple;
+ NodeByTupple[newTupple] = FatTreeNode(p_remNode);
+
+ unsigned int digit = newTupple[changingDigitIdx];
+
+ // track the connection
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-I- Connecting:" << p_node->name << " to:"
+ << p_remNode->name << " through port:" << pn
+ << " remDigit:" << digit << endl;
+ if (trackConnection(
+ p_ftNode, tupple, p_node->rank, p_remNode->rank, pn, digit))
+ return;
+
+ bfsQueue.push_back(p_remNode);
+ }
+ else
+ {
+ // other side already has a tupple - so just track the connection
+ vec_byte remTupple = (*tI).second;
+ vec_byte mergedTupple = remTupple;
+
+ unsigned int digit = remTupple[changingDigitIdx];
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-I- Connecting:" << p_node->name << " to:"
+ << p_remNode->name << " through port:" << pn
+ << " remDigit:" << digit << endl;
+ if (trackConnection(
+ p_ftNode, tupple, p_node->rank, p_remNode->rank, pn, digit))
+ return;
+ }
+
+ } // all ports
+ } // anything to do
+
+ // make sure the extracted tropology can be declared "fat tree"
+ if (extractCoefficients()) return;
+
+ // build mapping between HCA index and LIDs.
+ // We need to decide what will be the K of the lowest switches level.
+ // It is possible that for all of them the number of HCAs is < num
+ // left ports thus we should probably use the lowest number of all
+ vec_byte firstLeafTupple(N, 0);
+ firstLeafTupple[0] = N-1;
+
+ // now restart going over all leaf switches by their tupple order and
+ // allocate mapping
+ int hcaIdx = 0;
+ for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
+ tI != NodeByTupple.end();
+ tI++)
+ {
// we collect HCAs connected to the leaf switch and set their childPort
// starting at the index associated with the switch tupple.
FatTreeNode *p_ftNode = &((*tI).second);
IBNode *p_node = p_ftNode->p_node;
unsigned int pIdx = 0;
for (unsigned int pn = 1; pn <= p_node->numPorts; pn++)
- {
- IBPort *p_port = p_node->getPort(pn);
- if (p_port && p_port->p_remotePort &&
- (p_port->p_remotePort->p_node->type == IB_CA_NODE))
- {
- LidByIdx.push_back(p_port->p_remotePort->base_lid);
- pIdx++;
- }
- }
+ {
+ IBPort *p_port = p_node->getPort(pn);
+ if (p_port && p_port->p_remotePort &&
+ (p_port->p_remotePort->p_node->type == IB_CA_NODE))
+ {
+ LidByIdx.push_back(p_port->p_remotePort->base_lid);
+ IdxByName[p_port->p_remotePort->p_node->name] = hcaIdx;
+ pIdx++;
+ hcaIdx++;
+ }
+ }
// we might need some padding
for (; pIdx < maxHcasPerLeafSwitch; pIdx++) {
- LidByIdx.push_back(0);
+ LidByIdx.push_back(0);
+ hcaIdx++;
}
- }
+ }
- cout << "-I- Fat Tree Created" << endl;
+ cout << "-I- Fat Tree Created" << endl;
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- dump();
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ dump();
}
//////////////////////////////////////////////////////////////////////////////
@@ -664,100 +689,100 @@ int
FatTree::assignLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid,
int outPortNum, int switchPathOnly)
{
- IBPort* p_port;
- IBNode *p_node = p_ftNode->p_node;
-
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-V- assignLftUpWards invoked on node:" << p_node->name
- << " out-port:" << outPortNum
- << " to dlid:" << dLid
- << " switchPathOnly:" << switchPathOnly
- << endl;
-
- // Foreach one of the child port groups select the port which is
- // less utilized and set its LFT - then recurse into it
- // go over all child ports
- for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
- if (!p_ftNode->childPorts[i].size()) continue;
+ IBPort* p_port;
+ IBNode *p_node = p_ftNode->p_node;
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- assignLftUpWards invoked on node:" << p_node->name
+ << " out-port:" << outPortNum
+ << " to dlid:" << dLid
+ << " switchPathOnly:" << switchPathOnly
+ << endl;
+
+ // Foreach one of the child port groups select the port which is
+ // less utilized and set its LFT - then recurse into it
+ // go over all child ports
+ for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
+ if (!p_ftNode->childPorts[i].size()) continue;
+
+ // we can skip handling the remote node if
+ // it already has an assigned LFT for this target lid
+ int firstPortNum = p_ftNode->childPorts[i].front();
+ IBPort *p_firstPort = p_node->getPort(firstPortNum);
+ IBNode *p_remNode = p_firstPort->p_remotePort->p_node;
+ if (p_remNode->getLFTPortForLid(dLid) != IB_LFT_UNASSIGNED)
+ {
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- assignLftUpWards skip already assigned remote node:"
+ << p_remNode->name
+ << " switchPathOnly:" << switchPathOnly
+ << endl;
+ continue;
+ }
- // we can skip handling the remote node if
- // it already has an assigned LFT for this target lid
- int firstPortNum = p_ftNode->childPorts[i].front();
- IBPort *p_firstPort = p_node->getPort(firstPortNum);
- IBNode *p_remNode = p_firstPort->p_remotePort->p_node;
- if (p_remNode->getLFTPortForLid(dLid) != IB_LFT_UNASSIGNED)
- {
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-V- assignLftUpWards skip already assigned remote node:"
- << p_remNode->name
- << " switchPathOnly:" << switchPathOnly
- << endl;
- continue;
- }
+ int bestUsage = 0;
+ IBPort *p_bestPort = NULL;
+ int found = 0;
- int bestUsage = 0;
- IBPort *p_bestPort = NULL;
- int found = 0;
-
- // we only need one best port on each group
- for (list<int>::iterator lI = p_ftNode->childPorts[i].begin();
- !found && (lI != p_ftNode->childPorts[i].end());
- lI++) {
-
- // can not have more then one port in group...
- int portNum = *lI;
-
- // we do not want to descend back to the original port
- if (portNum == outPortNum)
- {
- p_bestPort = NULL;
- found = 1;
- continue;
- }
-
- IBPort *p_port = p_node->getPort(portNum);
- // not required but what the hack...
- if (!p_port || !p_port->p_remotePort) continue;
- IBPort *p_remPort = p_port->p_remotePort;
-
- // ignore remote HCA nodes
- if (p_remPort->p_node->type != IB_SW_NODE) continue;
-
- // look on the local usage as we mark usage entering a port
- int usage = p_port->counter1;
- if (switchPathOnly)
- usage += p_port->counter2;
- if ((p_bestPort == NULL) || (usage < bestUsage))
- {
- p_bestPort = p_port;
- bestUsage = usage;
- }
- }
+ // we only need one best port on each group
+ for (list<int>::iterator lI = p_ftNode->childPorts[i].begin();
+ !found && (lI != p_ftNode->childPorts[i].end());
+ lI++) {
+
+ // can not have more then one port in group...
+ int portNum = *lI;
+
+ // we do not want to descend back to the original port
+ if (portNum == outPortNum)
+ {
+ p_bestPort = NULL;
+ found = 1;
+ continue;
+ }
+
+ IBPort *p_port = p_node->getPort(portNum);
+ // not required but what the hack...
+ if (!p_port || !p_port->p_remotePort) continue;
+ IBPort *p_remPort = p_port->p_remotePort;
+
+ // ignore remote HCA nodes
+ if (p_remPort->p_node->type != IB_SW_NODE) continue;
- if (p_bestPort != NULL)
+ // look on the local usage as we mark usage entering a port
+ int usage = p_port->counter1;
+ if (switchPathOnly)
+ usage += p_port->counter2;
+ if ((p_bestPort == NULL) || (usage < bestUsage))
+ {
+ p_bestPort = p_port;
+ bestUsage = usage;
+ }
+ }
+
+ if (p_bestPort != NULL)
{
- // mark utilization
- if (switchPathOnly)
- p_bestPort->counter2++;
- else
- p_bestPort->counter1++;
-
- IBPort *p_bestRemPort = p_bestPort->p_remotePort;
- p_remNode->setLFTPortForLid(dLid, p_bestRemPort->num);
-
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-V- assignLftUpWards setting lft on:" << p_remNode->name
- << " to port:" << p_bestRemPort->num
- << " to dlid:" << dLid << endl;
-
- FatTreeNode *p_remFTNode =
- getFatTreeNodeByNode(p_bestRemPort->p_node);
- assignLftUpWards(p_remFTNode, dLid, p_bestRemPort->num,
- switchPathOnly);
+ // mark utilization
+ if (switchPathOnly)
+ p_bestPort->counter2++;
+ else
+ p_bestPort->counter1++;
+
+ IBPort *p_bestRemPort = p_bestPort->p_remotePort;
+ p_remNode->setLFTPortForLid(dLid, p_bestRemPort->num);
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- assignLftUpWards setting lft on:" << p_remNode->name
+ << " to port:" << p_bestRemPort->num
+ << " to dlid:" << dLid << endl;
+
+ FatTreeNode *p_remFTNode =
+ getFatTreeNodeByNode(p_bestRemPort->p_node);
+ assignLftUpWards(p_remFTNode, dLid, p_bestRemPort->num,
+ switchPathOnly);
}
- }
+ }
- return(0);
+ return(0);
}
// to allocate a port downwards we look at all ports
@@ -766,145 +791,139 @@ FatTree::assignLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid,
// we also start an upwards assignment to this node
int
FatTree::assignLftDownWards(FatTreeNode *p_ftNode, uint16_t dLid,
- int outPortNum, int switchPathOnly)
+ int outPortNum, int switchPathOnly, int downOnly)
{
- IBPort *p_port;
- IBNode *p_node = p_ftNode->p_node;
-
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-V- assignLftDownWards from:" << p_node->name
- << " dlid:" << dLid
- << " through port:" << outPortNum
- << " switchPathOnly:" << switchPathOnly
- << endl;
-
- if (outPortNum != 0xFF)
- {
- // Set FDB to that LID only if not preset or we are on "main" route
- if (!switchPathOnly ||
- (p_node->getLFTPortForLid(dLid) == IB_LFT_UNASSIGNED)) {
- p_node->setLFTPortForLid(dLid, outPortNum);
-
- p_port = p_node->getPort(outPortNum);
-
- // mark the usage of this port
- if (p_port) {
- if (switchPathOnly) {
- p_port->counter2++;
- } else {
- p_port->counter1++;
- }
- }
- }
- }
-
- // find the remote port (following the parents list order)
- // that is not used or less used.
- int bestUsage = 0;
- int bestGroup = -1;
- IBPort *p_bestRemPort = NULL;
- int found = 0;
- // go over all child ports
- for (int i = 0; !found && (i < p_ftNode->parentPorts.size()); i++) {
- if (!p_ftNode->parentPorts[i].size()) continue;
-
- for (list<int>::iterator lI = p_ftNode->parentPorts[i].begin();
- !found && (lI != p_ftNode->parentPorts[i].end());
- lI++) {
-
- // can not have more then one port in group...
- int portNum = *lI;
- IBPort *p_port = p_node->getPort(portNum); // must be if marked parent
- IBPort *p_remPort = p_port->p_remotePort;
- if (p_remPort == NULL) continue;
- int usage = p_remPort->counter1;
- if (switchPathOnly)
- usage += p_remPort->counter2;
-
- if ((p_bestRemPort == NULL) || (usage < bestUsage))
- {
- p_bestRemPort = p_remPort;
- bestUsage = usage;
- bestGroup = i;
- // can not have better usage then no usage
- if (usage == 0)
- found = 1;
- }
+ IBPort *p_port;
+ IBNode *p_node = p_ftNode->p_node;
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- assignLftDownWards from:" << p_node->name
+ << " dlid:" << dLid
+ << " through port:" << outPortNum
+ << " switchPathOnly:" << switchPathOnly
+ << endl;
+
+ if (outPortNum != 0xFF)
+ {
+ // Set FDB to that LID only if not preset or we are on "main" route
+ if (!switchPathOnly || (p_node->getLFTPortForLid(dLid) == IB_LFT_UNASSIGNED)) {
+ p_node->setLFTPortForLid(dLid, outPortNum);
+
+ p_port = p_node->getPort(outPortNum);
+
+ // mark the usage of this port
+ if (p_port) {
+ if (switchPathOnly) {
+ p_port->counter2++;
+ } else {
+ p_port->counter1++;
+ }
+ }
}
- }
-
- FatTreeNode *p_remFTNode;
- // first visit the official path!
- if (bestGroup != -1)
+ }
+
+ // find the remote port (following the parents list order)
+ // that is not used or less used.
+ int bestUsage = 0;
+ int bestGroup = -1;
+ IBPort *p_bestRemPort = NULL;
+ int found = 0;
+ // go over all child ports
+ for (int i = 0; !found && (i < p_ftNode->parentPorts.size()); i++) {
+ if (!p_ftNode->parentPorts[i].size()) continue;
+
+ for (list<int>::iterator lI = p_ftNode->parentPorts[i].begin();
+ !found && (lI != p_ftNode->parentPorts[i].end());
+ lI++) {
+
+ // can not have more then one port in group...
+ int portNum = *lI;
+ IBPort *p_port = p_node->getPort(portNum); // must be if marked parent
+ IBPort *p_remPort = p_port->p_remotePort;
+ if (p_remPort == NULL) continue;
+ int usage = p_remPort->counter1;
+ if (switchPathOnly)
+ usage += p_remPort->counter2;
+
+ if ((p_bestRemPort == NULL) || (usage < bestUsage))
{
- p_remFTNode = getFatTreeNodeByNode(p_bestRemPort->p_node);
- if (!p_remFTNode)
- cout << "-E- Fail to get FatTree Node for node:"
- << p_bestRemPort->p_node->name << endl;
- else
- assignLftDownWards(p_remFTNode, dLid, p_bestRemPort->num,
- switchPathOnly);
+ p_bestRemPort = p_remPort;
+ bestUsage = usage;
+ bestGroup = i;
+ // can not have better usage then no usage
+ if (usage == 0)
+ found = 1;
}
-
- // need to go all up all the possible ways to make sure all switch are
- // connected to all HCAs
- for (int i = 0; i < p_ftNode->parentPorts.size(); i++) {
- if (!p_ftNode->parentPorts[i].size()) continue;
- IBPort* p_remPort;
- // if we are on the "best group" we know the best port
- if (bestGroup == i) continue;
-
- // find the best port of the group i
- p_bestRemPort = NULL;
- found = 0;
- for (list<int>::iterator lI = p_ftNode->parentPorts[i].begin();
- !found && (lI != p_ftNode->parentPorts[i].end());
- lI++) {
-
- // can not have more then one port in group...
- int portNum = *lI;
- IBPort *p_port = p_node->getPort(portNum); // must be if marked parent
- IBPort *p_remPort = p_port->p_remotePort;
- if (p_remPort == NULL) continue;
- int usage = p_remPort->counter1 + p_remPort->counter2;
-
- if ((p_bestRemPort == NULL) || (usage < bestUsage))
- {
- p_bestRemPort = p_remPort;
- bestUsage = usage;
- // can not have better usage then no usage
- if (usage == 0)
- found = 1;
- }
+ }
+ }
+
+ FatTreeNode *p_remFTNode;
+ // first visit the official path!
+ if (bestGroup != -1) {
+ p_remFTNode = getFatTreeNodeByNode(p_bestRemPort->p_node);
+ if (!p_remFTNode)
+ cout << "-E- Fail to get FatTree Node for node:"
+ << p_bestRemPort->p_node->name << endl;
+ else
+ assignLftDownWards(p_remFTNode, dLid, p_bestRemPort->num,switchPathOnly,downOnly);
+ }
+
+ // need to go all up all the possible ways to make sure all switch are
+ // connected to all HCAs
+ for (int i = 0; i < p_ftNode->parentPorts.size(); i++) {
+ if (!p_ftNode->parentPorts[i].size()) continue;
+ IBPort* p_remPort;
+ // if we are on the "best group" we know the best port
+ if (bestGroup == i) continue;
+
+ // find the best port of the group i
+ p_bestRemPort = NULL;
+ found = 0;
+ for (list<int>::iterator lI = p_ftNode->parentPorts[i].begin();!found && (lI != p_ftNode->parentPorts[i].end()); lI++) {
+ // can not have more then one port in group...
+ int portNum = *lI;
+ IBPort *p_port = p_node->getPort(portNum); // must be if marked parent
+ IBPort *p_remPort = p_port->p_remotePort;
+ if (p_remPort == NULL) continue;
+ int usage = p_remPort->counter1 + p_remPort->counter2;
+
+ if ((p_bestRemPort == NULL) || (usage < bestUsage)) {
+ p_bestRemPort = p_remPort;
+ bestUsage = usage;
+ // can not have better usage then no usage
+ if (usage == 0)
+ found = 1;
}
- p_remFTNode = getFatTreeNodeByNode(p_bestRemPort->p_node);
- if (!p_remFTNode)
- cout << "-E- Fail to get FatTree Node for node:"
- << p_bestRemPort->p_node->name << endl;
- else
- assignLftDownWards(p_remFTNode, dLid, p_bestRemPort->num, 1);
- }
-
- // Perform Backward traversal through all ports connected to lower
- // level switches in-port = out-port
- assignLftUpWards(p_ftNode, dLid, outPortNum, switchPathOnly);
-
- return(0);
+ }
+ p_remFTNode = getFatTreeNodeByNode(p_bestRemPort->p_node);
+ if (!p_remFTNode)
+ cout << "-E- Fail to get FatTree Node for node:"
+ << p_bestRemPort->p_node->name << endl;
+ else
+ assignLftDownWards(p_remFTNode, dLid, p_bestRemPort->num, 1, downOnly);
+ }
+
+ // Perform Backward traversal through all ports connected to lower
+ // level switches in-port = out-port
+ if (!downOnly)
+ assignLftUpWards(p_ftNode, dLid, outPortNum, switchPathOnly);
+
+ return(0);
}
// perform the routing by filling in the fabric LFTs
int FatTree::route()
{
- int hcaIdx = 0;
- int lid; // the target LID we propagate for this time
+ int hcaIdx = 0;
+ int lid; // the target LID we propagate for this time
- // go over all fat tree nodes of the lowest level
- vec_byte firstLeafTupple(N, 0);
- firstLeafTupple[0] = N-1;
- for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
- tI != NodeByTupple.end();
- tI++)
- {
+ // go over all fat tree nodes of the lowest level
+ vec_byte firstLeafTupple(N, 0);
+ firstLeafTupple[0] = N-1;
+ for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
+ tI != NodeByTupple.end();
+ tI++)
+ {
FatTreeNode *p_ftNode = &((*tI).second);
IBNode *p_node = p_ftNode->p_node;
@@ -913,186 +932,414 @@ int FatTree::route()
// go over all child ports
for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
- if (!p_ftNode->childPorts[i].size()) continue;
- // can not have more then one port in group...
- int portNum = p_ftNode->childPorts[i].front();
- numPortWithHCA++;
+ if (!p_ftNode->childPorts[i].size()) continue;
+ // can not have more then one port in group...
+ int portNum = p_ftNode->childPorts[i].front();
+ numPortWithHCA++;
- lid = LidByIdx[hcaIdx];
+ lid = LidByIdx[hcaIdx];
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
- cout << "-V- Start routing LID:" << lid
- << " at HCA idx:" << hcaIdx << endl;
- assignLftDownWards(p_ftNode, lid, portNum, 0);
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Start routing LID:" << lid
+ << " at HCA idx:" << hcaIdx << endl;
+ assignLftDownWards(p_ftNode, lid, portNum, 0,0);
- hcaIdx++;
+ hcaIdx++;
}
// for ports without HCA we assign dummy LID but need to
// propagate
for (; numPortWithHCA < maxHcasPerLeafSwitch; numPortWithHCA++)
- {
- // HACK: for now we can propagate 0 as lid
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ {
+ // HACK: for now we can propagate 0 as lid
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
cout << "-V- adding dummy LID to switch:"
<< p_node->name
<< " at HCA idx:" << hcaIdx << endl;
- assignLftDownWards(p_ftNode, 0, 0xFF, 0);
+ assignLftDownWards(p_ftNode, 0, 0xFF, 0,0);
- hcaIdx++;
- }
- }
+ hcaIdx++;
+ }
+ }
- // now go over all switches and route to them
- for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
- tI != NodeByTupple.end();
- tI++)
- {
+ // now go over all switches and route to them
+ for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
+ tI != NodeByTupple.end();
+ tI++)
+ {
FatTreeNode *p_ftNode = &((*tI).second);
IBNode *p_node = p_ftNode->p_node;
- if (p_node->type != IB_SW_NODE) continue;
+ if (p_node->type != IB_SW_NODE) continue;
- // find the LID of the switch:
- int lid = 0;
- for (unsigned int pn = 1; (lid == 0) && (pn <= p_node->numPorts); pn++)
- {
- IBPort *p_port = p_node->getPort(pn);
- if (p_port)
- lid = p_port->base_lid;
- }
- if (lid == 0)
- {
- cout << "-E- failed to find LID for switch:" << p_node->name << endl;
- } else {
- if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ // find the LID of the switch:
+ int lid = 0;
+ for (unsigned int pn = 1; (lid == 0) && (pn <= p_node->numPorts); pn++)
+ {
+ IBPort *p_port = p_node->getPort(pn);
+ if (p_port)
+ lid = p_port->base_lid;
+ }
+ if (lid == 0)
+ {
+ cout << "-E- failed to find LID for switch:" << p_node->name << endl;
+ } else {
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
cout << "-V- routing to LID:" << lid << " of switch:"
<< p_node->name << endl;
- assignLftDownWards(p_ftNode, lid, 0, 0);
- }
+ assignLftDownWards(p_ftNode, lid, 0, 0, 0);
}
+ }
+
+ return(0);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Optimally Route a permutation in the Fat Tree
+// Prerequisites: Fat Tree structure was built.
+//
+// Algorithm:
+// For each leaf switch (in order)
+// For each HCA index (even if it does not have a LID - invent one)
+// Setup downward paths as previously
+// Traverse up from destination HCA and force output ports as
+// computed by the optimal routing
+//
+// Data Model:
+// We use the fat tree to get ordering.
+// "main" routing is the routing from HCA to HCA.
+// "side" routing is used from all SW to all HCAs (and dynamic routing)
+// Track port utilization for the "main" routing by the "counter1"
+// Track port utilzation of the "side" routing in "counter2" field of the port
+//
+//////////////////////////////////////////////////////////////////////////////
- return(0);
+// set FDB values as given in the input
+int FatTree::forceLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid, vec_int ports)
+{
+ // go over all steps
+ for (int i=0; i<ports.size(); i++) {
+ // if LID is going down we are finished
+ if (p_ftNode->goingDown(dLid))
+ return 0;
+ // sanity check
+ if ((ports[i] < 0) || (ports[i] > p_ftNode->parentPorts.size())) {
+ cout << "-E- Illegal port number!" << endl;
+ return 1;
+ }
+ IBNode *p_node = p_ftNode->p_node;
+ int portNum = p_ftNode->parentPorts[ports[i]].front();
+
+ IBPort* p_port = p_node->getPort(portNum);
+ if (!p_port || !p_port->p_remotePort) {
+ cout << "-E- Ports do not exist!" << endl;
+ return 1;
+ }
+ IBNode* p_remNode = p_port->p_remotePort->p_node;
+ // Set LFT entry
+ p_node->setLFTPortForLid(dLid, portNum);
+ // Move to next step
+ p_ftNode = getFatTreeNodeByNode(p_remNode);
+ }
+ return 0;
+}
+
+// perform the routing by filling in the fabric LFTs
+int FatTree::permRoute(vector<string> src, vector<string> dst)
+{
+ int hcaIdx = 0;
+ int lid; // the target LID we propagate for this time
+ // extract radix
+ vec_byte tmpLeafTupple(N,0);
+ tmpLeafTupple[0] = N-1;
+ FatTreeNode* p_ftN = &NodeByTupple[tmpLeafTupple];
+ int radix = 0;
+ for (int i = 0; i < p_ftN->parentPorts.size(); i++)
+ if(p_ftN->parentPorts[i].size())
+ radix++;
+
+ // create requests vector
+ vec_int req;
+ req.resize(src.size());
+ for (int i=0; i<src.size(); i++)
+ req[IdxByName[src[i]]] = IdxByName[dst[i]];
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Tree height: " << N << ", radix: " << radix << endl;
+
+ // create routing system
+ RouteSys rS(radix,N);
+ // push requests
+ rS.pushRequests(req);
+ // compute optimal routing
+ vec_vec_int routeOutput;
+ rS.doRouting(routeOutput);
+
+ // build the LFTs
+ // go over all fat tree nodes of the lowest level
+ vec_byte firstLeafTupple(N, 0);
+ firstLeafTupple[0] = N-1;
+ // First prepare downwards routing
+ for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple); tI != NodeByTupple.end(); tI++) {
+ FatTreeNode *p_ftNode = &((*tI).second);
+ IBNode *p_node = p_ftNode->p_node;
+ // we need to track the number of ports to handle case of missing HCAs
+ int numPortWithHCA = 0;
+
+ // go over all child ports
+ for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
+ if (!p_ftNode->childPorts[i].size()) continue;
+ // can not have more then one port in group...
+ int portNum = p_ftNode->childPorts[i].front();
+ numPortWithHCA++;
+
+ lid = LidByIdx[hcaIdx];
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Start routing LID:" << lid
+ << " at HCA idx:" << hcaIdx << endl;
+
+ // Assign downward LFT values
+ if (assignLftDownWards(p_ftNode, lid, portNum, 0, 1))
+ return 1;
+
+ hcaIdx++;
+ }
+
+ // for ports without HCA we assign dummy LID but need to
+ // propagate
+ for (; numPortWithHCA < maxHcasPerLeafSwitch; numPortWithHCA++) {
+ // HACK: for now we can propagate 0 as lid
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- adding dummy LID to switch:"
+ << p_node->name
+ << " at HCA idx:" << hcaIdx << endl;
+
+ assignLftDownWards(p_ftNode, 0, 0xFF, 0, 0);
+
+ hcaIdx++;
+ }
+ }
+
+ // Now prepare upwards routing
+ hcaIdx = 0;
+ for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple); tI != NodeByTupple.end(); tI++) {
+ FatTreeNode *p_ftNode = &((*tI).second);
+ IBNode *p_node = p_ftNode->p_node;
+
+ // go over all child ports
+ for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
+ if (!p_ftNode->childPorts[i].size()) continue;
+ // can not have more then one port in group...
+ int portNum = p_ftNode->childPorts[i].front();
+
+ lid = LidByIdx[hcaIdx];
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Start routing LID:" << lid
+ << " at HCA idx:" << hcaIdx << endl;
+
+ lid = LidByIdx[req[hcaIdx]];
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Creating routing from "<<hcaIdx<<"-"<<src[hcaIdx]<<"(lid: "<<LidByIdx[hcaIdx]<<") to "
+ <<req[hcaIdx]<<"-"<<dst[hcaIdx]<<"(lid: "<<LidByIdx[req[hcaIdx]]<<") using up-ports:" << endl;
+
+ // Assign upward LFT values
+ if (forceLftUpWards(p_ftNode, lid, routeOutput[hcaIdx]))
+ return 1;
+
+ hcaIdx++;
+ }
+
+ }
+
+ // now go over all switches and route to them
+ for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
+ tI != NodeByTupple.end();
+ tI++)
+ {
+
+ FatTreeNode *p_ftNode = &((*tI).second);
+ IBNode *p_node = p_ftNode->p_node;
+
+ if (p_node->type != IB_SW_NODE) continue;
+
+ // find the LID of the switch:
+ int lid = 0;
+ for (unsigned int pn = 1; (lid == 0) && (pn <= p_node->numPorts); pn++)
+ {
+ IBPort *p_port = p_node->getPort(pn);
+ if (p_port)
+ lid = p_port->base_lid;
+ }
+ if (lid == 0)
+ {
+ cout << "-E- failed to find LID for switch:" << p_node->name << endl;
+ } else {
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- routing to LID:" << lid << " of switch:"
+ << p_node->name << endl;
+ assignLftDownWards(p_ftNode, lid, 0, 0, 0);
+ }
+ }
+
+ return(0);
}
// dumps out the HCA order into a file ftree.hca
void FatTree::dumpHcaOrder()
{
- ofstream f("ftree.hcas");
- for (unsigned int i = 0; i < LidByIdx.size(); i++)
- {
+ ofstream f("ftree.hcas");
+ for (unsigned int i = 0; i < LidByIdx.size(); i++)
+ {
// find the HCA node by the base lid given
unsigned int lid = LidByIdx[i];
if (lid <= 0)
- {
- f << "DUMMY_HOST LID" << endl;
- }
+ {
+ f << "DUMMY_HOST LID" << endl;
+ }
else
- {
- IBPort *p_port = p_fabric->PortByLid[lid];
-
- if (! p_port)
- {
- cout << "-E- fail to find port for lid:" << lid << endl;
- f << "ERROR_HOST LID" << endl;
- }
- else
- {
- f << p_port->p_node->name << "/" << p_port->num << " " << lid << endl;
- }
- }
- }
- f.close();
+ {
+ IBPort *p_port = p_fabric->PortByLid[lid];
+
+ if (! p_port)
+ {
+ cout << "-E- fail to find port for lid:" << lid << endl;
+ f << "ERROR_HOST LID" << endl;
+ }
+ else
+ {
+ f << p_port->p_node->name << "/" << p_port->num << " " << lid << endl;
+ }
+ }
+ }
+ f.close();
}
void FatTree::dump()
{
- unsigned int level, prevLevel = 2;
- cout << "---------------------------------- FAT TREE DUMP -----------------------------" << endl;
- for (map_tupple_ftnode::const_iterator tI = NodeByTupple.begin();
- tI != NodeByTupple.end();
- tI++)
- {
+ unsigned int level, prevLevel = 2;
+ cout << "---------------------------------- FAT TREE DUMP -----------------------------" << endl;
+ for (map_tupple_ftnode::const_iterator tI = NodeByTupple.begin();
+ tI != NodeByTupple.end();
+ tI++)
+ {
level = (*tI).first[0];
if (level != prevLevel)
- {
- prevLevel = level;
- cout << "LEVEL:" << level << endl;
- }
+ {
+ prevLevel = level;
+ cout << "LEVEL:" << level << endl;
+ }
FatTreeNode const *p_ftNode = &((*tI).second);
cout << " " << p_ftNode->p_node->name << " tupple:" << getTuppleStr((*tI).first) << endl;
for (unsigned int i = 0; i < p_ftNode->parentPorts.size(); i++)
- {
- if (p_ftNode->parentPorts[i].size())
- {
- cout << " Parents:" << i << endl;
- for (list< int >::const_iterator lI = p_ftNode->parentPorts[i].begin();
- lI != p_ftNode->parentPorts[i].end();
- lI++)
- {
- unsigned int portNum = *lI;
- cout << " p:" << portNum << " ";
- IBPort *p_port = p_ftNode->p_node->getPort(portNum);
- if (!p_port || !p_port->p_remotePort)
- cout << " ERROR " << endl;
- else
- cout << p_port->p_remotePort->p_node->name << endl;
- }
- }
- }
+ {
+ if (p_ftNode->parentPorts[i].size())
+ {
+ cout << " Parents:" << i << endl;
+ for (list< int >::const_iterator lI = p_ftNode->parentPorts[i].begin();
+ lI != p_ftNode->parentPorts[i].end();
+ lI++)
+ {
+ unsigned int portNum = *lI;
+ cout << " p:" << portNum << " ";
+ IBPort *p_port = p_ftNode->p_node->getPort(portNum);
+ if (!p_port || !p_port->p_remotePort)
+ cout << " ERROR " << endl;
+ else
+ cout << p_port->p_remotePort->p_node->name << endl;
+ }
+ }
+ }
for (unsigned int i = 0; i < p_ftNode->childPorts.size(); i++)
- {
- if (p_ftNode->childPorts[i].size())
- {
- cout << " Children:" << i << endl;
- for (list< int >::const_iterator lI = p_ftNode->childPorts[i].begin();
- lI != p_ftNode->childPorts[i].end();
- lI++)
- {
- unsigned int portNum = *lI;
- cout << " p:" << portNum << " ";
- IBPort *p_port = p_ftNode->p_node->getPort(portNum);
- if (!p_port || !p_port->p_remotePort)
- cout << "ERROR " << endl;
- else
- cout << p_port->p_remotePort->p_node->name << endl;
- }
- }
- }
- }
+ {
+ if (p_ftNode->childPorts[i].size())
+ {
+ cout << " Children:" << i << endl;
+ for (list< int >::const_iterator lI = p_ftNode->childPorts[i].begin();
+ lI != p_ftNode->childPorts[i].end();
+ lI++)
+ {
+ unsigned int portNum = *lI;
+ cout << " p:" << portNum << " ";
+ IBPort *p_port = p_ftNode->p_node->getPort(portNum);
+ if (!p_port || !p_port->p_remotePort)
+ cout << "ERROR " << endl;
+ else
+ cout << p_port->p_remotePort->p_node->name << endl;
+ }
+ }
+ }
+ }
- // now dump the HCA by index:
- cout << "\nLID BY INDEX" << endl;
- for (unsigned int i = 0; i < LidByIdx.size(); i++) {
- int lid = LidByIdx[i];
- IBPort *p_port;
+ // now dump the HCA by index:
+ cout << "\nLID BY INDEX" << endl;
+ for (unsigned int i = 0; i < LidByIdx.size(); i++) {
+ int lid = LidByIdx[i];
+ IBPort *p_port;
- if (lid != 0)
+ if (lid != 0)
{
- p_port = p_fabric->PortByLid[lid];
- if (p_port)
- {
+ p_port = p_fabric->PortByLid[lid];
+ if (p_port)
+ {
cout << " " << i << " -> " << LidByIdx[i]
<< " " << p_port->getName() << endl;
- }
- else
- {
+ }
+ else
+ {
cout << " ERROR : no port for lid:" << lid << endl;
- }
+ }
}
- }
+ }
}
// perform the whole thing
int FatTreeAnalysis(IBFabric *p_fabric)
{
- FatTree ftree(p_fabric);
- if (!ftree.isValid) return(1);
- ftree.dumpHcaOrder();
- if (ftree.route()) return(1);
- return(0);
+ FatTree ftree(p_fabric);
+ if (!ftree.isValid) return(1);
+ ftree.dumpHcaOrder();
+ if (ftree.route()) return(1);
+ return(0);
+}
+
+int FatTreeRouteByPermutation( IBFabric *p_fabric, char *srcs, char* dsts )
+{
+ vector <string> sources;
+ vector <string> destinations;
+
+ char *s1, *s2, *cp;
+ char *saveptr;
+ s1 = strdup(srcs);
+ s2 = strdup(dsts);
+ cp = strtok_r(s1, " \t", &saveptr);
+ do {
+ sources.push_back(cp);
+ cp = strtok_r(NULL, " \t", &saveptr);
+ }
+ while (cp);
+
+ cp = strtok_r(s2, " \t", &saveptr);
+ do {
+ destinations.push_back(cp);
+ cp = strtok_r(NULL, " \t", &saveptr);
+ }
+ while (cp);
+
+ if (sources.size() != destinations.size()) {
+ cout << "-E- Different number of sources and destinations" << endl;
+ return 1;
+ }
+
+ FatTree ftree(p_fabric);
+ if (!ftree.isValid) return(1);
+ //ftree.dumpHcaOrder();
+ if (ftree.permRoute(sources, destinations)) return(1);
+ return(0);
}
diff --git a/ibdm/datamodel/Makefile.am b/ibdm/datamodel/Makefile.am
index 0003bba..b0958fc 100644
--- a/ibdm/datamodel/Makefile.am
+++ b/ibdm/datamodel/Makefile.am
@@ -33,7 +33,7 @@
#--
# we would like to export these headers during install
-pkginclude_HEADERS = git_version.h Fabric.h \
+pkginclude_HEADERS = git_version.h Fabric.h RouteSys.h Bipartite.h \
SubnMgt.h TraceRoute.h CredLoops.h Regexp.h \
TopoMatch.h SysDef.h Congestion.h ibnl_parser.h ibdm.i
@@ -53,7 +53,7 @@ AM_YFLAGS = -d
# libibdm - the TCL shared lib to be used as a package
# ibdmsh - the TCL shell
-common_SOURCES = Fabric.cpp \
+common_SOURCES = Fabric.cpp RouteSys.cc Bipartite.cc \
SubnMgt.cpp TraceRoute.cpp CredLoops.cpp TopoMatch.cpp SysDef.cpp \
LinkCover.cpp Congestion.cpp ibnl_parser.cc ibnl_scanner.cc FatTree.cpp
diff --git a/ibdm/datamodel/RouteSys.cc b/ibdm/datamodel/RouteSys.cc
new file mode 100644
index 0000000..6c33224
--- /dev/null
+++ b/ibdm/datamodel/RouteSys.cc
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "RouteSys.h"
+#include "Bipartite.h"
+
+// Helper power function
+
+int RouteSys::myPow(int base, int pow) {
+ int res = 1;
+ for (int i=0; i<pow; i++)
+ res = res*base;
+
+ return res;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+// C'tor
+
+RouteSys::RouteSys(int rad, int hgth, int s):radix(rad),height(hgth),step(s) {
+ // Calculate num in/out ports
+ ports = myPow(rad,height);
+ // Create and init ports
+ inPorts = new inputData[ports];
+ outPorts = new bool[ports];
+
+ for (int i=0; i<ports; i++) {
+ inPorts[i].used = false;
+ outPorts[i] = false;
+ }
+ // Create sub-systems
+ if (height > 1) {
+ subSys = new RouteSys* [rad];
+ for (int i=0; i<radix; i++)
+ subSys[i] = new RouteSys(rad,height-1,s+1);
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// D'tor
+
+RouteSys::~RouteSys() {
+ // Just kill everything
+ delete[] inPorts;
+ delete[] outPorts;
+
+ if (height > 1) {
+ for (int i=0; i<radix; i++)
+ delete subSys[i];
+ delete[] subSys;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Add requests to the system
+
+int RouteSys::pushRequests(vec_int req)
+{
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Pushing requests" << endl;
+
+ for (int i=0; i<req.size(); i++) {
+ // Extract comm pair
+ int src = i;
+ int dst = req[i];
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Req: " << src << "->" << dst << endl;
+
+ // Check port existence
+ if ((src >= ports) || (dst >= ports)) {
+ cout << "-E- Port index exceeds num ports! Ports: " << ports << ", src: " << src << ", dst: " << dst << endl;
+ return 1;
+ }
+ // Check port availability
+ if (inPorts[src].used || outPorts[dst]) {
+ cout << "-E- Port already used! src: " << src << ", dst: " << dst << endl;
+ return 1;
+ }
+ // Mark ports as used
+ inPorts[src].used = true;
+ inPorts[src].src = src;
+ inPorts[src].dst = dst;
+ inPorts[src].inputNum = src;
+ inPorts[src].outNum = dst;
+
+ outPorts[dst] = true;
+ }
+ return 0;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+// Perform routing after requests were pushed
+
+int RouteSys::doRouting (vec_vec_int& out)
+{
+ // Atomic system nothing to do
+ if (ports == radix)
+ return 0;
+
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Starting routing, step: " << step << ", height " << height << endl;
+
+ // Init the output structure
+ if (out.size() < ports) {
+ out.resize(ports);
+ for (int i=0; i<ports; i++) {
+ out[i].resize(height-1);
+ for (int j=0; j<height-1; j++)
+ out[i][j] = -1;
+ }
+ }
+
+ // We use three graph arrays for coloring
+ Bipartite** buff[3];
+ buff[0] = new Bipartite*[radix];
+ buff[1] = new Bipartite*[radix];
+ buff[2] = new Bipartite*[radix];
+
+ for (int i=0; i<radix; i++)
+ buff[0][i] = buff[1][i] = buff[2][i] = NULL;
+
+ // Number of colors derived through perfect matching
+ int matchGraphs = 0;
+ int currRadix = radix;
+ int idx = 0;
+ int idx_new = 1;
+
+ // Create the first graph
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Ports: " << ports << ", radix: " << radix << endl;
+ buff[0][0] = new Bipartite(ports/radix,radix);
+
+ // Add connections
+ for (int i=0; i<ports; i++)
+ buff[0][0]->connectNodes(i/radix,inPorts[i].outNum/radix,inPorts[i]);
+
+ // Now decompose the graph to radix-1 graphs
+ while (1 < currRadix) {
+ for (int i=0; buff[idx][i] && i<radix; i++) {
+ // Odd radix, perfect matching is required
+ if (currRadix % 2) {
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Perfect matching is required" << endl;
+ buff[2][matchGraphs] = buff[idx][i]->maximumMatch();
+ matchGraphs++;
+ }
+ // Now we can perform Euler decomposition
+ if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+ cout << "-V- Performing Euler decompostion" << endl;
+ if (2*i+1 >= radix) {
+ cout << "-E- Graph index illegal"<< endl;
+ return 1;
+ }
+ buff[idx][i]->decompose(&buff[idx_new][2*i],&buff[idx_new][2*i+1]);
+ delete buff[idx][i];
+ buff[idx][i] = NULL;
+ }
+ idx = idx_new;
+ idx_new = (idx_new+1)%2;
+ currRadix = currRadix / 2;
+ }
+ // Collect all result graphs to array buff[2][i]
+ for (int i=matchGraphs; i<radix; i++)
+ buff[2][i] = buff[idx][i-matchGraphs];
+
+ // Apply decomposition results
+ for (int i=0; i < radix; i++) {
+ Bipartite* G = buff[2][i];
+ if (!G->setIterFirst()) {
+ cout << "-E- Empty graph found!" << endl;
+ return 1;
+ }
+ bool stop = false;
+ while (!stop) {
+ inputData d = G->getReqDat();
+ // Build output
+ if (out.size() <= d.src || out[d.src].size() <= step) {
+ cout << "Output index illegal" << endl;
+ return 1;
+ }
+ out[d.src][step] = i;
+ // Add request to sub-system
+ RouteSys* sub = subSys[i];
+ int inPort = d.inputNum/radix;
+ int outPort = d.outNum/radix;
+ if (sub->inPorts[inPort].used || sub->outPorts[outPort]) {
+ cout << "Port already used! inPort: " << inPort << ", outPort: " << outPort << endl;
+ return 1;
+ }
+ // Mark ports as used
+ sub->inPorts[inPort].used = true;
+ sub->inPorts[inPort].src = d.src;
+ sub->inPorts[inPort].dst = d.dst;
+ sub->inPorts[inPort].inputNum = inPort;
+ sub->inPorts[inPort].outNum = outPort;
+
+ outPorts[outPort] = true;
+ // Next request
+ if (!G->setIterNext()) {
+ stop = true;
+ }
+ }
+ }
+
+ // Free memory
+ for (int i=0; i<radix; i++)
+ delete buff[2][i];
+
+ delete[] buff[0];
+ delete[] buff[1];
+ delete[] buff[2];
+
+ // Run routing on sub-systems
+ for (int i=0; i<radix; i++) {
+ if (subSys[i]->doRouting(out)) {
+ cout << "-E- Subsystem routing failed!" << endl;
+ return 1;
+ }
+ }
+
+ return 0;
+}
diff --git a/ibdm/datamodel/RouteSys.h b/ibdm/datamodel/RouteSys.h
new file mode 100644
index 0000000..a5f5cfc
--- /dev/null
+++ b/ibdm/datamodel/RouteSys.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+/*
+ * Fabric Utilities Project
+ *
+ * Permutation Routing System abstraction Header file
+ *
+ * Author: Vladimir Zdornov, Mellanox Technologies
+ *
+ */
+
+#ifndef IBDM_ROUTE_SYS_H_
+#define IBDM_ROUTE_SYS_H_
+
+#include <unistd.h>
+#include "Fabric.h"
+
+using namespace std;
+
+class inputData
+{
+ public:
+ bool used;
+ int src;
+ int dst;
+ int inputNum;
+ int outNum;
+
+ inputData():used(false){}
+};
+
+// Routing system abstraction class
+class RouteSys {
+ // Basic parameters
+ int radix;
+ int height;
+ int step;
+ int ports;
+ // Ports data
+ inputData* inPorts;
+ bool* outPorts;
+ // Sub-systems
+ RouteSys** subSys;
+
+ int myPow(int base, int pow);
+
+ public:
+ // Constructor
+ RouteSys(int rad, int hgth, int s=0);
+ // Add communication requests to the system
+ // Format: i -> req[i]
+ // Restriction: Requests must form a complete permutation
+ int pushRequests(vec_int req);
+ // D'tor
+ ~RouteSys();
+ // Get input data for input port i
+ inputData& getInput(int i);
+ // Is output port i busy already?
+ bool& getOutput(int i);
+
+ // Invoke the system level routing
+ int doRouting(vec_vec_int& out);
+};
+
+
+#endif
diff --git a/ibdm/datamodel/SubnMgt.h b/ibdm/datamodel/SubnMgt.h
index 8e68818..99e763f 100644
--- a/ibdm/datamodel/SubnMgt.h
+++ b/ibdm/datamodel/SubnMgt.h
@@ -134,5 +134,9 @@ LinkCoverageAnalysis(IBFabric *p_fabric, list_pnode rootNodes);
int
FatTreeAnalysis(IBFabric *p_fabric);
+// Perform FatTree optimal permutation routing
+int
+FatTreeRouteByPermutation(IBFabric* p_fabric, char* srcs, char* dsts);
+
#endif /* IBDM_SUBN_MGT_H */
diff --git a/ibdm/datamodel/ibdm.i b/ibdm/datamodel/ibdm.i
index b4541ad..7f3e0dc 100644
--- a/ibdm/datamodel/ibdm.i
+++ b/ibdm/datamodel/ibdm.i
@@ -1263,6 +1263,9 @@ int ibdmFatTreeRoute(IBFabric *p_fabric, list_pnode rootNodes);
%name(ibdmFatTreeAnalysis) int FatTreeAnalysis(IBFabric *p_fabric);
// Performs FatTree structural analysis
+%name(ibdmFatTreeRouteByPermutation) int FatTreeRouteByPermutation(IBFabric *p_fabric, char* srcs, char* dsts);
+// Performs optimal permutation routing in FatTree
+
%name(ibdmVerifyCAtoCARoutes)
int SubnMgtVerifyAllCaToCaRoutes(IBFabric *p_fabric);
// Verify point to point connectivity
--
1.5.1.4
More information about the general
mailing list