[ofa-general] [PATCH] ibutils: Optimal permutation routing in fat-tree

Yevgeny Kliteynik kliteyn at dev.mellanox.co.il
Tue Oct 7 02:31:34 PDT 2008


[On behalf of Vladimir Zdornov]

Optimal permutation routing in fat-tree.
Fat-tree is regarded to as high-order Clos network.
Each Clos is routed using graph coloring of a
bipartite graph. The result of the routing procedure
affects ascending only (descending fdb entries stay
unchanged).

Signed-off-by:  Vladimir Zdornov <zdv at mellanox.co.il>

---
 ibdm/datamodel/Bipartite.cc |  672 +++++++++++++++++
 ibdm/datamodel/Bipartite.h  |  194 +++++
 ibdm/datamodel/FatTree.cpp  | 1743 ++++++++++++++++++++++++-------------------
 ibdm/datamodel/Makefile.am  |    4 +-
 ibdm/datamodel/RouteSys.cc  |  258 +++++++
 ibdm/datamodel/RouteSys.h   |   98 +++
 ibdm/datamodel/SubnMgt.h    |    4 +
 ibdm/datamodel/ibdm.i       |    3 +
 8 files changed, 2226 insertions(+), 750 deletions(-)
 create mode 100644 ibdm/datamodel/Bipartite.cc
 create mode 100644 ibdm/datamodel/Bipartite.h
 create mode 100644 ibdm/datamodel/RouteSys.cc
 create mode 100644 ibdm/datamodel/RouteSys.h

diff --git a/ibdm/datamodel/Bipartite.cc b/ibdm/datamodel/Bipartite.cc
new file mode 100644
index 0000000..760b040
--- /dev/null
+++ b/ibdm/datamodel/Bipartite.cc
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "Bipartite.h"
+
+//CLASS edge///////////////////////////////////
+
+bool edge::isMatched() {
+  vertex* ver1 = (vertex*)v1;
+  vertex* ver2 = (vertex*)v2;
+
+  if (((this == ver1->getPartner()) && (this != ver2->getPartner())) || ((this == ver2->getPartner()) && (this != ver1->getPartner())))
+    cout << "-E- Error in edge matching" << endl;
+
+  return (this == ver1->getPartner()) && (this == ver2->getPartner());
+}
+
+//CLASS vertex/////////////////////////////////
+
+vertex::vertex(int n, side sd, int rad):id(n),s(sd),radix(rad)
+{
+  connections = new edge*[radix];
+  pred = new edge*[radix];
+  succ = new edge*[radix];
+
+  partner = NULL;
+  for (int i=0; i<radix; i++)
+    connections[i] = pred[i] = succ[i] = NULL;
+
+  predCount = succCount = 0;
+  maxUsed = -1;
+}
+
+vertex::~vertex()
+{
+  delete[] connections;
+  delete[] pred;
+  delete[] succ;
+}
+
+int vertex::getID() const
+{
+  return id;
+}
+
+side vertex::getSide() const
+{
+  return s;
+}
+
+void vertex::delConnection(edge* e)
+{
+  int my_idx, other_idx;
+  vertex* v;
+
+  // Find the side we are connected at and edge index
+  if (e->v1 == this) {
+    my_idx = e->idx1;
+    other_idx = e->idx2;
+    v = (vertex*)(e->v2);
+  }
+  else if (e->v2 == this) {
+    my_idx = e->idx2;
+    other_idx = e->idx1;
+    v = (vertex*)(e->v1);
+  }
+  else {
+    cout << "-E- Edge not connected to current vertex" << endl;
+    return;
+  }
+
+  if (my_idx >= radix || other_idx >= radix) {
+    cout << "-E- Edge index illegal" << endl;
+    return;
+  }
+
+  // Now disconnect
+  connections[my_idx] = NULL;
+  maxUsed--;
+  v->connections[other_idx] = NULL;
+  v->maxUsed--;
+}
+
+void vertex::pushConnection(edge* e)
+{
+  maxUsed++;
+  if (maxUsed == radix) {
+    cout << "-E- Can't push connection to vertex: " << id << ", exceeding radix!" << endl;
+    return;
+  }
+  // Mark our side
+  if (e->v1 == NULL) {
+    e->v1 = this;
+    e->idx1 = maxUsed;
+  }
+  else if (e->v2 == NULL) {
+    e->v2 = this;
+    e->idx2 = maxUsed;
+  }
+  else {
+    cout << "-E- Can't push connection both edges are already filled" << endl;
+    return;
+  }
+
+  if (maxUsed >= radix) {
+    cout << "-E- maxUsed illegal" << endl;
+    return;
+  }
+
+  // Now connect
+  connections[maxUsed] = e;
+}
+
+edge* vertex::popConnection()
+{
+  // Look for a connection
+  int i=0;
+  while ((i<radix) && !connections[i])
+    i++;
+  // No real connection found
+  if (i == radix)
+    return NULL;
+
+  edge* tmp = connections[i];
+
+  // Disconnect chosen edge
+  connections[i] = NULL;
+  if (tmp->v1 == this) {
+    vertex* v = (vertex*)(tmp->v2);
+    v->connections[tmp->idx2] = NULL;
+  }
+  else if (tmp->v2 == this) {
+    vertex* v = (vertex*)(tmp->v1);
+    v->connections[tmp->idx1] = NULL;
+  }
+  else {
+    cout << "-E- Edge not connected to current vertex" << endl;
+    return NULL;
+  }
+
+  if (tmp->idx1 >= radix || tmp->idx2 >= radix) {
+    cout << "-E- Edge index illegal" << endl;
+    return NULL;
+  }
+
+  return tmp;
+}
+
+// For unmacthed vertex, find an unmatched neighbor and match the pair
+bool vertex::match()
+{
+  // Already matched
+  if (partner)
+    return false;
+  // Look for neighbor
+  for (int i=0; i<radix; i++) {
+    if (connections[i]) {
+      vertex* v = (vertex*)(connections[i]->otherSide(this));
+      if (!v->partner) {
+        // Match
+	partner = connections[i];
+	v->partner = connections[i];
+	return true;
+      }
+    }
+  }
+  return false;
+}
+
+edge* vertex::getPartner() const
+{
+  return partner;
+}
+
+bool vertex::getInLayers() const
+{
+  return inLayers;
+}
+
+void vertex::setInLayers(bool b)
+{
+  inLayers = b;
+}
+
+void vertex::resetLayersInfo()
+{
+  inLayers = false;
+  succCount = predCount = 0;
+  for (int i=0; i<radix; i++)
+    succ[i] = pred[i] = NULL;
+}
+
+void vertex::addPartnerLayers(list<vertex*>& l)
+{
+  // No partner
+  if (!partner)
+    return;
+  vertex* p = (vertex*)(partner->otherSide(this));
+  // Partner already in one of the layers
+  if (p->inLayers)
+    return;
+  // Add partner to the layer
+  l.push_front(p);
+  p->inLayers = true;
+  // Update pred/succ relations
+  if (succCount >= radix) {
+    cout << "-E- More successors than radix" << endl;
+    return;
+  }
+  succ[succCount] = partner;
+  succCount++;
+
+  if (p->predCount >= radix) {
+    cout << "-E- More predecessors than radix" << endl;
+    return;
+  }
+  p->pred[p->predCount] = partner;
+  p->predCount++;
+}
+
+bool vertex::addNonPartnersLayers(list<vertex*>& l)
+{
+  vertex* prtn = NULL;
+  bool res = false;
+
+  if (partner)
+    prtn = (vertex*)(partner->otherSide(this));
+
+  for (int i=0; i<radix; i++) {
+    vertex* v = (vertex*)(connections[i]->otherSide(this));
+    if ((v != prtn) && (!v->inLayers)) {
+      // free vertex found
+      if (!v->partner)
+	res = true;
+      // Add vertex to the layer
+      l.push_front(v);
+      v->inLayers = true;
+      // Update pred/succ relations
+      if (succCount >= radix) {
+	cout << "-E- More successors than radix" << endl;
+	return false;
+      }
+      succ[succCount] = connections[i];
+      succCount++;
+
+      if (v->predCount >= radix) {
+	cout << "-E- More predecessors than radix" << endl;
+	return false;
+      }
+      v->pred[v->predCount] = connections[i];
+      v->predCount++;
+    }
+  }
+  return res;
+}
+
+vertex* vertex::getPredecessor() const
+{
+  vertex* v = NULL;
+  // Find a valid predecessor still in layers
+  for (int i=0; i<radix; i++) {
+    if (pred[i]) {
+      vertex* v2 = (vertex*)(pred[i]->otherSide(this));
+      if (v2->inLayers) {
+	v = v2;
+	break;
+      }
+    }
+  }
+  return v;
+}
+
+// Flip edge status on augmenting path
+void vertex::flipPredEdge(int idx)
+{
+  int i=0;
+  // Find an edge to a predecessor
+  for (i=0; i<radix; i++)
+    if (pred[i]) {
+      vertex* v1 = (vertex*)pred[i]->v1;
+      vertex* v2 = (vertex*)pred[i]->v2;
+      if (v1->getInLayers() && v2->getInLayers())
+	break;
+    }
+
+  if (i == radix) {
+    cout << "-E- Could find predecessor for flip" << endl;
+    return;
+  }
+  // The predecessor vertex
+  vertex* v = (vertex*) pred[i]->otherSide(this);
+
+  // Unmatch edge
+  if (idx)
+	v->partner = NULL;
+  // Match edge
+  else {
+    partner = pred[i];
+    v->partner = pred[i];
+  }
+}
+
+// Removing vertex from layers and updating successors/predecessors
+void vertex::unLink(list<vertex*>& l)
+{
+  inLayers = false;
+  for (int i=0; i<radix; i++) {
+    if (succ[i]) {
+      vertex* v = (vertex*)succ[i]->otherSide(this);
+      if (v->inLayers) {
+	v->predCount--;
+	// v has no more predecessors, schedule for removal from layers...
+	if (!v->predCount)
+	  l.push_back(v);
+	succ[i] = NULL;
+      }
+    }
+  }
+  succCount = 0;
+}
+
+//CLASS Bipartite
+
+// C'tor
+
+Bipartite::Bipartite(int s, int r):size(s),radix(r)
+{
+  leftSide = new vertex*[size];
+  rightSide = new vertex*[size];
+
+  for (int i=0; i<size; i++) {
+    leftSide[i] = new vertex(i,LEFT,radix);
+    rightSide[i] = new vertex(i,RIGHT,radix);
+  }
+}
+
+///////////////////////////////////////////////////////////
+
+// D'tor
+
+Bipartite::~Bipartite()
+{
+  // Free vertices
+  for (int i=0; i<size; i++) {
+    delete leftSide[i];
+    delete rightSide[i];
+  }
+  delete[] leftSide;
+  delete[] rightSide;
+
+  // Free edges
+  while (List.size()) {
+    edge* e = (edge*)(List.front());
+    List.pop_front();
+    delete e;
+  }
+}
+
+////////////////////////////////////////////////////////////
+
+// Get radix
+
+int Bipartite::getRadix() const
+{
+  return radix;
+}
+
+////////////////////////////////////////////////////////////
+
+// Set edges listt iterator to first
+
+bool Bipartite::setIterFirst()
+{
+  it = List.begin();
+  if (it == List.end())
+    return false;
+  return true;
+}
+
+///////////////////////////////////////////////////////////
+
+// Set edges list iterator to next
+
+bool Bipartite::setIterNext()
+{
+  if (it == List.end())
+    return false;
+  it++;
+  if (it == List.end())
+    return false;
+  return true;
+}
+
+////////////////////////////////////////////////////////////
+
+// Get current edge's request data
+
+inputData Bipartite::getReqDat()
+{
+  if (it == List.end()) {
+    cout << "-E- Iterator points to list end" << endl;
+  }
+  return ((edge*)(*it))->reqDat;
+}
+
+/////////////////////////////////////////////////////////////
+
+// Add connection between the nodes to the graph
+
+void Bipartite::connectNodes(int n1, int n2, inputData reqDat)
+{
+  if ((n1 >= size) || (n2 >= size)) {
+    cout << "-E- Node index exceeds size" << endl;
+    return;
+  }
+  // Create new edge
+  edge* newEdge = new edge;
+
+  // Init edge fields and add to it the edges list
+  newEdge->it = List.insert(List.end(), newEdge);
+  newEdge->reqDat = reqDat;
+  newEdge->v1 = newEdge->v2 = NULL;
+
+  // Connect the vertices
+  leftSide[n1]->pushConnection(newEdge);
+  rightSide[n2]->pushConnection(newEdge);
+}
+
+////////////////////////////////////////////////////////////////
+
+// Find maximal matching
+
+void Bipartite::maximalMatch()
+{
+  // Invoke match on left-side vertices
+  for (int i=0; i<size; i++)
+    leftSide[i]->match();
+}
+
+////////////////////////////////////////////////////////////////
+
+// Find maximum matching
+
+// Hopcroft-Karp algorithm
+Bipartite* Bipartite::maximumMatch()
+{
+  // First find a maximal match
+  maximalMatch();
+
+  list<void*>::iterator it2 = List.begin();
+  list<vertex*> l1, l2;
+  list<vertex*>::iterator it;
+
+  // Loop on algorithm phases
+  while (1) {
+    bool hardStop = false;
+    // First reset layers info
+    for (int i=0; i<size; i++) {
+      leftSide[i]->resetLayersInfo();
+      rightSide[i]->resetLayersInfo();
+    }
+    // Add free left-side vertices to l1 (building layer 0)
+    l1.clear();
+    for (int i=0; i<size; i++) {
+      if (!leftSide[i]->getPartner()) {
+	l1.push_front(leftSide[i]);
+	leftSide[i]->setInLayers(true);
+      }
+    }
+    // This is our termination condition
+    // Maximum matching achieved
+    if (l1.empty())
+      break;
+    // Loop on building layers
+    while (1) {
+      bool stop = false;
+      l2.clear();
+      // Add all non-partners of vertices in l1 to layers (l2)
+      // We signal to stop if a free (right-side) vertex is entering l2
+      for (it = l1.begin(); it != l1.end(); it++)
+	if ((*it)->addNonPartnersLayers(l2))
+	  stop = true;
+      // Found free vertex among right-side vertices
+      if (stop) {
+	// There are augmenting paths, apply them
+	augment(l2);
+	break;
+      }
+      // This is a terminal condition
+      if (l2.empty()) {
+	hardStop = true;
+	break;
+      }
+      // Add partners of vertices in l2 to layers (l1)
+      l1.clear();
+      for (it = l2.begin(); it!= l2.end(); it++)
+	(*it)->addPartnerLayers(l1);
+      // This is a terminal condition
+      if (l1.empty()) {
+	hardStop = true;
+	break;
+      }
+    }
+    // Maximum matching achieved
+    if (hardStop)
+      break;
+  }
+  // Build the matching graph
+  Bipartite* M = new Bipartite(size, 1);
+  // Go over all edges and move matched one to the new graph
+  it2 = List.begin();
+  while (it2 != List.end()) {
+    edge* e = (edge*)(*it2);
+    if (e->isMatched()) {
+      vertex* v1 = (vertex*)(e->v1);
+      vertex* v2 = (vertex*)(e->v2);
+      // Unlink vertices
+      ((vertex*)(e->v1))->delConnection(e);
+      // Update edges list
+      it2 = List.erase(it2);
+      // Add link to the new graph
+      if (v1->getSide() == LEFT)
+	M->connectNodes(v1->getID(), v2->getID(), e->reqDat);
+      else
+	M->connectNodes(v2->getID(), v1->getID(), e->reqDat);
+      // Free memory
+      delete e;
+    }
+    else
+      it2++;
+  }
+  return M;
+}
+
+//////////////////////////////////////////////////////////////////////
+
+// Apply augmenting paths on the matching
+
+void Bipartite::augment(list<vertex*>& l)
+{
+  // Use delQ to store vertex scheduled for the removal from layers
+  list<vertex*> delQ;
+  // Remove non-free vertices
+  list<vertex*>::iterator it = l.begin();
+  while (it != l.end()) {
+    if ((*it)->getPartner()) {
+      delQ.push_front((*it));
+      it = l.erase(it);
+    }
+    else
+      it++;
+  }
+  // Get rid of non-free vertices
+  while (!delQ.empty()) {
+    vertex* fr = delQ.front();
+    delQ.pop_front();
+    fr->unLink(delQ);
+  }
+
+  if (l.empty()) {
+    cout << "-E- No free vertices left!" << endl;
+    return;
+  }
+  // Augment and reset pred/succ relations
+  while (!l.empty()) {
+    vertex* curr = l.front();
+    l.pop_front();
+    int idx = 0;
+    // Backtrace the path and augment
+    int length=0;
+    while (1) {
+      delQ.push_front(curr);
+      // Its either the end of a path or an error
+      if (!curr->getPredecessor()) {
+	if (!idx && length) {
+	  cout << "-E- This vertex must have predecessor" << endl;
+	  return;
+	}
+	break;
+      }
+      // Flip edge status
+      curr->flipPredEdge(idx);
+      // Move back
+      curr = curr->getPredecessor();
+      idx = (idx+1)%2;
+      length++;
+    }
+    // Now clean the delQ
+    while (!delQ.empty()) {
+      vertex* fr = delQ.front();
+      delQ.pop_front();
+      fr->unLink(delQ);
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////
+
+// Perform Euler decomposition
+
+void Bipartite::decompose(Bipartite** bp1, Bipartite** bp2)
+{
+  if (radix < 2) {
+    cout << "-E- Radix value illegal: " << radix << endl;
+    return;
+  }
+
+  // Create new graphs
+  Bipartite* arr[2];
+  arr[0] = new Bipartite(size, radix/2);
+  arr[1] = new Bipartite(size, radix/2);
+
+  // Continue as long as unassigned edges left
+  while (!List.empty()) {
+    int idx = 0;
+    edge* e = (edge*)List.front();
+    vertex* current = (vertex*)e->v1;
+    e = current->popConnection();
+
+    while (e) {
+      // Connect nodes in the new graphs
+      vertex* v1 = (vertex*)e->v1;
+      vertex* v2 = (vertex*)e->v2;
+      if (v1->getSide() == LEFT)
+	arr[idx]->connectNodes(v1->getID(), v2->getID(), e->reqDat);
+      else
+	arr[idx]->connectNodes(v2->getID(), v1->getID(), e->reqDat);
+      idx = (idx+1)%2;
+      // Remove edge from list
+      List.erase(e->it);
+      // Pick next vertex
+      current = (vertex*) e->otherSide(current);
+      // Free memory
+      delete e;
+      // Pick next edge
+      e = current->popConnection();
+    }
+  }
+  *bp1 = arr[0];
+  *bp2 = arr[1];
+}
diff --git a/ibdm/datamodel/Bipartite.h b/ibdm/datamodel/Bipartite.h
new file mode 100644
index 0000000..cdac81a
--- /dev/null
+++ b/ibdm/datamodel/Bipartite.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+/*
+ * Fabric Utilities Project
+ *
+ * Bipartite Graph Header file
+ *
+ * Author: Vladimir Zdornov, Mellanox Technologies
+ *
+ */
+
+#ifndef IBDM_BIPARTITE_H_
+#define IBDM_BIPARTITE_H_
+
+#include <list>
+#include "RouteSys.h"
+
+using namespace std;
+
+typedef list<void*>::iterator peIter;
+typedef list<void*> peList;
+
+typedef enum side_ {LEFT=0, RIGHT} side;
+
+class edge
+{
+ public:
+  // Vertices
+  void* v1;
+  void* v2;
+  // Connection indices
+  int idx1;
+  int idx2;
+
+  // Edge list iterator
+  peIter it;
+
+  // Request data
+  inputData reqDat;
+
+  // C'tor
+  edge():v1(NULL),v2(NULL),idx1(-1),idx2(-1){};
+
+  // Get the vertex on the other side of the edge
+  void* otherSide(const void* v) {
+    if (v == v1)
+      return v2;
+    if (v == v2)
+      return v1;
+    return NULL;
+  }
+
+  // Check whether the edge is matched
+  bool isMatched();
+};
+
+class vertex
+{
+  // ID
+  int id;
+  // Side (0 - left, 1 - right)
+  side s;
+  // Array of connected edges
+  edge** connections;
+  // Initial number of neighbors
+  int radix;
+  // Current number of neighbors
+  int maxUsed;
+
+  // Matching fields
+
+  // Edge leading to the partner (NULL if none)
+  edge* partner;
+  // Array of layers predecessors
+  edge** pred;
+  // Number of predecessors
+  int predCount;
+  // Array of layers successors
+  edge** succ;
+  // Number of successors
+  int succCount;
+  // Denotes whether vertex is currently in layers
+  bool inLayers;
+
+ public:
+  // C'tor
+  vertex(int n, side sd, int rad);
+  // D'tor
+  ~vertex();
+  // Getters + Setters
+  int getID() const;
+  side getSide() const;
+  edge* getPartner() const;
+  vertex* getPredecessor() const;
+  bool getInLayers() const;
+  void setInLayers(bool b);
+  // Reset matching info
+  void resetLayersInfo();
+  // Adding partner to match layers
+  void addPartnerLayers(list<vertex*>& l);
+  // Adding non-partners to match layers
+  // Return true if one of the neighbors was free
+  bool addNonPartnersLayers(list<vertex*>& l);
+  // Add new connection (this vertex only)
+  void pushConnection(edge* e);
+  // Remove given connection (vertices on both sides)
+  void delConnection(edge* e);
+  // Flip predecessor edge status
+  // idx represents the layer index % 2 in the augmenting backward traversal (starting from 0)
+  void flipPredEdge(int idx);
+  // Remove vertex from layers, update predecessors and successors
+  void unLink(list<vertex*>& l);
+  // Get SOME connection and remove it (from both sides)
+  // If no connections present NULL is returned
+  edge* popConnection();
+  // Match vertex to SOME unmatched neighbor
+  // In case of success true is returned, false in case of failure
+  bool match();
+};
+
+class Bipartite
+{
+  // Number of vertices on each side
+  int size;
+  // Number of edges per vertex
+  int radix;
+  // Vertices arrays
+  vertex** leftSide;
+  vertex** rightSide;
+
+  peIter it;
+  // Edges list
+  peList List;
+
+  // Apply augmenting paths
+  void augment(list<vertex*>& l);
+
+ public:
+  // C'tor
+  Bipartite(int s, int r);
+  // D'tor
+  ~Bipartite();
+  int getRadix() const;
+  // Set iterator to first edge (returns false if list is empty)
+  bool setIterFirst();
+  // Set iterator to next edge (return false if list end is reached)
+  bool setIterNext();
+  // Get inputData pointed by iterator
+  inputData getReqDat();
+  // Add connection between two nodes
+  void connectNodes(int n1, int n2, inputData reqDat);
+  // Find maximal matching on current graph
+  void maximalMatch();
+  // Find maximum matching and remove it from the graph
+  // We use a variant of Hopcroft-Karp algorithm
+  Bipartite* maximumMatch();
+  // Decompose bipartite into to edge disjoint radix/2-regular bps
+  // We use a variant of Euler Decomposition
+  void decompose(Bipartite** bp1, Bipartite** bp2);
+};
+
+#endif
diff --git a/ibdm/datamodel/FatTree.cpp b/ibdm/datamodel/FatTree.cpp
index 55ff116..9fb2858 100644
--- a/ibdm/datamodel/FatTree.cpp
+++ b/ibdm/datamodel/FatTree.cpp
@@ -43,10 +43,12 @@ FatTree Utilities:
 #include <iomanip>
 #include "Fabric.h"
 #include "SubnMgt.h"
+#include "RouteSys.h"
+
 //////////////////////////////////////////////////////////////////////////////
 // Build a Fat Tree data structure for the given topology.
 // Prerequisites: Ranking performed and stored at p_node->rank.
-// Ranking is such that roots are marked with rank=0 and leaf switches with
+/// Ranking is such that roots are marked with rank=0 and leaf switches with
 // highest value.
 //
 // The algorithm BFS from an arbitrary leaf switch.
@@ -70,169 +72,188 @@ FatTree Utilities:

 // for comparing tupples
 struct FatTreeTuppleLess : public binary_function <vec_byte, vec_byte, bool> {
-   bool operator()(const vec_byte& x, const vec_byte& y) const {
-      if (x.size() > y.size()) return false;
-      if (y.size() > x.size()) return true;
+  bool operator()(const vec_byte& x, const vec_byte& y) const {
+    if (x.size() > y.size()) return false;
+    if (y.size() > x.size()) return true;

-      for (unsigned int i = 0 ; i < x.size() ; i++)
+    for (unsigned int i = 0 ; i < x.size() ; i++)
       {
-         if (x[i] > y[i]) return false;
-         if (x[i] < y[i]) return true;
+	if (x[i] > y[i]) return false;
+	if (x[i] < y[i]) return true;
       }
-      return false;
-   }
+    return false;
+  }
 };

 typedef map< IBNode *, vec_byte, less< IBNode *> > map_pnode_vec_byte;
 typedef vector< list< int > > vec_list_int;

 class FatTreeNode {
-   IBNode *p_node;          // points to the fabric node for this node
-   vec_list_int childPorts; // port nums connected to child by changing digit
-   vec_list_int parentPorts;// port nums connected to parent by changing digit
+  IBNode *p_node;          // points to the fabric node for this node
+  vec_list_int childPorts; // port nums connected to child by changing digit
+  vec_list_int parentPorts;// port nums connected to parent by changing digit
 public:
-   FatTreeNode(IBNode *p_node);
-   FatTreeNode(){p_node = NULL;};
-   int numParents();
-   int numChildren();
-   int numParentGroups();
-   int numChildGroups();
-   friend class FatTree;
+  FatTreeNode(IBNode *p_node);
+  FatTreeNode(){p_node = NULL;};
+  int numParents();
+  int numChildren();
+  int numParentGroups();
+  int numChildGroups();
+  bool goingDown(int lid);
+  friend class FatTree;
 };

 FatTreeNode::FatTreeNode(IBNode *p_n)
 {
-   p_node = p_n;
-   list< int > emptyList;
-   for (unsigned int pn = 0; pn <= p_node->numPorts; pn++)
-   {
+  p_node = p_n;
+  list< int > emptyList;
+  for (unsigned int pn = 0; pn <= p_node->numPorts; pn++)
+    {
       childPorts.push_back(emptyList);
       parentPorts.push_back(emptyList);
-   }
+    }
 }

 // get the total number of children a switch have
 int
 FatTreeNode::numChildren()
 {
-   int s = 0;
-   for (int i = 0; i < childPorts.size(); i++)
-      s += childPorts[i].size();
-   return s;
+  int s = 0;
+  for (int i = 0; i < childPorts.size(); i++)
+    s += childPorts[i].size();
+  return s;
 }

 // get the total number of children a switch have
 int
 FatTreeNode::numParents()
 {
-   int s = 0;
-   for (int i = 0; i < parentPorts.size(); i++)
-      s += parentPorts[i].size();
-   return s;
+  int s = 0;
+  for (int i = 0; i < parentPorts.size(); i++)
+    s += parentPorts[i].size();
+  return s;
 }

 // get the total number of children groups
 int
 FatTreeNode::numChildGroups()
 {
-   int s = 0;
-   for (int i = 0; i < childPorts.size(); i++)
-      if (childPorts[i].size()) s++;
-   return s;
+  int s = 0;
+  for (int i = 0; i < childPorts.size(); i++)
+    if (childPorts[i].size()) s++;
+  return s;
 }

 int
 FatTreeNode::numParentGroups()
 {
-   int s = 0;
-   for (int i = 0; i < parentPorts.size(); i++)
-      if (parentPorts[i].size()) s++;
-   return s;
+  int s = 0;
+  for (int i = 0; i < parentPorts.size(); i++)
+    if (parentPorts[i].size()) s++;
+  return s;
+}
+
+// Check whether there is downwards path towards the given lid
+bool FatTreeNode::goingDown(int lid)
+{
+  int portNum = p_node->getLFTPortForLid(lid);
+  if (portNum == IB_LFT_UNASSIGNED)
+    return false;
+  for (int i=0; i<childPorts.size(); i++)
+    for (list<int>::iterator lI = childPorts[i].begin();lI != childPorts[i].end(); lI++) {
+      if (portNum == *lI)
+	return true;
+    }
+  return false;
 }

 typedef map< vec_byte, class FatTreeNode, FatTreeTuppleLess > map_tupple_ftnode;

 class FatTree {
-   // the node tupple is built out of the following:
-   // d[0] = rank
-   // d[1..N-1] = ID digits
-   IBFabric          *p_fabric;     // The fabric we attach to
-   map_pnode_vec_byte TuppleByNode;
-   map_tupple_ftnode  NodeByTupple;
-   vec_int            LidByIdx;     // store target HCA lid by its index
-   unsigned int       N;            // number of levels in the fabric
-
-   // obtain the Fat Tree node for a given IBNode
-   FatTreeNode* getFatTreeNodeByNode(IBNode *p_node);
-
-   // get the first lowest level switch while making sure all HCAs
-   // are connected to same rank
-   // return NULL if this check is not met or no ranking available
-   IBNode *getLowestLevelSwitchNode();
-
-   // get a free tupple given the reference one and the index to change:
-   vec_byte getFreeTupple(vec_byte refTupple, unsigned int changeIdx);
-
-   // convert tupple to string
-   string getTuppleStr(vec_byte tupple);
-
-   // simply dump out the FatTree data:
-   void dump();
-
-   // track a connection to remote switch
-   int trackConnection(
-      FatTreeNode *p_ftNode,
-      vec_byte     tupple,   // the connected node tupple
-      unsigned int rank,     // rank of the local node
-      unsigned int remRank,  // rank of the remote node
-      unsigned int portNum,  // the port number connecting to the remote node
-      unsigned int remDigit  // the digit which changed on the remote node
-      );
-
-   // set of coefficients that represent the structure
-   int maxHcasPerLeafSwitch;
-   vec_int childrenPerRank; // not valid for leafs
-   vec_int parentsPerRank;
-   vec_int numSwInRank;     // number of switches for that level
-   vec_int downByRank;      // number of remote child switches s at rank
-   vec_int upByRank;        // number of remote parent switches at rank
-
-   // extract fat tree coefficients and update validity flag
-   // return 0 if OK
-   int extractCoefficients();
+  // the node tupple is built out of the following:
+  // d[0] = rank
+  // d[1..N-1] = ID digits
+  IBFabric          *p_fabric;     // The fabric we attach to
+  map_pnode_vec_byte TuppleByNode;
+  map_tupple_ftnode  NodeByTupple;
+  vec_int            LidByIdx;     // store target HCA lid by its index
+  unsigned int       N;            // number of levels in the fabric
+  map_str_int        IdxByName;
+
+  // obtain the Fat Tree node for a given IBNode
+  FatTreeNode* getFatTreeNodeByNode(IBNode *p_node);
+
+  // get the first lowest level switch while making sure all HCAs
+  // are connected to same rank
+  // return NULL if this check is not met or no ranking available
+  IBNode *getLowestLevelSwitchNode();
+
+  // get a free tupple given the reference one and the index to change:
+  vec_byte getFreeTupple(vec_byte refTupple, unsigned int changeIdx);
+
+  // convert tupple to string
+  string getTuppleStr(vec_byte tupple);
+
+  // simply dump out the FatTree data:
+  void dump();
+
+  // track a connection to remote switch
+  int trackConnection(
+		      FatTreeNode *p_ftNode,
+		      vec_byte     tupple,   // the connected node tupple
+		      unsigned int rank,     // rank of the local node
+		      unsigned int remRank,  // rank of the remote node
+		      unsigned int portNum,  // the port number connecting to the remote node
+		      unsigned int remDigit  // the digit which changed on the remote node
+		      );
+
+  // set of coefficients that represent the structure
+  int maxHcasPerLeafSwitch;
+  vec_int childrenPerRank; // not valid for leafs
+  vec_int parentsPerRank;
+  vec_int numSwInRank;     // number of switches for that level
+  vec_int downByRank;      // number of remote child switches s at rank
+  vec_int upByRank;        // number of remote parent switches at rank
+
+  // extract fat tree coefficients and update validity flag
+  // return 0 if OK
+  int extractCoefficients();

 public:
-   // construct the fat tree by matching the topology to it.
-   // note that this might return an invalid tree for routing
-   // as indicated by isValid flag
-   FatTree(IBFabric *p_fabric);
+  // construct the fat tree by matching the topology to it.
+  // note that this might return an invalid tree for routing
+  // as indicated by isValid flag
+  FatTree(IBFabric *p_fabric);
+
+  // true if the fabric can be mapped to a fat tree
+  bool isValid;

-   // true if the fabric can be mapped to a fat tree
-   bool isValid;
+  // propagate FDB assignments going up the tree ignoring the out port
+  int assignLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid, int outPortNum, int switchPathOnly);

-   // propagate FDB assignments going up the tree ignoring the out port
-   int assignLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid,
-								int outPortNum, int switchPathOnly);
+  // set FDB values as given in the input
+  int forceLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid, vec_int ports);

-   // propagate FDB assignments going down the tree
-   int
-   assignLftDownWards(FatTreeNode *p_ftNode, uint16_t dLid,
-                      int outPortNum, int switchPathOnly);
+  // propagate FDB assignments going down the tree
+  int assignLftDownWards(FatTreeNode *p_ftNode, uint16_t dLid, int outPortNum, int switchPathOnly, int downOnly);

-   // route the fat tree
-   int route();
+  // route the fat tree
+  int route();

-   // create the file ftree.hcas with the list of HCA port names
-   // and LIDs in the correct order
-   void dumpHcaOrder();
+  // route requested permutation in the fat tree
+  int permRoute(vector<string> src, vector<string> dst);
+
+  // create the file ftree.hcas with the list of HCA port names
+  // and LIDs in the correct order
+  void dumpHcaOrder();
 };

 FatTreeNode* FatTree::getFatTreeNodeByNode(IBNode *p_node) {
-   FatTreeNode* p_ftNode;
-   vec_byte tupple(N, 0);
-   tupple = TuppleByNode[p_node];
-   p_ftNode = &NodeByTupple[tupple];
-   return p_ftNode;
+  FatTreeNode* p_ftNode;
+  vec_byte tupple(N, 0);
+  tupple = TuppleByNode[p_node];
+  p_ftNode = &NodeByTupple[tupple];
+  return p_ftNode;
 }

 // get the first lowest level switch while making sure all HCAs
@@ -240,124 +261,124 @@ FatTreeNode* FatTree::getFatTreeNodeByNode(IBNode *p_node) {
 // return NULL if this check is not met or no ranking available
 IBNode *FatTree::getLowestLevelSwitchNode()
 {
-   unsigned int leafRank = 0;
-   IBNode *p_leafSwitch = NULL;
-   IBPort *p_port;
-
-   // go over all HCAs and track the rank of the node connected to them
-   for( map_str_pnode::iterator nI = p_fabric->NodeByName.begin();
-        nI != p_fabric->NodeByName.end();
-        nI++)
-   {
+  unsigned int leafRank = 0;
+  IBNode *p_leafSwitch = NULL;
+  IBPort *p_port;
+
+  // go over all HCAs and track the rank of the node connected to them
+  for( map_str_pnode::iterator nI = p_fabric->NodeByName.begin();
+       nI != p_fabric->NodeByName.end();
+       nI++)
+    {
       IBNode *p_node = (*nI).second;
       if (p_node->type != IB_CA_NODE) continue;

       for (unsigned int pn = 1; pn <= p_node->numPorts; pn++)
-      {
-         p_port = p_node->getPort(pn);
-         if (p_port && p_port->p_remotePort)
-         {
-            IBNode *p_remNode = p_port->p_remotePort->p_node;
-
-            if (p_remNode->type != IB_SW_NODE) continue;
-
-            // is the remote node ranked?
-            if (!p_remNode->rank)  continue;
-
-            // must be identical for all leaf switches:
-            if (!leafRank)
-            {
-               leafRank = p_remNode->rank;
-               p_leafSwitch = p_remNode;
-            }
-            else
-            {
-               // get the lowest name
-               if (p_remNode->name < p_leafSwitch->name )
-                  p_leafSwitch = p_remNode;
-
-               if (p_remNode->rank != leafRank)
-               {
-                  cout << "-E- Given topology is not a fat tree. HCA:"
-                       << p_remNode->name
-                       << " found not on lowest level!" << endl;
-                  return(NULL);
-               }
-            }
-         }
-      }
-   }
-   return(p_leafSwitch);
+	{
+	  p_port = p_node->getPort(pn);
+	  if (p_port && p_port->p_remotePort)
+	    {
+	      IBNode *p_remNode = p_port->p_remotePort->p_node;
+
+	      if (p_remNode->type != IB_SW_NODE) continue;
+
+	      // is the remote node ranked?
+	      if (!p_remNode->rank)  continue;
+
+	      // must be identical for all leaf switches:
+	      if (!leafRank)
+		{
+		  leafRank = p_remNode->rank;
+		  p_leafSwitch = p_remNode;
+		}
+	      else
+		{
+		  // get the lowest name
+		  if (p_remNode->name < p_leafSwitch->name )
+		    p_leafSwitch = p_remNode;
+
+		  if (p_remNode->rank != leafRank)
+		    {
+		      cout << "-E- Given topology is not a fat tree. HCA:"
+			   << p_remNode->name
+			   << " found not on lowest level!" << endl;
+		      return(NULL);
+		    }
+		}
+	    }
+	}
+    }
+  return(p_leafSwitch);
 }

 // get a free tupple given the reference one and the index to change:
 // also track the max digit allocated per index
 vec_byte FatTree::getFreeTupple(vec_byte refTupple, unsigned int changeIdx)
 {
-   vec_byte res = refTupple;
-   int rank = changeIdx - 1;
-   for (uint8_t i = 0; i < 255; i++)
-   {
+  vec_byte res = refTupple;
+  int rank = changeIdx - 1;
+  for (uint8_t i = 0; i < 255; i++)
+    {
       res[changeIdx] = i;
       map_tupple_ftnode::const_iterator tI = NodeByTupple.find(res);
       if (tI == NodeByTupple.end())
-         return res;
-   }
-   cout << "ABORT: fail to get free tupple! (in 255 indexies)" << endl;
-   abort();
+	return res;
+    }
+  cout << "ABORT: fail to get free tupple! (in 255 indexies)" << endl;
+  abort();
 }

 string FatTree::getTuppleStr(vec_byte tupple)
 {
-   char buf[128];
-   buf[0] = '\0';
-   for (unsigned int i = 0; i < tupple.size(); i++)
-   {
+  char buf[128];
+  buf[0] = '\0';
+  for (unsigned int i = 0; i < tupple.size(); i++)
+    {
       if (i) strcat(buf,".");
       sprintf(buf, "%s%d", buf, tupple[i]);
-   }
-   return(string(buf));
+    }
+  return(string(buf));
 }

 // track connection going up or down by registering the port in the
 // correct fat tree node childPorts and parentPorts
 int FatTree::trackConnection(
-   FatTreeNode *p_ftNode, // the connected node
-   vec_byte     tupple,   // the connected node tupple
-   unsigned int rank,     // rank of the local node
-   unsigned int remRank,  // rank of the remote node
-   unsigned int portNum,  // the port number connecting to the remote node
-   unsigned int remDigit  // the digit of the tupple changing to the remote node
-   )
+			     FatTreeNode *p_ftNode, // the connected node
+			     vec_byte     tupple,   // the connected node tupple
+			     unsigned int rank,     // rank of the local node
+			     unsigned int remRank,  // rank of the remote node
+			     unsigned int portNum,  // the port number connecting to the remote node
+			     unsigned int remDigit  // the digit of the tupple changing to the remote node
+			     )
 {
-   if ( rank < remRank )
-   {
+  if ( rank < remRank )
+    {
       // going down
       // make sure we have enough entries in the vector
       if (remDigit >= p_ftNode->childPorts.size())
-      {
-         list<  int > emptyPortList;
-         for (unsigned int i = p_ftNode->childPorts.size();
-              i <= remDigit; i++)
+	{
+	  list<  int > emptyPortList;
+	  for (unsigned int i = p_ftNode->childPorts.size();
+	       i <= remDigit; i++)
             p_ftNode->childPorts.push_back(emptyPortList);
-      }
+	}
       p_ftNode->childPorts[remDigit].push_back(portNum);
-   }
-   else
-   {
+    }
+  else
+    {
       // going up
       // make sure we have enough entries in the vector
       if (remDigit >= p_ftNode->parentPorts.size())
-      {
-         list< int > emptyPortList;
-         for (unsigned int i = p_ftNode->parentPorts.size();
-              i <= remDigit; i++)
+	{
+	  list< int > emptyPortList;
+	  for (unsigned int i = p_ftNode->parentPorts.size();
+	       i <= remDigit; i++)
             p_ftNode->parentPorts.push_back(emptyPortList);
-      }
+	}
       p_ftNode->parentPorts[remDigit].push_back(portNum);
-   }
+    }

-   return(0);
+  return(0);
 }

 // Extract fat tree coefficiants and double check its
@@ -365,19 +386,19 @@ int FatTree::trackConnection(
 int
 FatTree::extractCoefficients()
 {
-   // Go over all levels of the tree.
-   // Collect number of nodes per each level
-   // Require the number of children is equal
-   // Require the number of parents is equal
-
-   int prevLevel = -1;
-   int anyErr = 0;
-
-   // go over all nodes
-   for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
-        tI != NodeByTupple.end();
-        tI++)
-   {
+  // Go over all levels of the tree.
+  // Collect number of nodes per each level
+  // Require the number of children is equal
+  // Require the number of parents is equal
+
+  int prevLevel = -1;
+  int anyErr = 0;
+
+  // go over all nodes
+  for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
+       tI != NodeByTupple.end();
+       tI++)
+    {
       FatTreeNode *p_ftNode = &((*tI).second);
       int level = (*tI).first[0];
       bool isFirstInLevel;
@@ -386,118 +407,118 @@ FatTree::extractCoefficients()
       prevLevel = level;

       if (isFirstInLevel)
-      {
-         numSwInRank.push_back(1);
-         parentsPerRank.push_back(p_ftNode->numParents());
-         childrenPerRank.push_back(p_ftNode->numChildren());
-         downByRank.push_back(p_ftNode->numChildGroups());
-         upByRank.push_back(p_ftNode->numParentGroups());
-      }
+	{
+	  numSwInRank.push_back(1);
+	  parentsPerRank.push_back(p_ftNode->numParents());
+	  childrenPerRank.push_back(p_ftNode->numChildren());
+	  downByRank.push_back(p_ftNode->numChildGroups());
+	  upByRank.push_back(p_ftNode->numParentGroups());
+	}
       else
-      {
-         numSwInRank[level]++;
-         if (parentsPerRank[level] != p_ftNode->numParents())
-         {
-            if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-               cout << "-E- node:" << p_ftNode->p_node->name
-                    << " has unequal number of parent ports to its level"
-                    << endl;
-            anyErr++;
-         }
-
-         // we do not require symmetrical routing for leafs
-         if (level < N-1)
-         {
-            if (childrenPerRank[level] != p_ftNode->numChildren())
-            {
-               if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-                  cout << "-E- node:" << p_ftNode->p_node->name <<
-                     " has unequal number of child ports to its level" << endl;
-               anyErr++;
-            }
-         }
-      }
-   }
+	{
+	  numSwInRank[level]++;
+	  if (parentsPerRank[level] != p_ftNode->numParents())
+	    {
+	      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+		cout << "-E- node:" << p_ftNode->p_node->name
+		     << " has unequal number of parent ports to its level"
+		     << endl;
+	      anyErr++;
+	    }
+
+	  // we do not require symmetrical routing for leafs
+	  if (level < N-1)
+	    {
+	      if (childrenPerRank[level] != p_ftNode->numChildren())
+		{
+		  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+		    cout << "-E- node:" << p_ftNode->p_node->name <<
+		      " has unequal number of child ports to its level" << endl;
+		  anyErr++;
+		}
+	    }
+	}
+    }

-   if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-   {
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    {
       for (int rank = 0; rank < numSwInRank.size(); rank++) {
-         cout << "-I- rank:" << rank
-              << " switches:" << numSwInRank[rank]
-              << " parents: " << parentsPerRank[rank]
-              << " (" << upByRank[rank] << " groups)"
-              << " children:" << childrenPerRank[rank]
-              << " (" << downByRank[rank] << " groups)"
-              << endl;
+	cout << "-I- rank:" << rank
+	     << " switches:" << numSwInRank[rank]
+	     << " parents: " << parentsPerRank[rank]
+	     << " (" << upByRank[rank] << " groups)"
+	     << " children:" << childrenPerRank[rank]
+	     << " (" << downByRank[rank] << " groups)"
+	     << endl;
       }
-   }
+    }

-   if (anyErr) return 1;
+  if (anyErr) return 1;

-   vec_byte firstLeafTupple(N, 0);
-   firstLeafTupple[0] = N-1;
-   maxHcasPerLeafSwitch = 0;
-   for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
-        tI != NodeByTupple.end();
-        tI++)
-   {
+  vec_byte firstLeafTupple(N, 0);
+  firstLeafTupple[0] = N-1;
+  maxHcasPerLeafSwitch = 0;
+  for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
+       tI != NodeByTupple.end();
+       tI++)
+    {
       FatTreeNode *p_ftNode = &((*tI).second);
       IBNode *p_node = p_ftNode->p_node;
       int numHcaPorts = 0;
       for (unsigned int pn = 1; pn <= p_node->numPorts; pn++)
-      {
-         IBPort *p_port = p_node->getPort(pn);
-         if (p_port && p_port->p_remotePort &&
-             (p_port->p_remotePort->p_node->type == IB_CA_NODE))
-         {
-            numHcaPorts++;
-         }
+	{
+	  IBPort *p_port = p_node->getPort(pn);
+	  if (p_port && p_port->p_remotePort &&
+	      (p_port->p_remotePort->p_node->type == IB_CA_NODE))
+	    {
+	      numHcaPorts++;
+	    }

-      }
+	}
       if (numHcaPorts > maxHcasPerLeafSwitch)
-         maxHcasPerLeafSwitch = numHcaPorts;
-   }
+	maxHcasPerLeafSwitch = numHcaPorts;
+    }

-   if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-      cout << "-I- HCAs per leaf switch set to:"
-           << maxHcasPerLeafSwitch << endl;
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    cout << "-I- HCAs per leaf switch set to:"
+	 << maxHcasPerLeafSwitch << endl;

-   cout << "-I- Topology is a valid Fat Tree" << endl;
-   isValid = 1;
+  cout << "-I- Topology is a valid Fat Tree" << endl;
+  isValid = 1;

-   return 0;
+  return 0;
 }

 // construct the fat tree by matching the topology to it.
 FatTree::FatTree(IBFabric *p_f)
 {
-   isValid = 0;
-   p_fabric = p_f;
+  isValid = 0;
+  p_fabric = p_f;

-   IBNode *p_node = getLowestLevelSwitchNode();
-   IBPort *p_port;
-   FatTreeNode *p_ftNode;
+  IBNode *p_node = getLowestLevelSwitchNode();
+  IBPort *p_port;
+  FatTreeNode *p_ftNode;

-   if (! p_node) return;
-   N = p_node->rank + 1; // N = number of levels (our first rank is 0 ...)
+  if (! p_node) return;
+  N = p_node->rank + 1; // N = number of levels (our first rank is 0 ...)

-   // BFS from the first switch connected to HCA found on the fabric
-   list< IBNode * > bfsQueue;
-   bfsQueue.push_back(p_node);
+  // BFS from the first switch connected to HCA found on the fabric
+  list< IBNode * > bfsQueue;
+  bfsQueue.push_back(p_node);

-   // also we always allocate the address 0..0 with "rank" digits to the node:
-   vec_byte tupple(N, 0);
+  // also we always allocate the address 0..0 with "rank" digits to the node:
+  vec_byte tupple(N, 0);

-   // adjust the level:
-   tupple[0] = p_node->rank;
-   TuppleByNode[p_node] = tupple;
-   NodeByTupple[tupple] = FatTreeNode(p_node);
-   if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-      cout << "-I- Assigning tupple:" << getTuppleStr(tupple) << " to:"
-           << p_node->name << endl;
+  // adjust the level:
+  tupple[0] = p_node->rank;
+  TuppleByNode[p_node] = tupple;
+  NodeByTupple[tupple] = FatTreeNode(p_node);
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    cout << "-I- Assigning tupple:" << getTuppleStr(tupple) << " to:"
+	 << p_node->name << endl;

-   while (! bfsQueue.empty())
-   {
+  while (! bfsQueue.empty())
+    {
       p_node = bfsQueue.front();
       bfsQueue.pop_front();
       // we must have a tupple stored - get it
@@ -507,137 +528,141 @@ FatTree::FatTree(IBFabric *p_f)

       // go over all the node ports
       for (unsigned int pn = 1; pn <= p_node->numPorts; pn++)
-      {
-         p_port = p_node->getPort(pn);
-         if (!p_port || !p_port->p_remotePort) continue;
-
-         IBNode *p_remNode = p_port->p_remotePort->p_node;
-
-         if (p_remNode->type != IB_SW_NODE)
-         {
-            // for HCAs we only track the conenctions
-            list< int > tmpList;
-            tmpList.push_back(pn);
-            p_ftNode->childPorts.push_back(tmpList);
-            continue;
-         }
-
-         // now try to see if this node has already a map:
-         map_pnode_vec_byte::iterator tI = TuppleByNode.find(p_remNode);
-
-         // we are allowed to change the digit based on the direction we go:
-         unsigned int changingDigitIdx;
-         if (p_node->rank < p_remNode->rank)
+	{
+	  p_port = p_node->getPort(pn);
+	  if (!p_port || !p_port->p_remotePort) continue;
+
+	  IBNode *p_remNode = p_port->p_remotePort->p_node;
+
+	  if (p_remNode->type != IB_SW_NODE)
+	    {
+	      // for HCAs we only track the conenctions
+	      list< int > tmpList;
+	      tmpList.push_back(pn);
+	      p_ftNode->childPorts.push_back(tmpList);
+	      continue;
+	    }
+
+	  // now try to see if this node has already a map:
+	  map_pnode_vec_byte::iterator tI = TuppleByNode.find(p_remNode);
+
+	  // we are allowed to change the digit based on the direction we go:
+	  unsigned int changingDigitIdx;
+	  if (p_node->rank < p_remNode->rank)
             // going down the tree = use the current rank + 1
             // (save one for level)
             changingDigitIdx = p_node->rank + 1;
-         else if (p_node->rank > p_remNode->rank)
+	  else if (p_node->rank > p_remNode->rank)
             // goin up the tree = use current rank (first one is level)
             changingDigitIdx = p_node->rank;
-         else
-         {
-            cout << "-E- Connections on the same rank level "
-                 << " are not allowed in Fat Tree routing." << endl;
-            cout << "    from:" << p_node->name << "/P" << pn
-                 << " to:" << p_remNode->name << endl;
-            return;
-         }
-
-         // do we need to allocate a new tupple?
-         if (tI == TuppleByNode.end())
-         {
-
-            // the node is new - so get a new tupple for it:
-            vec_byte newTupple = tupple;
-            // change the level accordingly
-            newTupple[0] = p_remNode->rank;
-            // obtain a free one
-            newTupple = getFreeTupple(newTupple, changingDigitIdx);
-
-            // assign the new tupple and add to next steps:
-            if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-               cout << "-I- Assigning tupple:" << getTuppleStr(newTupple)
-                    << " to:" << p_remNode->name << " changed idx:"
-                    << changingDigitIdx << " from:" << getTuppleStr(tupple)
-                    << endl;
-
-            TuppleByNode[p_remNode] = newTupple;
-            NodeByTupple[newTupple] = FatTreeNode(p_remNode);
-
-            unsigned int digit = newTupple[changingDigitIdx];
-
-            // track the connection
-            if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-               cout << "-I- Connecting:" << p_node->name << " to:"
-                    << p_remNode->name << " through port:" << pn
-                    << " remDigit:" << digit << endl;
-            if (trackConnection(
-                   p_ftNode, tupple, p_node->rank, p_remNode->rank, pn, digit))
-               return;
-
-            bfsQueue.push_back(p_remNode);
-         }
-         else
-         {
-            // other side already has a tupple - so just track the connection
-            vec_byte remTupple = (*tI).second;
-            vec_byte mergedTupple = remTupple;
-
-            unsigned int digit = remTupple[changingDigitIdx];
-
-            if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-               cout << "-I- Connecting:" << p_node->name  << " to:"
-                    << p_remNode->name << " through port:" << pn
-                    << " remDigit:" << digit  << endl;
-            if (trackConnection(
-                   p_ftNode, tupple, p_node->rank, p_remNode->rank, pn, digit))
-               return;
-         }
-
-      } // all ports
-   } // anything to do
-
-   // make sure the extracted tropology can be declared "fat tree"
-   if (extractCoefficients()) return;
-
-   // build mapping between HCA index and LIDs.
-   // We need to decide what will be the K of the lowest switches level.
-   // It is possible that for all of them the number of HCAs is < num
-   // left ports thus we should probably use the lowest number of all
-   vec_byte firstLeafTupple(N, 0);
-   firstLeafTupple[0] = N-1;
-
-   // now restart going over all leaf switches by their tupple order and
-   // allocate mapping
-   for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
-        tI != NodeByTupple.end();
-        tI++)
-   {
+	  else
+	    {
+	      cout << "-E- Connections on the same rank level "
+		   << " are not allowed in Fat Tree routing." << endl;
+	      cout << "    from:" << p_node->name << "/P" << pn
+		   << " to:" << p_remNode->name << endl;
+	      return;
+	    }
+
+	  // do we need to allocate a new tupple?
+	  if (tI == TuppleByNode.end())
+	    {
+
+	      // the node is new - so get a new tupple for it:
+	      vec_byte newTupple = tupple;
+	      // change the level accordingly
+	      newTupple[0] = p_remNode->rank;
+	      // obtain a free one
+	      newTupple = getFreeTupple(newTupple, changingDigitIdx);
+
+	      // assign the new tupple and add to next steps:
+	      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+		cout << "-I- Assigning tupple:" << getTuppleStr(newTupple)
+		     << " to:" << p_remNode->name << " changed idx:"
+		     << changingDigitIdx << " from:" << getTuppleStr(tupple)
+		     << endl;
+
+	      TuppleByNode[p_remNode] = newTupple;
+	      NodeByTupple[newTupple] = FatTreeNode(p_remNode);
+
+	      unsigned int digit = newTupple[changingDigitIdx];
+
+	      // track the connection
+	      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+		cout << "-I- Connecting:" << p_node->name << " to:"
+		     << p_remNode->name << " through port:" << pn
+		     << " remDigit:" << digit << endl;
+	      if (trackConnection(
+				  p_ftNode, tupple, p_node->rank, p_remNode->rank, pn, digit))
+		return;
+
+	      bfsQueue.push_back(p_remNode);
+	    }
+	  else
+	    {
+	      // other side already has a tupple - so just track the connection
+	      vec_byte remTupple = (*tI).second;
+	      vec_byte mergedTupple = remTupple;
+
+	      unsigned int digit = remTupple[changingDigitIdx];
+
+	      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+		cout << "-I- Connecting:" << p_node->name  << " to:"
+		     << p_remNode->name << " through port:" << pn
+		     << " remDigit:" << digit  << endl;
+	      if (trackConnection(
+				  p_ftNode, tupple, p_node->rank, p_remNode->rank, pn, digit))
+		return;
+	    }
+
+	} // all ports
+    } // anything to do
+
+  // make sure the extracted tropology can be declared "fat tree"
+  if (extractCoefficients()) return;
+
+  // build mapping between HCA index and LIDs.
+  // We need to decide what will be the K of the lowest switches level.
+  // It is possible that for all of them the number of HCAs is < num
+  // left ports thus we should probably use the lowest number of all
+  vec_byte firstLeafTupple(N, 0);
+  firstLeafTupple[0] = N-1;
+
+  // now restart going over all leaf switches by their tupple order and
+  // allocate mapping
+  int hcaIdx = 0;
+  for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
+       tI != NodeByTupple.end();
+       tI++)
+    {
       // we collect HCAs connected to the leaf switch and set their childPort
       // starting at the index associated with the switch tupple.
       FatTreeNode *p_ftNode = &((*tI).second);
       IBNode *p_node = p_ftNode->p_node;
       unsigned int pIdx = 0;
       for (unsigned int pn = 1; pn <= p_node->numPorts; pn++)
-      {
-         IBPort *p_port = p_node->getPort(pn);
-         if (p_port && p_port->p_remotePort &&
-             (p_port->p_remotePort->p_node->type == IB_CA_NODE))
-         {
-            LidByIdx.push_back(p_port->p_remotePort->base_lid);
-            pIdx++;
-         }
-      }
+	{
+	  IBPort *p_port = p_node->getPort(pn);
+	  if (p_port && p_port->p_remotePort &&
+	      (p_port->p_remotePort->p_node->type == IB_CA_NODE))
+	    {
+	      LidByIdx.push_back(p_port->p_remotePort->base_lid);
+	      IdxByName[p_port->p_remotePort->p_node->name] = hcaIdx;
+	      pIdx++;
+	      hcaIdx++;
+	    }
+	}
       // we might need some padding
       for (; pIdx < maxHcasPerLeafSwitch; pIdx++) {
-         LidByIdx.push_back(0);
+	LidByIdx.push_back(0);
+	hcaIdx++;
       }
-   }
+    }

-   cout << "-I- Fat Tree Created" << endl;
+  cout << "-I- Fat Tree Created" << endl;

-   if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-      dump();
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    dump();
 }

 //////////////////////////////////////////////////////////////////////////////
@@ -664,100 +689,100 @@ int
 FatTree::assignLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid,
                           int outPortNum, int switchPathOnly)
 {
-   IBPort* p_port;
-   IBNode *p_node = p_ftNode->p_node;
-
-   if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-      cout << "-V- assignLftUpWards invoked on node:" << p_node->name
-           << " out-port:" << outPortNum
-           << " to dlid:" << dLid
-			  << " switchPathOnly:" << switchPathOnly
-			  << endl;
-
-   // Foreach one of the child port groups select the port which is
-   // less utilized and set its LFT - then recurse into it
-   // go over all child ports
-   for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
-      if (!p_ftNode->childPorts[i].size()) continue;
+  IBPort* p_port;
+  IBNode *p_node = p_ftNode->p_node;
+
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    cout << "-V- assignLftUpWards invoked on node:" << p_node->name
+	 << " out-port:" << outPortNum
+	 << " to dlid:" << dLid
+	 << " switchPathOnly:" << switchPathOnly
+	 << endl;
+
+  // Foreach one of the child port groups select the port which is
+  // less utilized and set its LFT - then recurse into it
+  // go over all child ports
+  for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
+    if (!p_ftNode->childPorts[i].size()) continue;
+
+    // we can skip handling the remote node if
+    // it already has an assigned LFT for this target lid
+    int firstPortNum = p_ftNode->childPorts[i].front();
+    IBPort *p_firstPort = p_node->getPort(firstPortNum);
+    IBNode *p_remNode = p_firstPort->p_remotePort->p_node;
+    if (p_remNode->getLFTPortForLid(dLid) != IB_LFT_UNASSIGNED)
+      {
+	if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	  cout << "-V- assignLftUpWards skip already assigned remote node:"
+	       << p_remNode->name
+	       << " switchPathOnly:" << switchPathOnly
+	       << endl;
+	continue;
+      }

-		// we can skip handling the remote node if
-		// it already has an assigned LFT for this target lid
-		int firstPortNum = p_ftNode->childPorts[i].front();
-		IBPort *p_firstPort = p_node->getPort(firstPortNum);
-		IBNode *p_remNode = p_firstPort->p_remotePort->p_node;
-		if (p_remNode->getLFTPortForLid(dLid) != IB_LFT_UNASSIGNED)
-		{
-			if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-				cout << "-V- assignLftUpWards skip already assigned remote node:"
-					  << p_remNode->name
-					  << " switchPathOnly:" << switchPathOnly
-					  << endl;
-			continue;
-		}
+    int bestUsage = 0;
+    IBPort *p_bestPort = NULL;
+    int found = 0;

-      int bestUsage = 0;
-      IBPort *p_bestPort = NULL;
-      int found = 0;
-
-      // we only need one best port on each group
-      for (list<int>::iterator lI = p_ftNode->childPorts[i].begin();
-           !found && (lI != p_ftNode->childPorts[i].end());
-           lI++) {
-
-         // can not have more then one port in group...
-         int portNum = *lI;
-
-         // we do not want to descend back to the original port
-         if (portNum == outPortNum)
-         {
-            p_bestPort = NULL;
-            found = 1;
-            continue;
-         }
-
-         IBPort *p_port = p_node->getPort(portNum);
-         // not required but what the hack...
-         if (!p_port || !p_port->p_remotePort) continue;
-         IBPort *p_remPort = p_port->p_remotePort;
-
-         // ignore remote HCA nodes
-         if (p_remPort->p_node->type != IB_SW_NODE) continue;
-
-         // look on the local usage as we mark usage entering a port
-         int usage = p_port->counter1;
-			if (switchPathOnly)
-				usage += p_port->counter2;
-         if ((p_bestPort == NULL) || (usage < bestUsage))
-         {
-            p_bestPort = p_port;
-            bestUsage = usage;
-         }
-      }
+    // we only need one best port on each group
+    for (list<int>::iterator lI = p_ftNode->childPorts[i].begin();
+	 !found && (lI != p_ftNode->childPorts[i].end());
+	 lI++) {
+
+      // can not have more then one port in group...
+      int portNum = *lI;
+
+      // we do not want to descend back to the original port
+      if (portNum == outPortNum)
+	{
+	  p_bestPort = NULL;
+	  found = 1;
+	  continue;
+	}
+
+      IBPort *p_port = p_node->getPort(portNum);
+      // not required but what the hack...
+      if (!p_port || !p_port->p_remotePort) continue;
+      IBPort *p_remPort = p_port->p_remotePort;
+
+      // ignore remote HCA nodes
+      if (p_remPort->p_node->type != IB_SW_NODE) continue;

-      if (p_bestPort != NULL)
+      // look on the local usage as we mark usage entering a port
+      int usage = p_port->counter1;
+      if (switchPathOnly)
+	usage += p_port->counter2;
+      if ((p_bestPort == NULL) || (usage < bestUsage))
+	{
+	  p_bestPort = p_port;
+	  bestUsage = usage;
+	}
+    }
+
+    if (p_bestPort != NULL)
       {
-			// mark utilization
-			if (switchPathOnly)
-				p_bestPort->counter2++;
-			else
-				p_bestPort->counter1++;
-
-         IBPort *p_bestRemPort = p_bestPort->p_remotePort;
-         p_remNode->setLFTPortForLid(dLid, p_bestRemPort->num);
-
-         if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-            cout << "-V- assignLftUpWards setting lft on:" << p_remNode->name
-                 << " to port:" << p_bestRemPort->num
-                 << " to dlid:" << dLid  << endl;
-
-         FatTreeNode *p_remFTNode =
-            getFatTreeNodeByNode(p_bestRemPort->p_node);
-         assignLftUpWards(p_remFTNode, dLid, p_bestRemPort->num,
-								  switchPathOnly);
+	// mark utilization
+	if (switchPathOnly)
+	  p_bestPort->counter2++;
+	else
+	  p_bestPort->counter1++;
+
+	IBPort *p_bestRemPort = p_bestPort->p_remotePort;
+	p_remNode->setLFTPortForLid(dLid, p_bestRemPort->num);
+
+	if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	  cout << "-V- assignLftUpWards setting lft on:" << p_remNode->name
+	       << " to port:" << p_bestRemPort->num
+	       << " to dlid:" << dLid  << endl;
+
+	FatTreeNode *p_remFTNode =
+	  getFatTreeNodeByNode(p_bestRemPort->p_node);
+	assignLftUpWards(p_remFTNode, dLid, p_bestRemPort->num,
+			 switchPathOnly);
       }
-   }
+  }

-   return(0);
+  return(0);
 }

 // to allocate a port downwards we look at all ports
@@ -766,145 +791,139 @@ FatTree::assignLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid,
 // we also start an upwards assignment to this node
 int
 FatTree::assignLftDownWards(FatTreeNode *p_ftNode, uint16_t dLid,
-                            int outPortNum, int switchPathOnly)
+                            int outPortNum, int switchPathOnly, int downOnly)
 {
-   IBPort *p_port;
-   IBNode *p_node = p_ftNode->p_node;
-
-   if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-      cout << "-V- assignLftDownWards from:" << p_node->name
-           << " dlid:" << dLid
-           << " through port:" << outPortNum
-			  << " switchPathOnly:" << switchPathOnly
-			  << endl;
-
-   if (outPortNum != 0xFF)
-   {
-		// Set FDB to that LID only if not preset or we are on "main" route
-		if (!switchPathOnly ||
-			 (p_node->getLFTPortForLid(dLid) == IB_LFT_UNASSIGNED)) {
-			p_node->setLFTPortForLid(dLid, outPortNum);
-
-			p_port = p_node->getPort(outPortNum);
-
-			// mark the usage of this port
-			if (p_port) {
-				if (switchPathOnly) {
-					p_port->counter2++;
-				} else {
-					p_port->counter1++;
-				}
-			}
-		}
-   }
-
-   // find the remote port (following the parents list order)
-   // that is not used or less used.
-   int bestUsage = 0;
-	int bestGroup = -1;
-   IBPort *p_bestRemPort = NULL;
-   int found = 0;
-   // go over all child ports
-   for (int i = 0; !found && (i < p_ftNode->parentPorts.size()); i++) {
-      if (!p_ftNode->parentPorts[i].size()) continue;
-
-      for (list<int>::iterator lI = p_ftNode->parentPorts[i].begin();
-           !found && (lI != p_ftNode->parentPorts[i].end());
-           lI++) {
-
-         // can not have more then one port in group...
-         int portNum = *lI;
-         IBPort *p_port = p_node->getPort(portNum); // must be if marked parent
-         IBPort *p_remPort = p_port->p_remotePort;
-         if (p_remPort == NULL) continue;
-         int usage = p_remPort->counter1;
-			if (switchPathOnly)
-				usage += p_remPort->counter2;
-
-         if ((p_bestRemPort == NULL) || (usage < bestUsage))
-         {
-            p_bestRemPort = p_remPort;
-            bestUsage = usage;
-				bestGroup = i;
-            // can not have better usage then no usage
-            if (usage == 0)
-               found = 1;
-         }
+  IBPort *p_port;
+  IBNode *p_node = p_ftNode->p_node;
+
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    cout << "-V- assignLftDownWards from:" << p_node->name
+	 << " dlid:" << dLid
+	 << " through port:" << outPortNum
+	 << " switchPathOnly:" << switchPathOnly
+	 << endl;
+
+  if (outPortNum != 0xFF)
+    {
+      // Set FDB to that LID only if not preset or we are on "main" route
+      if (!switchPathOnly || (p_node->getLFTPortForLid(dLid) == IB_LFT_UNASSIGNED)) {
+	p_node->setLFTPortForLid(dLid, outPortNum);
+
+	p_port = p_node->getPort(outPortNum);
+
+	// mark the usage of this port
+	if (p_port) {
+	  if (switchPathOnly) {
+	    p_port->counter2++;
+	  } else {
+	    p_port->counter1++;
+	  }
+	}
       }
-   }
-
-	FatTreeNode *p_remFTNode;
-	// first visit the official path!
-	if (bestGroup != -1)
+    }
+
+  // find the remote port (following the parents list order)
+  // that is not used or less used.
+  int bestUsage = 0;
+  int bestGroup = -1;
+  IBPort *p_bestRemPort = NULL;
+  int found = 0;
+  // go over all child ports
+  for (int i = 0; !found && (i < p_ftNode->parentPorts.size()); i++) {
+    if (!p_ftNode->parentPorts[i].size()) continue;
+
+    for (list<int>::iterator lI = p_ftNode->parentPorts[i].begin();
+	 !found && (lI != p_ftNode->parentPorts[i].end());
+	 lI++) {
+
+      // can not have more then one port in group...
+      int portNum = *lI;
+      IBPort *p_port = p_node->getPort(portNum); // must be if marked parent
+      IBPort *p_remPort = p_port->p_remotePort;
+      if (p_remPort == NULL) continue;
+      int usage = p_remPort->counter1;
+      if (switchPathOnly)
+	usage += p_remPort->counter2;
+
+      if ((p_bestRemPort == NULL) || (usage < bestUsage))
 	{
-		p_remFTNode = getFatTreeNodeByNode(p_bestRemPort->p_node);
-		if (!p_remFTNode)
-			cout << "-E- Fail to get FatTree Node for node:"
-				  << p_bestRemPort->p_node->name << endl;
-		else
-         assignLftDownWards(p_remFTNode, dLid, p_bestRemPort->num,
-									 switchPathOnly);
+	  p_bestRemPort = p_remPort;
+	  bestUsage = usage;
+	  bestGroup = i;
+	  // can not have better usage then no usage
+	  if (usage == 0)
+	    found = 1;
 	}
-
-	// need to go all up all the possible ways to make sure all switch are
-	// connected to all HCAs
-	for (int i = 0; i < p_ftNode->parentPorts.size(); i++) {
-		if (!p_ftNode->parentPorts[i].size()) continue;
-		IBPort* p_remPort;
-		// if we are on the "best group" we know the best port
-		if (bestGroup == i) continue;
-
-		// find the best port of the group i
-		p_bestRemPort = NULL;
-		found = 0;
-      for (list<int>::iterator lI = p_ftNode->parentPorts[i].begin();
-           !found && (lI != p_ftNode->parentPorts[i].end());
-           lI++) {
-
-         // can not have more then one port in group...
-         int portNum = *lI;
-         IBPort *p_port = p_node->getPort(portNum); // must be if marked parent
-         IBPort *p_remPort = p_port->p_remotePort;
-         if (p_remPort == NULL) continue;
-         int usage = p_remPort->counter1 + p_remPort->counter2;
-
-         if ((p_bestRemPort == NULL) || (usage < bestUsage))
-         {
-            p_bestRemPort = p_remPort;
-            bestUsage = usage;
-            // can not have better usage then no usage
-            if (usage == 0)
-               found = 1;
-         }
+    }
+  }
+
+  FatTreeNode *p_remFTNode;
+  // first visit the official path!
+  if (bestGroup != -1) {
+    p_remFTNode = getFatTreeNodeByNode(p_bestRemPort->p_node);
+    if (!p_remFTNode)
+      cout << "-E- Fail to get FatTree Node for node:"
+	   << p_bestRemPort->p_node->name << endl;
+    else
+      assignLftDownWards(p_remFTNode, dLid, p_bestRemPort->num,switchPathOnly,downOnly);
+  }
+
+  // need to go all up all the possible ways to make sure all switch are
+  // connected to all HCAs
+  for (int i = 0; i < p_ftNode->parentPorts.size(); i++) {
+    if (!p_ftNode->parentPorts[i].size()) continue;
+    IBPort* p_remPort;
+    // if we are on the "best group" we know the best port
+    if (bestGroup == i) continue;
+
+    // find the best port of the group i
+    p_bestRemPort = NULL;
+    found = 0;
+    for (list<int>::iterator lI = p_ftNode->parentPorts[i].begin();!found && (lI != p_ftNode->parentPorts[i].end()); lI++) {
+      // can not have more then one port in group...
+      int portNum = *lI;
+      IBPort *p_port = p_node->getPort(portNum); // must be if marked parent
+      IBPort *p_remPort = p_port->p_remotePort;
+      if (p_remPort == NULL) continue;
+      int usage = p_remPort->counter1 + p_remPort->counter2;
+
+      if ((p_bestRemPort == NULL) || (usage < bestUsage)) {
+	p_bestRemPort = p_remPort;
+	bestUsage = usage;
+	// can not have better usage then no usage
+	if (usage == 0)
+	  found = 1;
       }
-		p_remFTNode = getFatTreeNodeByNode(p_bestRemPort->p_node);
-      if (!p_remFTNode)
-         cout << "-E- Fail to get FatTree Node for node:"
-              << p_bestRemPort->p_node->name << endl;
-      else
-         assignLftDownWards(p_remFTNode, dLid, p_bestRemPort->num, 1);
-   }
-
-   // Perform Backward traversal through all ports connected to lower
-   // level switches in-port = out-port
-	assignLftUpWards(p_ftNode, dLid, outPortNum, switchPathOnly);
-
-   return(0);
+    }
+    p_remFTNode = getFatTreeNodeByNode(p_bestRemPort->p_node);
+    if (!p_remFTNode)
+      cout << "-E- Fail to get FatTree Node for node:"
+	   << p_bestRemPort->p_node->name << endl;
+    else
+      assignLftDownWards(p_remFTNode, dLid, p_bestRemPort->num, 1, downOnly);
+  }
+
+  // Perform Backward traversal through all ports connected to lower
+  // level switches in-port = out-port
+  if (!downOnly)
+    assignLftUpWards(p_ftNode, dLid, outPortNum, switchPathOnly);
+
+  return(0);
 }

 // perform the routing by filling in the fabric LFTs
 int FatTree::route()
 {
-   int hcaIdx = 0;
-   int lid; // the target LID we propagate for this time
+  int hcaIdx = 0;
+  int lid; // the target LID we propagate for this time

-   // go over all fat tree nodes of the lowest level
-   vec_byte firstLeafTupple(N, 0);
-   firstLeafTupple[0] = N-1;
-   for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
-        tI != NodeByTupple.end();
-        tI++)
-   {
+  // go over all fat tree nodes of the lowest level
+  vec_byte firstLeafTupple(N, 0);
+  firstLeafTupple[0] = N-1;
+  for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple);
+       tI != NodeByTupple.end();
+       tI++)
+    {

       FatTreeNode *p_ftNode = &((*tI).second);
       IBNode *p_node = p_ftNode->p_node;
@@ -913,186 +932,414 @@ int FatTree::route()

       // go over all child ports
       for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
-         if (!p_ftNode->childPorts[i].size()) continue;
-         // can not have more then one port in group...
-         int portNum = p_ftNode->childPorts[i].front();
-         numPortWithHCA++;
+	if (!p_ftNode->childPorts[i].size()) continue;
+	// can not have more then one port in group...
+	int portNum = p_ftNode->childPorts[i].front();
+	numPortWithHCA++;

-         lid = LidByIdx[hcaIdx];
+	lid = LidByIdx[hcaIdx];

-         if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
-            cout << "-V- Start routing LID:" << lid
-                 << " at HCA idx:" << hcaIdx << endl;
-         assignLftDownWards(p_ftNode, lid, portNum, 0);
+	if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	  cout << "-V- Start routing LID:" << lid
+	       << " at HCA idx:" << hcaIdx << endl;
+	assignLftDownWards(p_ftNode, lid, portNum, 0,0);

-         hcaIdx++;
+	hcaIdx++;
       }

       // for ports without HCA we assign dummy LID but need to
       // propagate
       for (; numPortWithHCA < maxHcasPerLeafSwitch; numPortWithHCA++)
-      {
-         // HACK: for now we can propagate 0 as lid
-         if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	{
+	  // HACK: for now we can propagate 0 as lid
+	  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
             cout << "-V- adding dummy LID to switch:"
                  << p_node->name
                  << " at HCA idx:" << hcaIdx << endl;

-         assignLftDownWards(p_ftNode, 0, 0xFF, 0);
+	  assignLftDownWards(p_ftNode, 0, 0xFF, 0,0);

-         hcaIdx++;
-      }
-   }
+	  hcaIdx++;
+	}
+    }

-	// now go over all switches and route to them
-   for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
-        tI != NodeByTupple.end();
-        tI++)
-   {
+  // now go over all switches and route to them
+  for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
+       tI != NodeByTupple.end();
+       tI++)
+    {

       FatTreeNode *p_ftNode = &((*tI).second);
       IBNode *p_node = p_ftNode->p_node;

-		if (p_node->type != IB_SW_NODE) continue;
+      if (p_node->type != IB_SW_NODE) continue;

-		// find the LID of the switch:
-		int lid = 0;
-	   for (unsigned int pn = 1; (lid == 0) && (pn <= p_node->numPorts); pn++)
-	   {
-			IBPort *p_port = p_node->getPort(pn);
-			if (p_port)
-				lid = p_port->base_lid;
-		}
-		if (lid == 0)
-		{
-			cout << "-E- failed to find LID for switch:" << p_node->name << endl;
-		} else {
-         if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+      // find the LID of the switch:
+      int lid = 0;
+      for (unsigned int pn = 1; (lid == 0) && (pn <= p_node->numPorts); pn++)
+	{
+	  IBPort *p_port = p_node->getPort(pn);
+	  if (p_port)
+	    lid = p_port->base_lid;
+	}
+      if (lid == 0)
+	{
+	  cout << "-E- failed to find LID for switch:" << p_node->name << endl;
+	} else {
+	  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
             cout << "-V- routing to LID:" << lid << " of switch:"
                  << p_node->name << endl;
-			assignLftDownWards(p_ftNode, lid, 0, 0);
-		}
+	  assignLftDownWards(p_ftNode, lid, 0, 0, 0);
 	}
+    }
+
+  return(0);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Optimally Route a permutation in the Fat Tree
+// Prerequisites: Fat Tree structure was built.
+//
+// Algorithm:
+// For each leaf switch (in order)
+//   For each HCA index (even if it does not have a LID - invent one)
+//     Setup downward paths as previously
+//     Traverse up from destination HCA and force output ports as
+//     computed by the optimal routing
+//
+// Data Model:
+// We use the fat tree to get ordering.
+// "main" routing is the routing from HCA to HCA.
+// "side" routing is used from all SW to all HCAs (and dynamic routing)
+// Track port utilization for the "main" routing by the "counter1"
+// Track port utilzation of the "side" routing in "counter2" field of the port
+//
+//////////////////////////////////////////////////////////////////////////////

-   return(0);
+// set FDB values as given in the input
+int FatTree::forceLftUpWards(FatTreeNode *p_ftNode, uint16_t dLid, vec_int ports)
+{
+  // go over all steps
+  for (int i=0; i<ports.size(); i++) {
+    // if LID is going down we are finished
+    if (p_ftNode->goingDown(dLid))
+      return 0;
+    // sanity check
+    if ((ports[i] < 0) || (ports[i] > p_ftNode->parentPorts.size())) {
+      cout << "-E- Illegal port number!" << endl;
+      return 1;
+    }
+    IBNode *p_node = p_ftNode->p_node;
+    int portNum = p_ftNode->parentPorts[ports[i]].front();
+
+    IBPort* p_port = p_node->getPort(portNum);
+    if (!p_port || !p_port->p_remotePort) {
+      cout << "-E- Ports do not exist!" << endl;
+      return 1;
+    }
+    IBNode* p_remNode = p_port->p_remotePort->p_node;
+    // Set LFT entry
+    p_node->setLFTPortForLid(dLid, portNum);
+    // Move to next step
+    p_ftNode = getFatTreeNodeByNode(p_remNode);
+  }
+  return 0;
+}
+
+// perform the routing by filling in the fabric LFTs
+int FatTree::permRoute(vector<string> src, vector<string> dst)
+{
+  int hcaIdx = 0;
+  int lid; // the target LID we propagate for this time
+  // extract radix
+  vec_byte tmpLeafTupple(N,0);
+  tmpLeafTupple[0] = N-1;
+  FatTreeNode* p_ftN = &NodeByTupple[tmpLeafTupple];
+  int radix = 0;
+  for (int i = 0; i < p_ftN->parentPorts.size(); i++)
+    if(p_ftN->parentPorts[i].size())
+      radix++;
+
+  // create requests vector
+  vec_int req;
+  req.resize(src.size());
+  for (int i=0; i<src.size(); i++)
+    req[IdxByName[src[i]]] = IdxByName[dst[i]];
+
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    cout << "-V- Tree height: " << N << ", radix: " << radix << endl;
+
+  // create routing system
+  RouteSys rS(radix,N);
+  // push requests
+  rS.pushRequests(req);
+  // compute optimal routing
+  vec_vec_int routeOutput;
+  rS.doRouting(routeOutput);
+
+  // build the LFTs
+  // go over all fat tree nodes of the lowest level
+  vec_byte firstLeafTupple(N, 0);
+  firstLeafTupple[0] = N-1;
+  // First prepare downwards routing
+  for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple); tI != NodeByTupple.end(); tI++) {
+    FatTreeNode *p_ftNode = &((*tI).second);
+    IBNode *p_node = p_ftNode->p_node;
+    // we need to track the number of ports to handle case of missing HCAs
+    int numPortWithHCA = 0;
+
+    // go over all child ports
+    for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
+      if (!p_ftNode->childPorts[i].size()) continue;
+      // can not have more then one port in group...
+      int portNum = p_ftNode->childPorts[i].front();
+      numPortWithHCA++;
+
+      lid = LidByIdx[hcaIdx];
+
+      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	cout << "-V- Start routing LID:" << lid
+	     << " at HCA idx:" << hcaIdx << endl;
+
+      // Assign downward LFT values
+      if (assignLftDownWards(p_ftNode, lid, portNum, 0, 1))
+	return 1;
+
+      hcaIdx++;
+    }
+
+    // for ports without HCA we assign dummy LID but need to
+    // propagate
+    for (; numPortWithHCA < maxHcasPerLeafSwitch; numPortWithHCA++) {
+      // HACK: for now we can propagate 0 as lid
+      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	cout << "-V- adding dummy LID to switch:"
+	     << p_node->name
+	     << " at HCA idx:" << hcaIdx << endl;
+
+      assignLftDownWards(p_ftNode, 0, 0xFF, 0, 0);
+
+      hcaIdx++;
+    }
+  }
+
+  // Now prepare upwards routing
+  hcaIdx = 0;
+  for (map_tupple_ftnode::iterator tI = NodeByTupple.find(firstLeafTupple); tI != NodeByTupple.end(); tI++) {
+    FatTreeNode *p_ftNode = &((*tI).second);
+    IBNode *p_node = p_ftNode->p_node;
+
+    // go over all child ports
+    for (int i = 0; i < p_ftNode->childPorts.size(); i++) {
+      if (!p_ftNode->childPorts[i].size()) continue;
+      // can not have more then one port in group...
+      int portNum = p_ftNode->childPorts[i].front();
+
+      lid = LidByIdx[hcaIdx];
+
+      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	cout << "-V- Start routing LID:" << lid
+	     << " at HCA idx:" << hcaIdx << endl;
+
+      lid = LidByIdx[req[hcaIdx]];
+
+      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	cout << "-V- Creating routing from "<<hcaIdx<<"-"<<src[hcaIdx]<<"(lid: "<<LidByIdx[hcaIdx]<<") to "
+	     <<req[hcaIdx]<<"-"<<dst[hcaIdx]<<"(lid: "<<LidByIdx[req[hcaIdx]]<<") using up-ports:" << endl;
+
+      // Assign upward LFT values
+      if (forceLftUpWards(p_ftNode, lid, routeOutput[hcaIdx]))
+	return 1;
+
+      hcaIdx++;
+    }
+
+  }
+
+  // now go over all switches and route to them
+  for (map_tupple_ftnode::iterator tI = NodeByTupple.begin();
+       tI != NodeByTupple.end();
+       tI++)
+    {
+
+      FatTreeNode *p_ftNode = &((*tI).second);
+      IBNode *p_node = p_ftNode->p_node;
+
+      if (p_node->type != IB_SW_NODE) continue;
+
+      // find the LID of the switch:
+      int lid = 0;
+      for (unsigned int pn = 1; (lid == 0) && (pn <= p_node->numPorts); pn++)
+	{
+	  IBPort *p_port = p_node->getPort(pn);
+	  if (p_port)
+	    lid = p_port->base_lid;
+	}
+      if (lid == 0)
+	{
+	  cout << "-E- failed to find LID for switch:" << p_node->name << endl;
+	} else {
+	  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+            cout << "-V- routing to LID:" << lid << " of switch:"
+                 << p_node->name << endl;
+	  assignLftDownWards(p_ftNode, lid, 0, 0, 0);
+	}
+    }
+
+  return(0);
 }

 // dumps out the HCA order into a file ftree.hca
 void FatTree::dumpHcaOrder()
 {
-   ofstream f("ftree.hcas");
-   for (unsigned int i = 0; i < LidByIdx.size(); i++)
-   {
+  ofstream f("ftree.hcas");
+  for (unsigned int i = 0; i < LidByIdx.size(); i++)
+    {
       // find the HCA node by the base lid given
       unsigned int lid = LidByIdx[i];
       if (lid <= 0)
-      {
-         f << "DUMMY_HOST LID" << endl;
-      }
+	{
+	  f << "DUMMY_HOST LID" << endl;
+	}
       else
-      {
-         IBPort *p_port = p_fabric->PortByLid[lid];
-
-         if (! p_port)
-         {
-            cout << "-E- fail to find port for lid:" << lid << endl;
-            f << "ERROR_HOST LID" << endl;
-         }
-         else
-         {
-            f << p_port->p_node->name << "/" << p_port->num << " " << lid << endl;
-         }
-      }
-   }
-   f.close();
+	{
+	  IBPort *p_port = p_fabric->PortByLid[lid];
+
+	  if (! p_port)
+	    {
+	      cout << "-E- fail to find port for lid:" << lid << endl;
+	      f << "ERROR_HOST LID" << endl;
+	    }
+	  else
+	    {
+	      f << p_port->p_node->name << "/" << p_port->num << " " << lid << endl;
+	    }
+	}
+    }
+  f.close();
 }

 void FatTree::dump()
 {
-   unsigned int level, prevLevel = 2;
-   cout << "---------------------------------- FAT TREE DUMP -----------------------------" << endl;
-   for (map_tupple_ftnode::const_iterator tI = NodeByTupple.begin();
-        tI != NodeByTupple.end();
-        tI++)
-   {
+  unsigned int level, prevLevel = 2;
+  cout << "---------------------------------- FAT TREE DUMP -----------------------------" << endl;
+  for (map_tupple_ftnode::const_iterator tI = NodeByTupple.begin();
+       tI != NodeByTupple.end();
+       tI++)
+    {
       level = (*tI).first[0];
       if (level != prevLevel)
-      {
-         prevLevel = level;
-         cout << "LEVEL:" << level << endl;
-      }
+	{
+	  prevLevel = level;
+	  cout << "LEVEL:" << level << endl;
+	}

       FatTreeNode const *p_ftNode = &((*tI).second);
       cout << "    " << p_ftNode->p_node->name << " tupple:" << getTuppleStr((*tI).first) << endl;
       for (unsigned int i = 0; i < p_ftNode->parentPorts.size(); i++)
-      {
-         if (p_ftNode->parentPorts[i].size())
-         {
-            cout << "       Parents:" << i << endl;
-            for (list< int >::const_iterator lI = p_ftNode->parentPorts[i].begin();
-                 lI != p_ftNode->parentPorts[i].end();
-                 lI++)
-            {
-               unsigned int portNum = *lI;
-               cout << "          p:" << portNum << " ";
-               IBPort *p_port = p_ftNode->p_node->getPort(portNum);
-               if (!p_port || !p_port->p_remotePort)
-                  cout << " ERROR " << endl;
-               else
-                  cout << p_port->p_remotePort->p_node->name << endl;
-            }
-         }
-      }
+	{
+	  if (p_ftNode->parentPorts[i].size())
+	    {
+	      cout << "       Parents:" << i << endl;
+	      for (list< int >::const_iterator lI = p_ftNode->parentPorts[i].begin();
+		   lI != p_ftNode->parentPorts[i].end();
+		   lI++)
+		{
+		  unsigned int portNum = *lI;
+		  cout << "          p:" << portNum << " ";
+		  IBPort *p_port = p_ftNode->p_node->getPort(portNum);
+		  if (!p_port || !p_port->p_remotePort)
+		    cout << " ERROR " << endl;
+		  else
+		    cout << p_port->p_remotePort->p_node->name << endl;
+		}
+	    }
+	}

       for (unsigned int i = 0; i < p_ftNode->childPorts.size(); i++)
-      {
-         if (p_ftNode->childPorts[i].size())
-         {
-            cout << "       Children:" << i << endl;
-            for (list< int >::const_iterator lI = p_ftNode->childPorts[i].begin();
-                 lI != p_ftNode->childPorts[i].end();
-                 lI++)
-            {
-               unsigned int portNum = *lI;
-               cout << "         p:" << portNum << " ";
-               IBPort *p_port = p_ftNode->p_node->getPort(portNum);
-               if (!p_port || !p_port->p_remotePort)
-                  cout << "ERROR " << endl;
-               else
-                  cout << p_port->p_remotePort->p_node->name << endl;
-            }
-         }
-      }
-   }
+	{
+	  if (p_ftNode->childPorts[i].size())
+	    {
+	      cout << "       Children:" << i << endl;
+	      for (list< int >::const_iterator lI = p_ftNode->childPorts[i].begin();
+		   lI != p_ftNode->childPorts[i].end();
+		   lI++)
+		{
+		  unsigned int portNum = *lI;
+		  cout << "         p:" << portNum << " ";
+		  IBPort *p_port = p_ftNode->p_node->getPort(portNum);
+		  if (!p_port || !p_port->p_remotePort)
+		    cout << "ERROR " << endl;
+		  else
+		    cout << p_port->p_remotePort->p_node->name << endl;
+		}
+	    }
+	}
+    }

-   // now dump the HCA by index:
-   cout << "\nLID BY INDEX" << endl;
-   for (unsigned int i = 0; i < LidByIdx.size(); i++) {
-      int lid = LidByIdx[i];
-      IBPort *p_port;
+  // now dump the HCA by index:
+  cout << "\nLID BY INDEX" << endl;
+  for (unsigned int i = 0; i < LidByIdx.size(); i++) {
+    int lid = LidByIdx[i];
+    IBPort *p_port;

-      if (lid != 0)
+    if (lid != 0)
       {
-         p_port = p_fabric->PortByLid[lid];
-         if (p_port)
-         {
+	p_port = p_fabric->PortByLid[lid];
+	if (p_port)
+	  {
             cout << "   " << i << " -> " << LidByIdx[i]
                  << " " << p_port->getName() << endl;
-         }
-         else
-         {
+	  }
+	else
+	  {
             cout << "   ERROR : no port for lid:" << lid << endl;
-         }
+	  }
       }
-   }
+  }
 }

 // perform the whole thing
 int FatTreeAnalysis(IBFabric *p_fabric)
 {
-   FatTree ftree(p_fabric);
-   if (!ftree.isValid) return(1);
-   ftree.dumpHcaOrder();
-   if (ftree.route()) return(1);
-   return(0);
+  FatTree ftree(p_fabric);
+  if (!ftree.isValid) return(1);
+  ftree.dumpHcaOrder();
+  if (ftree.route()) return(1);
+  return(0);
+}
+
+int FatTreeRouteByPermutation( IBFabric *p_fabric, char *srcs, char* dsts )
+{
+  vector <string> sources;
+  vector <string> destinations;
+
+  char *s1, *s2, *cp;
+  char *saveptr;
+  s1 = strdup(srcs);
+  s2 = strdup(dsts);
+  cp = strtok_r(s1, " \t", &saveptr);
+  do {
+    sources.push_back(cp);
+    cp = strtok_r(NULL, " \t", &saveptr);
+  }
+  while (cp);
+
+  cp = strtok_r(s2, " \t", &saveptr);
+  do {
+    destinations.push_back(cp);
+    cp = strtok_r(NULL, " \t", &saveptr);
+  }
+  while (cp);
+
+  if (sources.size() != destinations.size()) {
+    cout << "-E- Different number of sources and destinations" << endl;
+    return 1;
+  }
+
+  FatTree ftree(p_fabric);
+  if (!ftree.isValid) return(1);
+  //ftree.dumpHcaOrder();
+  if (ftree.permRoute(sources, destinations)) return(1);
+  return(0);
 }
diff --git a/ibdm/datamodel/Makefile.am b/ibdm/datamodel/Makefile.am
index 0003bba..b0958fc 100644
--- a/ibdm/datamodel/Makefile.am
+++ b/ibdm/datamodel/Makefile.am
@@ -33,7 +33,7 @@
 #--

 # we would like to export these headers during install
-pkginclude_HEADERS = git_version.h Fabric.h \
+pkginclude_HEADERS = git_version.h Fabric.h RouteSys.h Bipartite.h \
 		 SubnMgt.h TraceRoute.h CredLoops.h Regexp.h \
 		 TopoMatch.h SysDef.h Congestion.h ibnl_parser.h ibdm.i

@@ -53,7 +53,7 @@ AM_YFLAGS = -d
 # libibdm - the TCL shared lib to be used as a package
 # ibdmsh - the TCL shell

-common_SOURCES = Fabric.cpp \
+common_SOURCES = Fabric.cpp RouteSys.cc Bipartite.cc \
 	SubnMgt.cpp TraceRoute.cpp CredLoops.cpp TopoMatch.cpp SysDef.cpp \
 	LinkCover.cpp Congestion.cpp ibnl_parser.cc ibnl_scanner.cc FatTree.cpp

diff --git a/ibdm/datamodel/RouteSys.cc b/ibdm/datamodel/RouteSys.cc
new file mode 100644
index 0000000..6c33224
--- /dev/null
+++ b/ibdm/datamodel/RouteSys.cc
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "RouteSys.h"
+#include "Bipartite.h"
+
+// Helper power function
+
+int RouteSys::myPow(int base, int pow) {
+  int res = 1;
+  for (int i=0; i<pow; i++)
+    res = res*base;
+
+  return res;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+// C'tor
+
+RouteSys::RouteSys(int rad, int hgth, int s):radix(rad),height(hgth),step(s) {
+  // Calculate num in/out ports
+  ports = myPow(rad,height);
+  // Create and init ports
+  inPorts = new inputData[ports];
+  outPorts = new bool[ports];
+
+  for (int i=0; i<ports; i++) {
+    inPorts[i].used = false;
+    outPorts[i] = false;
+  }
+  // Create sub-systems
+  if (height > 1) {
+    subSys = new RouteSys* [rad];
+    for (int i=0; i<radix; i++)
+      subSys[i] = new RouteSys(rad,height-1,s+1);
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+// D'tor
+
+RouteSys::~RouteSys() {
+  // Just kill everything
+  delete[] inPorts;
+  delete[] outPorts;
+
+  if (height > 1) {
+    for (int i=0; i<radix; i++)
+      delete subSys[i];
+    delete[] subSys;
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Add requests to the system
+
+int RouteSys::pushRequests(vec_int req)
+{
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    cout << "-V- Pushing requests" << endl;
+
+  for (int i=0; i<req.size(); i++) {
+    // Extract comm pair
+    int src = i;
+    int dst = req[i];
+
+    if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+      cout << "-V- Req: " << src << "->" << dst << endl;
+
+    // Check port existence
+    if ((src >= ports) || (dst >= ports)) {
+      cout << "-E- Port index exceeds num ports! Ports: " << ports << ", src: " << src << ", dst: " << dst << endl;
+      return 1;
+    }
+    // Check port availability
+    if (inPorts[src].used || outPorts[dst]) {
+      cout << "-E- Port already used! src: " << src << ", dst: " << dst << endl;
+      return 1;
+    }
+    // Mark ports as used
+    inPorts[src].used = true;
+    inPorts[src].src = src;
+    inPorts[src].dst = dst;
+    inPorts[src].inputNum = src;
+    inPorts[src].outNum = dst;
+
+    outPorts[dst] = true;
+  }
+  return 0;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+// Perform routing after requests were pushed
+
+int RouteSys::doRouting (vec_vec_int& out)
+{
+  // Atomic system nothing to do
+  if (ports == radix)
+    return 0;
+
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    cout << "-V- Starting routing, step: " << step << ", height " << height << endl;
+
+  // Init the output structure
+  if (out.size() < ports) {
+    out.resize(ports);
+    for (int i=0; i<ports; i++) {
+      out[i].resize(height-1);
+      for (int j=0; j<height-1; j++)
+	out[i][j] = -1;
+    }
+  }
+
+  // We use three graph arrays for coloring
+  Bipartite** buff[3];
+  buff[0] = new Bipartite*[radix];
+  buff[1] = new Bipartite*[radix];
+  buff[2] = new Bipartite*[radix];
+
+  for (int i=0; i<radix; i++)
+    buff[0][i] = buff[1][i] = buff[2][i] = NULL;
+
+  // Number of colors derived through perfect matching
+  int matchGraphs = 0;
+  int currRadix = radix;
+  int idx = 0;
+  int idx_new = 1;
+
+  // Create the first graph
+  if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+    cout << "-V- Ports: " << ports << ", radix: " << radix << endl;
+  buff[0][0] = new Bipartite(ports/radix,radix);
+
+  // Add connections
+  for (int i=0; i<ports; i++)
+    buff[0][0]->connectNodes(i/radix,inPorts[i].outNum/radix,inPorts[i]);
+
+  // Now decompose the graph to radix-1 graphs
+  while (1 < currRadix) {
+    for (int i=0; buff[idx][i] && i<radix; i++) {
+      // Odd radix, perfect matching is required
+      if (currRadix % 2) {
+	if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	  cout << "-V- Perfect matching is required" << endl;
+	buff[2][matchGraphs] = buff[idx][i]->maximumMatch();
+	matchGraphs++;
+      }
+      // Now we can perform Euler decomposition
+      if (FabricUtilsVerboseLevel & FABU_LOG_VERBOSE)
+	cout << "-V- Performing Euler decompostion" << endl;
+      if (2*i+1 >= radix) {
+	cout << "-E- Graph index illegal"<< endl;
+	return 1;
+      }
+      buff[idx][i]->decompose(&buff[idx_new][2*i],&buff[idx_new][2*i+1]);
+      delete buff[idx][i];
+      buff[idx][i] = NULL;
+    }
+    idx = idx_new;
+    idx_new = (idx_new+1)%2;
+    currRadix = currRadix / 2;
+  }
+  // Collect all result graphs to array buff[2][i]
+  for (int i=matchGraphs; i<radix; i++)
+    buff[2][i] = buff[idx][i-matchGraphs];
+
+  // Apply decomposition results
+  for (int i=0; i < radix; i++) {
+    Bipartite* G = buff[2][i];
+    if (!G->setIterFirst()) {
+      cout << "-E- Empty graph found!" << endl;
+      return 1;
+    }
+    bool stop = false;
+    while (!stop) {
+      inputData d = G->getReqDat();
+      // Build output
+      if (out.size() <= d.src || out[d.src].size() <= step) {
+	cout << "Output index illegal" << endl;
+	return 1;
+      }
+      out[d.src][step] = i;
+      // Add request to sub-system
+      RouteSys* sub = subSys[i];
+      int inPort = d.inputNum/radix;
+      int outPort = d.outNum/radix;
+      if (sub->inPorts[inPort].used || sub->outPorts[outPort]) {
+	cout << "Port already used! inPort: " << inPort << ", outPort: " << outPort << endl;
+	return 1;
+      }
+      // Mark ports as used
+      sub->inPorts[inPort].used = true;
+      sub->inPorts[inPort].src = d.src;
+      sub->inPorts[inPort].dst = d.dst;
+      sub->inPorts[inPort].inputNum = inPort;
+      sub->inPorts[inPort].outNum = outPort;
+
+      outPorts[outPort] = true;
+      // Next request
+      if (!G->setIterNext()) {
+	stop = true;
+      }
+    }
+  }
+
+  // Free memory
+  for (int i=0; i<radix; i++)
+    delete buff[2][i];
+
+  delete[] buff[0];
+  delete[] buff[1];
+  delete[] buff[2];
+
+  // Run routing on sub-systems
+  for (int i=0; i<radix; i++) {
+    if (subSys[i]->doRouting(out)) {
+      cout << "-E- Subsystem routing failed!" << endl;
+      return 1;
+    }
+  }
+
+  return 0;
+}
diff --git a/ibdm/datamodel/RouteSys.h b/ibdm/datamodel/RouteSys.h
new file mode 100644
index 0000000..a5f5cfc
--- /dev/null
+++ b/ibdm/datamodel/RouteSys.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+/*
+ * Fabric Utilities Project
+ *
+ * Permutation Routing System abstraction Header file
+ *
+ * Author: Vladimir Zdornov, Mellanox Technologies
+ *
+ */
+
+#ifndef IBDM_ROUTE_SYS_H_
+#define IBDM_ROUTE_SYS_H_
+
+#include <unistd.h>
+#include "Fabric.h"
+
+using namespace std;
+
+class inputData
+{
+ public:
+  bool used;
+  int src;
+  int dst;
+  int inputNum;
+  int outNum;
+
+  inputData():used(false){}
+};
+
+// Routing system abstraction class
+class RouteSys {
+  // Basic parameters
+  int radix;
+  int height;
+  int step;
+  int ports;
+  // Ports data
+  inputData* inPorts;
+  bool* outPorts;
+  // Sub-systems
+  RouteSys** subSys;
+
+  int myPow(int base, int pow);
+
+ public:
+  // Constructor
+  RouteSys(int rad, int hgth, int s=0);
+  // Add communication requests to the system
+  // Format: i -> req[i]
+  // Restriction: Requests must form a complete permutation
+  int pushRequests(vec_int req);
+  // D'tor
+  ~RouteSys();
+  // Get input data for input port i
+  inputData& getInput(int i);
+  // Is output port i busy already?
+  bool& getOutput(int i);
+
+  // Invoke the system level routing
+  int doRouting(vec_vec_int& out);
+};
+
+
+#endif
diff --git a/ibdm/datamodel/SubnMgt.h b/ibdm/datamodel/SubnMgt.h
index 8e68818..99e763f 100644
--- a/ibdm/datamodel/SubnMgt.h
+++ b/ibdm/datamodel/SubnMgt.h
@@ -134,5 +134,9 @@ LinkCoverageAnalysis(IBFabric *p_fabric, list_pnode rootNodes);
 int
 FatTreeAnalysis(IBFabric *p_fabric);

+// Perform FatTree optimal permutation routing
+int
+FatTreeRouteByPermutation(IBFabric* p_fabric, char* srcs, char* dsts);
+
 #endif /* IBDM_SUBN_MGT_H */

diff --git a/ibdm/datamodel/ibdm.i b/ibdm/datamodel/ibdm.i
index b4541ad..7f3e0dc 100644
--- a/ibdm/datamodel/ibdm.i
+++ b/ibdm/datamodel/ibdm.i
@@ -1263,6 +1263,9 @@ int ibdmFatTreeRoute(IBFabric *p_fabric, list_pnode rootNodes);
 %name(ibdmFatTreeAnalysis) int FatTreeAnalysis(IBFabric *p_fabric);
 // Performs FatTree structural analysis

+%name(ibdmFatTreeRouteByPermutation) int FatTreeRouteByPermutation(IBFabric *p_fabric, char* srcs, char* dsts);
+// Performs optimal permutation routing in FatTree
+
 %name(ibdmVerifyCAtoCARoutes)
  int SubnMgtVerifyAllCaToCaRoutes(IBFabric *p_fabric);
 // Verify point to point connectivity
-- 
1.5.1.4




More information about the general mailing list