[ofa-general][PATCH] mlx4_core: Multi Protocol support

Yevgeny Petrilin yevgenyp at mellanox.co.il
Wed Apr 16 00:59:02 PDT 2008


Multi Protocol support supplies the user with the ability to run
the InfiniBand and Ethernet protocols on the same HCA
(separately or at the same time).

Main changes to mlx4_core:
         1.  The mlx4 device now holds the actual protocol type for each port.
             The port types are requested through module parameters or through
             the sysfs interface (e.g. "port_type=eth,eth" at module load
             time, or writing "eth,eth" to the per-device "mlx4_port_type"
             sysfs attribute). The requested types are verified against
             firmware capabilities in order to determine the actual port
             protocol.
         2.  The driver now manages the MAC and VLAN tables used by consumers
             of the low level driver. Corresponding commands were added
             (usage sketch below).
         3.  Completion EQs are created per CPU. Newly created CQs are
             attached to an EQ by a round-robin algorithm, unless a specific
             EQ was requested (sketch below).
         4.  Support for creating collapsed CQs was added.
         5.  Additional reserved QP ranges were added: one range for each
             consumer of the low level driver (IB, Ethernet, FCoE).
         6.  The QP allocation process changed. First a QP range is reserved,
             then QPs can be allocated from that range. This supports the
             ability to allocate consecutive QPs (sketch below). Appropriate
             changes were made in the allocation mechanism.
         7.  Actions common to all HW resource management (doorbell
             allocation, buffer allocation, MTT write) were moved to the
             low level driver (sketch below).
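
The sketches below illustrate the new interfaces for consumers of the
low level driver; they are not part of the patch, and error handling
is omitted.

Registering a MAC (and, optionally, a VLAN) for an Ethernet port:

	int mac_idx, vlan_idx, err;

	/* dev, port, mac and vid are assumed to exist in the caller */
	err = mlx4_register_mac(dev, port, mac, &mac_idx);
	if (!err)
		err = mlx4_register_vlan(dev, port, vid, &vlan_idx);
	/* ... use the returned table indices ... */
	mlx4_unregister_vlan(dev, port, vlan_idx);
	mlx4_unregister_mac(dev, port, mac_idx);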
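Attaching a CQ to a completion EQ: a vector of 0 lets the driver pick
an EQ by round robin, a value in 1..num_comp_eqs selects a specific
EQ, and the last argument requests a collapsed CQ:

	/* nent, mtt, uar and db_rec are the caller's, as before */
	err = mlx4_cq_alloc(dev, nent, &mtt, &uar, db_rec, &cq,
			    0 /* vector: 0 selects round robin */,
			    0 /* not a collapsed CQ */);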
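Reserving and allocating consecutive QPs, for example eight of them
aligned to eight:

	int i, base, err;
	struct mlx4_qp qp[8];

	err = mlx4_qp_reserve_range(dev, 8, 8, &base);
	for (i = 0; !err && i < 8; ++i)
		err = mlx4_qp_alloc(dev, base + i, &qp[i]);
	/* ... */
	for (i = 0; i < 8; ++i)
		mlx4_qp_free(dev, &qp[i]);
	mlx4_qp_release_range(dev, base, 8);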
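Allocating the common HW queue resources (doorbell, buffer and MTT
entries) in one call instead of open-coding them in each consumer:

	struct mlx4_hwq_resources wqres;

	err = mlx4_alloc_hwq_res(dev, &wqres, &dev->pdev->dev,
				 size, max_direct);
	/* on success, wqres.db, wqres.buf and wqres.mtt are ready */
	mlx4_free_hwq_res(dev, &wqres, &dev->pdev->dev, size);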

Signed-off-by: Yevgeny Petrilin <yevgenyp at mellanox.co.il>
Signed-off-by: Oren Duer <oren at mellanox.co.il>
Reviewed-by: Eli Cohen <eli at mellanox.co.il>
---
  drivers/net/mlx4/Makefile   |    2 +-
  drivers/net/mlx4/alloc.c    |  258 ++++++++++++++++++++++++++++++++++-
  drivers/net/mlx4/cq.c       |   26 +++-
  drivers/net/mlx4/eq.c       |   41 ++++--
  drivers/net/mlx4/fw.c       |   18 ++-
  drivers/net/mlx4/fw.h       |    7 +-
  drivers/net/mlx4/main.c     |  315 +++++++++++++++++++++++++++++++++++++++++--
  drivers/net/mlx4/mlx4.h     |   50 +++++++-
  drivers/net/mlx4/mr.c       |  157 ++++++++++++++++++++--
  drivers/net/mlx4/port.c     |  282 ++++++++++++++++++++++++++++++++++++++
  drivers/net/mlx4/qp.c       |  133 ++++++++++++++++---
  include/linux/mlx4/cmd.h    |    9 ++
  include/linux/mlx4/device.h |  118 ++++++++++++++++-
  include/linux/mlx4/qp.h     |   19 +++-
  14 files changed, 1354 insertions(+), 81 deletions(-)
  create mode 100644 drivers/net/mlx4/port.c

diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index 0952a65..f4932d8 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -1,4 +1,4 @@
  obj-$(CONFIG_MLX4_CORE)		+= mlx4_core.o

  mlx4_core-y :=	alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \
-		mr.o pd.o profile.o qp.o reset.o srq.o
+		mr.o pd.o profile.o qp.o reset.o srq.o port.o
diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index 75ef9d0..044614f 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -44,15 +44,19 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)

  	spin_lock(&bitmap->lock);

-	obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last);
-	if (obj >= bitmap->max) {
+	obj = find_next_zero_bit(bitmap->table,
+				 bitmap->effective_max,
+				 bitmap->last);
+	if (obj >= bitmap->effective_max) {
  		bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
-		obj = find_first_zero_bit(bitmap->table, bitmap->max);
+		obj = find_first_zero_bit(bitmap->table, bitmap->effective_max);
  	}

-	if (obj < bitmap->max) {
+	if (obj < bitmap->effective_max) {
  		set_bit(obj, bitmap->table);
-		bitmap->last = (obj + 1) & (bitmap->max - 1);
+		bitmap->last = (obj + 1);
+		if (bitmap->last == bitmap->effective_max)
+			bitmap->last = 0;
  		obj |= bitmap->top;
  	} else
  		obj = -1;
@@ -73,7 +77,84 @@ void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj)
  	spin_unlock(&bitmap->lock);
  }

-int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved)
+static unsigned long find_next_zero_string_aligned(unsigned long *bitmap,
+						   u32 start, u32 nbits,
+						   int len, int align)
+{
+	unsigned long end, i;
+
+again:
+	start = ALIGN(start, align);
+	while ((start < nbits) && test_bit(start, bitmap))
+		start += align;
+	if (start >= nbits)
+		return -1;
+
+	end = start+len;
+	if (end > nbits)
+		return -1;
+	for (i = start+1; i < end; i++) {
+		if (test_bit(i, bitmap)) {
+			start = i+1;
+			goto again;
+		}
+	}
+	return start;
+}
+
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align)
+{
+	u32 obj, i;
+
+	if (likely(cnt == 1 && align == 1))
+		return mlx4_bitmap_alloc(bitmap);
+
+	spin_lock(&bitmap->lock);
+
+	obj = find_next_zero_string_aligned(bitmap->table, bitmap->last,
+					    bitmap->effective_max, cnt, align);
+	if (obj >= bitmap->effective_max) {
+		bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
+		obj = find_next_zero_string_aligned(bitmap->table, 0,
+						    bitmap->effective_max,
+						    cnt, align);
+	}
+
+	if (obj < bitmap->effective_max) {
+		for (i = 0; i < cnt; i++)
+			set_bit(obj+i, bitmap->table);
+		if (obj == bitmap->last) {
+			bitmap->last = (obj + cnt);
+			if (bitmap->last >= bitmap->effective_max)
+				bitmap->last = 0;
+		}
+		obj |= bitmap->top;
+	} else
+		obj = -1;
+
+	spin_unlock(&bitmap->lock);
+
+	return obj;
+}
+
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
+{
+	u32	i;
+
+	obj &= bitmap->max - 1;
+
+	spin_lock(&bitmap->lock);
+	for (i = 0; i < cnt; i++)
+		clear_bit(obj+i, bitmap->table);
+	bitmap->last = min(bitmap->last, obj);
+	bitmap->top = (bitmap->top + bitmap->max) & bitmap->mask;
+	spin_unlock(&bitmap->lock);
+}
+
+int mlx4_bitmap_init_with_effective_max(struct mlx4_bitmap *bitmap,
+					u32 num, u32 mask, u32 reserved,
+					u32 effective_max)
  {
  	int i;

@@ -85,6 +166,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved
  	bitmap->top  = 0;
  	bitmap->max  = num;
  	bitmap->mask = mask;
+	bitmap->effective_max = effective_max;
  	spin_lock_init(&bitmap->lock);
  	bitmap->table = kzalloc(BITS_TO_LONGS(num) * sizeof (long), GFP_KERNEL);
  	if (!bitmap->table)
@@ -96,6 +178,13 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved
  	return 0;
  }

+int mlx4_bitmap_init(struct mlx4_bitmap *bitmap,
+		     u32 num, u32 mask, u32 reserved)
+{
+	return mlx4_bitmap_init_with_effective_max(bitmap, num, mask,
+						   reserved, num);
+}
+
  void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap)
  {
  	kfree(bitmap->table);
@@ -196,3 +285,160 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
  	}
  }
  EXPORT_SYMBOL_GPL(mlx4_buf_free);
+
+
+static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
+{
+	struct mlx4_db_pgdir *pgdir;
+
+	pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
+	if (!pgdir)
+		return NULL;
+
+	bitmap_fill(pgdir->order1, MLX4_DB_PER_PAGE / 2);
+	pgdir->bits[0] = pgdir->order0;
+	pgdir->bits[1] = pgdir->order1;
+	pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
+					    &pgdir->db_dma, GFP_KERNEL);
+	if (!pgdir->db_page) {
+		kfree(pgdir);
+		return NULL;
+	}
+
+	return pgdir;
+}
+
+static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir,
+				    struct mlx4_db *db, int order)
+{
+	int o;
+	int i;
+
+	for (o = order; o <= 1; ++o) {
+		i = find_first_bit(pgdir->bits[o], MLX4_DB_PER_PAGE >> o);
+		if (i < MLX4_DB_PER_PAGE >> o)
+			goto found;
+	}
+
+	return -ENOMEM;
+
+found:
+	clear_bit(i, pgdir->bits[o]);
+
+	i <<= o;
+
+	if (o > order)
+		set_bit(i ^ 1, pgdir->bits[order]);
+
+	db->pgdir = pgdir;
+	db->index   = i;
+	db->db      = pgdir->db_page + db->index;
+	db->dma     = pgdir->db_dma  + db->index * 4;
+	db->order   = order;
+
+	return 0;
+}
+
+static int mlx4_db_alloc(struct mlx4_dev *dev, struct device *dma_device,
+			 struct mlx4_db *db, int order)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_db_pgdir *pgdir;
+	int ret = 0;
+
+	mutex_lock(&priv->pgdir_mutex);
+
+	list_for_each_entry(pgdir, &priv->pgdir_list, list)
+		if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
+			goto out;
+
+	pgdir = mlx4_alloc_db_pgdir(dma_device);
+	if (!pgdir) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	list_add(&pgdir->list, &priv->pgdir_list);
+
+	/* This should never fail -- we just allocated an empty page: */
+	WARN_ON(mlx4_alloc_db_from_pgdir(pgdir, db, order));
+
+out:
+	mutex_unlock(&priv->pgdir_mutex);
+
+	return ret;
+}
+
+static void mlx4_db_free(struct mlx4_dev *dev, struct device *dma_device,
+			 struct mlx4_db *db)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int o;
+	int i;
+
+	mutex_lock(&priv->pgdir_mutex);
+
+	o = db->order;
+	i = db->index;
+
+	if (db->order == 0 && test_bit(i ^ 1, db->pgdir->order0)) {
+		clear_bit(i ^ 1, db->pgdir->order0);
+		++o;
+	}
+
+	i >>= o;
+	set_bit(i, db->pgdir->bits[o]);
+
+	if (bitmap_full(db->pgdir->order1, MLX4_DB_PER_PAGE / 2)) {
+		dma_free_coherent(dma_device, PAGE_SIZE,
+				  db->pgdir->db_page, db->pgdir->db_dma);
+		list_del(&db->pgdir->list);
+		kfree(db->pgdir);
+	}
+
+	mutex_unlock(&priv->pgdir_mutex);
+}
+
+int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
+		       struct device *dma_device, int size, int max_direct)
+{
+	int err;
+
+	err = mlx4_db_alloc(dev, dma_device, &wqres->db, 1);
+	if (err)
+		return err;
+	*wqres->db.db = 0;
+
+	if (mlx4_buf_alloc(dev, size, max_direct, &wqres->buf)) {
+		err = -ENOMEM;
+		goto err_db;
+	}
+
+	err = mlx4_mtt_init(dev, wqres->buf.npages, wqres->buf.page_shift,
+			    &wqres->mtt);
+	if (err)
+		goto err_buf;
+	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf);
+	if (err)
+		goto err_mtt;
+
+	return 0;
+
+err_mtt:
+	mlx4_mtt_cleanup(dev, &wqres->mtt);
+err_buf:
+	mlx4_buf_free(dev, size, &wqres->buf);
+err_db:
+	mlx4_db_free(dev, dma_device, &wqres->db);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_alloc_hwq_res);
+
+void mlx4_free_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
+		      struct device *dma_device, int size)
+{
+	mlx4_mtt_cleanup(dev, &wqres->mtt);
+	mlx4_buf_free(dev, size, &wqres->buf);
+	mlx4_db_free(dev, dma_device, &wqres->db);
+}
+EXPORT_SYMBOL_GPL(mlx4_free_hwq_res);
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index caa5bcf..e905e61 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -188,7 +188,8 @@ int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
  EXPORT_SYMBOL_GPL(mlx4_cq_resize);

  int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
-		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq)
+		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
+		  unsigned vector, int collapsed)
  {
  	struct mlx4_priv *priv = mlx4_priv(dev);
  	struct mlx4_cq_table *cq_table = &priv->cq_table;
@@ -197,6 +198,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
  	u64 mtt_addr;
  	int err;

+#define COLLAPSED_SHIFT	18
+#define ENTRIES_SHIFT	24
+
  	cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
  	if (cq->cqn == -1)
  		return -ENOMEM;
@@ -224,8 +228,22 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
  	cq_context = mailbox->buf;
  	memset(cq_context, 0, sizeof *cq_context);

-	cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
-	cq_context->comp_eqn        = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
+	cq_context->flags = cpu_to_be32(!!collapsed << COLLAPSED_SHIFT);
+	cq_context->logsize_usrpage = cpu_to_be32(
+		(ilog2(nent) << ENTRIES_SHIFT) | uar->index);
+	if (vector > priv->eq_table.num_comp_eqs) {
+		err = -EINVAL;
+		goto err_radix;
+	}
+
+	if (vector == 0) {
+		vector = priv->eq_table.last_comp_eq %
+			priv->eq_table.num_comp_eqs + 1;
+		priv->eq_table.last_comp_eq = vector;
+	}
+	cq->comp_eq_idx             = MLX4_EQ_COMP_CPU0 + vector - 1;
+	cq_context->comp_eqn        = priv->eq_table.eq[MLX4_EQ_COMP_CPU0 +
+							vector - 1].eqn;
  	cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;

  	mtt_addr = mlx4_mtt_addr(dev, mtt);
@@ -274,7 +292,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
  	if (err)
  		mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);

-	synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq);
+	synchronize_irq(priv->eq_table.eq[cq->comp_eq_idx].irq);

  	spin_lock_irq(&cq_table->lock);
  	radix_tree_delete(&cq_table->tree, cq->cqn);
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index e141a15..67af1b1 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -265,7 +265,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)

  	writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);

-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < MLX4_EQ_COMP_CPU0 + priv->eq_table.num_comp_eqs; ++i)
  		work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);

  	return IRQ_RETVAL(work);
@@ -482,7 +482,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)

  	if (eq_table->have_irq)
  		free_irq(dev->pdev->irq, dev);
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < MLX4_EQ_COMP_CPU0 + eq_table->num_comp_eqs; ++i)
  		if (eq_table->eq[i].have_irq)
  			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
  }
@@ -555,6 +555,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
  	struct mlx4_priv *priv = mlx4_priv(dev);
  	int err;
  	int i;
+	int req_eqs;

  	err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
  			       dev->caps.num_eqs - 1, dev->caps.reserved_eqs);
@@ -573,11 +574,21 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
  	priv->eq_table.clr_int  = priv->clr_base +
  		(priv->eq_table.inta_pin < 32 ? 4 : 0);

-	err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
-			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0,
-			     &priv->eq_table.eq[MLX4_EQ_COMP]);
-	if (err)
-		goto err_out_unmap;
+	priv->eq_table.num_comp_eqs = 0;
+	req_eqs = (dev->flags & MLX4_FLAG_MSI_X) ? num_online_cpus() : 1;
+	while (req_eqs) {
+		err = mlx4_create_eq(
+			dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
+			(dev->flags & MLX4_FLAG_MSI_X) ?
+			(MLX4_EQ_COMP_CPU0 + priv->eq_table.num_comp_eqs) : 0,
+			&priv->eq_table.eq[MLX4_EQ_COMP_CPU0 +
+			priv->eq_table.num_comp_eqs]);
+		if (err)
+			goto err_out_comp;
+		priv->eq_table.num_comp_eqs++;
+		req_eqs--;
+	}
+	priv->eq_table.last_comp_eq = 0;

  	err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
  			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0,
@@ -587,11 +598,11 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)

  	if (dev->flags & MLX4_FLAG_MSI_X) {
  		static const char *eq_name[] = {
-			[MLX4_EQ_COMP]  = DRV_NAME " (comp)",
-			[MLX4_EQ_ASYNC] = DRV_NAME " (async)"
+			[MLX4_EQ_ASYNC] = DRV_NAME " (async)",
+			[MLX4_EQ_COMP_CPU0 ... MLX4_NUM_EQ] = "eth" DRV_NAME,
  		};
-
-		for (i = 0; i < MLX4_NUM_EQ; ++i) {
+		for (i = 0;
+		     i < MLX4_EQ_COMP_CPU0 + priv->eq_table.num_comp_eqs; ++i) {
  			err = request_irq(priv->eq_table.eq[i].irq,
  					  mlx4_msi_x_interrupt,
  					  0, eq_name[i], priv->eq_table.eq + i);
@@ -616,7 +627,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
  		mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
  			   priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);

-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < MLX4_EQ_COMP_CPU0 + priv->eq_table.num_comp_eqs; ++i)
  		eq_set_ci(&priv->eq_table.eq[i], 1);

  	return 0;
@@ -625,9 +636,9 @@ err_out_async:
  	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);

  err_out_comp:
-	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]);
+	for (i = 0; i < priv->eq_table.num_comp_eqs; ++i)
+		mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP_CPU0 + i]);

-err_out_unmap:
  	mlx4_unmap_clr_int(dev);
  	mlx4_free_irqs(dev);

@@ -646,7 +657,7 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev)

  	mlx4_free_irqs(dev);

-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < MLX4_EQ_COMP_CPU0 + priv->eq_table.num_comp_eqs; ++i)
  		mlx4_free_eq(dev, &priv->eq_table.eq[i]);

  	mlx4_unmap_clr_int(dev);
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index d82f275..fe0f6b3 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -314,7 +314,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  			MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
  			dev_cap->max_vl[i]	   = field >> 4;
  			MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET);
-			dev_cap->max_mtu[i]	   = field >> 4;
+			dev_cap->ib_mtu[i]	   = field >> 4;
  			dev_cap->max_port_width[i] = field & 0xf;
  			MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET);
  			dev_cap->max_gids[i]	   = 1 << (field & 0xf);
@@ -322,9 +322,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  			dev_cap->max_pkeys[i]	   = 1 << (field & 0xf);
  		}
  	} else {
+#define QUERY_PORT_SUPPORTED_TYPE_OFFSET	0x00
  #define QUERY_PORT_MTU_OFFSET			0x01
  #define QUERY_PORT_WIDTH_OFFSET			0x06
  #define QUERY_PORT_MAX_GID_PKEY_OFFSET		0x07
+#define QUERY_PORT_MAX_MACVLAN_OFFSET		0x0a
  #define QUERY_PORT_MAX_VL_OFFSET		0x0b

  		for (i = 1; i <= dev_cap->num_ports; ++i) {
@@ -334,7 +336,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  				goto out;

  			MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET);
-			dev_cap->max_mtu[i]	   = field & 0xf;
+			dev_cap->ib_mtu[i]	   = field & 0xf;
  			MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET);
  			dev_cap->max_port_width[i] = field & 0xf;
  			MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET);
@@ -342,6 +344,14 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  			dev_cap->max_pkeys[i]	   = 1 << (field & 0xf);
  			MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET);
  			dev_cap->max_vl[i]	   = field & 0xf;
+			MLX4_GET(field, outbox,
+				 QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+			dev_cap->supported_port_types[i] = field & 3;
+			MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET);
+			dev_cap->log_max_macs[i]  = field & 0xf;
+			dev_cap->log_max_vlans[i] = field >> 4;
+			dev_cap->eth_mtu[i] = be16_to_cpu(((u16 *) outbox)[1]);
+			dev_cap->def_mac[i] = be64_to_cpu(((u64 *) outbox)[2]);
  		}
  	}

@@ -379,7 +389,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  	mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
  		 dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz);
  	mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n",
-		 dev_cap->local_ca_ack_delay, 128 << dev_cap->max_mtu[1],
+		 dev_cap->local_ca_ack_delay, 128 << dev_cap->ib_mtu[1],
  		 dev_cap->max_port_width[1]);
  	mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n",
  		 dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg);
@@ -787,7 +797,7 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port)
  		flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT;
  		MLX4_PUT(inbox, flags,		  INIT_PORT_FLAGS_OFFSET);

-		field = 128 << dev->caps.mtu_cap[port];
+		field = 128 << dev->caps.ib_mtu_cap[port];
  		MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET);
  		field = dev->caps.gid_table_len[port];
  		MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET);
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index 306cb9b..ef964d5 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -61,11 +61,13 @@ struct mlx4_dev_cap {
  	int local_ca_ack_delay;
  	int num_ports;
  	u32 max_msg_sz;
-	int max_mtu[MLX4_MAX_PORTS + 1];
+	int ib_mtu[MLX4_MAX_PORTS + 1];
  	int max_port_width[MLX4_MAX_PORTS + 1];
  	int max_vl[MLX4_MAX_PORTS + 1];
  	int max_gids[MLX4_MAX_PORTS + 1];
  	int max_pkeys[MLX4_MAX_PORTS + 1];
+	u64 def_mac[MLX4_MAX_PORTS + 1];
+	int eth_mtu[MLX4_MAX_PORTS + 1];
  	u16 stat_rate_support;
  	u32 flags;
  	int reserved_uars;
@@ -97,6 +99,9 @@ struct mlx4_dev_cap {
  	u32 reserved_lkey;
  	u64 max_icm_sz;
  	int max_gso_sz;
+	u8  supported_port_types[MLX4_MAX_PORTS + 1];
+	u8  log_max_macs[MLX4_MAX_PORTS + 1];
+	u8  log_max_vlans[MLX4_MAX_PORTS + 1];
  };

  struct mlx4_adapter {
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 49a4aca..50b5eb7 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -38,6 +38,8 @@
  #include <linux/errno.h>
  #include <linux/pci.h>
  #include <linux/dma-mapping.h>
+#include <linux/log2.h>
+#include <linux/if_ether.h>

  #include <linux/mlx4/device.h>
  #include <linux/mlx4/doorbell.h>
@@ -81,14 +83,83 @@ static struct mlx4_profile default_profile = {
  	.rdmarc_per_qp	= 1 << 4,
  	.num_cq		= 1 << 16,
  	.num_mcg	= 1 << 13,
-	.num_mpt	= 1 << 17,
+	.num_mpt	= 1 << 18,
  	.num_mtt	= 1 << 20,
  };

+static int mod_param_num_mac = 1;
+module_param_named(num_mac, mod_param_num_mac, int, 0444);
+MODULE_PARM_DESC(num_mac, "Maximum number of MACs per ETH port "
+			  "(1-127, default 1)");
+
+static int mod_param_num_vlan;
+module_param_named(num_vlan, mod_param_num_vlan, int, 0444);
+MODULE_PARM_DESC(num_vlan, "Maximum number of VLANs per ETH port "
+			   "(0-126, default 0)");
+
+static int mod_param_use_prio;
+module_param_named(use_prio, mod_param_use_prio, bool, 0444);
+MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
+			   "(0/1, default 0)");
+
+static int mod_param_if_eth = 1;
+module_param_named(if_eth, mod_param_if_eth, bool, 0444);
+MODULE_PARM_DESC(if_eth, "Enable the ETH interface to be loaded "
+			 "(0/1, default 1)");
+
+static int mod_param_if_fc = 1;
+module_param_named(if_fc, mod_param_if_fc, bool, 0444);
+MODULE_PARM_DESC(if_fc, "Enable the FC interface to be loaded "
+			"(0/1, default 1)");
+
+static char *mod_param_port_type[MLX4_MAX_PORTS] =
+				{ [0 ... (MLX4_MAX_PORTS-1)] = "ib"};
+module_param_array_named(port_type, mod_param_port_type, charp, NULL, 0444);
+MODULE_PARM_DESC(port_type, "Port L2 type (ib/eth, entry per port, "
+			    "comma separated, default ib for all)");
+
+static int mod_param_port_mtu[MLX4_MAX_PORTS] =
+				{ [0 ... (MLX4_MAX_PORTS-1)] = 9600};
+module_param_array_named(port_mtu, mod_param_port_mtu, int, NULL, 0444);
+MODULE_PARM_DESC(port_mtu, "Port max MTU in bytes (entry per port, "
+			   "comma separated, default 9600 for all)");
+
+static int mlx4_check_port_params(struct mlx4_dev *dev,
+				  enum mlx4_port_type *port_type)
+{
+	if (port_type[0] != port_type[1] &&
+	    !(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+		mlx4_err(dev, "Only same port types supported "
+			      "on this HCA, aborting.\n");
+		return -EINVAL;
+	}
+	if ((port_type[0] == MLX4_PORT_TYPE_ETH) &&
+	    (port_type[1] == MLX4_PORT_TYPE_IB)) {
+		mlx4_err(dev, "eth-ib configuration is not supported.\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void mlx4_str2port_type(char **port_str,
+			       enum mlx4_port_type *port_type)
+{
+	int i;
+
+	for (i = 0; i < MLX4_MAX_PORTS; i++) {
+		if (!strcmp(port_str[i], "eth"))
+			port_type[i] = MLX4_PORT_TYPE_ETH;
+		else
+			port_type[i] = MLX4_PORT_TYPE_IB;
+	}
+}
+
  static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  {
  	int err;
  	int i;
+	int num_eth_ports = 0;
+	enum mlx4_port_type port_type[MLX4_MAX_PORTS];
+
+	mlx4_str2port_type(mod_param_port_type, port_type);

  	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
  	if (err) {
@@ -120,10 +191,12 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  	dev->caps.num_ports	     = dev_cap->num_ports;
  	for (i = 1; i <= dev->caps.num_ports; ++i) {
  		dev->caps.vl_cap[i]	    = dev_cap->max_vl[i];
-		dev->caps.mtu_cap[i]	    = dev_cap->max_mtu[i];
+		dev->caps.ib_mtu_cap[i]	    = dev_cap->ib_mtu[i];
  		dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
  		dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
  		dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
+		dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
+		dev->caps.def_mac[i]	    = dev_cap->def_mac[i];
  	}

  	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
@@ -134,7 +207,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
  	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
  	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
-	dev->caps.reserved_qps	     = dev_cap->reserved_qps;
  	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
  	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
  	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
@@ -161,9 +233,155 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
  	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
  	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;

+	dev->caps.log_num_macs	= ilog2(roundup_pow_of_two(
+					mod_param_num_mac + 1));
+	dev->caps.log_num_vlans	= ilog2(roundup_pow_of_two(
+					mod_param_num_vlan + 2));
+	dev->caps.log_num_prios	= mod_param_use_prio ? 3 : 0;
+
+	err = mlx4_check_port_params(dev, port_type);
+	if (err)
+		return err;
+
+	for (i = 1; i <= dev->caps.num_ports; ++i) {
+		if (!dev_cap->supported_port_types[i]) {
+			mlx4_warn(dev, "FW doesn't support Multi Protocol, "
+				       "loading IB only\n");
+			dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
+			continue;
+		}
+		if (port_type[i-1] & dev_cap->supported_port_types[i])
+			dev->caps.port_type[i] = port_type[i-1];
+		else {
+			mlx4_err(dev, "Requested port type for port %d "
+				      "not supported by HW\n", i);
+			return -ENODEV;
+		}
+		if (mod_param_port_mtu[i-1] <= dev->caps.eth_mtu_cap[i])
+			dev->caps.eth_mtu_cap[i] = mod_param_port_mtu[i-1];
+		else
+			mlx4_warn(dev, "Requested mtu for port %d is larger "
+				       "then supported, reducing to %d\n",
+					i, dev->caps.eth_mtu_cap[i]);
+		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
+			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
+			mlx4_warn(dev, "Requested number of MACs is too much "
+				       "for port %d, reducing to %d.\n",
+					i, 1 << dev->caps.log_num_macs);
+		}
+		if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
+			dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
+			mlx4_warn(dev, "Requested number of VLANs is too much "
+				       "for port %d, reducing to %d.\n",
+					i, 1 << dev->caps.log_num_vlans);
+		}
+		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
+			++num_eth_ports;
+	}
+
+	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
+	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
+		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
+		(1 << dev->caps.log_num_macs) *
+		(1 << dev->caps.log_num_vlans) *
+		(1 << dev->caps.log_num_prios) *
+		num_eth_ports;
+	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
+
  	return 0;
  }

+static int mlx4_change_port_types(struct mlx4_dev *dev,
+				  enum mlx4_port_type *port_types)
+{
+	int i;
+	int err = 0;
+	int change = 0;
+	int port;
+
+	for (i = 0; i < MLX4_MAX_PORTS; i++) {
+		if (port_types[i] != dev->caps.port_type[i + 1]) {
+			change = 1;
+			dev->caps.port_type[i + 1] = port_types[i];
+		}
+	}
+	if (change) {
+		mlx4_unregister_device(dev);
+		for (port = 1; port <= dev->caps.num_ports; port++) {
+			mlx4_CLOSE_PORT(dev, port);
+			err = mlx4_SET_PORT(dev, port);
+			if (err) {
+				mlx4_err(dev, "Failed to set port %d, "
+					      "aborting\n", port);
+				return err;
+			}
+		}
+		err = mlx4_register_device(dev);
+	}
+	return err;
+}
+
+static ssize_t show_port_type(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct mlx4_dev *mdev = pci_get_drvdata(pdev);
+	int i;
+	int len;
+
+	len = sprintf(buf, "Current port types:\n");
+	for (i = 1; i <= MLX4_MAX_PORTS; i++)
+		len += sprintf(buf + len, "Port%d: %s\n", i,
+			       (mdev->caps.port_type[i] == MLX4_PORT_TYPE_IB) ?
+			       "ib" : "eth");
+	return len;
+}
+
+static ssize_t set_port_type(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct mlx4_dev *mdev = pci_get_drvdata(pdev);
+	char *type;
+	enum mlx4_port_type port_types[MLX4_MAX_PORTS];
+	char *loc_buf;
+	char *ptr;
+	int i;
+	int err = 0;
+
+	loc_buf = kmalloc(count + 1, GFP_KERNEL);
+	if (!loc_buf)
+		return -ENOMEM;
+
+	ptr = loc_buf;
+	memcpy(loc_buf, buf, count + 1);
+	for (i = 0; i < MLX4_MAX_PORTS; i++) {
+		type = strsep(&loc_buf, ",");
+		if (type && !strcmp(type, "ib"))
+			port_types[i] = MLX4_PORT_TYPE_IB;
+		else if (type && !strcmp(type, "eth"))
+			port_types[i] = MLX4_PORT_TYPE_ETH;
+		else {
+			dev_warn(dev, "'%s' is not an acceptable port type "
+				 "(one entry per port, 'eth' or 'ib' only)\n",
+				 type ? type : "");
+			err = -EINVAL;
+			goto out;
+		}
+	}
+	err = mlx4_check_port_params(mdev, port_types);
+	if (err)
+		goto out;
+
+	err = mlx4_change_port_types(mdev, port_types);
+out:
+	kfree(ptr);
+	return err ? err : count;
+}
+static DEVICE_ATTR(mlx4_port_type, S_IWUGO | S_IRUGO,
+		   show_port_type, set_port_type);
+
  static int mlx4_load_fw(struct mlx4_dev *dev)
  {
  	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -209,7 +427,8 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
  				  ((u64) (MLX4_CMPT_TYPE_QP *
  					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
  				  cmpt_entry_sz, dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
  	if (err)
  		goto err;

@@ -334,7 +553,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
  				  init_hca->qpc_base,
  				  dev_cap->qpc_entry_sz,
  				  dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
  	if (err) {
  		mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
  		goto err_unmap_dmpt;
@@ -344,7 +564,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
  				  init_hca->auxc_base,
  				  dev_cap->aux_entry_sz,
  				  dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
  	if (err) {
  		mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
  		goto err_unmap_qp;
@@ -354,7 +575,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
  				  init_hca->altc_base,
  				  dev_cap->altc_entry_sz,
  				  dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
  	if (err) {
  		mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
  		goto err_unmap_auxc;
@@ -364,7 +586,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
  				  init_hca->rdmarc_base,
  				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
  				  dev->caps.num_qps,
-				  dev->caps.reserved_qps, 0, 0);
+				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+				  0, 0);
  	if (err) {
  		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
  		goto err_unmap_altc;
@@ -556,6 +779,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
  {
  	struct mlx4_priv *priv = mlx4_priv(dev);
  	int err;
+	int port;

  	err = mlx4_init_uar_table(dev);
  	if (err) {
@@ -654,8 +878,25 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
  		goto err_qp_table_free;
  	}

+	for (port = 1; port <= dev->caps.num_ports; port++) {
+		err = mlx4_SET_PORT(dev, port);
+		if (err) {
+			mlx4_err(dev, "Failed to set port %d, aborting\n",
+				 port);
+			goto err_mcg_table_free;
+		}
+	}
+
+	for (port = 0; port < dev->caps.num_ports; port++) {
+		mlx4_init_mac_table(dev, port);
+		mlx4_init_vlan_table(dev, port);
+	}
+
  	return 0;

+err_mcg_table_free:
+	mlx4_cleanup_mcg_table(dev);
+
  err_qp_table_free:
  	mlx4_cleanup_qp_table(dev);

@@ -692,22 +933,25 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
  {
  	struct mlx4_priv *priv = mlx4_priv(dev);
  	struct msix_entry entries[MLX4_NUM_EQ];
+	int needed_vectors = MLX4_EQ_COMP_CPU0 + num_online_cpus();
  	int err;
  	int i;

  	if (msi_x) {
-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		for (i = 0; i < needed_vectors; ++i)
  			entries[i].entry = i;

-		err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries));
+		err = pci_enable_msix(dev->pdev, entries, needed_vectors);
  		if (err) {
  			if (err > 0)
-				mlx4_info(dev, "Only %d MSI-X vectors available, "
-					  "not using MSI-X\n", err);
+				mlx4_info(dev, "Only %d MSI-X vectors "
+					       "available, need %d. "
+						"Not using MSI-X\n",
+						err, needed_vectors);
  			goto no_msi;
  		}

-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		for (i = 0; i < needed_vectors; ++i)
  			priv->eq_table.eq[i].irq = entries[i].vector;

  		dev->flags |= MLX4_FLAG_MSI_X;
@@ -715,7 +959,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
  	}

  no_msi:
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < needed_vectors; ++i)
  		priv->eq_table.eq[i].irq = dev->pdev->irq;
  }

@@ -798,6 +1042,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
  	INIT_LIST_HEAD(&priv->ctx_list);
  	spin_lock_init(&priv->ctx_lock);

+	INIT_LIST_HEAD(&priv->pgdir_list);
+	mutex_init(&priv->pgdir_mutex);
+
  	/*
  	 * Now reset the HCA before we touch the PCI capabilities or
  	 * attempt a firmware command, since a boot ROM may have left
@@ -836,8 +1083,14 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)

  	pci_set_drvdata(pdev, dev);

+	err = device_create_file(&pdev->dev, &dev_attr_mlx4_port_type);
+	if (err)
+		goto sysfs_failed;
+
  	return 0;

+sysfs_failed:
+	mlx4_unregister_device(dev);
+
  err_cleanup:
  	mlx4_cleanup_mcg_table(dev);
  	mlx4_cleanup_qp_table(dev);
@@ -893,6 +1146,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
  	int p;

  	if (dev) {
+		device_remove_file(&pdev->dev, &dev_attr_mlx4_port_type);
  		mlx4_unregister_device(dev);

  		for (p = 1; p <= dev->caps.num_ports; ++p)
@@ -948,10 +1202,43 @@ static struct pci_driver mlx4_driver = {
  	.remove		= __devexit_p(mlx4_remove_one)
  };

+static int __init mlx4_verify_params(void)
+{
+	int i;
+
+	for (i = 0; i < MLX4_MAX_PORTS; ++i) {
+		if (strcmp(mod_param_port_type[i], "eth") &&
+		    strcmp(mod_param_port_type[i], "ib")) {
+			printk(KERN_WARNING "mlx4_core: bad port_type for "
+					    "port %d: %s\n",
+					    i + 1, mod_param_port_type[i]);
+			return -1;
+		}
+	}
+	if ((mod_param_num_mac < 1) ||
+	    (mod_param_num_mac > 127)) {
+		printk(KERN_WARNING "mlx4_core: bad num_mac: %d\n",
+		       mod_param_num_mac);
+		return -1;
+	}
+
+	if ((mod_param_num_vlan < 0) ||
+	     (mod_param_num_vlan > 126)) {
+		printk(KERN_WARNING "mlx4_core: bad num_vlan: %d\n",
+		       mod_param_num_vlan);
+		return -1;
+	}
+
+	return 0;
+}
+
  static int __init mlx4_init(void)
  {
  	int ret;

+	if (mlx4_verify_params())
+		return -EINVAL;
+
  	ret = mlx4_catas_init();
  	if (ret)
  		return ret;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 7333681..2af3d07 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -64,8 +64,8 @@ enum {

  enum {
  	MLX4_EQ_ASYNC,
-	MLX4_EQ_COMP,
-	MLX4_NUM_EQ
+	MLX4_EQ_COMP_CPU0,
+	MLX4_NUM_EQ		= MLX4_EQ_COMP_CPU0 + NR_CPUS,
  };

  enum {
@@ -111,6 +111,7 @@ struct mlx4_bitmap {
  	u32			last;
  	u32			top;
  	u32			max;
+	u32			effective_max;
  	u32			mask;
  	spinlock_t		lock;
  	unsigned long	       *table;
@@ -210,6 +211,8 @@ struct mlx4_eq_table {
  	void __iomem	       *uar_map[(MLX4_NUM_EQ + 6) / 4];
  	u32			clr_mask;
  	struct mlx4_eq		eq[MLX4_NUM_EQ];
+	int			num_comp_eqs;
+	int			last_comp_eq;
  	u64			icm_virt;
  	struct page	       *icm_page;
  	dma_addr_t		icm_dma;
@@ -250,6 +253,35 @@ struct mlx4_catas_err {
  	struct list_head	list;
  };

+struct mlx4_mac_table {
+#define MLX4_MAX_MAC_NUM	128
+#define MLX4_MAC_MASK		0xffffffffffff
+#define MLX4_MAC_VALID_SHIFT	63
+#define MLX4_MAC_TABLE_SIZE	(MLX4_MAX_MAC_NUM << 3)
+	__be64 entries[MLX4_MAX_MAC_NUM];
+	int refs[MLX4_MAX_MAC_NUM];
+	struct semaphore mac_sem;
+	int total;
+	int max;
+};
+
+struct mlx4_vlan_table {
+#define MLX4_MAX_VLAN_NUM	126
+#define MLX4_VLAN_MASK		0xfff
+#define MLX4_VLAN_VALID		(1 << 31)
+#define MLX4_VLAN_TABLE_SIZE	(MLX4_MAX_VLAN_NUM << 2)
+	__be32 entries[MLX4_MAX_VLAN_NUM];
+	int refs[MLX4_MAX_VLAN_NUM];
+	struct semaphore vlan_sem;
+	int total;
+	int max;
+};
+
+struct mlx4_port_info {
+	struct mlx4_mac_table	mac_table;
+	struct mlx4_vlan_table	vlan_table;
+};
+
  struct mlx4_priv {
  	struct mlx4_dev		dev;

@@ -257,6 +289,9 @@ struct mlx4_priv {
  	struct list_head	ctx_list;
  	spinlock_t		ctx_lock;

+	struct list_head        pgdir_list;
+	struct mutex            pgdir_mutex;
+
  	struct mlx4_fw		fw;
  	struct mlx4_cmd		cmd;

@@ -275,6 +310,7 @@ struct mlx4_priv {

  	struct mlx4_uar		driver_uar;
  	void __iomem	       *kar;
+	struct mlx4_port_info	port[MLX4_MAX_PORTS];
  };

  static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
@@ -284,7 +320,12 @@ static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)

  u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
  void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align);
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt);
  int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved);
+int mlx4_bitmap_init_with_effective_max(struct mlx4_bitmap *bitmap,
+					u32 num, u32 mask, u32 reserved,
+					u32 effective_max);
  void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);

  int mlx4_reset(struct mlx4_dev *dev);
@@ -336,10 +377,15 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev);
  void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
  void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);

+void mlx4_init_mac_table(struct mlx4_dev *dev, u8 port);
+void mlx4_init_vlan_table(struct mlx4_dev *dev, u8 port);
+
  void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type);

  void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);

  void mlx4_handle_catas_err(struct mlx4_dev *dev);

+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
+
  #endif /* MLX4_H */
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 79b317b..2fbf6a3 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -52,7 +52,9 @@ struct mlx4_mpt_entry {
  	__be64 length;
  	__be32 lkey;
  	__be32 win_cnt;
-	u8	reserved1[3];
+	u8	reserved1;
+	u8	flags2;
+	u8	reserved2;
  	u8	mtt_rep;
  	__be64 mtt_seg;
  	__be32 mtt_sz;
@@ -68,6 +70,8 @@ struct mlx4_mpt_entry {

  #define MLX4_MTT_FLAG_PRESENT		1

+#define MLX4_MPT_FLAG2_FBO_EN	    (1 <<  7)
+
  #define MLX4_MPT_STATUS_SW		0xF0
  #define MLX4_MPT_STATUS_HW		0x00

@@ -250,6 +254,21 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
  			    !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
  }

+int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+			   u64 iova, u64 size, u32 access, int npages,
+			   int page_shift, struct mlx4_mr *mr)
+{
+	mr->iova       = iova;
+	mr->size       = size;
+	mr->pd	       = pd;
+	mr->access     = access;
+	mr->enabled    = 0;
+	mr->key	       = hw_index_to_key(mridx);
+
+	return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_alloc_reserved);
+
  int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
  		  int npages, int page_shift, struct mlx4_mr *mr)
  {
@@ -261,14 +280,8 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
  	if (index == -1)
  		return -ENOMEM;

-	mr->iova       = iova;
-	mr->size       = size;
-	mr->pd	       = pd;
-	mr->access     = access;
-	mr->enabled    = 0;
-	mr->key	       = hw_index_to_key(index);
-
-	err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+	err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size,
+				     access, npages, page_shift, mr);
  	if (err)
  		mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);

@@ -276,9 +289,8 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
  }
  EXPORT_SYMBOL_GPL(mlx4_mr_alloc);

-void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
+void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
  {
-	struct mlx4_priv *priv = mlx4_priv(dev);
  	int err;

  	if (mr->enabled) {
@@ -290,6 +302,13 @@ void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
  	}

  	mlx4_mtt_cleanup(dev, &mr->mtt);
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_free_reserved);
+
+void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	mlx4_mr_free_reserved(dev, mr);
  	mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key));
  }
  EXPORT_SYMBOL_GPL(mlx4_mr_free);
@@ -435,8 +454,15 @@ int mlx4_init_mr_table(struct mlx4_dev *dev)
  	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
  	int err;

-	err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts,
-			       ~0, dev->caps.reserved_mrws);
+	if (!is_power_of_2(dev->caps.num_mpts))
+		return -EINVAL;
+
+	dev->caps.reserved_fexch_mpts_base = dev->caps.num_mpts -
+		(2 * dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]);
+	err = mlx4_bitmap_init_with_effective_max(&mr_table->mpt_bitmap,
+					dev->caps.num_mpts,
+					~0, dev->caps.reserved_mrws,
+					dev->caps.reserved_fexch_mpts_base);
  	if (err)
  		return err;

@@ -544,6 +570,56 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list
  }
  EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);

+int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev,
+			  struct mlx4_fmr *fmr,
+			  u64 *page_list, int npages,
+			  u64 iova, u32 fbo, u32 len,
+			  u32 *lkey, u32 *rkey)
+{
+	u32 key;
+	int i, err;
+
+	err = mlx4_check_fmr(fmr, page_list, npages, iova);
+	if (err)
+		return err;
+
+	++fmr->maps;
+
+	key = key_to_hw_index(fmr->mr.key);
+
+	*lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
+
+	*(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
+
+	/* Make sure MPT status is visible before writing MTT entries */
+	wmb();
+
+	for (i = 0; i < npages; ++i)
+		fmr->mtts[i] = cpu_to_be64(page_list[i] |
+					   MLX4_MTT_FLAG_PRESENT);
+
+	dma_sync_single(&dev->pdev->dev, fmr->dma_handle,
+			npages * sizeof(u64), DMA_TO_DEVICE);
+
+	fmr->mpt->key    = cpu_to_be32(key);
+	fmr->mpt->lkey   = cpu_to_be32(key);
+	fmr->mpt->length = cpu_to_be64(len);
+	fmr->mpt->start  = cpu_to_be64(iova);
+	fmr->mpt->first_byte_offset = cpu_to_be32(fbo & 0x001fffff);
+	fmr->mpt->flags2 = (fbo ? MLX4_MPT_FLAG2_FBO_EN : 0);
+
+	/* Make sure MTT entries are visible before setting MPT status */
+	wmb();
+
+	*(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW;
+
+	/* Make sure MPT status is visible before consumer can use FMR */
+	wmb();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr_fbo);
+
  int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
  		   int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
  {
@@ -586,6 +662,49 @@ err_free:
  }
  EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);

+int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx,
+			    u32 pd, u32 access, int max_pages,
+			    int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	u64 mtt_seg;
+	int err = -ENOMEM;
+
+	if (page_shift < 12 || page_shift >= 32)
+		return -EINVAL;
+
+	/* All MTTs must fit in the same page */
+	if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
+		return -EINVAL;
+
+	fmr->page_shift = page_shift;
+	fmr->max_pages  = max_pages;
+	fmr->max_maps   = max_maps;
+	fmr->maps = 0;
+
+	err = mlx4_mr_alloc_reserved(dev, mridx, pd, 0, 0, access, max_pages,
+				     page_shift, &fmr->mr);
+	if (err)
+		return err;
+
+	mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
+
+	fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
+				    fmr->mr.mtt.first_seg,
+				    &fmr->dma_handle);
+	if (!fmr->mtts) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	return 0;
+
+err_free:
+	mlx4_mr_free_reserved(dev, &fmr->mr);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_alloc_reserved);
+
  int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
  {
  	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -634,6 +753,18 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
  }
  EXPORT_SYMBOL_GPL(mlx4_fmr_free);

+int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
+{
+	if (fmr->maps)
+		return -EBUSY;
+
+	fmr->mr.enabled = 0;
+	mlx4_mr_free_reserved(dev, &fmr->mr);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_free_reserved);
+
  int mlx4_SYNC_TPT(struct mlx4_dev *dev)
  {
  	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c
new file mode 100644
index 0000000..5e685ca
--- /dev/null
+++ b/drivers/net/mlx4/port.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4.h"
+
+void mlx4_init_mac_table(struct mlx4_dev *dev, u8 port)
+{
+	struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
+	int i;
+
+	sema_init(&table->mac_sem, 1);
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		table->entries[i] = 0;
+		table->refs[i] = 0;
+	}
+	table->max = 1 << dev->caps.log_num_macs;
+	table->total = 0;
+}
+
+void mlx4_init_vlan_table(struct mlx4_dev *dev, u8 port)
+{
+	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+	int i;
+
+	sema_init(&table->vlan_sem, 1);
+	for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+		table->entries[i] = 0;
+		table->refs[i] = 0;
+	}
+	table->max = 1 << dev->caps.log_num_vlans;
+	table->total = 0;
+}
+
+static int mlx4_SET_PORT_mac_table(struct mlx4_dev *dev, u8 port,
+				   __be64 *entries)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	u32 in_mod;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE);
+
+	in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)
+{
+	struct mlx4_mac_table *table =
+		&mlx4_priv(dev)->port[port - 1].mac_table;
+	int i, err = 0;
+	int free = -1;
+	u64 valid = 1;
+
+	mlx4_dbg(dev, "Registering mac : 0x%llx\n", mac);
+	down(&table->mac_sem);
+	for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
+		if (free < 0 && !table->refs[i]) {
+			free = i;
+			continue;
+		}
+
+		if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
+			/* MAC already registered, increase reference count */
+			*index = i;
+			++table->refs[i];
+			goto out;
+		}
+	}
+	mlx4_dbg(dev, "Free mac index is %d\n", free);
+
+	if (table->total == table->max) {
+		/* No free mac entries */
+		err = -ENOSPC;
+		goto out;
+	}
+
+	/* Register new MAC */
+	table->refs[free] = 1;
+	table->entries[free] = cpu_to_be64(mac | valid << MLX4_MAC_VALID_SHIFT);
+
+	err = mlx4_SET_PORT_mac_table(dev, port, table->entries);
+	if (unlikely(err)) {
+		mlx4_err(dev, "Failed adding mac: 0x%llx\n", mac);
+		table->refs[free] = 0;
+		table->entries[free] = 0;
+		goto out;
+	}
+
+	*index = free;
+	++table->total;
+out:
+	up(&table->mac_sem);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_register_mac);
+
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index)
+{
+	struct mlx4_mac_table *table =
+		&mlx4_priv(dev)->port[port - 1].mac_table;
+
+	down(&table->mac_sem);
+	if (!table->refs[index]) {
+		mlx4_warn(dev, "No mac entry for index %d\n", index);
+		goto out;
+	}
+	if (--table->refs[index]) {
+		mlx4_warn(dev, "Have more references for index %d,"
+			  "no need to modify mac table\n", index);
+		goto out;
+	}
+	table->entries[index] = 0;
+	mlx4_SET_PORT_mac_table(dev, port, table->entries);
+	--table->total;
+out:
+	up(&table->mac_sem);
+}
+EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
+
+static int mlx4_SET_PORT_vlan_table(struct mlx4_dev *dev, u8 port,
+				    __be32 *entries)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	u32 in_mod;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE);
+	in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+
+	return err;
+}
+
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+{
+	struct mlx4_vlan_table *table =
+		&mlx4_priv(dev)->port[port - 1].vlan_table;
+	int i, err = 0;
+	int free = -1;
+
+	down(&table->vlan_sem);
+	for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+		if (free < 0 && (table->refs[i] == 0)) {
+			free = i;
+			continue;
+		}
+
+		if (table->refs[i] &&
+		    (vlan == (MLX4_VLAN_MASK &
+			      be32_to_cpu(table->entries[i])))) {
+			/* VLAN already registered, increase reference count */
+			*index = i;
+			++table->refs[i];
+			goto out;
+		}
+	}
+
+	if (table->total == table->max) {
+		/* No free vlan entries */
+		err = -ENOSPC;
+		goto out;
+	}
+
+	/* Register new VLAN */
+	table->refs[free] = 1;
+	table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
+
+	err = mlx4_SET_PORT_vlan_table(dev, port, table->entries);
+	if (unlikely(err)) {
+		mlx4_warn(dev, "Failed adding vlan: %u\n", vlan);
+		table->refs[free] = 0;
+		table->entries[free] = 0;
+		goto out;
+	}
+
+	*index = free;
+	++table->total;
+out:
+	up(&table->vlan_sem);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_register_vlan);
+
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+{
+	struct mlx4_vlan_table *table =
+		&mlx4_priv(dev)->port[port - 1].vlan_table;
+
+	down(&table->vlan_sem);
+	if (!table->refs[index]) {
+		mlx4_warn(dev, "No vlan entry for index %d\n", index);
+		goto out;
+	}
+	if (--table->refs[index]) {
+		mlx4_dbg(dev, "Have more references for index %d,"
+			 "no need to modify vlan table\n", index);
+		goto out;
+	}
+	table->entries[index] = 0;
+	mlx4_SET_PORT_vlan_table(dev, port, table->entries);
+	--table->total;
+out:
+	up(&table->vlan_sem);
+}
+EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
+
+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	int err;
+	u8 is_eth = (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) ? 1 : 0;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	memset(mailbox->buf, 0, 256);
+	if (is_eth) {
+		((u8 *) mailbox->buf)[3] = 7;
+		((__be16 *) mailbox->buf)[3] =
+			cpu_to_be16(dev->caps.eth_mtu_cap[port] +
+				    ETH_HLEN + ETH_FCS_LEN);
+		((__be16 *) mailbox->buf)[4] = cpu_to_be16(1 << 15);
+		((__be16 *) mailbox->buf)[6] = cpu_to_be16(1 << 15);
+	}
+	err = mlx4_cmd(dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c
index fa24e65..1b2b7c4 100644
--- a/drivers/net/mlx4/qp.c
+++ b/drivers/net/mlx4/qp.c
@@ -147,19 +147,42 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
  }
  EXPORT_SYMBOL_GPL(mlx4_qp_modify);

-int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp)
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_qp_table *qp_table = &priv->qp_table;
+	int qpn;
+
+	qpn = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align);
+	if (qpn == -1)
+		return -ENOMEM;
+
+	*base = qpn;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range);
+
+void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_qp_table *qp_table = &priv->qp_table;
+	if (base_qpn < dev->caps.sqp_start + 8)
+		return;
+
+	mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
+
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
  {
  	struct mlx4_priv *priv = mlx4_priv(dev);
  	struct mlx4_qp_table *qp_table = &priv->qp_table;
  	int err;

-	if (sqpn)
-		qp->qpn = sqpn;
-	else {
-		qp->qpn = mlx4_bitmap_alloc(&qp_table->bitmap);
-		if (qp->qpn == -1)
-			return -ENOMEM;
-	}
+	if (!qpn)
+		return -EINVAL;
+
+	qp->qpn = qpn;

  	err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn);
  	if (err)
@@ -208,9 +231,6 @@ err_put_qp:
  	mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);

  err_out:
-	if (!sqpn)
-		mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
-
  	return err;
  }
  EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
@@ -240,8 +260,6 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
  	mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
  	mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);

-	if (qp->qpn >= dev->caps.sqp_start + 8)
-		mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
  }
  EXPORT_SYMBOL_GPL(mlx4_qp_free);

@@ -255,6 +273,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
  {
  	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
  	int err;
+	int reserved_from_top = 0;

  	spin_lock_init(&qp_table->lock);
  	INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
@@ -264,9 +283,45 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
  	 * block of special QPs must be aligned to a multiple of 8, so
  	 * round up.
  	 */
-	dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps, 8);
-	err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
-			       (1 << 24) - 1, dev->caps.sqp_start + 8);
+	dev->caps.sqp_start =
+		ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
+
+	{
+		int sort[MLX4_QP_REGION_COUNT];
+		int i, j, tmp;
+		int last_base = dev->caps.num_qps;
+
+		for (i = 1; i < MLX4_QP_REGION_COUNT; ++i)
+			sort[i] = i;
+
+		for (i = MLX4_QP_REGION_COUNT; i > 0; --i) {
+			for (j = 2; j < i; ++j) {
+				if (dev->caps.reserved_qps_cnt[sort[j]] >
+				    dev->caps.reserved_qps_cnt[sort[j - 1]]) {
+					tmp = sort[j];
+					sort[j] = sort[j - 1];
+					sort[j - 1] = tmp;
+				}
+			}
+		}
+
+		for (i = 1; i < MLX4_QP_REGION_COUNT; ++i) {
+			last_base -= dev->caps.reserved_qps_cnt[sort[i]];
+			dev->caps.reserved_qps_base[sort[i]] = last_base;
+			reserved_from_top +=
+				dev->caps.reserved_qps_cnt[sort[i]];
+		}
+
+
+	err = mlx4_bitmap_init_with_effective_max(&qp_table->bitmap,
+						  dev->caps.num_qps,
+						  (1 << 23) - 1,
+						  dev->caps.sqp_start + 8,
+						  dev->caps.num_qps -
+							reserved_from_top);
+
  	if (err)
  		return err;

@@ -279,6 +334,20 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
  	mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
  }

+int mlx4_qp_get_region(struct mlx4_dev *dev,
+		       enum qp_region region,
+		       int *base_qpn, int *cnt)
+{
+	if ((region < 0) || (region >= MLX4_QP_REGION_COUNT))
+		return -EINVAL;
+
+	*base_qpn = dev->caps.reserved_qps_base[region];
+	*cnt	  = dev->caps.reserved_qps_cnt[region];
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_get_region);
+
  int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
  		  struct mlx4_qp_context *context)
  {
@@ -299,3 +368,35 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
  }
  EXPORT_SYMBOL_GPL(mlx4_qp_query);

+int mlx4_qp_to_ready(struct mlx4_dev *dev,
+		     struct mlx4_mtt *mtt,
+		     struct mlx4_qp_context *context,
+		     struct mlx4_qp *qp,
+		     enum mlx4_qp_state *qp_state)
+{
+	int err = 0;
+	int i;
+	enum mlx4_qp_state states[] = {
+		MLX4_QP_STATE_RST,
+		MLX4_QP_STATE_INIT,
+		MLX4_QP_STATE_RTR,
+		MLX4_QP_STATE_RTS
+	};
+
+	for (i = 0; i < ARRAY_SIZE(states) - 1; i++) {
+		/*
+		 * Replace the state field (bits 31:28) instead of OR-ing
+		 * into it; OR-ing INIT (1) with RTR (2) would request
+		 * RTS (3) one transition too early.
+		 */
+		context->flags &= cpu_to_be32(~(0xf << 28));
+		context->flags |= cpu_to_be32(states[i + 1] << 28);
+		err = mlx4_qp_modify(dev, mtt, states[i],
+				     states[i + 1], context, 0, 0, qp);
+		if (err) {
+			mlx4_err(dev, "Failed to bring qp to state: "
+				 "%d with error: %d\n",
+				 states[i + 1], err);
+			return err;
+		}
+		*qp_state = states[i + 1];
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);
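mlx4_qp_to_ready() gives such consumers a single call from reset to RTS. A
hedged sketch of a caller (the context, mtt and qp are assumed to have been
prepared already; logging shown with the driver-internal mlx4_err() for
brevity):

	enum mlx4_qp_state qp_state = MLX4_QP_STATE_RST;
	int err;

	/* Walks RST -> INIT -> RTR -> RTS; on failure, qp_state still
	 * holds the last state that was actually reached.
	 */
	err = mlx4_qp_to_ready(dev, &mtt, &context, &qp, &qp_state);
	if (err)
		mlx4_err(dev, "QP only reached state %d\n", qp_state);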
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 77323a7..cf9c679 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -132,6 +132,15 @@ enum {
  	MLX4_MAILBOX_SIZE	=  4096
  };

+enum {
+	/* set port opcode modifiers */
+	MLX4_SET_PORT_GENERAL   = 0x0,
+	MLX4_SET_PORT_RQP_CALC  = 0x1,
+	MLX4_SET_PORT_MAC_TABLE = 0x2,
+	MLX4_SET_PORT_VLAN_TABLE = 0x3,
+	MLX4_SET_PORT_PRIO_MAP  = 0x4,
+};
+
  struct mlx4_dev;

  struct mlx4_cmd_mailbox {
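The new modifiers select which table a SET_PORT mailbox carries. A sketch of
the calling pattern, modeled on the MAC table case (mailbox layout elided;
the authoritative version lives in port.c):

	struct mlx4_cmd_mailbox *mailbox;
	int err;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);
	/* ... fill mailbox->buf with the port's MAC table ... */
	err = mlx4_cmd(dev, mailbox->dma,
		       (MLX4_SET_PORT_MAC_TABLE << 8) | port, 1,
		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
	mlx4_free_cmd_mailbox(dev, mailbox);
	return err;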
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ff7df1a..2d08c4f 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -60,6 +60,7 @@ enum {
  	MLX4_DEV_CAP_FLAG_IPOIB_CSUM	= 1 <<  7,
  	MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR	= 1 <<  8,
  	MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR	= 1 <<  9,
+	MLX4_DEV_CAP_FLAG_DPDP		= 1 << 12,
  	MLX4_DEV_CAP_FLAG_MEM_WINDOW	= 1 << 16,
  	MLX4_DEV_CAP_FLAG_APM		= 1 << 17,
  	MLX4_DEV_CAP_FLAG_ATOMIC	= 1 << 18,
@@ -133,6 +134,23 @@ enum {
  	MLX4_STAT_RATE_OFFSET	= 5
  };

+enum qp_region {
+	MLX4_QP_REGION_FW = 0,
+	MLX4_QP_REGION_ETH_ADDR,
+	MLX4_QP_REGION_FC_ADDR,
+	MLX4_QP_REGION_FC_EXCH,
+	MLX4_QP_REGION_COUNT		/* Must be last */
+};
+
+enum mlx4_port_type {
+	MLX4_PORT_TYPE_IB	= 1 << 0,
+	MLX4_PORT_TYPE_ETH	= 1 << 1,
+};
+
+enum {
+	MLX4_NUM_FEXCH		= 64 * 1024,
+};
+
  static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
  {
  	return (major << 32) | (minor << 16) | subminor;
@@ -142,7 +160,9 @@ struct mlx4_caps {
  	u64			fw_ver;
  	int			num_ports;
  	int			vl_cap[MLX4_MAX_PORTS + 1];
-	int			mtu_cap[MLX4_MAX_PORTS + 1];
+	int			ib_mtu_cap[MLX4_MAX_PORTS + 1];
+	u64			def_mac[MLX4_MAX_PORTS + 1];
+	int			eth_mtu_cap[MLX4_MAX_PORTS + 1];
  	int			gid_table_len[MLX4_MAX_PORTS + 1];
  	int			pkey_table_len[MLX4_MAX_PORTS + 1];
  	int			local_ca_ack_delay;
@@ -157,7 +177,6 @@ struct mlx4_caps {
  	int			max_rq_desc_sz;
  	int			max_qp_init_rdma;
  	int			max_qp_dest_rdma;
-	int			reserved_qps;
  	int			sqp_start;
  	int			num_srqs;
  	int			max_srq_wqes;
@@ -187,6 +206,13 @@ struct mlx4_caps {
  	u16			stat_rate_support;
  	u8			port_width_cap[MLX4_MAX_PORTS + 1];
  	int			max_gso_sz;
+	int			reserved_qps_cnt[MLX4_QP_REGION_COUNT];
+	int			reserved_qps_base[MLX4_QP_REGION_COUNT];
+	int			log_num_macs;
+	int			log_num_vlans;
+	int			log_num_prios;
+	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
+	int			reserved_fexch_mpts_base;
  };

  struct mlx4_buf_list {
@@ -208,6 +234,34 @@ struct mlx4_mtt {
  	int			page_shift;
  };

+enum {
+	MLX4_DB_PER_PAGE = PAGE_SIZE / 4
+};
+
+struct mlx4_db_pgdir {
+	struct list_head	list;
+	DECLARE_BITMAP(order0, MLX4_DB_PER_PAGE);
+	DECLARE_BITMAP(order1, MLX4_DB_PER_PAGE / 2);
+	unsigned long		*bits[2];
+	__be32			*db_page;
+	dma_addr_t		db_dma;
+};
+
+struct mlx4_db {
+	__be32			*db;
+	struct mlx4_db_pgdir	*pgdir;
+	dma_addr_t		dma;
+	int			index;
+	int			order;
+};
+
+struct mlx4_hwq_resources {
+	struct mlx4_db		db;
+	struct mlx4_mtt		mtt;
+	struct mlx4_buf		buf;
+};
+
  struct mlx4_mr {
  	struct mlx4_mtt		mtt;
  	u64			iova;
@@ -247,6 +301,7 @@ struct mlx4_cq {
  	int			arm_sn;

  	int			cqn;
+	int			comp_eq_idx;

  	atomic_t		refcount;
  	struct completion	free;
@@ -309,6 +364,36 @@ struct mlx4_init_port_param {
  	u64			si_guid;
  };

+static inline void mlx4_query_steer_cap(struct mlx4_dev *dev, int *log_mac,
+					int *log_vlan, int *log_prio)
+{
+	*log_mac = dev->caps.log_num_macs;
+	*log_vlan = dev->caps.log_num_vlans;
+	*log_prio = dev->caps.log_num_prios;
+}
+
+static inline u32 mlx4_get_ports_of_type(struct mlx4_dev *dev,
+					enum mlx4_port_type ptype)
+{
+	u32 ret = 0;
+	int i;
+
+	for (i = 1; i <= dev->caps.num_ports; ++i) {
+		if (dev->caps.port_type[i] == ptype)
+			ret |= 1 << (i-1);
+	}
+	return ret;
+}
+
+#define foreach_port(port, bitmap) \
+	for ((port) = 1; (port) <= MLX4_MAX_PORTS; ++(port)) \
+		if ((bitmap) & 1 << ((port) - 1))
+
+static inline int mlx4_get_fexch_mpts_base(struct mlx4_dev *dev)
+{
+	return dev->caps.reserved_fexch_mpts_base;
+}
+
  int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
  		   struct mlx4_buf *buf);
  void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
@@ -332,8 +417,12 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
  void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
  u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt);

+int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+			   u64 iova, u64 size, u32 access, int npages,
+			   int page_shift, struct mlx4_mr *mr);
  int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
  		  int npages, int page_shift, struct mlx4_mr *mr);
+void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr);
  void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
  int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
  int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
@@ -341,11 +430,20 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
  int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
  		       struct mlx4_buf *buf);

+int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
+		       struct device *dma_device, int size, int max_direct);
+void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
+		       struct device *dma_device, int size);
+
  int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
-		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq);
+		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
+		  unsigned vector, int collapsed);
  void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);

-int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp);
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
+void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
+
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
  void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);

  int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
@@ -360,14 +458,26 @@ int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
  int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
  int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);

+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index);
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
+
  int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
  		      int npages, u64 iova, u32 *lkey, u32 *rkey);
+int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
+			  u64 *page_list, int npages, u64 iova,
+			  u32 fbo, u32 len, u32 *lkey, u32 *rkey);
  int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
  		   int max_maps, u8 page_shift, struct mlx4_fmr *fmr);
+int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+			    u32 access, int max_pages, int max_maps,
+			    u8 page_shift, struct mlx4_fmr *fmr);
  int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
  void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
  		    u32 *lkey, u32 *rkey);
  int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
+int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
  int mlx4_SYNC_TPT(struct mlx4_dev *dev);

  #endif /* MLX4_DEVICE_H */
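The port-type helpers are meant to compose; e.g. a hypothetical Ethernet
consumer could bring up its ports like this (sketch only):

	u32 eth_ports = mlx4_get_ports_of_type(dev, MLX4_PORT_TYPE_ETH);
	int port, index, err;

	foreach_port(port, eth_ports) {
		/* Register the port's default MAC; index identifies the
		 * table slot for a later mlx4_unregister_mac().
		 */
		err = mlx4_register_mac(dev, port,
					dev->caps.def_mac[port], &index);
		if (err)
			break;
	}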
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index a5e43fe..5a02980 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -151,7 +151,16 @@ struct mlx4_qp_context {
  	u8			reserved4[2];
  	u8			mtt_base_addr_h;
  	__be32			mtt_base_addr_l;
-	u32			reserved5[10];
+	u8			VE;
+	u8			reserved5;
+	__be16			VFT_id_prio;
+	u8			reserved6;
+	u8			exch_size;
+	__be16			exch_base;
+	u8			VFT_hop_cnt;
+	u8			my_fc_id_idx;
+	__be16			reserved7;
+	u32			reserved8[7];
  };

  /* Which firmware version adds support for NEC (NoErrorCompletion) bit */
@@ -296,6 +305,10 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
  int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
  		  struct mlx4_qp_context *context);

+int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+		     struct mlx4_qp_context *context,
+		     struct mlx4_qp *qp, enum mlx4_qp_state *qp_state);
+
  static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
  {
  	return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
@@ -303,4 +316,8 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)

  void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);

+int mlx4_qp_get_region(struct mlx4_dev *dev,
+		       enum qp_region region,
+		       int *base_qpn, int *cnt);
+
  #endif /* MLX4_QP_H */
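Consumers that own one of the static regions query their slice instead of
reserving a range dynamically; a sketch for the FCoE exchange region:

	int base_qpn, cnt;
	int err;

	err = mlx4_qp_get_region(dev, MLX4_QP_REGION_FC_EXCH,
				 &base_qpn, &cnt);
	if (err)
		return err;
	/* QPNs [base_qpn, base_qpn + cnt) now belong to this consumer
	 * and are never handed out by the general bitmap.
	 */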
-- 
1.5.4
