[ofa-general][PATCH 11/12 v1] mlx4: Fiber Channel support

Yevgeny Petrilin yevgenyp at mellanox.co.il
Wed Apr 23 08:09:10 PDT 2008


>From ab14366d6cbf590c6a6a6a4d16e86a0d120facc6 Mon Sep 17 00:00:00 2001
From: Yevgeny Petrilin <yevgenyp at mellanox.co.il>
Date: Wed, 23 Apr 2008 15:19:16 +0300
Subject: [PATCH] mlx4: Fiber Channel support

As we did with QPs, some of the MPTs are pre-reserved
(the MPTs that are mapped for FEXCHs, 2*64K of them).
So needed to split the operation of allocating an MPT to two:
	The allocation of a bit from the bitmap
	The actual creation of the entry (and it's MTT).
So, mr_alloc_reserved() is the second part, where you know which MPT number was allocated.
mr_alloc() is the one that allocates a number from the bitmap.
Normal users keep using the original mr_alloc().
For FEXCH, when we know the pre-reserved MPT entry, we call mr_alloc_reserved() directly.

Same with the mr_free() and corresponding mr_free_reserved().
The first will just put back the bit, the later will actually
destroy the entry, but will leave the bit set.

map_phys_fmr_fbo() is very much like the original map_phys_fmr()
- allows setting an FBO (First Byte Offset) for the MPT
- allows setting the data length for the MPT
- does not increase the higher bits of the key after every map.

Signed-off-by: Yevgeny Petrilin <yevgenyp at mellanox.co.il>
---
 drivers/net/mlx4/main.c     |    2 +-
 drivers/net/mlx4/mr.c       |  131 +++++++++++++++++++++++++++++++++++++------
 include/linux/mlx4/device.h |   18 ++++++
 include/linux/mlx4/qp.h     |   11 +++-
 4 files changed, 142 insertions(+), 20 deletions(-)

diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index aecb1f2..93a4e4b 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -81,7 +81,7 @@ static struct mlx4_profile default_profile = {
 	.rdmarc_per_qp	= 1 << 4,
 	.num_cq		= 1 << 16,
 	.num_mcg	= 1 << 13,
-	.num_mpt	= 1 << 17,
+	.num_mpt	= 1 << 18,
 	.num_mtt	= 1 << 20,
 };

diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 79b317b..ae376ae 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -52,7 +52,9 @@ struct mlx4_mpt_entry {
 	__be64 length;
 	__be32 lkey;
 	__be32 win_cnt;
-	u8	reserved1[3];
+	u8	reserved1;
+	u8	flags2;
+	u8	reserved2;
 	u8	mtt_rep;
 	__be64 mtt_seg;
 	__be32 mtt_sz;
@@ -68,6 +70,8 @@ struct mlx4_mpt_entry {

 #define MLX4_MTT_FLAG_PRESENT		1

+#define MLX4_MPT_FLAG2_FBO_EN	    (1 <<  7)
+
 #define MLX4_MPT_STATUS_SW		0xF0
 #define MLX4_MPT_STATUS_HW		0x00

@@ -122,7 +126,7 @@ static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
 	spin_unlock(&buddy->lock);
 }

-static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
+static int __devinit mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
 {
 	int i, s;

@@ -250,6 +254,21 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
 			    !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
 }

+int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+			   u64 iova, u64 size, u32 access, int npages,
+			   int page_shift, struct mlx4_mr *mr)
+{
+	mr->iova       = iova;
+	mr->size       = size;
+	mr->pd	       = pd;
+	mr->access     = access;
+	mr->enabled    = 0;
+	mr->key	       = hw_index_to_key(mridx);
+
+	return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_alloc_reserved);
+
 int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
 		  int npages, int page_shift, struct mlx4_mr *mr)
 {
@@ -261,14 +280,8 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
 	if (index == -1)
 		return -ENOMEM;

-	mr->iova       = iova;
-	mr->size       = size;
-	mr->pd	       = pd;
-	mr->access     = access;
-	mr->enabled    = 0;
-	mr->key	       = hw_index_to_key(index);
-
-	err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+	err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size,
+				     access, npages, page_shift, mr);
 	if (err)
 		mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);

@@ -276,9 +289,8 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
 }
 EXPORT_SYMBOL_GPL(mlx4_mr_alloc);

-void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
+void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
 {
-	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;

 	if (mr->enabled) {
@@ -290,6 +302,13 @@ void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
 	}

 	mlx4_mtt_cleanup(dev, &mr->mtt);
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_free_reserved);
+
+void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	mlx4_mr_free_reserved(dev, mr);
 	mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key));
 }
 EXPORT_SYMBOL_GPL(mlx4_mr_free);
@@ -435,8 +454,15 @@ int mlx4_init_mr_table(struct mlx4_dev *dev)
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 	int err;

-	err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts,
-			       ~0, dev->caps.reserved_mrws);
+	if (!is_power_of_2(dev->caps.num_mpts))
+		return -EINVAL;
+
+	dev->caps.reserved_fexch_mpts_base = dev->caps.num_mpts -
+		(2 * dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]);
+	err = mlx4_bitmap_init_with_effective_max(&mr_table->mpt_bitmap,
+					dev->caps.num_mpts,
+					~0, dev->caps.reserved_mrws,
+					dev->caps.reserved_fexch_mpts_base);
 	if (err)
 		return err;

@@ -500,8 +526,9 @@ static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list,
 	return 0;
 }

-int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
-		      int npages, u64 iova, u32 *lkey, u32 *rkey)
+int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
+			  u64 *page_list, int npages, u64 iova, u32 fbo,
+			  u32 len, u32 *lkey, u32 *rkey, int same_key)
 {
 	u32 key;
 	int i, err;
@@ -513,7 +540,8 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list
 	++fmr->maps;

 	key = key_to_hw_index(fmr->mr.key);
-	key += dev->caps.num_mpts;
+	if (same_key)
+		key += dev->caps.num_mpts;
 	*lkey = *rkey = fmr->mr.key = hw_index_to_key(key);

 	*(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
@@ -529,8 +557,10 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list

 	fmr->mpt->key    = cpu_to_be32(key);
 	fmr->mpt->lkey   = cpu_to_be32(key);
-	fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
+	fmr->mpt->length = cpu_to_be64(len);
 	fmr->mpt->start  = cpu_to_be64(iova);
+	fmr->mpt->first_byte_offset = cpu_to_be32(fbo & 0x001fffff);
+	fmr->mpt->flags2 = (fbo ? MLX4_MPT_FLAG2_FBO_EN : 0);

 	/* Make MTT entries are visible before setting MPT status */
 	wmb();
@@ -542,6 +572,16 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list

 	return 0;
 }
+EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr_fbo);
+
+int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
+		      int npages, u64 iova, u32 *lkey, u32 *rkey)
+{
+	u32 len = npages * (1ull << fmr->page_shift);
+
+	return mlx4_map_phys_fmr_fbo(dev, fmr, page_list, npages, iova, 0,
+				     len, lkey, rkey, 1);
+}
 EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);

 int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
@@ -586,6 +626,49 @@ err_free:
 }
 EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);

+int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx,
+			    u32 pd, u32 access, int max_pages,
+			    int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	u64 mtt_seg;
+	int err = -ENOMEM;
+
+	if (page_shift < 12 || page_shift >= 32)
+		return -EINVAL;
+
+	/* All MTTs must fit in the same page */
+	if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
+		return -EINVAL;
+
+	fmr->page_shift = page_shift;
+	fmr->max_pages  = max_pages;
+	fmr->max_maps   = max_maps;
+	fmr->maps = 0;
+
+	err = mlx4_mr_alloc_reserved(dev, mridx, pd, 0, 0, access, max_pages,
+				     page_shift, &fmr->mr);
+	if (err)
+		return err;
+
+	mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
+
+	fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
+				    fmr->mr.mtt.first_seg,
+				    &fmr->dma_handle);
+	if (!fmr->mtts) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	return 0;
+
+err_free:
+	mlx4_mr_free_reserved(dev, &fmr->mr);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_alloc_reserved);
+
 int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -634,6 +717,18 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
 }
 EXPORT_SYMBOL_GPL(mlx4_fmr_free);

+int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
+{
+	if (fmr->maps)
+		return -EBUSY;
+
+	fmr->mr.enabled = 0;
+	mlx4_mr_free_reserved(dev, &fmr->mr);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_free_reserved);
+
 int mlx4_SYNC_TPT(struct mlx4_dev *dev)
 {
 	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 673462c..e417673 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -215,6 +215,7 @@ struct mlx4_caps {
 	int                     log_num_vlans;
 	int                     log_num_prios;
 	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
+	int			reserved_fexch_mpts_base;
 };

 struct mlx4_buf_list {
@@ -400,6 +401,12 @@ static inline u32 mlx4_get_ports_of_type(struct mlx4_dev *dev,
 	    for ((port) = 1; (port) <= MLX4_MAX_PORTS; ++(port)) \
 		if (bitmap & 1 << ((port)-1))

+
+static inline int mlx4_get_fexch_mpts_base(struct mlx4_dev *dev)
+{
+	return dev->caps.reserved_fexch_mpts_base;
+}
+
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		   struct mlx4_buf *buf);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
@@ -423,8 +430,12 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
 void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
 u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt);

+int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+			   u64 iova, u64 size, u32 access, int npages,
+			   int page_shift, struct mlx4_mr *mr);
 int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
 		  int npages, int page_shift, struct mlx4_mr *mr);
+void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr);
 void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
 int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
@@ -469,13 +480,20 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
 void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);

+int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
+			  u64 *page_list, int npages, u64 iova, u32 fbo,
+			  u32 len, u32 *lkey, u32 *rkey, int same_key);
 int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
 		      int npages, u64 iova, u32 *lkey, u32 *rkey);
+int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+			    u32 access, int max_pages, int max_maps,
+			    u8 page_shift, struct mlx4_fmr *fmr);
 int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
 		   int max_maps, u8 page_shift, struct mlx4_fmr *fmr);
 int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
 		    u32 *lkey, u32 *rkey);
+int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);

diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index a5e43fe..d7c0227 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -151,7 +151,16 @@ struct mlx4_qp_context {
 	u8			reserved4[2];
 	u8			mtt_base_addr_h;
 	__be32			mtt_base_addr_l;
-	u32			reserved5[10];
+	u8			VE;
+	u8			reserved5;
+	__be16			VFT_id_prio;
+	u8			reserved6;
+	u8			exch_size;
+	__be16			exch_base;
+	u8			VFT_hop_cnt;
+	u8			my_fc_id_idx;
+	__be16			reserved7;
+	u32			reserved8[7];
 };

 /* Which firmware version adds support for NEC (NoErrorCompletion) bit */
-- 
1.5.4




More information about the general mailing list