[openib-general] [PATCH v2 07/14] CXGB3 RDMA Core HAL Code.

Steve Wise swise at opengridcomputing.com
Fri Jun 23 07:30:00 PDT 2006


This code implements a HAL interface to the T3 hardware.
---

 drivers/infiniband/hw/cxgb3/core/cxio_hal.c | 1152 +++++++++++++++++++++++++++
 drivers/infiniband/hw/cxgb3/core/cxio_hal.h |  166 ++++
 2 files changed, 1318 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_hal.c b/drivers/infiniband/hw/cxgb3/core/cxio_hal.c
new file mode 100644
index 0000000..e142e5f
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/core/cxio_hal.c
@@ -0,0 +1,1152 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <linux/pci.h>
+#include "cxio_hal.h"
+#include "sge_defs.h"
+#include <asm/delay.h>
+
+/* Table of opened RNIC devices; a NULL slot is unused. */
+static struct cxio_rdev *rdev_tbl[T3_MAX_NUM_RNIC];
+/* Callback invoked by cxio_hal_ev_handler() for events not on the ctrl QP. */
+static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
+
+/*
+ * Look up an opened rdev by its device name.
+ * Returns the matching rdev_tbl entry, or NULL if none has that name.
+ */
+static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
+{
+	int slot;
+
+	for (slot = 0; slot < T3_MAX_NUM_RNIC; slot++) {
+		struct cxio_rdev *entry = rdev_tbl[slot];
+
+		if (!entry)
+			continue;
+		if (strcmp(entry->dev_name, dev_name) == 0)
+			return entry;
+	}
+	return NULL;
+}
+
+/*
+ * Look up an opened rdev by the t3cdev it is bound to.
+ * Returns the matching rdev_tbl entry, or NULL if none uses tdev.
+ */
+static inline struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev
+							     *tdev)
+{
+	int slot;
+
+	for (slot = 0; slot < T3_MAX_NUM_RNIC; slot++) {
+		struct cxio_rdev *entry = rdev_tbl[slot];
+
+		if (entry && entry->t3cdev_p == tdev)
+			return entry;
+	}
+	return NULL;
+}
+
+/*
+ * Store rdev_p in the first free slot of rdev_tbl.
+ * Returns 0 on success, 1 if the table is already full.
+ */
+static inline int cxio_hal_add_rdev(struct cxio_rdev *rdev_p)
+{
+	int slot;
+
+	for (slot = 0; slot < T3_MAX_NUM_RNIC; slot++) {
+		if (rdev_tbl[slot])
+			continue;
+		rdev_tbl[slot] = rdev_p;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Remove rdev_p from rdev_tbl (first matching slot only).
+ * Does nothing if rdev_p is not in the table.
+ */
+static inline void cxio_hal_delete_rdev(struct cxio_rdev *rdev_p)
+{
+	int slot;
+
+	for (slot = 0; slot < T3_MAX_NUM_RNIC; slot++) {
+		if (rdev_tbl[slot] != rdev_p)
+			continue;
+		rdev_tbl[slot] = NULL;
+		return;
+	}
+}
+
+/*
+ * Resource-management helpers (handle, stag, qpid and cqid pools) that are
+ * declared here but not defined in this file.
+ */
+extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
+extern void cxio_hal_destroy_rhdl_resource(void);
+extern int cxio_hal_init_resource(struct cxio_hal_resource **rscpp,
+				  u32 nr_tpt, u32 nr_pbl,
+				  u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
+				  u32 nr_pdid);
+extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
+extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
+extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
+extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
+
+/*
+ * Issue a CQ operation (credit update or rearm) through the t3cdev ctl hook.
+ *
+ * For CQ_CREDIT_UPDATE the ctl result is returned directly.  For any other
+ * op a non-negative ctl result is treated as the CQ index reported by the
+ * rearm; if it differs from our read index we spin until the last
+ * in-flight CQE becomes valid.
+ *
+ * Returns the ctl result on error or credit update, 0 once the CQ is
+ * consistent, or -EIO if the in-flight CQE never shows up.
+ */
+int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
+		   enum t3_cq_opcode op, u32 credit)
+{
+	int ret;
+	struct t3_cqe *cqe;
+	u32 rptr;
+	struct rdma_cq_op setup;
+
+	setup.id = cq->cqid;
+	setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
+	setup.op = op;
+	ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
+
+	if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
+		return ret;
+
+	/*
+	 * If the rearm returned an index other than our current index,
+	 * then there might be CQE's in flight (being DMA'd).  We must wait
+	 * here for them to complete or the consumer can miss a notification.
+	 */
+	if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
+		int i = 0;
+
+		rptr = cq->rptr;
+
+		/*
+		 * Keep the generation correct by bumping rptr until it
+		 * matches the index returned by the rearm - 1.
+		 */
+		while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
+			rptr++;
+
+		/*
+		 * Now rptr is the index for the (last) cqe that was
+		 * in-flight at the time the HW rearmed the CQ.  We
+		 * spin until that CQE is valid.
+		 */
+		cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
+		while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
+			udelay(1);
+			if (i++ > 1000000) {
+				/*
+				 * Report the stall and fail the call.  A
+				 * BUG_ON(1) used to sit here, which made the
+				 * message and the -EIO return unreachable.
+				 */
+				printk(KERN_ERR "%s: stalled rnic\n",
+				       rdev_p->dev_name);
+				return -EIO;
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Disable CQ cqid by programming a context with a NULL base address and
+ * zero size.  Returns the result of the RDMA_CQ_SETUP ctl call.
+ */
+static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
+{
+	struct t3cdev *tdev = rdev_p->t3cdev_p;
+	struct rdma_cq_setup setup;
+
+	setup.id = cqid;
+	setup.base_addr = 0;	/* NULL address */
+	setup.size = 0;		/* disable the CQ */
+	setup.credits = 0;
+	setup.credit_thres = 0;
+	setup.ovfl_mode = 0;
+
+	return tdev->ctl(tdev, RDMA_CQ_SETUP, &setup);
+}
+
+/*
+ * Post a T3_WR_QP_MOD work request that writes an all-zero context for
+ * qpid.  Returns -ENOMEM if no skb could be allocated, otherwise the
+ * result of t3c_send().
+ */
+int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
+{
+	struct t3_modify_qp_wr *wqe;
+	struct sk_buff *skb;
+	u64 sge_cmd;
+
+	skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
+	if (!skb) {
+		DBG("failed in alloc_skb in destroy_ctrl_qp\n");
+		return -ENOMEM;
+	}
+
+	wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
+	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0x3, 1, qpid,
+		       0x4);
+
+	sge_cmd = qpid << 8 | 3;
+	wqe->wrid.id1 = cpu_to_be64(sge_cmd);
+	wqe->ctx0 = 0ULL;
+	wqe->ctx1 = 0ULL;
+
+	skb->priority = CPL_PRIORITY_CONTROL;
+	return t3c_send(rdev_p->t3cdev_p, skb);
+}
+
+/*
+ * Allocate a CQ id, the software shadow queue and the DMA-coherent hardware
+ * queue for cq (sized from cq->size_log2), then program the CQ context.
+ *
+ * Returns -ENOMEM if the id or either queue cannot be allocated; in that
+ * case everything acquired so far, including the CQ id, is released.
+ * Otherwise returns the result of the RDMA_CQ_SETUP ctl call.
+ */
+int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+	struct rdma_cq_setup setup;
+	int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
+
+	cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
+	if (!cq->cqid)
+		return -ENOMEM;
+	cq->sw_queue = kzalloc(size, GFP_KERNEL);
+	if (!cq->sw_queue)
+		goto err_put_cqid;
+	cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+					     (1UL << (cq->size_log2)) *
+					     sizeof(struct t3_cqe),
+					     &(cq->dma_addr), GFP_KERNEL);
+	if (!cq->queue)
+		goto err_free_sw_queue;
+	pci_unmap_addr_set(cq, mapping, cq->dma_addr);
+	memset(cq->queue, 0, size);
+	setup.id = cq->cqid;
+	setup.base_addr = (u64) (cq->dma_addr);
+	setup.size = 1UL << cq->size_log2;
+	setup.credits = 65535;
+	setup.credit_thres = 1;
+	setup.ovfl_mode = 1;
+	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+
+err_free_sw_queue:
+	kfree(cq->sw_queue);
+err_put_cqid:
+	/* give the id back so repeated failures do not drain the pool */
+	cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
+	return -ENOMEM;
+}
+
+/*
+ * Re-program the context of an existing CQ from cq->dma_addr and
+ * cq->size_log2, with credits and credit threshold both set to the new
+ * size.  Returns the result of the RDMA_CQ_SETUP ctl call.
+ */
+int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+	struct t3cdev *tdev = rdev_p->t3cdev_p;
+	struct rdma_cq_setup setup;
+
+	setup.id = cq->cqid;
+	setup.base_addr = (u64) (cq->dma_addr);
+	setup.ovfl_mode = 1;
+	setup.size = 1UL << cq->size_log2;
+	setup.credits = setup.size;
+	setup.credit_thres = setup.size;	/* TBD: overflow recovery */
+
+	return tdev->ctl(tdev, RDMA_CQ_SETUP, &setup);
+}
+
+/*
+ * Allocate a QP id, the RQ shadow array and the DMA-coherent work queue
+ * for wq (depth taken from wq->size_log2) and select its doorbell address.
+ *
+ * Returns 0 on success or -ENOMEM, in which case nothing stays allocated.
+ */
+int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
+		   struct t3_wq *wq)
+{
+	int depth = 1UL << wq->size_log2;
+
+	wq->qpid = cxio_hal_get_qpid(rdev_p->rscp);
+	if (!wq->qpid)
+		return -ENOMEM;
+
+	wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL);
+	if (!wq->rq)
+		goto free_qpid;
+
+	wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+					     depth * sizeof(union t3_wr),
+					     &(wq->dma_addr), GFP_KERNEL);
+	if (!wq->queue)
+		goto free_rq;
+
+	pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+
+	/* without USER_DOORBELL every QP uses the kernel doorbell */
+#ifdef USER_DOORBELL
+	if (!kernel_domain)
+		wq->doorbell = (void *)rdev_p->rnic_info.udbell_physbase +
+				(wq->qpid << PAGE_SHIFT);
+	else
+#endif
+		wq->doorbell = rdev_p->rnic_info.kdb_addr;
+	return 0;
+
+free_rq:
+	kfree(wq->rq);
+free_qpid:
+	cxio_hal_put_qpid(rdev_p->rscp, wq->qpid);
+	return -ENOMEM;
+}
+
+/*
+ * Tear down a CQ: clear its hardware context, free the software and DMA
+ * queues and return the CQ id to the pool.
+ * Returns the result of clearing the hardware context.
+ */
+int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+	struct device *dev = &(rdev_p->rnic_info.pdev->dev);
+	int err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
+
+	kfree(cq->sw_queue);
+	dma_free_coherent(dev,
+			  (1UL << (cq->size_log2)) * sizeof(struct t3_cqe),
+			  cq->queue, pci_unmap_addr(cq, mapping));
+	cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
+
+	return err;
+}
+
+/*
+ * Tear down a QP: clear its hardware context, free the DMA work queue and
+ * the RQ shadow array, and return the QP id to the pool.
+ * Returns the result of clearing the hardware context.
+ */
+int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq)
+{
+	struct device *dev = &(rdev_p->rnic_info.pdev->dev);
+	int err = cxio_hal_clear_qp_ctx(rdev_p, wq->qpid);
+
+	dma_free_coherent(dev,
+			  (1UL << (wq->size_log2)) * sizeof(union t3_wr),
+			  wq->queue, pci_unmap_addr(wq, mapping));
+	kfree(wq->rq);
+	cxio_hal_put_qpid(rdev_p->rscp, wq->qpid);
+
+	return err;
+}
+
+/*
+ * Append one software-generated receive CQE (status 1, opcode T3_SEND,
+ * SWCQE bit set) for wq to the software queue of cq and advance sw_wptr.
+ */
+static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
+{
+	struct t3_cqe flush_cqe;
+
+	DBG("%s %d wq %p cq %p sw_rptr %x sw_wptr %x\n", __FUNCTION__,
+	    __LINE__, wq, cq, cq->sw_rptr, cq->sw_wptr);
+
+	memset(&flush_cqe, 0, sizeof(flush_cqe));
+	flush_cqe.header = V_CQE_STATUS(1) |
+			   V_CQE_OPCODE(T3_SEND) |
+			   V_CQE_TYPE(0) |
+			   V_CQE_SWCQE(1) |
+			   V_CQE_QPID(wq->qpid) |
+			   V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, cq->size_log2));
+	flush_cqe.header = cpu_to_be32(flush_cqe.header);
+
+	cq->sw_queue[Q_PTR2IDX(cq->sw_wptr, cq->size_log2)] = flush_cqe;
+	cq->sw_wptr++;
+}
+
+/*
+ * Mark wq in error and insert one software receive CQE into cq for every
+ * entry between rq_rptr and rq_wptr.
+ */
+void cxio_flush_rq(struct cxio_rdev *rdev_p, struct t3_wq *wq,
+		   struct t3_cq *cq)
+{
+	u32 idx;
+
+	DBG("%s %d wq %p cq %p\n", __FUNCTION__, __LINE__, wq, cq);
+
+	/* mark the wq in error so all CQEs will be completed as flushed */
+	wq->error = 1;
+
+	/* one flushed CQE per outstanding RQ entry */
+	for (idx = wq->rq_rptr; idx != wq->rq_wptr; idx++)
+		insert_recv_cqe(wq, cq);
+}
+
+/*
+ * Append one software-generated send CQE (status 1, SWCQE bit set) for the
+ * work request wr to the software queue of cq and advance sw_wptr.
+ *
+ * The opcode is derived from the WR header and the WR id fields are copied
+ * from the WR.  They are filled in before the CQE is stored: previously
+ * they were set on the local copy after the store and so never reached
+ * the queue.
+ */
+static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, union t3_wr *wr)
+{
+	struct t3_cqe cqe;
+	enum t3_rdma_opcode op;
+
+	DBG("%s %d wq %p cq %p sw_rptr %x sw_wptr %x\n", __FUNCTION__,
+	    __LINE__, wq, cq, cq->sw_rptr, cq->sw_wptr);
+	memset(&cqe, 0, sizeof(cqe));
+	op = wr2opcode(G_FW_RIWR_OP(be32_to_cpu(wr->send.wrh.op_seop_flags)));
+	cqe.header = V_CQE_STATUS(1) |
+		     V_CQE_OPCODE(op) |
+		     V_CQE_TYPE(1) |
+		     V_CQE_SWCQE(1) |
+		     V_CQE_QPID(wq->qpid) |
+		     V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, cq->size_log2));
+	cqe.header = cpu_to_be32(cqe.header);
+	CQE_WRID_SQ_WPTR(cqe) = wr->send.wrid.id0.hi;
+	CQE_WRID_WPTR(cqe) = wr->send.wrid.id0.low;
+
+	/* publish the fully built CQE */
+	*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
+	cq->sw_wptr++;
+}
+
+/*
+ * Mark wq in error and insert one software send CQE into cq for every
+ * entry between sq_rptr and sq_wptr, starting from wq->sq_oldest_wr and
+ * taking each following WR from next_sq_wr().
+ */
+void cxio_flush_sq(struct cxio_rdev *rdev_p, struct t3_wq *wq,
+		   struct t3_cq *cq)
+{
+	union t3_wr *cur_wr = wq->sq_oldest_wr;
+	u32 idx;
+
+	DBG("%s %d wq %p cq %p\n", __FUNCTION__, __LINE__, wq, cq);
+
+	/* mark the wq in error so all CQEs will be completed as flushed */
+	wq->error = 1;
+
+	/* one flushed CQE per outstanding SQ entry */
+	for (idx = wq->sq_rptr; idx != wq->sq_wptr; idx++) {
+		BUG_ON(!cur_wr);
+		insert_sq_cqe(wq, cq, cur_wr);
+		cur_wr = next_sq_wr(wq);
+	}
+}
+
+/*
+ * Program CQ 0 as the control CQ: no backing memory, size 1, no credits.
+ * Returns the result of the RDMA_CQ_SETUP ctl call.
+ */
+static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
+{
+	struct t3cdev *tdev = rdev_p->t3cdev_p;
+	struct rdma_cq_setup setup;
+
+	setup.id = 0;
+	setup.base_addr = 0;	/* NULL address */
+	setup.size = 1;		/* enable the CQ */
+	setup.credits = 0;
+	setup.ovfl_mode = 1;
+
+	/* force SGE to redirect to RspQ and interrupt */
+	setup.credit_thres = 0;
+
+	return tdev->ctl(tdev, RDMA_CQ_SETUP, &setup);
+}
+
+/*
+ * Set up the control QP: initialise the control CQ, allocate and zero the
+ * DMA-coherent work queue, initialise the semaphore and wait queue, and
+ * post a T3_WR_QP_MOD work request carrying the new QP context.
+ *
+ * Returns -ENOMEM on allocation failure, the error from
+ * cxio_hal_init_ctrl_cq(), or the result of t3c_send().  The skb allocated
+ * for the work request is freed on every path that does not send it.
+ */
+static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+	int err;
+	u64 sge_cmd, ctx0, ctx1;
+	u64 base_addr;
+	struct t3_modify_qp_wr *wqe;
+	struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
+
+	if (!skb) {
+		DBG("failed in alloc_skb in init_ctrl_qp\n");
+		return -ENOMEM;
+	}
+	err = cxio_hal_init_ctrl_cq(rdev_p);
+	if (err) {
+		DBG("err initializing ctrl_cq, err status =%d\n", err);
+		goto err_free_skb;
+	}
+	rdev_p->ctrl_qp.workq = dma_alloc_coherent(
+					&(rdev_p->rnic_info.pdev->dev),
+					(1 << T3_CTRL_QP_SIZE_LOG2) *
+					sizeof(union t3_wr),
+					&(rdev_p->ctrl_qp.dma_addr),
+					GFP_KERNEL);
+	if (!rdev_p->ctrl_qp.workq) {
+		DBG("failed to allocate memory for ctrl QP\n");
+		err = -ENOMEM;
+		goto err_free_skb;
+	}
+	pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
+			   rdev_p->ctrl_qp.dma_addr);
+	rdev_p->ctrl_qp.doorbell = rdev_p->rnic_info.kdb_addr;
+	memset(rdev_p->ctrl_qp.workq, 0,
+	       (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
+
+	init_MUTEX(&rdev_p->ctrl_qp.sem);
+	init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
+
+	/* update HW Ctrl QP context */
+	base_addr = rdev_p->ctrl_qp.dma_addr;
+	base_addr >>= 12;
+	ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
+		V_EC_BASE_LO((u32) base_addr & 0xffff));
+	ctx0 <<= 32;
+	ctx0 |= V_EC_CREDITS(FW_WR_NUM);
+	base_addr >>= 16;
+	ctx1 = (u32) base_addr;
+	base_addr >>= 32;
+	ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
+			V_EC_TYPE(0) | V_EC_GEN(1) |
+			V_EC_UP_TOKEN(FW_RI_TID_START) | F_EC_VALID)) << 32;
+	wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
+	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0x3, 1,
+		       T3_CTRL_QP_ID, 0x4);
+	sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
+	wqe->wrid.id1 = cpu_to_be64(sge_cmd);
+	wqe->ctx1 = cpu_to_be64(ctx1);
+	wqe->ctx0 = cpu_to_be64(ctx0);
+	DBG("CtrlQP dma_addr=0x%llx kaddr=%p size=%d\n",
+	     (u64) rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq,
+	     1 << T3_CTRL_QP_SIZE_LOG2);
+	skb->priority = CPL_PRIORITY_CONTROL;
+	return (t3c_send(rdev_p->t3cdev_p, skb));
+
+err_free_skb:
+	/* the skb was never handed to t3c_send(), so it is still ours */
+	kfree_skb(skb);
+	return err;
+}
+
+/*
+ * Tear down the control QP.
+ *
+ * The request that clears the hardware context is posted before the work
+ * queue memory is released, matching the order used by cxio_destroy_qp(),
+ * so the still-valid context is not left pointing at freed DMA memory.
+ *
+ * Returns the result of cxio_hal_clear_qp_ctx().
+ */
+static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+	int err;
+
+	err = cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
+	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+			  (1UL << T3_CTRL_QP_SIZE_LOG2)
+			  * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
+			  pci_unmap_addr(&rdev_p->ctrl_qp, mapping));
+	return err;
+}
+
+/*
+ * Write len bytes of data into addr (32B aligned address) by posting
+ * bypass work requests, at most 96 bytes each, on the control QP.
+ * If data is NULL, clear len bytes of memory to zero.
+ * The caller acquires ctrl_qp.sem before the call.
+ *
+ * When completion is non-zero the last WQE asks for a completion.  A
+ * completion is also forced every half queue so credits come back.
+ * If the queue is full this sleeps until space is available.
+ *
+ * Returns 0 on success, or -ERESTARTSYS if the wait for queue space was
+ * interrupted by a signal.
+ */
+static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
+				      u32 len, void *data, int completion)
+{
+	u32 i, nr_wqe, copy_len;
+	u8 *copy_data;
+	u8 wr_len, utx_len;	/* length in 8 byte flit */
+	enum t3_wr_flags flag;
+	u64 *wqe;
+	u64 utx_cmd;
+
+	addr &= 0x7FFFFFF;
+	nr_wqe = len % 96 ? len / 96 + 1 : len / 96;	/* 96B max per WQE */
+	DBG("wptr=%d rptr=%d len=%d, nr_wqe=%d data=%p addr=0x%0x\n",
+	     rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, nr_wqe, data,
+	     addr);
+	utx_len = 3;		/* in 32B unit */
+	for (i = 0; i < nr_wqe; i++) {
+		if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
+		           T3_CTRL_QP_SIZE_LOG2)) {
+			DBG("ctrl_qp full wtpr=0x%0x rptr=0x%0x, "
+			     "wait for more space i=%d\n", rdev_p->ctrl_qp.wptr,
+			     rdev_p->ctrl_qp.rptr, i);
+			/*
+			 * Sleep until the event handler advances rptr.  A
+			 * stray "return 0" used to precede this wait, which
+			 * reported success with the rest of the data
+			 * unwritten.
+			 */
+			if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+					     !Q_FULL(rdev_p->ctrl_qp.
+						     rptr,
+						     rdev_p->ctrl_qp.
+						     wptr,
+						     T3_CTRL_QP_SIZE_LOG2))) {
+				DBG("ctrl_qp workq wakeup due to interrupt\n");
+				return -ERESTARTSYS;
+			}
+			DBG("ctrl_qp wakeup, continue posting work request "
+			     "i=%d\n", i);
+		}
+		wqe = (u64 *) (rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+			      (1 << T3_CTRL_QP_SIZE_LOG2)));
+		flag = 0;
+		if (i == (nr_wqe - 1)) {
+			/* last WQE: len now holds the remaining byte count */
+			flag = completion ? T3_COMPLETION_FLAG : 0;
+			if (len % 32)
+				utx_len = len / 32 + 1;
+			else
+				utx_len = len / 32;
+		}
+
+		/*
+		 * Force a CQE to return the credit to the workq in case
+		 * we posted more than half the max QP size of WRs
+		 */
+		if ((i != 0) &&
+		    (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
+			flag = T3_COMPLETION_FLAG;
+			DBG("force a completion at i=%d\n", i);
+		}
+
+		/* build the utx mem command */
+		wqe += (sizeof(struct t3_bypass_wr) >> 3);
+		utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
+		utx_cmd <<= 32;
+		utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
+		*wqe = cpu_to_be64(utx_cmd);
+		wqe++;
+		copy_data = (u8 *) data + i * 96;
+		copy_len = len > 96 ? 96 : len;
+
+		/* clear memory content if data is NULL */
+		if (data)
+			memcpy(wqe, copy_data, copy_len);
+		else
+			memset(wqe, 0, copy_len);
+		/* zero-pad the payload up to the next 32B boundary */
+		if (copy_len % 32)
+			memset(((u8 *) wqe) + copy_len, 0,
+			       32 - (copy_len % 32));
+		wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
+			 (utx_len << 2);
+		wqe = (u64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+			      (1 << T3_CTRL_QP_SIZE_LOG2)));
+
+		/* wptr in the WRID[31:0] */
+		*(wqe + 1) = cpu_to_be64((u64) rdev_p->ctrl_qp.wptr);
+
+		/*
+		 * This must be the last write with a memory barrier
+		 * for the genbit
+		 */
+		build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
+			       Q_GENBIT(rdev_p->ctrl_qp.wptr,
+					T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
+			       wr_len);
+		if (flag == T3_COMPLETION_FLAG)
+			RING_DOORBELL(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
+		len -= 96;
+		rdev_p->ctrl_qp.wptr++;
+	}
+	return 0;
+}
+
+/*
+ * Write (or, when reset_tpt_entry is set, clear) a TPT entry and its
+ * optional PBL through the control QP, then wait for the write to complete.
+ *
+ * IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
+ * OUT: stag index, actual pbl_size, pbl_addr allocated.
+ * TBD: shared memory region support
+ *
+ * If *stag is T3_STAG_UNSET and this is not a reset, a stag index is
+ * allocated here.  It is returned to the pool if the operation fails
+ * before anything is posted to the control QP.
+ *
+ * Returns 0 on success, -ENOMEM if no stag or PBL space is available,
+ * -ERESTARTSYS if interrupted while taking the ctrl QP semaphore or while
+ * waiting for completion, or the error from the ctrl QP write.
+ */
+static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
+			 u32 * stag, u8 stag_state, u32 pdid,
+			 enum tpt_mem_type type, enum tpt_mem_perm perm,
+			 u32 zbva, u64 to, u32 len, u8 page_size, u64 * pbl,
+			 u32 * pbl_size, u32 * pbl_addr)
+{
+	int err;
+	struct tpt_entry tpt;
+	u32 stag_idx;
+	u32 wptr;
+	u32 pbl_size_save;
+	int stag_allocated = 0;
+
+	stag_state = stag_state > 0;
+	stag_idx = (*stag) >> 8;
+	/* pbl_size may be NULL on reset, so only read it otherwise */
+	pbl_size_save = reset_tpt_entry ? 0 : *pbl_size;
+	if ((!reset_tpt_entry) && (*stag == T3_STAG_UNSET)) {
+		stag_idx = cxio_hal_get_stag(rdev_p->rscp);
+		if (!stag_idx)
+			return -ENOMEM;
+		*stag = (stag_idx << 8) | ((*stag) & 0xFF);
+		stag_allocated = 1;
+	}
+	DBG("stag_state=%0x type=%0x pdid=%0x, stag_idx = 0x%x`\n",
+	    stag_state, type, pdid, stag_idx);
+
+	/* allocate pbl entries if requested size >0 */
+	if (pbl_size_save) {
+
+		/*
+		 * TBD: pbl resource management.
+		 * For now, give each stag a 2KB pbl region, i.e. 256 pages
+		 */
+		if ((*pbl_size) > 256) {
+			DBG("TBD: PBL allocation failure: fixed 256 entries "
+			     "for now\n");
+			err = -ENOMEM;
+			goto err_put_stag;
+		}
+		*pbl_addr = (stag_idx << 8);
+
+		/* update the actual pbl_size allocated */
+		*pbl_size = 256;
+	}
+
+	/*
+	 * Without the semaphore we must not touch the ctrl QP, and must not
+	 * call up() on it afterwards.
+	 */
+	if (down_interruptible(&rdev_p->ctrl_qp.sem)) {
+		err = -ERESTARTSYS;
+		goto err_put_stag;
+	}
+
+	/* write PBL first if any - update pbl only if pbl list exist */
+	if (pbl) {
+
+		DBG("*pdb_addr %x, pbl_base %x, pbl_size_save %d\n",
+			*pbl_addr, rdev_p->rnic_info.pbl_base, pbl_size_save);
+		err = cxio_hal_ctrl_qp_write_mem(rdev_p, ((*pbl_addr) >> 2) +
+				(rdev_p->rnic_info.pbl_base >> 5),
+				(pbl_size_save << 3), pbl, 0);
+		if (err)
+			goto ret;
+	}
+
+	/* write TPT entry */
+	if (reset_tpt_entry) {
+		memset(&tpt, 0, sizeof(tpt));
+	} else {
+		tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID |
+				V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
+				V_TPT_STAG_STATE(stag_state) |
+				V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
+		BUG_ON(page_size >= 28);
+		tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
+				F_TPT_MW_BIND_ENABLE |
+				V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
+				V_TPT_PAGE_SIZE(page_size));
+		tpt.rsvd_pbl_addr = pbl_size_save ?
+				    cpu_to_be32(V_TPT_PBL_ADDR(*pbl_addr)) : 0;
+		tpt.len = cpu_to_be32(len);
+		tpt.va_hi = cpu_to_be32((u32) (to >> 32));
+		tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
+		tpt.rsvd_bind_cnt_or_pstag = 0;
+		tpt.rsvd_pbl_size = pbl_size_save ?
+			    cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2)) : 0;
+	}
+	err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+				       stag_idx +
+				       (rdev_p->rnic_info.tpt_base >> 5),
+				       sizeof(tpt), &tpt, 1);
+
+	/* release the stag index to free pool */
+	if (reset_tpt_entry)
+		cxio_hal_put_stag(rdev_p->rscp, stag_idx);
+ret:
+	/* snapshot wptr under the semaphore; this is what we wait for */
+	wptr = rdev_p->ctrl_qp.wptr;
+	up(&rdev_p->ctrl_qp.sem);
+	if (!err) {
+		if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+					     SEQ32_GE(rdev_p->ctrl_qp.rptr,
+						      wptr)))
+			return -ERESTARTSYS;
+	}
+	return err;
+
+err_put_stag:
+	/* nothing was posted yet, so a stag allocated above can be reused */
+	if (stag_allocated)
+		cxio_hal_put_stag(rdev_p->rscp, stag_idx);
+	return err;
+}
+
+/*
+ * Allocate a new stag (TPT_NON_SHARED_MR, stag_state 0) with no PBL list.
+ *
+ * IN : stag key, pdid, pbl_size
+ * Out: stag index, actual pbl_size, and pbl_addr allocated.
+ */
+int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid,
+		       enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr)
+{
+	int err;
+
+	*stag = T3_STAG_UNSET;
+	err = __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
+			    perm, 0, 0ULL, 0, 0, NULL, pbl_size, pbl_addr);
+	return err;
+}
+
+/*
+ * Register a physical memory region: allocates a new stag and writes its
+ * TPT entry and PBL.  Returns the result of __cxio_tpt_op().
+ */
+int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid,
+			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+			   u8 page_size, u64 * pbl, u32 * pbl_size,
+			   u32 * pbl_addr)
+{
+	int err;
+
+	/* an unset stag makes __cxio_tpt_op() allocate a fresh index */
+	*stag = T3_STAG_UNSET;
+	err = __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+			    zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+	return err;
+}
+
+/*
+ * Rewrite the TPT entry and PBL for the stag the caller passes in; unlike
+ * cxio_register_phys_mem() *stag is not reset first.
+ * Returns the result of __cxio_tpt_op().
+ */
+int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid,
+			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+			   u8 page_size, u64 * pbl, u32 * pbl_size,
+			   u32 * pbl_addr)
+{
+	int err;
+
+	err = __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+			    zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+	return err;
+}
+
+/*
+ * Deregister a memory region: clears the TPT entry for stag and releases
+ * its index.  Returns the result of __cxio_tpt_op().
+ */
+int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag)
+{
+	int err;
+
+	/* TBD: check if there is any MW bound to the MR */
+	err = __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+			    NULL, NULL);
+	return err;
+}
+
+/*
+ * Allocate a memory window: a new stag of type TPT_MW with no PBL.
+ * Returns the result of __cxio_tpt_op().
+ */
+int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
+{
+	u32 no_pbl = 0;
+	int err;
+
+	*stag = T3_STAG_UNSET;
+	err = __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
+			    NULL, &no_pbl, NULL);
+	return err;
+}
+
+/*
+ * Deallocate a memory window: clears the TPT entry for stag and releases
+ * its index.  Returns the result of __cxio_tpt_op().
+ */
+int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
+{
+	int err;
+
+	err = __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+			    NULL, NULL);
+	return err;
+}
+
+/*
+ * Build a T3_WR_INIT work request from attr and send it to the adapter.
+ * Returns -ENOMEM if no skb could be allocated (GFP_ATOMIC), otherwise the
+ * result of t3c_send().
+ */
+int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
+{
+	struct t3_rdma_init_wr *wqe;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+	DBG("%s %d\n", __FUNCTION__, __LINE__);
+	wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
+
+	/* work request header */
+	wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
+	wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
+					   V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
+	wqe->wrid.id1 = 0;
+
+	/* queue and protection-domain identifiers */
+	wqe->qpid = cpu_to_be32(attr->qpid);
+	wqe->pdid = cpu_to_be32(attr->pdid);
+	wqe->scqid = cpu_to_be32(attr->scqid);
+	wqe->rcqid = cpu_to_be32(attr->rcqid);
+
+	/* receive queue and connection attributes */
+	wqe->rq_addr = cpu_to_be32(attr->rq_addr);
+	wqe->rq_size = cpu_to_be32(attr->rq_size);
+	wqe->mpaattrs = attr->mpaattrs;
+	wqe->qpcaps = attr->qpcaps;
+	wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
+	wqe->rqes_posted = cpu_to_be32(attr->rqes_posted);
+	wqe->ord = cpu_to_be32(attr->ord);
+	wqe->ird = cpu_to_be32(attr->ird);
+	wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
+	wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
+	wqe->rsvd = 0;
+
+	skb->priority = 0;	/* 0=>ToeQ; 1=>CtrlQ */
+	return t3c_send(rdev_p->t3cdev_p, skb);
+}
+
+/* Install ev_cb as the handler for events not consumed by the ctrl QP. */
+void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+	cxio_ev_cb = ev_cb;
+}
+
+/*
+ * Remove the installed event callback.  The ev_cb argument is not
+ * inspected; whatever callback is registered is cleared.
+ */
+void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+	cxio_ev_cb = NULL;
+}
+
+/*
+ * CPL_ASYNC_NOTIF handler.
+ *
+ * A CQE for the control QP advances ctrl_qp.rptr and wakes anyone sleeping
+ * on ctrl_qp.waitq; the skb is then freed.  Any other CQE is passed to the
+ * registered event callback, or freed if none is registered.
+ * Always returns 0.
+ */
+static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
+{
+	static int cnt;
+	struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
+	struct cxio_rdev *rdev_p;
+	int is_ctrl_qp;
+
+	DBG("%d: cxio_hal_ev_handler being called for CQ_ID(%d), "
+	     "overflow=%0x, notify=%0x with CQE:\n", cnt,
+	     be16_to_cpu(rsp_msg->cq_id), rsp_msg->cq_overflow,
+			      rsp_msg->cq_notify);
+	DBG("QPID=%0x genbit=%0x type=%0x Status=%0x opcode=%0x "
+	     "len=%0x wrid_hi_stag=%x wrid_low_msn=%x\n",
+	     CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
+	     CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+	     CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
+	     CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+
+	rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
+	if (!rdev_p) {
+		DBG("cxio_hal_ev_handler called by t3cdev (%p) with null!\n",
+		     t3cdev_p);
+		return 0;
+	}
+
+	is_ctrl_qp = (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID);
+	if (is_ctrl_qp) {
+		/* everything up to and including this WR has completed */
+		rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
+		wake_up_interruptible(&rdev_p->ctrl_qp.waitq);
+	}
+
+	/* the callback takes the skb; in every other case free it here */
+	if (!is_ctrl_qp && cxio_ev_cb)
+		(*cxio_ev_cb) (rdev_p, skb);
+	else
+		dev_kfree_skb_irq(skb);
+
+	DBG("ev call back wptr=%d rptr=%d\n", rdev_p->ctrl_qp.wptr,
+	       rdev_p->ctrl_qp.rptr);
+	cnt++;
+	return 0;
+}
+
+/*
+ * Open an RNIC device.  Caller takes care of locking if needed.
+ *
+ * The device is identified by rdev_p->dev_name if that is non-empty,
+ * otherwise by rdev_p->t3cdev_p.  The rdev is added to rdev_tbl and linked
+ * to its t3cdev through ->ulp, then the adapter parameters are fetched and
+ * the control QP and HAL resources are initialised.
+ *
+ * Returns 0 on success, -EBUSY if the device is already open, -EINVAL if
+ * it cannot be identified, -ENOMEM if rdev_tbl is full, or the error from
+ * a failing initialisation step.  On failure the rdev is removed from the
+ * table and the t3cdev's ulp pointer is cleared again.
+ */
+int cxio_rdev_open(struct cxio_rdev *rdev_p)
+{
+	struct net_device *netdev_p = NULL;
+	int err = 0;
+
+	if (strlen(rdev_p->dev_name)) {
+		if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
+			return -EBUSY;
+		}
+		netdev_p = dev_get_by_name(rdev_p->dev_name);
+		if (!netdev_p) {
+			DBG("dev_get_by_name(%s) failed\n", rdev_p->dev_name);
+			return -EINVAL;
+		}
+		dev_put(netdev_p);
+	} else if (rdev_p->t3cdev_p) {
+		if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) {
+			return -EBUSY;
+		}
+		netdev_p = rdev_p->t3cdev_p->lldev;
+		strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
+			T3_MAX_DEV_NAME_LEN);
+	} else {
+		DBG("t3cdev_p or dev_name must be set\n");
+		return -EINVAL;
+	}
+
+	if (cxio_hal_add_rdev(rdev_p)) {
+		DBG("max number of RNIC supported exceeded\n");
+		return -ENOMEM;
+	}
+
+	DBG("opening rnic dev %s\n", rdev_p->dev_name);
+	memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
+	if (!rdev_p->t3cdev_p)
+		rdev_p->t3cdev_p = T3CDEV(netdev_p);
+	rdev_p->t3cdev_p->ulp = (void *) rdev_p;
+	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
+					 &(rdev_p->rnic_info));
+	if (err) {
+		printk("%s t3cdev_p(%p)->ctl returned error %d.\n",
+		     __FUNCTION__, rdev_p->t3cdev_p, err);
+		goto err1;
+	}
+	DBG("rnic %s info: tpt_base=0x%0x tpt_top=0x%0x pbl_base=0x%0x "
+	     "pbl_top=0x%0x rqt_base=0x%0x, rqt_top=0x%0x\n",
+	     rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+	     rdev_p->rnic_info.tpt_top, rdev_p->rnic_info.pbl_base,
+	     rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
+	     rdev_p->rnic_info.rqt_top);
+	DBG("udbell_len=0x%0x udbell_physbase=0x%lx "
+	     "kdb_addr=%p\n", rdev_p->rnic_info.udbell_len,
+	     rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr);
+
+	err = cxio_hal_init_ctrl_qp(rdev_p);
+	if (err) {
+		printk("%s error %d initializing ctrl_qp.\n",
+		       __FUNCTION__, err);
+		goto err1;
+	}
+	err = cxio_hal_init_resource(&rdev_p->rscp, T3_MAX_NUM_STAG, 0,
+					  0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
+					  T3_MAX_NUM_PD);
+	if (err) {
+		printk(KERN_ERR "%s error %d initializing hal resources.\n",
+		       __FUNCTION__, err);
+		goto err2;
+	}
+	return 0;
+err2:
+	cxio_hal_destroy_ctrl_qp(rdev_p);
+err1:
+	/*
+	 * Unlink from the t3cdev so the event handler cannot reach an rdev
+	 * that failed to open.
+	 */
+	rdev_p->t3cdev_p->ulp = NULL;
+	cxio_hal_delete_rdev(rdev_p);
+	return err;
+}
+
+/*
+ * Close an opened rdev: remove it from rdev_tbl, unlink it from its
+ * t3cdev, and destroy the control QP and the HAL resources.
+ * A NULL rdev_p is ignored.
+ */
+void cxio_rdev_close(struct cxio_rdev *rdev_p)
+{
+	if (!rdev_p)
+		return;
+
+	cxio_hal_delete_rdev(rdev_p);
+	rdev_p->t3cdev_p->ulp = NULL;
+	cxio_hal_destroy_ctrl_qp(rdev_p);
+	cxio_hal_destroy_resource(rdev_p->rscp);
+}
+
+/*
+ * Module init for the HAL: set up the handle resource pool, clear rdev_tbl
+ * and register the CPL_ASYNC_NOTIF handler.
+ * Returns 0 on success or -ENOMEM if the handle pool cannot be created.
+ */
+int __init cxio_hal_init(void)
+{
+	int failed = cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI);
+
+	if (failed)
+		return -ENOMEM;
+
+	memset(rdev_tbl, 0, sizeof(rdev_tbl));
+	t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
+	return 0;
+}
+
+/*
+ * Module exit for the HAL: close every rdev still in rdev_tbl (empty
+ * slots are ignored by cxio_rdev_close()) and destroy the handle pool.
+ */
+void __exit cxio_hal_exit(void)
+{
+	int slot;
+
+	for (slot = 0; slot < T3_MAX_NUM_RNIC; slot++)
+		cxio_rdev_close(rdev_tbl[slot]);
+
+	cxio_hal_destroy_rhdl_resource();
+}
+
+/*
+ * Scan the valid entries of cq, starting at cq->rptr, for an RQ-type CQE
+ * with opcode cqe_opcode that belongs to wq.  The CQ is not modified.
+ *
+ * Returns 0 if such a CQE is present, or if the scan wrapped all the way
+ * round (CQ full); returns -EAGAIN if an invalid entry is reached first.
+ */
+int cxio_peek_cq(struct t3_wq *wq, struct t3_cq *cq, int cqe_opcode)
+{
+	u32 scan_ptr = cq->rptr;
+
+	for (;;) {
+		struct t3_cqe *entry = cq->queue +
+				       Q_PTR2IDX(scan_ptr, cq->size_log2);
+
+		/* ran out of valid entries: the opcode was not found */
+		if (!CQ_VLD_ENTRY(scan_ptr, cq->size_log2, entry))
+			break;
+
+		/* see if the cqe with the requested opcode is here already */
+		if ((RQ_TYPE(*entry)) &&
+		    (CQE_OPCODE(*entry) == cqe_opcode) &&
+		    (CQE_QPID(*entry) == wq->qpid))
+			return 0;
+
+		++scan_ptr;
+		if (scan_ptr == cq->rptr) {	/* CQ full */
+			/* Don't handle error here */
+			/* Don't reset timer */
+			return 0;
+		}
+	}
+
+	return -EAGAIN;
+}
+
+/*
+ * Build in *read_cqe the SQ completion for a read request.  Status and
+ * QPID are taken from the read response CQE; the WR id fields and the
+ * length are taken from the original read request WR.
+ */
+static inline void create_read_req_cqe(struct t3_rdma_read_wr *wr,
+				       struct t3_cqe *response_cqe,
+				       struct t3_cqe *read_cqe)
+{
+	DBG("%s %d enter\n", __FUNCTION__, __LINE__);
+
+	/*
+	 * The read response cqe has been found, so a proper read request
+	 * sq cqe can be handed to the user, combining the read request WR
+	 * with bits of the read response cqe.
+	 */
+	read_cqe->header = V_CQE_STATUS(CQE_STATUS(*response_cqe)) |
+			   V_CQE_OPCODE(T3_READ_REQ) |
+			   V_CQE_TYPE(1) |
+			   V_CQE_QPID(CQE_QPID(*response_cqe));
+	read_cqe->header = cpu_to_be32(read_cqe->header);
+
+	CQE_WRID_SQ_WPTR(*read_cqe) = wr->wrid.id0.hi;
+	CQE_WRID_WPTR(*read_cqe) = wr->wrid.id0.low;
+
+	/* XXX Violates RDMAC but matches IB */
+	read_cqe->len = wr->local_len;
+}
+
+/*
+ * Slow path poll code.
+ *
+ * Examine the next CQE of cq on behalf of wq.
+ *
+ * On return 0, *cqe holds the completion to report, *cookie holds the
+ * cookie of the reaped WR, and *cqe_flushed is set when the completion is
+ * reported because the WQ is in error.  On return -1 nothing is reported:
+ * either the CQE was consumed and dropped, or the WQ is stalled behind an
+ * outstanding read request.  *credit is written only when enough hardware
+ * CQEs have been consumed to be worth returning to the adapter.
+ */
+int __cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq,
+		   struct t3_cqe *cqe, u8 * cqe_flushed,
+		   u64 * cookie, u32 * credit)
+{
+	int ret = 0;
+	struct t3_cqe *rd_cqe, *peek_cqe, read_cqe;
+	u32 peekptr;
+
+	rd_cqe = cxio_next_cqe(cq);
+
+	BUG_ON(!rd_cqe);
+
+	/*
+	 * skip cqe's not affiliated with a QP.
+	 */
+	if (wq == NULL) {
+		ret = -1;
+		goto skip_cqe;
+	}
+
+	/*
+	 * If this CQE was already returned (out of order completion)
+	 * then silently toss it.
+	 */
+	if (CQE_OPCODE(*rd_cqe) == T3_READ_RESP &&
+	    (!wq->sq_oldest_wr ||
+	     (wq->sq_oldest_wr->send.rdmaop != T3_READ_REQ))) {
+		DBG("%s %d dropping old read response cqe\n",
+		    __FUNCTION__, __LINE__);
+		ret = -1;
+		goto skip_cqe;
+	}
+
+	if (CQE_OPCODE(*rd_cqe) == T3_TERMINATE) {
+		ret = -1;
+		wq->error = 1;
+		goto skip_cqe;
+	}
+
+	if (CQE_STATUS(*rd_cqe) || wq->error) {
+		ret = 0;
+		/* only a WQ that was already in error counts as flushed */
+		*cqe_flushed = wq->error;
+		wq->error = 1;
+
+		/*
+		 * T3A inserts errors into the CQE.  We cannot return
+		 * these as work completions.
+		 */
+		/* incoming write failures */
+		if ((CQE_OPCODE(*rd_cqe) == T3_RDMA_WRITE)
+		     && RQ_TYPE(*rd_cqe)) {
+			ret = -1;
+			goto skip_cqe;
+		}
+		/* incoming read request failures */
+		if ((CQE_OPCODE(*rd_cqe) == T3_READ_RESP) && SQ_TYPE(*rd_cqe)) {
+			ret = -1;
+			goto skip_cqe;
+		}
+
+		/* incoming SEND with no receive posted failures */
+		if ((CQE_OPCODE(*rd_cqe) == T3_SEND) && RQ_TYPE(*rd_cqe) &&
+		    Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
+			ret = -1;
+			goto skip_cqe;
+		}
+		goto proc_cqe;
+	}
+
+	/*
+	 * If this WQ's oldest pending SQ WR is a read request, then we
+	 * must try and find the RQ Read Response which might not
+	 * be the next CQE for that WQ on the CQ (reads can complete
+	 * out of order). If its not in the CQ yet, then we must return
+	 * "empty".  This ensures we don't complete a subsequent WR
+	 * out of order...
+	 */
+
+	/*
+	 * XXX This stalls the CQ for all QPs.  Need to redesign this later
+	 * to only stall the WQ in question.
+	 */
+	if (wq->sq_oldest_wr &&
+	    (wq->sq_oldest_wr->send.rdmaop == T3_READ_REQ)) {
+		DBG("%s %d oldest wr is read!\n", __FUNCTION__, __LINE__);
+		peekptr = cq->rptr;
+		peek_cqe = cq->queue + Q_PTR2IDX(peekptr, cq->size_log2);
+
+		/*
+		 * see if the read response is here already.
+		 */
+		while (CQ_VLD_ENTRY(peekptr, cq->size_log2, peek_cqe)) {
+			if ((RQ_TYPE(*peek_cqe)) &&
+			    (CQE_OPCODE(*peek_cqe) == T3_READ_RESP) &&
+			    (CQE_QPID(*peek_cqe) == wq->qpid)) {
+				create_read_req_cqe(&wq->sq_oldest_wr->read,
+						    peek_cqe, &read_cqe);
+				rd_cqe = &read_cqe;
+				ret = 0;
+				goto proc_cqe;
+			} else {
+				++peekptr;
+				peek_cqe = cq->queue +
+				    Q_PTR2IDX(peekptr, cq->size_log2);
+			}
+			if (peekptr == cq->rptr) {	/* CQ full */
+				wq->error = 1;
+				*cqe_flushed = 1;
+				ret = 0;
+				goto proc_cqe;
+			}
+		}
+
+		/*
+		 * The read response hasn't happened, so we cannot return
+		 * any other completion event for this WQ.
+		 */
+		ret = -1;
+		goto ret_cqe;
+	}
+
+	/*
+	 * HW only validates 4 bits of MSN.  So we must validate that
+	 * the MSN in the SEND is the next expected MSN.  If its not,
+	 * then we complete this with TPT_ERR_MSN and mark the wq in error.
+	 */
+	if (RQ_TYPE(*rd_cqe) && (CQE_WRID_MSN(*rd_cqe) != (wq->rq_rptr + 1))) {
+		ret = 0;
+		wq->error = 1;
+		/*
+		 * The header is stored big-endian: convert to CPU order, OR
+		 * in the status, convert back.  The inner call used to be
+		 * cpu_to_be32(), the wrong direction (same byte swap).
+		 */
+		(*rd_cqe).header = cpu_to_be32(be32_to_cpu((*rd_cqe).header) |
+					       V_CQE_STATUS(TPT_ERR_MSN));
+		goto proc_cqe;
+	}
+
+proc_cqe:
+	*cqe = *rd_cqe;
+
+	/*
+	 * Reap the associated WR(s) that are freed up with this
+	 * completion.
+	 */
+	if (SQ_TYPE(*rd_cqe)) {
+		BUG_ON(!wq->sq_oldest_wr);
+		wq->sq_rptr = CQE_WRID_SQ_WPTR(*rd_cqe) + 1;
+		BUG_ON((wq->sq_oldest_wr-wq->queue) !=
+		       Q_PTR2IDX(CQE_WRID_WPTR(*rd_cqe), wq->size_log2));
+		*cookie = wq->queue[Q_PTR2IDX(CQE_WRID_WPTR(*rd_cqe),
+					      wq->size_log2)
+				   ].flit[T3_SQ_COOKIE_FLIT];
+		wq->sq_oldest_wr = next_sq_wr(wq);
+	} else {
+		*cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)];
+		++(wq->rq_rptr);
+	}
+
+	/* If we created a READ_REQ CQE, don't skip this one */
+	if (rd_cqe == &read_cqe)
+		goto ret_cqe;
+skip_cqe:
+	if (SW_CQE(*rd_cqe)) {
+		DBG("skip sw cqe sw_rptr %x\n", cq->sw_rptr);
+		++cq->sw_rptr;
+	} else {
+		DBG("cq %p cqid %d skip hw cqe rptr %x\n", cq, cq->cqid,
+		    cq->rptr);
+		++cq->rptr;
+
+		/*
+		 * compute credits.
+		 */
+		if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
+		    || ((cq->rptr - cq->wptr) >= 128)) {
+			*credit = cq->rptr - cq->wptr;
+			cq->wptr = cq->rptr;
+		}
+	}
+
+ret_cqe:
+	return ret;
+}
+
+/*
+ * HAL entry points exported to other modules.
+ * NOTE(review): cxio_hal_get_rhdl, cxio_hal_put_rhdl, cxio_hal_get_pdid and
+ * cxio_hal_put_pdid are neither defined nor declared in the visible part of
+ * this file - presumably they come from the resource code; confirm.
+ */
+EXPORT_SYMBOL(__cxio_poll_cq);
+EXPORT_SYMBOL(cxio_peek_cq);
+EXPORT_SYMBOL(cxio_hal_cq_op);
+EXPORT_SYMBOL(cxio_hal_clear_qp_ctx);
+EXPORT_SYMBOL(cxio_create_cq);
+EXPORT_SYMBOL(cxio_destroy_cq);
+EXPORT_SYMBOL(cxio_resize_cq);
+EXPORT_SYMBOL(cxio_create_qp);
+EXPORT_SYMBOL(cxio_destroy_qp);
+EXPORT_SYMBOL(cxio_allocate_stag);
+EXPORT_SYMBOL(cxio_register_phys_mem);
+EXPORT_SYMBOL(cxio_reregister_phys_mem);
+EXPORT_SYMBOL(cxio_dereg_mem);
+EXPORT_SYMBOL(cxio_allocate_window);
+EXPORT_SYMBOL(cxio_deallocate_window);
+EXPORT_SYMBOL(cxio_rdma_init);
+EXPORT_SYMBOL(cxio_hal_get_rhdl);
+EXPORT_SYMBOL(cxio_hal_put_rhdl);
+EXPORT_SYMBOL(cxio_hal_get_pdid);
+EXPORT_SYMBOL(cxio_hal_put_pdid);
+EXPORT_SYMBOL(cxio_register_ev_cb);
+EXPORT_SYMBOL(cxio_unregister_ev_cb);
+EXPORT_SYMBOL(cxio_rdev_open);
+EXPORT_SYMBOL(cxio_rdev_close);
diff --git a/drivers/infiniband/hw/cxgb3/core/cxio_hal.h b/drivers/infiniband/hw/cxgb3/core/cxio_hal.h
new file mode 100644
index 0000000..37db2b5
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/core/cxio_hal.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef  __CXIO_HAL_H__
+#define  __CXIO_HAL_H__
+
+#include "t3_cpl.h"
+#include "defs.h"
+#include "t3cdev.h"
+#include "cxgb3_ctl_defs.h"
+#include "cxio_wr.h"
+
+#define T3_CTRL_QP_ID    FW_RI_SGEEC_START	/* id of the HAL control QP */
+#define T3_CTL_QP_TID	 FW_RI_TID_START	/* NOTE(review): spelled "CTL", unlike the T3_CTRL_* names around it */
+#define T3_CTRL_QP_SIZE_LOG2  10	/* log2 of the control QP size: 1 << 10 = 1024 */
+#define T3_CTRL_CQ_ID    0	/* CQ id 0; presumably the CQ used with the control QP - confirm */
+
+/* TBD: presumably the limits below are provisional - confirm before relying on them */
+#define T3_MAX_NUM_RNIC  8
+#define T3_MAX_NUM_RI (1<<15)	/* 32768 */
+#define T3_MAX_NUM_QP (1<<15)	/* 32768 */
+#define T3_MAX_NUM_CQ (1<<15)	/* 32768 */
+#define T3_MAX_NUM_PD (1<<15)	/* 32768 */
+#define T3_MAX_NUM_STAG (1<<13)	/* 8192 */
+#define T3_MAX_PBL_SIZE 256
+#define T3_MAX_RQ_SIZE 1024
+
+#define T3_STAG_UNSET 0xffffffff	/* all-ones 32-bit value; presumably marks "no stag assigned" - confirm */
+
+#define T3_MAX_DEV_NAME_LEN 32	/* size of cxio_rdev.dev_name[] */
+
+struct cxio_hal_ctrl_qp {	/* control QP state; embedded in struct cxio_rdev as ctrl_qp */
+	u32 wptr;	/* write pointer - presumably an index into workq; confirm */
+	u32 rptr;	/* read pointer - presumably an index into workq; confirm */
+	struct semaphore sem;	/* for the wptr, can sleep */
+	wait_queue_head_t waitq;	/* wait for RspQ/CQE msg */
+	union t3_wr *workq;	/* the work request queue */
+	dma_addr_t dma_addr;	/* pci bus address of the workq */
+	DECLARE_PCI_UNMAP_ADDR(mapping)	/* no ';' on purpose: the macro supplies the whole member, or nothing */
+	void __iomem *doorbell;	/* doorbell pointer; __iomem marks it as an I/O-memory address */
+};
+
+struct cxio_hal_resource {	/* one kfifo + spinlock pair per resource kind: tpt, qpid, cqid, pdid */
+	struct kfifo *tpt_fifo;
+	spinlock_t tpt_fifo_lock;	/* presumably serializes access to tpt_fifo - confirm */
+	struct kfifo *qpid_fifo;
+	spinlock_t qpid_fifo_lock;	/* presumably serializes access to qpid_fifo - confirm */
+	struct kfifo *cqid_fifo;
+	spinlock_t cqid_fifo_lock;	/* presumably serializes access to cqid_fifo - confirm */
+	struct kfifo *pdid_fifo;	/* presumably what cxio_hal_get_pdid()/cxio_hal_put_pdid() draw from - confirm */
+	spinlock_t pdid_fifo_lock;	/* presumably serializes access to pdid_fifo - confirm */
+};
+
+struct cxio_rdev {	/* per-device HAL handle; first argument of most cxio_* calls declared below */
+	char dev_name[T3_MAX_DEV_NAME_LEN];	/* fixed-size name buffer (T3_MAX_DEV_NAME_LEN bytes) */
+	struct t3cdev *t3cdev_p;	/* the underlying t3cdev; not embedded, only referenced */
+	struct rdma_info rnic_info;
+	struct cxio_hal_resource *rscp;	/* resource fifos, see struct cxio_hal_resource */
+	struct cxio_hal_ctrl_qp ctrl_qp;	/* embedded by value, not a pointer */
+	void *ulp;	/* opaque pointer; presumably owned by the upper-layer user - confirm */
+};
+
+typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
+					     struct sk_buff * skb);	/* callback type taken by cxio_register_ev_cb()/cxio_unregister_ev_cb() */
+
+struct respQ_msg_t {	/* NOTE(review): C bit-field layout is implementation-defined; check the "bit N" notes against the HW spec */
+	u32 opaque0:32;
+	u32 opaque1:8;
+	u32 cq_overflow:1;	/* bit 16 */
+	u32 opaque2:7;
+	u32 opaque3:16;	/* the fields up to here add up to 64 bits */
+
+	u32 opaque4:2;
+	u32 cq_notify:1;	/* bit 58 */
+	u32 opaque5:5;
+	u32 opaque6:24;
+	u32 opaque7:16;
+	u32 cq_id:16;		/* bit [15:0] */
+
+	struct t3_cqe cqe;	/* follows the 128 bits of bit-fields declared above */
+};
+
+enum t3_cq_opcode {	/* values for the 'op' argument of cxio_hal_cq_op() */
+	CQ_ARM_AN = 0x2,
+	CQ_ARM_SE = 0x6,
+	CQ_FORCE_AN = 0x3,
+	CQ_CREDIT_UPDATE = 0x7	/* NOTE(review): presumably the op that uses cxio_hal_cq_op()'s 'credit' argument - confirm */
+};
+
+int cxio_rdev_open(struct cxio_rdev *rdev);	/* HAL API; definitions live in cxio_hal.c */
+void cxio_rdev_close(struct cxio_rdev *rdev);
+int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
+		   enum t3_cq_opcode op, u32 credit);
+int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev, u32 qpid);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq);
+int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq);
+int cxio_peek_cq(struct t3_wq *wq, struct t3_cq *cq, int opcode);	/* the only CQ call here without an rdev argument */
+int cxio_allocate_stag(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+		       enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr);
+int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+			   u8 page_size, u64 * pbl, u32 * pbl_size,
+			   u32 * pbl_addr);
+int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+			   u8 page_size, u64 * pbl, u32 * pbl_size,
+			   u32 * pbl_addr);
+int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag);
+int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
+int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
+int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
+void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+u32 cxio_hal_get_rhdl(void);
+void cxio_hal_put_rhdl(u32 rhdl);
+u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
+void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
+int __init cxio_hal_init(void);	/* init/exit pair; neither is in the EXPORT_SYMBOL list at the end of cxio_hal.c */
+void __exit cxio_hal_exit(void);
+void cxio_flush_rq(struct cxio_rdev *dev, struct t3_wq *wq, struct t3_cq *cq);	/* NOTE(review): not in that EXPORT_SYMBOL list either - confirm */
+void cxio_flush_sq(struct cxio_rdev *dev, struct t3_wq *wq, struct t3_cq *cq);	/* NOTE(review): same as cxio_flush_rq - confirm */
+
+#define DBG(fmt, args...) pr_debug("iw_cxgb3: " fmt, ## args)	/* pr_debug() with an "iw_cxgb3: " prefix; GNU named-variadic form */
+
+#ifdef DEBUG	/* dump helpers, declared only when DEBUG is defined */
+void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag);
+void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift);
+void cxio_dump_wqe(union t3_wr *wqe);
+void cxio_dump_wce(struct t3_cqe *wce);
+void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
+void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
+#endif	/* DEBUG */
+
+#endif




More information about the general mailing list