[openib-general] [PATCH v2 02/14] CXGB3 QP and CQ.

Steve Wise swise at opengridcomputing.com
Fri Jun 23 07:29:34 PDT 2006


This patch contains qp and cq manipulation code.

ISSUE:  CQs can overflow in with the T3A hardware.  There is
no way around this for now.  The next spin of the T3 hardware
will resolve this issue and the driver will be updated.

ISSUE: QP termination/WR flushing not handled correctly.  Need firmware
support to finalize this.
---

 drivers/infiniband/hw/cxgb3/iwch_cq.c   |  228 +++++++
 drivers/infiniband/hw/cxgb3/iwch_qp.c   | 1006 +++++++++++++++++++++++++++++++
 drivers/infiniband/hw/cxgb3/iwch_user.h |   62 ++
 3 files changed, 1296 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
new file mode 100644
index 0000000..303b7f2
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "iwch_provider.h"
+#include "iwch.h"
+
+/*
+ * Get one cq entry from cxio and map it to openib.
+ *
+ * Returns:
+ * 	0 			EMPTY;
+ *	1			cqe returned
+ *	-EAGAIN 		caller must try again
+ * 	any other -errno	fatal error
+ */
+int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
+		     struct ib_wc *wc)
+{
+	struct iwch_qp *qhp = NULL;
+	struct t3_cqe cqe, *rd_cqe;
+	struct t3_wq *wq;
+	u32 credit = 0;
+	u8 cqe_flushed;
+	u64 cookie;
+	int ret = 1;
+
+	rd_cqe = cxio_next_cqe(&chp->cq);
+
+	if (!rd_cqe)
+		return 0;
+
+	qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
+	if (!qhp)
+		wq = NULL;
+	else {
+		spin_lock(&qhp->lock);
+		wq = &(qhp->wq);
+	}
+	ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
+				   &credit);
+	if (credit) {
+		PDBG("%s updating %d cq credits on id %d\n", __FUNCTION__, 
+		     credit, chp->cq.cqid);
+		cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
+	}
+
+	if (ret) {
+		ret = -EAGAIN;
+		goto out;
+	}
+	ret = 1;
+
+	BUG_ON(!qhp);
+
+	wc->wr_id = cookie;
+	wc->qp_num = qhp->wq.qpid;
+
+	PDBG("%s qpid 0x%x type %d opcode %d status 0x%d wrid hi 0x%x "
+	     "lo %x cookie %llx\n", __FUNCTION__, CQE_QPID(cqe), CQE_TYPE(cqe),
+	     CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
+	     CQE_WRID_LOW(cqe), cookie);
+
+	if (CQE_TYPE(cqe) == 0) {
+		if (!CQE_STATUS(cqe))
+			wc->byte_len = CQE_LEN(cqe);
+		else
+			wc->byte_len = 0;
+		wc->opcode = IB_WC_RECV;
+	} else {
+		switch (CQE_OPCODE(cqe)) {
+		case T3_RDMA_WRITE:
+			wc->opcode = IB_WC_RDMA_WRITE;
+			break;
+		case T3_READ_REQ:
+			wc->opcode = IB_WC_RDMA_READ;
+			wc->byte_len = CQE_LEN(cqe);
+			break;
+		case T3_SEND:
+		case T3_SEND_WITH_SE:
+			wc->opcode = IB_WC_SEND;
+			break;
+		case T3_BIND_MW:
+			wc->opcode = IB_WC_BIND_MW;
+			break;
+
+		/* these aren't supported yet */
+		case T3_SEND_WITH_INV:
+		case T3_SEND_WITH_SE_INV:
+		case T3_LOCAL_INV:
+		case T3_FAST_REGISTER:
+		default:
+			PDBG("unexpected opcode(0x%0x) in the CQE received "
+			     "for QPID=0x%0x\n", CQE_OPCODE(cqe), 
+			     CQE_QPID(cqe));
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	if (cqe_flushed) {
+		wc->status = IB_WC_WR_FLUSH_ERR;
+	} else {
+		
+		switch (CQE_STATUS(cqe)) {
+		case TPT_ERR_SUCCESS:
+			wc->status = IB_WC_SUCCESS;
+			break;
+		case TPT_ERR_STAG:
+			wc->status = IB_WC_LOC_ACCESS_ERR;
+			break;
+		case TPT_ERR_PDID:
+			wc->status = IB_WC_LOC_PROT_ERR;
+			break;
+		case TPT_ERR_QPID:
+		case TPT_ERR_ACCESS:
+			wc->status = IB_WC_LOC_ACCESS_ERR;
+			break;
+		case TPT_ERR_WRAP:
+			wc->status = IB_WC_GENERAL_ERR;
+			break;
+		case TPT_ERR_BOUND:
+			wc->status = IB_WC_LOC_LEN_ERR;
+			break;
+		case TPT_ERR_INVALIDATE_SHARED_MR:
+		case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+			wc->status = IB_WC_MW_BIND_ERR;
+			break;
+		case TPT_ERR_CRC:
+		case TPT_ERR_MARKER:
+		case TPT_ERR_PDU_LEN_ERR:
+		case TPT_ERR_OUT_OF_RQE:
+		case TPT_ERR_DDP_VERSION:
+		case TPT_ERR_RDMA_VERSION:
+		case TPT_ERR_DDP_QUEUE_NUM:
+		case TPT_ERR_MSN:
+		case TPT_ERR_TBIT:
+		case TPT_ERR_MO:
+		case TPT_ERR_MSN_RANGE:
+		case TPT_ERR_IRD_OVERFLOW:
+		case TPT_ERR_OPCODE:
+			wc->status = IB_WC_FATAL_ERR;
+			break;
+		default:
+			PDBG("unexpected cqe_status(0x%0x) for QPID=0x(%0x)\n",
+			     CQE_STATUS(cqe), CQE_QPID(cqe));
+			ret = -EINVAL;
+		}
+	}
+out:
+	if (wq)
+		spin_unlock(&qhp->lock);
+	return ret;
+}
+
+int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+	struct iwch_dev *rhp;
+	struct iwch_cq *chp;
+	unsigned long flags;
+	int npolled;
+	int err = 0;
+
+	chp = to_iwch_cq(ibcq);
+	rhp = chp->rhp;
+
+	spin_lock_irqsave(&chp->lock, flags);
+	for (npolled = 0; npolled < num_entries; ++npolled) {
+#ifdef DEBUG
+		int i=0;
+#endif
+
+		/*
+	 	 * Because T3 can post CQEs that are _not_ associated
+	 	 * with a WR, we might have to poll again after removing
+	 	 * one of these.  
+		 */
+		do {
+			err = iwch_poll_cq_one(rhp, chp, wc + npolled);
+#ifdef DEBUG
+			BUG_ON(++i > 1000);
+#endif
+		} while (err == -EAGAIN);
+		if (err <= 0)
+			break;
+	}
+	spin_unlock_irqrestore(&chp->lock, flags);
+
+	if (err < 0)
+		return err;
+	else {
+		return npolled;
+	}
+}
+
+int iwch_modify_cq(struct ib_cq *cq, int cqe)
+{
+	PDBG("iwch_modify_cq: TBD\n");
+	return 0;
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
new file mode 100644
index 0000000..f1136c1
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -0,0 +1,1006 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "iwch_provider.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+
+#define NO_SUPPORT -1
+
+static inline int iwch_build_rdma_send(union t3_wr *wqe,
+						    struct ib_send_wr *wr,
+						    u8 * flit_cnt)
+{
+	int i;
+	switch (wr->opcode) {
+	case IB_WR_SEND:
+	case IB_WR_SEND_WITH_IMM:
+		if (wr->send_flags & IB_SEND_SOLICITED)
+			wqe->send.rdmaop = T3_SEND_WITH_SE;
+		else
+			wqe->send.rdmaop = T3_SEND;
+		wqe->send.rem_stag = 0;
+		break;
+#if 0				/* Not currently supported */
+	case TYPE_SEND_INVALIDATE:
+	case TYPE_SEND_INVALIDATE_IMMEDIATE:
+		wqe->send.rdmaop = T3_SEND_WITH_INV;
+		wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+		break;
+	case TYPE_SEND_SE_INVALIDATE:
+		wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
+		wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+		break;
+#endif
+	default:
+		break;
+	}
+	if (wr->num_sge > T3_MAX_SGE)
+		return -EINVAL;
+	wqe->send.reserved = 0;
+	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+		wqe->send.plen = 4;
+		wqe->send.sgl[0].stag = wr->imm_data;
+		wqe->send.sgl[0].len = 0;
+		wqe->send.num_sgle = 0;
+		*flit_cnt = 5;
+	} else {
+		wqe->send.plen = 0;
+		for (i = 0; i < wr->num_sge; i++) {
+			if ((wqe->send.plen + wr->sg_list[i].length) < 
+			    wqe->send.plen) {
+				return -EMSGSIZE;
+			}
+			wqe->send.plen += wr->sg_list[i].length;
+			wqe->send.sgl[i].stag =
+			    cpu_to_be32(wr->sg_list[i].lkey);
+			wqe->send.sgl[i].len =
+			    cpu_to_be32(wr->sg_list[i].length);
+			wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
+		}
+		wqe->send.plen = cpu_to_be32(wqe->send.plen);
+		wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
+		*flit_cnt = 4 + ((wr->num_sge) << 1);
+	}
+	return 0;
+}
+
+static inline int iwch_build_rdma_write(union t3_wr *wqe,
+							struct ib_send_wr *wr,
+							u8 *flit_cnt)
+{
+	int i;
+	if (wr->num_sge > T3_MAX_SGE)
+		return -EINVAL;
+	wqe->write.rdmaop = T3_RDMA_WRITE;
+	wqe->write.reserved = 0;
+	wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
+	wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+
+	wqe->write.num_sgle = wr->num_sge;
+
+	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+		wqe->write.plen = cpu_to_be32(4);
+		wqe->write.sgl[0].stag = cpu_to_be32(wr->imm_data);
+		wqe->write.sgl[0].len = 0;
+		wqe->write.num_sgle = 0;
+		*flit_cnt = 6;
+	} else {
+		wqe->write.plen = 0;
+		for (i = 0; i < wr->num_sge; i++) {
+			if ((wqe->send.plen + wr->sg_list[i].length) < 
+			    wqe->send.plen) {
+				return -EMSGSIZE;
+			}
+			wqe->write.plen += wr->sg_list[i].length;
+			wqe->write.sgl[i].stag =
+			    cpu_to_be32(wr->sg_list[i].lkey);
+			wqe->write.sgl[i].len =
+			    cpu_to_be32(wr->sg_list[i].length);
+			wqe->write.sgl[i].to =
+			    cpu_to_be64(wr->sg_list[i].addr);
+		}
+		wqe->write.plen = cpu_to_be32(wqe->write.plen);
+		wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
+		*flit_cnt = 5 + ((wr->num_sge) << 1);
+	}
+	return 0;
+}
+
+static inline int iwch_build_rdma_read(union t3_wr *wqe,
+						    struct ib_send_wr *wr,
+						    u8 *flit_cnt)
+{
+	if (wr->num_sge > 1)
+		return -EINVAL;
+	wqe->read.rdmaop = T3_READ_REQ;
+	wqe->read.reserved = 0;
+	wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+	wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
+	wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
+	wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
+	wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
+	*flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+	return 0;
+}
+
+/* 
+ * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
+ */
+static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp,
+				   struct ib_sge *sg_list, u32 num_sgle,
+				   u32 * pbl_addr, u8 * page_size)
+{
+	int i;
+	struct iwch_mr *mhp;
+	u32 offset;
+	for (i = 0; i < num_sgle; i++) {
+		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
+		if (!mhp)
+			return -EIO;
+		if (!mhp->attr.state)
+			return -EIO;
+		if (mhp->attr.zbva) 
+			return -EIO;
+		if (sg_list[i].addr < mhp->attr.va_fbo)
+			return -EINVAL;
+		if (sg_list[i].addr + ((u64) sg_list[i].length) <
+		    sg_list[i].addr)
+			return -EINVAL;
+		if (sg_list[i].addr + ((u64) sg_list[i].length) >
+		    mhp->attr.va_fbo + ((u64) mhp->attr.len))
+			return -EINVAL;
+		offset = sg_list[i].addr - mhp->attr.va_fbo;
+		offset += ((u32) mhp->attr.va_fbo) %
+		    (1UL << (12 + mhp->attr.page_size));
+		pbl_addr[i] = mhp->attr.pbl_addr +
+		    (offset >> (12 + mhp->attr.page_size));
+		page_size[i] = mhp->attr.page_size;
+	}
+	return 0;
+}
+
+static inline int iwch_build_rdma_recv(struct iwch_dev *rhp,
+						    union t3_wr *wqe,
+						    struct ib_recv_wr *wr)
+{
+	int i, err = 0;
+	u32 pbl_addr[4];
+	u8 page_size[4];
+	if (wr->num_sge > T3_MAX_SGE)
+		return -EINVAL;
+	err = iwch_sgl2pbl_map(rhp, wr->sg_list, wr->num_sge, pbl_addr, 
+			       page_size);
+	if (err)
+		return err;
+	wqe->recv.pagesz[0] = page_size[0];
+	wqe->recv.pagesz[1] = page_size[1];
+	wqe->recv.pagesz[2] = page_size[2];
+	wqe->recv.pagesz[3] = page_size[3];
+	wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
+	for (i = 0; i < wr->num_sge; i++) {
+		wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
+		wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+		
+		/* to in the WQE == the offset into the page */
+		wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
+				(1UL << (12 + page_size[i])));
+
+		/* pbl_addr is the adapters address in the PBL */
+		wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
+	}
+	for (; i < T3_MAX_SGE; i++) {
+		wqe->recv.sgl[i].stag = 0;
+		wqe->recv.sgl[i].len = 0;
+		wqe->recv.sgl[i].to = 0;
+		wqe->recv.pbl_addr[i] = 0;
+	}
+	return 0;
+}
+
+int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		      struct ib_send_wr **bad_wr)
+{
+	int err = 0;
+	u8 t3_wr_flit_cnt;
+	enum t3_wr_opcode t3_wr_opcode = 0;
+	enum t3_wr_flags t3_wr_flags;
+	struct iwch_qp *qhp;
+	u32 idx;
+	union t3_wr *wqe;
+	u32 num_wrs;
+	int flag;
+
+	qhp = to_iwch_qp(ibqp);
+	spin_lock_irqsave(&qhp->lock, flag);
+	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -EINVAL;
+	}
+	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, 
+		  qhp->wq.sq_size_log2);
+	if (num_wrs <= 0) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -ENOMEM;
+	}
+	while (wr) {
+		if (num_wrs == 0) {
+			err = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+		wqe = (union t3_wr *) (qhp->wq.queue + idx);
+		t3_wr_flags = 0;
+		if (wr->send_flags & IB_SEND_SOLICITED)
+			t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
+		if (wr->send_flags & IB_SEND_FENCE)
+			t3_wr_flags |= T3_READ_FENCE_FLAG;
+		if (wr->send_flags & IB_SEND_SIGNALED)
+			t3_wr_flags |= T3_COMPLETION_FLAG;
+		switch (wr->opcode) {
+		case IB_WR_SEND:
+		case IB_WR_SEND_WITH_IMM:
+			t3_wr_opcode = T3_WR_SEND;
+			err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
+			break;
+		case IB_WR_RDMA_WRITE:
+		case IB_WR_RDMA_WRITE_WITH_IMM:
+			t3_wr_opcode = T3_WR_WRITE;
+			err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
+			break;
+		case IB_WR_RDMA_READ:
+			PDBG("%s %d - read sq_wptr %u wptr %u cookie %llx\n",
+				__FUNCTION__, __LINE__, qhp->wq.sq_wptr,
+				qhp->wq.wptr, wr->wr_id);
+			t3_wr_opcode = T3_WR_READ;
+			t3_wr_flags = 0; /* XXX */
+			err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
+			break;
+		default:
+			PDBG("iwch_post_sendq: post of type=0x%0x TBD!\n",
+			     wr->opcode);
+			err = -EINVAL;
+		}
+		if (err) {
+			*bad_wr = wr;
+			break;
+		}
+		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+		wqe->send.wrid.id0.low = qhp->wq.wptr;
+		wqe->flit[T3_SQ_COOKIE_FLIT] = wr->wr_id;
+		build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
+			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+			       0, t3_wr_flit_cnt);
+		PDBG("%s %d cookie %llx idx 0x%x sq_wptr %x sw_rptr %x wqe %p opcode %d\n", 
+		     __FUNCTION__, __LINE__, wr->wr_id, idx, 
+		     qhp->wq.sq_wptr, qhp->wq.sq_rptr, wqe, t3_wr_opcode);
+		if (!qhp->wq.sq_oldest_wr && 
+		    ((wr->send_flags & IB_SEND_SIGNALED) || 
+		     (wr->opcode == IB_WR_RDMA_READ))) {
+			qhp->wq.sq_oldest_wr = wqe;
+			PDBG("%s %d sq_oldest_wr %p\n", __FUNCTION__, __LINE__,
+				qhp->wq.sq_oldest_wr);
+		}
+		wr = wr->next;
+		num_wrs--;
+		++(qhp->wq.wptr);
+		++(qhp->wq.sq_wptr);
+	}
+	spin_unlock_irqrestore(&qhp->lock, flag);
+	RING_DOORBELL(qhp->wq.doorbell, qhp->wq.qpid);
+	return err;
+}
+
+int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr)
+{
+	int err = 0;
+	struct iwch_qp *qhp;
+	u32 idx;
+	union t3_wr *wqe;
+	u32 num_wrs;
+	int flag;
+
+	qhp = to_iwch_qp(ibqp);
+	spin_lock_irqsave(&qhp->lock, flag);
+	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -EINVAL;
+	}
+	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, 
+			    qhp->wq.rq_size_log2) - 1;
+	if (!wr) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -EINVAL;
+	}
+	while (wr) {
+		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+		wqe = (union t3_wr *) (qhp->wq.queue + idx);
+		if (num_wrs)
+			err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
+		else
+			err = -ENOMEM;
+		if (err) {
+			*bad_wr = wr;
+			break;
+		}
+		qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] = 
+			wr->wr_id;
+		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
+			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+			       0, sizeof(struct t3_receive_wr) >> 3);
+		PDBG("%s %d cookie %llx idx 0x%x rq_wptr %x rw_rptr %x "
+		     "wqe %p \n", __FUNCTION__, __LINE__, wr->wr_id, idx, 
+		     qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
+		++(qhp->wq.rq_wptr);
+		++(qhp->wq.wptr);
+		wr = wr->next;
+		num_wrs--;
+	}
+	spin_unlock_irqrestore(&qhp->lock, flag);
+	RING_DOORBELL(qhp->wq.doorbell, qhp->wq.qpid);
+	return err;
+}
+
+int iwch_bind_mw(struct ib_qp *qp,
+			     struct ib_mw *mw,
+			     struct ib_mw_bind *mw_bind)
+{
+	struct iwch_dev *rhp;
+	struct iwch_mw *mhp;
+	struct iwch_qp *qhp;
+	union t3_wr *wqe;
+	u32 pbl_addr;
+	u8 page_size;
+	u32 num_wrs;
+	int flag;
+	struct ib_sge sgl;
+	int err=0;
+	enum t3_wr_flags t3_wr_flags;
+	u32 idx;
+
+	qhp = to_iwch_qp(qp);
+	mhp = to_iwch_mw(mw);
+	rhp = qhp->rhp;
+
+	spin_lock_irqsave(&qhp->lock, flag);
+	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -EINVAL;
+	}
+	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, 
+			    qhp->wq.sq_size_log2);
+	if ((num_wrs) <= 0) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -ENOMEM;
+	}
+	idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+	PDBG("%s: idx=0x%0x, mw=0x%p, mw_bind=0x%p\n", __FUNCTION__, idx, 
+	     mw, mw_bind);
+	wqe = (union t3_wr *) (qhp->wq.queue + idx);
+	wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+	wqe->send.wrid.id0.low = qhp->wq.wptr;
+
+	t3_wr_flags = 0;
+	if (mw_bind->send_flags & IB_SEND_SIGNALED)
+		t3_wr_flags = T3_COMPLETION_FLAG;
+
+        sgl.addr = mw_bind->addr;
+        sgl.lkey = mw_bind->mr->lkey;
+        sgl.length = mw_bind->length;
+        wqe->bind.reserved = 0;
+        wqe->bind.type = T3_VA_BASED_TO;
+
+        /* TBD: check perms */
+        wqe->bind.perms = iwch_convert_access(mw_bind->mw_access_flags);
+        wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
+        wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
+        wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
+        wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
+        err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
+        if (err) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+                return err;
+	}
+        wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
+        wqe->bind.mr_pagesz = page_size;
+        wqe->bind.reserved2 = 0;
+	wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
+	build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
+		       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, 
+			        sizeof(struct t3_bind_mw_wr) >> 3);
+
+	if (!qhp->wq.sq_oldest_wr) {
+		qhp->wq.sq_oldest_wr = wqe;
+		PDBG("%s %d sq_oldest_wr %p\n", __FUNCTION__, __LINE__,
+			qhp->wq.sq_oldest_wr);
+	}
+	++(qhp->wq.wptr);
+	++(qhp->wq.sq_wptr);
+	spin_unlock_irqrestore(&qhp->lock, flag);
+
+	RING_DOORBELL(qhp->wq.doorbell, qhp->wq.qpid);
+
+	return err;
+}
+
+int iwch_query_qp(u64 rh, u64 qp_h, enum iwch_qp_query_flags flags,
+			       struct iwch_qp_attributes *attrs)
+{
+	return 0;
+}
+
+
+static inline void build_term_codes(int t3err, u8 *layer_type, u8 *ecode,
+				    int tagged)
+{
+	switch (t3err) {
+	case TPT_ERR_STAG:
+		if (tagged == 1) {
+			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+			*ecode = DDPT_INV_STAG;
+		} else if (tagged == 2) {
+			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+			*ecode = RDMAP_INV_STAG;
+		}
+		break;
+	case TPT_ERR_PDID:
+	case TPT_ERR_QPID:
+	case TPT_ERR_ACCESS:
+		if (tagged == 1) {
+			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+			*ecode = DDPT_STAG_NOT_ASSOC;
+		} else if (tagged == 2) {
+			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+			*ecode = RDMAP_STAG_NOT_ASSOC;
+		}
+		break;
+	case TPT_ERR_WRAP:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+		*ecode = RDMAP_TO_WRAP;
+		break;
+	case TPT_ERR_BOUND:
+		if (tagged == 1) {
+			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+			*ecode = DDPT_BASE_BOUNDS;
+		} else if (tagged == 2) {
+			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+			*ecode = RDMAP_BASE_BOUNDS;
+		} else {
+			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+			*ecode = DDPU_MSG_TOOBIG;
+		}
+		break;
+	case TPT_ERR_INVALIDATE_SHARED_MR:
+	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+		*ecode = RDMAP_CANT_INV_STAG;
+		break;
+	case TPT_ERR_ECC:
+	case TPT_ERR_ECC_PSTAG:
+	case TPT_ERR_INTERNAL_ERR: 
+		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
+		*ecode = 0;
+		break;
+	case TPT_ERR_OUT_OF_RQE:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_INV_MSN_NOBUF;
+		break;
+	case TPT_ERR_PBL_ADDR_BOUND:
+		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+		*ecode = DDPT_BASE_BOUNDS;
+		break;
+	case TPT_ERR_CRC:
+		*layer_type = LAYER_MPA|DDP_LLP;
+		*ecode = MPA_CRC_ERR;
+		break;
+	case TPT_ERR_MARKER:
+		*layer_type = LAYER_MPA|DDP_LLP;
+		*ecode = MPA_MARKER_ERR;
+		break;
+	case TPT_ERR_PDU_LEN_ERR:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_MSG_TOOBIG;
+		break;
+	case TPT_ERR_DDP_VERSION:
+		if (tagged) {
+			*layer_type = LAYER_DDP|DDP_TAGGED_ERR; /* XXX */
+			*ecode = DDPT_INV_VERS;
+		} else {
+			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; /* XXX */
+			*ecode = DDPU_INV_VERS;
+		}
+		break;
+	case TPT_ERR_RDMA_VERSION:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+		*ecode = RDMAP_INV_VERS;
+		break;
+	case TPT_ERR_OPCODE:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+		*ecode = RDMAP_INV_OPCODE;
+		break;
+	case TPT_ERR_DDP_QUEUE_NUM:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_INV_QN;
+		break;
+	case TPT_ERR_MSN:
+	case TPT_ERR_MSN_GAP:
+	case TPT_ERR_MSN_RANGE:
+	case TPT_ERR_IRD_OVERFLOW:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_INV_MSN_RANGE;
+		break;
+	case TPT_ERR_TBIT:
+		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
+		*ecode = 0;
+		break;
+	case TPT_ERR_MO:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_INV_MO;
+		break;
+	default: 
+		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
+		*ecode = 0;
+		break;
+	}
+}
+
+/*
+ * This posts a TERMINATE with layer=RDMA, type=catastrophic.
+ */
+int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
+{
+	int err = 0;
+	u32 idx;
+	union t3_wr *wqe;
+	int num_wrs;
+	int flag;
+	struct terminate_message *term;
+	int status;
+	int tagged = 0;
+
+	PDBG("%s %d\n", __FUNCTION__, __LINE__);
+	spin_lock_irqsave(&qhp->lock, flag);
+	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, 
+			    qhp->wq.sq_size_log2);
+	if (num_wrs <= 0) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -EIO;
+	}
+	idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+	wqe = (union t3_wr *) (qhp->wq.queue + idx);
+	if (!qhp->wq.sq_oldest_wr) {
+		qhp->wq.sq_oldest_wr = wqe;
+		PDBG("%s %d sq_oldest_wr %p\n", __FUNCTION__, __LINE__,
+			qhp->wq.sq_oldest_wr);
+	}
+	wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+	wqe->send.wrid.id0.low = qhp->wq.wptr;
+	wqe->send.rdmaop = T3_TERMINATE;
+	wqe->send.rem_stag = 0;
+	wqe->send.reserved = 0;
+	
+	/* indicate data is immediate. */
+	wqe->send.num_sgle = 0;
+
+	/* immediate data length */
+	wqe->send.plen = htonl(4);
+
+	/* immediate data starts here. */
+	term = (struct terminate_message *)wqe->send.sgl;
+	status = rsp_msg ? CQE_STATUS(rsp_msg->cqe) : TPT_ERR_INTERNAL_ERR;
+	if (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)
+		tagged = 1;
+        if ((CQE_OPCODE(rsp_msg->cqe) == T3_READ_REQ) ||
+            (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP))
+		tagged = 2;
+	build_term_codes(status, &term->layer_etype, &term->ecode, tagged);
+	term->hdrct_rsvd = 0; /* no header info */
+	
+	wqe->flit[T3_SQ_COOKIE_FLIT] = ~0;
+	build_fw_riwrh((void *)wqe, T3_WR_SEND, 
+		       T3_COMPLETION_FLAG|T3_NOTIFY_FLAG,
+		       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, 5);
+	++(qhp->wq.wptr);
+	++(qhp->wq.sq_wptr);
+	spin_unlock_irqrestore(&qhp->lock, flag);
+	RING_DOORBELL(qhp->wq.doorbell, qhp->wq.qpid);
+	return err;
+}
+
+/*
+ * Assumes qhp lock is held.
+ */
+static void flush_qp(struct iwch_qp *qhp, int *flag)
+{
+	struct iwch_cq *rchp, *schp;
+
+	rchp = qhp->rhp->cqid2hlp[qhp->attr.rcq];
+	schp = qhp->rhp->cqid2hlp[qhp->attr.scq];
+	
+	/* take a ref on the qhp since we must release the lock */
+	atomic_inc(&qhp->refcnt);
+	spin_unlock_irqrestore(&qhp->lock, *flag);
+
+	/* locking heirarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&rchp->lock, *flag);
+	spin_lock(&qhp->lock);
+	cxio_flush_rq(&qhp->rhp->rdev, &qhp->wq, &rchp->cq);
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&rchp->lock, *flag);
+
+	/* locking heirarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&schp->lock, *flag);
+	spin_lock(&qhp->lock);
+	cxio_flush_sq(&qhp->rhp->rdev, &qhp->wq, &schp->cq);
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&schp->lock, *flag);
+
+	/* deref */
+	if (atomic_dec_and_test(&qhp->refcnt))
+                wake_up(&qhp->wait);
+
+	spin_lock_irqsave(&qhp->lock, *flag);
+}
+
+static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
+				enum iwch_qp_attr_mask mask,
+				struct iwch_qp_attributes *attrs)
+{
+	struct t3_rdma_init_attr init_attr;
+	int ret;
+
+	init_attr.tid = qhp->ep->hwtid;
+	init_attr.qpid = qhp->wq.qpid;
+	init_attr.pdid = qhp->attr.pd;
+	init_attr.scqid = qhp->attr.scq;
+	init_attr.rcqid = qhp->attr.rcq;
+
+	/* TBD!!! rq table slot allocation needs 
+	 * to be implemented in the core driver.
+	 * For now, allocate 1Kx64B for each rq 
+	 */
+	init_attr.rq_addr = (qhp->ep->hwtid) << 16;
+	init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
+
+	PDBG("%s init_attr.rq_size = %d\n", __FUNCTION__, init_attr.rq_size);
+	init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE | 
+		qhp->attr.mpa_attr.recv_marker_enabled |
+		(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
+		(qhp->attr.mpa_attr.crc_enabled << 2);
+
+	/* 
+	 * XXX - The IWCM doesn't quite handle getting these
+ 	 * attrs set before going into RTS.  For now, just turn 
+	 * them on always...
+	 */
+#if 0
+	init_attr.qpcaps = qhp->attr.enableRdmaRead |
+		(qhp->attr.enableRdmaWrite << 1) |
+		(qhp->attr.enableBind << 2) |
+		(qhp->attr.enable_stag0_fastreg << 3) |
+		(qhp->attr.enable_stag0_fastreg << 4);
+#else
+	init_attr.qpcaps = 0x1f;
+#endif
+	init_attr.tcp_emss = qhp->ep->emss;
+	init_attr.ord = qhp->attr.max_ord;
+	init_attr.ird = qhp->attr.max_ird;
+	init_attr.qp_dma_addr = qhp->wq.dma_addr;
+	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
+	init_attr.rqes_posted = Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr) ? 
+				0 : 1;
+	ret = cxio_rdma_init(&rhp->rdev, &init_attr);
+	PDBG("%s ret %d\n", __FUNCTION__, ret);
+	return ret;
+}
+
+int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
+				enum iwch_qp_attr_mask mask,
+				struct iwch_qp_attributes *attrs,
+				int internal)
+{
+	int ret = 0;
+	struct iwch_qp_attributes newattr = qhp->attr;
+	int flag;
+	int disconnect = 0;
+	int terminate = 0;
+	int abort = 0;
+	int free = 0;
+	struct iwch_ep *ep = NULL;
+
+	PDBG("%s %d qhp %p qpid %d ep %p state %d -> %d\n", __FUNCTION__, 
+	     __LINE__, qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, 
+	     (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+
+	spin_lock_irqsave(&qhp->lock, flag);
+
+	/* Process attr changes if in IDLE */
+	if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
+		if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
+			ret = -EIO;
+			goto out;
+		}
+		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
+			newattr.enable_rdma_read = attrs->enable_rdma_read;
+		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
+			newattr.enable_rdma_write = attrs->enable_rdma_write;
+		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
+			newattr.enable_bind = attrs->enable_bind;
+		if (mask & IWCH_QP_ATTR_MAX_ORD) {
+			if (attrs->max_ord > 
+			    rhp->attr.max_rdma_read_qp_depth) {
+				ret = -EINVAL;
+				goto out;
+			}
+			newattr.max_ord = attrs->max_ord;
+		}
+		if (mask & IWCH_QP_ATTR_MAX_IRD) {
+			if (attrs->max_ird > 
+		  	    rhp->attr.max_rdma_reads_per_qp) {
+				ret = -EINVAL;
+				goto out;
+			}
+			newattr.max_ird = attrs->max_ird;
+		}
+		qhp->attr = newattr;
+	}
+	
+	if (!(mask & IWCH_QP_ATTR_NEXT_STATE)) 
+		goto out;
+	if (qhp->attr.state == attrs->next_state)
+		goto out;
+
+	switch (qhp->attr.state) {
+	case IWCH_QP_STATE_IDLE:
+		switch (attrs->next_state) {
+		case IWCH_QP_STATE_RTS: 
+			if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
+				ret = -EINVAL;
+				goto out;
+			}
+			if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
+				ret = -EINVAL;
+				goto out;
+			}
+			qhp->attr.mpa_attr = attrs->mpa_attr;
+			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
+			qhp->ep = qhp->attr.llp_stream_handle;
+			qhp->attr.state = IWCH_QP_STATE_RTS;
+
+			/*
+			 * Ref the endpoint here and deref when we
+	 		 * disassociate the endpoint from the QP.  This
+			 * happens in CLOSING->IDLE transition or *->ERROR
+			 * transition.
+			 */
+			atomic_inc(&qhp->ep->com.refcnt);
+			spin_unlock_irqrestore(&qhp->lock, flag);
+			ret = rdma_init(rhp, qhp, mask, attrs);
+			spin_lock_irqsave(&qhp->lock, flag);
+			if (ret)
+				goto err;
+			break;
+		case IWCH_QP_STATE_ERROR:
+			qhp->attr.state = IWCH_QP_STATE_ERROR;
+			flush_qp(qhp, &flag);
+			break;
+		default:
+			ret = -EINVAL;	
+			goto out;
+		}
+		break;
+	case IWCH_QP_STATE_RTS:
+		switch (attrs->next_state) {
+		case IWCH_QP_STATE_CLOSING:
+			BUG_ON(atomic_read(&qhp->ep->com.refcnt) < 2);
+			qhp->attr.state = IWCH_QP_STATE_CLOSING;
+			if (Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr)) {
+				if (!internal) {
+					abort=0;
+					disconnect = 1;
+					ep = qhp->ep;
+				}
+			} else {
+				if (!internal) {
+					abort=1;
+					disconnect = 1;
+					ep = qhp->ep;
+				}
+				ret = -EINVAL;
+				goto err;
+			}
+			break;
+		case IWCH_QP_STATE_TERMINATE:
+			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
+			if (!internal) 
+				terminate = 1;
+			break;
+		case IWCH_QP_STATE_ERROR:
+			qhp->attr.state = IWCH_QP_STATE_ERROR;
+			if (!internal) {
+				abort=1;
+				disconnect = 1;
+				ep = qhp->ep;
+			}
+			goto err;
+			break;
+		default:
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	case IWCH_QP_STATE_CLOSING:
+		if (!internal) {
+			ret = -EINVAL;
+			goto out;
+		}
+		switch (attrs->next_state) {
+			case IWCH_QP_STATE_IDLE:
+				qhp->attr.state = IWCH_QP_STATE_IDLE;
+				qhp->attr.llp_stream_handle = NULL;
+				free_ep(&qhp->ep->com);
+				qhp->ep = NULL;
+				wake_up(&qhp->wait);
+				break;
+			case IWCH_QP_STATE_ERROR:
+				goto err;
+			default:
+				ret = -EINVAL;
+				goto err;
+		}
+		break;
+	case IWCH_QP_STATE_ERROR:
+		if (attrs->next_state != IWCH_QP_STATE_IDLE) {
+			ret = -EINVAL;
+			goto out;
+		}
+		
+		if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) || 
+		    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
+			ret = -EINVAL;
+			goto out;
+		}
+		qhp->attr.state = IWCH_QP_STATE_IDLE;
+		memset(&qhp->attr, 0, sizeof(qhp->attr));
+		break;
+	case IWCH_QP_STATE_TERMINATE:
+		if (!internal) {
+			ret = -EINVAL;
+			goto out;
+		}
+		goto err;
+		break;
+	default:
+		printk(KERN_ERR "%s in a bad state %d\n", 
+		       __FUNCTION__, qhp->attr.state);
+		ret = -EINVAL;
+		goto err;
+		break;
+	}
+	goto out;
+err:
+	PDBG("%s disassociating LLP EP %p qpid %d\n", __FUNCTION__, qhp->ep, 
+	     qhp->wq.qpid);
+
+	/* disassociate the LLP connection */
+	qhp->attr.llp_stream_handle = NULL;
+	ep = qhp->ep;
+	qhp->ep = NULL;
+	qhp->attr.state = IWCH_QP_STATE_ERROR;
+	free=1;
+	wake_up(&qhp->wait);
+	BUG_ON(!ep);
+#ifdef notyet
+	flush_qp(qhp, flag);
+#endif
+out:
+	spin_unlock_irqrestore(&qhp->lock, flag);
+
+	if (terminate)
+		iwch_post_terminate(qhp, NULL);
+
+	/*
+	 * If disconnect is 1, then we need to initiate a disconnect
+	 * on the EP.  This can be a normal close (RTS->CLOSING) or
+	 * an abnormal close (RTS/CLOSING->ERROR).
+	 */
+	if (disconnect)
+		iwch_ep_disconnect(ep, abort, GFP_KERNEL);
+
+	/* 
+	 * If free is 1, then we've disassociated the EP from the QP 
+	 * and we need to dereference the EP.
+	 */
+	if (free)
+		free_ep(&ep->com);
+
+	PDBG("%s %d state -> %d\n", __FUNCTION__, __LINE__, qhp->attr.state);
+	return ret;
+}
+
+static int quiesce_qp(struct iwch_qp *qhp)
+{
+	spin_lock_irq(&qhp->lock);
+	iwch_quiesce_tid(qhp->ep);
+	qhp->flags |= QP_QUIESCED;
+	spin_unlock_irq(&qhp->lock);
+	return 0;
+}
+
+static int resume_qp(struct iwch_qp *qhp)
+{
+	spin_lock_irq(&qhp->lock);
+	iwch_resume_tid(qhp->ep);
+	qhp->flags &= ~QP_QUIESCED;
+	spin_unlock_irq(&qhp->lock);
+	return 0;
+}
+
+int iwch_quiesce_qps(struct iwch_cq *chp)
+{
+	int i;
+	struct iwch_qp *qhp;
+
+	for (i=0; i < T3_MAX_NUM_QP; i++) {
+		qhp = chp->rhp->qpid2hlp[i];
+		if (!qhp)
+			continue;
+		if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
+			quiesce_qp(qhp);
+			continue;
+		}
+		if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp)) 
+			quiesce_qp(qhp);
+	}
+	return 0;
+}
+
+int iwch_resume_qps(struct iwch_cq *chp)
+{
+	int i;
+	struct iwch_qp *qhp;
+
+	for (i=0; i < T3_MAX_NUM_QP; i++) {
+		qhp = chp->rhp->qpid2hlp[i];
+		if (!qhp)
+			continue;
+		if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
+			resume_qp(qhp);
+			continue;
+		}
+		if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
+			resume_qp(qhp);
+	}
+	return 0;
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
new file mode 100644
index 0000000..ab87f72
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_USER_H__
+#define __IWCH_USER_H__
+
+#define IWCH_UVERBS_ABI_VERSION	1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct iwch_create_cq_resp {
+	__u32 cqid;
+	__u32 entries;		/* actual number of entries after creation */
+	__u64 physaddr;		/* library mmaps this to get addressability */
+	__u64 queue;
+};
+
+struct iwch_create_qp_resp {
+	__u32 qpid;
+	__u32 entries;		/* actual number of entries after creation */
+	__u64 physaddr;		/* library mmaps this to get addressability */
+	__u64 physsize;		/* library mmaps this to get addressability */
+	__u64 queue;
+	__u64 sq_db_page;
+	__u64 rq_db_page;
+};
+#endif




More information about the general mailing list