[ofa-general] [PATCH 7/8 V3] core: Add XRC receive-only qp support

Jack Morgenstein jackm at dev.mellanox.co.il
Sat Feb 2 23:49:44 PST 2008


ib/core:  Implement XRC receive-only QPs for userspace apps.

Added creation of XRC receive-only QPs for userspace, which
reside in kernel space (user cannot post-to or poll these QPs).

Motivation:  MPI community requires XRC receive QPs which will
not be destroyed when the creating process terminates.

Solution:  Userspace requests that a QP be created in kernel space.
Each userspace process using that QP (i.e. receiving packets on an XRC SRQ
via the qp), registers with that QP (-- the creator is also registered, whether
or not it is a user of the QP). When the last userspace user unregisters with
the QP, it is destroyed.  Unregistration is also part of userspace process
cleanup, so there is no leakage.

This patch implements the kernel procedures to implement the following
(new) libibverbs API:
ibv_create_xrc_rcv_qp
ibv_modify_xrc_rcv_qp
ibv_query_xrc_rcv_qp
ibv_reg_xrc_rcv_qp
ibv_unreg_xrc_rcv_qp

In addition, the patch implements the foundation for distributing
XRC-receive-only QP events to userspace processes registered with that QP.

Finally, the patch modifies ib_uverbs_close_xrc_domain() to return BUSY
if any resources are still in use by the process, so that the XRC rcv-only
QP cleanup can operate properly.

V2:
Fixed bug in ib_uverbs_close_xrc_domain.
We need to allow the process to successfully close its copy of the domain,
even if it still has undestroyed XRC QPs -- these will continue to operate,
although it will not be possible to create new ones (there will be no Oops).

However, we need to check that there are no outstanding xrc-qp-registrations:
the cleanup procedure for this depends on the xrc domain still being accessible
in this process in order to perform all needed un-registrations (and thus prevent
resource leakage).

V3:
Fix thinko in ib_uverbs_reg_xrc_rcv_qp, ib_uverbs_unreg_xrc_rcv_qp,
and ib_uverbs_modify_xrc_rcv_qp: on success, incorrectly returned 0 
instead of input length.

Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>

Index: infiniband/include/rdma/ib_verbs.h
===================================================================
--- infiniband.orig/include/rdma/ib_verbs.h	2008-01-28 12:20:55.000000000 +0200
+++ infiniband/include/rdma/ib_verbs.h	2008-01-28 12:22:09.000000000 +0200
@@ -285,6 +285,10 @@ enum ib_event_type {
 	IB_EVENT_CLIENT_REREGISTER
 };
 
+enum ib_event_flags {
+	IB_XRC_QP_EVENT_FLAG = 0x80000000,
+};
+
 struct ib_event {
 	struct ib_device	*device;
 	union {
@@ -292,6 +296,7 @@ struct ib_event {
 		struct ib_qp	*qp;
 		struct ib_srq	*srq;
 		u8		port_num;
+		u32		xrc_qp_num;
 	} element;
 	enum ib_event_type	event;
 };
@@ -492,6 +497,7 @@ enum ib_qp_type {
 
 enum qp_create_flags {
 	QP_CREATE_LSO = 1 << 0,
+	XRC_RCV_QP    = 1 << 1,
 };
 
 struct ib_qp_init_attr {
@@ -723,6 +729,7 @@ struct ib_ucontext {
 	struct list_head	srq_list;
 	struct list_head	ah_list;
 	struct list_head	xrc_domain_list;
+	struct list_head	xrc_reg_qp_list;
 	int			closing;
 };
 
@@ -744,6 +751,12 @@ struct ib_udata {
 	size_t       outlen;
 };
 
+struct ib_uxrc_rcv_object {
+	struct list_head	list;		/* link to context's list */
+	u32			qp_num;
+	u32			domain_handle;
+};
+
 struct ib_pd {
 	struct ib_device       *device;
 	struct ib_uobject      *uobject;
@@ -1053,6 +1066,23 @@ struct ib_device {
 						 struct ib_ucontext *context,
 						 struct ib_udata *udata);
 	int                        (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+	int			   (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr,
+							u32* qp_num);
+	int			   (*modify_xrc_rcv_qp)(struct ib_xrcd *xrcd,
+							u32 qp_num,
+							struct ib_qp_attr *attr,
+							int attr_mask);
+	int			   (*query_xrc_rcv_qp)(struct ib_xrcd *xrcd,
+						       u32 qp_num,
+						       struct ib_qp_attr *attr,
+						       int attr_mask,
+						       struct ib_qp_init_attr *init_attr);
+	int 			   (*reg_xrc_rcv_qp)(struct ib_xrcd *xrcd,
+						     void *context,
+						     u32 qp_num);
+	int 			   (*unreg_xrc_rcv_qp)(struct ib_xrcd *xrcd,
+						       void *context,
+						       u32 qp_num);
 
 	struct ib_dma_mapping_ops   *dma_ops;
 
Index: infiniband/drivers/infiniband/core/uverbs_main.c
===================================================================
--- infiniband.orig/drivers/infiniband/core/uverbs_main.c	2008-01-28 12:20:55.000000000 +0200
+++ infiniband/drivers/infiniband/core/uverbs_main.c	2008-01-28 12:20:56.000000000 +0200
@@ -114,6 +114,11 @@ static ssize_t (*uverbs_cmd_table[])(str
 	[IB_USER_VERBS_CMD_CREATE_XRC_SRQ]	= ib_uverbs_create_xrc_srq,
 	[IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN]	= ib_uverbs_open_xrc_domain,
 	[IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN]	= ib_uverbs_close_xrc_domain,
+	[IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP]	= ib_uverbs_create_xrc_rcv_qp,
+	[IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP]	= ib_uverbs_modify_xrc_rcv_qp,
+	[IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP]	= ib_uverbs_query_xrc_rcv_qp,
+	[IB_USER_VERBS_CMD_REG_XRC_RCV_QP]	= ib_uverbs_reg_xrc_rcv_qp,
+	[IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP]	= ib_uverbs_unreg_xrc_rcv_qp,
 };
 
 static struct vfsmount *uverbs_event_mnt;
@@ -191,6 +196,7 @@ static int ib_uverbs_cleanup_ucontext(st
 				      struct ib_ucontext *context)
 {
 	struct ib_uobject *uobj, *tmp;
+	struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1;
 
 	if (!context)
 		return 0;
@@ -251,6 +257,13 @@ static int ib_uverbs_cleanup_ucontext(st
 		kfree(uobj);
 	}
 
+	list_for_each_entry_safe(xrc_qp_obj, tmp1, &context->xrc_reg_qp_list, list) {
+		list_del(&xrc_qp_obj->list);
+		ib_uverbs_cleanup_xrc_rcv_qp(file, xrc_qp_obj->domain_handle,
+					     xrc_qp_obj->qp_num);
+		kfree(xrc_qp_obj);
+	}
+
 	mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
 	list_for_each_entry_safe(uobj, tmp, &context->xrc_domain_list, list) {
 		struct ib_xrcd *xrcd = uobj->object;
@@ -506,6 +519,12 @@ void ib_uverbs_event_handler(struct ib_e
 				NULL, NULL);
 }
 
+void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, void *context_ptr)
+{
+	ib_uverbs_async_handler(context_ptr, event->element.xrc_qp_num,
+				event->event, NULL, NULL);
+}
+
 struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 					int is_async, int *fd)
 {
Index: infiniband/drivers/infiniband/core/uverbs_cmd.c
===================================================================
--- infiniband.orig/drivers/infiniband/core/uverbs_cmd.c	2008-01-28 12:20:55.000000000 +0200
+++ infiniband/drivers/infiniband/core/uverbs_cmd.c	2008-01-28 12:20:56.000000000 +0200
@@ -315,6 +315,7 @@ ssize_t ib_uverbs_get_context(struct ib_
 	INIT_LIST_HEAD(&ucontext->srq_list);
 	INIT_LIST_HEAD(&ucontext->ah_list);
 	INIT_LIST_HEAD(&ucontext->xrc_domain_list);
+	INIT_LIST_HEAD(&ucontext->xrc_reg_qp_list);
 	ucontext->closing = 0;
 
 	resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -1080,6 +1081,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
 		goto err_put;
 	}
 
+	attr.create_flags  = 0;
 	attr.event_handler = ib_uverbs_qp_event_handler;
 	attr.qp_context    = file;
 	attr.send_cq       = scq;
@@ -2561,6 +2563,7 @@ ssize_t ib_uverbs_close_xrc_domain(struc
 				   int out_len)
 {
 	struct ib_uverbs_close_xrc_domain cmd;
+	struct ib_uxrc_rcv_object  *tmp;
 	struct ib_uobject          *uobj;
 	struct ib_xrcd             *xrcd = NULL;
 	struct inode		   *inode = NULL;
@@ -2576,6 +2579,18 @@ ssize_t ib_uverbs_close_xrc_domain(struc
 		goto err_unlock_mutex;
 	}
 
+	mutex_lock(&file->mutex);
+	list_for_each_entry(tmp, &file->ucontext->xrc_reg_qp_list, list)
+		if (cmd.xrcd_handle == tmp->domain_handle) {
+			ret = -EBUSY;
+			break;
+		}
+	mutex_unlock(&file->mutex);
+	if (ret) {
+		put_uobj_write(uobj);
+		goto err_unlock_mutex;
+	}
+
 	xrcd = (struct ib_xrcd *) (uobj->object);
 	inode = xrcd->inode;
 
@@ -2611,7 +2626,7 @@ err_unlock_mutex:
 }
 
 void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
-				   struct ib_xrcd *xrcd)
+			    struct ib_xrcd *xrcd)
 {
 	struct inode		   *inode = NULL;
 	int                         ret = 0;
@@ -2625,4 +2640,353 @@ void ib_uverbs_dealloc_xrcd(struct ib_de
 		xrcd_table_delete(ib_dev, inode);
 }
 
+ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_create_xrc_rcv_qp	cmd;
+	struct ib_uverbs_create_xrc_rcv_qp_resp resp;
+	struct ib_uxrc_rcv_object      *obj;
+	struct ib_qp_init_attr		init_attr;
+	struct ib_xrcd		       *xrcd;
+	struct ib_uobject	       *xrcd_uobj;
+	u32				qp_num;
+	int				err;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj);
+	if (!xrcd) {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	memset(&init_attr, 0, sizeof init_attr);
+	init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler;
+	init_attr.qp_context	= file;
+	init_attr.srq		= NULL;
+	init_attr.sq_sig_type	= cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+	init_attr.qp_type	= IB_QPT_XRC;
+	init_attr.xrc_domain	= xrcd;
+	init_attr.create_flags	= XRC_RCV_QP;
+
+	init_attr.cap.max_send_wr	= 1;
+	init_attr.cap.max_recv_wr	= 0;
+	init_attr.cap.max_send_sge	= 1;
+	init_attr.cap.max_recv_sge	= 0;
+	init_attr.cap.max_inline_data	= 0;
+
+	err = xrcd->device->create_xrc_rcv_qp(&init_attr, &qp_num);
+	if (err)
+		goto err_put;
+
+	memset(&resp, 0, sizeof resp);
+	resp.qpn = qp_num;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		err = -EFAULT;
+		goto err_destroy;
+	}
+
+	atomic_inc(&xrcd->usecnt);
+	put_xrcd_read(xrcd_uobj);
+	obj->qp_num = qp_num;
+	obj->domain_handle = cmd.xrc_domain_handle;
+	mutex_lock(&file->mutex);
+	list_add_tail(&obj->list, &file->ucontext->xrc_reg_qp_list);
+	mutex_unlock(&file->mutex);
+
+	return in_len;
+
+err_destroy:
+	xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num);
+err_put:
+	put_xrcd_read(xrcd_uobj);
+err_out:
+	kfree(obj);
+	return err;
+}
+
+ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_modify_xrc_rcv_qp      cmd;
+	struct ib_qp_attr	       *attr;
+	struct ib_xrcd		       *xrcd;
+	struct ib_uobject	       *xrcd_uobj;
+	int				err;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	attr = kmalloc(sizeof *attr, GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj);
+	if (!xrcd) {
+		kfree(attr);
+		return -EINVAL;
+	}
+
+	memset(attr, 0, sizeof *attr);
+	attr->qp_state		  = cmd.qp_state;
+	attr->cur_qp_state	  = cmd.cur_qp_state;
+	attr->qp_access_flags	  = cmd.qp_access_flags;
+	attr->pkey_index	  = cmd.pkey_index;
+	attr->port_num		  = cmd.port_num;
+	attr->path_mtu		  = cmd.path_mtu;
+	attr->path_mig_state	  = cmd.path_mig_state;
+	attr->qkey		  = cmd.qkey;
+	attr->rq_psn		  = cmd.rq_psn;
+	attr->sq_psn		  = cmd.sq_psn;
+	attr->dest_qp_num	  = cmd.dest_qp_num;
+	attr->alt_pkey_index	  = cmd.alt_pkey_index;
+	attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
+	attr->max_rd_atomic	  = cmd.max_rd_atomic;
+	attr->max_dest_rd_atomic  = cmd.max_dest_rd_atomic;
+	attr->min_rnr_timer	  = cmd.min_rnr_timer;
+	attr->port_num		  = cmd.port_num;
+	attr->timeout		  = cmd.timeout;
+	attr->retry_cnt		  = cmd.retry_cnt;
+	attr->rnr_retry		  = cmd.rnr_retry;
+	attr->alt_port_num	  = cmd.alt_port_num;
+	attr->alt_timeout	  = cmd.alt_timeout;
+
+	memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
+	attr->ah_attr.grh.flow_label	    = cmd.dest.flow_label;
+	attr->ah_attr.grh.sgid_index	    = cmd.dest.sgid_index;
+	attr->ah_attr.grh.hop_limit	    = cmd.dest.hop_limit;
+	attr->ah_attr.grh.traffic_class	    = cmd.dest.traffic_class;
+	attr->ah_attr.dlid		    = cmd.dest.dlid;
+	attr->ah_attr.sl		    = cmd.dest.sl;
+	attr->ah_attr.src_path_bits	    = cmd.dest.src_path_bits;
+	attr->ah_attr.static_rate	    = cmd.dest.static_rate;
+	attr->ah_attr.ah_flags		    = cmd.dest.is_global ? IB_AH_GRH : 0;
+	attr->ah_attr.port_num		    = cmd.dest.port_num;
+
+	memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
+	attr->alt_ah_attr.grh.flow_label    = cmd.alt_dest.flow_label;
+	attr->alt_ah_attr.grh.sgid_index    = cmd.alt_dest.sgid_index;
+	attr->alt_ah_attr.grh.hop_limit     = cmd.alt_dest.hop_limit;
+	attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
+	attr->alt_ah_attr.dlid 	    	    = cmd.alt_dest.dlid;
+	attr->alt_ah_attr.sl		    = cmd.alt_dest.sl;
+	attr->alt_ah_attr.src_path_bits	    = cmd.alt_dest.src_path_bits;
+	attr->alt_ah_attr.static_rate	    = cmd.alt_dest.static_rate;
+	attr->alt_ah_attr.ah_flags	    = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
+	attr->alt_ah_attr.port_num	    = cmd.alt_dest.port_num;
+
+	err = xrcd->device->modify_xrc_rcv_qp(xrcd, cmd.qp_num, attr, cmd.attr_mask);
+	put_xrcd_read(xrcd_uobj);
+	kfree(attr);
+	return err ? err : in_len;
+}
+
+ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file,
+				   const char __user *buf, int in_len,
+				   int out_len)
+{
+	struct ib_uverbs_query_xrc_rcv_qp cmd;
+	struct ib_uverbs_query_qp_resp	  resp;
+	struct ib_qp_attr		*attr;
+	struct ib_qp_init_attr		*init_attr;
+	struct ib_xrcd			*xrcd;
+	struct ib_uobject		*xrcd_uobj;
+	int				 ret;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	attr      = kmalloc(sizeof *attr, GFP_KERNEL);
+	init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
+	if (!attr || !init_attr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj);
+	if (!xrcd) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr,
+					     cmd.attr_mask, init_attr);
+
+	put_xrcd_read(xrcd_uobj);
+
+	if (ret)
+		goto out;
+
+	memset(&resp, 0, sizeof resp);
+	resp.qp_state		    = attr->qp_state;
+	resp.cur_qp_state	    = attr->cur_qp_state;
+	resp.path_mtu		    = attr->path_mtu;
+	resp.path_mig_state	    = attr->path_mig_state;
+	resp.qkey		    = attr->qkey;
+	resp.rq_psn		    = attr->rq_psn;
+	resp.sq_psn		    = attr->sq_psn;
+	resp.dest_qp_num	    = attr->dest_qp_num;
+	resp.qp_access_flags	    = attr->qp_access_flags;
+	resp.pkey_index		    = attr->pkey_index;
+	resp.alt_pkey_index	    = attr->alt_pkey_index;
+	resp.sq_draining	    = attr->sq_draining;
+	resp.max_rd_atomic	    = attr->max_rd_atomic;
+	resp.max_dest_rd_atomic	    = attr->max_dest_rd_atomic;
+	resp.min_rnr_timer	    = attr->min_rnr_timer;
+	resp.port_num		    = attr->port_num;
+	resp.timeout		    = attr->timeout;
+	resp.retry_cnt		    = attr->retry_cnt;
+	resp.rnr_retry		    = attr->rnr_retry;
+	resp.alt_port_num	    = attr->alt_port_num;
+	resp.alt_timeout	    = attr->alt_timeout;
+
+	memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
+	resp.dest.flow_label	    = attr->ah_attr.grh.flow_label;
+	resp.dest.sgid_index	    = attr->ah_attr.grh.sgid_index;
+	resp.dest.hop_limit	    = attr->ah_attr.grh.hop_limit;
+	resp.dest.traffic_class	    = attr->ah_attr.grh.traffic_class;
+	resp.dest.dlid		    = attr->ah_attr.dlid;
+	resp.dest.sl		    = attr->ah_attr.sl;
+	resp.dest.src_path_bits	    = attr->ah_attr.src_path_bits;
+	resp.dest.static_rate	    = attr->ah_attr.static_rate;
+	resp.dest.is_global	    = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
+	resp.dest.port_num	    = attr->ah_attr.port_num;
+
+	memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
+	resp.alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
+	resp.alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
+	resp.alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
+	resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
+	resp.alt_dest.dlid	    = attr->alt_ah_attr.dlid;
+	resp.alt_dest.sl	    = attr->alt_ah_attr.sl;
+	resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
+	resp.alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
+	resp.alt_dest.is_global	    = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
+	resp.alt_dest.port_num	    = attr->alt_ah_attr.port_num;
+
+	resp.max_send_wr	    = init_attr->cap.max_send_wr;
+	resp.max_recv_wr	    = init_attr->cap.max_recv_wr;
+	resp.max_send_sge	    = init_attr->cap.max_send_sge;
+	resp.max_recv_sge	    = init_attr->cap.max_recv_sge;
+	resp.max_inline_data	    = init_attr->cap.max_inline_data;
+	resp.sq_sig_all		    = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	kfree(attr);
+	kfree(init_attr);
+
+	return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_reg_xrc_rcv_qp  cmd;
+	struct ib_uxrc_rcv_object	*qp_obj, *tmp;
+	struct ib_xrcd			*xrcd;
+	struct ib_uobject		*xrcd_uobj;
+	int				 ret;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	qp_obj = kmalloc(sizeof *qp_obj, GFP_KERNEL);
+	if (!qp_obj)
+		return -ENOMEM;
+
+	xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj);
+	if (!xrcd) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	ret = xrcd->device->reg_xrc_rcv_qp(xrcd, file, cmd.qp_num);
+	if (ret)
+		goto err_put;
+
+	atomic_inc(&xrcd->usecnt);
+	put_xrcd_read(xrcd_uobj);
+	mutex_lock(&file->mutex);
+	list_for_each_entry(tmp, &file->ucontext->xrc_reg_qp_list, list)
+		if (cmd.qp_num == tmp->qp_num) {
+			kfree(qp_obj);
+			mutex_unlock(&file->mutex);
+			put_xrcd_read(xrcd_uobj);
+			return 0;
+		}
+	qp_obj->qp_num = cmd.qp_num;
+	qp_obj->domain_handle = cmd.xrc_domain_handle;
+	list_add_tail(&qp_obj->list, &file->ucontext->xrc_reg_qp_list);
+	mutex_unlock(&file->mutex);
+	return in_len;
+
+err_put:
+	put_xrcd_read(xrcd_uobj);
+err_out:
+
+	kfree(qp_obj);
+	return ret;
+}
+
+int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file,
+				 u32 domain_handle, u32 qp_num)
+{
+	struct ib_xrcd *xrcd;
+	struct ib_uobject *xrcd_uobj;
+	int err;
+
+	xrcd = idr_read_xrcd(domain_handle, file->ucontext, &xrcd_uobj);
+	if (!xrcd)
+		return -EINVAL;
 
+	err = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num);
+
+	if (!err)
+		atomic_dec(&xrcd->usecnt);
+	put_xrcd_read(xrcd_uobj);
+	return err;
+}
+
+ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_unreg_xrc_rcv_qp cmd;
+	struct ib_uxrc_rcv_object *qp_obj, *tmp;
+	int ret;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	ret = ib_uverbs_cleanup_xrc_rcv_qp(file, cmd.xrc_domain_handle, cmd.qp_num);
+	if (ret)
+		return ret;
+
+	mutex_lock(&file->mutex);
+	list_for_each_entry_safe(qp_obj, tmp, &file->ucontext->xrc_reg_qp_list, list)
+		if (cmd.qp_num == qp_obj->qp_num) {
+			list_del(&qp_obj->list);
+			kfree(qp_obj);
+			break;
+		}
+	mutex_unlock(&file->mutex);
+	return in_len;
+
+}
Index: infiniband/include/rdma/ib_user_verbs.h
===================================================================
--- infiniband.orig/include/rdma/ib_user_verbs.h	2008-01-28 12:20:54.000000000 +0200
+++ infiniband/include/rdma/ib_user_verbs.h	2008-01-28 12:20:56.000000000 +0200
@@ -86,7 +86,12 @@ enum {
 	IB_USER_VERBS_CMD_POST_SRQ_RECV,
 	IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
 	IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN,
-	IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN
+	IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN,
+	IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP,
+	IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP,
+	IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP,
+	IB_USER_VERBS_CMD_REG_XRC_RCV_QP,
+	IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP,
 };
 
 /*
@@ -714,6 +719,76 @@ struct ib_uverbs_close_xrc_domain {
 	__u64 driver_data[0];
 };
 
+struct ib_uverbs_create_xrc_rcv_qp {
+	__u64 response;
+	__u64 user_handle;
+	__u32 xrc_domain_handle;
+	__u32 max_send_wr;
+	__u32 max_recv_wr;
+	__u32 max_send_sge;
+	__u32 max_recv_sge;
+	__u32 max_inline_data;
+	__u8  sq_sig_all;
+	__u8  qp_type;
+	__u8  reserved[2];
+	__u64 driver_data[0];
+};
+
+struct ib_uverbs_create_xrc_rcv_qp_resp {
+	__u32 qpn;
+	__u32 reserved;
+};
+
+struct ib_uverbs_modify_xrc_rcv_qp {
+	__u32 xrc_domain_handle;
+	__u32 qp_num;
+	struct ib_uverbs_qp_dest dest;
+	struct ib_uverbs_qp_dest alt_dest;
+	__u32 attr_mask;
+	__u32 qkey;
+	__u32 rq_psn;
+	__u32 sq_psn;
+	__u32 dest_qp_num;
+	__u32 qp_access_flags;
+	__u16 pkey_index;
+	__u16 alt_pkey_index;
+	__u8  qp_state;
+	__u8  cur_qp_state;
+	__u8  path_mtu;
+	__u8  path_mig_state;
+	__u8  en_sqd_async_notify;
+	__u8  max_rd_atomic;
+	__u8  max_dest_rd_atomic;
+	__u8  min_rnr_timer;
+	__u8  port_num;
+	__u8  timeout;
+	__u8  retry_cnt;
+	__u8  rnr_retry;
+	__u8  alt_port_num;
+	__u8  alt_timeout;
+	__u8  reserved[2];
+	__u64 driver_data[0];
+};
+
+struct ib_uverbs_query_xrc_rcv_qp {
+	__u64 response;
+	__u32 xrc_domain_handle;
+	__u32 qp_num;
+	__u32 attr_mask;
+	__u64 driver_data[0];
+};
+
+struct ib_uverbs_reg_xrc_rcv_qp {
+	__u32 xrc_domain_handle;
+	__u32 qp_num;
+	__u64 driver_data[0];
+};
+
+struct ib_uverbs_unreg_xrc_rcv_qp {
+	__u32 xrc_domain_handle;
+	__u32 qp_num;
+	__u64 driver_data[0];
+};
 
 
 #endif /* IB_USER_VERBS_H */
Index: infiniband/drivers/infiniband/core/uverbs.h
===================================================================
--- infiniband.orig/drivers/infiniband/core/uverbs.h	2008-01-28 12:20:55.000000000 +0200
+++ infiniband/drivers/infiniband/core/uverbs.h	2008-01-28 12:20:56.000000000 +0200
@@ -163,8 +163,12 @@ void ib_uverbs_qp_event_handler(struct i
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
+void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event,
+					void *context_ptr);
 void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
 			    struct ib_xrcd *xrcd);
+int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file,
+				 u32 domain_handle, u32 qp_num);
 
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
@@ -202,6 +206,11 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
 IB_UVERBS_DECLARE_CMD(create_xrc_srq);
 IB_UVERBS_DECLARE_CMD(open_xrc_domain);
 IB_UVERBS_DECLARE_CMD(close_xrc_domain);
+IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp);
 
 
 #endif /* UVERBS_H */




More information about the general mailing list